/* simulator.c -- Interface for the AArch64 simulator.

   Copyright (C) 2015-2016 Free Software Foundation, Inc.

   Contributed by Red Hat.

   This file is part of GDB.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <math.h>
#include <time.h>
#include <limits.h>

#include "simulator.h"
#include "cpustate.h"
#include "memory.h"

#define NO_SP 0
#define SP_OK 1

#define TST(_flag)   (aarch64_test_CPSR_bit (cpu, _flag))
#define IS_SET(_X)   (TST (( _X )) ? 1 : 0)
#define IS_CLEAR(_X) (TST (( _X )) ? 0 : 1)

/* Space saver macro.  */
#define INSTR(HIGH, LOW) uimm (aarch64_get_instr (cpu), (HIGH), (LOW))

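/* For example, INSTR (4, 0) extracts the Rt field of the current
   instruction and INSTR (9, 5) the Rn field; the decode helpers below
   use this pattern throughout.  */
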
#define HALT_UNALLOC \
  do \
    { \
      TRACE_DISASM (cpu, aarch64_get_PC (cpu)); \
      TRACE_INSN (cpu, \
                  "Unallocated instruction detected at sim line %d," \
                  " exe addr %" PRIx64, \
                  __LINE__, aarch64_get_PC (cpu)); \
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu), \
                       sim_stopped, SIM_SIGILL); \
    } \
  while (0)

#define HALT_NYI \
  do \
    { \
      TRACE_DISASM (cpu, aarch64_get_PC (cpu)); \
      TRACE_INSN (cpu, \
                  "Unimplemented instruction detected at sim line %d," \
                  " exe addr %" PRIx64, \
                  __LINE__, aarch64_get_PC (cpu)); \
      if (! TRACE_ANY_P (cpu)) \
        sim_io_eprintf (CPU_STATE (cpu), \
                        "SIM Error: Unimplemented instruction: %#08x\n", \
                        aarch64_get_instr (cpu)); \
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu), \
                       sim_stopped, SIM_SIGABRT); \
    } \
  while (0)

#define NYI_assert(HI, LO, EXPECTED) \
  do \
    { \
      if (INSTR ((HI), (LO)) != (EXPECTED)) \
        HALT_NYI; \
    } \
  while (0)

/* Helper functions used by expandLogicalImmediate.  */

/* For i = 1 .. N set result<i-1> to 1; all other bits are zero.  */
static inline uint64_t
ones (int N)
{
  /* Use a 64-bit constant for the shift: 1UL is only 32 bits wide on
     ILP32 hosts, which would make both arms of this expression wrong
     there.  */
  return (N == 64 ? (uint64_t) -1 : ((1ULL << N) - 1));
}

/* Return val<N>, i.e. bit N of VAL, in result<0>.  */
static inline uint64_t
pickbit (uint64_t val, int N)
{
  return pickbits64 (val, N, N);
}

static uint64_t
expand_logical_immediate (uint32_t S, uint32_t R, uint32_t N)
{
  uint64_t mask;
  uint64_t imm;
  unsigned simd_size;

  /* The immediate value is S + 1 bits set to 1, left rotated by
     SIMDsize - R (in other words, right rotated by R), then
     replicated.  */
  if (N != 0)
    {
      simd_size = 64;
      mask = 0xffffffffffffffffull;
    }
  else
    {
      switch (S)
        {
        case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32;           break;
        case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; S &= 0xf; break;
        case 0x30 ... 0x37: /* 110xxx */ simd_size =  8; S &= 0x7; break;
        case 0x38 ... 0x3b: /* 1110xx */ simd_size =  4; S &= 0x3; break;
        case 0x3c ... 0x3d: /* 11110x */ simd_size =  2; S &= 0x1; break;
        default: return 0;
        }
      mask = (1ull << simd_size) - 1;
      /* Top bits are IGNORED.  */
      R &= simd_size - 1;
    }

  /* NOTE: if S = simd_size - 1 we get 0xf..f which is rejected.  */
  if (S == simd_size - 1)
    return 0;

  /* S + 1 consecutive bits set to 1.  */
  /* NOTE: S cannot be 63 due to the detection above.  */
  imm = (1ull << (S + 1)) - 1;

  /* Rotate to the left by simd_size - R.  */
  if (R != 0)
    imm = ((imm << (simd_size - R)) & mask) | (imm >> R);

  /* Replicate the value according to SIMD size.  Each case
     deliberately falls through to the one below it, doubling the
     pattern each time.  */
  switch (simd_size)
    {
    case  2: imm = (imm <<  2) | imm; /* Fall through.  */
    case  4: imm = (imm <<  4) | imm; /* Fall through.  */
    case  8: imm = (imm <<  8) | imm; /* Fall through.  */
    case 16: imm = (imm << 16) | imm; /* Fall through.  */
    case 32: imm = (imm << 32) | imm; /* Fall through.  */
    case 64: break;
    default: return 0;
    }

  return imm;
}
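
/* As a worked example: with N = 0, S = 1 and R = 1 the element size
   is 32, the base pattern is 0b11, and rotating right by one gives
   0x80000001; replicating that to 64 bits yields the immediate
   0x8000000180000001.  */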

/* Instr[22,10] encodes N, immr and imms.  We want a lookup table
   for each possible combination, i.e. 13 bits worth of int entries.  */
#define LI_TABLE_SIZE (1 << 13)
static uint64_t LITable[LI_TABLE_SIZE];

void
aarch64_init_LIT_table (void)
{
  unsigned index;

  for (index = 0; index < LI_TABLE_SIZE; index++)
    {
      uint32_t N    = uimm (index, 12, 12);
      uint32_t immr = uimm (index, 11, 6);
      uint32_t imms = uimm (index, 5, 0);

      LITable [index] = expand_logical_immediate (imms, immr, N);
    }
}
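
/* A logical-immediate decoder can then recover the expanded value
   with a single lookup along the lines of LITable [INSTR (22, 10)];
   an entry of 0 marks an encoding that expand_logical_immediate
   rejected.  */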

static void
dexNotify (sim_cpu *cpu)
{
  /* instr[14,0] == type : 0 ==> method entry, 1 ==> method reentry
                           2 ==> exit Java, 3 ==> start next bytecode.  */
  uint32_t type = INSTR (14, 0);

  TRACE_EVENTS (cpu, "Notify Insn encountered, type = 0x%x", type);

  switch (type)
    {
    case 0:
      /* aarch64_notifyMethodEntry (aarch64_get_reg_u64 (cpu, R23, 0),
                                    aarch64_get_reg_u64 (cpu, R22, 0));  */
      break;
    case 1:
      /* aarch64_notifyMethodReentry (aarch64_get_reg_u64 (cpu, R23, 0),
                                      aarch64_get_reg_u64 (cpu, R22, 0));  */
      break;
    case 2:
      /* aarch64_notifyMethodExit ();  */
      break;
    case 3:
      /* aarch64_notifyBCStart (aarch64_get_reg_u64 (cpu, R23, 0),
                                aarch64_get_reg_u64 (cpu, R22, 0));  */
      break;
    }
}

/* Secondary decode within top level groups.  */

static void
dexPseudo (sim_cpu *cpu)
{
  /* assert instr[28,27] = 00

     We provide two pseudo instructions:

     HALT stops execution of the simulator causing an immediate
     return to the x86 code which entered it.

     CALLOUT initiates recursive entry into x86 code.  A register
     argument holds the address of the x86 routine.  Immediate
     values in the instruction identify the number of general
     purpose and floating point register arguments to be passed
     and the type of any value to be returned.  */

  uint32_t PSEUDO_HALT     = 0xE0000000U;
  uint32_t PSEUDO_CALLOUT  = 0x00018000U;
  uint32_t PSEUDO_CALLOUTR = 0x00018001U;
  uint32_t PSEUDO_NOTIFY   = 0x00014000U;
  uint32_t dispatch;

  if (aarch64_get_instr (cpu) == PSEUDO_HALT)
    {
      TRACE_EVENTS (cpu, " Pseudo Halt Instruction");
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                       sim_stopped, SIM_SIGTRAP);
    }

  dispatch = INSTR (31, 15);

  /* We do not handle callouts at the moment.  */
  if (dispatch == PSEUDO_CALLOUT || dispatch == PSEUDO_CALLOUTR)
    {
      TRACE_EVENTS (cpu, " Callout");
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                       sim_stopped, SIM_SIGABRT);
    }

  else if (dispatch == PSEUDO_NOTIFY)
    dexNotify (cpu);

  else
    HALT_UNALLOC;
}

/* Load-store single register (unscaled offset)
   These instructions employ a base register plus an unscaled signed
   9 bit offset.

   N.B. the base register (source) can be Xn or SP.  All other
   registers may not be SP.  */

/* 32 bit load 32 bit unscaled signed 9 bit.  */
static void
ldur32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 64 bit load 64 bit unscaled signed 9 bit.  */
static void
ldur64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 32 bit load zero-extended byte unscaled signed 9 bit.  */
static void
ldurb32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 32 bit load sign-extended byte unscaled signed 9 bit.  */
static void
ldursb32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 64 bit load sign-extended byte unscaled signed 9 bit.  */
static void
ldursb64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s8
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 32 bit load zero-extended short unscaled signed 9 bit.  */
static void
ldurh32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_mem_u16
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 32 bit load sign-extended short unscaled signed 9 bit.  */
static void
ldursh32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s16
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 64 bit load sign-extended short unscaled signed 9 bit.  */
static void
ldursh64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s16
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 64 bit load sign-extended word unscaled signed 9 bit.  */
static void
ldursw (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* Sign extend the loaded word into the 64 bit destination; a
     uint32_t cast here would zero extend instead.  */
  aarch64_set_reg_s64 (cpu, rd, NO_SP, aarch64_get_mem_s32
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* N.B. with stores the value in source is written to the address
   identified by source2 modified by offset.  */

/* 32 bit store 32 bit unscaled signed 9 bit.  */
static void
stur32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                       aarch64_get_reg_u32 (cpu, rd, NO_SP));
}

/* 64 bit store 64 bit unscaled signed 9 bit.  */
static void
stur64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                       aarch64_get_reg_u64 (cpu, rd, NO_SP));
}

/* 32 bit store byte unscaled signed 9 bit.  */
static void
sturb (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u8 (cpu,
                      aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                      aarch64_get_reg_u8 (cpu, rd, NO_SP));
}

/* 32 bit store short unscaled signed 9 bit.  */
static void
sturh (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                       aarch64_get_reg_u16 (cpu, rd, NO_SP));
}

/* Load single register pc-relative label
   Offset is a signed 19 bit immediate count in words
   rt may not be SP.  */

/* 32 bit pc-relative load.  */
static void
ldr32_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_mem_u32
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* 64 bit pc-relative load.  */
static void
ldr_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_mem_u64
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* Sign extended 32 bit pc-relative load.  */
static void
ldrsw_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_mem_s32
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* Float pc-relative load.  */
static void
fldrs_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, rd, 0,
                       aarch64_get_mem_u32
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* Double pc-relative load.  */
static void
fldrd_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int st = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, st, 0,
                       aarch64_get_mem_u64
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* Long double pc-relative load.  */
static void
fldrq_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int st = INSTR (4, 0);
  uint64_t addr = aarch64_get_PC (cpu) + offset * 4;
  FRegister a;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_get_mem_long_double (cpu, addr, & a);
  aarch64_set_FP_long_double (cpu, st, a);
}

/* This can be used to scale an offset by applying
   the requisite shift.  The second argument is either
   16, 32, 64 or 128.  */

#define SCALE(_offset, _elementSize) \
    ((_offset) << ScaleShift ## _elementSize)

/* This can be used to optionally scale a register derived offset
   by applying the requisite shift as indicated by the Scaling
   argument.  The second argument is the element size in bits, i.e.
   16, 32, 64 or 128.  The third argument is either Scaled or
   Unscaled.  N.B. when _Scaling is Scaled the element's scale shift
   is applied; when it is Unscaled the shift count is zero.  */

#define OPT_SCALE(_offset, _elementType, _Scaling) \
  ((_offset) << (_Scaling ? ScaleShift ## _elementType : 0))

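/* For instance, SCALE (offset, 32) expands to
   offset << ScaleShift32, the scale factor for word accesses, while
   OPT_SCALE (offset, 32, Unscaled) applies no shift at all.  */
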
/* This can be used to zero or sign extend a 32 bit register derived
   value to a 64 bit value.  The first argument must be the value as
   a uint32_t and the second must be either UXTW or SXTW.  The result
   is returned as an int64_t.  */

static inline int64_t
extend (uint32_t value, Extension extension)
{
  union
  {
    uint32_t u;
    int32_t n;
  } x;

  /* A branchless variant of this ought to be possible.  */
  if (extension == UXTW || extension == NoExtension)
    return value;

  x.u = value;
  return x.n;
}
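
/* For example, extend (0xffffffff, SXTW) returns -1 (all 64 bits
   set), whereas extend (0xffffffff, UXTW) returns
   0x00000000ffffffff.  */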

/* Scalar Floating Point

   FP load/store single register (4 addressing modes)

   N.B. the base register (source) can be the stack pointer.
   The secondary source register (source2) can only be an Xn register.  */

/* Load 32 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32 (cpu, address));
  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 8 bit with unsigned 12 bit offset.  */
static void
fldrb_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* Fetch only the byte being loaded; reading a wider quantity here
     could fault on an unmapped following page.  */
  aarch64_set_vec_u8 (cpu, rd, 0, aarch64_get_mem_u8 (cpu, addr));
}

/* Load 16 bit scaled unsigned 12 bit.  */
static void
fldrh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u16 (cpu, rd, 0, aarch64_get_mem_u16 (cpu, addr));
}

/* Load 32 bit scaled unsigned 12 bit.  */
static void
fldrs_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr));
}

/* Load 64 bit scaled unsigned 12 bit.  */
static void
fldrd_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
}

/* Load 128 bit scaled unsigned 12 bit.  */
static void
fldrq_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
  aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_mem_u64 (cpu, addr + 8));
}

/* Load 32 bit scaled or unscaled zero- or sign-extended
   32-bit register offset.  */
static void
fldrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
                       (cpu, address + displacement));
}

/* Load 64 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 64 bit scaled or unscaled zero- or sign-extended 32-bit
   register offset.  */
static void
fldrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  fldrd_wb (cpu, displacement, NoWriteBack);
}

/* Load 128 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  FRegister a;
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_get_mem_long_double (cpu, address, & a);
  aarch64_set_FP_long_double (cpu, st, a);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 128 bit scaled or unscaled zero- or sign-extended 32-bit
   register offset.  */
static void
fldrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 128, scaling);

  fldrq_wb (cpu, displacement, NoWriteBack);
}

/* Memory Access

   load-store single register
   There are four addressing modes available here which all employ a
   64 bit source (base) register.

   N.B. the base register (source) can be the stack pointer.
   The secondary source register (source2) can only be an Xn register.

   Scaled, 12-bit, unsigned immediate offset, without pre- and
   post-index options.
   Unscaled, 9-bit, signed immediate offset with pre- or post-index
   writeback.
   Scaled or unscaled 64-bit register offset.
   Scaled or unscaled 32-bit extended register offset.

   All offsets are assumed to be raw from the decode, i.e. the
   simulator is expected to scale an offset according to the size of
   the data being accessed.  The same applies to the register and
   extended register offset versions, except that in the latter case
   the operation may also require a sign extension.

   A separate method is provided for each possible addressing mode.  */
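
/* By way of example, a 32-bit immediate-offset LDR such as
   `LDR W1, [X2, #8]' encodes imm12 = 2; the handler below recovers
   the byte offset with SCALE (2, 32) before making the access.  */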

/* 32 bit load 32 bit scaled unsigned 12 bit.  */
static void
ldr32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + SCALE (offset, 32)));
}

/* 32 bit load 32 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
ldr32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load 32 bit scaled or unscaled
   zero- or sign-extended 32-bit register offset.  */
static void
ldr32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       aarch64_get_mem_u32 (cpu, address + displacement));
}

/* 64 bit load 64 bit scaled unsigned 12 bit.  */
static void
ldr_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + SCALE (offset, 64)));
}

/* 64 bit load 64 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
ldr_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load 64 bit scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldr_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       aarch64_get_mem_u64 (cpu, address + displacement));
}

/* 32 bit load zero-extended byte scaled unsigned 12 bit.  */
static void
ldrb32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be;
     there is no scaling required for a byte load.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       aarch64_get_mem_u8
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
}

/* 32 bit load zero-extended byte unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load zero-extended byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                 extension);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte load.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       aarch64_get_mem_u8 (cpu, address + displacement));
}

/* 64 bit load sign-extended byte unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;
  int64_t val;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s8 (cpu, address);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended byte scaled unsigned 12 bit.  */
static void
ldrsb_abs (sim_cpu *cpu, uint32_t offset)
{
  ldrsb_wb (cpu, offset, NoWriteBack);
}

/* 64 bit load sign-extended byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                 extension);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte load.  */
  aarch64_set_reg_s64 (cpu, rt, NO_SP,
                       aarch64_get_mem_s8 (cpu, address + displacement));
}

/* 32 bit load zero-extended short scaled unsigned 12 bit.  */
static void
ldrh32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint32_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 16));
  aarch64_set_reg_u32 (cpu, rt, NO_SP, val);
}

/* 32 bit load zero-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u32 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load zero-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u32 (cpu, rt, NO_SP,
                       aarch64_get_mem_u16 (cpu, address + displacement));
}

/* 32 bit load sign-extended short scaled unsigned 12 bit.  */
static void
ldrsh32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int32_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 16));
  aarch64_set_reg_s32 (cpu, rt, NO_SP, val);
}

/* 32 bit load sign-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s32 (cpu, rt, NO_SP,
                       (int32_t) aarch64_get_mem_s16 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load sign-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s32 (cpu, rt, NO_SP,
                       (int32_t) aarch64_get_mem_s16
                       (cpu, address + displacement));
}

/* 64 bit load sign-extended short scaled unsigned 12 bit.  */
static void
ldrsh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 16));
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsh64_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;
  int64_t val;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  val = aarch64_get_mem_s16 (cpu, address);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s16 (cpu, address + displacement);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended 32 bit scaled unsigned 12 bit.  */
static void
ldrsw_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 32));
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended 32 bit unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsw_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended 32 bit scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsw_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP,
                       aarch64_get_mem_s32 (cpu, address + displacement));
}

/* N.B. with stores the value in source is written to the
   address identified by source2 modified by source3/offset.  */

/* 32 bit store scaled unsigned 12 bit.  */
static void
str32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_mem_u32 (cpu, (aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 32)),
                       aarch64_get_reg_u32 (cpu, rt, NO_SP));
}

/* 32 bit store unscaled signed 9 bit with pre- or post-writeback.  */
static void
str32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu, address, aarch64_get_reg_u32 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store scaled or unscaled zero- or
   sign-extended 32-bit register offset.  */
static void
str32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* Only the low 32 bits of rt are stored.  */
  aarch64_set_mem_u32 (cpu, address + displacement,
                       aarch64_get_reg_u32 (cpu, rt, NO_SP));
}

/* 64 bit store scaled unsigned 12 bit.  */
static void
str_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK)
                       + SCALE (offset, 64),
                       aarch64_get_reg_u64 (cpu, rt, NO_SP));
}

/* 64 bit store unscaled signed 9 bit with pre- or post-writeback.  */
static void
str_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, address, aarch64_get_reg_u64 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit store scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
str_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                             extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, address + displacement,
                       aarch64_get_reg_u64 (cpu, rt, NO_SP));
}

/* 32 bit store byte scaled unsigned 12 bit.  */
static void
strb_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.
     There is no scaling required for a byte store.  */
  aarch64_set_mem_u8 (cpu,
                      aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                      aarch64_get_reg_u8 (cpu, rt, NO_SP));
}

/* 32 bit store byte unscaled signed 9 bit with pre- or post-writeback.  */
static void
strb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u8 (cpu, address, aarch64_get_reg_u8 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
strb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                 extension);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte store.  */
  aarch64_set_mem_u8 (cpu, address + displacement,
                      aarch64_get_reg_u8 (cpu, rt, NO_SP));
}

/* 32 bit store short scaled unsigned 12 bit.  */
static void
strh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                       + SCALE (offset, 16),
                       aarch64_get_reg_u16 (cpu, rt, NO_SP));
}

/* 32 bit store short unscaled signed 9 bit with pre- or post-writeback.  */
static void
strh_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu, address, aarch64_get_reg_u16 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
strh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu, address + displacement,
                       aarch64_get_reg_u16 (cpu, rt, NO_SP));
}

/* Prefetch unsigned 12 bit.  */
static void
prfm_abs (sim_cpu *cpu, uint32_t offset)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + SCALE (offset, 64).  */

  /* TODO : implement prefetch of address.  */
}

/* Prefetch scaled or unscaled zero- or sign-extended 32-bit register offset.  */
static void
prfm_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     rn may reference SP, rm may only reference ZR
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t base = aarch64_get_reg_u64 (cpu, rn, SP_OK);
     int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                extension);
     uint64_t displacement = OPT_SCALE (extended, 64, scaling);
     uint64_t address = base + displacement.  */

  /* TODO : implement prefetch of address.  */
}

/* 64 bit pc-relative prefetch.  */
static void
prfm_pcrel (sim_cpu *cpu, int32_t offset)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t address = aarch64_get_PC (cpu) + offset.  */

  /* TODO : implement this.  */
}

/* Load-store exclusive.  */

static void
ldxr (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int size = INSTR (31, 30);
  /* int ordered = INSTR (15, 15);  */
  /* int exclusive = ! INSTR (23, 23);  */

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (size)
    {
    case 0:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
      break;
    case 1:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
      break;
    case 2:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
      break;
    case 3:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
      break;
    }
}

static void
stxr (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  unsigned rs = INSTR (20, 16);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int size = INSTR (31, 30);
  uint64_t data = aarch64_get_reg_u64 (cpu, rt, NO_SP);

  switch (size)
    {
    case 0: aarch64_set_mem_u8 (cpu, address, data); break;
    case 1: aarch64_set_mem_u16 (cpu, address, data); break;
    case 2: aarch64_set_mem_u32 (cpu, address, data); break;
    case 3: aarch64_set_mem_u64 (cpu, address, data); break;
    }

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* Exclusive monitors are not modelled, so the store-exclusive
     always succeeds.  */
  aarch64_set_reg_u64 (cpu, rs, NO_SP, 0);
}

static void
dexLoadLiteral (sim_cpu *cpu)
{
  /* instr[29,27] == 011
     instr[25,24] == 00
     instr[31,30] = opc and instr[26] = V (0 ==> GReg, 1 ==> FReg)
     combine to give the dispatch:
                    000 ==> LDRW,  001 ==> FLDRS
                    010 ==> LDRX,  011 ==> FLDRD
                    100 ==> LDRSW, 101 ==> FLDRQ
                    110 ==> PRFM,  111 ==> UNALLOC
     instr[23,5] == simm19  */

  /* unsigned rt = INSTR (4, 0);  */
  uint32_t dispatch = (INSTR (31, 30) << 1) | INSTR (26, 26);
  int32_t imm = simm32 (aarch64_get_instr (cpu), 23, 5);

  switch (dispatch)
    {
    case 0: ldr32_pcrel (cpu, imm); break;
    case 1: fldrs_pcrel (cpu, imm); break;
    case 2: ldr_pcrel   (cpu, imm); break;
    case 3: fldrd_pcrel (cpu, imm); break;
    case 4: ldrsw_pcrel (cpu, imm); break;
    case 5: fldrq_pcrel (cpu, imm); break;
    case 6: prfm_pcrel  (cpu, imm); break;
    case 7:
    default:
      HALT_UNALLOC;
    }
}
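
/* For instance, `LDR X1, label' has opc = 01 and V = 0, so dispatch
   is 2 and ldr_pcrel performs the access; the simm19 word offset has
   already been sign extended by simm32 above.  */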

/* Immediate arithmetic
   The aimm argument is a 12 bit unsigned value or a 12 bit unsigned
   value left shifted by 12 bits (done at decode).

   N.B. the register args (dest, source) can normally be Xn or SP.
   The exception occurs for flag setting instructions which may
   only use Xn for the output (dest).  */

/* 32 bit add immediate.  */
static void
add32 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
                       aarch64_get_reg_u32 (cpu, rn, SP_OK) + aimm);
}

/* 64 bit add immediate.  */
static void
add64 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + aimm);
}

static void
set_flags_for_add32 (sim_cpu *cpu, int32_t value1, int32_t value2)
{
  int32_t result = value1 + value2;
  int64_t sresult = (int64_t) value1 + (int64_t) value2;
  uint64_t uresult = (uint64_t) (uint32_t) value1
    + (uint64_t) (uint32_t) value2;
  uint32_t flags = 0;

  if (result == 0)
    flags |= Z;

  if (result & (1 << 31))
    flags |= N;

  /* Compare against the zero-extended 32 bit result: comparing the
     int32_t directly would sign extend it and wrongly set C for
     every negative result.  */
  if (uresult != (uint32_t) result)
    flags |= C;

  if (sresult != result)
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

#define NEG(a) (((a) & signbit) == signbit)
#define POS(a) (((a) & signbit) == 0)

static void
set_flags_for_add64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
{
  uint64_t result = value1 + value2;
  uint32_t flags = 0;
  uint64_t signbit = 1ULL << 63;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if (   (NEG (value1) && NEG (value2))
      || (NEG (value1) && POS (result))
      || (NEG (value2) && POS (result)))
    flags |= C;

  if (   (NEG (value1) && NEG (value2) && POS (result))
      || (POS (value1) && POS (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}
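
/* A quick sanity check of the logic above: adding
   0x8000000000000000 to itself wraps to zero, so Z is set, C is set
   (both operands negative implies an unsigned carry out) and V is
   set, since two negative operands produced a non-negative
   result.  */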

static void
set_flags_for_sub32 (sim_cpu *cpu, uint32_t value1, uint32_t value2)
{
  uint32_t result = value1 - value2;
  uint32_t flags = 0;
  uint32_t signbit = 1U << 31;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if (   (NEG (value1) && POS (value2))
      || (NEG (value1) && POS (result))
      || (POS (value2) && POS (result)))
    flags |= C;

  if (   (NEG (value1) && POS (value2) && POS (result))
      || (POS (value1) && NEG (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_sub64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
{
  uint64_t result = value1 - value2;
  uint32_t flags = 0;
  uint64_t signbit = 1ULL << 63;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if (   (NEG (value1) && POS (value2))
      || (NEG (value1) && POS (result))
      || (POS (value2) && POS (result)))
    flags |= C;

  if (   (NEG (value1) && POS (value2) && POS (result))
      || (POS (value1) && NEG (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}
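
/* Note the AArch64 convention that subtraction sets C when there is
   no borrow, e.g. 5 - 3 leaves C set while 3 - 5 clears it.  */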

static void
set_flags_for_binop32 (sim_cpu *cpu, uint32_t result)
{
  uint32_t flags = 0;

  if (result == 0)
    flags |= Z;
  else
    flags &= ~ Z;

  if (result & (1 << 31))
    flags |= N;
  else
    flags &= ~ N;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_binop64 (sim_cpu *cpu, uint64_t result)
{
  uint32_t flags = 0;

  if (result == 0)
    flags |= Z;
  else
    flags &= ~ Z;

  if (result & (1ULL << 63))
    flags |= N;
  else
    flags &= ~ N;

  aarch64_set_CPSR (cpu, flags);
}

/* 32 bit add immediate set flags.  */
static void
adds32 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  /* TODO : do we need to worry about signs here?  */
  int32_t value1 = aarch64_get_reg_s32 (cpu, rn, SP_OK);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + aimm);
  set_flags_for_add32 (cpu, value1, aimm);
}

/* 64 bit add immediate set flags.  */
static void
adds64 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  uint64_t value2 = aimm;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
  set_flags_for_add64 (cpu, value1, value2);
}

/* 32 bit sub immediate.  */
static void
sub32 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
                       aarch64_get_reg_u32 (cpu, rn, SP_OK) - aimm);
}

/* 64 bit sub immediate.  */
static void
sub64 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) - aimm);
}

/* 32 bit sub immediate set flags.  */
static void
subs32 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  uint32_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  uint32_t value2 = aimm;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
  set_flags_for_sub32 (cpu, value1, value2);
}

/* 64 bit sub immediate set flags.  */
static void
subs64 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  uint32_t value2 = aimm;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
  set_flags_for_sub64 (cpu, value1, value2);
}
1855
1856 /* Data Processing Register. */
1857
1858 /* First two helpers to perform the shift operations. */
1859
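/* N.B. the ASR cases below rely on right-shifting a negative signed
   value being an arithmetic shift.  GCC guarantees this, but ISO C
   leaves it implementation-defined.  */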
1860 static inline uint32_t
1861 shifted32 (uint32_t value, Shift shift, uint32_t count)
1862 {
1863 switch (shift)
1864 {
1865 default:
1866 case LSL:
1867 return (value << count);
1868 case LSR:
1869 return (value >> count);
1870 case ASR:
1871 {
1872 int32_t svalue = value;
1873 return (svalue >> count);
1874 }
    case ROR:
      {
        uint32_t top, bottom;

        /* A rotate by zero is the identity; handling it separately
           also avoids an undefined full-width shift below.  */
        if (count == 0)
          return value;

        top = value >> count;
        bottom = value << (32 - count);
        return (bottom | top);
      }
1881 }
1882 }
1883
1884 static inline uint64_t
1885 shifted64 (uint64_t value, Shift shift, uint32_t count)
1886 {
1887 switch (shift)
1888 {
1889 default:
1890 case LSL:
1891 return (value << count);
1892 case LSR:
1893 return (value >> count);
1894 case ASR:
1895 {
1896 int64_t svalue = value;
1897 return (svalue >> count);
1898 }
    case ROR:
      {
        uint64_t top, bottom;

        /* A rotate by zero is the identity; handling it separately
           also avoids an undefined full-width shift below.  */
        if (count == 0)
          return value;

        top = value >> count;
        bottom = value << (64 - count);
        return (bottom | top);
      }
1905 }
1906 }
1907
1908 /* Arithmetic shifted register.
1909 These allow an optional LSL, ASR or LSR to the second source
1910 register with a count up to the register bit count.
1911
1912 N.B register args may not be SP. */
1913
1914 /* 32 bit ADD shifted register. */
1915 static void
1916 add32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1917 {
1918 unsigned rm = INSTR (20, 16);
1919 unsigned rn = INSTR (9, 5);
1920 unsigned rd = INSTR (4, 0);
1921
1922 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1923 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1924 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1925 + shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1926 shift, count));
1927 }
1928
1929 /* 64 bit ADD shifted register. */
1930 static void
1931 add64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1932 {
1933 unsigned rm = INSTR (20, 16);
1934 unsigned rn = INSTR (9, 5);
1935 unsigned rd = INSTR (4, 0);
1936
1937 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1938 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1939 aarch64_get_reg_u64 (cpu, rn, NO_SP)
1940 + shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1941 shift, count));
1942 }
1943
1944 /* 32 bit ADD shifted register setting flags. */
1945 static void
1946 adds32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1947 {
1948 unsigned rm = INSTR (20, 16);
1949 unsigned rn = INSTR (9, 5);
1950 unsigned rd = INSTR (4, 0);
1951
1952 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
1953 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1954 shift, count);
1955
1956 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1957 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1958 set_flags_for_add32 (cpu, value1, value2);
1959 }
1960
1961 /* 64 bit ADD shifted register setting flags. */
1962 static void
1963 adds64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1964 {
1965 unsigned rm = INSTR (20, 16);
1966 unsigned rn = INSTR (9, 5);
1967 unsigned rd = INSTR (4, 0);
1968
1969 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
1970 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1971 shift, count);
1972
1973 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1974 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1975 set_flags_for_add64 (cpu, value1, value2);
1976 }
1977
1978 /* 32 bit SUB shifted register. */
1979 static void
1980 sub32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1981 {
1982 unsigned rm = INSTR (20, 16);
1983 unsigned rn = INSTR (9, 5);
1984 unsigned rd = INSTR (4, 0);
1985
1986 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1987 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1988 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1989 - shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1990 shift, count));
1991 }
1992
1993 /* 64 bit SUB shifted register. */
1994 static void
1995 sub64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1996 {
1997 unsigned rm = INSTR (20, 16);
1998 unsigned rn = INSTR (9, 5);
1999 unsigned rd = INSTR (4, 0);
2000
2001 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2002 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2003 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2004 - shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
2005 shift, count));
2006 }
2007
2008 /* 32 bit SUB shifted register setting flags. */
2009 static void
2010 subs32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2011 {
2012 unsigned rm = INSTR (20, 16);
2013 unsigned rn = INSTR (9, 5);
2014 unsigned rd = INSTR (4, 0);
2015
2016 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2017 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
2018 shift, count);
2019
2020 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2021 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2022 set_flags_for_sub32 (cpu, value1, value2);
2023 }
2024
2025 /* 64 bit SUB shifted register setting flags. */
2026 static void
2027 subs64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2028 {
2029 unsigned rm = INSTR (20, 16);
2030 unsigned rn = INSTR (9, 5);
2031 unsigned rd = INSTR (4, 0);
2032
2033 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2034 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
2035 shift, count);
2036
2037 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2038 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2039 set_flags_for_sub64 (cpu, value1, value2);
2040 }
2041
2042 /* First a couple more helpers to fetch the
2043 relevant source register element either
2044 sign or zero extended as required by the
2045 extension value. */
2046
2047 static uint32_t
2048 extreg32 (sim_cpu *cpu, unsigned int lo, Extension extension)
2049 {
2050 switch (extension)
2051 {
2052 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2053 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2054 case UXTW: /* Fall through. */
2055 case UXTX: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2056 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2057 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2058 case SXTW: /* Fall through. */
2059 case SXTX: /* Fall through. */
2060 default: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2061 }
2062 }
2063
2064 static uint64_t
2065 extreg64 (sim_cpu *cpu, unsigned int lo, Extension extension)
2066 {
2067 switch (extension)
2068 {
2069 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2070 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2071 case UXTW: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2072 case UXTX: return aarch64_get_reg_u64 (cpu, lo, NO_SP);
2073 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2074 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2075 case SXTW: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2076 case SXTX:
2077 default: return aarch64_get_reg_s64 (cpu, lo, NO_SP);
2078 }
2079 }
2080
/* Arithmetic extending register
   These allow a sign or zero extension of some portion of the second
   source register followed by an optional left shift of between 0
   and 4 bits.
2085
2086 N.B output (dest) and first input arg (source) may normally be Xn
2087 or SP. However, for flag setting operations dest can only be
2088 Xn. Second input registers are always Xn. */
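/* For example, ADD X0, SP, W1, UXTW #2 computes SP plus four times
   the zero-extended low word of register 1.  */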
2089
2090 /* 32 bit ADD extending register. */
2091 static void
2092 add32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2093 {
2094 unsigned rm = INSTR (20, 16);
2095 unsigned rn = INSTR (9, 5);
2096 unsigned rd = INSTR (4, 0);
2097
2098 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2099 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2100 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2101 + (extreg32 (cpu, rm, extension) << shift));
2102 }
2103
2104 /* 64 bit ADD extending register.
2105 N.B. This subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2106 static void
2107 add64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2108 {
2109 unsigned rm = INSTR (20, 16);
2110 unsigned rn = INSTR (9, 5);
2111 unsigned rd = INSTR (4, 0);
2112
2113 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2114 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2115 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2116 + (extreg64 (cpu, rm, extension) << shift));
2117 }
2118
2119 /* 32 bit ADD extending register setting flags. */
2120 static void
2121 adds32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2122 {
2123 unsigned rm = INSTR (20, 16);
2124 unsigned rn = INSTR (9, 5);
2125 unsigned rd = INSTR (4, 0);
2126
2127 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2128 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2129
2130 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2131 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2132 set_flags_for_add32 (cpu, value1, value2);
2133 }
2134
2135 /* 64 bit ADD extending register setting flags */
2136 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2137 static void
2138 adds64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2139 {
2140 unsigned rm = INSTR (20, 16);
2141 unsigned rn = INSTR (9, 5);
2142 unsigned rd = INSTR (4, 0);
2143
2144 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2145 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2146
2147 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2148 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2149 set_flags_for_add64 (cpu, value1, value2);
2150 }
2151
2152 /* 32 bit SUB extending register. */
2153 static void
2154 sub32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2155 {
2156 unsigned rm = INSTR (20, 16);
2157 unsigned rn = INSTR (9, 5);
2158 unsigned rd = INSTR (4, 0);
2159
2160 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2161 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2162 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2163 - (extreg32 (cpu, rm, extension) << shift));
2164 }
2165
2166 /* 64 bit SUB extending register. */
2167 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2168 static void
2169 sub64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2170 {
2171 unsigned rm = INSTR (20, 16);
2172 unsigned rn = INSTR (9, 5);
2173 unsigned rd = INSTR (4, 0);
2174
2175 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2176 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2177 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2178 - (extreg64 (cpu, rm, extension) << shift));
2179 }
2180
2181 /* 32 bit SUB extending register setting flags. */
2182 static void
2183 subs32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2184 {
2185 unsigned rm = INSTR (20, 16);
2186 unsigned rn = INSTR (9, 5);
2187 unsigned rd = INSTR (4, 0);
2188
2189 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2190 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2191
2192 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2193 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2194 set_flags_for_sub32 (cpu, value1, value2);
2195 }
2196
2197 /* 64 bit SUB extending register setting flags */
2198 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2199 static void
2200 subs64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2201 {
2202 unsigned rm = INSTR (20, 16);
2203 unsigned rn = INSTR (9, 5);
2204 unsigned rd = INSTR (4, 0);
2205
2206 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2207 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2208
2209 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2210 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2211 set_flags_for_sub64 (cpu, value1, value2);
2212 }
2213
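/* For example, 0x91000421 is ADD X1, X1, #1: instr[31,29] = 100
   selects the 64-bit, non flag-setting add, the shift field is zero
   and uimm12 is 1.  */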
2214 static void
2215 dexAddSubtractImmediate (sim_cpu *cpu)
2216 {
2217 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2218 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2219 instr[29] = set : 0 ==> no flags, 1 ==> set flags
2220 instr[28,24] = 10001
     instr[23,22] = shift : 00 ==> LSL#0, 01 ==> LSL#12, 1x ==> UNALLOC
2222 instr[21,10] = uimm12
2223 instr[9,5] = Rn
2224 instr[4,0] = Rd */
2225
2226 /* N.B. the shift is applied at decode before calling the add/sub routine. */
2227 uint32_t shift = INSTR (23, 22);
2228 uint32_t imm = INSTR (21, 10);
2229 uint32_t dispatch = INSTR (31, 29);
2230
2231 NYI_assert (28, 24, 0x11);
2232
2233 if (shift > 1)
2234 HALT_UNALLOC;
2235
2236 if (shift)
2237 imm <<= 12;
2238
2239 switch (dispatch)
2240 {
2241 case 0: add32 (cpu, imm); break;
2242 case 1: adds32 (cpu, imm); break;
2243 case 2: sub32 (cpu, imm); break;
2244 case 3: subs32 (cpu, imm); break;
2245 case 4: add64 (cpu, imm); break;
2246 case 5: adds64 (cpu, imm); break;
2247 case 6: sub64 (cpu, imm); break;
2248 case 7: subs64 (cpu, imm); break;
2249 }
2250 }
2251
2252 static void
2253 dexAddSubtractShiftedRegister (sim_cpu *cpu)
2254 {
2255 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2256 instr[30,29] = op : 00 ==> ADD, 01 ==> ADDS, 10 ==> SUB, 11 ==> SUBS
2257 instr[28,24] = 01011
2258 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> UNALLOC
2259 instr[21] = 0
2260 instr[20,16] = Rm
2261 instr[15,10] = count : must be 0xxxxx for 32 bit
2262 instr[9,5] = Rn
2263 instr[4,0] = Rd */
2264
2265 uint32_t size = INSTR (31, 31);
2266 uint32_t count = INSTR (15, 10);
2267 Shift shiftType = INSTR (23, 22);
2268
2269 NYI_assert (28, 24, 0x0B);
2270 NYI_assert (21, 21, 0);
2271
2272 /* Shift encoded as ROR is unallocated. */
2273 if (shiftType == ROR)
2274 HALT_UNALLOC;
2275
2276 /* 32 bit operations must have count[5] = 0
2277 or else we have an UNALLOC. */
2278 if (size == 0 && uimm (count, 5, 5))
2279 HALT_UNALLOC;
2280
2281 /* Dispatch on size:op i.e instr [31,29]. */
2282 switch (INSTR (31, 29))
2283 {
2284 case 0: add32_shift (cpu, shiftType, count); break;
2285 case 1: adds32_shift (cpu, shiftType, count); break;
2286 case 2: sub32_shift (cpu, shiftType, count); break;
2287 case 3: subs32_shift (cpu, shiftType, count); break;
2288 case 4: add64_shift (cpu, shiftType, count); break;
2289 case 5: adds64_shift (cpu, shiftType, count); break;
2290 case 6: sub64_shift (cpu, shiftType, count); break;
2291 case 7: subs64_shift (cpu, shiftType, count); break;
2292 }
2293 }
2294
2295 static void
2296 dexAddSubtractExtendedRegister (sim_cpu *cpu)
2297 {
2298 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2299 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2300 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2301 instr[28,24] = 01011
2302 instr[23,22] = opt : 0 ==> ok, 1,2,3 ==> UNALLOC
2303 instr[21] = 1
2304 instr[20,16] = Rm
     instr[15,13] = option : 000 ==> UXTB, 001 ==> UXTH,
                             010 ==> UXTW, 011 ==> UXTX|LSL,
                             100 ==> SXTB, 101 ==> SXTH,
                             110 ==> SXTW, 111 ==> SXTX
2309 instr[12,10] = shift : 0,1,2,3,4 ==> ok, 5,6,7 ==> UNALLOC
2310 instr[9,5] = Rn
2311 instr[4,0] = Rd */
2312
2313 Extension extensionType = INSTR (15, 13);
2314 uint32_t shift = INSTR (12, 10);
2315
2316 NYI_assert (28, 24, 0x0B);
2317 NYI_assert (21, 21, 1);
2318
2319 /* Shift may not exceed 4. */
2320 if (shift > 4)
2321 HALT_UNALLOC;
2322
2323 /* Dispatch on size:op:set?. */
2324 switch (INSTR (31, 29))
2325 {
2326 case 0: add32_ext (cpu, extensionType, shift); break;
2327 case 1: adds32_ext (cpu, extensionType, shift); break;
2328 case 2: sub32_ext (cpu, extensionType, shift); break;
2329 case 3: subs32_ext (cpu, extensionType, shift); break;
2330 case 4: add64_ext (cpu, extensionType, shift); break;
2331 case 5: adds64_ext (cpu, extensionType, shift); break;
2332 case 6: sub64_ext (cpu, extensionType, shift); break;
2333 case 7: subs64_ext (cpu, extensionType, shift); break;
2334 }
2335 }
2336
2337 /* Conditional data processing
2338 Condition register is implicit 3rd source. */
2339
2340 /* 32 bit add with carry. */
2341 /* N.B register args may not be SP. */
2342
2343 static void
2344 adc32 (sim_cpu *cpu)
2345 {
2346 unsigned rm = INSTR (20, 16);
2347 unsigned rn = INSTR (9, 5);
2348 unsigned rd = INSTR (4, 0);
2349
2350 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2351 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2352 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2353 + aarch64_get_reg_u32 (cpu, rm, NO_SP)
2354 + IS_SET (C));
2355 }
2356
2357 /* 64 bit add with carry */
2358 static void
2359 adc64 (sim_cpu *cpu)
2360 {
2361 unsigned rm = INSTR (20, 16);
2362 unsigned rn = INSTR (9, 5);
2363 unsigned rd = INSTR (4, 0);
2364
2365 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2366 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2367 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2368 + aarch64_get_reg_u64 (cpu, rm, NO_SP)
2369 + IS_SET (C));
2370 }
2371
2372 /* 32 bit add with carry setting flags. */
2373 static void
2374 adcs32 (sim_cpu *cpu)
2375 {
2376 unsigned rm = INSTR (20, 16);
2377 unsigned rn = INSTR (9, 5);
2378 unsigned rd = INSTR (4, 0);
2379
2380 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2381 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2382 uint32_t carry = IS_SET (C);
2383
2384 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2385 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2386 set_flags_for_add32 (cpu, value1, value2 + carry);
2387 }
2388
2389 /* 64 bit add with carry setting flags. */
2390 static void
2391 adcs64 (sim_cpu *cpu)
2392 {
2393 unsigned rm = INSTR (20, 16);
2394 unsigned rn = INSTR (9, 5);
2395 unsigned rd = INSTR (4, 0);
2396
2397 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2398 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2399 uint64_t carry = IS_SET (C);
2400
2401 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2402 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2403 set_flags_for_add64 (cpu, value1, value2 + carry);
2404 }
2405
2406 /* 32 bit sub with carry. */
2407 static void
2408 sbc32 (sim_cpu *cpu)
2409 {
2410 unsigned rm = INSTR (20, 16);
2411 unsigned rn = INSTR (9, 5); /* ngc iff rn == 31. */
2412 unsigned rd = INSTR (4, 0);
2413
2414 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2415 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2416 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2417 - aarch64_get_reg_u32 (cpu, rm, NO_SP)
2418 - 1 + IS_SET (C));
2419 }
2420
2421 /* 64 bit sub with carry */
2422 static void
2423 sbc64 (sim_cpu *cpu)
2424 {
2425 unsigned rm = INSTR (20, 16);
2426 unsigned rn = INSTR (9, 5);
2427 unsigned rd = INSTR (4, 0);
2428
2429 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2430 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2431 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2432 - aarch64_get_reg_u64 (cpu, rm, NO_SP)
2433 - 1 + IS_SET (C));
2434 }
2435
2436 /* 32 bit sub with carry setting flags */
2437 static void
2438 sbcs32 (sim_cpu *cpu)
2439 {
2440 unsigned rm = INSTR (20, 16);
2441 unsigned rn = INSTR (9, 5);
2442 unsigned rd = INSTR (4, 0);
2443
2444 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2445 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2446 uint32_t carry = IS_SET (C);
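  /* The carry flag is the inverted borrow: the result is
     value1 - value2 - 1 + carry, matching sbc32 above.  */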
  uint32_t result = value1 - value2 - 1 + carry;
2448
2449 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2450 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2451 set_flags_for_sub32 (cpu, value1, value2 + 1 - carry);
2452 }
2453
2454 /* 64 bit sub with carry setting flags */
2455 static void
2456 sbcs64 (sim_cpu *cpu)
2457 {
2458 unsigned rm = INSTR (20, 16);
2459 unsigned rn = INSTR (9, 5);
2460 unsigned rd = INSTR (4, 0);
2461
2462 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2463 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2464 uint64_t carry = IS_SET (C);
  uint64_t result = value1 - value2 - 1 + carry;
2466
2467 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2468 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2469 set_flags_for_sub64 (cpu, value1, value2 + 1 - carry);
2470 }
2471
2472 static void
2473 dexAddSubtractWithCarry (sim_cpu *cpu)
2474 {
2475 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2476 instr[30] = op : 0 ==> ADC, 1 ==> SBC
2477 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2478 instr[28,21] = 1 1010 000
2479 instr[20,16] = Rm
     instr[15,10] = op2 : 000000 ==> ok, otherwise ==> UNALLOC
2481 instr[9,5] = Rn
2482 instr[4,0] = Rd */
2483
2484 uint32_t op2 = INSTR (15, 10);
2485
2486 NYI_assert (28, 21, 0xD0);
2487
2488 if (op2 != 0)
2489 HALT_UNALLOC;
2490
2491 /* Dispatch on size:op:set?. */
2492 switch (INSTR (31, 29))
2493 {
2494 case 0: adc32 (cpu); break;
2495 case 1: adcs32 (cpu); break;
2496 case 2: sbc32 (cpu); break;
2497 case 3: sbcs32 (cpu); break;
2498 case 4: adc64 (cpu); break;
2499 case 5: adcs64 (cpu); break;
2500 case 6: sbc64 (cpu); break;
2501 case 7: sbcs64 (cpu); break;
2502 }
2503 }
2504
2505 static uint32_t
2506 testConditionCode (sim_cpu *cpu, CondCode cc)
2507 {
  /* This should be reducible to branchless logic
2509 by some careful testing of bits in CC followed
2510 by the requisite masking and combining of bits
2511 from the flag register.
2512
2513 For now we do it with a switch. */
2514 int res;
2515
2516 switch (cc)
2517 {
2518 case EQ: res = IS_SET (Z); break;
2519 case NE: res = IS_CLEAR (Z); break;
2520 case CS: res = IS_SET (C); break;
2521 case CC: res = IS_CLEAR (C); break;
2522 case MI: res = IS_SET (N); break;
2523 case PL: res = IS_CLEAR (N); break;
2524 case VS: res = IS_SET (V); break;
2525 case VC: res = IS_CLEAR (V); break;
2526 case HI: res = IS_SET (C) && IS_CLEAR (Z); break;
2527 case LS: res = IS_CLEAR (C) || IS_SET (Z); break;
2528 case GE: res = IS_SET (N) == IS_SET (V); break;
2529 case LT: res = IS_SET (N) != IS_SET (V); break;
2530 case GT: res = IS_CLEAR (Z) && (IS_SET (N) == IS_SET (V)); break;
2531 case LE: res = IS_SET (Z) || (IS_SET (N) != IS_SET (V)); break;
2532 case AL:
2533 case NV:
2534 default:
2535 res = 1;
2536 break;
2537 }
2538 return res;
2539 }
2540
2541 static void
2542 CondCompare (sim_cpu *cpu) /* aka: ccmp and ccmn */
2543 {
2544 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2545 instr[30] = compare with positive (1) or negative value (0)
2546 instr[29,21] = 1 1101 0010
2547 instr[20,16] = Rm or const
2548 instr[15,12] = cond
2549 instr[11] = compare reg (0) or const (1)
2550 instr[10] = 0
2551 instr[9,5] = Rn
2552 instr[4] = 0
2553 instr[3,0] = value for CPSR bits if the comparison does not take place. */
2554 signed int negate;
2555 unsigned rm;
2556 unsigned rn;
2557
2558 NYI_assert (29, 21, 0x1d2);
2559 NYI_assert (10, 10, 0);
2560 NYI_assert (4, 4, 0);
2561
2562 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2563 if (! testConditionCode (cpu, INSTR (15, 12)))
2564 {
2565 aarch64_set_CPSR (cpu, INSTR (3, 0));
2566 return;
2567 }
2568
2569 negate = INSTR (30, 30) ? 1 : -1;
2570 rm = INSTR (20, 16);
2571 rn = INSTR ( 9, 5);
2572
2573 if (INSTR (31, 31))
2574 {
2575 if (INSTR (11, 11))
2576 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2577 negate * (uint64_t) rm);
2578 else
2579 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2580 negate * aarch64_get_reg_u64 (cpu, rm, SP_OK));
2581 }
2582 else
2583 {
2584 if (INSTR (11, 11))
2585 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2586 negate * rm);
2587 else
2588 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2589 negate * aarch64_get_reg_u32 (cpu, rm, SP_OK));
2590 }
2591 }
2592
2593 static void
2594 do_vec_MOV_whole_vector (sim_cpu *cpu)
2595 {
2596 /* MOV Vd.T, Vs.T (alias for ORR Vd.T, Vn.T, Vm.T where Vn == Vm)
2597
2598 instr[31] = 0
2599 instr[30] = half(0)/full(1)
2600 instr[29,21] = 001110101
2601 instr[20,16] = Vs
2602 instr[15,10] = 000111
2603 instr[9,5] = Vs
2604 instr[4,0] = Vd */
2605
2606 unsigned vs = INSTR (9, 5);
2607 unsigned vd = INSTR (4, 0);
2608
2609 NYI_assert (29, 21, 0x075);
2610 NYI_assert (15, 10, 0x07);
2611
2612 if (INSTR (20, 16) != vs)
2613 HALT_NYI;
2614
2615 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2616 if (INSTR (30, 30))
2617 aarch64_set_vec_u64 (cpu, vd, 1, aarch64_get_vec_u64 (cpu, vs, 1));
2618
2619 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vs, 0));
2620 }
2621
2622 static void
2623 do_vec_MOV_into_scalar (sim_cpu *cpu)
2624 {
2625 /* instr[31] = 0
2626 instr[30] = word(0)/long(1)
2627 instr[29,21] = 00 1110 000
2628 instr[20,18] = element size and index
2629 instr[17,10] = 00 0011 11
2630 instr[9,5] = V source
2631 instr[4,0] = R dest */
2632
2633 unsigned vs = INSTR (9, 5);
2634 unsigned rd = INSTR (4, 0);
2635
2636 NYI_assert (29, 21, 0x070);
2637 NYI_assert (17, 10, 0x0F);
2638
2639 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2640 switch (INSTR (20, 18))
2641 {
2642 case 0x2:
2643 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, vs, 0));
2644 break;
2645
2646 case 0x6:
2647 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, vs, 1));
2648 break;
2649
2650 case 0x1:
2651 case 0x3:
2652 case 0x5:
2653 case 0x7:
2654 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u32
2655 (cpu, vs, INSTR (20, 19)));
2656 break;
2657
2658 default:
2659 HALT_NYI;
2660 }
2661 }
2662
2663 static void
2664 do_vec_INS (sim_cpu *cpu)
2665 {
2666 /* instr[31,21] = 01001110000
2667 instr[20,16] = element size and index
2668 instr[15,10] = 000111
2669 instr[9,5] = W source
2670 instr[4,0] = V dest */
2671
2672 int index;
2673 unsigned rs = INSTR (9, 5);
2674 unsigned vd = INSTR (4, 0);
2675
2676 NYI_assert (31, 21, 0x270);
2677 NYI_assert (15, 10, 0x07);
2678
2679 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
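  /* The position of the lowest set bit in instr[20,16] selects the
     element size; the bits above it form the lane index.  */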
2680 if (INSTR (16, 16))
2681 {
2682 index = INSTR (20, 17);
2683 aarch64_set_vec_u8 (cpu, vd, index,
2684 aarch64_get_reg_u8 (cpu, rs, NO_SP));
2685 }
2686 else if (INSTR (17, 17))
2687 {
2688 index = INSTR (20, 18);
2689 aarch64_set_vec_u16 (cpu, vd, index,
2690 aarch64_get_reg_u16 (cpu, rs, NO_SP));
2691 }
2692 else if (INSTR (18, 18))
2693 {
2694 index = INSTR (20, 19);
2695 aarch64_set_vec_u32 (cpu, vd, index,
2696 aarch64_get_reg_u32 (cpu, rs, NO_SP));
2697 }
2698 else if (INSTR (19, 19))
2699 {
2700 index = INSTR (20, 20);
2701 aarch64_set_vec_u64 (cpu, vd, index,
2702 aarch64_get_reg_u64 (cpu, rs, NO_SP));
2703 }
2704 else
2705 HALT_NYI;
2706 }
2707
2708 static void
2709 do_vec_DUP_vector_into_vector (sim_cpu *cpu)
2710 {
2711 /* instr[31] = 0
2712 instr[30] = half(0)/full(1)
2713 instr[29,21] = 00 1110 000
2714 instr[20,16] = element size and index
2715 instr[15,10] = 0000 01
2716 instr[9,5] = V source
2717 instr[4,0] = V dest. */
2718
2719 unsigned full = INSTR (30, 30);
2720 unsigned vs = INSTR (9, 5);
2721 unsigned vd = INSTR (4, 0);
2722 int i, index;
2723
2724 NYI_assert (29, 21, 0x070);
2725 NYI_assert (15, 10, 0x01);
2726
2727 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2728 if (INSTR (16, 16))
2729 {
2730 index = INSTR (20, 17);
2731
2732 for (i = 0; i < (full ? 16 : 8); i++)
2733 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vs, index));
2734 }
2735 else if (INSTR (17, 17))
2736 {
2737 index = INSTR (20, 18);
2738
2739 for (i = 0; i < (full ? 8 : 4); i++)
2740 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, index));
2741 }
2742 else if (INSTR (18, 18))
2743 {
2744 index = INSTR (20, 19);
2745
2746 for (i = 0; i < (full ? 4 : 2); i++)
2747 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, index));
2748 }
2749 else
2750 {
2751 if (INSTR (19, 19) == 0)
2752 HALT_UNALLOC;
2753
2754 if (! full)
2755 HALT_UNALLOC;
2756
2757 index = INSTR (20, 20);
2758
2759 for (i = 0; i < 2; i++)
2760 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, index));
2761 }
2762 }
2763
2764 static void
2765 do_vec_TBL (sim_cpu *cpu)
2766 {
2767 /* instr[31] = 0
2768 instr[30] = half(0)/full(1)
2769 instr[29,21] = 00 1110 000
2770 instr[20,16] = Vm
2771 instr[15] = 0
2772 instr[14,13] = vec length
2773 instr[12,10] = 000
2774 instr[9,5] = V start
2775 instr[4,0] = V dest */
2776
2777 int full = INSTR (30, 30);
2778 int len = INSTR (14, 13) + 1;
2779 unsigned vm = INSTR (20, 16);
2780 unsigned vn = INSTR (9, 5);
2781 unsigned vd = INSTR (4, 0);
2782 unsigned i;
2783
2784 NYI_assert (29, 21, 0x070);
2785 NYI_assert (12, 10, 0);
2786
2787 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
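  /* Each selector byte of Vm indexes the table formed by the "len"
     consecutive vector registers starting at Vn; an out-of-range
     selector yields zero.  */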
2788 for (i = 0; i < (full ? 16 : 8); i++)
2789 {
2790 unsigned int selector = aarch64_get_vec_u8 (cpu, vm, i);
2791 uint8_t val;
2792
2793 if (selector < 16)
2794 val = aarch64_get_vec_u8 (cpu, vn, selector);
2795 else if (selector < 32)
2796 val = len < 2 ? 0 : aarch64_get_vec_u8 (cpu, vn + 1, selector - 16);
2797 else if (selector < 48)
2798 val = len < 3 ? 0 : aarch64_get_vec_u8 (cpu, vn + 2, selector - 32);
2799 else if (selector < 64)
2800 val = len < 4 ? 0 : aarch64_get_vec_u8 (cpu, vn + 3, selector - 48);
2801 else
2802 val = 0;
2803
2804 aarch64_set_vec_u8 (cpu, vd, i, val);
2805 }
2806 }
2807
2808 static void
2809 do_vec_TRN (sim_cpu *cpu)
2810 {
2811 /* instr[31] = 0
2812 instr[30] = half(0)/full(1)
2813 instr[29,24] = 00 1110
2814 instr[23,22] = size
2815 instr[21] = 0
2816 instr[20,16] = Vm
2817 instr[15] = 0
2818 instr[14] = TRN1 (0) / TRN2 (1)
2819 instr[13,10] = 1010
2820 instr[9,5] = V source
2821 instr[4,0] = V dest. */
2822
2823 int full = INSTR (30, 30);
2824 int second = INSTR (14, 14);
2825 unsigned vm = INSTR (20, 16);
2826 unsigned vn = INSTR (9, 5);
2827 unsigned vd = INSTR (4, 0);
2828 unsigned i;
2829
2830 NYI_assert (29, 24, 0x0E);
2831 NYI_assert (13, 10, 0xA);
2832
2833 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2834 switch (INSTR (23, 22))
2835 {
2836 case 0:
2837 for (i = 0; i < (full ? 8 : 4); i++)
2838 {
2839 aarch64_set_vec_u8
2840 (cpu, vd, i * 2,
2841 aarch64_get_vec_u8 (cpu, second ? vm : vn, i * 2));
2842 aarch64_set_vec_u8
	    (cpu, vd, i * 2 + 1,
2844 aarch64_get_vec_u8 (cpu, second ? vn : vm, i * 2 + 1));
2845 }
2846 break;
2847
2848 case 1:
2849 for (i = 0; i < (full ? 4 : 2); i++)
2850 {
2851 aarch64_set_vec_u16
2852 (cpu, vd, i * 2,
2853 aarch64_get_vec_u16 (cpu, second ? vm : vn, i * 2));
2854 aarch64_set_vec_u16
	    (cpu, vd, i * 2 + 1,
2856 aarch64_get_vec_u16 (cpu, second ? vn : vm, i * 2 + 1));
2857 }
2858 break;
2859
2860 case 2:
2861 aarch64_set_vec_u32
2862 (cpu, vd, 0, aarch64_get_vec_u32 (cpu, second ? vm : vn, 0));
2863 aarch64_set_vec_u32
2864 (cpu, vd, 1, aarch64_get_vec_u32 (cpu, second ? vn : vm, 1));
2865 aarch64_set_vec_u32
2866 (cpu, vd, 2, aarch64_get_vec_u32 (cpu, second ? vm : vn, 2));
2867 aarch64_set_vec_u32
2868 (cpu, vd, 3, aarch64_get_vec_u32 (cpu, second ? vn : vm, 3));
2869 break;
2870
2871 case 3:
2872 if (! full)
2873 HALT_UNALLOC;
2874
2875 aarch64_set_vec_u64 (cpu, vd, 0,
2876 aarch64_get_vec_u64 (cpu, second ? vm : vn, 0));
2877 aarch64_set_vec_u64 (cpu, vd, 1,
2878 aarch64_get_vec_u64 (cpu, second ? vn : vm, 1));
2879 break;
2880 }
2881 }
2882
2883 static void
2884 do_vec_DUP_scalar_into_vector (sim_cpu *cpu)
2885 {
2886 /* instr[31] = 0
2887 instr[30] = 0=> zero top 64-bits, 1=> duplicate into top 64-bits
2888 [must be 1 for 64-bit xfer]
2889 instr[29,20] = 00 1110 0000
2890 instr[19,16] = element size: 0001=> 8-bits, 0010=> 16-bits,
2891 0100=> 32-bits. 1000=>64-bits
2892 instr[15,10] = 0000 11
2893 instr[9,5] = W source
2894 instr[4,0] = V dest. */
2895
2896 unsigned i;
2897 unsigned Vd = INSTR (4, 0);
2898 unsigned Rs = INSTR (9, 5);
2899 int both = INSTR (30, 30);
2900
2901 NYI_assert (29, 20, 0x0E0);
2902 NYI_assert (15, 10, 0x03);
2903
2904 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2905 switch (INSTR (19, 16))
2906 {
2907 case 1:
2908 for (i = 0; i < (both ? 16 : 8); i++)
2909 aarch64_set_vec_u8 (cpu, Vd, i, aarch64_get_reg_u8 (cpu, Rs, NO_SP));
2910 break;
2911
2912 case 2:
2913 for (i = 0; i < (both ? 8 : 4); i++)
2914 aarch64_set_vec_u16 (cpu, Vd, i, aarch64_get_reg_u16 (cpu, Rs, NO_SP));
2915 break;
2916
2917 case 4:
2918 for (i = 0; i < (both ? 4 : 2); i++)
2919 aarch64_set_vec_u32 (cpu, Vd, i, aarch64_get_reg_u32 (cpu, Rs, NO_SP));
2920 break;
2921
2922 case 8:
2923 if (!both)
2924 HALT_NYI;
2925 aarch64_set_vec_u64 (cpu, Vd, 0, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
2926 aarch64_set_vec_u64 (cpu, Vd, 1, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
2927 break;
2928
2929 default:
2930 HALT_NYI;
2931 }
2932 }
2933
2934 static void
2935 do_vec_UZP (sim_cpu *cpu)
2936 {
2937 /* instr[31] = 0
2938 instr[30] = half(0)/full(1)
2939 instr[29,24] = 00 1110
2940 instr[23,22] = size: byte(00), half(01), word (10), long (11)
2941 instr[21] = 0
2942 instr[20,16] = Vm
2943 instr[15] = 0
2944 instr[14] = lower (0) / upper (1)
2945 instr[13,10] = 0110
2946 instr[9,5] = Vn
2947 instr[4,0] = Vd. */
2948
2949 int full = INSTR (30, 30);
2950 int upper = INSTR (14, 14);
2951
2952 unsigned vm = INSTR (20, 16);
2953 unsigned vn = INSTR (9, 5);
2954 unsigned vd = INSTR (4, 0);
2955
2956 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
2957 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
2958 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
2959 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
2960
2961 uint64_t val1 = 0;
2962 uint64_t val2 = 0;
2963
2964 uint64_t input1 = upper ? val_n1 : val_m1;
2965 uint64_t input2 = upper ? val_n2 : val_m2;
2966 unsigned i;
2967
2968 NYI_assert (29, 24, 0x0E);
2969 NYI_assert (21, 21, 0);
2970 NYI_assert (15, 15, 0);
2971 NYI_assert (13, 10, 6);
2972
2973 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2974 switch (INSTR (23, 23))
2975 {
2976 case 0:
2977 for (i = 0; i < 8; i++)
2978 {
2979 val1 |= (input1 >> (i * 8)) & (0xFFULL << (i * 8));
2980 val2 |= (input2 >> (i * 8)) & (0xFFULL << (i * 8));
2981 }
2982 break;
2983
2984 case 1:
2985 for (i = 0; i < 4; i++)
2986 {
2987 val1 |= (input1 >> (i * 16)) & (0xFFFFULL << (i * 16));
2988 val2 |= (input2 >> (i * 16)) & (0xFFFFULL << (i * 16));
2989 }
2990 break;
2991
    case 2:
      val1 = ((input1 & 0xFFFFFFFF) | ((input1 >> 32) & 0xFFFFFFFF00000000ULL));
      val2 = ((input2 & 0xFFFFFFFF) | ((input2 >> 32) & 0xFFFFFFFF00000000ULL));
      break;

2996 case 3:
2997 val1 = input1;
2998 val2 = input2;
2999 break;
3000 }
3001
3002 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3003 if (full)
3004 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3005 }
3006
3007 static void
3008 do_vec_ZIP (sim_cpu *cpu)
3009 {
3010 /* instr[31] = 0
3011 instr[30] = half(0)/full(1)
3012 instr[29,24] = 00 1110
     instr[23,22] = size: byte(00), half(01), word (10), long (11)
3014 instr[21] = 0
3015 instr[20,16] = Vm
3016 instr[15] = 0
3017 instr[14] = lower (0) / upper (1)
3018 instr[13,10] = 1110
3019 instr[9,5] = Vn
3020 instr[4,0] = Vd. */
3021
3022 int full = INSTR (30, 30);
3023 int upper = INSTR (14, 14);
3024
3025 unsigned vm = INSTR (20, 16);
3026 unsigned vn = INSTR (9, 5);
3027 unsigned vd = INSTR (4, 0);
3028
3029 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
3030 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
3031 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
3032 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
3033
3034 uint64_t val1 = 0;
3035 uint64_t val2 = 0;
3036
3037 uint64_t input1 = upper ? val_n1 : val_m1;
3038 uint64_t input2 = upper ? val_n2 : val_m2;
3039
3040 NYI_assert (29, 24, 0x0E);
3041 NYI_assert (21, 21, 0);
3042 NYI_assert (15, 15, 0);
3043 NYI_assert (13, 10, 0xE);
3044
3045 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3046 switch (INSTR (23, 23))
3047 {
    case 0:
      /* N.B. all of the masks must be 64-bit unsigned: a plain int
         mask such as (0xFF << 24) would sign-extend and corrupt the
         high lanes.  */
      val1 =
          ((input1 << 0)  & (0xFFULL << 0))
        | ((input2 << 8)  & (0xFFULL << 8))
        | ((input1 << 8)  & (0xFFULL << 16))
        | ((input2 << 16) & (0xFFULL << 24))
        | ((input1 << 16) & (0xFFULL << 32))
        | ((input2 << 24) & (0xFFULL << 40))
        | ((input1 << 24) & (0xFFULL << 48))
        | ((input2 << 32) & (0xFFULL << 56));

      val2 =
          ((input1 >> 32) & (0xFFULL << 0))
        | ((input2 >> 24) & (0xFFULL << 8))
        | ((input1 >> 24) & (0xFFULL << 16))
        | ((input2 >> 16) & (0xFFULL << 24))
        | ((input1 >> 16) & (0xFFULL << 32))
        | ((input2 >> 8)  & (0xFFULL << 40))
        | ((input1 >> 8)  & (0xFFULL << 48))
        | ((input2 >> 0)  & (0xFFULL << 56));
      break;

    case 1:
      val1 =
          ((input1 << 0)  & (0xFFFFULL << 0))
        | ((input2 << 16) & (0xFFFFULL << 16))
        | ((input1 << 16) & (0xFFFFULL << 32))
        | ((input2 << 32) & (0xFFFFULL << 48));

      val2 =
          ((input1 >> 32) & (0xFFFFULL << 0))
        | ((input2 >> 16) & (0xFFFFULL << 16))
        | ((input1 >> 16) & (0xFFFFULL << 32))
        | ((input2 >> 0)  & (0xFFFFULL << 48));
      break;
3083
3084 case 2:
3085 val1 = (input1 & 0xFFFFFFFFULL) | (input2 << 32);
3086 val2 = (input2 & 0xFFFFFFFFULL) | (input1 << 32);
3087 break;
3088
3089 case 3:
3090 val1 = input1;
3091 val2 = input2;
3092 break;
3093 }
3094
3095 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3096 if (full)
3097 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3098 }
3099
3100 /* Floating point immediates are encoded in 8 bits.
3101 fpimm[7] = sign bit.
3102 fpimm[6:4] = signed exponent.
3103 fpimm[3:0] = fraction (assuming leading 1).
3104 i.e. F = s * 1.f * 2^(e - b). */
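/* For example, imm8 = 0x70 (s = 0, e = 7, f = 0) encodes 1.0 and
   imm8 = 0xf0 encodes -1.0.  */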
3105
3106 static float
3107 fp_immediate_for_encoding_32 (uint32_t imm8)
3108 {
3109 float u;
3110 uint32_t s, e, f, i;
3111
3112 s = (imm8 >> 7) & 0x1;
3113 e = (imm8 >> 4) & 0x7;
3114 f = imm8 & 0xf;
3115
  /* The fp value is s * n/16 * 2^r where n is 16+f.  */
3117 u = (16.0 + f) / 16.0;
3118
3119 /* N.B. exponent is signed. */
3120 if (e < 4)
3121 {
3122 int epos = e;
3123
3124 for (i = 0; i <= epos; i++)
3125 u *= 2.0;
3126 }
3127 else
3128 {
3129 int eneg = 7 - e;
3130
3131 for (i = 0; i < eneg; i++)
3132 u /= 2.0;
3133 }
3134
3135 if (s)
3136 u = - u;
3137
3138 return u;
3139 }
3140
3141 static double
3142 fp_immediate_for_encoding_64 (uint32_t imm8)
3143 {
3144 double u;
3145 uint32_t s, e, f, i;
3146
3147 s = (imm8 >> 7) & 0x1;
3148 e = (imm8 >> 4) & 0x7;
3149 f = imm8 & 0xf;
3150
  /* The fp value is s * n/16 * 2^r where n is 16+f.  */
3152 u = (16.0 + f) / 16.0;
3153
3154 /* N.B. exponent is signed. */
3155 if (e < 4)
3156 {
3157 int epos = e;
3158
3159 for (i = 0; i <= epos; i++)
3160 u *= 2.0;
3161 }
3162 else
3163 {
3164 int eneg = 7 - e;
3165
3166 for (i = 0; i < eneg; i++)
3167 u /= 2.0;
3168 }
3169
3170 if (s)
3171 u = - u;
3172
3173 return u;
3174 }
3175
3176 static void
3177 do_vec_MOV_immediate (sim_cpu *cpu)
3178 {
3179 /* instr[31] = 0
3180 instr[30] = full/half selector
3181 instr[29,19] = 00111100000
3182 instr[18,16] = high 3 bits of uimm8
3183 instr[15,12] = size & shift:
3184 0000 => 32-bit
3185 0010 => 32-bit + LSL#8
3186 0100 => 32-bit + LSL#16
3187 0110 => 32-bit + LSL#24
3188 1010 => 16-bit + LSL#8
3189 1000 => 16-bit
3190 1101 => 32-bit + MSL#16
3191 1100 => 32-bit + MSL#8
3192 1110 => 8-bit
3193 1111 => double
3194 instr[11,10] = 01
3195 instr[9,5] = low 5-bits of uimm8
3196 instr[4,0] = Vd. */
3197
3198 int full = INSTR (30, 30);
3199 unsigned vd = INSTR (4, 0);
3200 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3201 unsigned i;
3202
3203 NYI_assert (29, 19, 0x1E0);
3204 NYI_assert (11, 10, 1);
3205
3206 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3207 switch (INSTR (15, 12))
3208 {
3209 case 0x0: /* 32-bit, no shift. */
3210 case 0x2: /* 32-bit, shift by 8. */
3211 case 0x4: /* 32-bit, shift by 16. */
3212 case 0x6: /* 32-bit, shift by 24. */
3213 val <<= (8 * INSTR (14, 13));
3214 for (i = 0; i < (full ? 4 : 2); i++)
3215 aarch64_set_vec_u32 (cpu, vd, i, val);
3216 break;
3217
3218 case 0xa: /* 16-bit, shift by 8. */
3219 val <<= 8;
3220 /* Fall through. */
3221 case 0x8: /* 16-bit, no shift. */
3222 for (i = 0; i < (full ? 8 : 4); i++)
3223 aarch64_set_vec_u16 (cpu, vd, i, val);
      break;
3225 case 0xd: /* 32-bit, mask shift by 16. */
3226 val <<= 8;
3227 val |= 0xFF;
3228 /* Fall through. */
3229 case 0xc: /* 32-bit, mask shift by 8. */
3230 val <<= 8;
3231 val |= 0xFF;
3232 for (i = 0; i < (full ? 4 : 2); i++)
3233 aarch64_set_vec_u32 (cpu, vd, i, val);
3234 break;
3235
3236 case 0xe: /* 8-bit, no shift. */
3237 for (i = 0; i < (full ? 16 : 8); i++)
3238 aarch64_set_vec_u8 (cpu, vd, i, val);
3239 break;
3240
3241 case 0xf: /* FMOV Vs.{2|4}S, #fpimm. */
3242 {
3243 float u = fp_immediate_for_encoding_32 (val);
3244 for (i = 0; i < (full ? 4 : 2); i++)
3245 aarch64_set_vec_float (cpu, vd, i, u);
3246 break;
3247 }
3248
3249 default:
3250 HALT_NYI;
3251 }
3252 }
3253
3254 static void
3255 do_vec_MVNI (sim_cpu *cpu)
3256 {
3257 /* instr[31] = 0
3258 instr[30] = full/half selector
3259 instr[29,19] = 10111100000
3260 instr[18,16] = high 3 bits of uimm8
3261 instr[15,12] = selector
3262 instr[11,10] = 01
3263 instr[9,5] = low 5-bits of uimm8
3264 instr[4,0] = Vd. */
3265
3266 int full = INSTR (30, 30);
3267 unsigned vd = INSTR (4, 0);
3268 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3269 unsigned i;
3270
3271 NYI_assert (29, 19, 0x5E0);
3272 NYI_assert (11, 10, 1);
3273
3274 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3275 switch (INSTR (15, 12))
3276 {
3277 case 0x0: /* 32-bit, no shift. */
3278 case 0x2: /* 32-bit, shift by 8. */
3279 case 0x4: /* 32-bit, shift by 16. */
3280 case 0x6: /* 32-bit, shift by 24. */
3281 val <<= (8 * INSTR (14, 13));
3282 val = ~ val;
3283 for (i = 0; i < (full ? 4 : 2); i++)
3284 aarch64_set_vec_u32 (cpu, vd, i, val);
3285 return;
3286
3287 case 0xa: /* 16-bit, 8 bit shift. */
      val <<= 8;
      /* Fall through.  */
3289 case 0x8: /* 16-bit, no shift. */
3290 val = ~ val;
3291 for (i = 0; i < (full ? 8 : 4); i++)
3292 aarch64_set_vec_u16 (cpu, vd, i, val);
3293 return;
3294
3295 case 0xd: /* 32-bit, mask shift by 16. */
3296 val <<= 8;
      val |= 0xFF;
      /* Fall through.  */
3298 case 0xc: /* 32-bit, mask shift by 8. */
3299 val <<= 8;
3300 val |= 0xFF;
3301 val = ~ val;
3302 for (i = 0; i < (full ? 4 : 2); i++)
3303 aarch64_set_vec_u32 (cpu, vd, i, val);
3304 return;
3305
    case 0xe: /* MOVI Dn, #mask64.  */
      {
        uint64_t mask = 0;

        for (i = 0; i < 8; i++)
          if (val & (1 << i))
            mask |= (0xFFULL << (i * 8));
        aarch64_set_vec_u64 (cpu, vd, 0, mask);
        aarch64_set_vec_u64 (cpu, vd, 1, mask);
3315 return;
3316 }
3317
3318 case 0xf: /* FMOV Vd.2D, #fpimm. */
3319 {
3320 double u = fp_immediate_for_encoding_64 (val);
3321
3322 if (! full)
3323 HALT_UNALLOC;
3324
3325 aarch64_set_vec_double (cpu, vd, 0, u);
3326 aarch64_set_vec_double (cpu, vd, 1, u);
3327 return;
3328 }
3329
3330 default:
3331 HALT_NYI;
3332 }
3333 }
3334
3335 #define ABS(A) ((A) < 0 ? - (A) : (A))
3336
3337 static void
3338 do_vec_ABS (sim_cpu *cpu)
3339 {
3340 /* instr[31] = 0
3341 instr[30] = half(0)/full(1)
3342 instr[29,24] = 00 1110
3343 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3344 instr[21,10] = 10 0000 1011 10
3345 instr[9,5] = Vn
     instr[4,0] = Vd. */
3347
3348 unsigned vn = INSTR (9, 5);
3349 unsigned vd = INSTR (4, 0);
3350 unsigned full = INSTR (30, 30);
3351 unsigned i;
3352
3353 NYI_assert (29, 24, 0x0E);
3354 NYI_assert (21, 10, 0x82E);
3355
3356 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3357 switch (INSTR (23, 22))
3358 {
3359 case 0:
3360 for (i = 0; i < (full ? 16 : 8); i++)
3361 aarch64_set_vec_s8 (cpu, vd, i,
3362 ABS (aarch64_get_vec_s8 (cpu, vn, i)));
3363 break;
3364
3365 case 1:
3366 for (i = 0; i < (full ? 8 : 4); i++)
3367 aarch64_set_vec_s16 (cpu, vd, i,
3368 ABS (aarch64_get_vec_s16 (cpu, vn, i)));
3369 break;
3370
3371 case 2:
3372 for (i = 0; i < (full ? 4 : 2); i++)
3373 aarch64_set_vec_s32 (cpu, vd, i,
3374 ABS (aarch64_get_vec_s32 (cpu, vn, i)));
3375 break;
3376
3377 case 3:
3378 if (! full)
3379 HALT_NYI;
3380 for (i = 0; i < 2; i++)
3381 aarch64_set_vec_s64 (cpu, vd, i,
3382 ABS (aarch64_get_vec_s64 (cpu, vn, i)));
3383 break;
3384 }
3385 }
3386
3387 static void
3388 do_vec_ADDV (sim_cpu *cpu)
3389 {
3390 /* instr[31] = 0
3391 instr[30] = full/half selector
3392 instr[29,24] = 00 1110
3393 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3394 instr[21,10] = 11 0001 1011 10
3395 instr[9,5] = Vm
     instr[4,0] = Rd. */
3397
3398 unsigned vm = INSTR (9, 5);
3399 unsigned rd = INSTR (4, 0);
3400 unsigned i;
3401 uint64_t val = 0;
3402 int full = INSTR (30, 30);
3403
3404 NYI_assert (29, 24, 0x0E);
3405 NYI_assert (21, 10, 0xC6E);
3406
3407 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3408 switch (INSTR (23, 22))
3409 {
3410 case 0:
3411 for (i = 0; i < (full ? 16 : 8); i++)
3412 val += aarch64_get_vec_u8 (cpu, vm, i);
3413 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
3414 return;
3415
3416 case 1:
3417 for (i = 0; i < (full ? 8 : 4); i++)
3418 val += aarch64_get_vec_u16 (cpu, vm, i);
3419 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
3420 return;
3421
3422 case 2:
3423 for (i = 0; i < (full ? 4 : 2); i++)
3424 val += aarch64_get_vec_u32 (cpu, vm, i);
3425 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
3426 return;
3427
3428 case 3:
3429 if (! full)
3430 HALT_UNALLOC;
3431 val = aarch64_get_vec_u64 (cpu, vm, 0);
3432 val += aarch64_get_vec_u64 (cpu, vm, 1);
3433 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
3434 return;
3435 }
3436 }
3437
3438 static void
3439 do_vec_ins_2 (sim_cpu *cpu)
3440 {
3441 /* instr[31,21] = 01001110000
3442 instr[20,18] = size & element selector
3443 instr[17,14] = 0000
3444 instr[13] = direction: to vec(0), from vec (1)
3445 instr[12,10] = 111
3446 instr[9,5] = Vm
3447 instr[4,0] = Vd. */
3448
3449 unsigned elem;
3450 unsigned vm = INSTR (9, 5);
3451 unsigned vd = INSTR (4, 0);
3452
3453 NYI_assert (31, 21, 0x270);
3454 NYI_assert (17, 14, 0);
3455 NYI_assert (12, 10, 7);
3456
3457 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3458 if (INSTR (13, 13) == 1)
3459 {
3460 if (INSTR (18, 18) == 1)
3461 {
3462 /* 32-bit moves. */
3463 elem = INSTR (20, 19);
3464 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3465 aarch64_get_vec_u32 (cpu, vm, elem));
3466 }
3467 else
3468 {
3469 /* 64-bit moves. */
3470 if (INSTR (19, 19) != 1)
3471 HALT_NYI;
3472
3473 elem = INSTR (20, 20);
3474 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3475 aarch64_get_vec_u64 (cpu, vm, elem));
3476 }
3477 }
3478 else
3479 {
3480 if (INSTR (18, 18) == 1)
3481 {
3482 /* 32-bit moves. */
3483 elem = INSTR (20, 19);
3484 aarch64_set_vec_u32 (cpu, vd, elem,
3485 aarch64_get_reg_u32 (cpu, vm, NO_SP));
3486 }
3487 else
3488 {
3489 /* 64-bit moves. */
3490 if (INSTR (19, 19) != 1)
3491 HALT_NYI;
3492
3493 elem = INSTR (20, 20);
3494 aarch64_set_vec_u64 (cpu, vd, elem,
3495 aarch64_get_reg_u64 (cpu, vm, NO_SP));
3496 }
3497 }
3498 }
3499
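/* Read the N source elements of type READ_TYPE from lanes BIAS
   upwards of Vn and Vm, widen each product to DST_TYPE, and write
   the products back as the first N WRITE_TYPE lanes of Vd.  Both
   sources are read in full before anything is written, in case Vd
   aliases Vn or Vm.  */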
3500 #define DO_VEC_WIDENING_MUL(N, DST_TYPE, READ_TYPE, WRITE_TYPE) \
3501 do \
3502 { \
3503 DST_TYPE a[N], b[N]; \
3504 \
3505 for (i = 0; i < (N); i++) \
3506 { \
3507 a[i] = aarch64_get_vec_##READ_TYPE (cpu, vn, i + bias); \
3508 b[i] = aarch64_get_vec_##READ_TYPE (cpu, vm, i + bias); \
3509 } \
3510 for (i = 0; i < (N); i++) \
3511 aarch64_set_vec_##WRITE_TYPE (cpu, vd, i, a[i] * b[i]); \
3512 } \
3513 while (0)
3514
3515 static void
3516 do_vec_mull (sim_cpu *cpu)
3517 {
3518 /* instr[31] = 0
3519 instr[30] = lower(0)/upper(1) selector
3520 instr[29] = signed(0)/unsigned(1)
3521 instr[28,24] = 0 1110
3522 instr[23,22] = size: 8-bit (00), 16-bit (01), 32-bit (10)
3523 instr[21] = 1
3524 instr[20,16] = Vm
3525 instr[15,10] = 11 0000
3526 instr[9,5] = Vn
     instr[4,0] = Vd. */
3528
3529 int unsign = INSTR (29, 29);
3530 int bias = INSTR (30, 30);
3531 unsigned vm = INSTR (20, 16);
3532 unsigned vn = INSTR ( 9, 5);
3533 unsigned vd = INSTR ( 4, 0);
3534 unsigned i;
3535
3536 NYI_assert (28, 24, 0x0E);
3537 NYI_assert (15, 10, 0x30);
3538
3539 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3540 /* NB: Read source values before writing results, in case
3541 the source and destination vectors are the same. */
3542 switch (INSTR (23, 22))
3543 {
3544 case 0:
3545 if (bias)
3546 bias = 8;
3547 if (unsign)
3548 DO_VEC_WIDENING_MUL (8, uint16_t, u8, u16);
3549 else
3550 DO_VEC_WIDENING_MUL (8, int16_t, s8, s16);
3551 return;
3552
3553 case 1:
3554 if (bias)
3555 bias = 4;
3556 if (unsign)
3557 DO_VEC_WIDENING_MUL (4, uint32_t, u16, u32);
3558 else
3559 DO_VEC_WIDENING_MUL (4, int32_t, s16, s32);
3560 return;
3561
3562 case 2:
3563 if (bias)
3564 bias = 2;
3565 if (unsign)
3566 DO_VEC_WIDENING_MUL (2, uint64_t, u32, u64);
3567 else
3568 DO_VEC_WIDENING_MUL (2, int64_t, s32, s64);
3569 return;
3570
3571 case 3:
3572 HALT_NYI;
3573 }
3574 }
3575
3576 static void
3577 do_vec_fadd (sim_cpu *cpu)
3578 {
3579 /* instr[31] = 0
3580 instr[30] = half(0)/full(1)
3581 instr[29,24] = 001110
3582 instr[23] = FADD(0)/FSUB(1)
3583 instr[22] = float (0)/double(1)
3584 instr[21] = 1
3585 instr[20,16] = Vm
3586 instr[15,10] = 110101
3587 instr[9,5] = Vn
     instr[4,0] = Vd. */
3589
3590 unsigned vm = INSTR (20, 16);
3591 unsigned vn = INSTR (9, 5);
3592 unsigned vd = INSTR (4, 0);
3593 unsigned i;
3594 int full = INSTR (30, 30);
3595
3596 NYI_assert (29, 24, 0x0E);
3597 NYI_assert (21, 21, 1);
3598 NYI_assert (15, 10, 0x35);
3599
3600 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3601 if (INSTR (23, 23))
3602 {
3603 if (INSTR (22, 22))
3604 {
3605 if (! full)
3606 HALT_NYI;
3607
3608 for (i = 0; i < 2; i++)
3609 aarch64_set_vec_double (cpu, vd, i,
3610 aarch64_get_vec_double (cpu, vn, i)
3611 - aarch64_get_vec_double (cpu, vm, i));
3612 }
3613 else
3614 {
3615 for (i = 0; i < (full ? 4 : 2); i++)
3616 aarch64_set_vec_float (cpu, vd, i,
3617 aarch64_get_vec_float (cpu, vn, i)
3618 - aarch64_get_vec_float (cpu, vm, i));
3619 }
3620 }
3621 else
3622 {
3623 if (INSTR (22, 22))
3624 {
3625 if (! full)
3626 HALT_NYI;
3627
3628 for (i = 0; i < 2; i++)
3629 aarch64_set_vec_double (cpu, vd, i,
3630 aarch64_get_vec_double (cpu, vm, i)
3631 + aarch64_get_vec_double (cpu, vn, i));
3632 }
3633 else
3634 {
3635 for (i = 0; i < (full ? 4 : 2); i++)
3636 aarch64_set_vec_float (cpu, vd, i,
3637 aarch64_get_vec_float (cpu, vm, i)
3638 + aarch64_get_vec_float (cpu, vn, i));
3639 }
3640 }
3641 }
3642
3643 static void
3644 do_vec_add (sim_cpu *cpu)
3645 {
3646 /* instr[31] = 0
3647 instr[30] = full/half selector
3648 instr[29,24] = 001110
3649 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3650 instr[21] = 1
     instr[20,16] = Vm
     instr[15,10] = 100001
     instr[9,5] = Vn
     instr[4,0] = Vd. */
3655
3656 unsigned vm = INSTR (20, 16);
3657 unsigned vn = INSTR (9, 5);
3658 unsigned vd = INSTR (4, 0);
3659 unsigned i;
3660 int full = INSTR (30, 30);
3661
3662 NYI_assert (29, 24, 0x0E);
3663 NYI_assert (21, 21, 1);
3664 NYI_assert (15, 10, 0x21);
3665
3666 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3667 switch (INSTR (23, 22))
3668 {
3669 case 0:
3670 for (i = 0; i < (full ? 16 : 8); i++)
3671 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
3672 + aarch64_get_vec_u8 (cpu, vm, i));
3673 return;
3674
3675 case 1:
3676 for (i = 0; i < (full ? 8 : 4); i++)
3677 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
3678 + aarch64_get_vec_u16 (cpu, vm, i));
3679 return;
3680
3681 case 2:
3682 for (i = 0; i < (full ? 4 : 2); i++)
3683 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
3684 + aarch64_get_vec_u32 (cpu, vm, i));
3685 return;
3686
3687 case 3:
3688 if (! full)
3689 HALT_UNALLOC;
3690 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vn, 0)
3691 + aarch64_get_vec_u64 (cpu, vm, 0));
3692 aarch64_set_vec_u64 (cpu, vd, 1,
3693 aarch64_get_vec_u64 (cpu, vn, 1)
3694 + aarch64_get_vec_u64 (cpu, vm, 1));
3695 return;
3696 }
3697 }
3698
3699 static void
3700 do_vec_mul (sim_cpu *cpu)
3701 {
3702 /* instr[31] = 0
3703 instr[30] = full/half selector
3704 instr[29,24] = 00 1110
3705 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3706 instr[21] = 1
     instr[20,16] = Vm
     instr[15,10] = 10 0111
     instr[9,5] = Vn
     instr[4,0] = Vd. */
3711
3712 unsigned vm = INSTR (20, 16);
3713 unsigned vn = INSTR (9, 5);
3714 unsigned vd = INSTR (4, 0);
3715 unsigned i;
3716 int full = INSTR (30, 30);
3717 int bias = 0;
3718
3719 NYI_assert (29, 24, 0x0E);
3720 NYI_assert (21, 21, 1);
3721 NYI_assert (15, 10, 0x27);
3722
3723 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3724 switch (INSTR (23, 22))
3725 {
    case 0:
      /* MUL is not a widening operation: the product is written back
         at the source element width, so a full-width 8-bit multiply
         must not write sixteen 16-bit lanes.  */
      DO_VEC_WIDENING_MUL (full ? 16 : 8, uint16_t, u8, u8);
      return;

    case 1:
      DO_VEC_WIDENING_MUL (full ? 8 : 4, uint32_t, u16, u16);
      return;

    case 2:
      DO_VEC_WIDENING_MUL (full ? 4 : 2, uint64_t, u32, u32);
      return;
3737
3738 case 3:
3739 HALT_UNALLOC;
3740 }
3741 }
3742
3743 static void
3744 do_vec_MLA (sim_cpu *cpu)
3745 {
3746 /* instr[31] = 0
3747 instr[30] = full/half selector
3748 instr[29,24] = 00 1110
3749 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3750 instr[21] = 1
     instr[20,16] = Vm
     instr[15,10] = 1001 01
     instr[9,5] = Vn
     instr[4,0] = Vd. */
3755
3756 unsigned vm = INSTR (20, 16);
3757 unsigned vn = INSTR (9, 5);
3758 unsigned vd = INSTR (4, 0);
3759 unsigned i;
3760 int full = INSTR (30, 30);
3761
3762 NYI_assert (29, 24, 0x0E);
3763 NYI_assert (21, 21, 1);
3764 NYI_assert (15, 10, 0x25);
3765
3766 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3767 switch (INSTR (23, 22))
3768 {
    case 0:
      /* MLA multiplies and accumulates at the same element width.
         Each lane of Vd is read before it is written, so the update
         is safe even when Vd aliases Vn or Vm.  */
      for (i = 0; i < (full ? 16 : 8); i++)
        aarch64_set_vec_u8 (cpu, vd, i,
                            aarch64_get_vec_u8 (cpu, vd, i)
                            + (aarch64_get_vec_u8 (cpu, vn, i)
                               * aarch64_get_vec_u8 (cpu, vm, i)));
      return;

    case 1:
      for (i = 0; i < (full ? 8 : 4); i++)
        aarch64_set_vec_u16 (cpu, vd, i,
                             aarch64_get_vec_u16 (cpu, vd, i)
                             + (aarch64_get_vec_u16 (cpu, vn, i)
                                * aarch64_get_vec_u16 (cpu, vm, i)));
      return;

    case 2:
      for (i = 0; i < (full ? 4 : 2); i++)
        aarch64_set_vec_u32 (cpu, vd, i,
                             aarch64_get_vec_u32 (cpu, vd, i)
                             + (aarch64_get_vec_u32 (cpu, vn, i)
                                * aarch64_get_vec_u32 (cpu, vm, i)));
      return;

    case 3:
      HALT_UNALLOC;
3828 }
3829 }
3830
/* The "NM" max/min helpers ignore a NaN operand: the other operand is
   returned unless both operands are NaNs.  Zeros, infinities and
   subnormals all take part in the comparison as usual, so testing
   fpclassify () == FP_NORMAL would be wrong here.  */

static float
fmaxnm (float a, float b)
{
  if (! isnan (a))
    {
      if (! isnan (b))
        return a > b ? a : b;
      return a;
    }
  else if (! isnan (b))
    return b;
  return a;
}

static float
fminnm (float a, float b)
{
  if (! isnan (a))
    {
      if (! isnan (b))
        return a < b ? a : b;
      return a;
    }
  else if (! isnan (b))
    return b;
  return a;
}

static double
dmaxnm (double a, double b)
{
  if (! isnan (a))
    {
      if (! isnan (b))
        return a > b ? a : b;
      return a;
    }
  else if (! isnan (b))
    return b;
  return a;
}

static double
dminnm (double a, double b)
{
  if (! isnan (a))
    {
      if (! isnan (b))
        return a < b ? a : b;
      return a;
    }
  else if (! isnan (b))
    return b;
  return a;
}
3886
3887 static void
3888 do_vec_FminmaxNMP (sim_cpu *cpu)
3889 {
3890 /* instr [31] = 0
3891 instr [30] = half (0)/full (1)
3892 instr [29,24] = 10 1110
3893 instr [23] = max(0)/min(1)
3894 instr [22] = float (0)/double (1)
3895 instr [21] = 1
3896 instr [20,16] = Vn
3897 instr [15,10] = 1100 01
3898 instr [9,5] = Vm
     instr [4,0]  = Vd. */
3900
3901 unsigned vm = INSTR (20, 16);
3902 unsigned vn = INSTR (9, 5);
3903 unsigned vd = INSTR (4, 0);
3904 int full = INSTR (30, 30);
3905
3906 NYI_assert (29, 24, 0x2E);
3907 NYI_assert (21, 21, 1);
3908 NYI_assert (15, 10, 0x31);
3909
3910 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
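  /* Pairwise: the lanes reduced from Vn fill the low half of Vd and
     the lanes reduced from Vm fill the high half.  */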
3911 if (INSTR (22, 22))
3912 {
3913 double (* fn)(double, double) = INSTR (23, 23)
3914 ? dminnm : dmaxnm;
3915
3916 if (! full)
3917 HALT_NYI;
3918 aarch64_set_vec_double (cpu, vd, 0,
3919 fn (aarch64_get_vec_double (cpu, vn, 0),
3920 aarch64_get_vec_double (cpu, vn, 1)));
      aarch64_set_vec_double (cpu, vd, 1,
3922 fn (aarch64_get_vec_double (cpu, vm, 0),
3923 aarch64_get_vec_double (cpu, vm, 1)));
3924 }
3925 else
3926 {
3927 float (* fn)(float, float) = INSTR (23, 23)
3928 ? fminnm : fmaxnm;
3929
3930 aarch64_set_vec_float (cpu, vd, 0,
3931 fn (aarch64_get_vec_float (cpu, vn, 0),
3932 aarch64_get_vec_float (cpu, vn, 1)));
3933 if (full)
3934 aarch64_set_vec_float (cpu, vd, 1,
3935 fn (aarch64_get_vec_float (cpu, vn, 2),
3936 aarch64_get_vec_float (cpu, vn, 3)));
3937
3938 aarch64_set_vec_float (cpu, vd, (full ? 2 : 1),
3939 fn (aarch64_get_vec_float (cpu, vm, 0),
3940 aarch64_get_vec_float (cpu, vm, 1)));
3941 if (full)
3942 aarch64_set_vec_float (cpu, vd, 3,
3943 fn (aarch64_get_vec_float (cpu, vm, 2),
3944 aarch64_get_vec_float (cpu, vm, 3)));
3945 }
3946 }
3947
3948 static void
3949 do_vec_AND (sim_cpu *cpu)
3950 {
3951 /* instr[31] = 0
3952 instr[30] = half (0)/full (1)
3953 instr[29,21] = 001110001
3954 instr[20,16] = Vm
3955 instr[15,10] = 000111
3956 instr[9,5] = Vn
     instr[4,0]   = Vd.  */
3958
3959 unsigned vm = INSTR (20, 16);
3960 unsigned vn = INSTR (9, 5);
3961 unsigned vd = INSTR (4, 0);
3962 unsigned i;
3963 int full = INSTR (30, 30);
3964
3965 NYI_assert (29, 21, 0x071);
3966 NYI_assert (15, 10, 0x07);
3967
3968 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3969 for (i = 0; i < (full ? 4 : 2); i++)
3970 aarch64_set_vec_u32 (cpu, vd, i,
3971 aarch64_get_vec_u32 (cpu, vn, i)
3972 & aarch64_get_vec_u32 (cpu, vm, i));
3973 }
3974
3975 static void
3976 do_vec_BSL (sim_cpu *cpu)
3977 {
3978 /* instr[31] = 0
3979 instr[30] = half (0)/full (1)
3980 instr[29,21] = 101110011
3981 instr[20,16] = Vm
3982 instr[15,10] = 000111
3983 instr[9,5] = Vn
     instr[4,0]   = Vd.  */
3985
3986 unsigned vm = INSTR (20, 16);
3987 unsigned vn = INSTR (9, 5);
3988 unsigned vd = INSTR (4, 0);
3989 unsigned i;
3990 int full = INSTR (30, 30);
3991
3992 NYI_assert (29, 21, 0x173);
3993 NYI_assert (15, 10, 0x07);
3994
3995 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3996 for (i = 0; i < (full ? 16 : 8); i++)
3997 aarch64_set_vec_u8 (cpu, vd, i,
3998 ( aarch64_get_vec_u8 (cpu, vd, i)
3999 & aarch64_get_vec_u8 (cpu, vn, i))
4000 | ((~ aarch64_get_vec_u8 (cpu, vd, i))
4001 & aarch64_get_vec_u8 (cpu, vm, i)));
4002 }
4003
4004 static void
4005 do_vec_EOR (sim_cpu *cpu)
4006 {
4007 /* instr[31] = 0
4008 instr[30] = half (0)/full (1)
4009 instr[29,21] = 10 1110 001
4010 instr[20,16] = Vm
4011 instr[15,10] = 000111
4012 instr[9,5] = Vn
     instr[4,0]   = Vd.  */
4014
4015 unsigned vm = INSTR (20, 16);
4016 unsigned vn = INSTR (9, 5);
4017 unsigned vd = INSTR (4, 0);
4018 unsigned i;
4019 int full = INSTR (30, 30);
4020
4021 NYI_assert (29, 21, 0x171);
4022 NYI_assert (15, 10, 0x07);
4023
4024 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4025 for (i = 0; i < (full ? 4 : 2); i++)
4026 aarch64_set_vec_u32 (cpu, vd, i,
4027 aarch64_get_vec_u32 (cpu, vn, i)
4028 ^ aarch64_get_vec_u32 (cpu, vm, i));
4029 }
4030
4031 static void
4032 do_vec_bit (sim_cpu *cpu)
4033 {
4034 /* instr[31] = 0
4035 instr[30] = half (0)/full (1)
4036 instr[29,23] = 10 1110 1
4037 instr[22] = BIT (0) / BIF (1)
4038 instr[21] = 1
4039 instr[20,16] = Vm
4040 instr[15,10] = 0001 11
4041 instr[9,5] = Vn
     instr[4,0]   = Vd.  */
4043
4044 unsigned vm = INSTR (20, 16);
4045 unsigned vn = INSTR (9, 5);
4046 unsigned vd = INSTR (4, 0);
4047 unsigned full = INSTR (30, 30);
4048 unsigned test_false = INSTR (22, 22);
4049 unsigned i;
4050
4051 NYI_assert (29, 23, 0x5D);
4052 NYI_assert (21, 21, 1);
4053 NYI_assert (15, 10, 0x07);
4054
4055 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  for (i = 0; i < (full ? 4 : 2); i++)
    {
      /* BIT and BIF are bitwise selects, not element-wise compares:
	 BIT copies into Vd the bits of Vn where Vm is set, while BIF
	 copies the bits of Vn where Vm is clear.  32-bit lanes keep
	 the index within the 128-bit register.  */
      uint32_t vd_val = aarch64_get_vec_u32 (cpu, vd, i);
      uint32_t vn_val = aarch64_get_vec_u32 (cpu, vn, i);
      uint32_t vm_val = aarch64_get_vec_u32 (cpu, vm, i);

      if (test_false)
	aarch64_set_vec_u32 (cpu, vd, i,
			     (vd_val & vm_val) | (vn_val & ~vm_val));
      else
	aarch64_set_vec_u32 (cpu, vd, i,
			     (vd_val & ~vm_val) | (vn_val & vm_val));
    }
4068 }
4069
4070 static void
4071 do_vec_ORN (sim_cpu *cpu)
4072 {
4073 /* instr[31] = 0
4074 instr[30] = half (0)/full (1)
4075 instr[29,21] = 00 1110 111
4076 instr[20,16] = Vm
4077 instr[15,10] = 00 0111
4078 instr[9,5] = Vn
     instr[4,0]   = Vd.  */
4080
4081 unsigned vm = INSTR (20, 16);
4082 unsigned vn = INSTR (9, 5);
4083 unsigned vd = INSTR (4, 0);
4084 unsigned i;
4085 int full = INSTR (30, 30);
4086
4087 NYI_assert (29, 21, 0x077);
4088 NYI_assert (15, 10, 0x07);
4089
4090 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4091 for (i = 0; i < (full ? 16 : 8); i++)
4092 aarch64_set_vec_u8 (cpu, vd, i,
4093 aarch64_get_vec_u8 (cpu, vn, i)
4094 | ~ aarch64_get_vec_u8 (cpu, vm, i));
4095 }
4096
4097 static void
4098 do_vec_ORR (sim_cpu *cpu)
4099 {
4100 /* instr[31] = 0
4101 instr[30] = half (0)/full (1)
4102 instr[29,21] = 00 1110 101
4103 instr[20,16] = Vm
4104 instr[15,10] = 0001 11
4105 instr[9,5] = Vn
     instr[4,0]   = Vd.  */
4107
4108 unsigned vm = INSTR (20, 16);
4109 unsigned vn = INSTR (9, 5);
4110 unsigned vd = INSTR (4, 0);
4111 unsigned i;
4112 int full = INSTR (30, 30);
4113
4114 NYI_assert (29, 21, 0x075);
4115 NYI_assert (15, 10, 0x07);
4116
4117 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4118 for (i = 0; i < (full ? 16 : 8); i++)
4119 aarch64_set_vec_u8 (cpu, vd, i,
4120 aarch64_get_vec_u8 (cpu, vn, i)
4121 | aarch64_get_vec_u8 (cpu, vm, i));
4122 }
4123
4124 static void
4125 do_vec_BIC (sim_cpu *cpu)
4126 {
4127 /* instr[31] = 0
4128 instr[30] = half (0)/full (1)
4129 instr[29,21] = 00 1110 011
4130 instr[20,16] = Vm
4131 instr[15,10] = 00 0111
4132 instr[9,5] = Vn
     instr[4,0]   = Vd.  */
4134
4135 unsigned vm = INSTR (20, 16);
4136 unsigned vn = INSTR (9, 5);
4137 unsigned vd = INSTR (4, 0);
4138 unsigned i;
4139 int full = INSTR (30, 30);
4140
4141 NYI_assert (29, 21, 0x073);
4142 NYI_assert (15, 10, 0x07);
4143
4144 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4145 for (i = 0; i < (full ? 16 : 8); i++)
4146 aarch64_set_vec_u8 (cpu, vd, i,
4147 aarch64_get_vec_u8 (cpu, vn, i)
4148 & ~ aarch64_get_vec_u8 (cpu, vm, i));
4149 }
4150
4151 static void
4152 do_vec_XTN (sim_cpu *cpu)
4153 {
4154 /* instr[31] = 0
4155 instr[30] = first part (0)/ second part (1)
4156 instr[29,24] = 00 1110
4157 instr[23,22] = size: byte(00), half(01), word (10)
4158 instr[21,10] = 1000 0100 1010
4159 instr[9,5] = Vs
4160 instr[4,0] = Vd. */
4161
4162 unsigned vs = INSTR (9, 5);
4163 unsigned vd = INSTR (4, 0);
4164 unsigned bias = INSTR (30, 30);
4165 unsigned i;
4166
4167 NYI_assert (29, 24, 0x0E);
4168 NYI_assert (21, 10, 0x84A);
4169
4170 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
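  /* XTN writes the least significant half of each source element to the
     lower half of Vd; XTN2 (bias set) writes to the upper half of Vd
     instead, leaving the lower half intact.  */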
4171 switch (INSTR (23, 22))
4172 {
4173 case 0:
4174 if (bias)
4175 for (i = 0; i < 8; i++)
	  aarch64_set_vec_u8 (cpu, vd, i + 8,
			      aarch64_get_vec_u16 (cpu, vs, i));
4178 else
4179 for (i = 0; i < 8; i++)
4180 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, i));
4181 return;
4182
4183 case 1:
4184 if (bias)
4185 for (i = 0; i < 4; i++)
	  aarch64_set_vec_u16 (cpu, vd, i + 4,
			      aarch64_get_vec_u32 (cpu, vs, i));
4188 else
4189 for (i = 0; i < 4; i++)
4190 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, i));
4191 return;
4192
4193 case 2:
4194 if (bias)
4195 for (i = 0; i < 2; i++)
	  aarch64_set_vec_u32 (cpu, vd, i + 2,
			      aarch64_get_vec_u64 (cpu, vs, i));
4198 else
4199 for (i = 0; i < 2; i++)
4200 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, i));
4201 return;
4202 }
4203 }
4204
4205 static void
4206 do_vec_maxv (sim_cpu *cpu)
4207 {
4208 /* instr[31] = 0
4209 instr[30] = half(0)/full(1)
4210 instr[29] = signed (0)/unsigned(1)
4211 instr[28,24] = 0 1110
4212 instr[23,22] = size: byte(00), half(01), word (10)
4213 instr[21] = 1
4214 instr[20,17] = 1 000
4215 instr[16] = max(0)/min(1)
4216 instr[15,10] = 1010 10
4217 instr[9,5] = V source
     instr[4,0]   = R dest.  */
4219
4220 unsigned vs = INSTR (9, 5);
4221 unsigned rd = INSTR (4, 0);
4222 unsigned full = INSTR (30, 30);
4223 unsigned i;
4224
4225 NYI_assert (28, 24, 0x0E);
4226 NYI_assert (21, 21, 1);
4227 NYI_assert (20, 17, 8);
4228 NYI_assert (15, 10, 0x2A);
4229
4230 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4231 switch ((INSTR (29, 29) << 1) | INSTR (16, 16))
4232 {
4233 case 0: /* SMAXV. */
4234 {
4235 int64_t smax;
4236 switch (INSTR (23, 22))
4237 {
4238 case 0:
4239 smax = aarch64_get_vec_s8 (cpu, vs, 0);
4240 for (i = 1; i < (full ? 16 : 8); i++)
4241 smax = max (smax, aarch64_get_vec_s8 (cpu, vs, i));
4242 break;
4243 case 1:
4244 smax = aarch64_get_vec_s16 (cpu, vs, 0);
4245 for (i = 1; i < (full ? 8 : 4); i++)
4246 smax = max (smax, aarch64_get_vec_s16 (cpu, vs, i));
4247 break;
4248 case 2:
4249 smax = aarch64_get_vec_s32 (cpu, vs, 0);
4250 for (i = 1; i < (full ? 4 : 2); i++)
4251 smax = max (smax, aarch64_get_vec_s32 (cpu, vs, i));
4252 break;
4253 case 3:
4254 HALT_UNALLOC;
4255 }
4256 aarch64_set_reg_s64 (cpu, rd, NO_SP, smax);
4257 return;
4258 }
4259
4260 case 1: /* SMINV. */
4261 {
4262 int64_t smin;
4263 switch (INSTR (23, 22))
4264 {
4265 case 0:
4266 smin = aarch64_get_vec_s8 (cpu, vs, 0);
4267 for (i = 1; i < (full ? 16 : 8); i++)
4268 smin = min (smin, aarch64_get_vec_s8 (cpu, vs, i));
4269 break;
4270 case 1:
4271 smin = aarch64_get_vec_s16 (cpu, vs, 0);
4272 for (i = 1; i < (full ? 8 : 4); i++)
4273 smin = min (smin, aarch64_get_vec_s16 (cpu, vs, i));
4274 break;
4275 case 2:
4276 smin = aarch64_get_vec_s32 (cpu, vs, 0);
4277 for (i = 1; i < (full ? 4 : 2); i++)
4278 smin = min (smin, aarch64_get_vec_s32 (cpu, vs, i));
4279 break;
4280
4281 case 3:
4282 HALT_UNALLOC;
4283 }
4284 aarch64_set_reg_s64 (cpu, rd, NO_SP, smin);
4285 return;
4286 }
4287
4288 case 2: /* UMAXV. */
4289 {
4290 uint64_t umax;
4291 switch (INSTR (23, 22))
4292 {
4293 case 0:
4294 umax = aarch64_get_vec_u8 (cpu, vs, 0);
4295 for (i = 1; i < (full ? 16 : 8); i++)
4296 umax = max (umax, aarch64_get_vec_u8 (cpu, vs, i));
4297 break;
4298 case 1:
4299 umax = aarch64_get_vec_u16 (cpu, vs, 0);
4300 for (i = 1; i < (full ? 8 : 4); i++)
4301 umax = max (umax, aarch64_get_vec_u16 (cpu, vs, i));
4302 break;
4303 case 2:
4304 umax = aarch64_get_vec_u32 (cpu, vs, 0);
4305 for (i = 1; i < (full ? 4 : 2); i++)
4306 umax = max (umax, aarch64_get_vec_u32 (cpu, vs, i));
4307 break;
4308
4309 case 3:
4310 HALT_UNALLOC;
4311 }
4312 aarch64_set_reg_u64 (cpu, rd, NO_SP, umax);
4313 return;
4314 }
4315
4316 case 3: /* UMINV. */
4317 {
4318 uint64_t umin;
4319 switch (INSTR (23, 22))
4320 {
4321 case 0:
4322 umin = aarch64_get_vec_u8 (cpu, vs, 0);
4323 for (i = 1; i < (full ? 16 : 8); i++)
4324 umin = min (umin, aarch64_get_vec_u8 (cpu, vs, i));
4325 break;
4326 case 1:
4327 umin = aarch64_get_vec_u16 (cpu, vs, 0);
4328 for (i = 1; i < (full ? 8 : 4); i++)
4329 umin = min (umin, aarch64_get_vec_u16 (cpu, vs, i));
4330 break;
4331 case 2:
4332 umin = aarch64_get_vec_u32 (cpu, vs, 0);
4333 for (i = 1; i < (full ? 4 : 2); i++)
4334 umin = min (umin, aarch64_get_vec_u32 (cpu, vs, i));
4335 break;
4336
4337 case 3:
4338 HALT_UNALLOC;
4339 }
4340 aarch64_set_reg_u64 (cpu, rd, NO_SP, umin);
4341 return;
4342 }
4343 }
4344 }
4345
4346 static void
4347 do_vec_fminmaxV (sim_cpu *cpu)
4348 {
4349 /* instr[31,24] = 0110 1110
4350 instr[23] = max(0)/min(1)
4351 instr[22,14] = 011 0000 11
4352 instr[13,12] = nm(00)/normal(11)
4353 instr[11,10] = 10
4354 instr[9,5] = V source
     instr[4,0]  = R dest.  */
4356
4357 unsigned vs = INSTR (9, 5);
4358 unsigned rd = INSTR (4, 0);
4359 unsigned i;
4360 float res = aarch64_get_vec_float (cpu, vs, 0);
4361
4362 NYI_assert (31, 24, 0x6E);
4363 NYI_assert (22, 14, 0x0C3);
4364 NYI_assert (11, 10, 2);
4365
4366 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4367 if (INSTR (23, 23))
4368 {
4369 switch (INSTR (13, 12))
4370 {
	case 0: /* FMINNMV.  */
4372 for (i = 1; i < 4; i++)
4373 res = fminnm (res, aarch64_get_vec_float (cpu, vs, i));
4374 break;
4375
4376 case 3: /* FMINV. */
4377 for (i = 1; i < 4; i++)
4378 res = min (res, aarch64_get_vec_float (cpu, vs, i));
4379 break;
4380
4381 default:
4382 HALT_NYI;
4383 }
4384 }
4385 else
4386 {
4387 switch (INSTR (13, 12))
4388 {
	case 0: /* FMAXNMV.  */
4390 for (i = 1; i < 4; i++)
4391 res = fmaxnm (res, aarch64_get_vec_float (cpu, vs, i));
4392 break;
4393
4394 case 3: /* FMAXV. */
4395 for (i = 1; i < 4; i++)
4396 res = max (res, aarch64_get_vec_float (cpu, vs, i));
4397 break;
4398
4399 default:
4400 HALT_NYI;
4401 }
4402 }
4403
4404 aarch64_set_FP_float (cpu, rd, res);
4405 }
4406
4407 static void
4408 do_vec_Fminmax (sim_cpu *cpu)
4409 {
4410 /* instr[31] = 0
4411 instr[30] = half(0)/full(1)
4412 instr[29,24] = 00 1110
4413 instr[23] = max(0)/min(1)
4414 instr[22] = float(0)/double(1)
4415 instr[21] = 1
4416 instr[20,16] = Vm
4417 instr[15,14] = 11
4418 instr[13,12] = nm(00)/normal(11)
4419 instr[11,10] = 01
4420 instr[9,5] = Vn
4421 instr[4,0] = Vd. */
4422
4423 unsigned vm = INSTR (20, 16);
4424 unsigned vn = INSTR (9, 5);
4425 unsigned vd = INSTR (4, 0);
4426 unsigned full = INSTR (30, 30);
4427 unsigned min = INSTR (23, 23);
4428 unsigned i;
4429
4430 NYI_assert (29, 24, 0x0E);
4431 NYI_assert (21, 21, 1);
4432 NYI_assert (15, 14, 3);
4433 NYI_assert (11, 10, 1);
4434
4435 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4436 if (INSTR (22, 22))
4437 {
4438 double (* func)(double, double);
4439
4440 if (! full)
4441 HALT_NYI;
4442
4443 if (INSTR (13, 12) == 0)
4444 func = min ? dminnm : dmaxnm;
4445 else if (INSTR (13, 12) == 3)
4446 func = min ? fmin : fmax;
4447 else
4448 HALT_NYI;
4449
4450 for (i = 0; i < 2; i++)
4451 aarch64_set_vec_double (cpu, vd, i,
4452 func (aarch64_get_vec_double (cpu, vn, i),
4453 aarch64_get_vec_double (cpu, vm, i)));
4454 }
4455 else
4456 {
4457 float (* func)(float, float);
4458
4459 if (INSTR (13, 12) == 0)
4460 func = min ? fminnm : fmaxnm;
4461 else if (INSTR (13, 12) == 3)
4462 func = min ? fminf : fmaxf;
4463 else
4464 HALT_NYI;
4465
4466 for (i = 0; i < (full ? 4 : 2); i++)
4467 aarch64_set_vec_float (cpu, vd, i,
4468 func (aarch64_get_vec_float (cpu, vn, i),
4469 aarch64_get_vec_float (cpu, vm, i)));
4470 }
4471 }
4472
4473 static void
4474 do_vec_SCVTF (sim_cpu *cpu)
4475 {
4476 /* instr[31] = 0
4477 instr[30] = Q
4478 instr[29,23] = 00 1110 0
4479 instr[22] = float(0)/double(1)
4480 instr[21,10] = 10 0001 1101 10
4481 instr[9,5] = Vn
4482 instr[4,0] = Vd. */
4483
4484 unsigned vn = INSTR (9, 5);
4485 unsigned vd = INSTR (4, 0);
4486 unsigned full = INSTR (30, 30);
4487 unsigned size = INSTR (22, 22);
4488 unsigned i;
4489
4490 NYI_assert (29, 23, 0x1C);
4491 NYI_assert (21, 10, 0x876);
4492
4493 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
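  /* SCVTF converts *signed* integer elements, hence the signed element
     accessors below.  */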
4494 if (size)
4495 {
4496 if (! full)
4497 HALT_UNALLOC;
4498
4499 for (i = 0; i < 2; i++)
4500 {
	  double val = (double) aarch64_get_vec_s64 (cpu, vn, i);
4502 aarch64_set_vec_double (cpu, vd, i, val);
4503 }
4504 }
4505 else
4506 {
4507 for (i = 0; i < (full ? 4 : 2); i++)
4508 {
	  float val = (float) aarch64_get_vec_s32 (cpu, vn, i);
4510 aarch64_set_vec_float (cpu, vd, i, val);
4511 }
4512 }
4513 }
4514
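/* Helper for the integer vector compare instructions.  SOURCE selects
   the signed (s) or unsigned (u) element accessors and CMP is the C
   comparison operator; a result lane is set to all ones when the
   comparison succeeds and to all zeros otherwise.  */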
4515 #define VEC_CMP(SOURCE, CMP) \
4516 do \
4517 { \
4518 switch (size) \
4519 { \
4520 case 0: \
4521 for (i = 0; i < (full ? 16 : 8); i++) \
4522 aarch64_set_vec_u8 (cpu, vd, i, \
4523 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4524 CMP \
4525 aarch64_get_vec_##SOURCE##8 (cpu, vm, i) \
4526 ? -1 : 0); \
4527 return; \
4528 case 1: \
4529 for (i = 0; i < (full ? 8 : 4); i++) \
4530 aarch64_set_vec_u16 (cpu, vd, i, \
4531 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4532 CMP \
4533 aarch64_get_vec_##SOURCE##16 (cpu, vm, i) \
4534 ? -1 : 0); \
4535 return; \
4536 case 2: \
4537 for (i = 0; i < (full ? 4 : 2); i++) \
4538 aarch64_set_vec_u32 (cpu, vd, i, \
4539 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4540 CMP \
4541 aarch64_get_vec_##SOURCE##32 (cpu, vm, i) \
4542 ? -1 : 0); \
4543 return; \
4544 case 3: \
4545 if (! full) \
4546 HALT_UNALLOC; \
4547 for (i = 0; i < 2; i++) \
4548 aarch64_set_vec_u64 (cpu, vd, i, \
4549 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4550 CMP \
4551 aarch64_get_vec_##SOURCE##64 (cpu, vm, i) \
4552 ? -1ULL : 0); \
4553 return; \
4554 } \
4555 } \
4556 while (0)
4557
4558 #define VEC_CMP0(SOURCE, CMP) \
4559 do \
4560 { \
4561 switch (size) \
4562 { \
4563 case 0: \
4564 for (i = 0; i < (full ? 16 : 8); i++) \
4565 aarch64_set_vec_u8 (cpu, vd, i, \
4566 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4567 CMP 0 ? -1 : 0); \
4568 return; \
4569 case 1: \
4570 for (i = 0; i < (full ? 8 : 4); i++) \
4571 aarch64_set_vec_u16 (cpu, vd, i, \
4572 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4573 CMP 0 ? -1 : 0); \
4574 return; \
4575 case 2: \
4576 for (i = 0; i < (full ? 4 : 2); i++) \
4577 aarch64_set_vec_u32 (cpu, vd, i, \
4578 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4579 CMP 0 ? -1 : 0); \
4580 return; \
4581 case 3: \
4582 if (! full) \
4583 HALT_UNALLOC; \
4584 for (i = 0; i < 2; i++) \
4585 aarch64_set_vec_u64 (cpu, vd, i, \
4586 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4587 CMP 0 ? -1ULL : 0); \
4588 return; \
4589 } \
4590 } \
4591 while (0)
4592
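/* Likewise for the floating point compares: VEC_FCMP0 compares each
   lane against zero and VEC_FCMP against the corresponding lane of Vm.  */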
4593 #define VEC_FCMP0(CMP) \
4594 do \
4595 { \
4596 if (vm != 0) \
4597 HALT_NYI; \
4598 if (INSTR (22, 22)) \
4599 { \
4600 if (! full) \
4601 HALT_NYI; \
4602 for (i = 0; i < 2; i++) \
4603 aarch64_set_vec_u64 (cpu, vd, i, \
4604 aarch64_get_vec_double (cpu, vn, i) \
4605 CMP 0.0 ? -1 : 0); \
4606 } \
4607 else \
4608 { \
4609 for (i = 0; i < (full ? 4 : 2); i++) \
4610 aarch64_set_vec_u32 (cpu, vd, i, \
4611 aarch64_get_vec_float (cpu, vn, i) \
4612 CMP 0.0 ? -1 : 0); \
4613 } \
4614 return; \
4615 } \
4616 while (0)
4617
4618 #define VEC_FCMP(CMP) \
4619 do \
4620 { \
4621 if (INSTR (22, 22)) \
4622 { \
4623 if (! full) \
4624 HALT_NYI; \
4625 for (i = 0; i < 2; i++) \
4626 aarch64_set_vec_u64 (cpu, vd, i, \
4627 aarch64_get_vec_double (cpu, vn, i) \
4628 CMP \
4629 aarch64_get_vec_double (cpu, vm, i) \
4630 ? -1 : 0); \
4631 } \
4632 else \
4633 { \
4634 for (i = 0; i < (full ? 4 : 2); i++) \
4635 aarch64_set_vec_u32 (cpu, vd, i, \
4636 aarch64_get_vec_float (cpu, vn, i) \
4637 CMP \
4638 aarch64_get_vec_float (cpu, vm, i) \
4639 ? -1 : 0); \
4640 } \
4641 return; \
4642 } \
4643 while (0)
4644
4645 static void
4646 do_vec_compare (sim_cpu *cpu)
4647 {
4648 /* instr[31] = 0
4649 instr[30] = half(0)/full(1)
4650 instr[29] = part-of-comparison-type
4651 instr[28,24] = 0 1110
4652 instr[23,22] = size of integer compares: byte(00), half(01), word (10), long (11)
                    type of float compares: single (bit 22 == 0) / double (bit 22 == 1)
4654 instr[21] = 1
4655 instr[20,16] = Vm or 00000 (compare vs 0)
4656 instr[15,10] = part-of-comparison-type
4657 instr[9,5] = Vn
     instr[4,0]   = Vd.  */
4659
4660 int full = INSTR (30, 30);
4661 int size = INSTR (23, 22);
4662 unsigned vm = INSTR (20, 16);
4663 unsigned vn = INSTR (9, 5);
4664 unsigned vd = INSTR (4, 0);
4665 unsigned i;
4666
4667 NYI_assert (28, 24, 0x0E);
4668 NYI_assert (21, 21, 1);
4669
4670 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4671 if ((INSTR (11, 11)
4672 && INSTR (14, 14))
4673 || ((INSTR (11, 11) == 0
4674 && INSTR (10, 10) == 0)))
4675 {
4676 /* A compare vs 0. */
4677 if (vm != 0)
4678 {
4679 if (INSTR (15, 10) == 0x2A)
4680 do_vec_maxv (cpu);
4681 else if (INSTR (15, 10) == 0x32
4682 || INSTR (15, 10) == 0x3E)
4683 do_vec_fminmaxV (cpu);
4684 else if (INSTR (29, 23) == 0x1C
4685 && INSTR (21, 10) == 0x876)
4686 do_vec_SCVTF (cpu);
4687 else
4688 HALT_NYI;
4689 return;
4690 }
4691 }
4692
4693 if (INSTR (14, 14))
4694 {
4695 /* A floating point compare. */
4696 unsigned decode = (INSTR (29, 29) << 5) | (INSTR (23, 23) << 4)
4697 | INSTR (13, 10);
4698
4699 NYI_assert (15, 15, 1);
4700
4701 switch (decode)
4702 {
4703 case /* 0b010010: GT#0 */ 0x12: VEC_FCMP0 (>);
4704 case /* 0b110010: GE#0 */ 0x32: VEC_FCMP0 (>=);
4705 case /* 0b010110: EQ#0 */ 0x16: VEC_FCMP0 (==);
4706 case /* 0b110110: LE#0 */ 0x36: VEC_FCMP0 (<=);
4707 case /* 0b011010: LT#0 */ 0x1A: VEC_FCMP0 (<);
4708 case /* 0b111001: GT */ 0x39: VEC_FCMP (>);
4709 case /* 0b101001: GE */ 0x29: VEC_FCMP (>=);
4710 case /* 0b001001: EQ */ 0x09: VEC_FCMP (==);
4711
4712 default:
4713 HALT_NYI;
4714 }
4715 }
4716 else
4717 {
4718 unsigned decode = (INSTR (29, 29) << 6) | INSTR (15, 10);
4719
4720 switch (decode)
4721 {
4722 case 0x0D: /* 0001101 GT */ VEC_CMP (s, > );
4723 case 0x0F: /* 0001111 GE */ VEC_CMP (s, >= );
4724 case 0x22: /* 0100010 GT #0 */ VEC_CMP0 (s, > );
4725 case 0x26: /* 0100110 EQ #0 */ VEC_CMP0 (s, == );
4726 case 0x2A: /* 0101010 LT #0 */ VEC_CMP0 (s, < );
4727 case 0x4D: /* 1001101 HI */ VEC_CMP (u, > );
4728 case 0x4F: /* 1001111 HS */ VEC_CMP (u, >= );
4729 case 0x62: /* 1100010 GE #0 */ VEC_CMP0 (s, >= );
4730 case 0x63: /* 1100011 EQ */ VEC_CMP (u, == );
4731 case 0x66: /* 1100110 LE #0 */ VEC_CMP0 (s, <= );
4732 default:
4733 if (vm == 0)
4734 HALT_NYI;
4735 do_vec_maxv (cpu);
4736 }
4737 }
4738 }
4739
4740 static void
4741 do_vec_SSHL (sim_cpu *cpu)
4742 {
4743 /* instr[31] = 0
4744 instr[30] = first part (0)/ second part (1)
4745 instr[29,24] = 00 1110
4746 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4747 instr[21] = 1
4748 instr[20,16] = Vm
4749 instr[15,10] = 0100 01
4750 instr[9,5] = Vn
4751 instr[4,0] = Vd. */
4752
4753 unsigned full = INSTR (30, 30);
4754 unsigned vm = INSTR (20, 16);
4755 unsigned vn = INSTR (9, 5);
4756 unsigned vd = INSTR (4, 0);
4757 unsigned i;
4758 signed int shift;
4759
4760 NYI_assert (29, 24, 0x0E);
4761 NYI_assert (21, 21, 1);
4762 NYI_assert (15, 10, 0x11);
4763
  /* The per-element shift count is the signed low byte of the
     corresponding element of Vm; a negative count shifts right
     (arithmetically, for this signed variant).  */
4765
4766 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4767 switch (INSTR (23, 22))
4768 {
4769 case 0:
4770 for (i = 0; i < (full ? 16 : 8); i++)
4771 {
4772 shift = aarch64_get_vec_s8 (cpu, vm, i);
4773 if (shift >= 0)
4774 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4775 << shift);
4776 else
4777 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4778 >> - shift);
4779 }
4780 return;
4781
4782 case 1:
4783 for (i = 0; i < (full ? 8 : 4); i++)
4784 {
4785 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
4786 if (shift >= 0)
4787 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4788 << shift);
4789 else
4790 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4791 >> - shift);
4792 }
4793 return;
4794
4795 case 2:
4796 for (i = 0; i < (full ? 4 : 2); i++)
4797 {
4798 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
4799 if (shift >= 0)
4800 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4801 << shift);
4802 else
4803 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4804 >> - shift);
4805 }
4806 return;
4807
4808 case 3:
4809 if (! full)
4810 HALT_UNALLOC;
4811 for (i = 0; i < 2; i++)
4812 {
4813 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
4814 if (shift >= 0)
4815 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4816 << shift);
4817 else
4818 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4819 >> - shift);
4820 }
4821 return;
4822 }
4823 }
4824
4825 static void
4826 do_vec_USHL (sim_cpu *cpu)
4827 {
4828 /* instr[31] = 0
4829 instr[30] = first part (0)/ second part (1)
4830 instr[29,24] = 10 1110
4831 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4832 instr[21] = 1
4833 instr[20,16] = Vm
4834 instr[15,10] = 0100 01
4835 instr[9,5] = Vn
4836 instr[4,0] = Vd */
4837
4838 unsigned full = INSTR (30, 30);
4839 unsigned vm = INSTR (20, 16);
4840 unsigned vn = INSTR (9, 5);
4841 unsigned vd = INSTR (4, 0);
4842 unsigned i;
4843 signed int shift;
4844
4845 NYI_assert (29, 24, 0x2E);
4846 NYI_assert (15, 10, 0x11);
4847
4848 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4849 switch (INSTR (23, 22))
4850 {
4851 case 0:
4852 for (i = 0; i < (full ? 16 : 8); i++)
4853 {
4854 shift = aarch64_get_vec_s8 (cpu, vm, i);
4855 if (shift >= 0)
4856 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
4857 << shift);
4858 else
4859 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
4860 >> - shift);
4861 }
4862 return;
4863
4864 case 1:
4865 for (i = 0; i < (full ? 8 : 4); i++)
4866 {
4867 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
4868 if (shift >= 0)
4869 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
4870 << shift);
4871 else
4872 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
4873 >> - shift);
4874 }
4875 return;
4876
4877 case 2:
4878 for (i = 0; i < (full ? 4 : 2); i++)
4879 {
4880 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
4881 if (shift >= 0)
4882 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
4883 << shift);
4884 else
4885 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
4886 >> - shift);
4887 }
4888 return;
4889
4890 case 3:
4891 if (! full)
4892 HALT_UNALLOC;
4893 for (i = 0; i < 2; i++)
4894 {
4895 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
4896 if (shift >= 0)
4897 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
4898 << shift);
4899 else
4900 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
4901 >> - shift);
4902 }
4903 return;
4904 }
4905 }
4906
4907 static void
4908 do_vec_FMLA (sim_cpu *cpu)
4909 {
4910 /* instr[31] = 0
4911 instr[30] = full/half selector
4912 instr[29,23] = 0011100
4913 instr[22] = size: 0=>float, 1=>double
4914 instr[21] = 1
     instr[20,16] = Vm
     instr[15,10] = 1100 11
     instr[9,5]   = Vn
     instr[4,0]   = Vd.  */
4919
4920 unsigned vm = INSTR (20, 16);
4921 unsigned vn = INSTR (9, 5);
4922 unsigned vd = INSTR (4, 0);
4923 unsigned i;
4924 int full = INSTR (30, 30);
4925
4926 NYI_assert (29, 23, 0x1C);
4927 NYI_assert (21, 21, 1);
4928 NYI_assert (15, 10, 0x33);
4929
4930 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
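  /* Note: the multiply and add below are performed as separate, rounded
     operations, whereas hardware FMLA is fused; results may differ from
     hardware in the last bit.  */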
4931 if (INSTR (22, 22))
4932 {
4933 if (! full)
4934 HALT_UNALLOC;
4935 for (i = 0; i < 2; i++)
4936 aarch64_set_vec_double (cpu, vd, i,
4937 aarch64_get_vec_double (cpu, vn, i) *
4938 aarch64_get_vec_double (cpu, vm, i) +
4939 aarch64_get_vec_double (cpu, vd, i));
4940 }
4941 else
4942 {
4943 for (i = 0; i < (full ? 4 : 2); i++)
4944 aarch64_set_vec_float (cpu, vd, i,
4945 aarch64_get_vec_float (cpu, vn, i) *
4946 aarch64_get_vec_float (cpu, vm, i) +
4947 aarch64_get_vec_float (cpu, vd, i));
4948 }
4949 }
4950
4951 static void
4952 do_vec_max (sim_cpu *cpu)
4953 {
4954 /* instr[31] = 0
4955 instr[30] = full/half selector
4956 instr[29] = SMAX (0) / UMAX (1)
4957 instr[28,24] = 0 1110
4958 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
4959 instr[21] = 1
     instr[20,16] = Vm
     instr[15,10] = 0110 01
     instr[9,5]   = Vn
     instr[4,0]   = Vd.  */
4964
4965 unsigned vm = INSTR (20, 16);
4966 unsigned vn = INSTR (9, 5);
4967 unsigned vd = INSTR (4, 0);
4968 unsigned i;
4969 int full = INSTR (30, 30);
4970
4971 NYI_assert (28, 24, 0x0E);
4972 NYI_assert (21, 21, 1);
4973 NYI_assert (15, 10, 0x19);
4974
4975 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4976 if (INSTR (29, 29))
4977 {
4978 switch (INSTR (23, 22))
4979 {
4980 case 0:
4981 for (i = 0; i < (full ? 16 : 8); i++)
4982 aarch64_set_vec_u8 (cpu, vd, i,
4983 aarch64_get_vec_u8 (cpu, vn, i)
4984 > aarch64_get_vec_u8 (cpu, vm, i)
4985 ? aarch64_get_vec_u8 (cpu, vn, i)
4986 : aarch64_get_vec_u8 (cpu, vm, i));
4987 return;
4988
4989 case 1:
4990 for (i = 0; i < (full ? 8 : 4); i++)
4991 aarch64_set_vec_u16 (cpu, vd, i,
4992 aarch64_get_vec_u16 (cpu, vn, i)
4993 > aarch64_get_vec_u16 (cpu, vm, i)
4994 ? aarch64_get_vec_u16 (cpu, vn, i)
4995 : aarch64_get_vec_u16 (cpu, vm, i));
4996 return;
4997
4998 case 2:
4999 for (i = 0; i < (full ? 4 : 2); i++)
5000 aarch64_set_vec_u32 (cpu, vd, i,
5001 aarch64_get_vec_u32 (cpu, vn, i)
5002 > aarch64_get_vec_u32 (cpu, vm, i)
5003 ? aarch64_get_vec_u32 (cpu, vn, i)
5004 : aarch64_get_vec_u32 (cpu, vm, i));
5005 return;
5006
5007 case 3:
5008 HALT_UNALLOC;
5009 }
5010 }
5011 else
5012 {
5013 switch (INSTR (23, 22))
5014 {
5015 case 0:
5016 for (i = 0; i < (full ? 16 : 8); i++)
5017 aarch64_set_vec_s8 (cpu, vd, i,
5018 aarch64_get_vec_s8 (cpu, vn, i)
5019 > aarch64_get_vec_s8 (cpu, vm, i)
5020 ? aarch64_get_vec_s8 (cpu, vn, i)
5021 : aarch64_get_vec_s8 (cpu, vm, i));
5022 return;
5023
5024 case 1:
5025 for (i = 0; i < (full ? 8 : 4); i++)
5026 aarch64_set_vec_s16 (cpu, vd, i,
5027 aarch64_get_vec_s16 (cpu, vn, i)
5028 > aarch64_get_vec_s16 (cpu, vm, i)
5029 ? aarch64_get_vec_s16 (cpu, vn, i)
5030 : aarch64_get_vec_s16 (cpu, vm, i));
5031 return;
5032
5033 case 2:
5034 for (i = 0; i < (full ? 4 : 2); i++)
5035 aarch64_set_vec_s32 (cpu, vd, i,
5036 aarch64_get_vec_s32 (cpu, vn, i)
5037 > aarch64_get_vec_s32 (cpu, vm, i)
5038 ? aarch64_get_vec_s32 (cpu, vn, i)
5039 : aarch64_get_vec_s32 (cpu, vm, i));
5040 return;
5041
5042 case 3:
5043 HALT_UNALLOC;
5044 }
5045 }
5046 }
5047
5048 static void
5049 do_vec_min (sim_cpu *cpu)
5050 {
5051 /* instr[31] = 0
5052 instr[30] = full/half selector
5053 instr[29] = SMIN (0) / UMIN (1)
5054 instr[28,24] = 0 1110
5055 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
5056 instr[21] = 1
     instr[20,16] = Vm
     instr[15,10] = 0110 11
     instr[9,5]   = Vn
     instr[4,0]   = Vd.  */
5061
5062 unsigned vm = INSTR (20, 16);
5063 unsigned vn = INSTR (9, 5);
5064 unsigned vd = INSTR (4, 0);
5065 unsigned i;
5066 int full = INSTR (30, 30);
5067
5068 NYI_assert (28, 24, 0x0E);
5069 NYI_assert (21, 21, 1);
5070 NYI_assert (15, 10, 0x1B);
5071
5072 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5073 if (INSTR (29, 29))
5074 {
5075 switch (INSTR (23, 22))
5076 {
5077 case 0:
5078 for (i = 0; i < (full ? 16 : 8); i++)
5079 aarch64_set_vec_u8 (cpu, vd, i,
5080 aarch64_get_vec_u8 (cpu, vn, i)
5081 < aarch64_get_vec_u8 (cpu, vm, i)
5082 ? aarch64_get_vec_u8 (cpu, vn, i)
5083 : aarch64_get_vec_u8 (cpu, vm, i));
5084 return;
5085
5086 case 1:
5087 for (i = 0; i < (full ? 8 : 4); i++)
5088 aarch64_set_vec_u16 (cpu, vd, i,
5089 aarch64_get_vec_u16 (cpu, vn, i)
5090 < aarch64_get_vec_u16 (cpu, vm, i)
5091 ? aarch64_get_vec_u16 (cpu, vn, i)
5092 : aarch64_get_vec_u16 (cpu, vm, i));
5093 return;
5094
5095 case 2:
5096 for (i = 0; i < (full ? 4 : 2); i++)
5097 aarch64_set_vec_u32 (cpu, vd, i,
5098 aarch64_get_vec_u32 (cpu, vn, i)
5099 < aarch64_get_vec_u32 (cpu, vm, i)
5100 ? aarch64_get_vec_u32 (cpu, vn, i)
5101 : aarch64_get_vec_u32 (cpu, vm, i));
5102 return;
5103
5104 case 3:
5105 HALT_UNALLOC;
5106 }
5107 }
5108 else
5109 {
5110 switch (INSTR (23, 22))
5111 {
5112 case 0:
5113 for (i = 0; i < (full ? 16 : 8); i++)
5114 aarch64_set_vec_s8 (cpu, vd, i,
5115 aarch64_get_vec_s8 (cpu, vn, i)
5116 < aarch64_get_vec_s8 (cpu, vm, i)
5117 ? aarch64_get_vec_s8 (cpu, vn, i)
5118 : aarch64_get_vec_s8 (cpu, vm, i));
5119 return;
5120
5121 case 1:
5122 for (i = 0; i < (full ? 8 : 4); i++)
5123 aarch64_set_vec_s16 (cpu, vd, i,
5124 aarch64_get_vec_s16 (cpu, vn, i)
5125 < aarch64_get_vec_s16 (cpu, vm, i)
5126 ? aarch64_get_vec_s16 (cpu, vn, i)
5127 : aarch64_get_vec_s16 (cpu, vm, i));
5128 return;
5129
5130 case 2:
5131 for (i = 0; i < (full ? 4 : 2); i++)
5132 aarch64_set_vec_s32 (cpu, vd, i,
5133 aarch64_get_vec_s32 (cpu, vn, i)
5134 < aarch64_get_vec_s32 (cpu, vm, i)
5135 ? aarch64_get_vec_s32 (cpu, vn, i)
5136 : aarch64_get_vec_s32 (cpu, vm, i));
5137 return;
5138
5139 case 3:
5140 HALT_UNALLOC;
5141 }
5142 }
5143 }
5144
5145 static void
5146 do_vec_sub_long (sim_cpu *cpu)
5147 {
5148 /* instr[31] = 0
5149 instr[30] = lower (0) / upper (1)
5150 instr[29] = signed (0) / unsigned (1)
5151 instr[28,24] = 0 1110
5152 instr[23,22] = size: bytes (00), half (01), word (10)
5153 instr[21] = 1
     instr[20,16] = Vm
5155 instr[15,10] = 0010 00
5156 instr[9,5] = Vn
5157 instr[4,0] = V dest. */
5158
5159 unsigned size = INSTR (23, 22);
5160 unsigned vm = INSTR (20, 16);
5161 unsigned vn = INSTR (9, 5);
5162 unsigned vd = INSTR (4, 0);
5163 unsigned bias = 0;
5164 unsigned i;
5165
5166 NYI_assert (28, 24, 0x0E);
5167 NYI_assert (21, 21, 1);
5168 NYI_assert (15, 10, 0x08);
5169
5170 if (size == 3)
5171 HALT_UNALLOC;
5172
5173 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5174 switch (INSTR (30, 29))
5175 {
5176 case 2: /* SSUBL2. */
5177 bias = 2;
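      /* Fall through.  */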
5178 case 0: /* SSUBL. */
5179 switch (size)
5180 {
5181 case 0:
	  /* The second part starts at byte element 8, so scale the
	     bias of 2 up to 8.  */
	  bias *= 4;
5183 for (i = 0; i < 8; i++)
5184 aarch64_set_vec_s16 (cpu, vd, i,
5185 aarch64_get_vec_s8 (cpu, vn, i + bias)
5186 - aarch64_get_vec_s8 (cpu, vm, i + bias));
5187 break;
5188
5189 case 1:
5190 bias *= 2;
5191 for (i = 0; i < 4; i++)
5192 aarch64_set_vec_s32 (cpu, vd, i,
5193 aarch64_get_vec_s16 (cpu, vn, i + bias)
5194 - aarch64_get_vec_s16 (cpu, vm, i + bias));
5195 break;
5196
5197 case 2:
5198 for (i = 0; i < 2; i++)
5199 aarch64_set_vec_s64 (cpu, vd, i,
5200 aarch64_get_vec_s32 (cpu, vn, i + bias)
5201 - aarch64_get_vec_s32 (cpu, vm, i + bias));
5202 break;
5203
5204 default:
5205 HALT_UNALLOC;
5206 }
5207 break;
5208
5209 case 3: /* USUBL2. */
5210 bias = 2;
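      /* Fall through.  */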
5211 case 1: /* USUBL. */
5212 switch (size)
5213 {
5214 case 0:
	  /* The second part starts at byte element 8, so scale the
	     bias of 2 up to 8.  */
	  bias *= 4;
5216 for (i = 0; i < 8; i++)
5217 aarch64_set_vec_u16 (cpu, vd, i,
5218 aarch64_get_vec_u8 (cpu, vn, i + bias)
5219 - aarch64_get_vec_u8 (cpu, vm, i + bias));
5220 break;
5221
5222 case 1:
5223 bias *= 2;
5224 for (i = 0; i < 4; i++)
5225 aarch64_set_vec_u32 (cpu, vd, i,
5226 aarch64_get_vec_u16 (cpu, vn, i + bias)
5227 - aarch64_get_vec_u16 (cpu, vm, i + bias));
5228 break;
5229
5230 case 2:
5231 for (i = 0; i < 2; i++)
5232 aarch64_set_vec_u64 (cpu, vd, i,
5233 aarch64_get_vec_u32 (cpu, vn, i + bias)
5234 - aarch64_get_vec_u32 (cpu, vm, i + bias));
5235 break;
5236
5237 default:
5238 HALT_UNALLOC;
5239 }
5240 break;
5241 }
5242 }
5243
5244 static void
5245 do_vec_ADDP (sim_cpu *cpu)
5246 {
5247 /* instr[31] = 0
5248 instr[30] = half(0)/full(1)
5249 instr[29,24] = 00 1110
5250 instr[23,22] = size: bytes (00), half (01), word (10), long (11)
5251 instr[21] = 1
     instr[20,16] = Vm
5253 instr[15,10] = 1011 11
5254 instr[9,5] = Vn
5255 instr[4,0] = V dest. */
5256
5257 FRegister copy_vn;
5258 FRegister copy_vm;
5259 unsigned full = INSTR (30, 30);
5260 unsigned size = INSTR (23, 22);
5261 unsigned vm = INSTR (20, 16);
5262 unsigned vn = INSTR (9, 5);
5263 unsigned vd = INSTR (4, 0);
5264 unsigned i, range;
5265
5266 NYI_assert (29, 24, 0x0E);
5267 NYI_assert (21, 21, 1);
5268 NYI_assert (15, 10, 0x2F);
5269
5270 /* Make copies of the source registers in case vd == vn/vm. */
5271 copy_vn = cpu->fr[vn];
5272 copy_vm = cpu->fr[vm];
5273
5274 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5275 switch (size)
5276 {
5277 case 0:
5278 range = full ? 8 : 4;
5279 for (i = 0; i < range; i++)
5280 {
5281 aarch64_set_vec_u8 (cpu, vd, i,
5282 copy_vn.b[i * 2] + copy_vn.b[i * 2 + 1]);
5283 aarch64_set_vec_u8 (cpu, vd, i + range,
5284 copy_vm.b[i * 2] + copy_vm.b[i * 2 + 1]);
5285 }
5286 return;
5287
5288 case 1:
5289 range = full ? 4 : 2;
5290 for (i = 0; i < range; i++)
5291 {
5292 aarch64_set_vec_u16 (cpu, vd, i,
5293 copy_vn.h[i * 2] + copy_vn.h[i * 2 + 1]);
5294 aarch64_set_vec_u16 (cpu, vd, i + range,
5295 copy_vm.h[i * 2] + copy_vm.h[i * 2 + 1]);
5296 }
5297 return;
5298
5299 case 2:
5300 range = full ? 2 : 1;
5301 for (i = 0; i < range; i++)
5302 {
5303 aarch64_set_vec_u32 (cpu, vd, i,
5304 copy_vn.w[i * 2] + copy_vn.w[i * 2 + 1]);
5305 aarch64_set_vec_u32 (cpu, vd, i + range,
5306 copy_vm.w[i * 2] + copy_vm.w[i * 2 + 1]);
5307 }
5308 return;
5309
5310 case 3:
5311 if (! full)
5312 HALT_UNALLOC;
5313 aarch64_set_vec_u64 (cpu, vd, 0, copy_vn.v[0] + copy_vn.v[1]);
5314 aarch64_set_vec_u64 (cpu, vd, 1, copy_vm.v[0] + copy_vm.v[1]);
5315 return;
5316 }
5317 }
5318
5319 static void
5320 do_vec_UMOV (sim_cpu *cpu)
5321 {
5322 /* instr[31] = 0
5323 instr[30] = 32-bit(0)/64-bit(1)
5324 instr[29,21] = 00 1110 000
     instr[20,16] = size & index
5326 instr[15,10] = 0011 11
5327 instr[9,5] = V source
5328 instr[4,0] = R dest. */
5329
5330 unsigned vs = INSTR (9, 5);
5331 unsigned rd = INSTR (4, 0);
5332 unsigned index;
5333
5334 NYI_assert (29, 21, 0x070);
5335 NYI_assert (15, 10, 0x0F);
5336
5337 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
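  /* The imm5 field (bits [20,16]) encodes both the element size and the
     index: the position of the lowest set bit gives the size and the
     bits above it form the index.  */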
5338 if (INSTR (16, 16))
5339 {
5340 /* Byte transfer. */
5341 index = INSTR (20, 17);
5342 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5343 aarch64_get_vec_u8 (cpu, vs, index));
5344 }
5345 else if (INSTR (17, 17))
5346 {
5347 index = INSTR (20, 18);
5348 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5349 aarch64_get_vec_u16 (cpu, vs, index));
5350 }
5351 else if (INSTR (18, 18))
5352 {
5353 index = INSTR (20, 19);
5354 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5355 aarch64_get_vec_u32 (cpu, vs, index));
5356 }
5357 else
5358 {
5359 if (INSTR (30, 30) != 1)
5360 HALT_UNALLOC;
5361
5362 index = INSTR (20, 20);
5363 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5364 aarch64_get_vec_u64 (cpu, vs, index));
5365 }
5366 }
5367
5368 static void
5369 do_vec_FABS (sim_cpu *cpu)
5370 {
5371 /* instr[31] = 0
5372 instr[30] = half(0)/full(1)
5373 instr[29,23] = 00 1110 1
5374 instr[22] = float(0)/double(1)
5375 instr[21,16] = 10 0000
5376 instr[15,10] = 1111 10
5377 instr[9,5] = Vn
5378 instr[4,0] = Vd. */
5379
5380 unsigned vn = INSTR (9, 5);
5381 unsigned vd = INSTR (4, 0);
5382 unsigned full = INSTR (30, 30);
5383 unsigned i;
5384
5385 NYI_assert (29, 23, 0x1D);
5386 NYI_assert (21, 10, 0x83E);
5387
5388 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5389 if (INSTR (22, 22))
5390 {
5391 if (! full)
5392 HALT_NYI;
5393
5394 for (i = 0; i < 2; i++)
5395 aarch64_set_vec_double (cpu, vd, i,
5396 fabs (aarch64_get_vec_double (cpu, vn, i)));
5397 }
5398 else
5399 {
5400 for (i = 0; i < (full ? 4 : 2); i++)
5401 aarch64_set_vec_float (cpu, vd, i,
5402 fabsf (aarch64_get_vec_float (cpu, vn, i)));
5403 }
5404 }
5405
5406 static void
5407 do_vec_FCVTZS (sim_cpu *cpu)
5408 {
5409 /* instr[31] = 0
5410 instr[30] = half (0) / all (1)
5411 instr[29,23] = 00 1110 1
5412 instr[22] = single (0) / double (1)
5413 instr[21,10] = 10 0001 1011 10
5414 instr[9,5] = Rn
5415 instr[4,0] = Rd. */
5416
5417 unsigned rn = INSTR (9, 5);
5418 unsigned rd = INSTR (4, 0);
5419 unsigned full = INSTR (30, 30);
5420 unsigned i;
5421
5422 NYI_assert (31, 31, 0);
5423 NYI_assert (29, 23, 0x1D);
5424 NYI_assert (21, 10, 0x86E);
5425
5426 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5427 if (INSTR (22, 22))
5428 {
5429 if (! full)
5430 HALT_UNALLOC;
5431
5432 for (i = 0; i < 2; i++)
5433 aarch64_set_vec_s64 (cpu, rd, i,
5434 (int64_t) aarch64_get_vec_double (cpu, rn, i));
5435 }
5436 else
5437 for (i = 0; i < (full ? 4 : 2); i++)
5438 aarch64_set_vec_s32 (cpu, rd, i,
5439 (int32_t) aarch64_get_vec_float (cpu, rn, i));
5440 }
5441
5442 static void
5443 do_vec_REV64 (sim_cpu *cpu)
5444 {
5445 /* instr[31] = 0
5446 instr[30] = full/half
5447 instr[29,24] = 00 1110
5448 instr[23,22] = size
5449 instr[21,10] = 10 0000 0000 10
5450 instr[9,5] = Rn
5451 instr[4,0] = Rd. */
5452
5453 unsigned rn = INSTR (9, 5);
5454 unsigned rd = INSTR (4, 0);
5455 unsigned size = INSTR (23, 22);
5456 unsigned full = INSTR (30, 30);
5457 unsigned i;
5458 FRegister val;
5459
5460 NYI_assert (29, 24, 0x0E);
5461 NYI_assert (21, 10, 0x802);
5462
5463 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5464 switch (size)
5465 {
5466 case 0:
5467 for (i = 0; i < (full ? 16 : 8); i++)
5468 val.b[i ^ 0x7] = aarch64_get_vec_u8 (cpu, rn, i);
5469 break;
5470
5471 case 1:
5472 for (i = 0; i < (full ? 8 : 4); i++)
5473 val.h[i ^ 0x3] = aarch64_get_vec_u16 (cpu, rn, i);
5474 break;
5475
5476 case 2:
5477 for (i = 0; i < (full ? 4 : 2); i++)
5478 val.w[i ^ 0x1] = aarch64_get_vec_u32 (cpu, rn, i);
5479 break;
5480
5481 case 3:
5482 HALT_UNALLOC;
5483 }
5484
5485 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5486 if (full)
5487 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5488 }
5489
5490 static void
5491 do_vec_REV16 (sim_cpu *cpu)
5492 {
5493 /* instr[31] = 0
5494 instr[30] = full/half
5495 instr[29,24] = 00 1110
5496 instr[23,22] = size
5497 instr[21,10] = 10 0000 0001 10
5498 instr[9,5] = Rn
5499 instr[4,0] = Rd. */
5500
5501 unsigned rn = INSTR (9, 5);
5502 unsigned rd = INSTR (4, 0);
5503 unsigned size = INSTR (23, 22);
5504 unsigned full = INSTR (30, 30);
5505 unsigned i;
5506 FRegister val;
5507
5508 NYI_assert (29, 24, 0x0E);
5509 NYI_assert (21, 10, 0x806);
5510
5511 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5512 switch (size)
5513 {
5514 case 0:
5515 for (i = 0; i < (full ? 16 : 8); i++)
5516 val.b[i ^ 0x1] = aarch64_get_vec_u8 (cpu, rn, i);
5517 break;
5518
5519 default:
5520 HALT_UNALLOC;
5521 }
5522
5523 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5524 if (full)
5525 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5526 }
5527
5528 static void
5529 do_vec_op1 (sim_cpu *cpu)
5530 {
5531 /* instr[31] = 0
5532 instr[30] = half/full
5533 instr[29,24] = 00 1110
5534 instr[23,21] = ???
5535 instr[20,16] = Vm
5536 instr[15,10] = sub-opcode
5537 instr[9,5] = Vn
5538 instr[4,0] = Vd */
5539 NYI_assert (29, 24, 0x0E);
5540
5541 if (INSTR (21, 21) == 0)
5542 {
5543 if (INSTR (23, 22) == 0)
5544 {
5545 if (INSTR (30, 30) == 1
5546 && INSTR (17, 14) == 0
5547 && INSTR (12, 10) == 7)
5548 return do_vec_ins_2 (cpu);
5549
5550 switch (INSTR (15, 10))
5551 {
5552 case 0x01: do_vec_DUP_vector_into_vector (cpu); return;
5553 case 0x03: do_vec_DUP_scalar_into_vector (cpu); return;
5554 case 0x07: do_vec_INS (cpu); return;
5555 case 0x0A: do_vec_TRN (cpu); return;
5556
5557 case 0x0F:
5558 if (INSTR (17, 16) == 0)
5559 {
5560 do_vec_MOV_into_scalar (cpu);
5561 return;
5562 }
5563 break;
5564
5565 case 0x00:
5566 case 0x08:
5567 case 0x10:
5568 case 0x18:
5569 do_vec_TBL (cpu); return;
5570
5571 case 0x06:
5572 case 0x16:
5573 do_vec_UZP (cpu); return;
5574
5575 case 0x0E:
5576 case 0x1E:
5577 do_vec_ZIP (cpu); return;
5578
5579 default:
5580 HALT_NYI;
5581 }
5582 }
5583
5584 switch (INSTR (13, 10))
5585 {
5586 case 0x6: do_vec_UZP (cpu); return;
5587 case 0xE: do_vec_ZIP (cpu); return;
5588 case 0xA: do_vec_TRN (cpu); return;
5589 case 0xF: do_vec_UMOV (cpu); return;
5590 default: HALT_NYI;
5591 }
5592 }
5593
5594 switch (INSTR (15, 10))
5595 {
5596 case 0x02: do_vec_REV64 (cpu); return;
5597 case 0x06: do_vec_REV16 (cpu); return;
5598
5599 case 0x07:
5600 switch (INSTR (23, 21))
5601 {
5602 case 1: do_vec_AND (cpu); return;
5603 case 3: do_vec_BIC (cpu); return;
5604 case 5: do_vec_ORR (cpu); return;
5605 case 7: do_vec_ORN (cpu); return;
5606 default: HALT_NYI;
5607 }
5608
5609 case 0x08: do_vec_sub_long (cpu); return;
5610 case 0x0a: do_vec_XTN (cpu); return;
5611 case 0x11: do_vec_SSHL (cpu); return;
5612 case 0x19: do_vec_max (cpu); return;
5613 case 0x1B: do_vec_min (cpu); return;
5614 case 0x21: do_vec_add (cpu); return;
5615 case 0x25: do_vec_MLA (cpu); return;
5616 case 0x27: do_vec_mul (cpu); return;
5617 case 0x2F: do_vec_ADDP (cpu); return;
5618 case 0x30: do_vec_mull (cpu); return;
5619 case 0x33: do_vec_FMLA (cpu); return;
5620 case 0x35: do_vec_fadd (cpu); return;
5621
5622 case 0x2E:
5623 switch (INSTR (20, 16))
5624 {
5625 case 0x00: do_vec_ABS (cpu); return;
5626 case 0x01: do_vec_FCVTZS (cpu); return;
5627 case 0x11: do_vec_ADDV (cpu); return;
5628 default: HALT_NYI;
5629 }
5630
5631 case 0x31:
5632 case 0x3B:
5633 do_vec_Fminmax (cpu); return;
5634
5635 case 0x0D:
5636 case 0x0F:
5637 case 0x22:
5638 case 0x23:
5639 case 0x26:
5640 case 0x2A:
5641 case 0x32:
5642 case 0x36:
5643 case 0x39:
5644 case 0x3A:
5645 do_vec_compare (cpu); return;
5646
5647 case 0x3E:
5648 do_vec_FABS (cpu); return;
5649
5650 default:
5651 HALT_NYI;
5652 }
5653 }
5654
5655 static void
5656 do_vec_xtl (sim_cpu *cpu)
5657 {
5658 /* instr[31] = 0
5659 instr[30,29] = SXTL (00), UXTL (01), SXTL2 (10), UXTL2 (11)
5660 instr[28,22] = 0 1111 00
5661 instr[21,16] = size & shift (USHLL, SSHLL, USHLL2, SSHLL2)
5662 instr[15,10] = 1010 01
5663 instr[9,5] = V source
5664 instr[4,0] = V dest. */
5665
5666 unsigned vs = INSTR (9, 5);
5667 unsigned vd = INSTR (4, 0);
5668 unsigned i, shift, bias = 0;
5669
5670 NYI_assert (28, 22, 0x3C);
5671 NYI_assert (15, 10, 0x29);
5672
5673 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5674 switch (INSTR (30, 29))
5675 {
5676 case 2: /* SXTL2, SSHLL2. */
5677 bias = 2;
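      /* Fall through.  */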
5678 case 0: /* SXTL, SSHLL. */
5679 if (INSTR (21, 21))
5680 {
5681 int64_t val1, val2;
5682
5683 shift = INSTR (20, 16);
5684 /* Get the source values before setting the destination values
5685 in case the source and destination are the same. */
	  /* Widen before shifting so that bits shifted out of the low
	     word are kept in the 64-bit result.  */
	  val1 = (int64_t) aarch64_get_vec_s32 (cpu, vs, bias) << shift;
	  val2 = (int64_t) aarch64_get_vec_s32 (cpu, vs, bias + 1) << shift;
5688 aarch64_set_vec_s64 (cpu, vd, 0, val1);
5689 aarch64_set_vec_s64 (cpu, vd, 1, val2);
5690 }
5691 else if (INSTR (20, 20))
5692 {
5693 int32_t v[4];
5695
5696 shift = INSTR (19, 16);
5697 bias *= 2;
5698 for (i = 0; i < 4; i++)
5699 v[i] = aarch64_get_vec_s16 (cpu, vs, bias + i) << shift;
5700 for (i = 0; i < 4; i++)
5701 aarch64_set_vec_s32 (cpu, vd, i, v[i]);
5702 }
5703 else
5704 {
5705 int16_t v[8];
5706 NYI_assert (19, 19, 1);
5707
5708 shift = INSTR (18, 16);
	  /* The second part starts at byte element 8, so scale the
	     bias of 2 up to 8.  */
	  bias *= 4;
5710 for (i = 0; i < 8; i++)
5711 v[i] = aarch64_get_vec_s8 (cpu, vs, i + bias) << shift;
5712 for (i = 0; i < 8; i++)
5713 aarch64_set_vec_s16 (cpu, vd, i, v[i]);
5714 }
5715 return;
5716
5717 case 3: /* UXTL2, USHLL2. */
5718 bias = 2;
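      /* Fall through.  */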
5719 case 1: /* UXTL, USHLL. */
5720 if (INSTR (21, 21))
5721 {
5722 uint64_t v1, v2;
5723 shift = INSTR (20, 16);
	  /* Widen before shifting, as in the signed case above.  */
	  v1 = (uint64_t) aarch64_get_vec_u32 (cpu, vs, bias) << shift;
	  v2 = (uint64_t) aarch64_get_vec_u32 (cpu, vs, bias + 1) << shift;
5726 aarch64_set_vec_u64 (cpu, vd, 0, v1);
5727 aarch64_set_vec_u64 (cpu, vd, 1, v2);
5728 }
5729 else if (INSTR (20, 20))
5730 {
5731 uint32_t v[4];
5732 shift = INSTR (19, 16);
5733 bias *= 2;
5734 for (i = 0; i < 4; i++)
5735 v[i] = aarch64_get_vec_u16 (cpu, vs, i + bias) << shift;
5736 for (i = 0; i < 4; i++)
5737 aarch64_set_vec_u32 (cpu, vd, i, v[i]);
5738 }
5739 else
5740 {
5741 uint16_t v[8];
5742 NYI_assert (19, 19, 1);
5743
5744 shift = INSTR (18, 16);
	  /* The second part starts at byte element 8, so scale the
	     bias of 2 up to 8.  */
	  bias *= 4;
5746 for (i = 0; i < 8; i++)
5747 v[i] = aarch64_get_vec_u8 (cpu, vs, i + bias) << shift;
5748 for (i = 0; i < 8; i++)
5749 aarch64_set_vec_u16 (cpu, vd, i, v[i]);
5750 }
5751 return;
5752 }
5753 }
5754
5755 static void
5756 do_vec_SHL (sim_cpu *cpu)
5757 {
5758 /* instr [31] = 0
5759 instr [30] = half(0)/full(1)
5760 instr [29,23] = 001 1110
5761 instr [22,16] = size and shift amount
5762 instr [15,10] = 01 0101
5763 instr [9, 5] = Vs
5764 instr [4, 0] = Vd. */
5765
5766 int shift;
5767 int full = INSTR (30, 30);
5768 unsigned vs = INSTR (9, 5);
5769 unsigned vd = INSTR (4, 0);
5770 unsigned i;
5771
5772 NYI_assert (29, 23, 0x1E);
5773 NYI_assert (15, 10, 0x15);
5774
5775 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5776 if (INSTR (22, 22))
5777 {
5778 shift = INSTR (21, 16);
5779
5780 if (full == 0)
5781 HALT_UNALLOC;
5782
5783 for (i = 0; i < 2; i++)
5784 {
5785 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5786 aarch64_set_vec_u64 (cpu, vd, i, val << shift);
5787 }
5788
5789 return;
5790 }
5791
5792 if (INSTR (21, 21))
5793 {
5794 shift = INSTR (20, 16);
5795
5796 for (i = 0; i < (full ? 4 : 2); i++)
5797 {
5798 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5799 aarch64_set_vec_u32 (cpu, vd, i, val << shift);
5800 }
5801
5802 return;
5803 }
5804
5805 if (INSTR (20, 20))
5806 {
5807 shift = INSTR (19, 16);
5808
5809 for (i = 0; i < (full ? 8 : 4); i++)
5810 {
5811 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5812 aarch64_set_vec_u16 (cpu, vd, i, val << shift);
5813 }
5814
5815 return;
5816 }
5817
5818 if (INSTR (19, 19) == 0)
5819 HALT_UNALLOC;
5820
5821 shift = INSTR (18, 16);
5822
5823 for (i = 0; i < (full ? 16 : 8); i++)
5824 {
5825 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5826 aarch64_set_vec_u8 (cpu, vd, i, val << shift);
5827 }
5828 }
5829
5830 static void
5831 do_vec_SSHR_USHR (sim_cpu *cpu)
5832 {
5833 /* instr [31] = 0
5834 instr [30] = half(0)/full(1)
5835 instr [29] = signed(0)/unsigned(1)
5836 instr [28,23] = 0 1111 0
5837 instr [22,16] = size and shift amount
5838 instr [15,10] = 0000 01
5839 instr [9, 5] = Vs
5840 instr [4, 0] = Vd. */
5841
5842 int full = INSTR (30, 30);
5843 int sign = ! INSTR (29, 29);
5844 unsigned shift = INSTR (22, 16);
5845 unsigned vs = INSTR (9, 5);
5846 unsigned vd = INSTR (4, 0);
5847 unsigned i;
5848
5849 NYI_assert (28, 23, 0x1E);
5850 NYI_assert (15, 10, 0x01);
5851
5852 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
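  /* The shift count is encoded as twice the element size minus the
     immh:immb value, so it is recovered below by subtracting the imm7
     field from 128, 64, 32 or 16 according to the element size.  */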
5853 if (INSTR (22, 22))
5854 {
5855 shift = 128 - shift;
5856
5857 if (full == 0)
5858 HALT_UNALLOC;
5859
5860 if (sign)
5861 for (i = 0; i < 2; i++)
5862 {
5863 int64_t val = aarch64_get_vec_s64 (cpu, vs, i);
5864 aarch64_set_vec_s64 (cpu, vd, i, val >> shift);
5865 }
5866 else
5867 for (i = 0; i < 2; i++)
5868 {
5869 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5870 aarch64_set_vec_u64 (cpu, vd, i, val >> shift);
5871 }
5872
5873 return;
5874 }
5875
5876 if (INSTR (21, 21))
5877 {
5878 shift = 64 - shift;
5879
5880 if (sign)
5881 for (i = 0; i < (full ? 4 : 2); i++)
5882 {
5883 int32_t val = aarch64_get_vec_s32 (cpu, vs, i);
5884 aarch64_set_vec_s32 (cpu, vd, i, val >> shift);
5885 }
5886 else
5887 for (i = 0; i < (full ? 4 : 2); i++)
5888 {
5889 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5890 aarch64_set_vec_u32 (cpu, vd, i, val >> shift);
5891 }
5892
5893 return;
5894 }
5895
5896 if (INSTR (20, 20))
5897 {
5898 shift = 32 - shift;
5899
5900 if (sign)
5901 for (i = 0; i < (full ? 8 : 4); i++)
5902 {
5903 int16_t val = aarch64_get_vec_s16 (cpu, vs, i);
5904 aarch64_set_vec_s16 (cpu, vd, i, val >> shift);
5905 }
5906 else
5907 for (i = 0; i < (full ? 8 : 4); i++)
5908 {
5909 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5910 aarch64_set_vec_u16 (cpu, vd, i, val >> shift);
5911 }
5912
5913 return;
5914 }
5915
5916 if (INSTR (19, 19) == 0)
5917 HALT_UNALLOC;
5918
5919 shift = 16 - shift;
5920
5921 if (sign)
5922 for (i = 0; i < (full ? 16 : 8); i++)
5923 {
5924 int8_t val = aarch64_get_vec_s8 (cpu, vs, i);
5925 aarch64_set_vec_s8 (cpu, vd, i, val >> shift);
5926 }
5927 else
5928 for (i = 0; i < (full ? 16 : 8); i++)
5929 {
5930 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5931 aarch64_set_vec_u8 (cpu, vd, i, val >> shift);
5932 }
5933 }
5934
5935 static void
5936 do_vec_MUL_by_element (sim_cpu *cpu)
5937 {
5938 /* instr[31] = 0
5939 instr[30] = half/full
5940 instr[29,24] = 00 1111
5941 instr[23,22] = size
5942 instr[21] = L
5943 instr[20] = M
5944 instr[19,16] = m
5945 instr[15,12] = 1000
5946 instr[11] = H
5947 instr[10] = 0
5948 instr[9,5] = Vn
5949 instr[4,0] = Vd */
5950
5951 unsigned full = INSTR (30, 30);
5952 unsigned L = INSTR (21, 21);
5953 unsigned H = INSTR (11, 11);
5954 unsigned vn = INSTR (9, 5);
5955 unsigned vd = INSTR (4, 0);
5956 unsigned size = INSTR (23, 22);
5957 unsigned index;
5958 unsigned vm;
5959 unsigned e;
5960
5961 NYI_assert (29, 24, 0x0F);
5962 NYI_assert (15, 12, 0x8);
5963 NYI_assert (10, 10, 0);
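  /* For 16-bit elements the index is H:L:M and Vm is the 4-bit field
     [19,16]; for 32-bit elements the index is H:L and the M bit becomes
     the top bit of a 5-bit Vm.  */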
5964
5965 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5966 switch (size)
5967 {
5968 case 1:
5969 {
5970 /* 16 bit products. */
5971 uint16_t product;
5972 uint16_t element1;
5973 uint16_t element2;
5974
5975 index = (H << 2) | (L << 1) | INSTR (20, 20);
5976 vm = INSTR (19, 16);
5977 element2 = aarch64_get_vec_u16 (cpu, vm, index);
5978
5979 for (e = 0; e < (full ? 8 : 4); e ++)
5980 {
5981 element1 = aarch64_get_vec_u16 (cpu, vn, e);
5982 product = element1 * element2;
5983 aarch64_set_vec_u16 (cpu, vd, e, product);
5984 }
5985 }
5986 break;
5987
5988 case 2:
5989 {
5990 /* 32 bit products. */
5991 uint32_t product;
5992 uint32_t element1;
5993 uint32_t element2;
5994
5995 index = (H << 1) | L;
5996 vm = INSTR (20, 16);
5997 element2 = aarch64_get_vec_u32 (cpu, vm, index);
5998
5999 for (e = 0; e < (full ? 4 : 2); e ++)
6000 {
6001 element1 = aarch64_get_vec_u32 (cpu, vn, e);
6002 product = element1 * element2;
6003 aarch64_set_vec_u32 (cpu, vd, e, product);
6004 }
6005 }
6006 break;
6007
6008 default:
6009 HALT_UNALLOC;
6010 }
6011 }
6012
6013 static void
6014 do_FMLA_by_element (sim_cpu *cpu)
6015 {
6016 /* instr[31] = 0
6017 instr[30] = half/full
6018 instr[29,23] = 00 1111 1
6019 instr[22] = size
6020 instr[21] = L
6021 instr[20,16] = m
6022 instr[15,12] = 0001
6023 instr[11] = H
6024 instr[10] = 0
6025 instr[9,5] = Vn
6026 instr[4,0] = Vd */
6027
6028 unsigned full = INSTR (30, 30);
6029 unsigned size = INSTR (22, 22);
6030 unsigned L = INSTR (21, 21);
6031 unsigned vm = INSTR (20, 16);
6032 unsigned H = INSTR (11, 11);
6033 unsigned vn = INSTR (9, 5);
6034 unsigned vd = INSTR (4, 0);
6035 unsigned e;
6036
6037 NYI_assert (29, 23, 0x1F);
6038 NYI_assert (15, 12, 0x1);
6039 NYI_assert (10, 10, 0);
6040
6041 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6042 if (size)
6043 {
6044 double element1, element2;
6045
6046 if (! full || L)
6047 HALT_UNALLOC;
6048
6049 element2 = aarch64_get_vec_double (cpu, vm, H);
6050
6051 for (e = 0; e < 2; e++)
6052 {
6053 element1 = aarch64_get_vec_double (cpu, vn, e);
6054 element1 *= element2;
6055 element1 += aarch64_get_vec_double (cpu, vd, e);
6056 aarch64_set_vec_double (cpu, vd, e, element1);
6057 }
6058 }
6059 else
6060 {
6061 float element1;
6062 float element2 = aarch64_get_vec_float (cpu, vm, (H << 1) | L);
6063
6064 for (e = 0; e < (full ? 4 : 2); e++)
6065 {
6066 element1 = aarch64_get_vec_float (cpu, vn, e);
6067 element1 *= element2;
6068 element1 += aarch64_get_vec_float (cpu, vd, e);
6069 aarch64_set_vec_float (cpu, vd, e, element1);
6070 }
6071 }
6072 }
6073
6074 static void
6075 do_vec_op2 (sim_cpu *cpu)
6076 {
6077 /* instr[31] = 0
6078 instr[30] = half/full
6079 instr[29,24] = 00 1111
6080 instr[23] = ?
6081 instr[22,16] = element size & index
6082 instr[15,10] = sub-opcode
6083 instr[9,5] = Vm
6084 instr[4,0] = Vd */
6085
6086 NYI_assert (29, 24, 0x0F);
6087
6088 if (INSTR (23, 23) != 0)
6089 {
6090 switch (INSTR (15, 10))
6091 {
6092 case 0x04:
6093 case 0x06:
6094 do_FMLA_by_element (cpu);
6095 return;
6096
6097 case 0x20:
6098 case 0x22:
6099 do_vec_MUL_by_element (cpu);
6100 return;
6101
6102 default:
6103 HALT_NYI;
6104 }
6105 }
6106 else
6107 {
6108 switch (INSTR (15, 10))
6109 {
6110 case 0x01: do_vec_SSHR_USHR (cpu); return;
6111 case 0x15: do_vec_SHL (cpu); return;
6112 case 0x20:
6113 case 0x22: do_vec_MUL_by_element (cpu); return;
6114 case 0x29: do_vec_xtl (cpu); return;
6115 default: HALT_NYI;
6116 }
6117 }
6118 }
6119
6120 static void
6121 do_vec_neg (sim_cpu *cpu)
6122 {
6123 /* instr[31] = 0
6124 instr[30] = full(1)/half(0)
6125 instr[29,24] = 10 1110
6126 instr[23,22] = size: byte(00), half (01), word (10), long (11)
6127 instr[21,10] = 1000 0010 1110
6128 instr[9,5] = Vs
6129 instr[4,0] = Vd */
6130
6131 int full = INSTR (30, 30);
6132 unsigned vs = INSTR (9, 5);
6133 unsigned vd = INSTR (4, 0);
6134 unsigned i;
6135
6136 NYI_assert (29, 24, 0x2E);
6137 NYI_assert (21, 10, 0x82E);
6138
6139 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6140 switch (INSTR (23, 22))
6141 {
6142 case 0:
6143 for (i = 0; i < (full ? 16 : 8); i++)
6144 aarch64_set_vec_s8 (cpu, vd, i, - aarch64_get_vec_s8 (cpu, vs, i));
6145 return;
6146
6147 case 1:
6148 for (i = 0; i < (full ? 8 : 4); i++)
6149 aarch64_set_vec_s16 (cpu, vd, i, - aarch64_get_vec_s16 (cpu, vs, i));
6150 return;
6151
6152 case 2:
6153 for (i = 0; i < (full ? 4 : 2); i++)
6154 aarch64_set_vec_s32 (cpu, vd, i, - aarch64_get_vec_s32 (cpu, vs, i));
6155 return;
6156
6157 case 3:
6158 if (! full)
6159 HALT_NYI;
6160 for (i = 0; i < 2; i++)
6161 aarch64_set_vec_s64 (cpu, vd, i, - aarch64_get_vec_s64 (cpu, vs, i));
6162 return;
6163 }
6164 }
6165
6166 static void
6167 do_vec_sqrt (sim_cpu *cpu)
6168 {
6169 /* instr[31] = 0
6170 instr[30] = full(1)/half(0)
6171 instr[29,23] = 101 1101
6172 instr[22] = single(0)/double(1)
6173 instr[21,10] = 1000 0111 1110
6174 instr[9,5] = Vs
6175 instr[4,0] = Vd. */
6176
6177 int full = INSTR (30, 30);
6178 unsigned vs = INSTR (9, 5);
6179 unsigned vd = INSTR (4, 0);
6180 unsigned i;
6181
6182 NYI_assert (29, 23, 0x5B);
6183 NYI_assert (21, 10, 0x87E);
6184
6185 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6186 if (INSTR (22, 22) == 0)
6187 for (i = 0; i < (full ? 4 : 2); i++)
6188 aarch64_set_vec_float (cpu, vd, i,
6189 sqrtf (aarch64_get_vec_float (cpu, vs, i)));
  else
    {
      /* The 2D variant requires the full (128-bit) form.  */
      if (! full)
	HALT_NYI;
      for (i = 0; i < 2; i++)
	aarch64_set_vec_double (cpu, vd, i,
				sqrt (aarch64_get_vec_double (cpu, vs, i)));
    }
6194 }
6195
6196 static void
6197 do_vec_mls_indexed (sim_cpu *cpu)
6198 {
6199 /* instr[31] = 0
6200 instr[30] = half(0)/full(1)
6201 instr[29,24] = 10 1111
6202 instr[23,22] = 16-bit(01)/32-bit(10)
     instr[11],instr[21,20] = index (if 16-bit)
     instr[11],instr[21]    = index (if 32-bit)
6205 instr[20,16] = Vm
6206 instr[15,12] = 0100
6207 instr[11] = part of index
6208 instr[10] = 0
6209 instr[9,5] = Vs
6210 instr[4,0] = Vd. */
6211
6212 int full = INSTR (30, 30);
6213 unsigned vs = INSTR (9, 5);
6214 unsigned vd = INSTR (4, 0);
6215 unsigned vm = INSTR (20, 16);
6216 unsigned i;
6217
6218 NYI_assert (15, 12, 4);
6219 NYI_assert (10, 10, 0);
6220
6221 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6222 switch (INSTR (23, 22))
6223 {
6224 case 1:
6225 {
6226 unsigned elem;
6227 uint32_t val;
6228
6229 if (vm > 15)
6230 HALT_NYI;
6231
	/* The element index is H:L:M (bits 11, 21 and 20); with the
	   vm > 15 guard above the M bit is known to be zero.  */
	elem = (INSTR (11, 11) << 2) | INSTR (21, 20);
	val = aarch64_get_vec_u16 (cpu, vm, elem);

	/* The 16-bit form of MLS operates on 16-bit lanes throughout.  */
	for (i = 0; i < (full ? 8 : 4); i++)
	  aarch64_set_vec_u16 (cpu, vd, i,
			       aarch64_get_vec_u16 (cpu, vd, i)
			       - (aarch64_get_vec_u16 (cpu, vs, i) * val));
6239 return;
6240 }
6241
6242 case 2:
6243 {
6244 	unsigned elem = (INSTR (11, 11) << 1) | INSTR (21, 21);
6245 uint64_t val = aarch64_get_vec_u32 (cpu, vm, elem);
6246
6247 for (i = 0; i < (full ? 4 : 2); i++)
6248 aarch64_set_vec_u64 (cpu, vd, i,
6249 aarch64_get_vec_u64 (cpu, vd, i) -
6250 (aarch64_get_vec_u64 (cpu, vs, i) * val));
6251 return;
6252 }
6253
6254 case 0:
6255 case 3:
6256 default:
6257 HALT_NYI;
6258 }
6259 }
6260
6261 static void
6262 do_vec_SUB (sim_cpu *cpu)
6263 {
6264 /* instr [31] = 0
6265 instr [30] = half(0)/full(1)
6266 instr [29,24] = 10 1110
6267    instr [23,22] = size: byte(00), half(01), word (10), long (11)
6268 instr [21] = 1
6269 instr [20,16] = Vm
6270 instr [15,10] = 10 0001
6271 instr [9, 5] = Vn
6272 instr [4, 0] = Vd. */
6273
6274 unsigned full = INSTR (30, 30);
6275 unsigned vm = INSTR (20, 16);
6276 unsigned vn = INSTR (9, 5);
6277 unsigned vd = INSTR (4, 0);
6278 unsigned i;
6279
6280 NYI_assert (29, 24, 0x2E);
6281 NYI_assert (21, 21, 1);
6282 NYI_assert (15, 10, 0x21);
6283
6284 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6285 switch (INSTR (23, 22))
6286 {
6287 case 0:
6288 for (i = 0; i < (full ? 16 : 8); i++)
6289 aarch64_set_vec_s8 (cpu, vd, i,
6290 aarch64_get_vec_s8 (cpu, vn, i)
6291 - aarch64_get_vec_s8 (cpu, vm, i));
6292 return;
6293
6294 case 1:
6295 for (i = 0; i < (full ? 8 : 4); i++)
6296 aarch64_set_vec_s16 (cpu, vd, i,
6297 aarch64_get_vec_s16 (cpu, vn, i)
6298 - aarch64_get_vec_s16 (cpu, vm, i));
6299 return;
6300
6301 case 2:
6302 for (i = 0; i < (full ? 4 : 2); i++)
6303 aarch64_set_vec_s32 (cpu, vd, i,
6304 aarch64_get_vec_s32 (cpu, vn, i)
6305 - aarch64_get_vec_s32 (cpu, vm, i));
6306 return;
6307
6308 case 3:
6309 if (full == 0)
6310 HALT_UNALLOC;
6311
6312 for (i = 0; i < 2; i++)
6313 aarch64_set_vec_s64 (cpu, vd, i,
6314 aarch64_get_vec_s64 (cpu, vn, i)
6315 - aarch64_get_vec_s64 (cpu, vm, i));
6316 return;
6317 }
6318 }
6319
6320 static void
6321 do_vec_MLS (sim_cpu *cpu)
6322 {
6323 /* instr [31] = 0
6324 instr [30] = half(0)/full(1)
6325 instr [29,24] = 10 1110
6326    instr [23,22] = size: byte(00), half(01), word (10)
6327 instr [21] = 1
6328 instr [20,16] = Vm
6329 instr [15,10] = 10 0101
6330 instr [9, 5] = Vn
6331 instr [4, 0] = Vd. */
6332
6333 unsigned full = INSTR (30, 30);
6334 unsigned vm = INSTR (20, 16);
6335 unsigned vn = INSTR (9, 5);
6336 unsigned vd = INSTR (4, 0);
6337 unsigned i;
6338
6339 NYI_assert (29, 24, 0x2E);
6340 NYI_assert (21, 21, 1);
6341 NYI_assert (15, 10, 0x25);
6342
6343 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6344 switch (INSTR (23, 22))
6345 {
6346     case 0:
6347       for (i = 0; i < (full ? 16 : 8); i++)
6348 	aarch64_set_vec_u8 (cpu, vd, i,
6349 			    aarch64_get_vec_u8 (cpu, vd, i)
6350 			    - (aarch64_get_vec_u8 (cpu, vn, i)
6351 			       * aarch64_get_vec_u8 (cpu, vm, i)));
6352       return;
6353 
6354     case 1:
6355       for (i = 0; i < (full ? 8 : 4); i++)
6356 	aarch64_set_vec_u16 (cpu, vd, i,
6357 			     aarch64_get_vec_u16 (cpu, vd, i)
6358 			     - (aarch64_get_vec_u16 (cpu, vn, i)
6359 				* aarch64_get_vec_u16 (cpu, vm, i)));
6360       return;
6361 
6362     case 2:
6363       for (i = 0; i < (full ? 4 : 2); i++)
6364 	aarch64_set_vec_u32 (cpu, vd, i,
6365 			     aarch64_get_vec_u32 (cpu, vd, i)
6366 			     - (aarch64_get_vec_u32 (cpu, vn, i)
6367 				* aarch64_get_vec_u32 (cpu, vm, i)));
6368       return;
6369
6370 default:
6371 HALT_UNALLOC;
6372 }
6373 }
6374
6375 static void
6376 do_vec_FDIV (sim_cpu *cpu)
6377 {
6378 /* instr [31] = 0
6379 instr [30] = half(0)/full(1)
6380 instr [29,23] = 10 1110 0
6381    instr [22]    = float(0)/double(1)
6382 instr [21] = 1
6383 instr [20,16] = Vm
6384 instr [15,10] = 1111 11
6385 instr [9, 5] = Vn
6386 instr [4, 0] = Vd. */
6387
6388 unsigned full = INSTR (30, 30);
6389 unsigned vm = INSTR (20, 16);
6390 unsigned vn = INSTR (9, 5);
6391 unsigned vd = INSTR (4, 0);
6392 unsigned i;
6393
6394 NYI_assert (29, 23, 0x5C);
6395 NYI_assert (21, 21, 1);
6396 NYI_assert (15, 10, 0x3F);
6397
6398 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6399 if (INSTR (22, 22))
6400 {
6401 if (! full)
6402 HALT_UNALLOC;
6403
6404 for (i = 0; i < 2; i++)
6405 aarch64_set_vec_double (cpu, vd, i,
6406 aarch64_get_vec_double (cpu, vn, i)
6407 / aarch64_get_vec_double (cpu, vm, i));
6408 }
6409 else
6410 for (i = 0; i < (full ? 4 : 2); i++)
6411 aarch64_set_vec_float (cpu, vd, i,
6412 aarch64_get_vec_float (cpu, vn, i)
6413 / aarch64_get_vec_float (cpu, vm, i));
6414 }
6415
6416 static void
6417 do_vec_FMUL (sim_cpu *cpu)
6418 {
6419 /* instr [31] = 0
6420 instr [30] = half(0)/full(1)
6421 instr [29,23] = 10 1110 0
6422 instr [22] = float(0)/double(1)
6423 instr [21] = 1
6424 instr [20,16] = Vm
6425 instr [15,10] = 1101 11
6426 instr [9, 5] = Vn
6427 instr [4, 0] = Vd. */
6428
6429 unsigned full = INSTR (30, 30);
6430 unsigned vm = INSTR (20, 16);
6431 unsigned vn = INSTR (9, 5);
6432 unsigned vd = INSTR (4, 0);
6433 unsigned i;
6434
6435 NYI_assert (29, 23, 0x5C);
6436 NYI_assert (21, 21, 1);
6437 NYI_assert (15, 10, 0x37);
6438
6439 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6440 if (INSTR (22, 22))
6441 {
6442 if (! full)
6443 HALT_UNALLOC;
6444
6445 for (i = 0; i < 2; i++)
6446 aarch64_set_vec_double (cpu, vd, i,
6447 aarch64_get_vec_double (cpu, vn, i)
6448 * aarch64_get_vec_double (cpu, vm, i));
6449 }
6450 else
6451 for (i = 0; i < (full ? 4 : 2); i++)
6452 aarch64_set_vec_float (cpu, vd, i,
6453 aarch64_get_vec_float (cpu, vn, i)
6454 * aarch64_get_vec_float (cpu, vm, i));
6455 }
6456
6457 static void
6458 do_vec_FADDP (sim_cpu *cpu)
6459 {
6460 /* instr [31] = 0
6461 instr [30] = half(0)/full(1)
6462 instr [29,23] = 10 1110 0
6463 instr [22] = float(0)/double(1)
6464 instr [21] = 1
6465 instr [20,16] = Vm
6466 instr [15,10] = 1101 01
6467 instr [9, 5] = Vn
6468 instr [4, 0] = Vd. */
6469
6470 unsigned full = INSTR (30, 30);
6471 unsigned vm = INSTR (20, 16);
6472 unsigned vn = INSTR (9, 5);
6473 unsigned vd = INSTR (4, 0);
6474
6475 NYI_assert (29, 23, 0x5C);
6476 NYI_assert (21, 21, 1);
6477 NYI_assert (15, 10, 0x35);
6478
6479 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
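  /* FADDP adds adjacent pairs: the low half of the result holds the
     pairwise sums of Vn's elements and the high half the pairwise
     sums of Vm's elements.  */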
6480 if (INSTR (22, 22))
6481 {
6482       /* Extract values before adding them in case vd == vn/vm.  */
6483 double tmp1 = aarch64_get_vec_double (cpu, vn, 0);
6484 double tmp2 = aarch64_get_vec_double (cpu, vn, 1);
6485 double tmp3 = aarch64_get_vec_double (cpu, vm, 0);
6486 double tmp4 = aarch64_get_vec_double (cpu, vm, 1);
6487
6488 if (! full)
6489 HALT_UNALLOC;
6490
6491 aarch64_set_vec_double (cpu, vd, 0, tmp1 + tmp2);
6492 aarch64_set_vec_double (cpu, vd, 1, tmp3 + tmp4);
6493 }
6494 else
6495 {
6496       /* Extract values before adding them in case vd == vn/vm.  */
6497 float tmp1 = aarch64_get_vec_float (cpu, vn, 0);
6498 float tmp2 = aarch64_get_vec_float (cpu, vn, 1);
6499 float tmp5 = aarch64_get_vec_float (cpu, vm, 0);
6500 float tmp6 = aarch64_get_vec_float (cpu, vm, 1);
6501
6502 if (full)
6503 {
6504 float tmp3 = aarch64_get_vec_float (cpu, vn, 2);
6505 float tmp4 = aarch64_get_vec_float (cpu, vn, 3);
6506 float tmp7 = aarch64_get_vec_float (cpu, vm, 2);
6507 float tmp8 = aarch64_get_vec_float (cpu, vm, 3);
6508
6509 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6510 aarch64_set_vec_float (cpu, vd, 1, tmp3 + tmp4);
6511 aarch64_set_vec_float (cpu, vd, 2, tmp5 + tmp6);
6512 aarch64_set_vec_float (cpu, vd, 3, tmp7 + tmp8);
6513 }
6514 else
6515 {
6516 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6517 aarch64_set_vec_float (cpu, vd, 1, tmp5 + tmp6);
6518 }
6519 }
6520 }
6521
6522 static void
6523 do_vec_FSQRT (sim_cpu *cpu)
6524 {
6525 /* instr[31] = 0
6526 instr[30] = half(0)/full(1)
6527 instr[29,23] = 10 1110 1
6528 instr[22] = single(0)/double(1)
6529 instr[21,10] = 10 0001 1111 10
6530 instr[9,5] = Vsrc
6531 instr[4,0] = Vdest. */
6532
6533 unsigned vn = INSTR (9, 5);
6534 unsigned vd = INSTR (4, 0);
6535 unsigned full = INSTR (30, 30);
6536 int i;
6537
6538 NYI_assert (29, 23, 0x5D);
6539 NYI_assert (21, 10, 0x87E);
6540
6541 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6542 if (INSTR (22, 22))
6543 {
6544 if (! full)
6545 HALT_UNALLOC;
6546
6547 for (i = 0; i < 2; i++)
6548 aarch64_set_vec_double (cpu, vd, i,
6549 sqrt (aarch64_get_vec_double (cpu, vn, i)));
6550 }
6551 else
6552 {
6553 for (i = 0; i < (full ? 4 : 2); i++)
6554 aarch64_set_vec_float (cpu, vd, i,
6555 sqrtf (aarch64_get_vec_float (cpu, vn, i)));
6556 }
6557 }
6558
6559 static void
6560 do_vec_FNEG (sim_cpu *cpu)
6561 {
6562 /* instr[31] = 0
6563 instr[30] = half (0)/full (1)
6564 instr[29,23] = 10 1110 1
6565 instr[22] = single (0)/double (1)
6566 instr[21,10] = 10 0000 1111 10
6567 instr[9,5] = Vsrc
6568 instr[4,0] = Vdest. */
6569
6570 unsigned vn = INSTR (9, 5);
6571 unsigned vd = INSTR (4, 0);
6572 unsigned full = INSTR (30, 30);
6573 int i;
6574
6575 NYI_assert (29, 23, 0x5D);
6576 NYI_assert (21, 10, 0x83E);
6577
6578 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6579 if (INSTR (22, 22))
6580 {
6581 if (! full)
6582 HALT_UNALLOC;
6583
6584 for (i = 0; i < 2; i++)
6585 aarch64_set_vec_double (cpu, vd, i,
6586 - aarch64_get_vec_double (cpu, vn, i));
6587 }
6588 else
6589 {
6590 for (i = 0; i < (full ? 4 : 2); i++)
6591 aarch64_set_vec_float (cpu, vd, i,
6592 - aarch64_get_vec_float (cpu, vn, i));
6593 }
6594 }
6595
6596 static void
6597 do_vec_NOT (sim_cpu *cpu)
6598 {
6599 /* instr[31] = 0
6600 instr[30] = half (0)/full (1)
6601 instr[29,10] = 10 1110 0010 0000 0101 10
6602 instr[9,5] = Vn
6603      instr[4,0]  = Vd. */
6604
6605 unsigned vn = INSTR (9, 5);
6606 unsigned vd = INSTR (4, 0);
6607 unsigned i;
6608 int full = INSTR (30, 30);
6609
6610 NYI_assert (29, 10, 0xB8816);
6611
6612 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6613 for (i = 0; i < (full ? 16 : 8); i++)
6614 aarch64_set_vec_u8 (cpu, vd, i, ~ aarch64_get_vec_u8 (cpu, vn, i));
6615 }
6616
6617 static unsigned int
6618 clz (uint64_t val, unsigned size)
6619 {
6620 uint64_t mask = 1;
6621 int count;
6622
6623 mask <<= (size - 1);
6624 count = 0;
6625 do
6626 {
6627 if (val & mask)
6628 break;
6629 mask >>= 1;
6630 count ++;
6631 }
6632 while (mask);
6633
6634 return count;
6635 }
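
/* For example, clz (0x0f, 8) returns 4, and clz (0, size) returns
   size, since the loop exhausts the mask without finding a set bit.  */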
6636
6637 static void
6638 do_vec_CLZ (sim_cpu *cpu)
6639 {
6640 /* instr[31] = 0
6641 instr[30] = half (0)/full (1)
6642 instr[29,24] = 10 1110
6643 instr[23,22] = size
6644 instr[21,10] = 10 0000 0100 10
6645 instr[9,5] = Vn
6646      instr[4,0] = Vd.  */
6647
6648 unsigned vn = INSTR (9, 5);
6649 unsigned vd = INSTR (4, 0);
6650 unsigned i;
6651 int full = INSTR (30,30);
6652
6653 NYI_assert (29, 24, 0x2E);
6654 NYI_assert (21, 10, 0x812);
6655
6656 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6657 switch (INSTR (23, 22))
6658 {
6659 case 0:
6660 for (i = 0; i < (full ? 16 : 8); i++)
6661 aarch64_set_vec_u8 (cpu, vd, i, clz (aarch64_get_vec_u8 (cpu, vn, i), 8));
6662 break;
6663 case 1:
6664 for (i = 0; i < (full ? 8 : 4); i++)
6665 aarch64_set_vec_u16 (cpu, vd, i, clz (aarch64_get_vec_u16 (cpu, vn, i), 16));
6666 break;
6667 case 2:
6668 for (i = 0; i < (full ? 4 : 2); i++)
6669 aarch64_set_vec_u32 (cpu, vd, i, clz (aarch64_get_vec_u32 (cpu, vn, i), 32));
6670 break;
6671 case 3:
6672 if (! full)
6673 HALT_UNALLOC;
6674 aarch64_set_vec_u64 (cpu, vd, 0, clz (aarch64_get_vec_u64 (cpu, vn, 0), 64));
6675 aarch64_set_vec_u64 (cpu, vd, 1, clz (aarch64_get_vec_u64 (cpu, vn, 1), 64));
6676 break;
6677 }
6678 }
6679
6680 static void
6681 do_vec_MOV_element (sim_cpu *cpu)
6682 {
6683 /* instr[31,21] = 0110 1110 000
6684 instr[20,16] = size & dest index
6685 instr[15] = 0
6686 instr[14,11] = source index
6687 instr[10] = 1
6688 instr[9,5] = Vs
6689      instr[4,0] = Vd.  */
6690
6691 unsigned vs = INSTR (9, 5);
6692 unsigned vd = INSTR (4, 0);
6693 unsigned src_index;
6694 unsigned dst_index;
6695
6696 NYI_assert (31, 21, 0x370);
6697 NYI_assert (15, 15, 0);
6698 NYI_assert (10, 10, 1);
6699
6700 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
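  /* The lowest set bit of instr[20,16] selects the element size: for
     example imm5 == xxx10 moves a 16-bit element, taking the source
     index from instr[14,12] and the destination index from
     instr[20,18].  */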
6701 if (INSTR (16, 16))
6702 {
6703 /* Move a byte. */
6704 src_index = INSTR (14, 11);
6705 dst_index = INSTR (20, 17);
6706 aarch64_set_vec_u8 (cpu, vd, dst_index,
6707 aarch64_get_vec_u8 (cpu, vs, src_index));
6708 }
6709 else if (INSTR (17, 17))
6710 {
6711 /* Move 16-bits. */
6712 NYI_assert (11, 11, 0);
6713 src_index = INSTR (14, 12);
6714 dst_index = INSTR (20, 18);
6715 aarch64_set_vec_u16 (cpu, vd, dst_index,
6716 aarch64_get_vec_u16 (cpu, vs, src_index));
6717 }
6718 else if (INSTR (18, 18))
6719 {
6720 /* Move 32-bits. */
6721 NYI_assert (12, 11, 0);
6722 src_index = INSTR (14, 13);
6723 dst_index = INSTR (20, 19);
6724 aarch64_set_vec_u32 (cpu, vd, dst_index,
6725 aarch64_get_vec_u32 (cpu, vs, src_index));
6726 }
6727 else
6728 {
6729 NYI_assert (19, 19, 1);
6730 NYI_assert (13, 11, 0);
6731 src_index = INSTR (14, 14);
6732 dst_index = INSTR (20, 20);
6733 aarch64_set_vec_u64 (cpu, vd, dst_index,
6734 aarch64_get_vec_u64 (cpu, vs, src_index));
6735 }
6736 }
6737
6738 static void
6739 do_vec_REV32 (sim_cpu *cpu)
6740 {
6741 /* instr[31] = 0
6742 instr[30] = full/half
6743 instr[29,24] = 10 1110
6744 instr[23,22] = size
6745 instr[21,10] = 10 0000 0000 10
6746 instr[9,5] = Rn
6747 instr[4,0] = Rd. */
6748
6749 unsigned rn = INSTR (9, 5);
6750 unsigned rd = INSTR (4, 0);
6751 unsigned size = INSTR (23, 22);
6752 unsigned full = INSTR (30, 30);
6753 unsigned i;
6754 FRegister val;
6755
6756 NYI_assert (29, 24, 0x2E);
6757 NYI_assert (21, 10, 0x802);
6758
6759 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6760 switch (size)
6761 {
6762 case 0:
6763 for (i = 0; i < (full ? 16 : 8); i++)
6764 val.b[i ^ 0x3] = aarch64_get_vec_u8 (cpu, rn, i);
6765 break;
6766
6767 case 1:
6768 for (i = 0; i < (full ? 8 : 4); i++)
6769 val.h[i ^ 0x1] = aarch64_get_vec_u16 (cpu, rn, i);
6770 break;
6771
6772 default:
6773 HALT_UNALLOC;
6774 }
6775
6776 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
6777 if (full)
6778 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
6779 }
6780
6781 static void
6782 do_vec_EXT (sim_cpu *cpu)
6783 {
6784 /* instr[31] = 0
6785 instr[30] = full/half
6786 instr[29,21] = 10 1110 000
6787 instr[20,16] = Vm
6788 instr[15] = 0
6789 instr[14,11] = source index
6790 instr[10] = 0
6791 instr[9,5] = Vn
6792      instr[4,0] = Vd.  */
6793
6794 unsigned vm = INSTR (20, 16);
6795 unsigned vn = INSTR (9, 5);
6796 unsigned vd = INSTR (4, 0);
6797 unsigned src_index = INSTR (14, 11);
6798 unsigned full = INSTR (30, 30);
6799 unsigned i;
6800 unsigned j;
6801 FRegister val;
6802
6803 NYI_assert (31, 21, 0x370);
6804 NYI_assert (15, 15, 0);
6805 NYI_assert (10, 10, 0);
6806
6807 if (!full && (src_index & 0x8))
6808 HALT_UNALLOC;
6809
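  /* For example, a 64-bit (half) EXT with src_index == 3 produces
     bytes 3..7 of Vn followed by bytes 0..2 of Vm.  */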
6810 j = 0;
6811
6812 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6813 for (i = src_index; i < (full ? 16 : 8); i++)
6814 val.b[j ++] = aarch64_get_vec_u8 (cpu, vn, i);
6815 for (i = 0; i < src_index; i++)
6816 val.b[j ++] = aarch64_get_vec_u8 (cpu, vm, i);
6817
6818 aarch64_set_vec_u64 (cpu, vd, 0, val.v[0]);
6819 if (full)
6820 aarch64_set_vec_u64 (cpu, vd, 1, val.v[1]);
6821 }
6822
6823 static void
6824 dexAdvSIMD0 (sim_cpu *cpu)
6825 {
6826 /* instr [28,25] = 0 111. */
6827 if ( INSTR (15, 10) == 0x07
6828 && (INSTR (9, 5) ==
6829 INSTR (20, 16)))
6830 {
6831 if (INSTR (31, 21) == 0x075
6832 || INSTR (31, 21) == 0x275)
6833 {
6834 do_vec_MOV_whole_vector (cpu);
6835 return;
6836 }
6837 }
6838
6839 if (INSTR (29, 19) == 0x1E0)
6840 {
6841 do_vec_MOV_immediate (cpu);
6842 return;
6843 }
6844
6845 if (INSTR (29, 19) == 0x5E0)
6846 {
6847 do_vec_MVNI (cpu);
6848 return;
6849 }
6850
6851 if (INSTR (29, 19) == 0x1C0
6852 || INSTR (29, 19) == 0x1C1)
6853 {
6854 if (INSTR (15, 10) == 0x03)
6855 {
6856 do_vec_DUP_scalar_into_vector (cpu);
6857 return;
6858 }
6859 }
6860
6861 switch (INSTR (29, 24))
6862 {
6863 case 0x0E: do_vec_op1 (cpu); return;
6864 case 0x0F: do_vec_op2 (cpu); return;
6865
6866 case 0x2E:
6867 if (INSTR (21, 21) == 1)
6868 {
6869 switch (INSTR (15, 10))
6870 {
6871 case 0x02:
6872 do_vec_REV32 (cpu);
6873 return;
6874
6875 case 0x07:
6876 switch (INSTR (23, 22))
6877 {
6878 case 0: do_vec_EOR (cpu); return;
6879 case 1: do_vec_BSL (cpu); return;
6880 case 2:
6881 case 3: do_vec_bit (cpu); return;
6882 }
6883 break;
6884
6885 case 0x08: do_vec_sub_long (cpu); return;
6886 case 0x11: do_vec_USHL (cpu); return;
6887 case 0x12: do_vec_CLZ (cpu); return;
6888 case 0x16: do_vec_NOT (cpu); return;
6889 case 0x19: do_vec_max (cpu); return;
6890 case 0x1B: do_vec_min (cpu); return;
6891 case 0x21: do_vec_SUB (cpu); return;
6892 case 0x25: do_vec_MLS (cpu); return;
6893 case 0x31: do_vec_FminmaxNMP (cpu); return;
6894 case 0x35: do_vec_FADDP (cpu); return;
6895 case 0x37: do_vec_FMUL (cpu); return;
6896 case 0x3F: do_vec_FDIV (cpu); return;
6897
6898 case 0x3E:
6899 switch (INSTR (20, 16))
6900 {
6901 case 0x00: do_vec_FNEG (cpu); return;
6902 case 0x01: do_vec_FSQRT (cpu); return;
6903 default: HALT_NYI;
6904 }
6905
6906 case 0x0D:
6907 case 0x0F:
6908 case 0x22:
6909 case 0x23:
6910 case 0x26:
6911 case 0x2A:
6912 case 0x32:
6913 case 0x36:
6914 case 0x39:
6915 case 0x3A:
6916 do_vec_compare (cpu); return;
6917
6918 default:
6919 break;
6920 }
6921 }
6922
6923 if (INSTR (31, 21) == 0x370)
6924 {
6925 if (INSTR (10, 10))
6926 do_vec_MOV_element (cpu);
6927 else
6928 do_vec_EXT (cpu);
6929 return;
6930 }
6931
6932 switch (INSTR (21, 10))
6933 {
6934 case 0x82E: do_vec_neg (cpu); return;
6935 case 0x87E: do_vec_sqrt (cpu); return;
6936 default:
6937 if (INSTR (15, 10) == 0x30)
6938 {
6939 do_vec_mull (cpu);
6940 return;
6941 }
6942 break;
6943 }
6944 break;
6945
6946 case 0x2f:
6947 switch (INSTR (15, 10))
6948 {
6949 case 0x01: do_vec_SSHR_USHR (cpu); return;
6950 case 0x10:
6951 case 0x12: do_vec_mls_indexed (cpu); return;
6952 case 0x29: do_vec_xtl (cpu); return;
6953 default:
6954 HALT_NYI;
6955 }
6956
6957 default:
6958 break;
6959 }
6960
6961 HALT_NYI;
6962 }
6963
6964 /* 3 sources. */
6965
6966 /* Float multiply add. */
6967 static void
6968 fmadds (sim_cpu *cpu)
6969 {
6970 unsigned sa = INSTR (14, 10);
6971 unsigned sm = INSTR (20, 16);
6972 unsigned sn = INSTR ( 9, 5);
6973 unsigned sd = INSTR ( 4, 0);
6974
6975 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6976 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
6977 + aarch64_get_FP_float (cpu, sn)
6978 * aarch64_get_FP_float (cpu, sm));
6979 }
6980
6981 /* Double multiply add. */
6982 static void
6983 fmaddd (sim_cpu *cpu)
6984 {
6985 unsigned sa = INSTR (14, 10);
6986 unsigned sm = INSTR (20, 16);
6987 unsigned sn = INSTR ( 9, 5);
6988 unsigned sd = INSTR ( 4, 0);
6989
6990 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6991 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
6992 + aarch64_get_FP_double (cpu, sn)
6993 * aarch64_get_FP_double (cpu, sm));
6994 }
6995
6996 /* Float multiply subtract. */
6997 static void
6998 fmsubs (sim_cpu *cpu)
6999 {
7000 unsigned sa = INSTR (14, 10);
7001 unsigned sm = INSTR (20, 16);
7002 unsigned sn = INSTR ( 9, 5);
7003 unsigned sd = INSTR ( 4, 0);
7004
7005 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7006 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
7007 - aarch64_get_FP_float (cpu, sn)
7008 * aarch64_get_FP_float (cpu, sm));
7009 }
7010
7011 /* Double multiply subtract. */
7012 static void
7013 fmsubd (sim_cpu *cpu)
7014 {
7015 unsigned sa = INSTR (14, 10);
7016 unsigned sm = INSTR (20, 16);
7017 unsigned sn = INSTR ( 9, 5);
7018 unsigned sd = INSTR ( 4, 0);
7019
7020 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7021 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
7022 - aarch64_get_FP_double (cpu, sn)
7023 * aarch64_get_FP_double (cpu, sm));
7024 }
7025
7026 /* Float negative multiply add. */
7027 static void
7028 fnmadds (sim_cpu *cpu)
7029 {
7030 unsigned sa = INSTR (14, 10);
7031 unsigned sm = INSTR (20, 16);
7032 unsigned sn = INSTR ( 9, 5);
7033 unsigned sd = INSTR ( 4, 0);
7034
7035 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7036 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7037 + (- aarch64_get_FP_float (cpu, sn))
7038 * aarch64_get_FP_float (cpu, sm));
7039 }
7040
7041 /* Double negative multiply add. */
7042 static void
7043 fnmaddd (sim_cpu *cpu)
7044 {
7045 unsigned sa = INSTR (14, 10);
7046 unsigned sm = INSTR (20, 16);
7047 unsigned sn = INSTR ( 9, 5);
7048 unsigned sd = INSTR ( 4, 0);
7049
7050 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7051 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7052 + (- aarch64_get_FP_double (cpu, sn))
7053 * aarch64_get_FP_double (cpu, sm));
7054 }
7055
7056 /* Float negative multiply subtract. */
7057 static void
7058 fnmsubs (sim_cpu *cpu)
7059 {
7060 unsigned sa = INSTR (14, 10);
7061 unsigned sm = INSTR (20, 16);
7062 unsigned sn = INSTR ( 9, 5);
7063 unsigned sd = INSTR ( 4, 0);
7064
7065 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7066 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7067 + aarch64_get_FP_float (cpu, sn)
7068 * aarch64_get_FP_float (cpu, sm));
7069 }
7070
7071 /* Double negative multiply subtract. */
7072 static void
7073 fnmsubd (sim_cpu *cpu)
7074 {
7075 unsigned sa = INSTR (14, 10);
7076 unsigned sm = INSTR (20, 16);
7077 unsigned sn = INSTR ( 9, 5);
7078 unsigned sd = INSTR ( 4, 0);
7079
7080 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7081 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7082 + aarch64_get_FP_double (cpu, sn)
7083 * aarch64_get_FP_double (cpu, sm));
7084 }
7085
7086 static void
7087 dexSimpleFPDataProc3Source (sim_cpu *cpu)
7088 {
7089 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7090 instr[30] = 0
7091 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7092 instr[28,25] = 1111
7093 instr[24] = 1
7094 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
7095 instr[21] ==> o1 : 0 ==> unnegated, 1 ==> negated
7096 instr[15] ==> o2 : 0 ==> ADD, 1 ==> SUB */
7097
7098 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7099 /* dispatch on combined type:o1:o2. */
7100 uint32_t dispatch = (INSTR (23, 21) << 1) | INSTR (15, 15);
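  /* For example, dispatch == 5 (type 01, o1 0, o2 1) selects the
     double precision FMSUB, handled by fmsubd below.  */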
7101
7102 if (M_S != 0)
7103 HALT_UNALLOC;
7104
7105 switch (dispatch)
7106 {
7107 case 0: fmadds (cpu); return;
7108 case 1: fmsubs (cpu); return;
7109 case 2: fnmadds (cpu); return;
7110 case 3: fnmsubs (cpu); return;
7111 case 4: fmaddd (cpu); return;
7112 case 5: fmsubd (cpu); return;
7113 case 6: fnmaddd (cpu); return;
7114 case 7: fnmsubd (cpu); return;
7115 default:
7116 /* type > 1 is currently unallocated. */
7117 HALT_UNALLOC;
7118 }
7119 }
7120
7121 static void
7122 dexSimpleFPFixedConvert (sim_cpu *cpu)
7123 {
7124 HALT_NYI;
7125 }
7126
7127 static void
7128 dexSimpleFPCondCompare (sim_cpu *cpu)
7129 {
7130 /* instr [31,23] = 0001 1110 0
7131 instr [22] = type
7132 instr [21] = 1
7133 instr [20,16] = Rm
7134 instr [15,12] = condition
7135 instr [11,10] = 01
7136 instr [9,5] = Rn
7137 instr [4] = 0
7138 instr [3,0] = nzcv */
7139
7140 unsigned rm = INSTR (20, 16);
7141 unsigned rn = INSTR (9, 5);
7142
7143 NYI_assert (31, 23, 0x3C);
7144 NYI_assert (11, 10, 0x1);
7145 NYI_assert (4, 4, 0);
7146
7147 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7148 if (! testConditionCode (cpu, INSTR (15, 12)))
7149 {
7150 aarch64_set_CPSR (cpu, INSTR (3, 0));
7151 return;
7152 }
7153
7154 if (INSTR (22, 22))
7155 {
7156 /* Double precision. */
7157 double val1 = aarch64_get_vec_double (cpu, rn, 0);
7158 double val2 = aarch64_get_vec_double (cpu, rm, 0);
7159
7160 /* FIXME: Check for NaNs. */
7161 if (val1 == val2)
7162 aarch64_set_CPSR (cpu, (Z | C));
7163 else if (val1 < val2)
7164 aarch64_set_CPSR (cpu, N);
7165 else /* val1 > val2 */
7166 aarch64_set_CPSR (cpu, C);
7167 }
7168 else
7169 {
7170 /* Single precision. */
7171 float val1 = aarch64_get_vec_float (cpu, rn, 0);
7172 float val2 = aarch64_get_vec_float (cpu, rm, 0);
7173
7174 /* FIXME: Check for NaNs. */
7175 if (val1 == val2)
7176 aarch64_set_CPSR (cpu, (Z | C));
7177 else if (val1 < val2)
7178 aarch64_set_CPSR (cpu, N);
7179 else /* val1 > val2 */
7180 aarch64_set_CPSR (cpu, C);
7181 }
7182 }
7183
7184 /* 2 sources. */
7185
7186 /* Float add. */
7187 static void
7188 fadds (sim_cpu *cpu)
7189 {
7190 unsigned sm = INSTR (20, 16);
7191 unsigned sn = INSTR ( 9, 5);
7192 unsigned sd = INSTR ( 4, 0);
7193
7194 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7195 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7196 + aarch64_get_FP_float (cpu, sm));
7197 }
7198
7199 /* Double add. */
7200 static void
7201 faddd (sim_cpu *cpu)
7202 {
7203 unsigned sm = INSTR (20, 16);
7204 unsigned sn = INSTR ( 9, 5);
7205 unsigned sd = INSTR ( 4, 0);
7206
7207 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7208 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7209 + aarch64_get_FP_double (cpu, sm));
7210 }
7211
7212 /* Float divide. */
7213 static void
7214 fdivs (sim_cpu *cpu)
7215 {
7216 unsigned sm = INSTR (20, 16);
7217 unsigned sn = INSTR ( 9, 5);
7218 unsigned sd = INSTR ( 4, 0);
7219
7220 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7221 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7222 / aarch64_get_FP_float (cpu, sm));
7223 }
7224
7225 /* Double divide. */
7226 static void
7227 fdivd (sim_cpu *cpu)
7228 {
7229 unsigned sm = INSTR (20, 16);
7230 unsigned sn = INSTR ( 9, 5);
7231 unsigned sd = INSTR ( 4, 0);
7232
7233 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7234 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7235 / aarch64_get_FP_double (cpu, sm));
7236 }
7237
7238 /* Float multiply. */
7239 static void
7240 fmuls (sim_cpu *cpu)
7241 {
7242 unsigned sm = INSTR (20, 16);
7243 unsigned sn = INSTR ( 9, 5);
7244 unsigned sd = INSTR ( 4, 0);
7245
7246 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7247 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7248 * aarch64_get_FP_float (cpu, sm));
7249 }
7250
7251 /* Double multiply. */
7252 static void
7253 fmuld (sim_cpu *cpu)
7254 {
7255 unsigned sm = INSTR (20, 16);
7256 unsigned sn = INSTR ( 9, 5);
7257 unsigned sd = INSTR ( 4, 0);
7258
7259 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7260 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7261 * aarch64_get_FP_double (cpu, sm));
7262 }
7263
7264 /* Float negate and multiply. */
7265 static void
7266 fnmuls (sim_cpu *cpu)
7267 {
7268 unsigned sm = INSTR (20, 16);
7269 unsigned sn = INSTR ( 9, 5);
7270 unsigned sd = INSTR ( 4, 0);
7271
7272 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7273 aarch64_set_FP_float (cpu, sd, - (aarch64_get_FP_float (cpu, sn)
7274 * aarch64_get_FP_float (cpu, sm)));
7275 }
7276
7277 /* Double negate and multiply. */
7278 static void
7279 fnmuld (sim_cpu *cpu)
7280 {
7281 unsigned sm = INSTR (20, 16);
7282 unsigned sn = INSTR ( 9, 5);
7283 unsigned sd = INSTR ( 4, 0);
7284
7285 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7286 aarch64_set_FP_double (cpu, sd, - (aarch64_get_FP_double (cpu, sn)
7287 * aarch64_get_FP_double (cpu, sm)));
7288 }
7289
7290 /* Float subtract. */
7291 static void
7292 fsubs (sim_cpu *cpu)
7293 {
7294 unsigned sm = INSTR (20, 16);
7295 unsigned sn = INSTR ( 9, 5);
7296 unsigned sd = INSTR ( 4, 0);
7297
7298 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7299 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7300 - aarch64_get_FP_float (cpu, sm));
7301 }
7302
7303 /* Double subtract. */
7304 static void
7305 fsubd (sim_cpu *cpu)
7306 {
7307 unsigned sm = INSTR (20, 16);
7308 unsigned sn = INSTR ( 9, 5);
7309 unsigned sd = INSTR ( 4, 0);
7310
7311 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7312 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7313 - aarch64_get_FP_double (cpu, sm));
7314 }
7315
7316 static void
7317 do_FMINNM (sim_cpu *cpu)
7318 {
7319 /* instr[31,23] = 0 0011 1100
7320 instr[22] = float(0)/double(1)
7321 instr[21] = 1
7322 instr[20,16] = Sm
7323 instr[15,10] = 01 1110
7324 instr[9,5] = Sn
7325      instr[4,0]   = Sd */
7326
7327 unsigned sm = INSTR (20, 16);
7328 unsigned sn = INSTR ( 9, 5);
7329 unsigned sd = INSTR ( 4, 0);
7330
7331 NYI_assert (31, 23, 0x03C);
7332 NYI_assert (15, 10, 0x1E);
7333
7334 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7335 if (INSTR (22, 22))
7336 aarch64_set_FP_double (cpu, sd,
7337 dminnm (aarch64_get_FP_double (cpu, sn),
7338 aarch64_get_FP_double (cpu, sm)));
7339 else
7340 aarch64_set_FP_float (cpu, sd,
7341 fminnm (aarch64_get_FP_float (cpu, sn),
7342 aarch64_get_FP_float (cpu, sm)));
7343 }
7344
7345 static void
7346 do_FMAXNM (sim_cpu *cpu)
7347 {
7348 /* instr[31,23] = 0 0011 1100
7349 instr[22] = float(0)/double(1)
7350 instr[21] = 1
7351 instr[20,16] = Sm
7352 instr[15,10] = 01 1010
7353 instr[9,5] = Sn
7354      instr[4,0]   = Sd */
7355
7356 unsigned sm = INSTR (20, 16);
7357 unsigned sn = INSTR ( 9, 5);
7358 unsigned sd = INSTR ( 4, 0);
7359
7360 NYI_assert (31, 23, 0x03C);
7361 NYI_assert (15, 10, 0x1A);
7362
7363 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7364 if (INSTR (22, 22))
7365 aarch64_set_FP_double (cpu, sd,
7366 dmaxnm (aarch64_get_FP_double (cpu, sn),
7367 aarch64_get_FP_double (cpu, sm)));
7368 else
7369 aarch64_set_FP_float (cpu, sd,
7370 fmaxnm (aarch64_get_FP_float (cpu, sn),
7371 aarch64_get_FP_float (cpu, sm)));
7372 }
7373
7374 static void
7375 dexSimpleFPDataProc2Source (sim_cpu *cpu)
7376 {
7377 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7378 instr[30] = 0
7379 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7380 instr[28,25] = 1111
7381 instr[24] = 0
7382 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
7383 instr[21] = 1
7384 instr[20,16] = Vm
7385 instr[15,12] ==> opcode : 0000 ==> FMUL, 0001 ==> FDIV
7386 0010 ==> FADD, 0011 ==> FSUB,
7387 0100 ==> FMAX, 0101 ==> FMIN
7388 0110 ==> FMAXNM, 0111 ==> FMINNM
7389 1000 ==> FNMUL, ow ==> UNALLOC
7390 instr[11,10] = 10
7391 instr[9,5] = Vn
7392 instr[4,0] = Vd */
7393
7394 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7395 uint32_t type = INSTR (23, 22);
7396 /* Dispatch on opcode. */
7397 uint32_t dispatch = INSTR (15, 12);
7398
7399 if (type > 1)
7400 HALT_UNALLOC;
7401
7402 if (M_S != 0)
7403 HALT_UNALLOC;
7404
7405 if (type)
7406 switch (dispatch)
7407 {
7408 case 0: fmuld (cpu); return;
7409 case 1: fdivd (cpu); return;
7410 case 2: faddd (cpu); return;
7411 case 3: fsubd (cpu); return;
7412 case 6: do_FMAXNM (cpu); return;
7413 case 7: do_FMINNM (cpu); return;
7414 case 8: fnmuld (cpu); return;
7415
7416 /* Have not yet implemented fmax and fmin. */
7417 case 4:
7418 case 5:
7419 HALT_NYI;
7420
7421 default:
7422 HALT_UNALLOC;
7423 }
7424 else /* type == 0 => floats. */
7425 switch (dispatch)
7426 {
7427 case 0: fmuls (cpu); return;
7428 case 1: fdivs (cpu); return;
7429 case 2: fadds (cpu); return;
7430 case 3: fsubs (cpu); return;
7431 case 6: do_FMAXNM (cpu); return;
7432 case 7: do_FMINNM (cpu); return;
7433 case 8: fnmuls (cpu); return;
7434
7435 case 4:
7436 case 5:
7437 HALT_NYI;
7438
7439 default:
7440 HALT_UNALLOC;
7441 }
7442 }
7443
7444 static void
7445 dexSimpleFPCondSelect (sim_cpu *cpu)
7446 {
7447 /* FCSEL
7448 instr[31,23] = 0 0011 1100
7449 instr[22] = 0=>single 1=>double
7450 instr[21] = 1
7451 instr[20,16] = Sm
7452 instr[15,12] = cond
7453 instr[11,10] = 11
7454 instr[9,5] = Sn
7455      instr[4,0]  = Sd */
7456 unsigned sm = INSTR (20, 16);
7457 unsigned sn = INSTR ( 9, 5);
7458 unsigned sd = INSTR ( 4, 0);
7459 uint32_t set = testConditionCode (cpu, INSTR (15, 12));
7460
7461 NYI_assert (31, 23, 0x03C);
7462 NYI_assert (11, 10, 0x3);
7463
7464 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7465   if (INSTR (22, 22))
7466     aarch64_set_FP_double (cpu, sd, set ? aarch64_get_FP_double (cpu, sn) : aarch64_get_FP_double (cpu, sm));
7467   else
7468     aarch64_set_FP_float (cpu, sd, set ? aarch64_get_FP_float (cpu, sn) : aarch64_get_FP_float (cpu, sm));
7469 }
7470
7471 /* Store 32 bit unscaled signed 9 bit. */
7472 static void
7473 fsturs (sim_cpu *cpu, int32_t offset)
7474 {
7475 unsigned int rn = INSTR (9, 5);
7476 unsigned int st = INSTR (4, 0);
7477
7478 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7479   aarch64_set_mem_u32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7480 aarch64_get_vec_u32 (cpu, st, 0));
7481 }
7482
7483 /* Store 64 bit unscaled signed 9 bit. */
7484 static void
7485 fsturd (sim_cpu *cpu, int32_t offset)
7486 {
7487 unsigned int rn = INSTR (9, 5);
7488 unsigned int st = INSTR (4, 0);
7489
7490 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7491   aarch64_set_mem_u64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7492 aarch64_get_vec_u64 (cpu, st, 0));
7493 }
7494
7495 /* Store 128 bit unscaled signed 9 bit. */
7496 static void
7497 fsturq (sim_cpu *cpu, int32_t offset)
7498 {
7499 unsigned int rn = INSTR (9, 5);
7500 unsigned int st = INSTR (4, 0);
7501 FRegister a;
7502
7503 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7504 aarch64_get_FP_long_double (cpu, st, & a);
7505 aarch64_set_mem_long_double (cpu,
7506 			       aarch64_get_reg_u64 (cpu, rn, SP_OK)
7507 + offset, a);
7508 }
7509
7510 /* TODO FP move register. */
7511
7512 /* 32 bit fp to fp move register. */
7513 static void
7514 ffmovs (sim_cpu *cpu)
7515 {
7516 unsigned int rn = INSTR (9, 5);
7517 unsigned int st = INSTR (4, 0);
7518
7519 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7520 aarch64_set_FP_float (cpu, st, aarch64_get_FP_float (cpu, rn));
7521 }
7522
7523 /* 64 bit fp to fp move register. */
7524 static void
7525 ffmovd (sim_cpu *cpu)
7526 {
7527 unsigned int rn = INSTR (9, 5);
7528 unsigned int st = INSTR (4, 0);
7529
7530 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7531 aarch64_set_FP_double (cpu, st, aarch64_get_FP_double (cpu, rn));
7532 }
7533
7534 /* 32 bit GReg to Vec move register. */
7535 static void
7536 fgmovs (sim_cpu *cpu)
7537 {
7538 unsigned int rn = INSTR (9, 5);
7539 unsigned int st = INSTR (4, 0);
7540
7541 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7542 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_reg_u32 (cpu, rn, NO_SP));
7543 }
7544
7545 /* 64 bit g to fp move register. */
7546 static void
7547 fgmovd (sim_cpu *cpu)
7548 {
7549 unsigned int rn = INSTR (9, 5);
7550 unsigned int st = INSTR (4, 0);
7551
7552 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7553 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_reg_u64 (cpu, rn, NO_SP));
7554 }
7555
7556 /* 32 bit fp to g move register. */
7557 static void
7558 gfmovs (sim_cpu *cpu)
7559 {
7560 unsigned int rn = INSTR (9, 5);
7561 unsigned int st = INSTR (4, 0);
7562
7563 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7564 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u32 (cpu, rn, 0));
7565 }
7566
7567 /* 64 bit fp to g move register. */
7568 static void
7569 gfmovd (sim_cpu *cpu)
7570 {
7571 unsigned int rn = INSTR (9, 5);
7572 unsigned int st = INSTR (4, 0);
7573
7574 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7575 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u64 (cpu, rn, 0));
7576 }
7577
7578 /* FP move immediate
7579
7580 These install an immediate 8 bit value in the target register
7581 where the 8 bits comprise 1 sign bit, 4 bits of fraction and a 3
7582 bit exponent. */
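
/* For example, assuming the standard VFPExpandImm encoding, imm8 ==
   0x70 (sign 0, exponent 111, fraction 0000) decodes to 1.0, and
   imm8 == 0x00 decodes to 2.0.  */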
7583
7584 static void
7585 fmovs (sim_cpu *cpu)
7586 {
7587 unsigned int sd = INSTR (4, 0);
7588 uint32_t imm = INSTR (20, 13);
7589 float f = fp_immediate_for_encoding_32 (imm);
7590
7591 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7592 aarch64_set_FP_float (cpu, sd, f);
7593 }
7594
7595 static void
7596 fmovd (sim_cpu *cpu)
7597 {
7598 unsigned int sd = INSTR (4, 0);
7599 uint32_t imm = INSTR (20, 13);
7600 double d = fp_immediate_for_encoding_64 (imm);
7601
7602 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7603 aarch64_set_FP_double (cpu, sd, d);
7604 }
7605
7606 static void
7607 dexSimpleFPImmediate (sim_cpu *cpu)
7608 {
7609 /* instr[31,23] == 00111100
7610 instr[22] == type : single(0)/double(1)
7611 instr[21] == 1
7612 instr[20,13] == imm8
7613 instr[12,10] == 100
7614      instr[9,5]   == imm5 : 00000 ==> OK, ow ==> UNALLOC
7615 instr[4,0] == Rd */
7616 uint32_t imm5 = INSTR (9, 5);
7617
7618 NYI_assert (31, 23, 0x3C);
7619
7620 if (imm5 != 0)
7621 HALT_UNALLOC;
7622
7623 if (INSTR (22, 22))
7624 fmovd (cpu);
7625 else
7626 fmovs (cpu);
7627 }
7628
7629 /* TODO specific decode and execute for group Load Store. */
7630
7631 /* TODO FP load/store single register (unscaled offset). */
7632
7633 /* TODO load 8 bit unscaled signed 9 bit. */
7634 /* TODO load 16 bit unscaled signed 9 bit. */
7635
7636 /* Load 32 bit unscaled signed 9 bit. */
7637 static void
7638 fldurs (sim_cpu *cpu, int32_t offset)
7639 {
7640 unsigned int rn = INSTR (9, 5);
7641 unsigned int st = INSTR (4, 0);
7642
7643 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7644 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
7645 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7646 }
7647
7648 /* Load 64 bit unscaled signed 9 bit. */
7649 static void
7650 fldurd (sim_cpu *cpu, int32_t offset)
7651 {
7652 unsigned int rn = INSTR (9, 5);
7653 unsigned int st = INSTR (4, 0);
7654
7655 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7656 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64
7657 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7658 }
7659
7660 /* Load 128 bit unscaled signed 9 bit. */
7661 static void
7662 fldurq (sim_cpu *cpu, int32_t offset)
7663 {
7664 unsigned int rn = INSTR (9, 5);
7665 unsigned int st = INSTR (4, 0);
7666 FRegister a;
7667 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
7668
7669 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7670 aarch64_get_mem_long_double (cpu, addr, & a);
7671 aarch64_set_FP_long_double (cpu, st, a);
7672 }
7673
7674 /* TODO store 8 bit unscaled signed 9 bit. */
7675 /* TODO store 16 bit unscaled signed 9 bit. */
7676
7677
7678 /* 1 source. */
7679
7680 /* Float absolute value. */
7681 static void
7682 fabss (sim_cpu *cpu)
7683 {
7684 unsigned sn = INSTR (9, 5);
7685 unsigned sd = INSTR (4, 0);
7686 float value = aarch64_get_FP_float (cpu, sn);
7687
7688 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7689 aarch64_set_FP_float (cpu, sd, fabsf (value));
7690 }
7691
7692 /* Double absolute value. */
7693 static void
7694 fabcpu (sim_cpu *cpu)
7695 {
7696 unsigned sn = INSTR (9, 5);
7697 unsigned sd = INSTR (4, 0);
7698 double value = aarch64_get_FP_double (cpu, sn);
7699
7700 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7701 aarch64_set_FP_double (cpu, sd, fabs (value));
7702 }
7703
7704 /* Float negative value. */
7705 static void
7706 fnegs (sim_cpu *cpu)
7707 {
7708 unsigned sn = INSTR (9, 5);
7709 unsigned sd = INSTR (4, 0);
7710
7711 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7712 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sn));
7713 }
7714
7715 /* Double negative value. */
7716 static void
7717 fnegd (sim_cpu *cpu)
7718 {
7719 unsigned sn = INSTR (9, 5);
7720 unsigned sd = INSTR (4, 0);
7721
7722 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7723 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sn));
7724 }
7725
7726 /* Float square root. */
7727 static void
7728 fsqrts (sim_cpu *cpu)
7729 {
7730 unsigned sn = INSTR (9, 5);
7731 unsigned sd = INSTR (4, 0);
7732
7733 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7734 aarch64_set_FP_float (cpu, sd, sqrtf (aarch64_get_FP_float (cpu, sn)));
7735 }
7736
7737 /* Double square root. */
7738 static void
7739 fsqrtd (sim_cpu *cpu)
7740 {
7741 unsigned sn = INSTR (9, 5);
7742 unsigned sd = INSTR (4, 0);
7743
7744 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7745 aarch64_set_FP_double (cpu, sd,
7746 sqrt (aarch64_get_FP_double (cpu, sn)));
7747 }
7748
7749 /* Convert double to float. */
7750 static void
7751 fcvtds (sim_cpu *cpu)
7752 {
7753 unsigned sn = INSTR (9, 5);
7754 unsigned sd = INSTR (4, 0);
7755
7756 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7757 aarch64_set_FP_float (cpu, sd, (float) aarch64_get_FP_double (cpu, sn));
7758 }
7759
7760 /* Convert float to double. */
7761 static void
7762 fcvtcpu (sim_cpu *cpu)
7763 {
7764 unsigned sn = INSTR (9, 5);
7765 unsigned sd = INSTR (4, 0);
7766
7767 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7768 aarch64_set_FP_double (cpu, sd, (double) aarch64_get_FP_float (cpu, sn));
7769 }
7770
7771 static void
7772 do_FRINT (sim_cpu *cpu)
7773 {
7774 /* instr[31,23] = 0001 1110 0
7775 instr[22] = single(0)/double(1)
7776 instr[21,18] = 1001
7777 instr[17,15] = rounding mode
7778 instr[14,10] = 10000
7779 instr[9,5] = source
7780 instr[4,0] = dest */
7781
7782 float val;
7783 unsigned rs = INSTR (9, 5);
7784 unsigned rd = INSTR (4, 0);
7785 unsigned int rmode = INSTR (17, 15);
7786
7787 NYI_assert (31, 23, 0x03C);
7788 NYI_assert (21, 18, 0x9);
7789 NYI_assert (14, 10, 0x10);
7790
7791 if (rmode == 6 || rmode == 7)
7792 /* FIXME: Add support for rmode == 6 exactness check. */
7793 rmode = uimm (aarch64_get_FPSR (cpu), 23, 22);
7794
7795 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7796 if (INSTR (22, 22))
7797 {
7798 double val = aarch64_get_FP_double (cpu, rs);
7799
7800 switch (rmode)
7801 {
7802 	case 0:	/* mode N: nearest or even.  */
7803 	  {
7804 	    double rval = round (val);
7805 
7806 	    /* round () rounds halfway cases away from zero; pull an
7807 	       odd result back to the even neighbour.  */
7808 	    if (fabs (val - rval) == 0.5
7809 		&& fmod (rval, 2.0) != 0.0)
7810 	      rval -= copysign (1.0, val);
7811 
7812 	    aarch64_set_FP_double (cpu, rd, rval);
7813 	    return;
7814 	  }
7815
7816 case 1: /* mode P: towards +inf. */
7817 if (val < 0.0)
7818 aarch64_set_FP_double (cpu, rd, trunc (val));
7819 else
7820 aarch64_set_FP_double (cpu, rd, round (val));
7821 return;
7822
7823 case 2: /* mode M: towards -inf. */
7824 if (val < 0.0)
7825 aarch64_set_FP_double (cpu, rd, round (val));
7826 else
7827 aarch64_set_FP_double (cpu, rd, trunc (val));
7828 return;
7829
7830 case 3: /* mode Z: towards 0. */
7831 aarch64_set_FP_double (cpu, rd, trunc (val));
7832 return;
7833
7834 case 4: /* mode A: away from 0. */
7835 aarch64_set_FP_double (cpu, rd, round (val));
7836 return;
7837
7838 case 6: /* mode X: use FPCR with exactness check. */
7839 case 7: /* mode I: use FPCR mode. */
7840 HALT_NYI;
7841
7842 default:
7843 HALT_UNALLOC;
7844 }
7845 }
7846
7847 val = aarch64_get_FP_float (cpu, rs);
7848
7849 switch (rmode)
7850 {
7851     case 0:	/* mode N: nearest or even.  */
7852       {
7853 	float rval = roundf (val);
7854 
7855 	/* roundf () rounds halfway cases away from zero; pull an
7856 	   odd result back to the even neighbour.  */
7857 	if (fabsf (val - rval) == 0.5f
7858 	    && fmodf (rval, 2.0f) != 0.0f)
7859 	  rval -= copysignf (1.0f, val);
7860 
7861 	aarch64_set_FP_float (cpu, rd, rval);
7862 	return;
7863       }
7864
7865 case 1: /* mode P: towards +inf. */
7866 if (val < 0.0)
7867 aarch64_set_FP_float (cpu, rd, truncf (val));
7868 else
7869 aarch64_set_FP_float (cpu, rd, roundf (val));
7870 return;
7871
7872 case 2: /* mode M: towards -inf. */
7873 if (val < 0.0)
7874 aarch64_set_FP_float (cpu, rd, truncf (val));
7875 else
7876 aarch64_set_FP_float (cpu, rd, roundf (val));
7877 return;
7878
7879 case 3: /* mode Z: towards 0. */
7880 aarch64_set_FP_float (cpu, rd, truncf (val));
7881 return;
7882
7883 case 4: /* mode A: away from 0. */
7884 aarch64_set_FP_float (cpu, rd, roundf (val));
7885 return;
7886
7887 case 6: /* mode X: use FPCR with exactness check. */
7888 case 7: /* mode I: use FPCR mode. */
7889 HALT_NYI;
7890
7891 default:
7892 HALT_UNALLOC;
7893 }
7894 }
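
/* For example, with FRINTN (rmode 0) the value 2.5 rounds to 2.0
   while 3.5 rounds to 4.0: halfway cases go to the even neighbour.  */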
7895
7896 /* Convert half to float. */
7897 static void
7898 do_FCVT_half_to_single (sim_cpu *cpu)
7899 {
7900 unsigned rn = INSTR (9, 5);
7901 unsigned rd = INSTR (4, 0);
7902
7903 NYI_assert (31, 10, 0x7B890);
7904
7905 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7906 aarch64_set_FP_float (cpu, rd, (float) aarch64_get_FP_half (cpu, rn));
7907 }
7908
7909 /* Convert half to double. */
7910 static void
7911 do_FCVT_half_to_double (sim_cpu *cpu)
7912 {
7913 unsigned rn = INSTR (9, 5);
7914 unsigned rd = INSTR (4, 0);
7915
7916 NYI_assert (31, 10, 0x7B8B0);
7917
7918 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7919 aarch64_set_FP_double (cpu, rd, (double) aarch64_get_FP_half (cpu, rn));
7920 }
7921
7922 static void
7923 do_FCVT_single_to_half (sim_cpu *cpu)
7924 {
7925 unsigned rn = INSTR (9, 5);
7926 unsigned rd = INSTR (4, 0);
7927
7928 NYI_assert (31, 10, 0x788F0);
7929
7930 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7931 aarch64_set_FP_half (cpu, rd, aarch64_get_FP_float (cpu, rn));
7932 }
7933
7934 /* Convert double to half. */
7935 static void
7936 do_FCVT_double_to_half (sim_cpu *cpu)
7937 {
7938 unsigned rn = INSTR (9, 5);
7939 unsigned rd = INSTR (4, 0);
7940
7941 NYI_assert (31, 10, 0x798F0);
7942
7943 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7944 aarch64_set_FP_half (cpu, rd, (float) aarch64_get_FP_double (cpu, rn));
7945 }
7946
7947 static void
7948 dexSimpleFPDataProc1Source (sim_cpu *cpu)
7949 {
7950 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7951 instr[30] = 0
7952 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7953 instr[28,25] = 1111
7954 instr[24] = 0
7955 instr[23,22] ==> type : 00 ==> source is single,
7956 01 ==> source is double
7957 10 ==> UNALLOC
7958 11 ==> UNALLOC or source is half
7959 instr[21] = 1
7960 instr[20,15] ==> opcode : with type 00 or 01
7961 000000 ==> FMOV, 000001 ==> FABS,
7962 000010 ==> FNEG, 000011 ==> FSQRT,
7963 000100 ==> UNALLOC, 000101 ==> FCVT,(to single/double)
7964 000110 ==> UNALLOC, 000111 ==> FCVT (to half)
7965 001000 ==> FRINTN, 001001 ==> FRINTP,
7966 001010 ==> FRINTM, 001011 ==> FRINTZ,
7967 001100 ==> FRINTA, 001101 ==> UNALLOC
7968 001110 ==> FRINTX, 001111 ==> FRINTI
7969 with type 11
7970 000100 ==> FCVT (half-to-single)
7971 000101 ==> FCVT (half-to-double)
7972 instr[14,10] = 10000. */
7973
7974 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7975 uint32_t type = INSTR (23, 22);
7976 uint32_t opcode = INSTR (20, 15);
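  /* For example, opcode 5 with type 00 is the single-to-double FCVT
     handled by fcvtcpu below, while the same opcode with type 11 is
     the half-to-double FCVT handled by do_FCVT_half_to_double.  */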
7977
7978 if (M_S != 0)
7979 HALT_UNALLOC;
7980
7981 if (type == 3)
7982 {
7983 if (opcode == 4)
7984 do_FCVT_half_to_single (cpu);
7985 else if (opcode == 5)
7986 do_FCVT_half_to_double (cpu);
7987 else
7988 HALT_UNALLOC;
7989 return;
7990 }
7991
7992 if (type == 2)
7993 HALT_UNALLOC;
7994
7995 switch (opcode)
7996 {
7997 case 0:
7998 if (type)
7999 ffmovd (cpu);
8000 else
8001 ffmovs (cpu);
8002 return;
8003
8004 case 1:
8005 if (type)
8006 fabcpu (cpu);
8007 else
8008 fabss (cpu);
8009 return;
8010
8011 case 2:
8012 if (type)
8013 fnegd (cpu);
8014 else
8015 fnegs (cpu);
8016 return;
8017
8018 case 3:
8019 if (type)
8020 fsqrtd (cpu);
8021 else
8022 fsqrts (cpu);
8023 return;
8024
8025 case 4:
8026 if (type)
8027 fcvtds (cpu);
8028 else
8029 HALT_UNALLOC;
8030 return;
8031
8032 case 5:
8033 if (type)
8034 HALT_UNALLOC;
8035 fcvtcpu (cpu);
8036 return;
8037
8038 case 8: /* FRINTN etc. */
8039 case 9:
8040 case 10:
8041 case 11:
8042 case 12:
8043 case 14:
8044 case 15:
8045 do_FRINT (cpu);
8046 return;
8047
8048 case 7:
8049 if (INSTR (22, 22))
8050 do_FCVT_double_to_half (cpu);
8051 else
8052 do_FCVT_single_to_half (cpu);
8053 return;
8054
8055 case 13:
8056 HALT_NYI;
8057
8058 default:
8059 HALT_UNALLOC;
8060 }
8061 }
8062
8063 /* 32 bit signed int to float. */
8064 static void
8065 scvtf32 (sim_cpu *cpu)
8066 {
8067 unsigned rn = INSTR (9, 5);
8068 unsigned sd = INSTR (4, 0);
8069
8070 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8071 aarch64_set_FP_float
8072 (cpu, sd, (float) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8073 }
8074
8075 /* signed int to float. */
8076 static void
8077 scvtf (sim_cpu *cpu)
8078 {
8079 unsigned rn = INSTR (9, 5);
8080 unsigned sd = INSTR (4, 0);
8081
8082 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8083 aarch64_set_FP_float
8084 (cpu, sd, (float) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8085 }
8086
8087 /* 32 bit signed int to double. */
8088 static void
8089 scvtd32 (sim_cpu *cpu)
8090 {
8091 unsigned rn = INSTR (9, 5);
8092 unsigned sd = INSTR (4, 0);
8093
8094 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8095 aarch64_set_FP_double
8096 (cpu, sd, (double) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8097 }
8098
8099 /* signed int to double. */
8100 static void
8101 scvtd (sim_cpu *cpu)
8102 {
8103 unsigned rn = INSTR (9, 5);
8104 unsigned sd = INSTR (4, 0);
8105
8106 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8107 aarch64_set_FP_double
8108 (cpu, sd, (double) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8109 }
8110
8111 static const float FLOAT_INT_MAX = (float) INT_MAX;
8112 static const float FLOAT_INT_MIN = (float) INT_MIN;
8113 static const double DOUBLE_INT_MAX = (double) INT_MAX;
8114 static const double DOUBLE_INT_MIN = (double) INT_MIN;
8115 static const float FLOAT_LONG_MAX = (float) LONG_MAX;
8116 static const float FLOAT_LONG_MIN = (float) LONG_MIN;
8117 static const double DOUBLE_LONG_MAX = (double) LONG_MAX;
8118 static const double DOUBLE_LONG_MIN = (double) LONG_MIN;
8119
8120 /* Check for FP exception conditions:
8121 NaN raises IO
8122 Infinity raises IO
8123 Out of Range raises IO and IX and saturates value
8124 Denormal raises ID and IX and sets to zero. */
8125 #define RAISE_EXCEPTIONS(F, VALUE, FTYPE, ITYPE) \
8126 do \
8127 { \
8128 switch (fpclassify (F)) \
8129 { \
8130 case FP_INFINITE: \
8131 case FP_NAN: \
8132 aarch64_set_FPSR (cpu, IO); \
8133 	if (signbit (F))						\
8134 	  VALUE = ITYPE##_MIN;						\
8135 	else								\
8136 	  VALUE = ITYPE##_MAX;						\
8137 break; \
8138 \
8139 case FP_NORMAL: \
8140 if (F >= FTYPE##_##ITYPE##_MAX) \
8141 { \
8142 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8143 VALUE = ITYPE##_MAX; \
8144 } \
8145 else if (F <= FTYPE##_##ITYPE##_MIN) \
8146 { \
8147 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8148 VALUE = ITYPE##_MIN; \
8149 } \
8150 break; \
8151 \
8152 case FP_SUBNORMAL: \
8153 aarch64_set_FPSR_bits (cpu, IO | IX | ID, IX | ID); \
8154 VALUE = 0; \
8155 break; \
8156 \
8157 default: \
8158 case FP_ZERO: \
8159 VALUE = 0; \
8160 break; \
8161 } \
8162 } \
8163 while (0)
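
/* For example, converting 1.0e20f to a 32-bit signed integer exceeds
   FLOAT_INT_MAX, so the FP_NORMAL case above sets IO and IX in the
   FPSR and saturates the result to INT_MAX.  */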
8164
8165 /* 32 bit convert float to signed int truncate towards zero. */
8166 static void
8167 fcvtszs32 (sim_cpu *cpu)
8168 {
8169 unsigned sn = INSTR (9, 5);
8170 unsigned rd = INSTR (4, 0);
8171   /* The C cast truncates towards zero, as FCVTZS requires (C99 6.3.1.4).  */
8172 float f = aarch64_get_FP_float (cpu, sn);
8173 int32_t value = (int32_t) f;
8174
8175 RAISE_EXCEPTIONS (f, value, FLOAT, INT);
8176
8177 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8178 /* Avoid sign extension to 64 bit. */
8179 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8180 }
8181
8182 /* 64 bit convert float to signed int truncate towards zero. */
8183 static void
8184 fcvtszs (sim_cpu *cpu)
8185 {
8186 unsigned sn = INSTR (9, 5);
8187 unsigned rd = INSTR (4, 0);
8188 float f = aarch64_get_FP_float (cpu, sn);
8189 int64_t value = (int64_t) f;
8190
8191 RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
8192
8193 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8194 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8195 }
8196
8197 /* 32 bit convert double to signed int truncate towards zero. */
8198 static void
8199 fcvtszd32 (sim_cpu *cpu)
8200 {
8201 unsigned sn = INSTR (9, 5);
8202 unsigned rd = INSTR (4, 0);
8203   /* The C cast truncates towards zero, as FCVTZS requires (C99 6.3.1.4).  */
8204 double d = aarch64_get_FP_double (cpu, sn);
8205 int32_t value = (int32_t) d;
8206
8207 RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
8208
8209 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8210 /* Avoid sign extension to 64 bit. */
8211 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8212 }
8213
8214 /* 64 bit convert double to signed int truncate towards zero. */
8215 static void
8216 fcvtszd (sim_cpu *cpu)
8217 {
8218 unsigned sn = INSTR (9, 5);
8219 unsigned rd = INSTR (4, 0);
8220   /* The C cast truncates towards zero, as FCVTZS requires (C99 6.3.1.4).  */
8221 double d = aarch64_get_FP_double (cpu, sn);
8222 int64_t value;
8223
8224 value = (int64_t) d;
8225
8226 RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
8227
8228 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8229 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8230 }
8231
8232 static void
8233 do_fcvtzu (sim_cpu *cpu)
8234 {
8235 /* instr[31] = size: 32-bit (0), 64-bit (1)
8236 instr[30,23] = 00111100
8237 instr[22] = type: single (0)/ double (1)
8238      instr[21]    = 0 ==> fixed-point (scaled), 1 ==> integer
8239 instr[20,16] = 11001
8240 instr[15,10] = precision
8241 instr[9,5] = Rs
8242 instr[4,0] = Rd. */
8243
8244 unsigned rs = INSTR (9, 5);
8245 unsigned rd = INSTR (4, 0);
8246
8247 NYI_assert (30, 23, 0x3C);
8248 NYI_assert (20, 16, 0x19);
8249
8250 if (INSTR (21, 21) != 1)
8251 /* Convert to fixed point. */
8252 HALT_NYI;
8253
8254 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8255 if (INSTR (31, 31))
8256 {
8257 /* Convert to unsigned 64-bit integer. */
8258 if (INSTR (22, 22))
8259 {
8260 double d = aarch64_get_FP_double (cpu, rs);
8261 uint64_t value = (uint64_t) d;
8262
8263 /* Do not raise an exception if we have reached ULONG_MAX. */
8264 if (value != (1UL << 63))
8265 RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
8266
8267 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8268 }
8269 else
8270 {
8271 float f = aarch64_get_FP_float (cpu, rs);
8272 uint64_t value = (uint64_t) f;
8273
8274 /* Do not raise an exception if we have reached ULONG_MAX. */
8275 if (value != (1UL << 63))
8276 RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
8277
8278 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8279 }
8280 }
8281 else
8282 {
8283 uint32_t value;
8284
8285 /* Convert to unsigned 32-bit integer. */
8286 if (INSTR (22, 22))
8287 {
8288 double d = aarch64_get_FP_double (cpu, rs);
8289
8290 value = (uint32_t) d;
8291 /* Do not raise an exception if we have reached UINT_MAX. */
8292 if (value != (1UL << 31))
8293 RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
8294 }
8295 else
8296 {
8297 float f = aarch64_get_FP_float (cpu, rs);
8298
8299 value = (uint32_t) f;
8300 /* Do not raise an exception if we have reached UINT_MAX. */
8301 if (value != (1UL << 31))
8302 RAISE_EXCEPTIONS (f, value, FLOAT, INT);
8303 }
8304
8305 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8306 }
8307 }
8308
8309 static void
8310 do_UCVTF (sim_cpu *cpu)
8311 {
8312 /* instr[31] = size: 32-bit (0), 64-bit (1)
8313 instr[30,23] = 001 1110 0
8314 instr[22] = type: single (0)/ double (1)
8315      instr[21]    = 0 ==> fixed-point (scaled), 1 ==> integer
8316 instr[20,16] = 0 0011
8317 instr[15,10] = precision
8318 instr[9,5] = Rs
8319 instr[4,0] = Rd. */
8320
8321 unsigned rs = INSTR (9, 5);
8322 unsigned rd = INSTR (4, 0);
8323
8324 NYI_assert (30, 23, 0x3C);
8325 NYI_assert (20, 16, 0x03);
8326
8327 if (INSTR (21, 21) != 1)
8328 HALT_NYI;
8329
8330 /* FIXME: Add exception raising. */
8331 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8332 if (INSTR (31, 31))
8333 {
8334 uint64_t value = aarch64_get_reg_u64 (cpu, rs, NO_SP);
8335
8336 if (INSTR (22, 22))
8337 aarch64_set_FP_double (cpu, rd, (double) value);
8338 else
8339 aarch64_set_FP_float (cpu, rd, (float) value);
8340 }
8341 else
8342 {
8343 uint32_t value = aarch64_get_reg_u32 (cpu, rs, NO_SP);
8344
8345 if (INSTR (22, 22))
8346 aarch64_set_FP_double (cpu, rd, (double) value);
8347 else
8348 aarch64_set_FP_float (cpu, rd, (float) value);
8349 }
8350 }
8351
8352 static void
8353 float_vector_move (sim_cpu *cpu)
8354 {
8355 /* instr[31,17] == 100 1111 0101 0111
8356 instr[16] ==> direction 0=> to GR, 1=> from GR
8357      instr[15,10] = 00 0000, ow ==> UNALLOC
8358 instr[9,5] ==> source
8359 instr[4,0] ==> dest. */
8360
8361 unsigned rn = INSTR (9, 5);
8362 unsigned rd = INSTR (4, 0);
8363
8364 NYI_assert (31, 17, 0x4F57);
8365
8366 if (INSTR (15, 10) != 0)
8367 HALT_UNALLOC;
8368
8369 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8370 if (INSTR (16, 16))
8371 aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_reg_u64 (cpu, rn, NO_SP));
8372 else
8373 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, rn, 1));
8374 }
8375
8376 static void
8377 dexSimpleFPIntegerConvert (sim_cpu *cpu)
8378 {
8379 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
8380      instr[30]    = 0
8381 instr[29] = S : 0 ==> OK, 1 ==> UNALLOC
8382 instr[28,25] = 1111
8383 instr[24] = 0
8384 instr[23,22] = type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8385 instr[21] = 1
8386 instr[20,19] = rmode
8387 instr[18,16] = opcode
8388 instr[15,10] = 10 0000 */
8389
8390 uint32_t rmode_opcode;
8391 uint32_t size_type;
8392 uint32_t type;
8393 uint32_t size;
8394 uint32_t S;
8395
8396 if (INSTR (31, 17) == 0x4F57)
8397 {
8398 float_vector_move (cpu);
8399 return;
8400 }
8401
8402 size = INSTR (31, 31);
8403 S = INSTR (29, 29);
8404 if (S != 0)
8405 HALT_UNALLOC;
8406
8407 type = INSTR (23, 22);
8408 if (type > 1)
8409 HALT_UNALLOC;
8410
8411 rmode_opcode = INSTR (20, 16);
8412 size_type = (size << 1) | type; /* 0==32f, 1==32d, 2==64f, 3==64d. */
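  /* For example, FCVTZS Wd, Dn has size == 0 and type == 1, giving
     size_type == 1, so it is handled by fcvtszd32 below.  */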
8413
8414 switch (rmode_opcode)
8415 {
8416 case 2: /* SCVTF. */
8417 switch (size_type)
8418 {
8419 case 0: scvtf32 (cpu); return;
8420 case 1: scvtd32 (cpu); return;
8421 case 2: scvtf (cpu); return;
8422 case 3: scvtd (cpu); return;
8423 }
8424
8425 case 6: /* FMOV GR, Vec. */
8426 switch (size_type)
8427 {
8428 case 0: gfmovs (cpu); return;
8429 case 3: gfmovd (cpu); return;
8430 default: HALT_UNALLOC;
8431 }
8432
8433 case 7: /* FMOV vec, GR. */
8434 switch (size_type)
8435 {
8436 case 0: fgmovs (cpu); return;
8437 case 3: fgmovd (cpu); return;
8438 default: HALT_UNALLOC;
8439 }
8440
8441 case 24: /* FCVTZS. */
8442 switch (size_type)
8443 {
8444 case 0: fcvtszs32 (cpu); return;
8445 case 1: fcvtszd32 (cpu); return;
8446 case 2: fcvtszs (cpu); return;
8447 case 3: fcvtszd (cpu); return;
8448 }
8449
8450 case 25: do_fcvtzu (cpu); return;
8451 case 3: do_UCVTF (cpu); return;
8452
8453 case 0: /* FCVTNS. */
8454 case 1: /* FCVTNU. */
8455 case 4: /* FCVTAS. */
8456 case 5: /* FCVTAU. */
8457 case 8: /* FCVTPS. */
8458 case 9: /* FCVTPU. */
8459 case 16: /* FCVTMS. */
8460 case 17: /* FCVTMU. */
8461 default:
8462 HALT_NYI;
8463 }
8464 }
8465
8466 static void
8467 set_flags_for_float_compare (sim_cpu *cpu, float fvalue1, float fvalue2)
8468 {
8469 uint32_t flags;
8470
8471 if (isnan (fvalue1) || isnan (fvalue2))
8472 flags = C|V;
8473 else
8474 {
8475 float result = fvalue1 - fvalue2;
8476
8477 if (result == 0.0)
8478 flags = Z|C;
8479 else if (result < 0)
8480 flags = N;
8481 else /* (result > 0). */
8482 flags = C;
8483 }
8484
8485 aarch64_set_CPSR (cpu, flags);
8486 }
8487
8488 static void
8489 fcmps (sim_cpu *cpu)
8490 {
8491 unsigned sm = INSTR (20, 16);
8492 unsigned sn = INSTR ( 9, 5);
8493
8494 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8495 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8496
8497 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8498 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8499 }
8500
8501 /* Float compare to zero -- Invalid Operation exception
8502 only on signaling NaNs. */
8503 static void
8504 fcmpzs (sim_cpu *cpu)
8505 {
8506 unsigned sn = INSTR ( 9, 5);
8507 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8508
8509 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8510 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8511 }
8512
8513 /* Float compare -- Invalid Operation exception on all NaNs. */
8514 static void
8515 fcmpes (sim_cpu *cpu)
8516 {
8517 unsigned sm = INSTR (20, 16);
8518 unsigned sn = INSTR ( 9, 5);
8519
8520 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8521 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8522
8523 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8524 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8525 }
8526
8527 /* Float compare to zero -- Invalid Operation exception on all NaNs. */
8528 static void
8529 fcmpzes (sim_cpu *cpu)
8530 {
8531 unsigned sn = INSTR ( 9, 5);
8532 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8533
8534 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8535 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8536 }
8537
8538 static void
8539 set_flags_for_double_compare (sim_cpu *cpu, double dval1, double dval2)
8540 {
8541 uint32_t flags;
8542
8543 if (isnan (dval1) || isnan (dval2))
8544 flags = C|V;
8545 else
8546 {
8547 double result = dval1 - dval2;
8548
8549 if (result == 0.0)
8550 flags = Z|C;
8551 else if (result < 0)
8552 flags = N;
8553 else /* (result > 0). */
8554 flags = C;
8555 }
8556
8557 aarch64_set_CPSR (cpu, flags);
8558 }
8559
8560 /* Double compare -- Invalid Operation exception only on signaling NaNs. */
8561 static void
8562 fcmpd (sim_cpu *cpu)
8563 {
8564 unsigned sm = INSTR (20, 16);
8565 unsigned sn = INSTR ( 9, 5);
8566
8567 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8568 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8569
8570 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8571 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8572 }
8573
8574 /* Double compare to zero -- Invalid Operation exception
8575 only on signaling NaNs. */
8576 static void
8577 fcmpzd (sim_cpu *cpu)
8578 {
8579 unsigned sn = INSTR ( 9, 5);
8580 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8581
8582 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8583 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8584 }
8585
8586 /* Double compare -- Invalid Operation exception on all NaNs. */
8587 static void
8588 fcmped (sim_cpu *cpu)
8589 {
8590 unsigned sm = INSTR (20, 16);
8591 unsigned sn = INSTR ( 9, 5);
8592
8593 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8594 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8595
8596 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8597 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8598 }
8599
8600 /* Double compare to zero -- Invalid Operation exception on all NaNs. */
8601 static void
8602 fcmpzed (sim_cpu *cpu)
8603 {
8604 unsigned sn = INSTR ( 9, 5);
8605 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8606
8607 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8608 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8609 }
8610
8611 static void
8612 dexSimpleFPCompare (sim_cpu *cpu)
8613 {
8614 /* assert instr[28,25] == 1111
8615 instr[30] = 0, instr[24] = 0, instr[21] = 1, instr[13,10] = 1000
8616 instr[31] = M : 0 ==> OK, 1 ==> UNALLOC
8617 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
8618 instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8619 instr[15,14] ==> op : 00 ==> OK, ow ==> UNALLOC
8620 instr[4,0] ==> opcode2 : 00000 ==> FCMP, 10000 ==> FCMPE,
8621 01000 ==> FCMPZ, 11000 ==> FCMPEZ,
8622 ow ==> UNALLOC */
8623 uint32_t dispatch;
8624 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
8625 uint32_t type = INSTR (23, 22);
8626 uint32_t op = INSTR (15, 14);
8627 uint32_t op2_2_0 = INSTR (2, 0);
8628
8629 if (op2_2_0 != 0)
8630 HALT_UNALLOC;
8631
8632 if (M_S != 0)
8633 HALT_UNALLOC;
8634
8635 if (type > 1)
8636 HALT_UNALLOC;
8637
8638 if (op != 0)
8639 HALT_UNALLOC;
8640
8641 /* dispatch on type and top 2 bits of opcode. */
8642 dispatch = (type << 2) | INSTR (4, 3);
8643
8644 switch (dispatch)
8645 {
8646 case 0: fcmps (cpu); return;
8647 case 1: fcmpzs (cpu); return;
8648 case 2: fcmpes (cpu); return;
8649 case 3: fcmpzes (cpu); return;
8650 case 4: fcmpd (cpu); return;
8651 case 5: fcmpzd (cpu); return;
8652 case 6: fcmped (cpu); return;
8653 case 7: fcmpzed (cpu); return;
8654 }
8655 }
8656
8657 static void
8658 do_scalar_FADDP (sim_cpu *cpu)
8659 {
8660 /* instr [31,23] = 0111 1110 0
8661 instr [22] = single(0)/double(1)
8662 instr [21,10] = 11 0000 1101 10
8663 instr [9,5] = Fn
8664 instr [4,0] = Fd. */
8665
8666 unsigned Fn = INSTR (9, 5);
8667 unsigned Fd = INSTR (4, 0);
8668
8669 NYI_assert (31, 23, 0x0FC);
8670 NYI_assert (21, 10, 0xC36);
8671
8672 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8673 if (INSTR (22, 22))
8674 {
8675 double val1 = aarch64_get_vec_double (cpu, Fn, 0);
8676 double val2 = aarch64_get_vec_double (cpu, Fn, 1);
8677
8678 aarch64_set_FP_double (cpu, Fd, val1 + val2);
8679 }
8680 else
8681 {
8682 float val1 = aarch64_get_vec_float (cpu, Fn, 0);
8683 float val2 = aarch64_get_vec_float (cpu, Fn, 1);
8684
8685 aarch64_set_FP_float (cpu, Fd, val1 + val2);
8686 }
8687 }
8688
8689 /* Floating point absolute difference. */
8690
8691 static void
8692 do_scalar_FABD (sim_cpu *cpu)
8693 {
8694 /* instr [31,23] = 0111 1110 1
8695 instr [22] = float(0)/double(1)
8696 instr [21] = 1
8697 instr [20,16] = Rm
8698 instr [15,10] = 1101 01
8699 instr [9, 5] = Rn
8700 instr [4, 0] = Rd. */
8701
8702 unsigned rm = INSTR (20, 16);
8703 unsigned rn = INSTR (9, 5);
8704 unsigned rd = INSTR (4, 0);
8705
8706 NYI_assert (31, 23, 0x0FD);
8707 NYI_assert (21, 21, 1);
8708 NYI_assert (15, 10, 0x35);
8709
8710 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8711 if (INSTR (22, 22))
8712 aarch64_set_FP_double (cpu, rd,
8713 fabs (aarch64_get_FP_double (cpu, rn)
8714 - aarch64_get_FP_double (cpu, rm)));
8715 else
8716 aarch64_set_FP_float (cpu, rd,
8717 fabsf (aarch64_get_FP_float (cpu, rn)
8718 - aarch64_get_FP_float (cpu, rm)));
8719 }
8720
8721 static void
8722 do_scalar_CMGT (sim_cpu *cpu)
8723 {
8724 /* instr [31,21] = 0101 1110 111
8725 instr [20,16] = Rm
8726 instr [15,10] = 00 1101
8727 instr [9, 5] = Rn
8728 instr [4, 0] = Rd. */
8729
8730 unsigned rm = INSTR (20, 16);
8731 unsigned rn = INSTR (9, 5);
8732 unsigned rd = INSTR (4, 0);
8733
8734 NYI_assert (31, 21, 0x2F7);
8735 NYI_assert (15, 10, 0x0D);
8736
8737 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8738 aarch64_set_vec_u64 (cpu, rd, 0,
8739 aarch64_get_vec_u64 (cpu, rn, 0) >
8740 aarch64_get_vec_u64 (cpu, rm, 0) ? -1L : 0L);
8741 }
8742
8743 static void
8744 do_scalar_USHR (sim_cpu *cpu)
8745 {
8746 /* instr [31,23] = 0111 1111 0
8747 instr [22,16] = shift amount
8748 instr [15,10] = 0000 01
8749 instr [9, 5] = Rn
8750 instr [4, 0] = Rd. */
8751
8752 unsigned amount = 128 - INSTR (22, 16);
8753 unsigned rn = INSTR (9, 5);
8754 unsigned rd = INSTR (4, 0);
8755
8756 NYI_assert (31, 23, 0x0FE);
8757 NYI_assert (15, 10, 0x01);
8758
8759 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8760 /* USHR #64 must yield zero; a C shift by 64 would be undefined. */
8760 aarch64_set_vec_u64 (cpu, rd, 0, amount < 64
8761 ? aarch64_get_vec_u64 (cpu, rn, 0) >> amount : 0);
8762 }
8763
8764 static void
8765 do_scalar_SSHL (sim_cpu *cpu)
8766 {
8767 /* instr [31,21] = 0101 1110 111
8768 instr [20,16] = Rm
8769 instr [15,10] = 0100 01
8770 instr [9, 5] = Rn
8771 instr [4, 0] = Rd. */
8772
8773 unsigned rm = INSTR (20, 16);
8774 unsigned rn = INSTR (9, 5);
8775 unsigned rd = INSTR (4, 0);
8776 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
8777
8778 NYI_assert (31, 21, 0x2F7);
8779 NYI_assert (15, 10, 0x11);
8780
8781 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8782 if (shift >= 0)
8783 aarch64_set_vec_s64 (cpu, rd, 0,
8784 aarch64_get_vec_s64 (cpu, rn, 0) << shift);
8785 else
8786 aarch64_set_vec_s64 (cpu, rd, 0,
8787 aarch64_get_vec_s64 (cpu, rn, 0) >> - shift);
8788 }
8789
8790 static void
8791 do_scalar_shift (sim_cpu *cpu)
8792 {
8793 /* instr [31,23] = 0101 1111 0
8794 instr [22,16] = shift amount
8795 instr [15,10] = 0101 01 [SHL]
8796 instr [15,10] = 0000 01 [SSHR]
8797 instr [9, 5] = Rn
8798 instr [4, 0] = Rd. */
8799
8800 unsigned rn = INSTR (9, 5);
8801 unsigned rd = INSTR (4, 0);
8802 unsigned amount;
8803
8804 NYI_assert (31, 23, 0x0BE);
8805
8806 if (INSTR (22, 22) == 0)
8807 HALT_UNALLOC;
8808
8809 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8810 switch (INSTR (15, 10))
8811 {
8812 case 0x01: /* SSHR */
8813 amount = 128 - INSTR (22, 16);
8814 aarch64_set_vec_s64 (cpu, rd, 0, aarch64_get_vec_s64 (cpu, rn, 0)
8815 >> (amount == 64 ? 63 : amount)); /* ASR #64 == ASR #63; avoids an undefined shift. */
8816 return;
8817 case 0x15: /* SHL */
8818 amount = INSTR (22, 16) - 64;
8819 aarch64_set_vec_u64 (cpu, rd, 0,
8820 aarch64_get_vec_u64 (cpu, rn, 0) << amount);
8821 return;
8822 default:
8823 HALT_NYI;
8824 }
8825 }
8826
8827 /* FCMEQ FCMGT FCMGE. */
8828 static void
8829 do_scalar_FCM (sim_cpu *cpu)
8830 {
8831 /* instr [31,30] = 01
8832 instr [29] = U
8833 instr [28,24] = 1 1110
8834 instr [23] = E
8835 instr [22] = size
8836 instr [21] = 1
8837 instr [20,16] = Rm
8838 instr [15,12] = 1110
8839 instr [11] = AC
8840 instr [10] = 1
8841 instr [9, 5] = Rn
8842 instr [4, 0] = Rd. */
8843
8844 unsigned rm = INSTR (20, 16);
8845 unsigned rn = INSTR (9, 5);
8846 unsigned rd = INSTR (4, 0);
8847 unsigned EUac = (INSTR (23, 23) << 2) | (INSTR (29, 29) << 1) | INSTR (11, 11);
8848 unsigned result;
8849 float val1;
8850 float val2;
8851
8852 NYI_assert (31, 30, 1);
8853 NYI_assert (28, 24, 0x1E);
8854 NYI_assert (21, 21, 1);
8855 NYI_assert (15, 12, 0xE);
8856 NYI_assert (10, 10, 1);
8857
8858 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8859 if (INSTR (22, 22))
8860 {
8861 double val1 = aarch64_get_FP_double (cpu, rn);
8862 double val2 = aarch64_get_FP_double (cpu, rm);
8863
8864 switch (EUac)
8865 {
8866 case 0: /* 000 */
8867 result = val1 == val2;
8868 break;
8869
8870 case 3: /* 011 */
8871 val1 = fabs (val1);
8872 val2 = fabs (val2);
8873 /* Fall through. */
8874 case 2: /* 010 */
8875 result = val1 >= val2;
8876 break;
8877
8878 case 7: /* 111 */
8879 val1 = fabs (val1);
8880 val2 = fabs (val2);
8881 /* Fall through. */
8882 case 6: /* 110 */
8883 result = val1 > val2;
8884 break;
8885
8886 default:
8887 HALT_UNALLOC;
8888 }
8889
8890 aarch64_set_vec_u64 (cpu, rd, 0, result ? -1ULL : 0); /* Double compares write a 64-bit mask. */
8891 return;
8892 }
8893
8894 val1 = aarch64_get_FP_float (cpu, rn);
8895 val2 = aarch64_get_FP_float (cpu, rm);
8896
8897 switch (EUac)
8898 {
8899 case 0: /* 000 */
8900 result = val1 == val2;
8901 break;
8902
8903 case 3: /* 011 */
8904 val1 = fabsf (val1);
8905 val2 = fabsf (val2);
8906 /* Fall through. */
8907 case 2: /* 010 */
8908 result = val1 >= val2;
8909 break;
8910
8911 case 7: /* 111 */
8912 val1 = fabsf (val1);
8913 val2 = fabsf (val2);
8914 /* Fall through. */
8915 case 6: /* 110 */
8916 result = val1 > val2;
8917 break;
8918
8919 default:
8920 HALT_UNALLOC;
8921 }
8922
8923 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
8924 }
8925
8926 /* An alias of DUP. */
8927 static void
8928 do_scalar_MOV (sim_cpu *cpu)
8929 {
8930 /* instr [31,21] = 0101 1110 000
8931 instr [20,16] = imm5
8932 instr [15,10] = 0000 01
8933 instr [9, 5] = Rn
8934 instr [4, 0] = Rd. */
8935
8936 unsigned rn = INSTR (9, 5);
8937 unsigned rd = INSTR (4, 0);
8938 unsigned index;
8939
8940 NYI_assert (31, 21, 0x2F0);
8941 NYI_assert (15, 10, 0x01);
8942
8943 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8944 if (INSTR (16, 16))
8945 {
8946 /* 8-bit. */
8947 index = INSTR (20, 17);
8948 aarch64_set_vec_u8
8949 (cpu, rd, 0, aarch64_get_vec_u8 (cpu, rn, index));
8950 }
8951 else if (INSTR (17, 17))
8952 {
8953 /* 16-bit. */
8954 index = INSTR (20, 18);
8955 aarch64_set_vec_u16
8956 (cpu, rd, 0, aarch64_get_vec_u16 (cpu, rn, index));
8957 }
8958 else if (INSTR (18, 18))
8959 {
8960 /* 32-bit. */
8961 index = INSTR (20, 19);
8962 aarch64_set_vec_u32
8963 (cpu, rd, 0, aarch64_get_vec_u32 (cpu, rn, index));
8964 }
8965 else if (INSTR (19, 19))
8966 {
8967 /* 64-bit. */
8968 index = INSTR (20, 20);
8969 aarch64_set_vec_u64
8970 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, index));
8971 }
8972 else
8973 HALT_UNALLOC;
8974 }
8975
8976 static void
8977 do_scalar_NEG (sim_cpu *cpu)
8978 {
8979 /* instr [31,10] = 0111 1110 1110 0000 1011 10
8980 instr [9, 5] = Rn
8981 instr [4, 0] = Rd. */
8982
8983 unsigned rn = INSTR (9, 5);
8984 unsigned rd = INSTR (4, 0);
8985
8986 NYI_assert (31, 10, 0x1FB82E);
8987
8988 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8989 aarch64_set_vec_u64 (cpu, rd, 0, - aarch64_get_vec_u64 (cpu, rn, 0));
8990 }
8991
8992 static void
8993 do_scalar_USHL (sim_cpu *cpu)
8994 {
8995 /* instr [31,21] = 0111 1110 111
8996 instr [20,16] = Rm
8997 instr [15,10] = 0100 01
8998 instr [9, 5] = Rn
8999 instr [4, 0] = Rd. */
9000
9001 unsigned rm = INSTR (20, 16);
9002 unsigned rn = INSTR (9, 5);
9003 unsigned rd = INSTR (4, 0);
9004 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
9005
9006 NYI_assert (31, 21, 0x3F7);
9007 NYI_assert (15, 10, 0x11);
9008
9009 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9010 if (shift >= 0)
9011 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) << shift);
9012 else
9013 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) >> - shift);
9014 }
9015
9016 static void
9017 do_double_add (sim_cpu *cpu)
9018 {
9019 /* instr [31,21] = 0101 1110 111
9020 instr [20,16] = Fn
9021 instr [15,10] = 1000 01
9022 instr [9,5] = Fm
9023 instr [4,0] = Fd. */
9024 unsigned Fd;
9025 unsigned Fm;
9026 unsigned Fn;
9027 double val1;
9028 double val2;
9029
9030 NYI_assert (31, 21, 0x2F7);
9031 NYI_assert (15, 10, 0x21);
9032
9033 Fd = INSTR (4, 0);
9034 Fm = INSTR (9, 5);
9035 Fn = INSTR (20, 16);
9036
9037 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9038 val1 = aarch64_get_FP_double (cpu, Fm);
9039 val2 = aarch64_get_FP_double (cpu, Fn);
9040
9041 aarch64_set_FP_double (cpu, Fd, val1 + val2);
9042 }
9043
9044 static void
9045 do_scalar_UCVTF (sim_cpu *cpu)
9046 {
9047 /* instr [31,23] = 0111 1110 0
9048 instr [22] = single(0)/double(1)
9049 instr [21,10] = 10 0001 1101 10
9050 instr [9,5] = rn
9051 instr [4,0] = rd. */
9052
9053 unsigned rn = INSTR (9, 5);
9054 unsigned rd = INSTR (4, 0);
9055
9056 NYI_assert (31, 23, 0x0FC);
9057 NYI_assert (21, 10, 0x876);
9058
9059 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9060 if (INSTR (22, 22))
9061 {
9062 uint64_t val = aarch64_get_vec_u64 (cpu, rn, 0);
9063
9064 aarch64_set_vec_double (cpu, rd, 0, (double) val);
9065 }
9066 else
9067 {
9068 uint32_t val = aarch64_get_vec_u32 (cpu, rn, 0);
9069
9070 aarch64_set_vec_float (cpu, rd, 0, (float) val);
9071 }
9072 }
9073
9074 static void
9075 do_scalar_vec (sim_cpu *cpu)
9076 {
9077 /* instr [30] = 1. */
9078 /* instr [28,25] = 1111. */
9079 switch (INSTR (31, 23))
9080 {
9081 case 0xBC:
9082 switch (INSTR (15, 10))
9083 {
9084 case 0x01: do_scalar_MOV (cpu); return;
9085 case 0x39: do_scalar_FCM (cpu); return;
9086 case 0x3B: do_scalar_FCM (cpu); return;
9087 }
9088 HALT_NYI; /* Unmatched 0xBC encodings are unimplemented, not NOPs. */
9089
9090 case 0xBE: do_scalar_shift (cpu); return;
9091
9092 case 0xFC:
9093 switch (INSTR (15, 10))
9094 {
9095 case 0x36:
9096 switch (INSTR (21, 16))
9097 {
9098 case 0x30: do_scalar_FADDP (cpu); return;
9099 case 0x21: do_scalar_UCVTF (cpu); return;
9100 }
9101 HALT_NYI;
9102 case 0x39: do_scalar_FCM (cpu); return;
9103 case 0x3B: do_scalar_FCM (cpu); return;
9104 }
9105 HALT_NYI; /* Likewise for unmatched 0xFC encodings. */
9106
9107 case 0xFD:
9108 switch (INSTR (15, 10))
9109 {
9110 case 0x0D: do_scalar_CMGT (cpu); return;
9111 case 0x11: do_scalar_USHL (cpu); return;
9112 case 0x2E: do_scalar_NEG (cpu); return;
9113 case 0x35: do_scalar_FABD (cpu); return;
9114 case 0x39: do_scalar_FCM (cpu); return;
9115 case 0x3B: do_scalar_FCM (cpu); return;
9116 default:
9117 HALT_NYI;
9118 }
9119
9120 case 0xFE: do_scalar_USHR (cpu); return;
9121
9122 case 0xBD:
9123 switch (INSTR (15, 10))
9124 {
9125 case 0x21: do_double_add (cpu); return;
9126 case 0x11: do_scalar_SSHL (cpu); return;
9127 default:
9128 HALT_NYI;
9129 }
9130
9131 default:
9132 HALT_NYI;
9133 }
9134 }
9135
9136 static void
9137 dexAdvSIMD1 (sim_cpu *cpu)
9138 {
9139 /* instr [28,25] = 1 111. */
9140
9141 /* The basic scalar fp routines all have bit 30 = 0; if it is
9142 set, dispatch to the scalar advanced SIMD decoder instead. */
9143 if (INSTR (30, 30))
9144 do_scalar_vec (cpu);
9145
9146 /* instr[24] is set for FP data processing 3-source and clear for
9147 all other basic scalar fp instruction groups. */
9148 else if (INSTR (24, 24))
9149 dexSimpleFPDataProc3Source (cpu);
9150
9151 /* instr[21] is clear for floating <-> fixed conversions and set for
9152 all other basic scalar fp instruction groups. */
9153 else if (!INSTR (21, 21))
9154 dexSimpleFPFixedConvert (cpu);
9155
9156 /* instr[11,10] : 01 ==> cond compare, 10 ==> Data Proc 2 Source
9157 11 ==> cond select, 00 ==> other. */
9158 else
9159 switch (INSTR (11, 10))
9160 {
9161 case 1: dexSimpleFPCondCompare (cpu); return;
9162 case 2: dexSimpleFPDataProc2Source (cpu); return;
9163 case 3: dexSimpleFPCondSelect (cpu); return;
9164
9165 default:
9166 /* Now an ordered cascade of tests.
9167 FP immediate has instr [12] == 1.
9168 FP compare has instr [13] == 1.
9169 FP Data Proc 1 Source has instr [14] == 1.
9170 FP floating <--> integer conversions has instr [15] == 0. */
9171 if (INSTR (12, 12))
9172 dexSimpleFPImmediate (cpu);
9173
9174 else if (INSTR (13, 13))
9175 dexSimpleFPCompare (cpu);
9176
9177 else if (INSTR (14, 14))
9178 dexSimpleFPDataProc1Source (cpu);
9179
9180 else if (!INSTR (15, 15))
9181 dexSimpleFPIntegerConvert (cpu);
9182
9183 else
9184 /* If we get here then instr[15] == 1 which means UNALLOC. */
9185 HALT_UNALLOC;
9186 }
9187 }
9188
9189 /* PC relative addressing. */
9190
9191 static void
9192 pcadr (sim_cpu *cpu)
9193 {
9194 /* instr[31] = op : 0 ==> ADR, 1 ==> ADRP
9195 instr[30,29] = immlo
9196 instr[23,5] = immhi, instr[4,0] = Rd. */
9197 uint64_t address;
9198 unsigned rd = INSTR (4, 0);
9199 uint32_t isPage = INSTR (31, 31);
9200 union { uint64_t u64; int64_t s64; } imm;
9201 uint64_t offset;
9202
9203 imm.s64 = simm64 (aarch64_get_instr (cpu), 23, 5);
9204 offset = imm.u64;
9205 offset = (offset << 2) | INSTR (30, 29);
9206
9207 address = aarch64_get_PC (cpu);
9208
9209 if (isPage)
9210 {
9211 offset <<= 12;
9212 address &= ~0xfff;
9213 }
9214
9215 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9216 aarch64_set_reg_u64 (cpu, rd, NO_SP, address + offset);
9217 }
9218
9219 /* Specific decode and execute for group Data Processing Immediate. */
9220
9221 static void
9222 dexPCRelAddressing (sim_cpu *cpu)
9223 {
9224 /* assert instr[28,24] = 10000. */
9225 pcadr (cpu);
9226 }
9227
9228 /* Immediate logical.
9229 The bimm32/64 argument is constructed by replicating a 2, 4, 8,
9230 16, 32 or 64 bit sequence pulled out at decode and possibly
9231 inverting it.
9232
9233 N.B. the output register (dest) can normally be Xn or SP;
9234 the exception occurs for flag setting instructions which may
9235 only use Xn for the output (dest). The input register can
9236 never be SP. */
9237
9238 /* 32 bit and immediate. */
9239 static void
9240 and32 (sim_cpu *cpu, uint32_t bimm)
9241 {
9242 unsigned rn = INSTR (9, 5);
9243 unsigned rd = INSTR (4, 0);
9244
9245 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9246 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9247 aarch64_get_reg_u32 (cpu, rn, NO_SP) & bimm);
9248 }
9249
9250 /* 64 bit and immediate. */
9251 static void
9252 and64 (sim_cpu *cpu, uint64_t bimm)
9253 {
9254 unsigned rn = INSTR (9, 5);
9255 unsigned rd = INSTR (4, 0);
9256
9257 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9258 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9259 aarch64_get_reg_u64 (cpu, rn, NO_SP) & bimm);
9260 }
9261
9262 /* 32 bit and immediate set flags. */
9263 static void
9264 ands32 (sim_cpu *cpu, uint32_t bimm)
9265 {
9266 unsigned rn = INSTR (9, 5);
9267 unsigned rd = INSTR (4, 0);
9268
9269 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9270 uint32_t value2 = bimm;
9271
9272 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9273 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9274 set_flags_for_binop32 (cpu, value1 & value2);
9275 }
9276
9277 /* 64 bit and immediate set flags. */
9278 static void
9279 ands64 (sim_cpu *cpu, uint64_t bimm)
9280 {
9281 unsigned rn = INSTR (9, 5);
9282 unsigned rd = INSTR (4, 0);
9283
9284 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9285 uint64_t value2 = bimm;
9286
9287 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9288 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9289 set_flags_for_binop64 (cpu, value1 & value2);
9290 }
9291
9292 /* 32 bit exclusive or immediate. */
9293 static void
9294 eor32 (sim_cpu *cpu, uint32_t bimm)
9295 {
9296 unsigned rn = INSTR (9, 5);
9297 unsigned rd = INSTR (4, 0);
9298
9299 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9300 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9301 aarch64_get_reg_u32 (cpu, rn, NO_SP) ^ bimm);
9302 }
9303
9304 /* 64 bit exclusive or immediate. */
9305 static void
9306 eor64 (sim_cpu *cpu, uint64_t bimm)
9307 {
9308 unsigned rn = INSTR (9, 5);
9309 unsigned rd = INSTR (4, 0);
9310
9311 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9312 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9313 aarch64_get_reg_u64 (cpu, rn, NO_SP) ^ bimm);
9314 }
9315
9316 /* 32 bit or immediate. */
9317 static void
9318 orr32 (sim_cpu *cpu, uint32_t bimm)
9319 {
9320 unsigned rn = INSTR (9, 5);
9321 unsigned rd = INSTR (4, 0);
9322
9323 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9324 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9325 aarch64_get_reg_u32 (cpu, rn, NO_SP) | bimm);
9326 }
9327
9328 /* 64 bit or immediate. */
9329 static void
9330 orr64 (sim_cpu *cpu, uint64_t bimm)
9331 {
9332 unsigned rn = INSTR (9, 5);
9333 unsigned rd = INSTR (4, 0);
9334
9335 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9336 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9337 aarch64_get_reg_u64 (cpu, rn, NO_SP) | bimm);
9338 }
9339
9340 /* Logical shifted register.
9341 These allow an optional LSL, ASR, LSR or ROR to the second source
9342 register with a count up to the register bit count.
9343 N.B register args may not be SP. */
9344
9345 /* 32 bit AND shifted register. */
9346 static void
9347 and32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9348 {
9349 unsigned rm = INSTR (20, 16);
9350 unsigned rn = INSTR (9, 5);
9351 unsigned rd = INSTR (4, 0);
9352
9353 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9354 aarch64_set_reg_u64
9355 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9356 & shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9357 }
9358
9359 /* 64 bit AND shifted register. */
9360 static void
9361 and64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9362 {
9363 unsigned rm = INSTR (20, 16);
9364 unsigned rn = INSTR (9, 5);
9365 unsigned rd = INSTR (4, 0);
9366
9367 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9368 aarch64_set_reg_u64
9369 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9370 & shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9371 }
9372
9373 /* 32 bit AND shifted register setting flags. */
9374 static void
9375 ands32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9376 {
9377 unsigned rm = INSTR (20, 16);
9378 unsigned rn = INSTR (9, 5);
9379 unsigned rd = INSTR (4, 0);
9380
9381 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9382 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9383 shift, count);
9384
9385 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9386 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9387 set_flags_for_binop32 (cpu, value1 & value2);
9388 }
9389
9390 /* 64 bit AND shifted register setting flags. */
9391 static void
9392 ands64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9393 {
9394 unsigned rm = INSTR (20, 16);
9395 unsigned rn = INSTR (9, 5);
9396 unsigned rd = INSTR (4, 0);
9397
9398 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9399 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9400 shift, count);
9401
9402 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9403 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9404 set_flags_for_binop64 (cpu, value1 & value2);
9405 }
9406
9407 /* 32 bit BIC shifted register. */
9408 static void
9409 bic32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9410 {
9411 unsigned rm = INSTR (20, 16);
9412 unsigned rn = INSTR (9, 5);
9413 unsigned rd = INSTR (4, 0);
9414
9415 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9416 aarch64_set_reg_u64
9417 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9418 & ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9419 }
9420
9421 /* 64 bit BIC shifted register. */
9422 static void
9423 bic64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9424 {
9425 unsigned rm = INSTR (20, 16);
9426 unsigned rn = INSTR (9, 5);
9427 unsigned rd = INSTR (4, 0);
9428
9429 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9430 aarch64_set_reg_u64
9431 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9432 & ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9433 }
9434
9435 /* 32 bit BIC shifted register setting flags. */
9436 static void
9437 bics32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9438 {
9439 unsigned rm = INSTR (20, 16);
9440 unsigned rn = INSTR (9, 5);
9441 unsigned rd = INSTR (4, 0);
9442
9443 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9444 uint32_t value2 = ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9445 shift, count);
9446
9447 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9448 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9449 set_flags_for_binop32 (cpu, value1 & value2);
9450 }
9451
9452 /* 64 bit BIC shifted register setting flags. */
9453 static void
9454 bics64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9455 {
9456 unsigned rm = INSTR (20, 16);
9457 unsigned rn = INSTR (9, 5);
9458 unsigned rd = INSTR (4, 0);
9459
9460 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9461 uint64_t value2 = ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9462 shift, count);
9463
9464 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9465 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9466 set_flags_for_binop64 (cpu, value1 & value2);
9467 }
9468
9469 /* 32 bit EON shifted register. */
9470 static void
9471 eon32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9472 {
9473 unsigned rm = INSTR (20, 16);
9474 unsigned rn = INSTR (9, 5);
9475 unsigned rd = INSTR (4, 0);
9476
9477 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9478 aarch64_set_reg_u64
9479 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9480 ^ ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9481 }
9482
9483 /* 64 bit EON shifted register. */
9484 static void
9485 eon64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9486 {
9487 unsigned rm = INSTR (20, 16);
9488 unsigned rn = INSTR (9, 5);
9489 unsigned rd = INSTR (4, 0);
9490
9491 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9492 aarch64_set_reg_u64
9493 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9494 ^ ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9495 }
9496
9497 /* 32 bit EOR shifted register. */
9498 static void
9499 eor32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9500 {
9501 unsigned rm = INSTR (20, 16);
9502 unsigned rn = INSTR (9, 5);
9503 unsigned rd = INSTR (4, 0);
9504
9505 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9506 aarch64_set_reg_u64
9507 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9508 ^ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9509 }
9510
9511 /* 64 bit EOR shifted register. */
9512 static void
9513 eor64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9514 {
9515 unsigned rm = INSTR (20, 16);
9516 unsigned rn = INSTR (9, 5);
9517 unsigned rd = INSTR (4, 0);
9518
9519 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9520 aarch64_set_reg_u64
9521 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9522 ^ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9523 }
9524
9525 /* 32 bit ORR shifted register. */
9526 static void
9527 orr32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9528 {
9529 unsigned rm = INSTR (20, 16);
9530 unsigned rn = INSTR (9, 5);
9531 unsigned rd = INSTR (4, 0);
9532
9533 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9534 aarch64_set_reg_u64
9535 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9536 | shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9537 }
9538
9539 /* 64 bit ORR shifted register. */
9540 static void
9541 orr64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9542 {
9543 unsigned rm = INSTR (20, 16);
9544 unsigned rn = INSTR (9, 5);
9545 unsigned rd = INSTR (4, 0);
9546
9547 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9548 aarch64_set_reg_u64
9549 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9550 | shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9551 }
9552
9553 /* 32 bit ORN shifted register. */
9554 static void
9555 orn32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9556 {
9557 unsigned rm = INSTR (20, 16);
9558 unsigned rn = INSTR (9, 5);
9559 unsigned rd = INSTR (4, 0);
9560
9561 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9562 aarch64_set_reg_u64
9563 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9564 | ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9565 }
9566
9567 /* 64 bit ORN shifted register. */
9568 static void
9569 orn64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9570 {
9571 unsigned rm = INSTR (20, 16);
9572 unsigned rn = INSTR (9, 5);
9573 unsigned rd = INSTR (4, 0);
9574
9575 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9576 aarch64_set_reg_u64
9577 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9578 | ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9579 }
9580
9581 static void
9582 dexLogicalImmediate (sim_cpu *cpu)
9583 {
9584 /* assert instr[28,23] = 100100
9585 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9586 instr[30,29] = op : 0 ==> AND, 1 ==> ORR, 2 ==> EOR, 3 ==> ANDS
9587 instr[22] = N : used to construct immediate mask
9588 instr[21,16] = immr
9589 instr[15,10] = imms
9590 instr[9,5] = Rn
9591 instr[4,0] = Rd */
9592
9593 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
9594 uint32_t size = INSTR (31, 31);
9595 uint32_t N = INSTR (22, 22);
9596 /* uint32_t immr = INSTR (21, 16); */
9597 /* uint32_t imms = INSTR (15, 10); */
9598 uint32_t index = INSTR (22, 10);
9599 uint64_t bimm64 = LITable [index];
9600 uint32_t dispatch = INSTR (30, 29);
9601
9602 if (~size & N)
9603 HALT_UNALLOC;
9604
9605 if (!bimm64)
9606 HALT_UNALLOC;
9607
9608 if (size == 0)
9609 {
9610 uint32_t bimm = (uint32_t) bimm64;
9611
9612 switch (dispatch)
9613 {
9614 case 0: and32 (cpu, bimm); return;
9615 case 1: orr32 (cpu, bimm); return;
9616 case 2: eor32 (cpu, bimm); return;
9617 case 3: ands32 (cpu, bimm); return;
9618 }
9619 }
9620 else
9621 {
9622 switch (dispatch)
9623 {
9624 case 0: and64 (cpu, bimm64); return;
9625 case 1: orr64 (cpu, bimm64); return;
9626 case 2: eor64 (cpu, bimm64); return;
9627 case 3: ands64 (cpu, bimm64); return;
9628 }
9629 }
9630 HALT_UNALLOC;
9631 }
9632
9633 /* Immediate move.
9634 The uimm argument is a 16 bit value to be inserted into the
9635 target register; the pos argument locates the 16 bit word in the
9636 dest register, i.e. it is in {0, 1} for 32 bit and {0, 1, 2,
9637 3} for 64 bit.
9638 N.B. the register arg may not be SP, so it should be
9639 accessed using the setGZRegisterXXX accessors. */
9640
9641 /* 32 bit move 16 bit immediate zero remaining shorts. */
9642 static void
9643 movz32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9644 {
9645 unsigned rd = INSTR (4, 0);
9646
9647 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9648 aarch64_set_reg_u64 (cpu, rd, NO_SP, val << (pos * 16));
9649 }
9650
9651 /* 64 bit move 16 bit immediate zero remaining shorts. */
9652 static void
9653 movz64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9654 {
9655 unsigned rd = INSTR (4, 0);
9656
9657 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9658 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((uint64_t) val) << (pos * 16));
9659 }
9660
9661 /* 32 bit move 16 bit immediate negated. */
9662 static void
9663 movn32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9664 {
9665 unsigned rd = INSTR (4, 0);
9666
9667 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9668 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((val << (pos * 16)) ^ 0xffffffffU));
9669 }
9670
9671 /* 64 bit move 16 bit immediate negated. */
9672 static void
9673 movn64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9674 {
9675 unsigned rd = INSTR (4, 0);
9676
9677 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9678 aarch64_set_reg_u64
9679 (cpu, rd, NO_SP, ((((uint64_t) val) << (pos * 16))
9680 ^ 0xffffffffffffffffULL));
9681 }
9682
9683 /* 32 bit move 16 bit immediate keep remaining shorts. */
9684 static void
9685 movk32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9686 {
9687 unsigned rd = INSTR (4, 0);
9688 uint32_t current = aarch64_get_reg_u32 (cpu, rd, NO_SP);
9689 uint32_t value = val << (pos * 16);
9690 uint32_t mask = ~(0xffffU << (pos * 16));
9691
9692 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9693 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
9694 }
9695
9696 /* 64 bit move 16 bit immediate keep remaining shorts. */
9697 static void
9698 movk64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9699 {
9700 unsigned rd = INSTR (4, 0);
9701 uint64_t current = aarch64_get_reg_u64 (cpu, rd, NO_SP);
9702 uint64_t value = (uint64_t) val << (pos * 16);
9703 uint64_t mask = ~(0xffffULL << (pos * 16));
9704
9705 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9706 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
9707 }
9708
9709 static void
9710 dexMoveWideImmediate (sim_cpu *cpu)
9711 {
9712 /* assert instr[28:23] = 100101
9713 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9714 instr[30,29] = op : 0 ==> MOVN, 1 ==> UNALLOC, 2 ==> MOVZ, 3 ==> MOVK
9715 instr[22,21] = shift : 00 == LSL#0, 01 = LSL#16, 10 = LSL#32, 11 = LSL#48
9716 instr[20,5] = uimm16
9717 instr[4,0] = Rd */
9718
9719 /* N.B. the (multiple of 16) shift is applied by the called routine,
9720 we just pass the multiplier. */
9721
9722 uint32_t imm;
9723 uint32_t size = INSTR (31, 31);
9724 uint32_t op = INSTR (30, 29);
9725 uint32_t shift = INSTR (22, 21);
9726
9727 /* A 32 bit operand can only be shifted by 0 or 1 lots of 16;
9728 anything else is an unallocated instruction. */
9729 if (size == 0 && (shift > 1))
9730 HALT_UNALLOC;
9731
9732 if (op == 1)
9733 HALT_UNALLOC;
9734
9735 imm = INSTR (20, 5);
9736
9737 if (size == 0)
9738 {
9739 if (op == 0)
9740 movn32 (cpu, imm, shift);
9741 else if (op == 2)
9742 movz32 (cpu, imm, shift);
9743 else
9744 movk32 (cpu, imm, shift);
9745 }
9746 else
9747 {
9748 if (op == 0)
9749 movn64 (cpu, imm, shift);
9750 else if (op == 2)
9751 movz64 (cpu, imm, shift);
9752 else
9753 movk64 (cpu, imm, shift);
9754 }
9755 }
9756
9757 /* Bitfield operations.
9758 These take a pair of bit positions r and s which are in {0..31}
9759 or {0..63} depending on the instruction word size.
9760 N.B register args may not be SP. */
9761
9762 /* OK, we start with ubfm which just needs to pick
9763 some bits out of the source, zero the rest and write
9764 the result to dest. Just need two logical shifts. */
9765
9766 /* 32 bit bitfield move, left and right of affected zeroed
9767 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
9768 static void
9769 ubfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9770 {
9771 unsigned rd;
9772 unsigned rn = INSTR (9, 5);
9773 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9774
9775 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
9776 if (r <= s)
9777 {
9778 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
9779 We want only bits s:xxx:r at the bottom of the word
9780 so we LSL bit s up to bit 31 i.e. by 31 - s
9781 and then we LSR to bring bit 31 down to bit s - r
9782 i.e. by 31 + r - s. */
9783 value <<= 31 - s;
9784 value >>= 31 + r - s;
9785 }
9786 else
9787 {
9788 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0
9789 We want only bits s:xxx:0 starting at bit 31-(r-1)
9790 so we LSL bit s up to bit 31 i.e. by 31 - s
9791 and then we LSR to bring bit 31 down to 31-(r-1)+s
9792 i.e. by r - (s + 1). */
9793 value <<= 31 - s;
9794 value >>= r - (s + 1);
9795 }
9796
9797 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9798 rd = INSTR (4, 0);
9799 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
9800 }
9801
9802 /* 64 bit bitfield move, left and right of affected zeroed
9803 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
9804 static void
9805 ubfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9806 {
9807 unsigned rd;
9808 unsigned rn = INSTR (9, 5);
9809 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9810
9811 if (r <= s)
9812 {
9813 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
9814 We want only bits s:xxx:r at the bottom of the word.
9815 So we LSL bit s up to bit 63 i.e. by 63 - s
9816 and then we LSR to bring bit 63 down to bit s - r
9817 i.e. by 63 + r - s. */
9818 value <<= 63 - s;
9819 value >>= 63 + r - s;
9820 }
9821 else
9822 {
9823 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0.
9824 We want only bits s:xxx:0 starting at bit 63-(r-1).
9825 So we LSL bit s up to bit 63 i.e. by 63 - s
9826 and then we LSR to bring bit 63 down to 63-(r-1)+s
9827 i.e. by r - (s + 1). */
9828 value <<= 63 - s;
9829 value >>= r - (s + 1);
9830 }
9831
9832 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9833 rd = INSTR (4, 0);
9834 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
9835 }
9836
9837 /* The signed versions need to insert sign bits
9838 on the left of the inserted bit field. So we do
9839 much the same as the unsigned version except we
9840 use an arithmetic shift right -- this just means
9841 we need to operate on signed values. */
9842
9843 /* 32 bit bitfield move, left of affected sign-extended, right zeroed. */
9844 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
9845 static void
9846 sbfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9847 {
9848 unsigned rd;
9849 unsigned rn = INSTR (9, 5);
9850 /* As per ubfm32 but use an ASR instead of an LSR. */
9851 int32_t value = aarch64_get_reg_s32 (cpu, rn, NO_SP);
9852
9853 if (r <= s)
9854 {
9855 value <<= 31 - s;
9856 value >>= 31 + r - s;
9857 }
9858 else
9859 {
9860 value <<= 31 - s;
9861 value >>= r - (s + 1);
9862 }
9863
9864 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9865 rd = INSTR (4, 0);
9866 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
9867 }
9868
9869 /* 64 bit bitfield move, left of affected sign-extended, right zeroed. */
9870 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
9871 static void
9872 sbfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9873 {
9874 unsigned rd;
9875 unsigned rn = INSTR (9, 5);
9876 /* As per ubfm but use an ASR instead of an LSR. */
9877 int64_t value = aarch64_get_reg_s64 (cpu, rn, NO_SP);
9878
9879 if (r <= s)
9880 {
9881 value <<= 63 - s;
9882 value >>= 63 + r - s;
9883 }
9884 else
9885 {
9886 value <<= 63 - s;
9887 value >>= r - (s + 1);
9888 }
9889
9890 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9891 rd = INSTR (4, 0);
9892 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
9893 }
9894
9895 /* Finally, these versions leave non-affected bits
9896 as is. So we need to generate the bits as per
9897 ubfm and also generate a mask to pick the
9898 bits from the original and computed values. */
9899
9900 /* 32 bit bitfield move, non-affected bits left as is.
9901 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
9902 static void
9903 bfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9904 {
9905 unsigned rn = INSTR (9, 5);
9906 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9907 uint32_t mask = -1;
9908 unsigned rd;
9909 uint32_t value2;
9910
9911 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
9912 if (r <= s)
9913 {
9914 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
9915 We want only bits s:xxx:r at the bottom of the word
9916 so we LSL bit s up to bit 31 i.e. by 31 - s
9917 and then we LSR to bring bit 31 down to bit s - r
9918 i.e. by 31 + r - s. */
9919 value <<= 31 - s;
9920 value >>= 31 + r - s;
9921 /* the mask must include the same bits. */
9922 mask <<= 31 - s;
9923 mask >>= 31 + r - s;
9924 }
9925 else
9926 {
9927 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0.
9928 We want only bits s:xxx:0 starting at bit 31-(r-1)
9929 so we LSL bit s up to bit 31 i.e. by 31 - s
9930 and then we LSR to bring bit 31 down to 31-(r-1)+s
9931 i.e. by r - (s + 1). */
9932 value <<= 31 - s;
9933 value >>= r - (s + 1);
9934 /* The mask must include the same bits. */
9935 mask <<= 31 - s;
9936 mask >>= r - (s + 1);
9937 }
9938
9939 rd = INSTR (4, 0);
9940 value2 = aarch64_get_reg_u32 (cpu, rd, NO_SP);
9941
9942 value2 &= ~mask;
9943 value2 |= value;
9944
9945 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9946 aarch64_set_reg_u64 (cpu, rd, NO_SP, value2);
9948 }
9949
9950 /* 64 bit bitfield move, non-affected bits left as is.
9951 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
9952 static void
9953 bfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9954 {
9955 unsigned rd;
9956 unsigned rn = INSTR (9, 5);
9957 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9958 uint64_t mask = 0xffffffffffffffffULL;
9959
9960 if (r <= s)
9961 {
9962 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
9963 We want only bits s:xxx:r at the bottom of the word
9964 so we LSL bit s up to bit 63 i.e. by 63 - s
9965 and then we LSR to bring bit 63 down to bit s - r
9966 i.e. by 63 + r - s. */
9967 value <<= 63 - s;
9968 value >>= 63 + r - s;
9969 /* The mask must include the same bits. */
9970 mask <<= 63 - s;
9971 mask >>= 63 + r - s;
9972 }
9973 else
9974 {
9975 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0
9976 We want only bits s:xxx:0 starting at bit 63-(r-1)
9977 so we LSL bit s up to bit 63 i.e. by 63 - s
9978 and then we LSR to bring bit 63 down to 63-(r-1)+s
9979 i.e. by r - (s + 1). */
9980 value <<= 63 - s;
9981 value >>= r - (s + 1);
9982 /* The mask must include the same bits. */
9983 mask <<= 63 - s;
9984 mask >>= r - (s + 1);
9985 }
9986
9987 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9988 rd = INSTR (4, 0);
9989 aarch64_set_reg_u64
9990 (cpu, rd, NO_SP, (aarch64_get_reg_u64 (cpu, rd, NO_SP) & ~mask) | value);
9991 }
9992
9993 static void
9994 dexBitfieldImmediate (sim_cpu *cpu)
9995 {
9996 /* assert instr[28:23] = 100110
9997 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9998 instr[30,29] = op : 0 ==> SBFM, 1 ==> BFM, 2 ==> UBFM, 3 ==> UNALLOC
9999 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit ow UNALLOC
10000 instr[21,16] = immr : 0xxxxx for 32 bit, xxxxxx for 64 bit
10001 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
10002 instr[9,5] = Rn
10003 instr[4,0] = Rd */
10004
10005 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
10006 uint32_t dispatch;
10007 uint32_t imms;
10008 uint32_t size = INSTR (31, 31);
10009 uint32_t N = INSTR (22, 22);
10010 /* 32 bit operations must have immr[5] = 0 and imms[5] = 0
10011 or else we have an UNALLOC. */
10012 uint32_t immr = INSTR (21, 16);
10013
10014 if (~size & N)
10015 HALT_UNALLOC;
10016
10017 if (!size && uimm (immr, 5, 5))
10018 HALT_UNALLOC;
10019
10020 imms = INSTR (15, 10);
10021 if (!size && uimm (imms, 5, 5))
10022 HALT_UNALLOC;
10023
10024 /* Switch on combined size and op. */
10025 dispatch = INSTR (31, 29);
10026 switch (dispatch)
10027 {
10028 case 0: sbfm32 (cpu, immr, imms); return;
10029 case 1: bfm32 (cpu, immr, imms); return;
10030 case 2: ubfm32 (cpu, immr, imms); return;
10031 case 4: sbfm (cpu, immr, imms); return;
10032 case 5: bfm (cpu, immr, imms); return;
10033 case 6: ubfm (cpu, immr, imms); return;
10034 default: HALT_UNALLOC;
10035 }
10036 }
10037
10038 static void
10039 do_EXTR_32 (sim_cpu *cpu)
10040 {
10041 /* instr[31:21] = 00010011100
10042 instr[20,16] = Rm
10043 instr[15,10] = imms : 0xxxxx for 32 bit
10044 instr[9,5] = Rn
10045 instr[4,0] = Rd */
10046 unsigned rm = INSTR (20, 16);
10047 unsigned imms = INSTR (15, 10) & 31;
10048 unsigned rn = INSTR ( 9, 5);
10049 unsigned rd = INSTR ( 4, 0);
10050 uint64_t val1;
10051 uint64_t val2;
10052
10053 val1 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
10054 val1 >>= imms;
10055 val2 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
10056 val2 <<= (32 - imms);
10057
10058 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10059 aarch64_set_reg_u64 (cpu, rd, NO_SP, (val1 | val2) & 0xffffffffULL); /* 32-bit result, top half zero. */
10060 }
10061
10062 static void
10063 do_EXTR_64 (sim_cpu *cpu)
10064 {
10065 /* instr[31:21] = 10010011110
10066 instr[20,16] = Rm
10067 instr[15,10] = imms
10068 instr[9,5] = Rn
10069 instr[4,0] = Rd */
10070 unsigned rm = INSTR (20, 16);
10071 unsigned imms = INSTR (15, 10) & 63;
10072 unsigned rn = INSTR ( 9, 5);
10073 unsigned rd = INSTR ( 4, 0);
10074 uint64_t val;
10075
10076 val = aarch64_get_reg_u64 (cpu, rm, NO_SP);
10077 val >>= imms;
10078 val |= imms ? aarch64_get_reg_u64 (cpu, rn, NO_SP) << (64 - imms)
10078 : 0; /* imms == 0 would shift by 64 (undefined); the result is just Rm. */
10079
10080 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
10081 }
10082
10083 static void
10084 dexExtractImmediate (sim_cpu *cpu)
10085 {
10086 /* assert instr[28:23] = 100111
10087 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10088 instr[30,29] = op21 : 0 ==> EXTR, 1,2,3 ==> UNALLOC
10089 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit or UNALLOC
10090 instr[21] = op0 : must be 0 or UNALLOC
10091 instr[20,16] = Rm
10092 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
10093 instr[9,5] = Rn
10094 instr[4,0] = Rd */
10095
10096 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
10097 /* 64 bit operations must have N = 1 or else we have an UNALLOC. */
10098 uint32_t dispatch;
10099 uint32_t size = INSTR (31, 31);
10100 uint32_t N = INSTR (22, 22);
10101 /* 32 bit operations must have imms[5] = 0
10102 or else we have an UNALLOC. */
10103 uint32_t imms = INSTR (15, 10);
10104
10105 if (size ^ N)
10106 HALT_UNALLOC;
10107
10108 if (!size && uimm (imms, 5, 5))
10109 HALT_UNALLOC;
10110
10111 /* Switch on combined size and op. */
10112 dispatch = INSTR (31, 29);
10113
10114 if (dispatch == 0)
10115 do_EXTR_32 (cpu);
10116
10117 else if (dispatch == 4)
10118 do_EXTR_64 (cpu);
10119
10120 else if (dispatch == 1)
10121 HALT_NYI;
10122 else
10123 HALT_UNALLOC;
10124 }
10125
10126 static void
10127 dexDPImm (sim_cpu *cpu)
10128 {
10129 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
10130 assert group == GROUP_DPIMM_1000 || group == GROUP_DPIMM_1001
10131 bits [25,23] of a DPImm are the secondary dispatch vector. */
10132 uint32_t group2 = dispatchDPImm (aarch64_get_instr (cpu));
10133
10134 switch (group2)
10135 {
10136 case DPIMM_PCADR_000:
10137 case DPIMM_PCADR_001:
10138 dexPCRelAddressing (cpu);
10139 return;
10140
10141 case DPIMM_ADDSUB_010:
10142 case DPIMM_ADDSUB_011:
10143 dexAddSubtractImmediate (cpu);
10144 return;
10145
10146 case DPIMM_LOG_100:
10147 dexLogicalImmediate (cpu);
10148 return;
10149
10150 case DPIMM_MOV_101:
10151 dexMoveWideImmediate (cpu);
10152 return;
10153
10154 case DPIMM_BITF_110:
10155 dexBitfieldImmediate (cpu);
10156 return;
10157
10158 case DPIMM_EXTR_111:
10159 dexExtractImmediate (cpu);
10160 return;
10161
10162 default:
10163 /* Should never reach here. */
10164 HALT_NYI;
10165 }
10166 }
10167
10168 static void
10169 dexLoadUnscaledImmediate (sim_cpu *cpu)
10170 {
10171 /* instr[29,24] == 111_00
10172 instr[21] == 0
10173 instr[11,10] == 00
10174 instr[31,30] = size
10175 instr[26] = V
10176 instr[23,22] = opc
10177 instr[20,12] = simm9
10178 instr[9,5] = rn may be SP. */
10179 /* unsigned rt = INSTR (4, 0); */
10180 uint32_t V = INSTR (26, 26);
10181 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10182 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10183
10184 if (!V)
10185 {
10186 /* GReg operations. */
10187 switch (dispatch)
10188 {
10189 case 0: sturb (cpu, imm); return;
10190 case 1: ldurb32 (cpu, imm); return;
10191 case 2: ldursb64 (cpu, imm); return;
10192 case 3: ldursb32 (cpu, imm); return;
10193 case 4: sturh (cpu, imm); return;
10194 case 5: ldurh32 (cpu, imm); return;
10195 case 6: ldursh64 (cpu, imm); return;
10196 case 7: ldursh32 (cpu, imm); return;
10197 case 8: stur32 (cpu, imm); return;
10198 case 9: ldur32 (cpu, imm); return;
10199 case 10: ldursw (cpu, imm); return;
10200 case 12: stur64 (cpu, imm); return;
10201 case 13: ldur64 (cpu, imm); return;
10202
10203 case 14:
10204 /* PRFUM NYI. */
10205 HALT_NYI;
10206
10207 default:
10208 case 11:
10209 case 15:
10210 HALT_UNALLOC;
10211 }
10212 }
10213
10214 /* FReg operations. */
10215 switch (dispatch)
10216 {
10217 case 2: fsturq (cpu, imm); return;
10218 case 3: fldurq (cpu, imm); return;
10219 case 8: fsturs (cpu, imm); return;
10220 case 9: fldurs (cpu, imm); return;
10221 case 12: fsturd (cpu, imm); return;
10222 case 13: fldurd (cpu, imm); return;
10223
10224 case 0: /* STUR 8 bit FP. */
10225 case 1: /* LDUR 8 bit FP. */
10226 case 4: /* STUR 16 bit FP. */
10227 case 5: /* LDUR 16 bit FP. */
10228 HALT_NYI;
10229
10230 default:
10231 case 6:
10232 case 7:
10233 case 10:
10234 case 11:
10235 case 14:
10236 case 15:
10237 HALT_UNALLOC;
10238 }
10239 }
10240
10241 /* N.B. A preliminary note regarding all the ldrs<x>32
10242 instructions
10243
10244 The signed value loaded by these instructions is cast to unsigned
10245 before being assigned to aarch64_get_reg_u64 (cpu, N) i.e. to the
10246 64 bit element of the GReg union. This performs a 32 bit sign extension
10247 (as required) but avoids 64 bit sign extension, thus ensuring that the
10248 top half of the register word is zero. This is what the spec demands
10249 when a 32 bit load occurs. */
10250
10251 /* 32 bit load sign-extended byte scaled unsigned 12 bit. */
10252 static void
10253 ldrsb32_abs (sim_cpu *cpu, uint32_t offset)
10254 {
10255 unsigned int rn = INSTR (9, 5);
10256 unsigned int rt = INSTR (4, 0);
10257
10258 /* The target register may not be SP but the source may be;
10259 there is no scaling required for a byte load. */
10260 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
10261 aarch64_set_reg_u64 (cpu, rt, NO_SP,
10262 (int64_t) aarch64_get_mem_s8 (cpu, address));
10263 }
10264
10265 /* 32 bit load sign-extended byte scaled or unscaled zero-
10266 or sign-extended 32-bit register offset. */
10267 static void
10268 ldrsb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10269 {
10270 unsigned int rm = INSTR (20, 16);
10271 unsigned int rn = INSTR (9, 5);
10272 unsigned int rt = INSTR (4, 0);
10273
10274 /* rn may reference SP, rm and rt must reference ZR. */
10275
10276 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10277 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10278 extension);
10279
10280 /* There is no scaling required for a byte load. */
10281 aarch64_set_reg_u64
10282 (cpu, rt, NO_SP, (int64_t) aarch64_get_mem_s8 (cpu, address
10283 + displacement));
10284 }
10285
10286 /* 32 bit load sign-extended byte unscaled signed 9 bit with
10287 pre- or post-writeback. */
10288 static void
10289 ldrsb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10290 {
10291 uint64_t address;
10292 unsigned int rn = INSTR (9, 5);
10293 unsigned int rt = INSTR (4, 0);
10294
10295 if (rn == rt && wb != NoWriteBack)
10296 HALT_UNALLOC;
10297
10298 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10299
10300 if (wb == Pre)
10301 address += offset;
10302
10303 aarch64_set_reg_u64 (cpu, rt, NO_SP,
10304 (int64_t) aarch64_get_mem_s8 (cpu, address));
10305
10306 if (wb == Post)
10307 address += offset;
10308
10309 if (wb != NoWriteBack)
10310 aarch64_set_reg_u64 (cpu, rn, SP_OK, address); /* The base register may be SP. */
10311 }
10312
10313 /* 8 bit store scaled. */
10314 static void
10315 fstrb_abs (sim_cpu *cpu, uint32_t offset)
10316 {
10317 unsigned st = INSTR (4, 0);
10318 unsigned rn = INSTR (9, 5);
10319
10320 aarch64_set_mem_u8 (cpu,
10321 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
10322 aarch64_get_vec_u8 (cpu, st, 0));
10323 }
10324
10325 /* 8 bit store scaled or unscaled zero- or
10326 sign-extended 32-bit register offset. */
10327 static void
10328 fstrb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10329 {
10330 unsigned rm = INSTR (20, 16);
10331 unsigned rn = INSTR (9, 5);
10332 unsigned st = INSTR (4, 0);
10333
10334 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10335 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10336 extension);
10337 uint64_t displacement = scaling == Scaled ? extended : 0;
10338
10339 aarch64_set_mem_u8
10340 (cpu, address + displacement, aarch64_get_vec_u8 (cpu, st, 0));
10341 }
10342
10343 /* 16 bit store scaled. */
10344 static void
10345 fstrh_abs (sim_cpu *cpu, uint32_t offset)
10346 {
10347 unsigned st = INSTR (4, 0);
10348 unsigned rn = INSTR (9, 5);
10349
10350 aarch64_set_mem_u16
10351 (cpu,
10352 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16),
10353 aarch64_get_vec_u16 (cpu, st, 0));
10354 }
10355
10356 /* 16 bit store scaled or unscaled zero-
10357 or sign-extended 32-bit register offset. */
10358 static void
10359 fstrh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10360 {
10361 unsigned rm = INSTR (20, 16);
10362 unsigned rn = INSTR (9, 5);
10363 unsigned st = INSTR (4, 0);
10364
10365 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10366 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10367 extension);
10368 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
10369
10370 aarch64_set_mem_u16
10371 (cpu, address + displacement, aarch64_get_vec_u16 (cpu, st, 0));
10372 }
10373
10374 /* 32 bit store scaled unsigned 12 bit. */
10375 static void
10376 fstrs_abs (sim_cpu *cpu, uint32_t offset)
10377 {
10378 unsigned st = INSTR (4, 0);
10379 unsigned rn = INSTR (9, 5);
10380
10381 aarch64_set_mem_u32
10382 (cpu,
10383 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32),
10384 aarch64_get_vec_u32 (cpu, st, 0));
10385 }
10386
10387 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
10388 static void
10389 fstrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10390 {
10391 unsigned rn = INSTR (9, 5);
10392 unsigned st = INSTR (4, 0);
10393
10394 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10395
10396 if (wb != Post)
10397 address += offset;
10398
10399 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, st, 0));
10400
10401 if (wb == Post)
10402 address += offset;
10403
10404 if (wb != NoWriteBack)
10405 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10406 }
10407
10408 /* 32 bit store scaled or unscaled zero-
10409 or sign-extended 32-bit register offset. */
10410 static void
10411 fstrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10412 {
10413 unsigned rm = INSTR (20, 16);
10414 unsigned rn = INSTR (9, 5);
10415 unsigned st = INSTR (4, 0);
10416
10417 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10418 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10419 extension);
10420 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
10421
10422 aarch64_set_mem_u32
10423 (cpu, address + displacement, aarch64_get_vec_u32 (cpu, st, 0));
10424 }
10425
10426 /* 64 bit store scaled unsigned 12 bit. */
10427 static void
10428 fstrd_abs (sim_cpu *cpu, uint32_t offset)
10429 {
10430 unsigned st = INSTR (4, 0);
10431 unsigned rn = INSTR (9, 5);
10432
10433 aarch64_set_mem_u64
10434 (cpu,
10435 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64),
10436 aarch64_get_vec_u64 (cpu, st, 0));
10437 }
10438
10439 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
10440 static void
10441 fstrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10442 {
10443 unsigned rn = INSTR (9, 5);
10444 unsigned st = INSTR (4, 0);
10445
10446 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10447
10448 if (wb != Post)
10449 address += offset;
10450
10451 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, st, 0));
10452
10453 if (wb == Post)
10454 address += offset;
10455
10456 if (wb != NoWriteBack)
10457 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10458 }
10459
10460 /* 64 bit store scaled or unscaled zero-
10461 or sign-extended 32-bit register offset. */
10462 static void
10463 fstrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10464 {
10465 unsigned rm = INSTR (20, 16);
10466 unsigned rn = INSTR (9, 5);
10467 unsigned st = INSTR (4, 0);
10468
10469 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10470 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10471 extension);
10472 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
10473
10474 aarch64_set_mem_u64
10475 (cpu, address + displacement, aarch64_get_vec_u64 (cpu, st, 0));
10476 }
10477
10478 /* 128 bit store scaled unsigned 12 bit. */
10479 static void
10480 fstrq_abs (sim_cpu *cpu, uint32_t offset)
10481 {
10482 FRegister a;
10483 unsigned st = INSTR (4, 0);
10484 unsigned rn = INSTR (9, 5);
10485 uint64_t addr;
10486
10487 aarch64_get_FP_long_double (cpu, st, & a);
10488
10489 addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
10490 aarch64_set_mem_long_double (cpu, addr, a);
10491 }
10492
10493 /* 128 bit store unscaled signed 9 bit with pre- or post-writeback. */
10494 static void
10495 fstrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10496 {
10497 FRegister a;
10498 unsigned rn = INSTR (9, 5);
10499 unsigned st = INSTR (4, 0);
10500 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10501
10502 if (wb != Post)
10503 address += offset;
10504
10505 aarch64_get_FP_long_double (cpu, st, & a);
10506 aarch64_set_mem_long_double (cpu, address, a);
10507
10508 if (wb == Post)
10509 address += offset;
10510
10511 if (wb != NoWriteBack)
10512 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10513 }
10514
10515 /* 128 bit store scaled or unscaled zero-
10516 or sign-extended 32-bit register offset. */
10517 static void
10518 fstrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10519 {
10520 unsigned rm = INSTR (20, 16);
10521 unsigned rn = INSTR (9, 5);
10522 unsigned st = INSTR (4, 0);
10523
10524 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10525 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10526 extension);
10527 uint64_t displacement = OPT_SCALE (extended, 128, scaling);
10528
10529 FRegister a;
10530
10531 aarch64_get_FP_long_double (cpu, st, & a);
10532 aarch64_set_mem_long_double (cpu, address + displacement, a);
10533 }
10534
10535 static void
10536 dexLoadImmediatePrePost (sim_cpu *cpu)
10537 {
10538 /* instr[31,30] = size
10539 instr[29,27] = 111
10540 instr[26] = V
10541 instr[25,24] = 00
10542 instr[23,22] = opc
10543 instr[21] = 0
10544 instr[20,12] = simm9
10545 instr[11] = wb : 0 ==> Post, 1 ==> Pre
10546 instr[10] = 0
10547 instr[9,5] = Rn may be SP.
10548 instr[4,0] = Rt */
10549
10550 uint32_t V = INSTR (26, 26);
10551 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10552 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10553 WriteBack wb = INSTR (11, 11);
10554
10555 if (!V)
10556 {
10557 /* GReg operations. */
10558 switch (dispatch)
10559 {
10560 case 0: strb_wb (cpu, imm, wb); return;
10561 case 1: ldrb32_wb (cpu, imm, wb); return;
10562 case 2: ldrsb_wb (cpu, imm, wb); return;
10563 case 3: ldrsb32_wb (cpu, imm, wb); return;
10564 case 4: strh_wb (cpu, imm, wb); return;
10565 case 5: ldrh32_wb (cpu, imm, wb); return;
10566 case 6: ldrsh64_wb (cpu, imm, wb); return;
10567 case 7: ldrsh32_wb (cpu, imm, wb); return;
10568 case 8: str32_wb (cpu, imm, wb); return;
10569 case 9: ldr32_wb (cpu, imm, wb); return;
10570 case 10: ldrsw_wb (cpu, imm, wb); return;
10571 case 12: str_wb (cpu, imm, wb); return;
10572 case 13: ldr_wb (cpu, imm, wb); return;
10573
10574 default:
10575 case 11:
10576 case 14:
10577 case 15:
10578 HALT_UNALLOC;
10579 }
10580 }
10581
10582 /* FReg operations. */
10583 switch (dispatch)
10584 {
10585 case 2: fstrq_wb (cpu, imm, wb); return;
10586 case 3: fldrq_wb (cpu, imm, wb); return;
10587 case 8: fstrs_wb (cpu, imm, wb); return;
10588 case 9: fldrs_wb (cpu, imm, wb); return;
10589 case 12: fstrd_wb (cpu, imm, wb); return;
10590 case 13: fldrd_wb (cpu, imm, wb); return;
10591
10592 case 0: /* STUR 8 bit FP. */
10593 case 1: /* LDUR 8 bit FP. */
10594 case 4: /* STUR 16 bit FP. */
10595     case 5: /* LDUR 16 bit FP. */
10596 HALT_NYI;
10597
10598 default:
10599 case 6:
10600 case 7:
10601 case 10:
10602 case 11:
10603 case 14:
10604 case 15:
10605 HALT_UNALLOC;
10606 }
10607 }
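
/* A worked decode example for the dispatch above, assuming the standard
   A64 encoding: LDR w1, [x2, #-8]! has size = 10, V = 0, opc = 01 and
   bit 11 set, so dispatch = (2 << 2) | 1 = 9 and ldr32_wb is called
   with imm = -8 and wb = Pre.  */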
10608
10609 static void
10610 dexLoadRegisterOffset (sim_cpu *cpu)
10611 {
10612 /* instr[31,30] = size
10613 instr[29,27] = 111
10614 instr[26] = V
10615 instr[25,24] = 00
10616 instr[23,22] = opc
10617 instr[21] = 1
10618 instr[20,16] = rm
10619 instr[15,13] = option : 010 ==> UXTW, 011 ==> UXTX/LSL,
10620 110 ==> SXTW, 111 ==> SXTX,
10621 ow ==> RESERVED
10622 instr[12] = scaled
10623 instr[11,10] = 10
10624 instr[9,5] = rn
10625 instr[4,0] = rt. */
10626
10627 uint32_t V = INSTR (26, 26);
10628 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10629 Scaling scale = INSTR (12, 12);
10630 Extension extensionType = INSTR (15, 13);
10631
10632 /* Check for illegal extension types. */
10633 if (uimm (extensionType, 1, 1) == 0)
10634 HALT_UNALLOC;
10635
10636 if (extensionType == UXTX || extensionType == SXTX)
10637 extensionType = NoExtension;
10638
10639 if (!V)
10640 {
10641 /* GReg operations. */
10642 switch (dispatch)
10643 {
10644 case 0: strb_scale_ext (cpu, scale, extensionType); return;
10645 case 1: ldrb32_scale_ext (cpu, scale, extensionType); return;
10646 case 2: ldrsb_scale_ext (cpu, scale, extensionType); return;
10647 case 3: ldrsb32_scale_ext (cpu, scale, extensionType); return;
10648 case 4: strh_scale_ext (cpu, scale, extensionType); return;
10649 case 5: ldrh32_scale_ext (cpu, scale, extensionType); return;
10650 case 6: ldrsh_scale_ext (cpu, scale, extensionType); return;
10651 case 7: ldrsh32_scale_ext (cpu, scale, extensionType); return;
10652 case 8: str32_scale_ext (cpu, scale, extensionType); return;
10653 case 9: ldr32_scale_ext (cpu, scale, extensionType); return;
10654 case 10: ldrsw_scale_ext (cpu, scale, extensionType); return;
10655 case 12: str_scale_ext (cpu, scale, extensionType); return;
10656 case 13: ldr_scale_ext (cpu, scale, extensionType); return;
10657 case 14: prfm_scale_ext (cpu, scale, extensionType); return;
10658
10659 default:
10660 case 11:
10661 case 15:
10662 HALT_UNALLOC;
10663 }
10664 }
10665
10666 /* FReg operations. */
10667 switch (dispatch)
10668 {
10669 case 1: /* LDUR 8 bit FP. */
10670 HALT_NYI;
10671 case 3: fldrq_scale_ext (cpu, scale, extensionType); return;
10672     case 5: /* LDUR 16 bit FP. */
10673 HALT_NYI;
10674 case 9: fldrs_scale_ext (cpu, scale, extensionType); return;
10675 case 13: fldrd_scale_ext (cpu, scale, extensionType); return;
10676
10677 case 0: fstrb_scale_ext (cpu, scale, extensionType); return;
10678 case 2: fstrq_scale_ext (cpu, scale, extensionType); return;
10679 case 4: fstrh_scale_ext (cpu, scale, extensionType); return;
10680 case 8: fstrs_scale_ext (cpu, scale, extensionType); return;
10681 case 12: fstrd_scale_ext (cpu, scale, extensionType); return;
10682
10683 default:
10684 case 6:
10685 case 7:
10686 case 10:
10687 case 11:
10688 case 14:
10689 case 15:
10690 HALT_UNALLOC;
10691 }
10692 }
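
/* A worked decode example, assuming the standard A64 encoding:
   LDR x0, [x1, w2, SXTW #3] has size = 11, V = 0, opc = 01,
   option = 110 (SXTW, bit 1 set as required above) and S = 1, so
   dispatch = (3 << 2) | 1 = 13 and ldr_scale_ext is called with
   scale = Scaled; the sign-extended w2 value is then shifted left
   by 3 before being added to the base.  */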
10693
10694 static void
10695 dexLoadUnsignedImmediate (sim_cpu *cpu)
10696 {
10697 /* instr[29,24] == 111_01
10698 instr[31,30] = size
10699 instr[26] = V
10700 instr[23,22] = opc
10701 instr[21,10] = uimm12 : unsigned immediate offset
10702 instr[9,5] = rn may be SP.
10703 instr[4,0] = rt. */
10704
10705 uint32_t V = INSTR (26,26);
10706 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10707 uint32_t imm = INSTR (21, 10);
10708
10709 if (!V)
10710 {
10711 /* GReg operations. */
10712 switch (dispatch)
10713 {
10714 case 0: strb_abs (cpu, imm); return;
10715 case 1: ldrb32_abs (cpu, imm); return;
10716 case 2: ldrsb_abs (cpu, imm); return;
10717 case 3: ldrsb32_abs (cpu, imm); return;
10718 case 4: strh_abs (cpu, imm); return;
10719 case 5: ldrh32_abs (cpu, imm); return;
10720 case 6: ldrsh_abs (cpu, imm); return;
10721 case 7: ldrsh32_abs (cpu, imm); return;
10722 case 8: str32_abs (cpu, imm); return;
10723 case 9: ldr32_abs (cpu, imm); return;
10724 case 10: ldrsw_abs (cpu, imm); return;
10725 case 12: str_abs (cpu, imm); return;
10726 case 13: ldr_abs (cpu, imm); return;
10727 case 14: prfm_abs (cpu, imm); return;
10728
10729 default:
10730 case 11:
10731 case 15:
10732 HALT_UNALLOC;
10733 }
10734 }
10735
10736 /* FReg operations. */
10737 switch (dispatch)
10738 {
10739 case 0: fstrb_abs (cpu, imm); return;
10740 case 4: fstrh_abs (cpu, imm); return;
10741 case 8: fstrs_abs (cpu, imm); return;
10742 case 12: fstrd_abs (cpu, imm); return;
10743 case 2: fstrq_abs (cpu, imm); return;
10744
10745 case 1: fldrb_abs (cpu, imm); return;
10746 case 5: fldrh_abs (cpu, imm); return;
10747 case 9: fldrs_abs (cpu, imm); return;
10748 case 13: fldrd_abs (cpu, imm); return;
10749 case 3: fldrq_abs (cpu, imm); return;
10750
10751 default:
10752 case 6:
10753 case 7:
10754 case 10:
10755 case 11:
10756 case 14:
10757 case 15:
10758 HALT_UNALLOC;
10759 }
10760 }
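
/* A worked decode example, assuming the standard A64 encoding:
   LDR x0, [sp, #16] has size = 11, V = 0, opc = 01, so dispatch =
   (3 << 2) | 1 = 13 and ldr_abs is called; the uimm12 field holds 2,
   the byte offset 16 divided by the 8 byte access size.  */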
10761
10762 static void
10763 dexLoadExclusive (sim_cpu *cpu)
10764 {
10765 /* assert instr[29:24] = 001000;
10766 instr[31,30] = size
10767 instr[23] = 0 if exclusive
10768 instr[22] = L : 1 if load, 0 if store
10769 instr[21] = 1 if pair
10770 instr[20,16] = Rs
10771 instr[15] = o0 : 1 if ordered
10772 instr[14,10] = Rt2
10773 instr[9,5] = Rn
10774      instr[4,0] = Rt. */
10775
10776 switch (INSTR (22, 21))
10777 {
10778 case 2: ldxr (cpu); return;
10779 case 0: stxr (cpu); return;
10780 default: HALT_NYI;
10781 }
10782 }
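
/* For example LDXR x0, [x1] has L = 1 and the pair bit clear, so
   INSTR (22, 21) == 2 and ldxr is called, while STXR w2, x0, [x1]
   gives 0 and stxr; the exclusive pair forms (bit 21 set) fall
   through to HALT_NYI.  */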
10783
10784 static void
10785 dexLoadOther (sim_cpu *cpu)
10786 {
10787 uint32_t dispatch;
10788
10789 /* instr[29,25] = 111_0
10790 instr[24] == 0 ==> dispatch, 1 ==> ldst reg unsigned immediate
10791      instr[21] and instr[11,10] combine to form the secondary dispatch vector. */
10792 if (INSTR (24, 24))
10793 {
10794 dexLoadUnsignedImmediate (cpu);
10795 return;
10796 }
10797
10798 dispatch = ((INSTR (21, 21) << 2) | INSTR (11, 10));
10799 switch (dispatch)
10800 {
10801 case 0: dexLoadUnscaledImmediate (cpu); return;
10802 case 1: dexLoadImmediatePrePost (cpu); return;
10803 case 3: dexLoadImmediatePrePost (cpu); return;
10804 case 6: dexLoadRegisterOffset (cpu); return;
10805
10806 default:
10807 case 2:
10808 case 4:
10809 case 5:
10810 case 7:
10811 HALT_NYI;
10812 }
10813 }
10814
10815 static void
10816 store_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10817 {
10818 unsigned rn = INSTR (14, 10);
10819 unsigned rd = INSTR (9, 5);
10820 unsigned rm = INSTR (4, 0);
10821 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10822
10823 if ((rn == rd || rm == rd) && wb != NoWriteBack)
10824     HALT_UNALLOC; /* Unpredictable: writeback base overlaps a transfer register.  */
10825
10826 offset <<= 2;
10827
10828 if (wb != Post)
10829 address += offset;
10830
10831 aarch64_set_mem_u32 (cpu, address,
10832 aarch64_get_reg_u32 (cpu, rm, NO_SP));
10833 aarch64_set_mem_u32 (cpu, address + 4,
10834 aarch64_get_reg_u32 (cpu, rn, NO_SP));
10835
10836 if (wb == Post)
10837 address += offset;
10838
10839 if (wb != NoWriteBack)
10840 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10841 }
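
/* N.B. the imm7 field of STP/LDP is scaled by the access size, hence
   the offset <<= 2 above: STP w1, w2, [sp, #-16]! encodes imm7 = -4,
   which becomes the byte offset -16.  */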
10842
10843 static void
10844 store_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10845 {
10846 unsigned rn = INSTR (14, 10);
10847 unsigned rd = INSTR (9, 5);
10848 unsigned rm = INSTR (4, 0);
10849 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10850
10851 if ((rn == rd || rm == rd) && wb != NoWriteBack)
10852     HALT_UNALLOC; /* Unpredictable: writeback base overlaps a transfer register.  */
10853
10854 offset <<= 3;
10855
10856 if (wb != Post)
10857 address += offset;
10858
10859 aarch64_set_mem_u64 (cpu, address,
10860 aarch64_get_reg_u64 (cpu, rm, NO_SP));
10861 aarch64_set_mem_u64 (cpu, address + 8,
10862 aarch64_get_reg_u64 (cpu, rn, NO_SP));
10863
10864 if (wb == Post)
10865 address += offset;
10866
10867 if (wb != NoWriteBack)
10868 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10869 }
10870
10871 static void
10872 load_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10873 {
10874 unsigned rn = INSTR (14, 10);
10875 unsigned rd = INSTR (9, 5);
10876 unsigned rm = INSTR (4, 0);
10877 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10878
10879 /* Treat this as unalloc to make sure we don't do it. */
10880 if (rn == rm)
10881 HALT_UNALLOC;
10882
10883 offset <<= 2;
10884
10885 if (wb != Post)
10886 address += offset;
10887
10888   aarch64_set_reg_u64 (cpu, rm, NO_SP, aarch64_get_mem_u32 (cpu, address));
10889   aarch64_set_reg_u64 (cpu, rn, NO_SP, aarch64_get_mem_u32 (cpu, address + 4));
10890
10891 if (wb == Post)
10892 address += offset;
10893
10894 if (wb != NoWriteBack)
10895 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10896 }
10897
10898 static void
10899 load_pair_s32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10900 {
10901 unsigned rn = INSTR (14, 10);
10902 unsigned rd = INSTR (9, 5);
10903 unsigned rm = INSTR (4, 0);
10904 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10905
10906 /* Treat this as unalloc to make sure we don't do it. */
10907 if (rn == rm)
10908 HALT_UNALLOC;
10909
10910 offset <<= 2;
10911
10912 if (wb != Post)
10913 address += offset;
10914
10915   aarch64_set_reg_s64 (cpu, rm, NO_SP, aarch64_get_mem_s32 (cpu, address));
10916   aarch64_set_reg_s64 (cpu, rn, NO_SP, aarch64_get_mem_s32 (cpu, address + 4));
10917
10918 if (wb == Post)
10919 address += offset;
10920
10921 if (wb != NoWriteBack)
10922 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10923 }
10924
10925 static void
10926 load_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10927 {
10928 unsigned rn = INSTR (14, 10);
10929 unsigned rd = INSTR (9, 5);
10930 unsigned rm = INSTR (4, 0);
10931 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10932
10933 /* Treat this as unalloc to make sure we don't do it. */
10934 if (rn == rm)
10935 HALT_UNALLOC;
10936
10937 offset <<= 3;
10938
10939 if (wb != Post)
10940 address += offset;
10941
10942   aarch64_set_reg_u64 (cpu, rm, NO_SP, aarch64_get_mem_u64 (cpu, address));
10943   aarch64_set_reg_u64 (cpu, rn, NO_SP, aarch64_get_mem_u64 (cpu, address + 8));
10944
10945 if (wb == Post)
10946 address += offset;
10947
10948 if (wb != NoWriteBack)
10949 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10950 }
10951
10952 static void
10953 dex_load_store_pair_gr (sim_cpu *cpu)
10954 {
10955 /* instr[31,30] = size (10=> 64-bit, 01=> signed 32-bit, 00=> 32-bit)
10956 instr[29,25] = instruction encoding: 101_0
10957 instr[26] = V : 1 if fp 0 if gp
10958 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
10959 instr[22] = load/store (1=> load)
10960 instr[21,15] = signed, scaled, offset
10961 instr[14,10] = Rn
10962 instr[ 9, 5] = Rd
10963 instr[ 4, 0] = Rm. */
10964
10965 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
10966 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
10967
10968 switch (dispatch)
10969 {
10970 case 2: store_pair_u32 (cpu, offset, Post); return;
10971 case 3: load_pair_u32 (cpu, offset, Post); return;
10972 case 4: store_pair_u32 (cpu, offset, NoWriteBack); return;
10973 case 5: load_pair_u32 (cpu, offset, NoWriteBack); return;
10974 case 6: store_pair_u32 (cpu, offset, Pre); return;
10975 case 7: load_pair_u32 (cpu, offset, Pre); return;
10976
10977 case 11: load_pair_s32 (cpu, offset, Post); return;
10978 case 13: load_pair_s32 (cpu, offset, NoWriteBack); return;
10979 case 15: load_pair_s32 (cpu, offset, Pre); return;
10980
10981 case 18: store_pair_u64 (cpu, offset, Post); return;
10982 case 19: load_pair_u64 (cpu, offset, Post); return;
10983 case 20: store_pair_u64 (cpu, offset, NoWriteBack); return;
10984 case 21: load_pair_u64 (cpu, offset, NoWriteBack); return;
10985 case 22: store_pair_u64 (cpu, offset, Pre); return;
10986 case 23: load_pair_u64 (cpu, offset, Pre); return;
10987
10988 default:
10989 HALT_UNALLOC;
10990 }
10991 }
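
/* A worked dispatch example, assuming the standard A64 encoding:
   STP x29, x30, [sp, #-16]! has size = 10, addressing mode 11 (pre)
   and L = 0, so dispatch = (2 << 3) | 6 = 22 and store_pair_u64 is
   called with wb = Pre; imm7 = -2 is shifted left by 3 to give the
   byte offset -16.  */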
10992
10993 static void
10994 store_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
10995 {
10996 unsigned rn = INSTR (14, 10);
10997 unsigned rd = INSTR (9, 5);
10998 unsigned rm = INSTR (4, 0);
10999 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11000
11001 offset <<= 2;
11002
11003 if (wb != Post)
11004 address += offset;
11005
11006 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, rm, 0));
11007 aarch64_set_mem_u32 (cpu, address + 4, aarch64_get_vec_u32 (cpu, rn, 0));
11008
11009 if (wb == Post)
11010 address += offset;
11011
11012 if (wb != NoWriteBack)
11013 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11014 }
11015
11016 static void
11017 store_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11018 {
11019 unsigned rn = INSTR (14, 10);
11020 unsigned rd = INSTR (9, 5);
11021 unsigned rm = INSTR (4, 0);
11022 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11023
11024 offset <<= 3;
11025
11026 if (wb != Post)
11027 address += offset;
11028
11029 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, rm, 0));
11030 aarch64_set_mem_u64 (cpu, address + 8, aarch64_get_vec_u64 (cpu, rn, 0));
11031
11032 if (wb == Post)
11033 address += offset;
11034
11035 if (wb != NoWriteBack)
11036 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11037 }
11038
11039 static void
11040 store_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11041 {
11042 FRegister a;
11043 unsigned rn = INSTR (14, 10);
11044 unsigned rd = INSTR (9, 5);
11045 unsigned rm = INSTR (4, 0);
11046 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11047
11048 offset <<= 4;
11049
11050 if (wb != Post)
11051 address += offset;
11052
11053 aarch64_get_FP_long_double (cpu, rm, & a);
11054 aarch64_set_mem_long_double (cpu, address, a);
11055 aarch64_get_FP_long_double (cpu, rn, & a);
11056 aarch64_set_mem_long_double (cpu, address + 16, a);
11057
11058 if (wb == Post)
11059 address += offset;
11060
11061 if (wb != NoWriteBack)
11062 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11063 }
11064
11065 static void
11066 load_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
11067 {
11068 unsigned rn = INSTR (14, 10);
11069 unsigned rd = INSTR (9, 5);
11070 unsigned rm = INSTR (4, 0);
11071 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11072
11073 if (rm == rn)
11074 HALT_UNALLOC;
11075
11076 offset <<= 2;
11077
11078 if (wb != Post)
11079 address += offset;
11080
11081 aarch64_set_vec_u32 (cpu, rm, 0, aarch64_get_mem_u32 (cpu, address));
11082 aarch64_set_vec_u32 (cpu, rn, 0, aarch64_get_mem_u32 (cpu, address + 4));
11083
11084 if (wb == Post)
11085 address += offset;
11086
11087 if (wb != NoWriteBack)
11088 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11089 }
11090
11091 static void
11092 load_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11093 {
11094 unsigned rn = INSTR (14, 10);
11095 unsigned rd = INSTR (9, 5);
11096 unsigned rm = INSTR (4, 0);
11097 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11098
11099 if (rm == rn)
11100 HALT_UNALLOC;
11101
11102 offset <<= 3;
11103
11104 if (wb != Post)
11105 address += offset;
11106
11107 aarch64_set_vec_u64 (cpu, rm, 0, aarch64_get_mem_u64 (cpu, address));
11108 aarch64_set_vec_u64 (cpu, rn, 0, aarch64_get_mem_u64 (cpu, address + 8));
11109
11110 if (wb == Post)
11111 address += offset;
11112
11113 if (wb != NoWriteBack)
11114 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11115 }
11116
11117 static void
11118 load_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11119 {
11120 FRegister a;
11121 unsigned rn = INSTR (14, 10);
11122 unsigned rd = INSTR (9, 5);
11123 unsigned rm = INSTR (4, 0);
11124 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11125
11126 if (rm == rn)
11127 HALT_UNALLOC;
11128
11129 offset <<= 4;
11130
11131 if (wb != Post)
11132 address += offset;
11133
11134 aarch64_get_mem_long_double (cpu, address, & a);
11135 aarch64_set_FP_long_double (cpu, rm, a);
11136 aarch64_get_mem_long_double (cpu, address + 16, & a);
11137 aarch64_set_FP_long_double (cpu, rn, a);
11138
11139 if (wb == Post)
11140 address += offset;
11141
11142 if (wb != NoWriteBack)
11143 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11144 }
11145
11146 static void
11147 dex_load_store_pair_fp (sim_cpu *cpu)
11148 {
11149 /* instr[31,30] = size (10=> 128-bit, 01=> 64-bit, 00=> 32-bit)
11150 instr[29,25] = instruction encoding
11151 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11152 instr[22] = load/store (1=> load)
11153 instr[21,15] = signed, scaled, offset
11154 instr[14,10] = Rn
11155 instr[ 9, 5] = Rd
11156 instr[ 4, 0] = Rm */
11157
11158 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11159 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11160
11161 switch (dispatch)
11162 {
11163 case 2: store_pair_float (cpu, offset, Post); return;
11164 case 3: load_pair_float (cpu, offset, Post); return;
11165 case 4: store_pair_float (cpu, offset, NoWriteBack); return;
11166 case 5: load_pair_float (cpu, offset, NoWriteBack); return;
11167 case 6: store_pair_float (cpu, offset, Pre); return;
11168 case 7: load_pair_float (cpu, offset, Pre); return;
11169
11170 case 10: store_pair_double (cpu, offset, Post); return;
11171 case 11: load_pair_double (cpu, offset, Post); return;
11172 case 12: store_pair_double (cpu, offset, NoWriteBack); return;
11173 case 13: load_pair_double (cpu, offset, NoWriteBack); return;
11174 case 14: store_pair_double (cpu, offset, Pre); return;
11175 case 15: load_pair_double (cpu, offset, Pre); return;
11176
11177 case 18: store_pair_long_double (cpu, offset, Post); return;
11178 case 19: load_pair_long_double (cpu, offset, Post); return;
11179 case 20: store_pair_long_double (cpu, offset, NoWriteBack); return;
11180 case 21: load_pair_long_double (cpu, offset, NoWriteBack); return;
11181 case 22: store_pair_long_double (cpu, offset, Pre); return;
11182 case 23: load_pair_long_double (cpu, offset, Pre); return;
11183
11184 default:
11185 HALT_UNALLOC;
11186 }
11187 }
11188
11189 static inline unsigned
11190 vec_reg (unsigned v, unsigned o)
11191 {
11192   return (v + o) & 0x1F; /* Register numbers wrap modulo 32.  */
11193 }
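
/* For example LD4 {V30.16B-V1.16B}, [x0] uses registers V30, V31, V0
   and V1: vec_reg (30, 2) wraps to 0, matching the architected modulo
   32 register numbering for the multi-register forms.  */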
11194
11195 /* Load multiple N-element structures to N consecutive registers. */
11196 static void
11197 vec_load (sim_cpu *cpu, uint64_t address, unsigned N)
11198 {
11199 int all = INSTR (30, 30);
11200 unsigned size = INSTR (11, 10);
11201 unsigned vd = INSTR (4, 0);
11202 unsigned i;
11203
11204 switch (size)
11205 {
11206 case 0: /* 8-bit operations. */
11207 if (all)
11208 for (i = 0; i < (16 * N); i++)
11209 aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15,
11210 aarch64_get_mem_u8 (cpu, address + i));
11211 else
11212 for (i = 0; i < (8 * N); i++)
11213 aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7,
11214 aarch64_get_mem_u8 (cpu, address + i));
11215 return;
11216
11217 case 1: /* 16-bit operations. */
11218 if (all)
11219 for (i = 0; i < (8 * N); i++)
11220 aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7,
11221 aarch64_get_mem_u16 (cpu, address + i * 2));
11222 else
11223 for (i = 0; i < (4 * N); i++)
11224 aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3,
11225 aarch64_get_mem_u16 (cpu, address + i * 2));
11226 return;
11227
11228 case 2: /* 32-bit operations. */
11229 if (all)
11230 for (i = 0; i < (4 * N); i++)
11231 aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3,
11232 aarch64_get_mem_u32 (cpu, address + i * 4));
11233 else
11234 for (i = 0; i < (2 * N); i++)
11235 aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1,
11236 aarch64_get_mem_u32 (cpu, address + i * 4));
11237 return;
11238
11239 case 3: /* 64-bit operations. */
11240 if (all)
11241 for (i = 0; i < (2 * N); i++)
11242 aarch64_set_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1,
11243 aarch64_get_mem_u64 (cpu, address + i * 8));
11244 else
11245 for (i = 0; i < N; i++)
11246 aarch64_set_vec_u64 (cpu, vec_reg (vd, i), 0,
11247 aarch64_get_mem_u64 (cpu, address + i * 8));
11248 return;
11249 }
11250 }
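
/* N.B. vec_load fills the registers linearly: for LD2 {V0.8B, V1.8B}
   the first 8 memory bytes go to V0 and the next 8 to V1.  That is the
   architected layout for the multi-register LD1 forms, whereas LD2/LD3/
   LD4 are defined to de-interleave (alternating elements between the
   registers), which is why the LD1_[234] FIXMEs below query the shared
   implementation.  */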
11251
11252 /* LD4: load multiple 4-element structures to four consecutive registers. */
11253 static void
11254 LD4 (sim_cpu *cpu, uint64_t address)
11255 {
11256 vec_load (cpu, address, 4);
11257 }
11258
11259 /* LD3: load multiple 3-element structures to three consecutive registers. */
11260 static void
11261 LD3 (sim_cpu *cpu, uint64_t address)
11262 {
11263 vec_load (cpu, address, 3);
11264 }
11265
11266 /* LD2: load multiple 2-element structures to two consecutive registers. */
11267 static void
11268 LD2 (sim_cpu *cpu, uint64_t address)
11269 {
11270 vec_load (cpu, address, 2);
11271 }
11272
11273 /* Load multiple 1-element structures into one register. */
11274 static void
11275 LD1_1 (sim_cpu *cpu, uint64_t address)
11276 {
11277 int all = INSTR (30, 30);
11278 unsigned size = INSTR (11, 10);
11279 unsigned vd = INSTR (4, 0);
11280 unsigned i;
11281
11282 switch (size)
11283 {
11284 case 0:
11285 /* LD1 {Vd.16b}, addr, #16 */
11286 /* LD1 {Vd.8b}, addr, #8 */
11287 for (i = 0; i < (all ? 16 : 8); i++)
11288 aarch64_set_vec_u8 (cpu, vd, i,
11289 aarch64_get_mem_u8 (cpu, address + i));
11290 return;
11291
11292 case 1:
11293 /* LD1 {Vd.8h}, addr, #16 */
11294 /* LD1 {Vd.4h}, addr, #8 */
11295 for (i = 0; i < (all ? 8 : 4); i++)
11296 aarch64_set_vec_u16 (cpu, vd, i,
11297 aarch64_get_mem_u16 (cpu, address + i * 2));
11298 return;
11299
11300 case 2:
11301 /* LD1 {Vd.4s}, addr, #16 */
11302 /* LD1 {Vd.2s}, addr, #8 */
11303 for (i = 0; i < (all ? 4 : 2); i++)
11304 aarch64_set_vec_u32 (cpu, vd, i,
11305 aarch64_get_mem_u32 (cpu, address + i * 4));
11306 return;
11307
11308 case 3:
11309 /* LD1 {Vd.2d}, addr, #16 */
11310 /* LD1 {Vd.1d}, addr, #8 */
11311 for (i = 0; i < (all ? 2 : 1); i++)
11312 aarch64_set_vec_u64 (cpu, vd, i,
11313 aarch64_get_mem_u64 (cpu, address + i * 8));
11314 return;
11315 }
11316 }
11317
11318 /* Load multiple 1-element structures into two registers. */
11319 static void
11320 LD1_2 (sim_cpu *cpu, uint64_t address)
11321 {
11322 /* FIXME: This algorithm is *exactly* the same as the LD2 version.
11323 So why have two different instructions ? There must be something
11324 wrong somewhere. */
11325 vec_load (cpu, address, 2);
11326 }
11327
11328 /* Load multiple 1-element structures into three registers. */
11329 static void
11330 LD1_3 (sim_cpu *cpu, uint64_t address)
11331 {
11332 /* FIXME: This algorithm is *exactly* the same as the LD3 version.
11333 So why have two different instructions ? There must be something
11334 wrong somewhere. */
11335 vec_load (cpu, address, 3);
11336 }
11337
11338 /* Load multiple 1-element structures into four registers. */
11339 static void
11340 LD1_4 (sim_cpu *cpu, uint64_t address)
11341 {
11342 /* FIXME: This algorithm is *exactly* the same as the LD4 version.
11343 So why have two different instructions ? There must be something
11344 wrong somewhere. */
11345 vec_load (cpu, address, 4);
11346 }
11347
11348 /* Store multiple N-element structures to N consecutive registers. */
11349 static void
11350 vec_store (sim_cpu *cpu, uint64_t address, unsigned N)
11351 {
11352 int all = INSTR (30, 30);
11353 unsigned size = INSTR (11, 10);
11354 unsigned vd = INSTR (4, 0);
11355 unsigned i;
11356
11357 switch (size)
11358 {
11359 case 0: /* 8-bit operations. */
11360 if (all)
11361 for (i = 0; i < (16 * N); i++)
11362 aarch64_set_mem_u8
11363 (cpu, address + i,
11364 aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15));
11365 else
11366 for (i = 0; i < (8 * N); i++)
11367 aarch64_set_mem_u8
11368 (cpu, address + i,
11369 aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7));
11370 return;
11371
11372 case 1: /* 16-bit operations. */
11373 if (all)
11374 for (i = 0; i < (8 * N); i++)
11375 aarch64_set_mem_u16
11376 (cpu, address + i * 2,
11377 aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7));
11378 else
11379 for (i = 0; i < (4 * N); i++)
11380 aarch64_set_mem_u16
11381 (cpu, address + i * 2,
11382 aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3));
11383 return;
11384
11385 case 2: /* 32-bit operations. */
11386 if (all)
11387 for (i = 0; i < (4 * N); i++)
11388 aarch64_set_mem_u32
11389 (cpu, address + i * 4,
11390 aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3));
11391 else
11392 for (i = 0; i < (2 * N); i++)
11393 aarch64_set_mem_u32
11394 (cpu, address + i * 4,
11395 aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1));
11396 return;
11397
11398 case 3: /* 64-bit operations. */
11399 if (all)
11400 for (i = 0; i < (2 * N); i++)
11401 aarch64_set_mem_u64
11402 (cpu, address + i * 8,
11403 aarch64_get_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1));
11404 else
11405 for (i = 0; i < N; i++)
11406 aarch64_set_mem_u64
11407 (cpu, address + i * 8,
11408 aarch64_get_vec_u64 (cpu, vec_reg (vd, i), 0));
11409 return;
11410 }
11411 }
11412
11413 /* Store multiple 4-element structures to four consecutive registers. */
11414 static void
11415 ST4 (sim_cpu *cpu, uint64_t address)
11416 {
11417 vec_store (cpu, address, 4);
11418 }
11419
11420 /* Store multiple 3-element structures to three consecutive registers. */
11421 static void
11422 ST3 (sim_cpu *cpu, uint64_t address)
11423 {
11424 vec_store (cpu, address, 3);
11425 }
11426
11427 /* Store multiple 2-element structures to two consecutive registers. */
11428 static void
11429 ST2 (sim_cpu *cpu, uint64_t address)
11430 {
11431 vec_store (cpu, address, 2);
11432 }
11433
11434 /* Store multiple 1-element structures into one register. */
11435 static void
11436 ST1_1 (sim_cpu *cpu, uint64_t address)
11437 {
11438 int all = INSTR (30, 30);
11439 unsigned size = INSTR (11, 10);
11440 unsigned vd = INSTR (4, 0);
11441 unsigned i;
11442
11443 switch (size)
11444 {
11445 case 0:
11446 for (i = 0; i < (all ? 16 : 8); i++)
11447 aarch64_set_mem_u8 (cpu, address + i,
11448 aarch64_get_vec_u8 (cpu, vd, i));
11449 return;
11450
11451 case 1:
11452 for (i = 0; i < (all ? 8 : 4); i++)
11453 aarch64_set_mem_u16 (cpu, address + i * 2,
11454 aarch64_get_vec_u16 (cpu, vd, i));
11455 return;
11456
11457 case 2:
11458 for (i = 0; i < (all ? 4 : 2); i++)
11459 aarch64_set_mem_u32 (cpu, address + i * 4,
11460 aarch64_get_vec_u32 (cpu, vd, i));
11461 return;
11462
11463 case 3:
11464 for (i = 0; i < (all ? 2 : 1); i++)
11465 aarch64_set_mem_u64 (cpu, address + i * 8,
11466 aarch64_get_vec_u64 (cpu, vd, i));
11467 return;
11468 }
11469 }
11470
11471 /* Store multiple 1-element structures into two registers. */
11472 static void
11473 ST1_2 (sim_cpu *cpu, uint64_t address)
11474 {
11475 /* FIXME: This algorithm is *exactly* the same as the ST2 version.
11476 So why have two different instructions ? There must be
11477 something wrong somewhere. */
11478 vec_store (cpu, address, 2);
11479 }
11480
11481 /* Store multiple 1-element structures into three registers. */
11482 static void
11483 ST1_3 (sim_cpu *cpu, uint64_t address)
11484 {
11485 /* FIXME: This algorithm is *exactly* the same as the ST3 version.
11486 So why have two different instructions ? There must be
11487 something wrong somewhere. */
11488 vec_store (cpu, address, 3);
11489 }
11490
11491 /* Store multiple 1-element structures into four registers. */
11492 static void
11493 ST1_4 (sim_cpu *cpu, uint64_t address)
11494 {
11495 /* FIXME: This algorithm is *exactly* the same as the ST4 version.
11496 So why have two different instructions ? There must be
11497 something wrong somewhere. */
11498 vec_store (cpu, address, 4);
11499 }
11500
11501 static void
11502 do_vec_LDnR (sim_cpu *cpu, uint64_t address)
11503 {
11504 /* instr[31] = 0
11505 instr[30] = element selector 0=>half, 1=>all elements
11506 instr[29,24] = 00 1101
11507 instr[23] = 0=>simple, 1=>post
11508 instr[22] = 1
11509 instr[21] = width: LD1R-or-LD3R (0) / LD2R-or-LD4R (1)
11510 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11511 11111 (immediate post inc)
11512 instr[15,14] = 11
11513 instr[13] = width: LD1R-or-LD2R (0) / LD3R-or-LD4R (1)
11514 instr[12] = 0
11515 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11516 10=> word(s), 11=> double(d)
11517 instr[9,5] = address
11518 instr[4,0] = Vd */
11519
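  /* For example LD2R {V0.8B, V1.8B}, [x0] has instr[13] = 0 and
     instr[21] = 1, so the selector below is (0 << 1) | 1 = 1; with
     size = 00 and full = 0 the byte at [x0] is replicated into the
     8 lanes of V0 and the byte at [x0 + 1] into the 8 lanes of V1.  */
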
11520 unsigned full = INSTR (30, 30);
11521 unsigned vd = INSTR (4, 0);
11522 unsigned size = INSTR (11, 10);
11523 int i;
11524
11525 NYI_assert (29, 24, 0x0D);
11526 NYI_assert (22, 22, 1);
11527 NYI_assert (15, 14, 3);
11528 NYI_assert (12, 12, 0);
11529
11530 switch ((INSTR (13, 13) << 1) | INSTR (21, 21))
11531 {
11532 case 0: /* LD1R. */
11533 switch (size)
11534 {
11535 case 0:
11536 {
11537 uint8_t val = aarch64_get_mem_u8 (cpu, address);
11538 for (i = 0; i < (full ? 16 : 8); i++)
11539 aarch64_set_vec_u8 (cpu, vd, i, val);
11540 break;
11541 }
11542
11543 case 1:
11544 {
11545 uint16_t val = aarch64_get_mem_u16 (cpu, address);
11546 for (i = 0; i < (full ? 8 : 4); i++)
11547 aarch64_set_vec_u16 (cpu, vd, i, val);
11548 break;
11549 }
11550
11551 case 2:
11552 {
11553 uint32_t val = aarch64_get_mem_u32 (cpu, address);
11554 for (i = 0; i < (full ? 4 : 2); i++)
11555 aarch64_set_vec_u32 (cpu, vd, i, val);
11556 break;
11557 }
11558
11559 case 3:
11560 {
11561 uint64_t val = aarch64_get_mem_u64 (cpu, address);
11562 for (i = 0; i < (full ? 2 : 1); i++)
11563 aarch64_set_vec_u64 (cpu, vd, i, val);
11564 break;
11565 }
11566
11567 default:
11568 HALT_UNALLOC;
11569 }
11570 break;
11571
11572 case 1: /* LD2R. */
11573 switch (size)
11574 {
11575 case 0:
11576 {
11577 uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
11578 uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
11579
11580 for (i = 0; i < (full ? 16 : 8); i++)
11581 {
11582 	      aarch64_set_vec_u8 (cpu, vd, i, val1);
11583 	      aarch64_set_vec_u8 (cpu, vd + 1, i, val2);
11584 }
11585 break;
11586 }
11587
11588 case 1:
11589 {
11590 uint16_t val1 = aarch64_get_mem_u16 (cpu, address);
11591 uint16_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
11592
11593 for (i = 0; i < (full ? 8 : 4); i++)
11594 {
11595 	      aarch64_set_vec_u16 (cpu, vd, i, val1);
11596 	      aarch64_set_vec_u16 (cpu, vd + 1, i, val2);
11597 }
11598 break;
11599 }
11600
11601 case 2:
11602 {
11603 uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
11604 uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
11605
11606 for (i = 0; i < (full ? 4 : 2); i++)
11607 {
11608 	      aarch64_set_vec_u32 (cpu, vd, i, val1);
11609 	      aarch64_set_vec_u32 (cpu, vd + 1, i, val2);
11610 }
11611 break;
11612 }
11613
11614 case 3:
11615 {
11616 uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
11617 uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
11618
11619 for (i = 0; i < (full ? 2 : 1); i++)
11620 {
11621 	      aarch64_set_vec_u64 (cpu, vd, i, val1);
11622 	      aarch64_set_vec_u64 (cpu, vd + 1, i, val2);
11623 }
11624 break;
11625 }
11626
11627 default:
11628 HALT_UNALLOC;
11629 }
11630 break;
11631
11632 case 2: /* LD3R. */
11633 switch (size)
11634 {
11635 case 0:
11636 {
11637 uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
11638 uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
11639 uint8_t val3 = aarch64_get_mem_u8 (cpu, address + 2);
11640
11641 for (i = 0; i < (full ? 16 : 8); i++)
11642 {
11643 	      aarch64_set_vec_u8 (cpu, vd, i, val1);
11644 	      aarch64_set_vec_u8 (cpu, vd + 1, i, val2);
11645 	      aarch64_set_vec_u8 (cpu, vd + 2, i, val3);
11646 }
11647 }
11648 break;
11649
11650 case 1:
11651 {
11652 	    uint16_t val1 = aarch64_get_mem_u16 (cpu, address);
11653 	    uint16_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
11654 	    uint16_t val3 = aarch64_get_mem_u16 (cpu, address + 4);
11655
11656 for (i = 0; i < (full ? 8 : 4); i++)
11657 {
11658 	      aarch64_set_vec_u16 (cpu, vd, i, val1);
11659 	      aarch64_set_vec_u16 (cpu, vd + 1, i, val2);
11660 	      aarch64_set_vec_u16 (cpu, vd + 2, i, val3);
11661 }
11662 }
11663 break;
11664
11665 case 2:
11666 {
11667 uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
11668 uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
11669 uint32_t val3 = aarch64_get_mem_u32 (cpu, address + 8);
11670
11671 for (i = 0; i < (full ? 4 : 2); i++)
11672 {
11673 	      aarch64_set_vec_u32 (cpu, vd, i, val1);
11674 	      aarch64_set_vec_u32 (cpu, vd + 1, i, val2);
11675 	      aarch64_set_vec_u32 (cpu, vd + 2, i, val3);
11676 }
11677 }
11678 break;
11679
11680 case 3:
11681 {
11682 uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
11683 uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
11684 uint64_t val3 = aarch64_get_mem_u64 (cpu, address + 16);
11685
11686 for (i = 0; i < (full ? 2 : 1); i++)
11687 {
11688 	      aarch64_set_vec_u64 (cpu, vd, i, val1);
11689 	      aarch64_set_vec_u64 (cpu, vd + 1, i, val2);
11690 	      aarch64_set_vec_u64 (cpu, vd + 2, i, val3);
11691 }
11692 }
11693 break;
11694
11695 default:
11696 HALT_UNALLOC;
11697 }
11698 break;
11699
11700 case 3: /* LD4R. */
11701 switch (size)
11702 {
11703 case 0:
11704 {
11705 uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
11706 uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
11707 uint8_t val3 = aarch64_get_mem_u8 (cpu, address + 2);
11708 uint8_t val4 = aarch64_get_mem_u8 (cpu, address + 3);
11709
11710 for (i = 0; i < (full ? 16 : 8); i++)
11711 {
11712 	      aarch64_set_vec_u8 (cpu, vd, i, val1);
11713 	      aarch64_set_vec_u8 (cpu, vd + 1, i, val2);
11714 	      aarch64_set_vec_u8 (cpu, vd + 2, i, val3);
11715 	      aarch64_set_vec_u8 (cpu, vd + 3, i, val4);
11716 }
11717 }
11718 break;
11719
11720 case 1:
11721 {
11722 	    uint16_t val1 = aarch64_get_mem_u16 (cpu, address);
11723 	    uint16_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
11724 	    uint16_t val3 = aarch64_get_mem_u16 (cpu, address + 4);
11725 	    uint16_t val4 = aarch64_get_mem_u16 (cpu, address + 6);
11726
11727 for (i = 0; i < (full ? 8 : 4); i++)
11728 {
11729 	      aarch64_set_vec_u16 (cpu, vd, i, val1);
11730 	      aarch64_set_vec_u16 (cpu, vd + 1, i, val2);
11731 	      aarch64_set_vec_u16 (cpu, vd + 2, i, val3);
11732 	      aarch64_set_vec_u16 (cpu, vd + 3, i, val4);
11733 }
11734 }
11735 break;
11736
11737 case 2:
11738 {
11739 uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
11740 uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
11741 uint32_t val3 = aarch64_get_mem_u32 (cpu, address + 8);
11742 uint32_t val4 = aarch64_get_mem_u32 (cpu, address + 12);
11743
11744 for (i = 0; i < (full ? 4 : 2); i++)
11745 {
11746 	      aarch64_set_vec_u32 (cpu, vd, i, val1);
11747 	      aarch64_set_vec_u32 (cpu, vd + 1, i, val2);
11748 	      aarch64_set_vec_u32 (cpu, vd + 2, i, val3);
11749 	      aarch64_set_vec_u32 (cpu, vd + 3, i, val4);
11750 }
11751 }
11752 break;
11753
11754 case 3:
11755 {
11756 uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
11757 uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
11758 uint64_t val3 = aarch64_get_mem_u64 (cpu, address + 16);
11759 uint64_t val4 = aarch64_get_mem_u64 (cpu, address + 24);
11760
11761 for (i = 0; i < (full ? 2 : 1); i++)
11762 {
11763 	      aarch64_set_vec_u64 (cpu, vd, i, val1);
11764 	      aarch64_set_vec_u64 (cpu, vd + 1, i, val2);
11765 	      aarch64_set_vec_u64 (cpu, vd + 2, i, val3);
11766 	      aarch64_set_vec_u64 (cpu, vd + 3, i, val4);
11767 }
11768 }
11769 break;
11770
11771 default:
11772 HALT_UNALLOC;
11773 }
11774 break;
11775
11776 default:
11777 HALT_UNALLOC;
11778 }
11779 }
11780
11781 static void
11782 do_vec_load_store (sim_cpu *cpu)
11783 {
11784 /* {LD|ST}<N> {Vd..Vd+N}, vaddr
11785
11786 instr[31] = 0
11787 instr[30] = element selector 0=>half, 1=>all elements
11788 instr[29,25] = 00110
11789 instr[24] = ?
11790 instr[23] = 0=>simple, 1=>post
11791 instr[22] = 0=>store, 1=>load
11792 instr[21] = 0 (LDn) / small(0)-large(1) selector (LDnR)
11793 instr[20,16] = 00000 (simple), Vinc (reg-post-inc, no SP),
11794 11111 (immediate post inc)
11795 instr[15,12] = elements and destinations. eg for load:
11796 0000=>LD4 => load multiple 4-element to
11797 four consecutive registers
11798 0100=>LD3 => load multiple 3-element to
11799 three consecutive registers
11800 1000=>LD2 => load multiple 2-element to
11801 two consecutive registers
11802 0010=>LD1 => load multiple 1-element to
11803 four consecutive registers
11804 0110=>LD1 => load multiple 1-element to
11805 three consecutive registers
11806 1010=>LD1 => load multiple 1-element to
11807 two consecutive registers
11808 0111=>LD1 => load multiple 1-element to
11809 one register
11810                    1100=>LD1R,LD2R
11811                    1110=>LD3R,LD4R
11812 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11813 10=> word(s), 11=> double(d)
11814 instr[9,5] = Vn, can be SP
11815 instr[4,0] = Vd */
11816
11817 int post;
11818 int load;
11819 unsigned vn;
11820 uint64_t address;
11821 int type;
11822
11823 if (INSTR (31, 31) != 0 || INSTR (29, 25) != 0x06)
11824 HALT_NYI;
11825
11826 type = INSTR (15, 12);
11827   if (type != 0xE && type != 0xC && INSTR (21, 21) != 0)
11828 HALT_NYI;
11829
11830 post = INSTR (23, 23);
11831 load = INSTR (22, 22);
11832 vn = INSTR (9, 5);
11833 address = aarch64_get_reg_u64 (cpu, vn, SP_OK);
11834
11835 if (post)
11836 {
11837 unsigned vm = INSTR (20, 16);
11838
11839 if (vm == R31)
11840 {
11841 unsigned sizeof_operation;
11842
11843 switch (type)
11844 {
11845 case 0: sizeof_operation = 32; break;
11846 case 4: sizeof_operation = 24; break;
11847 case 8: sizeof_operation = 16; break;
11848
11849 case 0xC:
11850 sizeof_operation = INSTR (21, 21) ? 2 : 1;
11851 sizeof_operation <<= INSTR (11, 10);
11852 break;
11853
11854 case 0xE:
11855 sizeof_operation = INSTR (21, 21) ? 8 : 4;
11856 sizeof_operation <<= INSTR (11, 10);
11857 break;
11858
11859 case 7:
11860 /* One register, immediate offset variant. */
11861 sizeof_operation = 8;
11862 break;
11863
11864 case 10:
11865 /* Two registers, immediate offset variant. */
11866 sizeof_operation = 16;
11867 break;
11868
11869 case 6:
11870 /* Three registers, immediate offset variant. */
11871 sizeof_operation = 24;
11872 break;
11873
11874 case 2:
11875 /* Four registers, immediate offset variant. */
11876 sizeof_operation = 32;
11877 break;
11878
11879 default:
11880 HALT_UNALLOC;
11881 }
11882
11883 if (INSTR (30, 30))
11884 sizeof_operation *= 2;
11885
11886 aarch64_set_reg_u64 (cpu, vn, SP_OK, address + sizeof_operation);
11887 }
11888 else
11889 aarch64_set_reg_u64 (cpu, vn, SP_OK,
11890 address + aarch64_get_reg_u64 (cpu, vm, NO_SP));
11891 }
11892 else
11893 {
11894 NYI_assert (20, 16, 0);
11895 }
11896
11897 if (load)
11898 {
11899 switch (type)
11900 {
11901 case 0: LD4 (cpu, address); return;
11902 case 4: LD3 (cpu, address); return;
11903 case 8: LD2 (cpu, address); return;
11904 case 2: LD1_4 (cpu, address); return;
11905 case 6: LD1_3 (cpu, address); return;
11906 case 10: LD1_2 (cpu, address); return;
11907 case 7: LD1_1 (cpu, address); return;
11908
11909 case 0xE:
11910 case 0xC: do_vec_LDnR (cpu, address); return;
11911
11912 default:
11913 HALT_NYI;
11914 }
11915 }
11916
11917 /* Stores. */
11918 switch (type)
11919 {
11920 case 0: ST4 (cpu, address); return;
11921 case 4: ST3 (cpu, address); return;
11922 case 8: ST2 (cpu, address); return;
11923 case 2: ST1_4 (cpu, address); return;
11924 case 6: ST1_3 (cpu, address); return;
11925 case 10: ST1_2 (cpu, address); return;
11926 case 7: ST1_1 (cpu, address); return;
11927 default:
11928 HALT_NYI;
11929 }
11930 }
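
/* A post-increment example: LD1 {V0.16B}, [x0], #16 has type = 7,
   bit 30 set and an Rm field of 11111, so sizeof_operation is 8,
   doubled to 16, and x0 advances by 16 bytes.  */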
11931
11932 static void
11933 dexLdSt (sim_cpu *cpu)
11934 {
11935 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
11936 assert group == GROUP_LDST_0100 || group == GROUP_LDST_0110 ||
11937 group == GROUP_LDST_1100 || group == GROUP_LDST_1110
11938 bits [29,28:26] of a LS are the secondary dispatch vector. */
11939 uint32_t group2 = dispatchLS (aarch64_get_instr (cpu));
11940
11941 switch (group2)
11942 {
11943 case LS_EXCL_000:
11944 dexLoadExclusive (cpu); return;
11945
11946 case LS_LIT_010:
11947 case LS_LIT_011:
11948 dexLoadLiteral (cpu); return;
11949
11950 case LS_OTHER_110:
11951 case LS_OTHER_111:
11952 dexLoadOther (cpu); return;
11953
11954 case LS_ADVSIMD_001:
11955 do_vec_load_store (cpu); return;
11956
11957 case LS_PAIR_100:
11958 dex_load_store_pair_gr (cpu); return;
11959
11960 case LS_PAIR_101:
11961 dex_load_store_pair_fp (cpu); return;
11962
11963 default:
11964 /* Should never reach here. */
11965 HALT_NYI;
11966 }
11967 }
11968
11969 /* Specific decode and execute for group Data Processing Register. */
11970
11971 static void
11972 dexLogicalShiftedRegister (sim_cpu *cpu)
11973 {
11974 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
11975 instr[30,29] = op
11976 instr[28:24] = 01010
11977 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> ROR
11978 instr[21] = N
11979 instr[20,16] = Rm
11980 instr[15,10] = count : must be 0xxxxx for 32 bit
11981 instr[9,5] = Rn
11982 instr[4,0] = Rd */
11983
11984 uint32_t size = INSTR (31, 31);
11985 Shift shiftType = INSTR (23, 22);
11986 uint32_t count = INSTR (15, 10);
11987
11988   /* 32 bit operations must have count[5] = 0,
11989      otherwise we have an UNALLOC. */
11990 if (size == 0 && uimm (count, 5, 5))
11991 HALT_UNALLOC;
11992
11993 /* Dispatch on size:op:N. */
11994 switch ((INSTR (31, 29) << 1) | INSTR (21, 21))
11995 {
11996 case 0: and32_shift (cpu, shiftType, count); return;
11997 case 1: bic32_shift (cpu, shiftType, count); return;
11998 case 2: orr32_shift (cpu, shiftType, count); return;
11999 case 3: orn32_shift (cpu, shiftType, count); return;
12000 case 4: eor32_shift (cpu, shiftType, count); return;
12001 case 5: eon32_shift (cpu, shiftType, count); return;
12002 case 6: ands32_shift (cpu, shiftType, count); return;
12003 case 7: bics32_shift (cpu, shiftType, count); return;
12004 case 8: and64_shift (cpu, shiftType, count); return;
12005 case 9: bic64_shift (cpu, shiftType, count); return;
12006     case 10: orr64_shift (cpu, shiftType, count); return;
12007     case 11: orn64_shift (cpu, shiftType, count); return;
12008     case 12: eor64_shift (cpu, shiftType, count); return;
12009     case 13: eon64_shift (cpu, shiftType, count); return;
12010     case 14: ands64_shift (cpu, shiftType, count); return;
12011     case 15: bics64_shift (cpu, shiftType, count); return;
12012 }
12013 }
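
/* A worked dispatch example, assuming the standard A64 encoding:
   BIC x0, x1, x2 has size = 1, op = 00 and N = 1, so the size:op:N
   value is (4 << 1) | 1 = 9 and bic64_shift is called with a zero
   shift count.  */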
12014
12015 /* 32 bit conditional select. */
12016 static void
12017 csel32 (sim_cpu *cpu, CondCode cc)
12018 {
12019 unsigned rm = INSTR (20, 16);
12020 unsigned rn = INSTR (9, 5);
12021 unsigned rd = INSTR (4, 0);
12022
12023 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12024 testConditionCode (cpu, cc)
12025 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12026 : aarch64_get_reg_u32 (cpu, rm, NO_SP));
12027 }
12028
12029 /* 64 bit conditional select. */
12030 static void
12031 csel64 (sim_cpu *cpu, CondCode cc)
12032 {
12033 unsigned rm = INSTR (20, 16);
12034 unsigned rn = INSTR (9, 5);
12035 unsigned rd = INSTR (4, 0);
12036
12037 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12038 testConditionCode (cpu, cc)
12039 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12040 : aarch64_get_reg_u64 (cpu, rm, NO_SP));
12041 }
12042
12043 /* 32 bit conditional increment. */
12044 static void
12045 csinc32 (sim_cpu *cpu, CondCode cc)
12046 {
12047 unsigned rm = INSTR (20, 16);
12048 unsigned rn = INSTR (9, 5);
12049 unsigned rd = INSTR (4, 0);
12050
12051 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12052 testConditionCode (cpu, cc)
12053 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12054 : aarch64_get_reg_u32 (cpu, rm, NO_SP) + 1);
12055 }
12056
12057 /* 64 bit conditional increment. */
12058 static void
12059 csinc64 (sim_cpu *cpu, CondCode cc)
12060 {
12061 unsigned rm = INSTR (20, 16);
12062 unsigned rn = INSTR (9, 5);
12063 unsigned rd = INSTR (4, 0);
12064
12065 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12066 testConditionCode (cpu, cc)
12067 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12068 : aarch64_get_reg_u64 (cpu, rm, NO_SP) + 1);
12069 }
12070
12071 /* 32 bit conditional invert. */
12072 static void
12073 csinv32 (sim_cpu *cpu, CondCode cc)
12074 {
12075 unsigned rm = INSTR (20, 16);
12076 unsigned rn = INSTR (9, 5);
12077 unsigned rd = INSTR (4, 0);
12078
12079 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12080 testConditionCode (cpu, cc)
12081 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12082 : ~ aarch64_get_reg_u32 (cpu, rm, NO_SP));
12083 }
12084
12085 /* 64 bit conditional invert. */
12086 static void
12087 csinv64 (sim_cpu *cpu, CondCode cc)
12088 {
12089 unsigned rm = INSTR (20, 16);
12090 unsigned rn = INSTR (9, 5);
12091 unsigned rd = INSTR (4, 0);
12092
12093 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12094 testConditionCode (cpu, cc)
12095 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12096 : ~ aarch64_get_reg_u64 (cpu, rm, NO_SP));
12097 }
12098
12099 /* 32 bit conditional negate. */
12100 static void
12101 csneg32 (sim_cpu *cpu, CondCode cc)
12102 {
12103 unsigned rm = INSTR (20, 16);
12104 unsigned rn = INSTR (9, 5);
12105 unsigned rd = INSTR (4, 0);
12106
12107 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12108 testConditionCode (cpu, cc)
12109 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12110 : - aarch64_get_reg_u32 (cpu, rm, NO_SP));
12111 }
12112
12113 /* 64 bit conditional negate. */
12114 static void
12115 csneg64 (sim_cpu *cpu, CondCode cc)
12116 {
12117 unsigned rm = INSTR (20, 16);
12118 unsigned rn = INSTR (9, 5);
12119 unsigned rd = INSTR (4, 0);
12120
12121 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12122 testConditionCode (cpu, cc)
12123 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12124 : - aarch64_get_reg_u64 (cpu, rm, NO_SP));
12125 }
12126
12127 static void
12128 dexCondSelect (sim_cpu *cpu)
12129 {
12130   /* instr[28,21] = 11010100
12131      instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12132      instr[30],instr[11,10] = op : 000 ==> CSEL, 001 ==> CSINC,
12133                                    100 ==> CSINV, 101 ==> CSNEG,
12134                                    _1_ ==> UNALLOC
12135      instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12136      instr[15,12] = cond */
12138
12139 CondCode cc = INSTR (15, 12);
12140 uint32_t S = INSTR (29, 29);
12141 uint32_t op2 = INSTR (11, 10);
12142
12143 if (S == 1)
12144 HALT_UNALLOC;
12145
12146 if (op2 & 0x2)
12147 HALT_UNALLOC;
12148
12149 switch ((INSTR (31, 30) << 1) | op2)
12150 {
12151 case 0: csel32 (cpu, cc); return;
12152 case 1: csinc32 (cpu, cc); return;
12153 case 2: csinv32 (cpu, cc); return;
12154 case 3: csneg32 (cpu, cc); return;
12155 case 4: csel64 (cpu, cc); return;
12156 case 5: csinc64 (cpu, cc); return;
12157 case 6: csinv64 (cpu, cc); return;
12158 case 7: csneg64 (cpu, cc); return;
12159 }
12160 }
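
/* A worked dispatch example: CSINC w0, w1, w2, NE has size = 0, op = 0
   and op2 = 01, so the dispatch value is 1 and csinc32 is called.  The
   common CINC alias expands to CSINC with rn == rm and the inverted
   condition.  */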
12161
12162 /* Some helpers for counting leading 1 or 0 bits. */
12163
12164 /* Counts the number of leading bits which are the same
12165    in a 32 bit value; the result is in the range 1 to 32. */
12166 static uint32_t
12167 leading32 (uint32_t value)
12168 {
12169   int32_t mask = 0xffff0000;
12170   uint32_t count = 16; /* Counts number of bits set in mask. */
12171 uint32_t lo = 1; /* Lower bound for number of sign bits. */
12172 uint32_t hi = 32; /* Upper bound for number of sign bits. */
12173
12174 while (lo + 1 < hi)
12175 {
12176 int32_t test = (value & mask);
12177
12178 if (test == 0 || test == mask)
12179 {
12180 lo = count;
12181 count = (lo + hi) / 2;
12182 mask >>= (count - lo);
12183 }
12184 else
12185 {
12186 hi = count;
12187 count = (lo + hi) / 2;
12188 mask <<= hi - count;
12189 }
12190 }
12191
12192 if (lo != hi)
12193 {
12194 int32_t test;
12195
12196 mask >>= 1;
12197 test = (value & mask);
12198
12199 if (test == 0 || test == mask)
12200 count = hi;
12201 else
12202 count = lo;
12203 }
12204
12205 return count;
12206 }
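
/* For example leading32 (0xffff8000) returns 17 (seventeen copies of
   the sign bit), so cls32 below reports 16, and leading32 (0x00010000)
   returns 15, which is also the CLZ result since the top bit is clear.  */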
12207
12208 /* Counts the number of leading bits which are the same
12209    in a 64 bit value; the result is in the range 1 to 64. */
12210 static uint64_t
12211 leading64 (uint64_t value)
12212 {
12213   int64_t mask = 0xffffffff00000000LL;
12214 uint64_t count = 32; /* Counts number of bits set in mask. */
12215 uint64_t lo = 1; /* Lower bound for number of sign bits. */
12216 uint64_t hi = 64; /* Upper bound for number of sign bits. */
12217
12218 while (lo + 1 < hi)
12219 {
12220 int64_t test = (value & mask);
12221
12222 if (test == 0 || test == mask)
12223 {
12224 lo = count;
12225 count = (lo + hi) / 2;
12226 mask >>= (count - lo);
12227 }
12228 else
12229 {
12230 hi = count;
12231 count = (lo + hi) / 2;
12232 mask <<= hi - count;
12233 }
12234 }
12235
12236 if (lo != hi)
12237 {
12238 int64_t test;
12239
12240 mask >>= 1;
12241 test = (value & mask);
12242
12243 if (test == 0 || test == mask)
12244 count = hi;
12245 else
12246 count = lo;
12247 }
12248
12249 return count;
12250 }
12251
12252 /* Bit operations. */
12253 /* N.B. register args may not be SP. */
12254
12255 /* 32 bit count leading sign bits. */
12256 static void
12257 cls32 (sim_cpu *cpu)
12258 {
12259 unsigned rn = INSTR (9, 5);
12260 unsigned rd = INSTR (4, 0);
12261
12262 /* N.B. the result needs to exclude the leading bit. */
12263 aarch64_set_reg_u64
12264 (cpu, rd, NO_SP, leading32 (aarch64_get_reg_u32 (cpu, rn, NO_SP)) - 1);
12265 }
12266
12267 /* 64 bit count leading sign bits. */
12268 static void
12269 cls64 (sim_cpu *cpu)
12270 {
12271 unsigned rn = INSTR (9, 5);
12272 unsigned rd = INSTR (4, 0);
12273
12274 /* N.B. the result needs to exclude the leading bit. */
12275 aarch64_set_reg_u64
12276 (cpu, rd, NO_SP, leading64 (aarch64_get_reg_u64 (cpu, rn, NO_SP)) - 1);
12277 }
12278
12279 /* 32 bit count leading zero bits. */
12280 static void
12281 clz32 (sim_cpu *cpu)
12282 {
12283 unsigned rn = INSTR (9, 5);
12284 unsigned rd = INSTR (4, 0);
12285 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12286
12287 /* if the sign (top) bit is set then the count is 0. */
12288 if (pick32 (value, 31, 31))
12289 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12290 else
12291 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading32 (value));
12292 }
12293
12294 /* 64 bit count leading zero bits. */
12295 static void
12296 clz64 (sim_cpu *cpu)
12297 {
12298 unsigned rn = INSTR (9, 5);
12299 unsigned rd = INSTR (4, 0);
12300 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12301
12302 /* if the sign (top) bit is set then the count is 0. */
12303 if (pick64 (value, 63, 63))
12304 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12305 else
12306 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading64 (value));
12307 }
12308
12309 /* 32 bit reverse bits. */
12310 static void
12311 rbit32 (sim_cpu *cpu)
12312 {
12313 unsigned rn = INSTR (9, 5);
12314 unsigned rd = INSTR (4, 0);
12315 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12316 uint32_t result = 0;
12317 int i;
12318
12319 for (i = 0; i < 32; i++)
12320 {
12321 result <<= 1;
12322 result |= (value & 1);
12323 value >>= 1;
12324 }
12325 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12326 }
12327
12328 /* 64 bit reverse bits. */
12329 static void
12330 rbit64 (sim_cpu *cpu)
12331 {
12332 unsigned rn = INSTR (9, 5);
12333 unsigned rd = INSTR (4, 0);
12334 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12335 uint64_t result = 0;
12336 int i;
12337
12338 for (i = 0; i < 64; i++)
12339 {
12340 result <<= 1;
12341 result |= (value & 1UL);
12342 value >>= 1;
12343 }
12344 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12345 }
12346
12347 /* 32 bit reverse bytes. */
12348 static void
12349 rev32 (sim_cpu *cpu)
12350 {
12351 unsigned rn = INSTR (9, 5);
12352 unsigned rd = INSTR (4, 0);
12353 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12354 uint32_t result = 0;
12355 int i;
12356
12357 for (i = 0; i < 4; i++)
12358 {
12359 result <<= 8;
12360 result |= (value & 0xff);
12361 value >>= 8;
12362 }
12363 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12364 }
12365
12366 /* 64 bit reverse bytes. */
12367 static void
12368 rev64 (sim_cpu *cpu)
12369 {
12370 unsigned rn = INSTR (9, 5);
12371 unsigned rd = INSTR (4, 0);
12372 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12373 uint64_t result = 0;
12374 int i;
12375
12376 for (i = 0; i < 8; i++)
12377 {
12378 result <<= 8;
12379 result |= (value & 0xffULL);
12380 value >>= 8;
12381 }
12382 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12383 }
12384
12385 /* 32 bit reverse shorts. */
12386 /* N.B. this reverses the order of the bytes in each half word. */
12387 static void
12388 revh32 (sim_cpu *cpu)
12389 {
12390 unsigned rn = INSTR (9, 5);
12391 unsigned rd = INSTR (4, 0);
12392 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12393 uint32_t result = 0;
12394 int i;
12395
12396 for (i = 0; i < 2; i++)
12397 {
12398 result <<= 8;
12399 result |= (value & 0x00ff00ff);
12400 value >>= 8;
12401 }
12402 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12403 }
12404
12405 /* 64 bit reverse shorts. */
12406 /* N.B. this reverses the order of the bytes in each half word. */
12407 static void
12408 revh64 (sim_cpu *cpu)
12409 {
12410 unsigned rn = INSTR (9, 5);
12411 unsigned rd = INSTR (4, 0);
12412 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12413 uint64_t result = 0;
12414 int i;
12415
12416 for (i = 0; i < 2; i++)
12417 {
12418 result <<= 8;
12419 result |= (value & 0x00ff00ff00ff00ffULL);
12420 value >>= 8;
12421 }
12422 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12423 }
12424
12425 static void
12426 dexDataProc1Source (sim_cpu *cpu)
12427 {
12428 /* instr[30] = 1
12429 instr[28,21] = 11010110
12430 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12431 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12432 instr[20,16] = opcode2 : 00000 ==> ok, ow ==> UNALLOC
12433 instr[15,10] = opcode : 000000 ==> RBIT, 000001 ==> REV16,
12434 000010 ==> REV, 000011 ==> REV64 (64 bit)/UNALLOC (32 bit)
12435 000100 ==> CLZ, 000101 ==> CLS
12436 ow ==> UNALLOC
12437 instr[9,5] = rn : may not be SP
12438 instr[4,0] = rd : may not be SP. */
12439
12440 uint32_t S = INSTR (29, 29);
12441 uint32_t opcode2 = INSTR (20, 16);
12442 uint32_t opcode = INSTR (15, 10);
12443 uint32_t dispatch = ((INSTR (31, 31) << 3) | opcode);
12444
12445 if (S == 1)
12446 HALT_UNALLOC;
12447
12448 if (opcode2 != 0)
12449 HALT_UNALLOC;
12450
12451 if (opcode & 0x38)
12452 HALT_UNALLOC;
12453
12454 switch (dispatch)
12455 {
12456 case 0: rbit32 (cpu); return;
12457 case 1: revh32 (cpu); return;
12458 case 2: rev32 (cpu); return;
12459 case 4: clz32 (cpu); return;
12460 case 5: cls32 (cpu); return;
12461 case 8: rbit64 (cpu); return;
12462 case 9: revh64 (cpu); return;
12463 case 10: rev32 (cpu); return;
12464 case 11: rev64 (cpu); return;
12465 case 12: clz64 (cpu); return;
12466 case 13: cls64 (cpu); return;
12467 default: HALT_UNALLOC;
12468 }
12469 }
12470
12471 /* Variable shift.
12472 Shifts by count supplied in register.
12473 N.B register args may not be SP.
12474 These all use the shifted auxiliary function for
12475 simplicity and clarity. Writing the actual shift
12476 inline would avoid a branch and so be faster but
12477 would also necessitate getting signs right. */
12478
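/* For illustration, a sketch of the inline variant alluded to above
   (not built into the simulator; it assumes the host's right shift of
   a signed operand is arithmetic, which C leaves implementation-defined
   but every supported host provides).  */
#if 0
static void
asrv32_inline (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  int32_t  value = aarch64_get_reg_s32 (cpu, rn, NO_SP);
  uint32_t count = aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f;

  /* Shifting the signed value right propagates the sign bit.  */
  aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) (value >> count));
}
#endif
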
12479 /* 32 bit arithmetic shift right. */
12480 static void
12481 asrv32 (sim_cpu *cpu)
12482 {
12483 unsigned rm = INSTR (20, 16);
12484 unsigned rn = INSTR (9, 5);
12485 unsigned rd = INSTR (4, 0);
12486
12487 aarch64_set_reg_u64
12488 (cpu, rd, NO_SP,
12489 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ASR,
12490 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12491 }
12492
12493 /* 64 bit arithmetic shift right. */
12494 static void
12495 asrv64 (sim_cpu *cpu)
12496 {
12497 unsigned rm = INSTR (20, 16);
12498 unsigned rn = INSTR (9, 5);
12499 unsigned rd = INSTR (4, 0);
12500
12501 aarch64_set_reg_u64
12502 (cpu, rd, NO_SP,
12503 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ASR,
12504 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12505 }
12506
12507 /* 32 bit logical shift left. */
12508 static void
12509 lslv32 (sim_cpu *cpu)
12510 {
12511 unsigned rm = INSTR (20, 16);
12512 unsigned rn = INSTR (9, 5);
12513 unsigned rd = INSTR (4, 0);
12514
12515 aarch64_set_reg_u64
12516 (cpu, rd, NO_SP,
12517 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSL,
12518 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12519 }
12520
12521 /* 64 bit logical shift left. */
12522 static void
12523 lslv64 (sim_cpu *cpu)
12524 {
12525 unsigned rm = INSTR (20, 16);
12526 unsigned rn = INSTR (9, 5);
12527 unsigned rd = INSTR (4, 0);
12528
12529 aarch64_set_reg_u64
12530 (cpu, rd, NO_SP,
12531 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSL,
12532 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12533 }
12534
12535 /* 32 bit logical shift right. */
12536 static void
12537 lsrv32 (sim_cpu *cpu)
12538 {
12539 unsigned rm = INSTR (20, 16);
12540 unsigned rn = INSTR (9, 5);
12541 unsigned rd = INSTR (4, 0);
12542
12543 aarch64_set_reg_u64
12544 (cpu, rd, NO_SP,
12545 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSR,
12546 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12547 }
12548
12549 /* 64 bit logical shift right. */
12550 static void
12551 lsrv64 (sim_cpu *cpu)
12552 {
12553 unsigned rm = INSTR (20, 16);
12554 unsigned rn = INSTR (9, 5);
12555 unsigned rd = INSTR (4, 0);
12556
12557 aarch64_set_reg_u64
12558 (cpu, rd, NO_SP,
12559 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSR,
12560 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12561 }
12562
12563 /* 32 bit rotate right. */
12564 static void
12565 rorv32 (sim_cpu *cpu)
12566 {
12567 unsigned rm = INSTR (20, 16);
12568 unsigned rn = INSTR (9, 5);
12569 unsigned rd = INSTR (4, 0);
12570
12571 aarch64_set_reg_u64
12572 (cpu, rd, NO_SP,
12573 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ROR,
12574 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12575 }
12576
12577 /* 64 bit rotate right. */
12578 static void
12579 rorv64 (sim_cpu *cpu)
12580 {
12581 unsigned rm = INSTR (20, 16);
12582 unsigned rn = INSTR (9, 5);
12583 unsigned rd = INSTR (4, 0);
12584
12585 aarch64_set_reg_u64
12586 (cpu, rd, NO_SP,
12587 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ROR,
12588 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12589 }
12590
12591
12592 /* divide. */
12593
12594 /* 32 bit signed divide. */
12595 static void
12596 sdiv32 (sim_cpu *cpu)
12597 {
12598 unsigned rm = INSTR (20, 16);
12599 unsigned rn = INSTR (9, 5);
12600 unsigned rd = INSTR (4, 0);
12601 /* N.B. the pseudo-code does the divide using 64 bit data. */
12602 /* TODO : check that this rounds towards zero as required. */
12603 int64_t dividend = aarch64_get_reg_s32 (cpu, rn, NO_SP);
12604 int64_t divisor = aarch64_get_reg_s32 (cpu, rm, NO_SP);
12605
12606 aarch64_set_reg_s64 (cpu, rd, NO_SP,
12607 divisor ? ((int32_t) (dividend / divisor)) : 0);
12608 }
12609
12610 /* 64 bit signed divide. */
12611 static void
12612 sdiv64 (sim_cpu *cpu)
12613 {
12614 unsigned rm = INSTR (20, 16);
12615 unsigned rn = INSTR (9, 5);
12616 unsigned rd = INSTR (4, 0);
12617
12618 /* TODO : check that this rounds towards zero as required. */
12619 int64_t divisor = aarch64_get_reg_s64 (cpu, rm, NO_SP);
12620
12621 aarch64_set_reg_s64
12622 (cpu, rd, NO_SP,
12623 divisor ? (aarch64_get_reg_s64 (cpu, rn, NO_SP) / divisor) : 0);
12624 }
12625
12626 /* 32 bit unsigned divide. */
12627 static void
12628 udiv32 (sim_cpu *cpu)
12629 {
12630 unsigned rm = INSTR (20, 16);
12631 unsigned rn = INSTR (9, 5);
12632 unsigned rd = INSTR (4, 0);
12633
12634 /* N.B. the pseudo-code does the divide using 64 bit data. */
12635 uint64_t dividend = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12636 uint64_t divisor = aarch64_get_reg_u32 (cpu, rm, NO_SP);
12637
12638 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12639 divisor ? (uint32_t) (dividend / divisor) : 0);
12640 }
12641
12642 /* 64 bit unsigned divide. */
12643 static void
12644 udiv64 (sim_cpu *cpu)
12645 {
12646 unsigned rm = INSTR (20, 16);
12647 unsigned rn = INSTR (9, 5);
12648 unsigned rd = INSTR (4, 0);
12649
12650 /* TODO : check that this rounds towards zero as required. */
12651 uint64_t divisor = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12652
12653 aarch64_set_reg_u64
12654 (cpu, rd, NO_SP,
12655 divisor ? (aarch64_get_reg_u64 (cpu, rn, NO_SP) / divisor) : 0);
12656 }
12657
12658 static void
12659 dexDataProc2Source (sim_cpu *cpu)
12660 {
12661 /* assert instr[30] == 0
12662 instr[28,21] == 11010110
12663 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12664 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12665 instr[15,10] = opcode : 000010 ==> UDIV, 000011 ==> SDIV,
12666 001000 ==> LSLV, 001001 ==> LSRV
12667 001010 ==> ASRV, 001011 ==> RORV
12668 ow ==> UNALLOC. */
12669
12670 uint32_t dispatch;
12671 uint32_t S = INSTR (29, 29);
12672 uint32_t opcode = INSTR (15, 10);
12673
12674 if (S == 1)
12675 HALT_UNALLOC;
12676
12677 if (opcode & 0x34)
12678 HALT_UNALLOC;
12679
12680 dispatch = ( (INSTR (31, 31) << 3)
12681 | (uimm (opcode, 3, 3) << 2)
12682 | uimm (opcode, 1, 0));
12683 switch (dispatch)
12684 {
12685 case 2: udiv32 (cpu); return;
12686 case 3: sdiv32 (cpu); return;
12687 case 4: lslv32 (cpu); return;
12688 case 5: lsrv32 (cpu); return;
12689 case 6: asrv32 (cpu); return;
12690 case 7: rorv32 (cpu); return;
12691 case 10: udiv64 (cpu); return;
12692 case 11: sdiv64 (cpu); return;
12693 case 12: lslv64 (cpu); return;
12694 case 13: lsrv64 (cpu); return;
12695 case 14: asrv64 (cpu); return;
12696 case 15: rorv64 (cpu); return;
12697 default: HALT_UNALLOC;
12698 }
12699 }
12700
12701
12702 /* Multiply. */
12703
12704 /* 32 bit multiply and add. */
12705 static void
12706 madd32 (sim_cpu *cpu)
12707 {
12708 unsigned rm = INSTR (20, 16);
12709 unsigned ra = INSTR (14, 10);
12710 unsigned rn = INSTR (9, 5);
12711 unsigned rd = INSTR (4, 0);
12712
12713 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12714 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12715 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12716 + aarch64_get_reg_u32 (cpu, rn, NO_SP)
12717 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12718 }
12719
12720 /* 64 bit multiply and add. */
12721 static void
12722 madd64 (sim_cpu *cpu)
12723 {
12724 unsigned rm = INSTR (20, 16);
12725 unsigned ra = INSTR (14, 10);
12726 unsigned rn = INSTR (9, 5);
12727 unsigned rd = INSTR (4, 0);
12728
12729 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12730 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12731 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12732 + (aarch64_get_reg_u64 (cpu, rn, NO_SP)
12733 * aarch64_get_reg_u64 (cpu, rm, NO_SP)));
12734 }
12735
12736 /* 32 bit multiply and sub. */
12737 static void
12738 msub32 (sim_cpu *cpu)
12739 {
12740 unsigned rm = INSTR (20, 16);
12741 unsigned ra = INSTR (14, 10);
12742 unsigned rn = INSTR (9, 5);
12743 unsigned rd = INSTR (4, 0);
12744
12745 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12746 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12747 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12748 - aarch64_get_reg_u32 (cpu, rn, NO_SP)
12749 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12750 }
12751
12752 /* 64 bit multiply and sub. */
12753 static void
12754 msub64 (sim_cpu *cpu)
12755 {
12756 unsigned rm = INSTR (20, 16);
12757 unsigned ra = INSTR (14, 10);
12758 unsigned rn = INSTR (9, 5);
12759 unsigned rd = INSTR (4, 0);
12760
12761 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12762 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12763 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12764 - aarch64_get_reg_u64 (cpu, rn, NO_SP)
12765 * aarch64_get_reg_u64 (cpu, rm, NO_SP));
12766 }
12767
12768 /* Signed multiply add long -- source, source2 : 32 bit, source3 : 64 bit. */
12769 static void
12770 smaddl (sim_cpu *cpu)
12771 {
12772 unsigned rm = INSTR (20, 16);
12773 unsigned ra = INSTR (14, 10);
12774 unsigned rn = INSTR (9, 5);
12775 unsigned rd = INSTR (4, 0);
12776
12777 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12778 obtain a 64 bit product. */
12779 aarch64_set_reg_s64
12780 (cpu, rd, NO_SP,
12781 aarch64_get_reg_s64 (cpu, ra, NO_SP)
12782 + ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
12783 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
12784 }
12785
12786 /* Signed multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
12787 static void
12788 smsubl (sim_cpu *cpu)
12789 {
12790 unsigned rm = INSTR (20, 16);
12791 unsigned ra = INSTR (14, 10);
12792 unsigned rn = INSTR (9, 5);
12793 unsigned rd = INSTR (4, 0);
12794
12795 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12796 obtain a 64 bit product. */
12797 aarch64_set_reg_s64
12798 (cpu, rd, NO_SP,
12799 aarch64_get_reg_s64 (cpu, ra, NO_SP)
12800 - ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
12801 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
12802 }
12803
12804 /* Integer Multiply/Divide. */
12805
12806 /* First some macros and a helper function. */
12807 /* Macros to test or access elements of 64 bit words. */
12808
12809 /* Mask used to access lo 32 bits of 64 bit unsigned int. */
12810 #define LOW_WORD_MASK ((1ULL << 32) - 1)
12811 /* Return the lo 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
12812 #define lowWordToU64(_value_u64) ((_value_u64) & LOW_WORD_MASK)
12813 /* Return the hi 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
12814 #define highWordToU64(_value_u64) ((_value_u64) >> 32)
12815
12816 /* Offset of sign bit in 64 bit signed integer. */
12817 #define SIGN_SHIFT_U64 63
12818 /* The sign bit itself -- also identifies the minimum negative int value. */
12819 #define SIGN_BIT_U64 (1UL << SIGN_SHIFT_U64)
12820 /* Return true if a 64 bit signed int presented as an unsigned int is the
12821 most negative value. */
12822 #define isMinimumU64(_value_u64) ((_value_u64) == SIGN_BIT_U64)
12823 /* Return true (non-zero) if a 64 bit signed int presented as an unsigned
12824 int has its sign bit set. */
12825 #define isSignSetU64(_value_u64) ((_value_u64) & SIGN_BIT_U64)
12826 /* Return -1L if a 64 bit signed int presented as an unsigned int has
12827 its sign bit set, 1L otherwise. */
12828 #define signOfU64(_value_u64) (1L + (((_value_u64) >> SIGN_SHIFT_U64) * -2L))
12829 /* Clear the sign bit of a 64 bit signed int presented as an unsigned int. */
12830 #define clearSignU64(_value_u64) ((_value_u64) &= ~SIGN_BIT_U64)
12831
12832 /* Multiply two 64 bit ints and return
12833 the hi 64 bits of the 128 bit product. */
12834
12835 static uint64_t
12836 mul64hi (uint64_t value1, uint64_t value2)
12837 {
12838 uint64_t resultmid1;
12839 uint64_t result;
12840 uint64_t value1_lo = lowWordToU64 (value1);
12841 uint64_t value1_hi = highWordToU64 (value1);
12842 uint64_t value2_lo = lowWordToU64 (value2);
12843 uint64_t value2_hi = highWordToU64 (value2);
12844
12845 /* Cross-multiply and collect results. */
12846 uint64_t xproductlo = value1_lo * value2_lo;
12847 uint64_t xproductmid1 = value1_lo * value2_hi;
12848 uint64_t xproductmid2 = value1_hi * value2_lo;
12849 uint64_t xproducthi = value1_hi * value2_hi;
12850 uint64_t carry = 0;
12851 /* Start accumulating 64 bit results. */
12852 /* Drop bottom half of lowest cross-product. */
12853 uint64_t resultmid = xproductlo >> 32;
12854 /* Add in middle products. */
12855 resultmid = resultmid + xproductmid1;
12856
12857 /* Check for overflow. */
12858 if (resultmid < xproductmid1)
12859 /* Carry over 1 into top cross-product. */
12860 carry++;
12861
12862 resultmid1 = resultmid + xproductmid2;
12863
12864 /* Check for overflow. */
12865 if (resultmid1 < xproductmid2)
12866 /* Carry over 1 into top cross-product. */
12867 carry++;
12868
12869 /* Drop lowest 32 bits of middle cross-product. */
12870 result = resultmid1 >> 32;
12871
12872 /* Add top cross-product and any carry. */
12873 result += xproducthi + carry;
12874
12875 return result;
12876 }
12877
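/* A minimal self-check for mul64hi, assuming a compiler that provides
   the non-standard __int128 type (GCC and Clang do); kept out of the
   build, for illustration only.  */
#if 0
static void
mul64hi_selfcheck (void)
{
  uint64_t a = 0xdeadbeefcafebabeULL;
  uint64_t b = 0x0123456789abcdefULL;
  unsigned __int128 product = (unsigned __int128) a * b;

  if (mul64hi (a, b) != (uint64_t) (product >> 64))
    abort ();
}
#endif
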
12878 /* Signed multiply high, source, source2 :
12879 64 bit, dest <-- high 64-bit of result. */
12880 static void
12881 smulh (sim_cpu *cpu)
12882 {
12883 uint64_t uresult;
12884 int64_t result;
12885 unsigned rm = INSTR (20, 16);
12886 unsigned rn = INSTR (9, 5);
12887 unsigned rd = INSTR (4, 0);
12888 GReg ra = INSTR (14, 10);
12889 int64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12890 int64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12891 uint64_t uvalue1;
12892 uint64_t uvalue2;
12893 int64_t signum = 1;
12894
12895 if (ra != R31)
12896 HALT_UNALLOC;
12897
12898 /* Convert to unsigned and use the unsigned mul64hi routine
12899 then fix the sign up afterwards. */
12900 if (value1 < 0)
12901 {
12902 signum *= -1L;
12903 uvalue1 = -value1;
12904 }
12905 else
12906 {
12907 uvalue1 = value1;
12908 }
12909
12910 if (value2 < 0)
12911 {
12912 signum *= -1L;
12913 uvalue2 = -value2;
12914 }
12915 else
12916 {
12917 uvalue2 = value2;
12918 }
12919
12920 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12921 uresult = mul64hi (uvalue1, uvalue2);
12922 result = uresult;
12923 result *= signum;
12924
12925 aarch64_set_reg_s64 (cpu, rd, NO_SP, result);
12926 }
12927
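/* A sketch of an exact alternative: the high half of the signed
   product follows from the high half of the unsigned product by a
   standard identity, with no separate sign fix-up (the fix-up above
   can be off by one when the product is negative and its low half is
   non-zero).  For illustration only.  */
#if 0
static int64_t
smul64hi_sketch (int64_t a, int64_t b)
{
  uint64_t hi = mul64hi ((uint64_t) a, (uint64_t) b);

  /* Reading a (resp. b) as signed subtracts 2^64 * b (resp. a) from
     the 128 bit unsigned product, which only affects the high half.  */
  if (a < 0)
    hi -= (uint64_t) b;
  if (b < 0)
    hi -= (uint64_t) a;
  return (int64_t) hi;
}
#endif
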
12928 /* Unsigned multiply add long -- source, source2 :
12929 32 bit, source3 : 64 bit. */
12930 static void
12931 umaddl (sim_cpu *cpu)
12932 {
12933 unsigned rm = INSTR (20, 16);
12934 unsigned ra = INSTR (14, 10);
12935 unsigned rn = INSTR (9, 5);
12936 unsigned rd = INSTR (4, 0);
12937
12938 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12939 /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
12940 obtain a 64 bit product. */
12941 aarch64_set_reg_u64
12942 (cpu, rd, NO_SP,
12943 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12944 + ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
12945 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
12946 }
12947
12948 /* Unsigned multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
12949 static void
12950 umsubl (sim_cpu *cpu)
12951 {
12952 unsigned rm = INSTR (20, 16);
12953 unsigned ra = INSTR (14, 10);
12954 unsigned rn = INSTR (9, 5);
12955 unsigned rd = INSTR (4, 0);
12956
12957 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12958 /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
12959 obtain a 64 bit product. */
12960 aarch64_set_reg_u64
12961 (cpu, rd, NO_SP,
12962 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12963 - ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
12964 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
12965 }
12966
12967 /* Unsigned multiply high, source, source2 :
12968 64 bit, dest <-- high 64-bit of result. */
12969 static void
12970 umulh (sim_cpu *cpu)
12971 {
12972 unsigned rm = INSTR (20, 16);
12973 unsigned rn = INSTR (9, 5);
12974 unsigned rd = INSTR (4, 0);
12975 GReg ra = INSTR (14, 10);
12976
12977 if (ra != R31)
12978 HALT_UNALLOC;
12979
12980 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12981 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12982 mul64hi (aarch64_get_reg_u64 (cpu, rn, NO_SP),
12983 aarch64_get_reg_u64 (cpu, rm, NO_SP)));
12984 }
12985
12986 static void
12987 dexDataProc3Source (sim_cpu *cpu)
12988 {
12989 /* assert instr[28,24] == 11011. */
12990 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit (for rd at least)
12991 instr[30,29] = op54 : 00 ==> ok, ow ==> UNALLOC
12992 instr[23,21] = op31 : 111 ==> UNALLOC, ow ==> ok
12993 instr[15] = o0 : 0/1 ==> ok
12994 instr[23,21:15] ==> op : 0000 ==> MADD, 0001 ==> MSUB, (32/64 bit)
12995 0010 ==> SMADDL, 0011 ==> SMSUBL, (64 bit only)
12996 0100 ==> SMULH, (64 bit only)
12997 1010 ==> UMADDL, 1011 ==> UMSUBL, (64 bit only)
12998 1100 ==> UMULH (64 bit only)
12999 ow ==> UNALLOC. */
13000
13001 uint32_t dispatch;
13002 uint32_t size = INSTR (31, 31);
13003 uint32_t op54 = INSTR (30, 29);
13004 uint32_t op31 = INSTR (23, 21);
13005 uint32_t o0 = INSTR (15, 15);
13006
13007 if (op54 != 0)
13008 HALT_UNALLOC;
13009
13010 if (size == 0)
13011 {
13012 if (op31 != 0)
13013 HALT_UNALLOC;
13014
13015 if (o0 == 0)
13016 madd32 (cpu);
13017 else
13018 msub32 (cpu);
13019 return;
13020 }
13021
13022 dispatch = (op31 << 1) | o0;
13023
13024 switch (dispatch)
13025 {
13026 case 0: madd64 (cpu); return;
13027 case 1: msub64 (cpu); return;
13028 case 2: smaddl (cpu); return;
13029 case 3: smsubl (cpu); return;
13030 case 4: smulh (cpu); return;
13031 case 10: umaddl (cpu); return;
13032 case 11: umsubl (cpu); return;
13033 case 12: umulh (cpu); return;
13034 default: HALT_UNALLOC;
13035 }
13036 }
13037
13038 static void
13039 dexDPReg (sim_cpu *cpu)
13040 {
13041 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
13042 assert group == GROUP_DPREG_0101 || group == GROUP_DPREG_1101
13043 bits [28:24:21] of a DPReg are the secondary dispatch vector. */
13044 uint32_t group2 = dispatchDPReg (aarch64_get_instr (cpu));
13045
13046 switch (group2)
13047 {
13048 case DPREG_LOG_000:
13049 case DPREG_LOG_001:
13050 dexLogicalShiftedRegister (cpu); return;
13051
13052 case DPREG_ADDSHF_010:
13053 dexAddSubtractShiftedRegister (cpu); return;
13054
13055 case DPREG_ADDEXT_011:
13056 dexAddSubtractExtendedRegister (cpu); return;
13057
13058 case DPREG_ADDCOND_100:
13059 {
13060 /* This set bundles a variety of different operations. */
13061 	/* Check for:  */
13062 /* 1) add/sub w carry. */
13063 uint32_t mask1 = 0x1FE00000U;
13064 uint32_t val1 = 0x1A000000U;
13065 /* 2) cond compare register/immediate. */
13066 uint32_t mask2 = 0x1FE00000U;
13067 uint32_t val2 = 0x1A400000U;
13068 /* 3) cond select. */
13069 uint32_t mask3 = 0x1FE00000U;
13070 uint32_t val3 = 0x1A800000U;
13071 /* 4) data proc 1/2 source. */
13072 uint32_t mask4 = 0x1FE00000U;
13073 uint32_t val4 = 0x1AC00000U;
13074
13075 if ((aarch64_get_instr (cpu) & mask1) == val1)
13076 dexAddSubtractWithCarry (cpu);
13077
13078 else if ((aarch64_get_instr (cpu) & mask2) == val2)
13079 CondCompare (cpu);
13080
13081 else if ((aarch64_get_instr (cpu) & mask3) == val3)
13082 dexCondSelect (cpu);
13083
13084 else if ((aarch64_get_instr (cpu) & mask4) == val4)
13085 {
13086 /* Bit 30 is clear for data proc 2 source
13087 and set for data proc 1 source. */
13088 if (aarch64_get_instr (cpu) & (1U << 30))
13089 dexDataProc1Source (cpu);
13090 else
13091 dexDataProc2Source (cpu);
13092 }
13093
13094 else
13095 /* Should not reach here. */
13096 HALT_NYI;
13097
13098 return;
13099 }
13100
13101 case DPREG_3SRC_110:
13102 dexDataProc3Source (cpu); return;
13103
13104 case DPREG_UNALLOC_101:
13105 HALT_UNALLOC;
13106
13107 case DPREG_3SRC_111:
13108 dexDataProc3Source (cpu); return;
13109
13110 default:
13111 /* Should never reach here. */
13112 HALT_NYI;
13113 }
13114 }
13115
13116 /* Unconditional Branch immediate.
13117 Offset is a PC-relative byte offset in the range +/- 128MiB.
13118 The offset in the instruction is a word offset; the decode routine
13119 scales it to the byte offset passed in here. */
13120
13121 /* Unconditional branch. */
13122 static void
13123 buc (sim_cpu *cpu, int32_t offset)
13124 {
13125 aarch64_set_next_PC_by_offset (cpu, offset);
13126 }
13127
13128 static unsigned stack_depth = 0;
13129
13130 /* Unconditional branch and link -- writes return PC to LR. */
13131 static void
13132 bl (sim_cpu *cpu, int32_t offset)
13133 {
13134 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13135 aarch64_save_LR (cpu);
13136 aarch64_set_next_PC_by_offset (cpu, offset);
13137
13138 if (TRACE_BRANCH_P (cpu))
13139 {
13140 ++ stack_depth;
13141 TRACE_BRANCH (cpu,
13142 " %*scall %" PRIx64 " [%s]"
13143 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13144 stack_depth, " ", aarch64_get_next_PC (cpu),
13145 aarch64_get_func (CPU_STATE (cpu),
13146 aarch64_get_next_PC (cpu)),
13147 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13148 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13149 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13150 );
13151 }
13152 }
13153
13154 /* Unconditional Branch register.
13155 Branch/return address is in source register. */
13156
13157 /* Unconditional branch. */
13158 static void
13159 br (sim_cpu *cpu)
13160 {
13161 unsigned rn = INSTR (9, 5);
13162 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13163 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13164 }
13165
13166 /* Unconditional branch and link -- writes return PC to LR. */
13167 static void
13168 blr (sim_cpu *cpu)
13169 {
13170 unsigned rn = INSTR (9, 5);
13171
13172 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13173 /* The pseudo code in the spec says we update LR before fetching
13174 the value from rn. */
13175 aarch64_save_LR (cpu);
13176 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13177
13178 if (TRACE_BRANCH_P (cpu))
13179 {
13180 ++ stack_depth;
13181 TRACE_BRANCH (cpu,
13182 " %*scall %" PRIx64 " [%s]"
13183 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13184 stack_depth, " ", aarch64_get_next_PC (cpu),
13185 aarch64_get_func (CPU_STATE (cpu),
13186 aarch64_get_next_PC (cpu)),
13187 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13188 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13189 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13190 );
13191 }
13192 }
13193
13194 /* Return -- the assembler will default the source to LR.  This is
13195 functionally equivalent to br but, presumably, unlike br it
13196 side-effects the branch predictor. */
13197 static void
13198 ret (sim_cpu *cpu)
13199 {
13200 unsigned rn = INSTR (9, 5);
13201 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13202
13203 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13204 if (TRACE_BRANCH_P (cpu))
13205 {
13206 TRACE_BRANCH (cpu,
13207 " %*sreturn [result: %" PRIx64 "]",
13208 stack_depth, " ", aarch64_get_reg_u64 (cpu, 0, NO_SP));
13209 -- stack_depth;
13210 }
13211 }
13212
13213 /* NOP -- we implement this and call it from the decode in case we
13214 want to intercept it later. */
13215
13216 static void
13217 nop (sim_cpu *cpu)
13218 {
13219 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13220 }
13221
13222 /* Data synchronization barrier. */
13223
13224 static void
13225 dsb (sim_cpu *cpu)
13226 {
13227 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13228 }
13229
13230 /* Data memory barrier. */
13231
13232 static void
13233 dmb (sim_cpu *cpu)
13234 {
13235 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13236 }
13237
13238 /* Instruction synchronization barrier. */
13239
13240 static void
13241 isb (sim_cpu *cpu)
13242 {
13243 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13244 }
13245
13246 static void
13247 dexBranchImmediate (sim_cpu *cpu)
13248 {
13249 /* assert instr[30,26] == 00101
13250 instr[31] ==> 0 == B, 1 == BL
13251 instr[25,0] == imm26 branch offset counted in words. */
13252
13253 uint32_t top = INSTR (31, 31);
13254 /* We have a 26 bit signed word offset which we need to pass to the
13255 execute routine as a signed byte offset. */
13256 int32_t offset = simm32 (aarch64_get_instr (cpu), 25, 0) << 2;
13257
13258 if (top)
13259 bl (cpu, offset);
13260 else
13261 buc (cpu, offset);
13262 }
13263
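/* Worked example (for illustration): a B instruction whose imm26
   field is 0x3ffffff sign-extends to -1 and scales to a byte offset
   of -4, i.e. a branch to the preceding instruction.  */
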
13264 /* Control Flow. */
13265
13266 /* Conditional branch
13267
13268 Offset is a PC-relative byte offset in the range +/- 1MiB.  Pos is
13269 a bit position in the range 0 .. 63.
13270
13271 cc is a CondCode enum value as pulled out of the decode
13272
13273 N.B. any offset register (source) can only be Xn or Wn. */
13274
13275 static void
13276 bcc (sim_cpu *cpu, int32_t offset, CondCode cc)
13277 {
13278 /* The test returns TRUE if CC is met. */
13279 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13280 if (testConditionCode (cpu, cc))
13281 aarch64_set_next_PC_by_offset (cpu, offset);
13282 }
13283
13284 /* 32 bit branch on register non-zero. */
13285 static void
13286 cbnz32 (sim_cpu *cpu, int32_t offset)
13287 {
13288 unsigned rt = INSTR (4, 0);
13289
13290 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13291 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) != 0)
13292 aarch64_set_next_PC_by_offset (cpu, offset);
13293 }
13294
13295 /* 64 bit branch on register non-zero. */
13296 static void
13297 cbnz (sim_cpu *cpu, int32_t offset)
13298 {
13299 unsigned rt = INSTR (4, 0);
13300
13301 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13302 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) != 0)
13303 aarch64_set_next_PC_by_offset (cpu, offset);
13304 }
13305
13306 /* 32 bit branch on register zero. */
13307 static void
13308 cbz32 (sim_cpu *cpu, int32_t offset)
13309 {
13310 unsigned rt = INSTR (4, 0);
13311
13312 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13313 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) == 0)
13314 aarch64_set_next_PC_by_offset (cpu, offset);
13315 }
13316
13317 /* 64 bit branch on register zero. */
13318 static void
13319 cbz (sim_cpu *cpu, int32_t offset)
13320 {
13321 unsigned rt = INSTR (4, 0);
13322
13323 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13324 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) == 0)
13325 aarch64_set_next_PC_by_offset (cpu, offset);
13326 }
13327
13328 /* Branch on register bit test non-zero -- one size fits all. */
13329 static void
13330 tbnz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13331 {
13332 unsigned rt = INSTR (4, 0);
13333
13334 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13335 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos))
13336 aarch64_set_next_PC_by_offset (cpu, offset);
13337 }
13338
13339 /* Branch on register bit test zero -- one size fits all. */
13340 static void
13341 tbz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13342 {
13343 unsigned rt = INSTR (4, 0);
13344
13345 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13346 if (!(aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos)))
13347 aarch64_set_next_PC_by_offset (cpu, offset);
13348 }
13349
13350 static void
13351 dexCompareBranchImmediate (sim_cpu *cpu)
13352 {
13353 /* instr[30,25] = 01 1010
13354 instr[31] = size : 0 ==> 32, 1 ==> 64
13355 instr[24] = op : 0 ==> CBZ, 1 ==> CBNZ
13356 instr[23,5] = simm19 branch offset counted in words
13357 instr[4,0] = rt */
13358
13359 uint32_t size = INSTR (31, 31);
13360 uint32_t op = INSTR (24, 24);
13361 int32_t offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13362
13363 if (size == 0)
13364 {
13365 if (op == 0)
13366 cbz32 (cpu, offset);
13367 else
13368 cbnz32 (cpu, offset);
13369 }
13370 else
13371 {
13372 if (op == 0)
13373 cbz (cpu, offset);
13374 else
13375 cbnz (cpu, offset);
13376 }
13377 }
13378
13379 static void
13380 dexTestBranchImmediate (sim_cpu *cpu)
13381 {
13382 /* instr[31] = b5 : bit 5 of test bit idx
13383 instr[30,25] = 01 1011
13384 instr[24] = op : 0 ==> TBZ, 1 ==> TBNZ
13385 instr[23,19] = b40 : bits 4 to 0 of test bit idx
13386 instr[18,5] = simm14 : signed offset counted in words
13387 instr[4,0] = uimm5 */
13388
13389 uint32_t pos = ((INSTR (31, 31) << 5) | INSTR (23, 19));
13390 int32_t offset = simm32 (aarch64_get_instr (cpu), 18, 5) << 2;
13391
13392 NYI_assert (30, 25, 0x1b);
13393
13394 if (INSTR (24, 24) == 0)
13395 tbz (cpu, pos, offset);
13396 else
13397 tbnz (cpu, pos, offset);
13398 }
13399
13400 static void
13401 dexCondBranchImmediate (sim_cpu *cpu)
13402 {
13403 /* instr[31,25] = 010 1010
13404 instr[24] = op1 : op1:op0 == 00 ==> B.cond, ow ==> UNALLOC
13405 instr[23,5] = simm19 : signed offset counted in words
13406 instr[4] = op0
13407 instr[3,0] = cond */
13408
13409 int32_t offset;
13410 uint32_t op = ((INSTR (24, 24) << 1) | INSTR (4, 4));
13411
13412 NYI_assert (31, 25, 0x2a);
13413
13414 if (op != 0)
13415 HALT_UNALLOC;
13416
13417 offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13418
13419 bcc (cpu, offset, INSTR (3, 0));
13420 }
13421
13422 static void
13423 dexBranchRegister (sim_cpu *cpu)
13424 {
13425 /* instr[31,25] = 110 1011
13426 instr[24,21] = op : 0 ==> BR, 1 ==> BLR, 2 ==> RET, 4 ==> ERET, 5 ==> DRPS
13427 instr[20,16] = op2 : must be 11111
13428 instr[15,10] = op3 : must be 000000
13429 instr[4,0] = op4 : must be 00000. */
13430
13431 uint32_t op = INSTR (24, 21);
13432 uint32_t op2 = INSTR (20, 16);
13433 uint32_t op3 = INSTR (15, 10);
13434 uint32_t op4 = INSTR (4, 0);
13435
13436 NYI_assert (31, 25, 0x6b);
13437
13438 if (op2 != 0x1F || op3 != 0 || op4 != 0)
13439 HALT_UNALLOC;
13440
13441 if (op == 0)
13442 br (cpu);
13443
13444 else if (op == 1)
13445 blr (cpu);
13446
13447 else if (op == 2)
13448 ret (cpu);
13449
13450 else
13451 {
13452 /* ERET and DRPS require 0b11111 in the Rn field, instr [9,5];
13453 anything else is unallocated. */
13454 uint32_t rn = INSTR (9, 5);
13455
13456 if (rn != 0x1f)
13457 HALT_UNALLOC;
13458
13459 if (op == 4 || op == 5)
13460 HALT_NYI;
13461
13462 HALT_UNALLOC;
13463 }
13464 }
13465
13466 /* FIXME: We should get the Angel SWI values from ../../libgloss/aarch64/svc.h
13467 but this may not be available. So instead we define the values we need
13468 here. */
13469 #define AngelSVC_Reason_Open 0x01
13470 #define AngelSVC_Reason_Close 0x02
13471 #define AngelSVC_Reason_Write 0x05
13472 #define AngelSVC_Reason_Read 0x06
13473 #define AngelSVC_Reason_IsTTY 0x09
13474 #define AngelSVC_Reason_Seek 0x0A
13475 #define AngelSVC_Reason_FLen 0x0C
13476 #define AngelSVC_Reason_Remove 0x0E
13477 #define AngelSVC_Reason_Rename 0x0F
13478 #define AngelSVC_Reason_Clock 0x10
13479 #define AngelSVC_Reason_Time 0x11
13480 #define AngelSVC_Reason_System 0x12
13481 #define AngelSVC_Reason_Errno 0x13
13482 #define AngelSVC_Reason_GetCmdLine 0x15
13483 #define AngelSVC_Reason_HeapInfo 0x16
13484 #define AngelSVC_Reason_ReportException 0x18
13485 #define AngelSVC_Reason_Elapsed 0x30
13486
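/* A sketch of the parameter block filled in for
   AngelSVC_Reason_HeapInfo below; the struct and its field names are
   hypothetical, given only to document the layout the code uses.  */
#if 0
struct angel_heapinfo
{
  uint64_t heap_base;    /* +0  : start addr of heap.  */
  uint64_t heap_limit;   /* +8  : end addr of heap.  */
  uint64_t stack_base;   /* +16 : lowest stack addr.  */
  uint64_t stack_limit;  /* +24 : initial stack addr.  */
};
#endif
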
13487
13488 static void
13489 handle_halt (sim_cpu *cpu, uint32_t val)
13490 {
13491 uint64_t result = 0;
13492
13493 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13494 if (val != 0xf000)
13495 {
13496 TRACE_SYSCALL (cpu, " HLT [0x%x]", val);
13497 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13498 sim_stopped, SIM_SIGTRAP);
13499 }
13500
13501 /* We have encountered an Angel SVC call. See if we can process it. */
13502 switch (aarch64_get_reg_u32 (cpu, 0, NO_SP))
13503 {
13504 case AngelSVC_Reason_HeapInfo:
13505 {
13506 /* Get the values. */
13507 uint64_t stack_top = aarch64_get_stack_start (cpu);
13508 uint64_t heap_base = aarch64_get_heap_start (cpu);
13509
13510 /* Get the pointer */
13511 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13512 ptr = aarch64_get_mem_u64 (cpu, ptr);
13513
13514 /* Fill in the memory block. */
13515 /* Start addr of heap. */
13516 aarch64_set_mem_u64 (cpu, ptr + 0, heap_base);
13517 /* End addr of heap. */
13518 aarch64_set_mem_u64 (cpu, ptr + 8, stack_top);
13519 /* Lowest stack addr. */
13520 aarch64_set_mem_u64 (cpu, ptr + 16, heap_base);
13521 /* Initial stack addr. */
13522 aarch64_set_mem_u64 (cpu, ptr + 24, stack_top);
13523
13524 TRACE_SYSCALL (cpu, " AngelSVC: Get Heap Info");
13525 }
13526 break;
13527
13528 case AngelSVC_Reason_Open:
13529 {
13530 /* Get the pointer */
13531 /* uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK); */
13532 /* FIXME: For now we just assume that we will only be asked
13533 to open the standard file descriptors. */
13534 static int fd = 0;
13535 result = fd ++;
13536
13537 TRACE_SYSCALL (cpu, " AngelSVC: Open file %d", fd - 1);
13538 }
13539 break;
13540
13541 case AngelSVC_Reason_Close:
13542 {
13543 uint64_t fh = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13544 TRACE_SYSCALL (cpu, " AngelSVC: Close file %d", (int) fh);
13545 result = 0;
13546 }
13547 break;
13548
13549 case AngelSVC_Reason_Errno:
13550 result = 0;
13551 TRACE_SYSCALL (cpu, " AngelSVC: Get Errno");
13552 break;
13553
13554 case AngelSVC_Reason_Clock:
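/* The Angel clock call reports elapsed time in centiseconds,
   hence the scaling by CLOCKS_PER_SEC / 100 below.  */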
13555 result =
13556 #ifdef CLOCKS_PER_SEC
13557 (CLOCKS_PER_SEC >= 100)
13558 ? (clock () / (CLOCKS_PER_SEC / 100))
13559 : ((clock () * 100) / CLOCKS_PER_SEC)
13560 #else
13561 /* Presume unix... clock() returns microseconds. */
13562 (clock () / 10000)
13563 #endif
13564 ;
13565 TRACE_SYSCALL (cpu, " AngelSVC: Get Clock");
13566 break;
13567
13568 case AngelSVC_Reason_GetCmdLine:
13569 {
13570 /* Get the pointer */
13571 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13572 ptr = aarch64_get_mem_u64 (cpu, ptr);
13573
13574 /* FIXME: No command line for now. */
13575 aarch64_set_mem_u64 (cpu, ptr, 0);
13576 TRACE_SYSCALL (cpu, " AngelSVC: Get Command Line");
13577 }
13578 break;
13579
13580 case AngelSVC_Reason_IsTTY:
13581 result = 1;
13582 TRACE_SYSCALL (cpu, " AngelSVC: IsTTY ?");
13583 break;
13584
13585 case AngelSVC_Reason_Write:
13586 {
13587 /* Get the pointer */
13588 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13589 /* Get the write control block. */
13590 uint64_t fd = aarch64_get_mem_u64 (cpu, ptr);
13591 uint64_t buf = aarch64_get_mem_u64 (cpu, ptr + 8);
13592 uint64_t len = aarch64_get_mem_u64 (cpu, ptr + 16);
13593
13594 TRACE_SYSCALL (cpu, "write of %" PRIx64 " bytes from %"
13595 PRIx64 " on descriptor %" PRIx64,
13596 len, buf, fd);
13597
13598 if (len > 1280)
13599 {
13600 TRACE_SYSCALL (cpu,
13601 " AngelSVC: Write: Suspiciously long write: %ld",
13602 (long) len);
13603 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13604 sim_stopped, SIM_SIGBUS);
13605 }
13606 else if (fd == 1)
13607 {
13608 printf ("%.*s", (int) len, aarch64_get_mem_ptr (cpu, buf));
13609 }
13610 else if (fd == 2)
13611 {
13612 TRACE (cpu, 0, "\n");
13613 sim_io_eprintf (CPU_STATE (cpu), "%.*s",
13614 (int) len, aarch64_get_mem_ptr (cpu, buf));
13615 TRACE (cpu, 0, "\n");
13616 }
13617 else
13618 {
13619 TRACE_SYSCALL (cpu,
13620 " AngelSVC: Write: Unexpected file handle: %d",
13621 (int) fd);
13622 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13623 sim_stopped, SIM_SIGABRT);
13624 }
13625 }
13626 break;
13627
13628 case AngelSVC_Reason_ReportException:
13629 {
13630 /* Get the pointer */
13631 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13632 /* ptr = aarch64_get_mem_u64 (cpu, ptr); */
13633 uint64_t type = aarch64_get_mem_u64 (cpu, ptr);
13634 uint64_t state = aarch64_get_mem_u64 (cpu, ptr + 8);
13635
13636 TRACE_SYSCALL (cpu,
13637 "Angel Exception: type 0x%" PRIx64 " state %" PRIx64,
13638 type, state);
13639
13640 if (type == 0x20026)
13641 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13642 sim_exited, state);
13643 else
13644 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13645 sim_stopped, SIM_SIGINT);
13646 }
13647 break;
13648
13649 case AngelSVC_Reason_Read:
13650 case AngelSVC_Reason_FLen:
13651 case AngelSVC_Reason_Seek:
13652 case AngelSVC_Reason_Remove:
13653 case AngelSVC_Reason_Time:
13654 case AngelSVC_Reason_System:
13655 case AngelSVC_Reason_Rename:
13656 case AngelSVC_Reason_Elapsed:
13657 default:
13658 TRACE_SYSCALL (cpu, " HLT [Unknown angel %x]",
13659 aarch64_get_reg_u32 (cpu, 0, NO_SP));
13660 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13661 sim_stopped, SIM_SIGTRAP);
13662 }
13663
13664 aarch64_set_reg_u64 (cpu, 0, NO_SP, result);
13665 }
13666
13667 static void
13668 dexExcpnGen (sim_cpu *cpu)
13669 {
13670 /* instr[31:24] = 11010100
13671 instr[23,21] = opc : 000 ==> GEN EXCPN, 001 ==> BRK
13672 010 ==> HLT, 101 ==> DBG GEN EXCPN
13673 instr[20,5] = imm16
13674 instr[4,2] = opc2 000 ==> OK, ow ==> UNALLOC
13675 instr[1,0] = LL : discriminates opc */
13676
13677 uint32_t opc = INSTR (23, 21);
13678 uint32_t imm16 = INSTR (20, 5);
13679 uint32_t opc2 = INSTR (4, 2);
13680 uint32_t LL;
13681
13682 NYI_assert (31, 24, 0xd4);
13683
13684 if (opc2 != 0)
13685 HALT_UNALLOC;
13686
13687 LL = INSTR (1, 0);
13688
13689 /* We only implement HLT and BRK for now. */
13690 if (opc == 1 && LL == 0)
13691 {
13692 TRACE_EVENTS (cpu, " BRK [0x%x]", imm16);
13693 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13694 sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
13695 }
13696
13697 if (opc == 2 && LL == 0)
13698 handle_halt (cpu, imm16);
13699
13700 else if (opc == 0 || opc == 5)
13701 HALT_NYI;
13702
13703 else
13704 HALT_UNALLOC;
13705 }
13706
13707 /* Stub for accessing system registers. */
13708
13709 static uint64_t
13710 system_get (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13711 unsigned crm, unsigned op2)
13712 {
13713 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 7)
13714 /* DCZID_EL0 - the Data Cache Zero ID register.
13715 We do not support DC ZVA at the moment, so
13716 we return a value with the disable bit set.
13717 We implement support for the DCZID register since
13718 it is used by the C library's memset function. */
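/* (In DCZID_EL0, bit 4 is the DZP -- zeroing prohibited -- flag
   and bits [3,0] give the log2 block size, left as zero here.)  */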
13719 return ((uint64_t) 1) << 4;
13720
13721 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 1)
13722 /* Cache Type Register. */
13723 return 0x80008000UL;
13724
13725 if (crn == 13 && op1 == 3 && crm == 0 && op2 == 2)
13726 /* TPIDR_EL0 - thread pointer id. */
13727 return aarch64_get_thread_id (cpu);
13728
13729 if (op1 == 3 && crm == 4 && op2 == 0)
13730 return aarch64_get_FPCR (cpu);
13731
13732 if (op1 == 3 && crm == 4 && op2 == 1)
13733 return aarch64_get_FPSR (cpu);
13734
13735 else if (op1 == 3 && crm == 2 && op2 == 0)
13736 return aarch64_get_CPSR (cpu);
13737
13738 HALT_NYI;
13739 }
13740
13741 static void
13742 system_set (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13743 unsigned crm, unsigned op2, uint64_t val)
13744 {
13745 if (op1 == 3 && crm == 4 && op2 == 0)
13746 aarch64_set_FPCR (cpu, val);
13747
13748 else if (op1 == 3 && crm == 4 && op2 == 1)
13749 aarch64_set_FPSR (cpu, val);
13750
13751 else if (op1 == 3 && crm == 2 && op2 == 0)
13752 aarch64_set_CPSR (cpu, val);
13753
13754 else
13755 HALT_NYI;
13756 }
13757
13758 static void
13759 do_mrs (sim_cpu *cpu)
13760 {
13761 /* instr[31,20] = 1101 0101 0011
13762 instr[19] = op0
13763 instr[18,16] = op1
13764 instr[15,12] = CRn
13765 instr[11,8] = CRm
13766 instr[7,5] = op2
13767 instr[4,0] = Rt */
13768 unsigned sys_op0 = INSTR (19, 19) + 2;
13769 unsigned sys_op1 = INSTR (18, 16);
13770 unsigned sys_crn = INSTR (15, 12);
13771 unsigned sys_crm = INSTR (11, 8);
13772 unsigned sys_op2 = INSTR (7, 5);
13773 unsigned rt = INSTR (4, 0);
13774
13775 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13776 aarch64_set_reg_u64 (cpu, rt, NO_SP,
13777 system_get (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2));
13778 }
13779
13780 static void
13781 do_MSR_immediate (sim_cpu *cpu)
13782 {
13783 /* instr[31:19] = 1101 0101 0000 0
13784 instr[18,16] = op1
13785 instr[15,12] = 0100
13786 instr[11,8] = CRm
13787 instr[7,5] = op2
13788 instr[4,0] = 1 1111 */
13789
13790 unsigned op1 = INSTR (18, 16);
13791 /*unsigned crm = INSTR (11, 8);*/
13792 unsigned op2 = INSTR (7, 5);
13793
13794 NYI_assert (31, 19, 0x1AA0);
13795 NYI_assert (15, 12, 0x4);
13796 NYI_assert (4, 0, 0x1F);
13797
13798 if (op1 == 0)
13799 {
13800 if (op2 == 5)
13801 HALT_NYI; /* set SPSel. */
13802 else
13803 HALT_UNALLOC;
13804 }
13805 else if (op1 == 3)
13806 {
13807 if (op2 == 6)
13808 HALT_NYI; /* set DAIFset. */
13809 else if (op2 == 7)
13810 HALT_NYI; /* set DAIFclr. */
13811 else
13812 HALT_UNALLOC;
13813 }
13814 else
13815 HALT_UNALLOC;
13816 }
13817
13818 static void
13819 do_MSR_reg (sim_cpu *cpu)
13820 {
13821 /* instr[31:20] = 1101 0101 0001
13822 instr[19] = op0
13823 instr[18,16] = op1
13824 instr[15,12] = CRn
13825 instr[11,8] = CRm
13826 instr[7,5] = op2
13827 instr[4,0] = Rt */
13828
13829 unsigned sys_op0 = INSTR (19, 19) + 2;
13830 unsigned sys_op1 = INSTR (18, 16);
13831 unsigned sys_crn = INSTR (15, 12);
13832 unsigned sys_crm = INSTR (11, 8);
13833 unsigned sys_op2 = INSTR (7, 5);
13834 unsigned rt = INSTR (4, 0);
13835
13836 NYI_assert (31, 20, 0xD51);
13837
13838 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13839 system_set (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2,
13840 aarch64_get_reg_u64 (cpu, rt, NO_SP));
13841 }
13842
13843 static void
13844 do_SYS (sim_cpu *cpu)
13845 {
13846 /* instr[31,19] = 1101 0101 0000 1
13847 instr[18,16] = op1
13848 instr[15,12] = CRn
13849 instr[11,8] = CRm
13850 instr[7,5] = op2
13851 instr[4,0] = Rt */
13852 NYI_assert (31, 19, 0x1AA1);
13853
13854 /* FIXME: For now we just silently accept system ops. */
13855 }
13856
13857 static void
13858 dexSystem (sim_cpu *cpu)
13859 {
13860 /* instr[31,22] = 1101 0101 00
13861 instr[21] = L
13862 instr[20,19] = op0
13863 instr[18,16] = op1
13864 instr[15,12] = CRn
13865 instr[11,8] = CRm
13866 instr[7,5] = op2
13867 instr[4,0] = uimm5 */
13868
13869 /* We are interested in HINT, DSB, DMB and ISB
13870
13871 Hint #0 encodes NOOP (the only hint we care about); it has
13872 L == 0, op0 == 00, op1 == 011, CRn == 0010, Rt == 11111 and
13873 CRm:op2 == 0000 000.  Unallocated hints also execute as NOPs.
13874
13875 DSB, DMB, ISB are data synchronization barrier, data memory
13876 barrier and instruction synchronization barrier, respectively, where
13877
13878 L == 0, op0 == 0, op1 = 011, CRn = 0011, Rt = 11111,
13879 op2 : DSB ==> 100, DMB ==> 101, ISB ==> 110
13880 CRm<3:2> ==> domain, CRm<1:0> ==> types,
13881 domain : 00 ==> OuterShareable, 01 ==> Nonshareable,
13882 10 ==> InnerShareable, 11 ==> FullSystem
13883 types : 01 ==> Reads, 10 ==> Writes,
13884 11 ==> All, 00 ==> All (domain == FullSystem). */
13885
13886 unsigned rt = INSTR (4, 0);
13887
13888 NYI_assert (31, 22, 0x354);
13889
13890 switch (INSTR (21, 12))
13891 {
13892 case 0x032:
13893 if (rt == 0x1F)
13894 {
13895 	  /* We execute a NOP when CRm != 0000, or when
13896 	     CRm == 0000 and (op2 == 000 or op2 > 101).  */
13897 uint32_t crm = INSTR (11, 8);
13898 uint32_t op2 = INSTR (7, 5);
13899
13900 if (crm != 0 || (op2 == 0 || op2 > 5))
13901 {
13902 /* Actually call nop method so we can reimplement it later. */
13903 nop (cpu);
13904 return;
13905 }
13906 }
13907 HALT_NYI;
13908
13909 case 0x033:
13910 {
13911 uint32_t op2 = INSTR (7, 5);
13912
13913 switch (op2)
13914 {
13915 case 2: HALT_NYI;
13916 case 4: dsb (cpu); return;
13917 case 5: dmb (cpu); return;
13918 case 6: isb (cpu); return;
13919 default: HALT_UNALLOC;
13920 }
13921 }
13922
13923 case 0x3B0:
13924 case 0x3B4:
13925 case 0x3BD:
13926 do_mrs (cpu);
13927 return;
13928
13929 case 0x0B7:
13930 do_SYS (cpu); /* DC is an alias of SYS. */
13931 return;
13932
13933 default:
13934 if (INSTR (21, 20) == 0x1)
13935 do_MSR_reg (cpu);
13936 else if (INSTR (21, 19) == 0 && INSTR (15, 12) == 0x4)
13937 do_MSR_immediate (cpu);
13938 else
13939 HALT_NYI;
13940 return;
13941 }
13942 }
13943
13944 static void
13945 dexBr (sim_cpu *cpu)
13946 {
13947 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
13948 assert group == GROUP_BREXSYS_1010 || group == GROUP_BREXSYS_1011
13949 bits [31,29] of a BrExSys are the secondary dispatch vector. */
13950 uint32_t group2 = dispatchBrExSys (aarch64_get_instr (cpu));
13951
13952 switch (group2)
13953 {
13954 case BR_IMM_000:
13955 return dexBranchImmediate (cpu);
13956
13957 case BR_IMMCMP_001:
13958 /* Compare has bit 25 clear while test has it set. */
13959 if (!INSTR (25, 25))
13960 dexCompareBranchImmediate (cpu);
13961 else
13962 dexTestBranchImmediate (cpu);
13963 return;
13964
13965 case BR_IMMCOND_010:
13966 /* This is a conditional branch if bit 25 is clear otherwise
13967 unallocated. */
13968 if (!INSTR (25, 25))
13969 dexCondBranchImmediate (cpu);
13970 else
13971 HALT_UNALLOC;
13972 return;
13973
13974 case BR_UNALLOC_011:
13975 HALT_UNALLOC;
13976
13977 case BR_IMM_100:
13978 dexBranchImmediate (cpu);
13979 return;
13980
13981 case BR_IMMCMP_101:
13982 /* Compare has bit 25 clear while test has it set. */
13983 if (!INSTR (25, 25))
13984 dexCompareBranchImmediate (cpu);
13985 else
13986 dexTestBranchImmediate (cpu);
13987 return;
13988
13989 case BR_REG_110:
13990 /* Unconditional branch reg has bit 25 set. */
13991 if (INSTR (25, 25))
13992 dexBranchRegister (cpu);
13993
13994 /* This includes Excpn Gen, System and unalloc operations.
13995 We need to decode the Excpn Gen operation BRK so we can plant
13996 debugger entry points.
13997 Excpn Gen operations have instr [24] = 0.
13998 We need to decode at least one of the System operations, NOP,
13999 which is an alias for HINT #0.
14000 System operations have instr [24,22] = 100. */
14001 else if (INSTR (24, 24) == 0)
14002 dexExcpnGen (cpu);
14003
14004 else if (INSTR (24, 22) == 4)
14005 dexSystem (cpu);
14006
14007 else
14008 HALT_UNALLOC;
14009
14010 return;
14011
14012 case BR_UNALLOC_111:
14013 HALT_UNALLOC;
14014
14015 default:
14016 /* Should never reach here. */
14017 HALT_NYI;
14018 }
14019 }
14020
14021 static void
14022 aarch64_decode_and_execute (sim_cpu *cpu, uint64_t pc)
14023 {
14024 /* We need to check if gdb wants to break in here. */
14025 /* checkBreak (cpu);  */
14026
14027 uint64_t group = dispatchGroup (aarch64_get_instr (cpu));
14028
14029 switch (group)
14030 {
14031 case GROUP_PSEUDO_0000: dexPseudo (cpu); break;
14032 case GROUP_LDST_0100: dexLdSt (cpu); break;
14033 case GROUP_DPREG_0101: dexDPReg (cpu); break;
14034 case GROUP_LDST_0110: dexLdSt (cpu); break;
14035 case GROUP_ADVSIMD_0111: dexAdvSIMD0 (cpu); break;
14036 case GROUP_DPIMM_1000: dexDPImm (cpu); break;
14037 case GROUP_DPIMM_1001: dexDPImm (cpu); break;
14038 case GROUP_BREXSYS_1010: dexBr (cpu); break;
14039 case GROUP_BREXSYS_1011: dexBr (cpu); break;
14040 case GROUP_LDST_1100: dexLdSt (cpu); break;
14041 case GROUP_DPREG_1101: dexDPReg (cpu); break;
14042 case GROUP_LDST_1110: dexLdSt (cpu); break;
14043 case GROUP_ADVSIMD_1111: dexAdvSIMD1 (cpu); break;
14044
14045 case GROUP_UNALLOC_0001:
14046 case GROUP_UNALLOC_0010:
14047 case GROUP_UNALLOC_0011:
14048 HALT_UNALLOC;
14049
14050 default:
14051 /* Should never reach here. */
14052 HALT_NYI;
14053 }
14054 }
14055
14056 static bfd_boolean
14057 aarch64_step (sim_cpu *cpu)
14058 {
14059 uint64_t pc = aarch64_get_PC (cpu);
14060
14061 if (pc == TOP_LEVEL_RETURN_PC)
14062 return FALSE;
14063
14064 aarch64_set_next_PC (cpu, pc + 4);
14065
14066 /* Code is always little-endian. */
14067 sim_core_read_buffer (CPU_STATE (cpu), cpu, read_map,
14068 & aarch64_get_instr (cpu), pc, 4);
14069 aarch64_get_instr (cpu) = endian_le2h_4 (aarch64_get_instr (cpu));
14070
14071 TRACE_INSN (cpu, " pc = %" PRIx64 " instr = %08x", pc,
14072 aarch64_get_instr (cpu));
14073 TRACE_DISASM (cpu, pc);
14074
14075 aarch64_decode_and_execute (cpu, pc);
14076
14077 return TRUE;
14078 }
14079
14080 void
14081 aarch64_run (SIM_DESC sd)
14082 {
14083 sim_cpu *cpu = STATE_CPU (sd, 0);
14084
14085 while (aarch64_step (cpu))
14086 {
14087 aarch64_update_PC (cpu);
14088
14089 if (sim_events_tick (sd))
14090 sim_events_process (sd);
14091 }
14092
14093 sim_engine_halt (sd, cpu, NULL, aarch64_get_PC (cpu),
14094 sim_exited, aarch64_get_reg_s32 (cpu, R0, NO_SP));
14095 }
14096
14097 void
14098 aarch64_init (sim_cpu *cpu, uint64_t pc)
14099 {
14100 uint64_t sp = aarch64_get_stack_start (cpu);
14101
14102 /* Install SP, FP and PC and set LR to -20
14103 so we can detect a top-level return. */
14104 aarch64_set_reg_u64 (cpu, SP, SP_OK, sp);
14105 aarch64_set_reg_u64 (cpu, FP, SP_OK, sp);
14106 aarch64_set_reg_u64 (cpu, LR, SP_OK, TOP_LEVEL_RETURN_PC);
14107 aarch64_set_next_PC (cpu, pc);
14108 aarch64_update_PC (cpu);
14109 aarch64_init_LIT_table ();
14110 }