Fix for aarch64 sim sxtl/uxtl insns, plus another fix for addv.
1 /* simulator.c -- Interface for the AArch64 simulator.
2
3 Copyright (C) 2015-2017 Free Software Foundation, Inc.
4
5 Contributed by Red Hat.
6
7 This file is part of GDB.
8
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include <stdlib.h>
24 #include <stdio.h>
25 #include <string.h>
26 #include <sys/types.h>
27 #include <math.h>
28 #include <time.h>
29 #include <limits.h>
30
31 #include "simulator.h"
32 #include "cpustate.h"
33 #include "memory.h"
34
35 #define NO_SP 0
36 #define SP_OK 1
37
38 #define TST(_flag) (aarch64_test_CPSR_bit (cpu, _flag))
39 #define IS_SET(_X) (TST (( _X )) ? 1 : 0)
40 #define IS_CLEAR(_X) (TST (( _X )) ? 0 : 1)
41
42 /* Space saver macro. */
43 #define INSTR(HIGH, LOW) uimm (aarch64_get_instr (cpu), (HIGH), (LOW))
44
45 #define HALT_UNALLOC \
46 do \
47 { \
48 TRACE_DISASM (cpu, aarch64_get_PC (cpu)); \
49 TRACE_INSN (cpu, \
50 "Unallocated instruction detected at sim line %d," \
51 " exe addr %" PRIx64, \
52 __LINE__, aarch64_get_PC (cpu)); \
53 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
54 sim_stopped, SIM_SIGILL); \
55 } \
56 while (0)
57
58 #define HALT_NYI \
59 do \
60 { \
61 TRACE_DISASM (cpu, aarch64_get_PC (cpu)); \
62 TRACE_INSN (cpu, \
63 "Unimplemented instruction detected at sim line %d," \
64 " exe addr %" PRIx64, \
65 __LINE__, aarch64_get_PC (cpu)); \
66 if (! TRACE_ANY_P (cpu)) \
67 sim_io_eprintf (CPU_STATE (cpu), "SIM Error: Unimplemented instruction: %#08x\n", \
68 aarch64_get_instr (cpu)); \
69 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
70 sim_stopped, SIM_SIGABRT); \
71 } \
72 while (0)
73
74 #define NYI_assert(HI, LO, EXPECTED) \
75 do \
76 { \
77 if (INSTR ((HI), (LO)) != (EXPECTED)) \
78 HALT_NYI; \
79 } \
80 while (0)
81
82 /* Helper functions used by expand_logical_immediate. */
83
84 /* For i = 1, ..., N: result<i-1> = 1; all other bits are zero. */
85 static inline uint64_t
86 ones (int N)
87 {
88   return (N == 64 ? (uint64_t) -1 : ((1ULL << N) - 1));
89 }
90
91 /* Return bit N of VAL, i.e. result<0> = val<N>. */
92 static inline uint64_t
93 pickbit (uint64_t val, int N)
94 {
95 return pickbits64 (val, N, N);
96 }
97
98 static uint64_t
99 expand_logical_immediate (uint32_t S, uint32_t R, uint32_t N)
100 {
101 uint64_t mask;
102 uint64_t imm;
103 unsigned simd_size;
104
105   /* The immediate value is a pattern of S+1 set bits, rotated left by
106      simd_size - R (in other words, rotated right by R), then replicated. */
107 if (N != 0)
108 {
109 simd_size = 64;
110 mask = 0xffffffffffffffffull;
111 }
112 else
113 {
114 switch (S)
115 {
116 case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32; break;
117 case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; S &= 0xf; break;
118 case 0x30 ... 0x37: /* 110xxx */ simd_size = 8; S &= 0x7; break;
119 case 0x38 ... 0x3b: /* 1110xx */ simd_size = 4; S &= 0x3; break;
120 case 0x3c ... 0x3d: /* 11110x */ simd_size = 2; S &= 0x1; break;
121 default: return 0;
122 }
123 mask = (1ull << simd_size) - 1;
124 /* Top bits are IGNORED. */
125 R &= simd_size - 1;
126 }
127
128 /* NOTE: if S = simd_size - 1 we get 0xf..f which is rejected. */
129 if (S == simd_size - 1)
130 return 0;
131
132 /* S+1 consecutive bits to 1. */
133 /* NOTE: S can't be 63 due to detection above. */
134 imm = (1ull << (S + 1)) - 1;
135
136 /* Rotate to the left by simd_size - R. */
137 if (R != 0)
138 imm = ((imm << (simd_size - R)) & mask) | (imm >> R);
139
140 /* Replicate the value according to SIMD size. */
141 switch (simd_size)
142 {
143     case  2: imm = (imm <<  2) | imm; /* Fall through. */
144     case  4: imm = (imm <<  4) | imm; /* Fall through. */
145     case  8: imm = (imm <<  8) | imm; /* Fall through. */
146     case 16: imm = (imm << 16) | imm; /* Fall through. */
147     case 32: imm = (imm << 32) | imm; /* Fall through. */
148 case 64: break;
149 default: return 0;
150 }
151
152 return imm;
153 }
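/* Illustrative sketch, not part of the original source: a worked decode
   of a logical immediate, assuming only the function above.  For N=0,
   imms=0x25 (100101 => simd_size 16, S=5) and immr=2, the pattern of
   six set bits 0x003f is rotated right by 2 to 0xc00f and replicated
   to 0xc00fc00fc00fc00f.  */
#if 0
static void
example_expand_logical_immediate (void)
{
  /* One set bit per 2-bit element: N=0, imms=0x3c (11110x => simd_size
     2, S=0), immr=0 expands to alternating bits.  */
  uint64_t v = expand_logical_immediate (0x3c, 0, 0);
  assert (v == 0x5555555555555555ull);

  v = expand_logical_immediate (0x25, 2, 0);
  assert (v == 0xc00fc00fc00fc00full);
}
#endif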
154
155 /* Instr[22,10] encodes N, immr and imms. We want a lookup table
156    for each possible combination, i.e. 13 bits' worth of entries. */
157 #define LI_TABLE_SIZE (1 << 13)
158 static uint64_t LITable[LI_TABLE_SIZE];
159
160 void
161 aarch64_init_LIT_table (void)
162 {
163 unsigned index;
164
165 for (index = 0; index < LI_TABLE_SIZE; index++)
166 {
167 uint32_t N = uimm (index, 12, 12);
168 uint32_t immr = uimm (index, 11, 6);
169 uint32_t imms = uimm (index, 5, 0);
170
171 LITable [index] = expand_logical_immediate (imms, immr, N);
172 }
173 }
174
175 static void
176 dexNotify (sim_cpu *cpu)
177 {
178 /* instr[14,0] == type : 0 ==> method entry, 1 ==> method reentry
179 2 ==> exit Java, 3 ==> start next bytecode. */
180 uint32_t type = INSTR (14, 0);
181
182 TRACE_EVENTS (cpu, "Notify Insn encountered, type = 0x%x", type);
183
184 switch (type)
185 {
186 case 0:
187 /* aarch64_notifyMethodEntry (aarch64_get_reg_u64 (cpu, R23, 0),
188 aarch64_get_reg_u64 (cpu, R22, 0)); */
189 break;
190 case 1:
191 /* aarch64_notifyMethodReentry (aarch64_get_reg_u64 (cpu, R23, 0),
192 aarch64_get_reg_u64 (cpu, R22, 0)); */
193 break;
194 case 2:
195 /* aarch64_notifyMethodExit (); */
196 break;
197 case 3:
198 /* aarch64_notifyBCStart (aarch64_get_reg_u64 (cpu, R23, 0),
199 aarch64_get_reg_u64 (cpu, R22, 0)); */
200 break;
201 }
202 }
203
204 /* Secondary decode within top level groups. */
205
206 static void
207 dexPseudo (sim_cpu *cpu)
208 {
209 /* assert instr[28,27] = 00
210
211 We provide 2 pseudo instructions:
212
213 HALT stops execution of the simulator causing an immediate
214 return to the x86 code which entered it.
215
216 CALLOUT initiates recursive entry into x86 code. A register
217 argument holds the address of the x86 routine. Immediate
218 values in the instruction identify the number of general
219 purpose and floating point register arguments to be passed
220 and the type of any value to be returned. */
221
222 uint32_t PSEUDO_HALT = 0xE0000000U;
223 uint32_t PSEUDO_CALLOUT = 0x00018000U;
224 uint32_t PSEUDO_CALLOUTR = 0x00018001U;
225 uint32_t PSEUDO_NOTIFY = 0x00014000U;
226 uint32_t dispatch;
227
228 if (aarch64_get_instr (cpu) == PSEUDO_HALT)
229 {
230 TRACE_EVENTS (cpu, " Pseudo Halt Instruction");
231 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
232 sim_stopped, SIM_SIGTRAP);
233 }
234
235 dispatch = INSTR (31, 15);
236
237 /* We do not handle callouts at the moment. */
238   if (dispatch == (PSEUDO_CALLOUT >> 15) || dispatch == (PSEUDO_CALLOUTR >> 15))
239 {
240 TRACE_EVENTS (cpu, " Callout");
241 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
242 sim_stopped, SIM_SIGABRT);
243 }
244
245   else if (dispatch == (PSEUDO_NOTIFY >> 15))
246 dexNotify (cpu);
247
248 else
249 HALT_UNALLOC;
250 }
251
252 /* Load-store single register (unscaled offset)
253 These instructions employ a base register plus an unscaled signed
254 9 bit offset.
255
256    N.B. the base register (source) can be Xn or SP. All other
257 registers may not be SP. */
258
259 /* 32 bit load 32 bit unscaled signed 9 bit. */
260 static void
261 ldur32 (sim_cpu *cpu, int32_t offset)
262 {
263 unsigned rn = INSTR (9, 5);
264 unsigned rt = INSTR (4, 0);
265
266 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
267 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
268 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
269 + offset));
270 }
271
272 /* 64 bit load 64 bit unscaled signed 9 bit. */
273 static void
274 ldur64 (sim_cpu *cpu, int32_t offset)
275 {
276 unsigned rn = INSTR (9, 5);
277 unsigned rt = INSTR (4, 0);
278
279 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
280 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
281 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
282 + offset));
283 }
284
285 /* 32 bit load zero-extended byte unscaled signed 9 bit. */
286 static void
287 ldurb32 (sim_cpu *cpu, int32_t offset)
288 {
289 unsigned rn = INSTR (9, 5);
290 unsigned rt = INSTR (4, 0);
291
292 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
293 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8
294 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
295 + offset));
296 }
297
298 /* 32 bit load sign-extended byte unscaled signed 9 bit. */
299 static void
300 ldursb32 (sim_cpu *cpu, int32_t offset)
301 {
302 unsigned rn = INSTR (9, 5);
303 unsigned rt = INSTR (4, 0);
304
305 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
306 aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8
307 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
308 + offset));
309 }
310
311 /* 64 bit load sign-extended byte unscaled signed 9 bit. */
312 static void
313 ldursb64 (sim_cpu *cpu, int32_t offset)
314 {
315 unsigned rn = INSTR (9, 5);
316 unsigned rt = INSTR (4, 0);
317
318 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
319 aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s8
320 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
321 + offset));
322 }
323
324 /* 32 bit load zero-extended short unscaled signed 9 bit */
325 static void
326 ldurh32 (sim_cpu *cpu, int32_t offset)
327 {
328 unsigned rn = INSTR (9, 5);
329 unsigned rd = INSTR (4, 0);
330
331 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
332 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_mem_u16
333 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
334 + offset));
335 }
336
337 /* 32 bit load sign-extended short unscaled signed 9 bit */
338 static void
339 ldursh32 (sim_cpu *cpu, int32_t offset)
340 {
341 unsigned rn = INSTR (9, 5);
342 unsigned rd = INSTR (4, 0);
343
344 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
345 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s16
346 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
347 + offset));
348 }
349
350 /* 64 bit load sign-extended short unscaled signed 9 bit */
351 static void
352 ldursh64 (sim_cpu *cpu, int32_t offset)
353 {
354 unsigned rn = INSTR (9, 5);
355 unsigned rt = INSTR (4, 0);
356
357 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
358 aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s16
359 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
360 + offset));
361 }
362
363 /* 64 bit load sign-extended word unscaled signed 9 bit */
364 static void
365 ldursw (sim_cpu *cpu, int32_t offset)
366 {
367 unsigned rn = INSTR (9, 5);
368 unsigned rd = INSTR (4, 0);
369
370 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
371   aarch64_set_reg_s64 (cpu, rd, NO_SP, aarch64_get_mem_s32
372 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
373 + offset));
374 }
375
376 /* N.B. with stores the value in source is written to the address
377 identified by source2 modified by offset. */
378
379 /* 32 bit store 32 bit unscaled signed 9 bit. */
380 static void
381 stur32 (sim_cpu *cpu, int32_t offset)
382 {
383 unsigned rn = INSTR (9, 5);
384 unsigned rd = INSTR (4, 0);
385
386 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
387 aarch64_set_mem_u32 (cpu,
388 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
389 aarch64_get_reg_u32 (cpu, rd, NO_SP));
390 }
391
392 /* 64 bit store 64 bit unscaled signed 9 bit */
393 static void
394 stur64 (sim_cpu *cpu, int32_t offset)
395 {
396 unsigned rn = INSTR (9, 5);
397 unsigned rd = INSTR (4, 0);
398
399 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
400 aarch64_set_mem_u64 (cpu,
401 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
402 aarch64_get_reg_u64 (cpu, rd, NO_SP));
403 }
404
405 /* 32 bit store byte unscaled signed 9 bit */
406 static void
407 sturb (sim_cpu *cpu, int32_t offset)
408 {
409 unsigned rn = INSTR (9, 5);
410 unsigned rd = INSTR (4, 0);
411
412 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
413 aarch64_set_mem_u8 (cpu,
414 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
415 aarch64_get_reg_u8 (cpu, rd, NO_SP));
416 }
417
418 /* 32 bit store short unscaled signed 9 bit */
419 static void
420 sturh (sim_cpu *cpu, int32_t offset)
421 {
422 unsigned rn = INSTR (9, 5);
423 unsigned rd = INSTR (4, 0);
424
425 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
426 aarch64_set_mem_u16 (cpu,
427 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
428 aarch64_get_reg_u16 (cpu, rd, NO_SP));
429 }
430
431 /* Load single register pc-relative label
432 Offset is a signed 19 bit immediate count in words
433 rt may not be SP. */
434
435 /* 32 bit pc-relative load */
436 static void
437 ldr32_pcrel (sim_cpu *cpu, int32_t offset)
438 {
439 unsigned rd = INSTR (4, 0);
440
441 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
442 aarch64_set_reg_u64 (cpu, rd, NO_SP,
443 aarch64_get_mem_u32
444 (cpu, aarch64_get_PC (cpu) + offset * 4));
445 }
446
447 /* 64 bit pc-relative load */
448 static void
449 ldr_pcrel (sim_cpu *cpu, int32_t offset)
450 {
451 unsigned rd = INSTR (4, 0);
452
453 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
454 aarch64_set_reg_u64 (cpu, rd, NO_SP,
455 aarch64_get_mem_u64
456 (cpu, aarch64_get_PC (cpu) + offset * 4));
457 }
458
459 /* sign extended 32 bit pc-relative load */
460 static void
461 ldrsw_pcrel (sim_cpu *cpu, int32_t offset)
462 {
463 unsigned rd = INSTR (4, 0);
464
465 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
466 aarch64_set_reg_u64 (cpu, rd, NO_SP,
467 aarch64_get_mem_s32
468 (cpu, aarch64_get_PC (cpu) + offset * 4));
469 }
470
471 /* float pc-relative load */
472 static void
473 fldrs_pcrel (sim_cpu *cpu, int32_t offset)
474 {
475 unsigned int rd = INSTR (4, 0);
476
477 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
478 aarch64_set_vec_u32 (cpu, rd, 0,
479 aarch64_get_mem_u32
480 (cpu, aarch64_get_PC (cpu) + offset * 4));
481 }
482
483 /* double pc-relative load */
484 static void
485 fldrd_pcrel (sim_cpu *cpu, int32_t offset)
486 {
487 unsigned int st = INSTR (4, 0);
488
489 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
490 aarch64_set_vec_u64 (cpu, st, 0,
491 aarch64_get_mem_u64
492 (cpu, aarch64_get_PC (cpu) + offset * 4));
493 }
494
495 /* long double pc-relative load. */
496 static void
497 fldrq_pcrel (sim_cpu *cpu, int32_t offset)
498 {
499 unsigned int st = INSTR (4, 0);
500 uint64_t addr = aarch64_get_PC (cpu) + offset * 4;
501 FRegister a;
502
503 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
504 aarch64_get_mem_long_double (cpu, addr, & a);
505 aarch64_set_FP_long_double (cpu, st, a);
506 }
507
508 /* This can be used to scale an offset by applying
509    the requisite shift. The second argument is either
510    16, 32, 64 or 128. */
511
512 #define SCALE(_offset, _elementSize) \
513 ((_offset) << ScaleShift ## _elementSize)
514
515 /* This can be used to optionally scale a register derived offset
516 by applying the requisite shift as indicated by the Scaling
517 argument. The second argument is either Byte, Short, Word
518 or Long. The third argument is either Scaled or Unscaled.
519 N.B. when _Scaling is Scaled the shift gets ANDed with
520 all 1s while when it is Unscaled it gets ANDed with 0. */
521
522 #define OPT_SCALE(_offset, _elementType, _Scaling) \
523 ((_offset) << (_Scaling ? ScaleShift ## _elementType : 0))
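/* Illustrative only, assuming the usual ScaleShift definitions
   (ScaleShift32 == 2, ScaleShift64 == 3, declared elsewhere): a 12-bit
   immediate of 3 addresses the fourth 32-bit element, so
     SCALE (3, 32)               => 12
     OPT_SCALE (5, 64, Scaled)   => 40
     OPT_SCALE (5, 64, Unscaled) => 5   */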
524
525 /* This can be used to zero or sign extend a 32 bit register derived
526    value to a 64 bit value. The first argument must be the value as
527 a uint32_t and the second must be either UXTW or SXTW. The result
528 is returned as an int64_t. */
529
530 static inline int64_t
531 extend (uint32_t value, Extension extension)
532 {
533 union
534 {
535 uint32_t u;
536 int32_t n;
537 } x;
538
539 /* A branchless variant of this ought to be possible. */
540 if (extension == UXTW || extension == NoExtension)
541 return value;
542
543 x.u = value;
544 return x.n;
545 }
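/* Illustrative only: the difference between the two extensions,
   assuming the Extension enum used above:
     extend (0x80000000, UXTW) => 0x0000000080000000
     extend (0x80000000, SXTW) => 0xffffffff80000000 (i.e. -2147483648)  */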
546
547 /* Scalar Floating Point
548
549 FP load/store single register (4 addressing modes)
550
551 N.B. the base register (source) can be the stack pointer.
552 The secondary source register (source2) can only be an Xn register. */
553
554 /* Load 32 bit unscaled signed 9 bit with pre- or post-writeback. */
555 static void
556 fldrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
557 {
558 unsigned rn = INSTR (9, 5);
559 unsigned st = INSTR (4, 0);
560 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
561
562 if (wb != Post)
563 address += offset;
564
565 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
566 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32 (cpu, address));
567 if (wb == Post)
568 address += offset;
569
570 if (wb != NoWriteBack)
571 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
572 }
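/* For reference (illustrative, standard AArch64 assembler syntax; not
   from this file), the three WriteBack cases of fldrs_wb map to:
     NoWriteBack: ldr s0, [x1, #8]   address = x1 + 8, x1 unchanged
     Pre:         ldr s0, [x1, #8]!  address = x1 + 8, then x1 = address
     Post:        ldr s0, [x1], #8   address = x1, then x1 += 8   */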
573
574 /* Load 8 bit with unsigned 12 bit offset. */
575 static void
576 fldrb_abs (sim_cpu *cpu, uint32_t offset)
577 {
578 unsigned rd = INSTR (4, 0);
579 unsigned rn = INSTR (9, 5);
580 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
581
582 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
583   aarch64_set_vec_u8 (cpu, rd, 0, aarch64_get_mem_u8 (cpu, addr));
584 }
585
586 /* Load 16 bit scaled unsigned 12 bit. */
587 static void
588 fldrh_abs (sim_cpu *cpu, uint32_t offset)
589 {
590 unsigned rd = INSTR (4, 0);
591 unsigned rn = INSTR (9, 5);
592 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16);
593
594 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
595 aarch64_set_vec_u16 (cpu, rd, 0, aarch64_get_mem_u16 (cpu, addr));
596 }
597
598 /* Load 32 bit scaled unsigned 12 bit. */
599 static void
600 fldrs_abs (sim_cpu *cpu, uint32_t offset)
601 {
602 unsigned rd = INSTR (4, 0);
603 unsigned rn = INSTR (9, 5);
604 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32);
605
606 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
607 aarch64_set_vec_u32 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr));
608 }
609
610 /* Load 64 bit scaled unsigned 12 bit. */
611 static void
612 fldrd_abs (sim_cpu *cpu, uint32_t offset)
613 {
614 unsigned rd = INSTR (4, 0);
615 unsigned rn = INSTR (9, 5);
616 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64);
617
618 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
619 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
620 }
621
622 /* Load 128 bit scaled unsigned 12 bit. */
623 static void
624 fldrq_abs (sim_cpu *cpu, uint32_t offset)
625 {
626 unsigned rd = INSTR (4, 0);
627 unsigned rn = INSTR (9, 5);
628 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
629
630 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
631 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
632 aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_mem_u64 (cpu, addr + 8));
633 }
634
635 /* Load 32 bit scaled or unscaled zero- or sign-extended
636 32-bit register offset. */
637 static void
638 fldrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
639 {
640 unsigned rm = INSTR (20, 16);
641 unsigned rn = INSTR (9, 5);
642 unsigned st = INSTR (4, 0);
643 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
644 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
645 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
646
647 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
648 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
649 (cpu, address + displacement));
650 }
651
652 /* Load 64 bit unscaled signed 9 bit with pre- or post-writeback. */
653 static void
654 fldrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
655 {
656 unsigned rn = INSTR (9, 5);
657 unsigned st = INSTR (4, 0);
658 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
659
660 if (wb != Post)
661 address += offset;
662
663 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
664 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64 (cpu, address));
665
666 if (wb == Post)
667 address += offset;
668
669 if (wb != NoWriteBack)
670 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
671 }
672
673 /* Load 64 bit scaled or unscaled zero- or sign-extended 32-bit register offset. */
674 static void
675 fldrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
676 {
677 unsigned rm = INSTR (20, 16);
678 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
679 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
680
681 fldrd_wb (cpu, displacement, NoWriteBack);
682 }
683
684 /* Load 128 bit unscaled signed 9 bit with pre- or post-writeback. */
685 static void
686 fldrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
687 {
688 FRegister a;
689 unsigned rn = INSTR (9, 5);
690 unsigned st = INSTR (4, 0);
691 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
692
693 if (wb != Post)
694 address += offset;
695
696 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
697 aarch64_get_mem_long_double (cpu, address, & a);
698 aarch64_set_FP_long_double (cpu, st, a);
699
700 if (wb == Post)
701 address += offset;
702
703 if (wb != NoWriteBack)
704 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
705 }
706
707 /* Load 128 bit scaled or unscaled zero- or sign-extended 32-bit register offset */
708 static void
709 fldrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
710 {
711 unsigned rm = INSTR (20, 16);
712 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
713 uint64_t displacement = OPT_SCALE (extended, 128, scaling);
714
715 fldrq_wb (cpu, displacement, NoWriteBack);
716 }
717
718 /* Memory Access
719
720 load-store single register
721 There are four addressing modes available here which all employ a
722 64 bit source (base) register.
723
724 N.B. the base register (source) can be the stack pointer.
725    The secondary source register (source2) can only be an Xn register.
726
727 Scaled, 12-bit, unsigned immediate offset, without pre- and
728 post-index options.
729 Unscaled, 9-bit, signed immediate offset with pre- or post-index
730 writeback.
731 scaled or unscaled 64-bit register offset.
732 scaled or unscaled 32-bit extended register offset.
733
734    All offsets are assumed to be raw from the decode, i.e. the
735    simulator is expected to adjust scaled offsets based on the
736    accessed data size. The same applies to the register and extended
737    register offset versions, except that in the latter case the
738    operation may also require a sign extend.
739
740 A separate method is provided for each possible addressing mode. */
741
742 /* 32 bit load 32 bit scaled unsigned 12 bit */
743 static void
744 ldr32_abs (sim_cpu *cpu, uint32_t offset)
745 {
746 unsigned rn = INSTR (9, 5);
747 unsigned rt = INSTR (4, 0);
748
749 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
750 /* The target register may not be SP but the source may be. */
751 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
752 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
753 + SCALE (offset, 32)));
754 }
755
756 /* 32 bit load 32 bit unscaled signed 9 bit with pre- or post-writeback. */
757 static void
758 ldr32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
759 {
760 unsigned rn = INSTR (9, 5);
761 unsigned rt = INSTR (4, 0);
762 uint64_t address;
763
764 if (rn == rt && wb != NoWriteBack)
765 HALT_UNALLOC;
766
767 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
768
769 if (wb != Post)
770 address += offset;
771
772 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
773 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
774
775 if (wb == Post)
776 address += offset;
777
778 if (wb != NoWriteBack)
779 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
780 }
781
782 /* 32 bit load 32 bit scaled or unscaled
783 zero- or sign-extended 32-bit register offset */
784 static void
785 ldr32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
786 {
787 unsigned rm = INSTR (20, 16);
788 unsigned rn = INSTR (9, 5);
789 unsigned rt = INSTR (4, 0);
790 /* rn may reference SP, rm and rt must reference ZR */
791
792 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
793 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
794 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
795
796 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
797 aarch64_set_reg_u64 (cpu, rt, NO_SP,
798 aarch64_get_mem_u32 (cpu, address + displacement));
799 }
800
801 /* 64 bit load 64 bit scaled unsigned 12 bit */
802 static void
803 ldr_abs (sim_cpu *cpu, uint32_t offset)
804 {
805 unsigned rn = INSTR (9, 5);
806 unsigned rt = INSTR (4, 0);
807
808 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
809 /* The target register may not be SP but the source may be. */
810 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
811 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
812 + SCALE (offset, 64)));
813 }
814
815 /* 64 bit load 64 bit unscaled signed 9 bit with pre- or post-writeback. */
816 static void
817 ldr_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
818 {
819 unsigned rn = INSTR (9, 5);
820 unsigned rt = INSTR (4, 0);
821 uint64_t address;
822
823 if (rn == rt && wb != NoWriteBack)
824 HALT_UNALLOC;
825
826 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
827
828 if (wb != Post)
829 address += offset;
830
831 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
832 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
833
834 if (wb == Post)
835 address += offset;
836
837 if (wb != NoWriteBack)
838 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
839 }
840
841 /* 64 bit load 64 bit scaled or unscaled zero-
842 or sign-extended 32-bit register offset. */
843 static void
844 ldr_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
845 {
846 unsigned rm = INSTR (20, 16);
847 unsigned rn = INSTR (9, 5);
848 unsigned rt = INSTR (4, 0);
849 /* rn may reference SP, rm and rt must reference ZR */
850
851 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
852 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
853 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
854
855 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
856 aarch64_set_reg_u64 (cpu, rt, NO_SP,
857 aarch64_get_mem_u64 (cpu, address + displacement));
858 }
859
860 /* 32 bit load zero-extended byte scaled unsigned 12 bit. */
861 static void
862 ldrb32_abs (sim_cpu *cpu, uint32_t offset)
863 {
864 unsigned rn = INSTR (9, 5);
865 unsigned rt = INSTR (4, 0);
866
867 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
868   /* The target register may not be SP but the source may be.
869      There is no scaling required for a byte load. */
870 aarch64_set_reg_u64 (cpu, rt, NO_SP,
871 aarch64_get_mem_u8
872 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
873 }
874
875 /* 32 bit load zero-extended byte unscaled signed 9 bit with pre- or post-writeback. */
876 static void
877 ldrb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
878 {
879 unsigned rn = INSTR (9, 5);
880 unsigned rt = INSTR (4, 0);
881 uint64_t address;
882
883 if (rn == rt && wb != NoWriteBack)
884 HALT_UNALLOC;
885
886 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
887
888 if (wb != Post)
889 address += offset;
890
891 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
892 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
893
894 if (wb == Post)
895 address += offset;
896
897 if (wb != NoWriteBack)
898 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
899 }
900
901 /* 32 bit load zero-extended byte scaled or unscaled zero-
902 or sign-extended 32-bit register offset. */
903 static void
904 ldrb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
905 {
906 unsigned rm = INSTR (20, 16);
907 unsigned rn = INSTR (9, 5);
908 unsigned rt = INSTR (4, 0);
909 /* rn may reference SP, rm and rt must reference ZR */
910
911 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
912 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
913 extension);
914
915 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
916 /* There is no scaling required for a byte load. */
917 aarch64_set_reg_u64 (cpu, rt, NO_SP,
918 aarch64_get_mem_u8 (cpu, address + displacement));
919 }
920
921 /* 64 bit load sign-extended byte unscaled signed 9 bit
922 with pre- or post-writeback. */
923 static void
924 ldrsb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
925 {
926 unsigned rn = INSTR (9, 5);
927 unsigned rt = INSTR (4, 0);
928 uint64_t address;
929 int64_t val;
930
931 if (rn == rt && wb != NoWriteBack)
932 HALT_UNALLOC;
933
934 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
935
936 if (wb != Post)
937 address += offset;
938
939 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
940 val = aarch64_get_mem_s8 (cpu, address);
941 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
942
943 if (wb == Post)
944 address += offset;
945
946 if (wb != NoWriteBack)
947 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
948 }
949
950 /* 64 bit load sign-extended byte scaled unsigned 12 bit. */
951 static void
952 ldrsb_abs (sim_cpu *cpu, uint32_t offset)
953 {
954 ldrsb_wb (cpu, offset, NoWriteBack);
955 }
956
957 /* 64 bit load sign-extended byte scaled or unscaled zero-
958 or sign-extended 32-bit register offset. */
959 static void
960 ldrsb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
961 {
962 unsigned rm = INSTR (20, 16);
963 unsigned rn = INSTR (9, 5);
964 unsigned rt = INSTR (4, 0);
965 /* rn may reference SP, rm and rt must reference ZR */
966
967 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
968 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
969 extension);
970 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
971 /* There is no scaling required for a byte load. */
972 aarch64_set_reg_s64 (cpu, rt, NO_SP,
973 aarch64_get_mem_s8 (cpu, address + displacement));
974 }
975
976 /* 32 bit load zero-extended short scaled unsigned 12 bit. */
977 static void
978 ldrh32_abs (sim_cpu *cpu, uint32_t offset)
979 {
980 unsigned rn = INSTR (9, 5);
981 unsigned rt = INSTR (4, 0);
982 uint32_t val;
983
984 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
985 /* The target register may not be SP but the source may be. */
986 val = aarch64_get_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
987 + SCALE (offset, 16));
988 aarch64_set_reg_u32 (cpu, rt, NO_SP, val);
989 }
990
991 /* 32 bit load zero-extended short unscaled signed 9 bit
992 with pre- or post-writeback. */
993 static void
994 ldrh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
995 {
996 unsigned rn = INSTR (9, 5);
997 unsigned rt = INSTR (4, 0);
998 uint64_t address;
999
1000 if (rn == rt && wb != NoWriteBack)
1001 HALT_UNALLOC;
1002
1003 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1004
1005 if (wb != Post)
1006 address += offset;
1007
1008 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1009 aarch64_set_reg_u32 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
1010
1011 if (wb == Post)
1012 address += offset;
1013
1014 if (wb != NoWriteBack)
1015 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1016 }
1017
1018 /* 32 bit load zero-extended short scaled or unscaled zero-
1019 or sign-extended 32-bit register offset. */
1020 static void
1021 ldrh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1022 {
1023 unsigned rm = INSTR (20, 16);
1024 unsigned rn = INSTR (9, 5);
1025 unsigned rt = INSTR (4, 0);
1026 /* rn may reference SP, rm and rt must reference ZR */
1027
1028 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1029 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1030 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1031
1032 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1033 aarch64_set_reg_u32 (cpu, rt, NO_SP,
1034 aarch64_get_mem_u16 (cpu, address + displacement));
1035 }
1036
1037 /* 32 bit load sign-extended short scaled unsigned 12 bit. */
1038 static void
1039 ldrsh32_abs (sim_cpu *cpu, uint32_t offset)
1040 {
1041 unsigned rn = INSTR (9, 5);
1042 unsigned rt = INSTR (4, 0);
1043 int32_t val;
1044
1045 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1046 /* The target register may not be SP but the source may be. */
1047 val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1048 + SCALE (offset, 16));
1049 aarch64_set_reg_s32 (cpu, rt, NO_SP, val);
1050 }
1051
1052 /* 32 bit load sign-extended short unscaled signed 9 bit
1053 with pre- or post-writeback. */
1054 static void
1055 ldrsh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1056 {
1057 unsigned rn = INSTR (9, 5);
1058 unsigned rt = INSTR (4, 0);
1059 uint64_t address;
1060
1061 if (rn == rt && wb != NoWriteBack)
1062 HALT_UNALLOC;
1063
1064 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1065
1066 if (wb != Post)
1067 address += offset;
1068
1069 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1070 aarch64_set_reg_s32 (cpu, rt, NO_SP,
1071 (int32_t) aarch64_get_mem_s16 (cpu, address));
1072
1073 if (wb == Post)
1074 address += offset;
1075
1076 if (wb != NoWriteBack)
1077 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1078 }
1079
1080 /* 32 bit load sign-extended short scaled or unscaled zero-
1081 or sign-extended 32-bit register offset. */
1082 static void
1083 ldrsh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1084 {
1085 unsigned rm = INSTR (20, 16);
1086 unsigned rn = INSTR (9, 5);
1087 unsigned rt = INSTR (4, 0);
1088 /* rn may reference SP, rm and rt must reference ZR */
1089
1090 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1091 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1092 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1093
1094 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1095 aarch64_set_reg_s32 (cpu, rt, NO_SP,
1096 (int32_t) aarch64_get_mem_s16
1097 (cpu, address + displacement));
1098 }
1099
1100 /* 64 bit load sign-extended short scaled unsigned 12 bit. */
1101 static void
1102 ldrsh_abs (sim_cpu *cpu, uint32_t offset)
1103 {
1104 unsigned rn = INSTR (9, 5);
1105 unsigned rt = INSTR (4, 0);
1106 int64_t val;
1107
1108 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1109 /* The target register may not be SP but the source may be. */
1110 val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1111 + SCALE (offset, 16));
1112 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1113 }
1114
1115 /* 64 bit load sign-extended short unscaled signed 9 bit
1116 with pre- or post-writeback. */
1117 static void
1118 ldrsh64_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1119 {
1120 unsigned rn = INSTR (9, 5);
1121 unsigned rt = INSTR (4, 0);
1122 uint64_t address;
1123 int64_t val;
1124
1125 if (rn == rt && wb != NoWriteBack)
1126 HALT_UNALLOC;
1127
1128 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1129 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1130
1131 if (wb != Post)
1132 address += offset;
1133
1134 val = aarch64_get_mem_s16 (cpu, address);
1135 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1136
1137 if (wb == Post)
1138 address += offset;
1139
1140 if (wb != NoWriteBack)
1141 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1142 }
1143
1144 /* 64 bit load sign-extended short scaled or unscaled zero-
1145 or sign-extended 32-bit register offset. */
1146 static void
1147 ldrsh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1148 {
1149 unsigned rm = INSTR (20, 16);
1150 unsigned rn = INSTR (9, 5);
1151 unsigned rt = INSTR (4, 0);
1152
1153 /* rn may reference SP, rm and rt must reference ZR */
1154
1155 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1156 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1157 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1158 int64_t val;
1159
1160 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1161 val = aarch64_get_mem_s16 (cpu, address + displacement);
1162 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1163 }
1164
1165 /* 64 bit load sign-extended 32 bit scaled unsigned 12 bit. */
1166 static void
1167 ldrsw_abs (sim_cpu *cpu, uint32_t offset)
1168 {
1169 unsigned rn = INSTR (9, 5);
1170 unsigned rt = INSTR (4, 0);
1171 int64_t val;
1172
1173 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1174 val = aarch64_get_mem_s32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1175 + SCALE (offset, 32));
1176 /* The target register may not be SP but the source may be. */
1177   aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1178 }
1179
1180 /* 64 bit load sign-extended 32 bit unscaled signed 9 bit
1181 with pre- or post-writeback. */
1182 static void
1183 ldrsw_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1184 {
1185 unsigned rn = INSTR (9, 5);
1186 unsigned rt = INSTR (4, 0);
1187 uint64_t address;
1188
1189 if (rn == rt && wb != NoWriteBack)
1190 HALT_UNALLOC;
1191
1192 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1193
1194 if (wb != Post)
1195 address += offset;
1196
1197 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1198 aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32 (cpu, address));
1199
1200 if (wb == Post)
1201 address += offset;
1202
1203 if (wb != NoWriteBack)
1204 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1205 }
1206
1207 /* 64 bit load sign-extended 32 bit scaled or unscaled zero-
1208 or sign-extended 32-bit register offset. */
1209 static void
1210 ldrsw_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1211 {
1212 unsigned rm = INSTR (20, 16);
1213 unsigned rn = INSTR (9, 5);
1214 unsigned rt = INSTR (4, 0);
1215 /* rn may reference SP, rm and rt must reference ZR */
1216
1217 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1218 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1219 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
1220
1221 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1222 aarch64_set_reg_s64 (cpu, rt, NO_SP,
1223 aarch64_get_mem_s32 (cpu, address + displacement));
1224 }
1225
1226 /* N.B. with stores the value in source is written to the
1227 address identified by source2 modified by source3/offset. */
1228
1229 /* 32 bit store scaled unsigned 12 bit. */
1230 static void
1231 str32_abs (sim_cpu *cpu, uint32_t offset)
1232 {
1233 unsigned rn = INSTR (9, 5);
1234 unsigned rt = INSTR (4, 0);
1235
1236 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1237 /* The target register may not be SP but the source may be. */
1238 aarch64_set_mem_u32 (cpu, (aarch64_get_reg_u64 (cpu, rn, SP_OK)
1239 + SCALE (offset, 32)),
1240 aarch64_get_reg_u32 (cpu, rt, NO_SP));
1241 }
1242
1243 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
1244 static void
1245 str32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1246 {
1247 unsigned rn = INSTR (9, 5);
1248 unsigned rt = INSTR (4, 0);
1249 uint64_t address;
1250
1251 if (rn == rt && wb != NoWriteBack)
1252 HALT_UNALLOC;
1253
1254 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1255 if (wb != Post)
1256 address += offset;
1257
1258 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1259 aarch64_set_mem_u32 (cpu, address, aarch64_get_reg_u32 (cpu, rt, NO_SP));
1260
1261 if (wb == Post)
1262 address += offset;
1263
1264 if (wb != NoWriteBack)
1265 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1266 }
1267
1268 /* 32 bit store scaled or unscaled zero- or
1269 sign-extended 32-bit register offset. */
1270 static void
1271 str32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1272 {
1273 unsigned rm = INSTR (20, 16);
1274 unsigned rn = INSTR (9, 5);
1275 unsigned rt = INSTR (4, 0);
1276
1277 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1278 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1279 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
1280
1281 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1282 aarch64_set_mem_u32 (cpu, address + displacement,
1283                        aarch64_get_reg_u32 (cpu, rt, NO_SP));
1284 }
1285
1286 /* 64 bit store scaled unsigned 12 bit. */
1287 static void
1288 str_abs (sim_cpu *cpu, uint32_t offset)
1289 {
1290 unsigned rn = INSTR (9, 5);
1291 unsigned rt = INSTR (4, 0);
1292
1293 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1294 aarch64_set_mem_u64 (cpu,
1295 aarch64_get_reg_u64 (cpu, rn, SP_OK)
1296 + SCALE (offset, 64),
1297 aarch64_get_reg_u64 (cpu, rt, NO_SP));
1298 }
1299
1300 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
1301 static void
1302 str_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1303 {
1304 unsigned rn = INSTR (9, 5);
1305 unsigned rt = INSTR (4, 0);
1306 uint64_t address;
1307
1308 if (rn == rt && wb != NoWriteBack)
1309 HALT_UNALLOC;
1310
1311 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1312
1313 if (wb != Post)
1314 address += offset;
1315
1316 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1317 aarch64_set_mem_u64 (cpu, address, aarch64_get_reg_u64 (cpu, rt, NO_SP));
1318
1319 if (wb == Post)
1320 address += offset;
1321
1322 if (wb != NoWriteBack)
1323 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1324 }
1325
1326 /* 64 bit store scaled or unscaled zero-
1327 or sign-extended 32-bit register offset. */
1328 static void
1329 str_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1330 {
1331 unsigned rm = INSTR (20, 16);
1332 unsigned rn = INSTR (9, 5);
1333 unsigned rt = INSTR (4, 0);
1334 /* rn may reference SP, rm and rt must reference ZR */
1335
1336 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1337 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1338 extension);
1339 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
1340
1341 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1342 aarch64_set_mem_u64 (cpu, address + displacement,
1343 aarch64_get_reg_u64 (cpu, rt, NO_SP));
1344 }
1345
1346 /* 32 bit store byte scaled unsigned 12 bit. */
1347 static void
1348 strb_abs (sim_cpu *cpu, uint32_t offset)
1349 {
1350 unsigned rn = INSTR (9, 5);
1351 unsigned rt = INSTR (4, 0);
1352
1353 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1354 /* The target register may not be SP but the source may be.
1355      There is no scaling required for a byte store. */
1356 aarch64_set_mem_u8 (cpu,
1357 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
1358 aarch64_get_reg_u8 (cpu, rt, NO_SP));
1359 }
1360
1361 /* 32 bit store byte unscaled signed 9 bit with pre- or post-writeback. */
1362 static void
1363 strb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1364 {
1365 unsigned rn = INSTR (9, 5);
1366 unsigned rt = INSTR (4, 0);
1367 uint64_t address;
1368
1369 if (rn == rt && wb != NoWriteBack)
1370 HALT_UNALLOC;
1371
1372 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1373
1374 if (wb != Post)
1375 address += offset;
1376
1377 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1378 aarch64_set_mem_u8 (cpu, address, aarch64_get_reg_u8 (cpu, rt, NO_SP));
1379
1380 if (wb == Post)
1381 address += offset;
1382
1383 if (wb != NoWriteBack)
1384 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1385 }
1386
1387 /* 32 bit store byte scaled or unscaled zero-
1388 or sign-extended 32-bit register offset. */
1389 static void
1390 strb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1391 {
1392 unsigned rm = INSTR (20, 16);
1393 unsigned rn = INSTR (9, 5);
1394 unsigned rt = INSTR (4, 0);
1395 /* rn may reference SP, rm and rt must reference ZR */
1396
1397 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1398 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1399 extension);
1400
1401 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1402   /* There is no scaling required for a byte store. */
1403 aarch64_set_mem_u8 (cpu, address + displacement,
1404 aarch64_get_reg_u8 (cpu, rt, NO_SP));
1405 }
1406
1407 /* 32 bit store short scaled unsigned 12 bit. */
1408 static void
1409 strh_abs (sim_cpu *cpu, uint32_t offset)
1410 {
1411 unsigned rn = INSTR (9, 5);
1412 unsigned rt = INSTR (4, 0);
1413
1414 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1415 /* The target register may not be SP but the source may be. */
1416 aarch64_set_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1417 + SCALE (offset, 16),
1418 aarch64_get_reg_u16 (cpu, rt, NO_SP));
1419 }
1420
1421 /* 32 bit store short unscaled signed 9 bit with pre- or post-writeback. */
1422 static void
1423 strh_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1424 {
1425 unsigned rn = INSTR (9, 5);
1426 unsigned rt = INSTR (4, 0);
1427 uint64_t address;
1428
1429 if (rn == rt && wb != NoWriteBack)
1430 HALT_UNALLOC;
1431
1432 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1433
1434 if (wb != Post)
1435 address += offset;
1436
1437 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1438 aarch64_set_mem_u16 (cpu, address, aarch64_get_reg_u16 (cpu, rt, NO_SP));
1439
1440 if (wb == Post)
1441 address += offset;
1442
1443 if (wb != NoWriteBack)
1444 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1445 }
1446
1447 /* 32 bit store short scaled or unscaled zero-
1448 or sign-extended 32-bit register offset. */
1449 static void
1450 strh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1451 {
1452 unsigned rm = INSTR (20, 16);
1453 unsigned rn = INSTR (9, 5);
1454 unsigned rt = INSTR (4, 0);
1455 /* rn may reference SP, rm and rt must reference ZR */
1456
1457 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1458 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1459 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1460
1461 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1462 aarch64_set_mem_u16 (cpu, address + displacement,
1463 aarch64_get_reg_u16 (cpu, rt, NO_SP));
1464 }
1465
1466 /* Prefetch unsigned 12 bit. */
1467 static void
1468 prfm_abs (sim_cpu *cpu, uint32_t offset)
1469 {
1470 /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
1471                       00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
1472 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
1473 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
1474                       10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
1475 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
1476 ow ==> UNALLOC
1477 PrfOp prfop = prfop (instr, 4, 0);
1478 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK)
1479 + SCALE (offset, 64). */
1480
1481 /* TODO : implement prefetch of address. */
1482 }
1483
1484 /* Prefetch scaled or unscaled zero- or sign-extended 32-bit register offset. */
1485 static void
1486 prfm_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1487 {
1488 /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
1489                       00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
1490 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
1491 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
1492                       10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
1493 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
1494 ow ==> UNALLOC
1495 rn may reference SP, rm may only reference ZR
1496 PrfOp prfop = prfop (instr, 4, 0);
1497 uint64_t base = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1498 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1499 extension);
1500 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
1501 uint64_t address = base + displacement. */
1502
1503 /* TODO : implement prefetch of address */
1504 }
1505
1506 /* 64 bit pc-relative prefetch. */
1507 static void
1508 prfm_pcrel (sim_cpu *cpu, int32_t offset)
1509 {
1510 /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
1511                       00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
1512 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
1513 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
1514                       10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
1515 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
1516 ow ==> UNALLOC
1517 PrfOp prfop = prfop (instr, 4, 0);
1518 uint64_t address = aarch64_get_PC (cpu) + offset. */
1519
1520 /* TODO : implement this */
1521 }
1522
1523 /* Load-store exclusive. */
1524
1525 static void
1526 ldxr (sim_cpu *cpu)
1527 {
1528 unsigned rn = INSTR (9, 5);
1529 unsigned rt = INSTR (4, 0);
1530 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1531 int size = INSTR (31, 30);
1532 /* int ordered = INSTR (15, 15); */
1533 /* int exclusive = ! INSTR (23, 23); */
1534
1535 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1536 switch (size)
1537 {
1538 case 0:
1539 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
1540 break;
1541 case 1:
1542 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
1543 break;
1544 case 2:
1545 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
1546 break;
1547 case 3:
1548 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
1549 break;
1550 }
1551 }
1552
1553 static void
1554 stxr (sim_cpu *cpu)
1555 {
1556 unsigned rn = INSTR (9, 5);
1557 unsigned rt = INSTR (4, 0);
1558 unsigned rs = INSTR (20, 16);
1559 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1560 int size = INSTR (31, 30);
1561 uint64_t data = aarch64_get_reg_u64 (cpu, rt, NO_SP);
1562
1563 switch (size)
1564 {
1565 case 0: aarch64_set_mem_u8 (cpu, address, data); break;
1566 case 1: aarch64_set_mem_u16 (cpu, address, data); break;
1567 case 2: aarch64_set_mem_u32 (cpu, address, data); break;
1568 case 3: aarch64_set_mem_u64 (cpu, address, data); break;
1569 }
1570
1571 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1572   aarch64_set_reg_u64 (cpu, rs, NO_SP, 0); /* Exclusive access always succeeds in the sim. */
1573 }
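/* Illustrative note (standard AArch64 assembly, not from this file):
   a typical exclusive-access retry loop that exercises the two
   handlers above looks like

     retry: ldxr  w0, [x1]
            add   w0, w0, #1
            stxr  w2, w0, [x1]
            cbnz  w2, retry

   Because stxr above always writes 0 (success) to Rs, the simulated
   loop always exits on the first iteration.  */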
1574
1575 static void
1576 dexLoadLiteral (sim_cpu *cpu)
1577 {
1578 /* instr[29,27] == 011
1579 instr[25,24] == 00
1580 instr[31,30:26] = opc: 000 ==> LDRW, 001 ==> FLDRS
1581 010 ==> LDRX, 011 ==> FLDRD
1582 100 ==> LDRSW, 101 ==> FLDRQ
1583 110 ==> PRFM, 111 ==> UNALLOC
1584 instr[26] ==> V : 0 ==> GReg, 1 ==> FReg
1585 instr[23, 5] == simm19 */
1586
1587 /* unsigned rt = INSTR (4, 0); */
1588 uint32_t dispatch = (INSTR (31, 30) << 1) | INSTR (26, 26);
1589 int32_t imm = simm32 (aarch64_get_instr (cpu), 23, 5);
1590
1591 switch (dispatch)
1592 {
1593 case 0: ldr32_pcrel (cpu, imm); break;
1594 case 1: fldrs_pcrel (cpu, imm); break;
1595 case 2: ldr_pcrel (cpu, imm); break;
1596 case 3: fldrd_pcrel (cpu, imm); break;
1597 case 4: ldrsw_pcrel (cpu, imm); break;
1598 case 5: fldrq_pcrel (cpu, imm); break;
1599 case 6: prfm_pcrel (cpu, imm); break;
1600 case 7:
1601 default:
1602 HALT_UNALLOC;
1603 }
1604 }
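/* Illustrative only: a 64-bit "ldr x0, label" literal load encodes
   opc (instr[31,30]) = 01 and V (instr[26]) = 0, giving dispatch
   (1 << 1) | 0 == 2, which routes to ldr_pcrel above.  */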
1605
1606 /* Immediate arithmetic
1607 The aimm argument is a 12 bit unsigned value or a 12 bit unsigned
1608 value left shifted by 12 bits (done at decode).
1609
1610 N.B. the register args (dest, source) can normally be Xn or SP.
1611    The exception occurs for flag setting instructions which may
1612 only use Xn for the output (dest). */
1613
1614 /* 32 bit add immediate. */
1615 static void
1616 add32 (sim_cpu *cpu, uint32_t aimm)
1617 {
1618 unsigned rn = INSTR (9, 5);
1619 unsigned rd = INSTR (4, 0);
1620
1621 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1622 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1623 aarch64_get_reg_u32 (cpu, rn, SP_OK) + aimm);
1624 }
1625
1626 /* 64 bit add immediate. */
1627 static void
1628 add64 (sim_cpu *cpu, uint32_t aimm)
1629 {
1630 unsigned rn = INSTR (9, 5);
1631 unsigned rd = INSTR (4, 0);
1632
1633 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1634 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1635 aarch64_get_reg_u64 (cpu, rn, SP_OK) + aimm);
1636 }
1637
1638 static void
1639 set_flags_for_add32 (sim_cpu *cpu, int32_t value1, int32_t value2)
1640 {
1641 int32_t result = value1 + value2;
1642 int64_t sresult = (int64_t) value1 + (int64_t) value2;
1643 uint64_t uresult = (uint64_t)(uint32_t) value1
1644 + (uint64_t)(uint32_t) value2;
1645 uint32_t flags = 0;
1646
1647 if (result == 0)
1648 flags |= Z;
1649
1650 if (result & (1 << 31))
1651 flags |= N;
1652
1653   if (uresult != (uint32_t) result)
1654 flags |= C;
1655
1656 if (sresult != result)
1657 flags |= V;
1658
1659 aarch64_set_CPSR (cpu, flags);
1660 }
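#if 0
/* Illustrative check only (hypothetical values, not in the original
   source): 0xffffffff + 1 wraps to zero, so Z and C are set; adding
   0x40000000 to itself overflows the signed range, so N and V are set. */
static void
example_add32_flags (sim_cpu *cpu)
{
  set_flags_for_add32 (cpu, -1, 1);                   /* NZCV = 0110.  */
  set_flags_for_add32 (cpu, 0x40000000, 0x40000000);  /* NZCV = 1001.  */
}
#endif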
1661
1662 #define NEG(a) (((a) & signbit) == signbit)
1663 #define POS(a) (((a) & signbit) == 0)
1664
1665 static void
1666 set_flags_for_add64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
1667 {
1668 uint64_t result = value1 + value2;
1669 uint32_t flags = 0;
1670 uint64_t signbit = 1ULL << 63;
1671
1672 if (result == 0)
1673 flags |= Z;
1674
1675 if (NEG (result))
1676 flags |= N;
1677
1678 if ( (NEG (value1) && NEG (value2))
1679 || (NEG (value1) && POS (result))
1680 || (NEG (value2) && POS (result)))
1681 flags |= C;
1682
1683 if ( (NEG (value1) && NEG (value2) && POS (result))
1684 || (POS (value1) && POS (value2) && NEG (result)))
1685 flags |= V;
1686
1687 aarch64_set_CPSR (cpu, flags);
1688 }
1689
1690 static void
1691 set_flags_for_sub32 (sim_cpu *cpu, uint32_t value1, uint32_t value2)
1692 {
1693 uint32_t result = value1 - value2;
1694 uint32_t flags = 0;
1695 uint32_t signbit = 1U << 31;
1696
1697 if (result == 0)
1698 flags |= Z;
1699
1700 if (NEG (result))
1701 flags |= N;
1702
1703 if ( (NEG (value1) && POS (value2))
1704 || (NEG (value1) && POS (result))
1705 || (POS (value2) && POS (result)))
1706 flags |= C;
1707
1708 if ( (NEG (value1) && POS (value2) && POS (result))
1709 || (POS (value1) && NEG (value2) && NEG (result)))
1710 flags |= V;
1711
1712 aarch64_set_CPSR (cpu, flags);
1713 }
1714
1715 static void
1716 set_flags_for_sub64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
1717 {
1718 uint64_t result = value1 - value2;
1719 uint32_t flags = 0;
1720 uint64_t signbit = 1ULL << 63;
1721
1722 if (result == 0)
1723 flags |= Z;
1724
1725 if (NEG (result))
1726 flags |= N;
1727
1728 if ( (NEG (value1) && POS (value2))
1729 || (NEG (value1) && POS (result))
1730 || (POS (value2) && POS (result)))
1731 flags |= C;
1732
1733 if ( (NEG (value1) && POS (value2) && POS (result))
1734 || (POS (value1) && NEG (value2) && NEG (result)))
1735 flags |= V;
1736
1737 aarch64_set_CPSR (cpu, flags);
1738 }
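#if 0
/* Illustrative check only (hypothetical values, not in the original
   source): these helpers give SUBS/CMP its comparison semantics.  */
static void
example_sub64_flags (sim_cpu *cpu)
{
  set_flags_for_sub64 (cpu, 5, 5);  /* Equal: Z and C set (no borrow).  */
  set_flags_for_sub64 (cpu, 3, 5);  /* Lower: N set, C clear (borrow).  */
  set_flags_for_sub64 (cpu, 0x8000000000000000ull, 1); /* V and C set.  */
}
#endif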
1739
1740 static void
1741 set_flags_for_binop32 (sim_cpu *cpu, uint32_t result)
1742 {
1743 uint32_t flags = 0;
1744
1745 if (result == 0)
1746 flags |= Z;
1747 else
1748 flags &= ~ Z;
1749
1750 if (result & (1 << 31))
1751 flags |= N;
1752 else
1753 flags &= ~ N;
1754
1755 aarch64_set_CPSR (cpu, flags);
1756 }
1757
1758 static void
1759 set_flags_for_binop64 (sim_cpu *cpu, uint64_t result)
1760 {
1761 uint32_t flags = 0;
1762
1763 if (result == 0)
1764 flags |= Z;
1765 else
1766 flags &= ~ Z;
1767
1768 if (result & (1ULL << 63))
1769 flags |= N;
1770 else
1771 flags &= ~ N;
1772
1773 aarch64_set_CPSR (cpu, flags);
1774 }
1775
1776 /* 32 bit add immediate set flags. */
1777 static void
1778 adds32 (sim_cpu *cpu, uint32_t aimm)
1779 {
1780 unsigned rn = INSTR (9, 5);
1781 unsigned rd = INSTR (4, 0);
1782 /* TODO : do we need to worry about signs here? */
1783 int32_t value1 = aarch64_get_reg_s32 (cpu, rn, SP_OK);
1784
1785 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1786 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + aimm);
1787 set_flags_for_add32 (cpu, value1, aimm);
1788 }
1789
1790 /* 64 bit add immediate set flags. */
1791 static void
1792 adds64 (sim_cpu *cpu, uint32_t aimm)
1793 {
1794 unsigned rn = INSTR (9, 5);
1795 unsigned rd = INSTR (4, 0);
1796 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1797 uint64_t value2 = aimm;
1798
1799 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1800 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1801 set_flags_for_add64 (cpu, value1, value2);
1802 }
1803
1804 /* 32 bit sub immediate. */
1805 static void
1806 sub32 (sim_cpu *cpu, uint32_t aimm)
1807 {
1808 unsigned rn = INSTR (9, 5);
1809 unsigned rd = INSTR (4, 0);
1810
1811 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1812 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1813 aarch64_get_reg_u32 (cpu, rn, SP_OK) - aimm);
1814 }
1815
1816 /* 64 bit sub immediate. */
1817 static void
1818 sub64 (sim_cpu *cpu, uint32_t aimm)
1819 {
1820 unsigned rn = INSTR (9, 5);
1821 unsigned rd = INSTR (4, 0);
1822
1823 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1824 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1825 aarch64_get_reg_u64 (cpu, rn, SP_OK) - aimm);
1826 }
1827
1828 /* 32 bit sub immediate set flags. */
1829 static void
1830 subs32 (sim_cpu *cpu, uint32_t aimm)
1831 {
1832 unsigned rn = INSTR (9, 5);
1833 unsigned rd = INSTR (4, 0);
1834   uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
1835 uint32_t value2 = aimm;
1836
1837 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1838 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1839 set_flags_for_sub32 (cpu, value1, value2);
1840 }
1841
1842 /* 64 bit sub immediate set flags. */
1843 static void
1844 subs64 (sim_cpu *cpu, uint32_t aimm)
1845 {
1846 unsigned rn = INSTR (9, 5);
1847 unsigned rd = INSTR (4, 0);
1848 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1849 uint32_t value2 = aimm;
1850
1851 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1852 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1853 set_flags_for_sub64 (cpu, value1, value2);
1854 }
1855
1856 /* Data Processing Register. */
1857
1858 /* First two helpers to perform the shift operations. */
1859
1860 static inline uint32_t
1861 shifted32 (uint32_t value, Shift shift, uint32_t count)
1862 {
1863 switch (shift)
1864 {
1865 default:
1866 case LSL:
1867 return (value << count);
1868 case LSR:
1869 return (value >> count);
1870 case ASR:
1871 {
1872 int32_t svalue = value;
1873 return (svalue >> count);
1874 }
1875 case ROR:
1876 {
1877 uint32_t top = value >> count;
1878 uint32_t bottom = value << (32 - count);
1879 return (bottom | top);
1880 }
1881 }
1882 }
1883
1884 static inline uint64_t
1885 shifted64 (uint64_t value, Shift shift, uint32_t count)
1886 {
1887 switch (shift)
1888 {
1889 default:
1890 case LSL:
1891 return (value << count);
1892 case LSR:
1893 return (value >> count);
1894 case ASR:
1895 {
1896 int64_t svalue = value;
1897 return (svalue >> count);
1898 }
1899 case ROR:
1900 {
1901 uint64_t top = value >> count;
1902 uint64_t bottom = value << (64 - count);
1903 return (bottom | top);
1904 }
1905 }
1906 }
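/* Example (illustrative): shifted32 (0x80000001, ROR, 1) returns
0xC0000000, since bit 0 rotates round into bit 31. */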
1907
1908 /* Arithmetic shifted register.
1909 These allow an optional LSL, ASR or LSR to the second source
1910 register with a count up to the register bit count.
1911
1912 N.B. register args may not be SP. */
1913
1914 /* 32 bit ADD shifted register. */
1915 static void
1916 add32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1917 {
1918 unsigned rm = INSTR (20, 16);
1919 unsigned rn = INSTR (9, 5);
1920 unsigned rd = INSTR (4, 0);
1921
1922 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1923 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1924 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1925 + shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1926 shift, count));
1927 }
1928
1929 /* 64 bit ADD shifted register. */
1930 static void
1931 add64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1932 {
1933 unsigned rm = INSTR (20, 16);
1934 unsigned rn = INSTR (9, 5);
1935 unsigned rd = INSTR (4, 0);
1936
1937 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1938 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1939 aarch64_get_reg_u64 (cpu, rn, NO_SP)
1940 + shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1941 shift, count));
1942 }
1943
1944 /* 32 bit ADD shifted register setting flags. */
1945 static void
1946 adds32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1947 {
1948 unsigned rm = INSTR (20, 16);
1949 unsigned rn = INSTR (9, 5);
1950 unsigned rd = INSTR (4, 0);
1951
1952 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
1953 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1954 shift, count);
1955
1956 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1957 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1958 set_flags_for_add32 (cpu, value1, value2);
1959 }
1960
1961 /* 64 bit ADD shifted register setting flags. */
1962 static void
1963 adds64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1964 {
1965 unsigned rm = INSTR (20, 16);
1966 unsigned rn = INSTR (9, 5);
1967 unsigned rd = INSTR (4, 0);
1968
1969 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
1970 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1971 shift, count);
1972
1973 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1974 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1975 set_flags_for_add64 (cpu, value1, value2);
1976 }
1977
1978 /* 32 bit SUB shifted register. */
1979 static void
1980 sub32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1981 {
1982 unsigned rm = INSTR (20, 16);
1983 unsigned rn = INSTR (9, 5);
1984 unsigned rd = INSTR (4, 0);
1985
1986 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1987 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1988 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1989 - shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1990 shift, count));
1991 }
1992
1993 /* 64 bit SUB shifted register. */
1994 static void
1995 sub64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1996 {
1997 unsigned rm = INSTR (20, 16);
1998 unsigned rn = INSTR (9, 5);
1999 unsigned rd = INSTR (4, 0);
2000
2001 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2002 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2003 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2004 - shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
2005 shift, count));
2006 }
2007
2008 /* 32 bit SUB shifted register setting flags. */
2009 static void
2010 subs32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2011 {
2012 unsigned rm = INSTR (20, 16);
2013 unsigned rn = INSTR (9, 5);
2014 unsigned rd = INSTR (4, 0);
2015
2016 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2017 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
2018 shift, count);
2019
2020 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2021 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2022 set_flags_for_sub32 (cpu, value1, value2);
2023 }
2024
2025 /* 64 bit SUB shifted register setting flags. */
2026 static void
2027 subs64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2028 {
2029 unsigned rm = INSTR (20, 16);
2030 unsigned rn = INSTR (9, 5);
2031 unsigned rd = INSTR (4, 0);
2032
2033 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2034 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
2035 shift, count);
2036
2037 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2038 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2039 set_flags_for_sub64 (cpu, value1, value2);
2040 }
2041
2042 /* Now a couple more helpers to fetch the
2043 relevant source register element, either
2044 sign- or zero-extended as required by the
2045 extension value. */
2046
2047 static uint32_t
2048 extreg32 (sim_cpu *cpu, unsigned int lo, Extension extension)
2049 {
2050 switch (extension)
2051 {
2052 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2053 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2054 case UXTW: /* Fall through. */
2055 case UXTX: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2056 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2057 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2058 case SXTW: /* Fall through. */
2059 case SXTX: /* Fall through. */
2060 default: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2061 }
2062 }
2063
2064 static uint64_t
2065 extreg64 (sim_cpu *cpu, unsigned int lo, Extension extension)
2066 {
2067 switch (extension)
2068 {
2069 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2070 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2071 case UXTW: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2072 case UXTX: return aarch64_get_reg_u64 (cpu, lo, NO_SP);
2073 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2074 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2075 case SXTW: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2076 case SXTX:
2077 default: return aarch64_get_reg_s64 (cpu, lo, NO_SP);
2078 }
2079 }
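/* Example (illustrative): with register 1 holding 0x80,
extreg64 (cpu, 1, SXTB) returns 0xffffffffffffff80 whereas
extreg64 (cpu, 1, UXTB) returns 0x80. */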
2080
2081 /* Arithmetic extending register
2082 These allow an optional sign extension of some portion of the
2083 second source register followed by an optional left shift of
2084 between 0 and 4 bits.
2085
2086 N.B. output (dest) and first input arg (source) may normally be Xn
2087 or SP. However, for flag setting operations dest can only be
2088 Xn. Second input registers are always Xn. */
2089
2090 /* 32 bit ADD extending register. */
2091 static void
2092 add32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2093 {
2094 unsigned rm = INSTR (20, 16);
2095 unsigned rn = INSTR (9, 5);
2096 unsigned rd = INSTR (4, 0);
2097
2098 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2099 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2100 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2101 + (extreg32 (cpu, rm, extension) << shift));
2102 }
2103
2104 /* 64 bit ADD extending register.
2105 N.B. This subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2106 static void
2107 add64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2108 {
2109 unsigned rm = INSTR (20, 16);
2110 unsigned rn = INSTR (9, 5);
2111 unsigned rd = INSTR (4, 0);
2112
2113 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2114 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2115 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2116 + (extreg64 (cpu, rm, extension) << shift));
2117 }
2118
2119 /* 32 bit ADD extending register setting flags. */
2120 static void
2121 adds32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2122 {
2123 unsigned rm = INSTR (20, 16);
2124 unsigned rn = INSTR (9, 5);
2125 unsigned rd = INSTR (4, 0);
2126
2127 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2128 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2129
2130 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2131 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2132 set_flags_for_add32 (cpu, value1, value2);
2133 }
2134
2135 /* 64 bit ADD extending register setting flags */
2136 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2137 static void
2138 adds64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2139 {
2140 unsigned rm = INSTR (20, 16);
2141 unsigned rn = INSTR (9, 5);
2142 unsigned rd = INSTR (4, 0);
2143
2144 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2145 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2146
2147 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2148 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2149 set_flags_for_add64 (cpu, value1, value2);
2150 }
2151
2152 /* 32 bit SUB extending register. */
2153 static void
2154 sub32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2155 {
2156 unsigned rm = INSTR (20, 16);
2157 unsigned rn = INSTR (9, 5);
2158 unsigned rd = INSTR (4, 0);
2159
2160 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2161 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2162 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2163 - (extreg32 (cpu, rm, extension) << shift));
2164 }
2165
2166 /* 64 bit SUB extending register. */
2167 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2168 static void
2169 sub64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2170 {
2171 unsigned rm = INSTR (20, 16);
2172 unsigned rn = INSTR (9, 5);
2173 unsigned rd = INSTR (4, 0);
2174
2175 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2176 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2177 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2178 - (extreg64 (cpu, rm, extension) << shift));
2179 }
2180
2181 /* 32 bit SUB extending register setting flags. */
2182 static void
2183 subs32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2184 {
2185 unsigned rm = INSTR (20, 16);
2186 unsigned rn = INSTR (9, 5);
2187 unsigned rd = INSTR (4, 0);
2188
2189 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2190 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2191
2192 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2193 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2194 set_flags_for_sub32 (cpu, value1, value2);
2195 }
2196
2197 /* 64 bit SUB extending register setting flags */
2198 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2199 static void
2200 subs64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2201 {
2202 unsigned rm = INSTR (20, 16);
2203 unsigned rn = INSTR (9, 5);
2204 unsigned rd = INSTR (4, 0);
2205
2206 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2207 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2208
2209 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2210 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2211 set_flags_for_sub64 (cpu, value1, value2);
2212 }
2213
2214 static void
2215 dexAddSubtractImmediate (sim_cpu *cpu)
2216 {
2217 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2218 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2219 instr[29] = set : 0 ==> no flags, 1 ==> set flags
2220 instr[28,24] = 10001
2221 instr[23,22] = shift : 00 ==> LSL#0, 01 ==> LSL#12, 1x ==> UNALLOC
2222 instr[21,10] = uimm12
2223 instr[9,5] = Rn
2224 instr[4,0] = Rd */
2225
2226 /* N.B. the shift is applied at decode before calling the add/sub routine. */
2227 uint32_t shift = INSTR (23, 22);
2228 uint32_t imm = INSTR (21, 10);
2229 uint32_t dispatch = INSTR (31, 29);
2230
2231 NYI_assert (28, 24, 0x11);
2232
2233 if (shift > 1)
2234 HALT_UNALLOC;
2235
2236 if (shift)
2237 imm <<= 12;
2238
2239 switch (dispatch)
2240 {
2241 case 0: add32 (cpu, imm); break;
2242 case 1: adds32 (cpu, imm); break;
2243 case 2: sub32 (cpu, imm); break;
2244 case 3: subs32 (cpu, imm); break;
2245 case 4: add64 (cpu, imm); break;
2246 case 5: adds64 (cpu, imm); break;
2247 case 6: sub64 (cpu, imm); break;
2248 case 7: subs64 (cpu, imm); break;
2249 }
2250 }
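/* Example (illustrative): ADDS W0, W1, #1, LSL #12 encodes shift == 1
and uimm12 == 1, so adds32 receives the pre-shifted immediate
0x1000. */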
2251
2252 static void
2253 dexAddSubtractShiftedRegister (sim_cpu *cpu)
2254 {
2255 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2256 instr[30,29] = op : 00 ==> ADD, 01 ==> ADDS, 10 ==> SUB, 11 ==> SUBS
2257 instr[28,24] = 01011
2258 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> UNALLOC
2259 instr[21] = 0
2260 instr[20,16] = Rm
2261 instr[15,10] = count : must be 0xxxxx for 32 bit
2262 instr[9,5] = Rn
2263 instr[4,0] = Rd */
2264
2265 uint32_t size = INSTR (31, 31);
2266 uint32_t count = INSTR (15, 10);
2267 Shift shiftType = INSTR (23, 22);
2268
2269 NYI_assert (28, 24, 0x0B);
2270 NYI_assert (21, 21, 0);
2271
2272 /* Shift encoded as ROR is unallocated. */
2273 if (shiftType == ROR)
2274 HALT_UNALLOC;
2275
2276 /* 32 bit operations must have count[5] = 0
2277 or else we have an UNALLOC. */
2278 if (size == 0 && uimm (count, 5, 5))
2279 HALT_UNALLOC;
2280
2281 /* Dispatch on size:op, i.e. instr [31,29]. */
2282 switch (INSTR (31, 29))
2283 {
2284 case 0: add32_shift (cpu, shiftType, count); break;
2285 case 1: adds32_shift (cpu, shiftType, count); break;
2286 case 2: sub32_shift (cpu, shiftType, count); break;
2287 case 3: subs32_shift (cpu, shiftType, count); break;
2288 case 4: add64_shift (cpu, shiftType, count); break;
2289 case 5: adds64_shift (cpu, shiftType, count); break;
2290 case 6: sub64_shift (cpu, shiftType, count); break;
2291 case 7: subs64_shift (cpu, shiftType, count); break;
2292 }
2293 }
2294
2295 static void
2296 dexAddSubtractExtendedRegister (sim_cpu *cpu)
2297 {
2298 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2299 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2300 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2301 instr[28,24] = 01011
2302 instr[23,22] = opt : 0 ==> ok, 1,2,3 ==> UNALLOC
2303 instr[21] = 1
2304 instr[20,16] = Rm
2305 instr[15,13] = option : 000 ==> UXTB, 001 ==> UXTH,
2306 010 ==> LSL|UXTW, 011 ==> UXTX,
2307 100 ==> SXTB, 101 ==> SXTH,
2308 110 ==> SXTW, 111 ==> SXTX,
2309 instr[12,10] = shift : 0,1,2,3,4 ==> ok, 5,6,7 ==> UNALLOC
2310 instr[9,5] = Rn
2311 instr[4,0] = Rd */
2312
2313 Extension extensionType = INSTR (15, 13);
2314 uint32_t shift = INSTR (12, 10);
2315
2316 NYI_assert (28, 24, 0x0B);
2317 NYI_assert (21, 21, 1);
2318
2319 /* Shift may not exceed 4. */
2320 if (shift > 4)
2321 HALT_UNALLOC;
2322
2323 /* Dispatch on size:op:set?. */
2324 switch (INSTR (31, 29))
2325 {
2326 case 0: add32_ext (cpu, extensionType, shift); break;
2327 case 1: adds32_ext (cpu, extensionType, shift); break;
2328 case 2: sub32_ext (cpu, extensionType, shift); break;
2329 case 3: subs32_ext (cpu, extensionType, shift); break;
2330 case 4: add64_ext (cpu, extensionType, shift); break;
2331 case 5: adds64_ext (cpu, extensionType, shift); break;
2332 case 6: sub64_ext (cpu, extensionType, shift); break;
2333 case 7: subs64_ext (cpu, extensionType, shift); break;
2334 }
2335 }
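/* Example (illustrative): ADD X0, SP, W1, UXTW #2 dispatches to
add64_ext with extension UXTW and shift 2, adding the zero-extended
low word of X1, scaled by 4, to the stack pointer. */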
2336
2337 /* Conditional data processing
2338 Condition register is implicit 3rd source. */
2339
2340 /* 32 bit add with carry. */
2341 /* N.B. register args may not be SP. */
2342
2343 static void
2344 adc32 (sim_cpu *cpu)
2345 {
2346 unsigned rm = INSTR (20, 16);
2347 unsigned rn = INSTR (9, 5);
2348 unsigned rd = INSTR (4, 0);
2349
2350 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2351 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2352 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2353 + aarch64_get_reg_u32 (cpu, rm, NO_SP)
2354 + IS_SET (C));
2355 }
2356
2357 /* 64 bit add with carry */
2358 static void
2359 adc64 (sim_cpu *cpu)
2360 {
2361 unsigned rm = INSTR (20, 16);
2362 unsigned rn = INSTR (9, 5);
2363 unsigned rd = INSTR (4, 0);
2364
2365 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2366 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2367 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2368 + aarch64_get_reg_u64 (cpu, rm, NO_SP)
2369 + IS_SET (C));
2370 }
2371
2372 /* 32 bit add with carry setting flags. */
2373 static void
2374 adcs32 (sim_cpu *cpu)
2375 {
2376 unsigned rm = INSTR (20, 16);
2377 unsigned rn = INSTR (9, 5);
2378 unsigned rd = INSTR (4, 0);
2379
2380 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2381 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2382 uint32_t carry = IS_SET (C);
2383
2384 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2385 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2386 set_flags_for_add32 (cpu, value1, value2 + carry);
2387 }
2388
2389 /* 64 bit add with carry setting flags. */
2390 static void
2391 adcs64 (sim_cpu *cpu)
2392 {
2393 unsigned rm = INSTR (20, 16);
2394 unsigned rn = INSTR (9, 5);
2395 unsigned rd = INSTR (4, 0);
2396
2397 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2398 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2399 uint64_t carry = IS_SET (C);
2400
2401 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2402 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2403 set_flags_for_add64 (cpu, value1, value2 + carry);
2404 }
2405
2406 /* 32 bit sub with carry. */
2407 static void
2408 sbc32 (sim_cpu *cpu)
2409 {
2410 unsigned rm = INSTR (20, 16);
2411 unsigned rn = INSTR (9, 5); /* ngc iff rn == 31. */
2412 unsigned rd = INSTR (4, 0);
2413
2414 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2415 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2416 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2417 - aarch64_get_reg_u32 (cpu, rm, NO_SP)
2418 - 1 + IS_SET (C));
2419 }
2420
2421 /* 64 bit sub with carry */
2422 static void
2423 sbc64 (sim_cpu *cpu)
2424 {
2425 unsigned rm = INSTR (20, 16);
2426 unsigned rn = INSTR (9, 5);
2427 unsigned rd = INSTR (4, 0);
2428
2429 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2430 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2431 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2432 - aarch64_get_reg_u64 (cpu, rm, NO_SP)
2433 - 1 + IS_SET (C));
2434 }
2435
2436 /* 32 bit sub with carry setting flags */
2437 static void
2438 sbcs32 (sim_cpu *cpu)
2439 {
2440 unsigned rm = INSTR (20, 16);
2441 unsigned rn = INSTR (9, 5);
2442 unsigned rd = INSTR (4, 0);
2443
2444 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2445 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2446 uint32_t carry = IS_SET (C);
2447 uint32_t result = value1 - value2 - 1 + carry;
2448
2449 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2450 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2451 set_flags_for_sub32 (cpu, value1, value2 + 1 - carry);
2452 }
2453
2454 /* 64 bit sub with carry setting flags */
2455 static void
2456 sbcs64 (sim_cpu *cpu)
2457 {
2458 unsigned rm = INSTR (20, 16);
2459 unsigned rn = INSTR (9, 5);
2460 unsigned rd = INSTR (4, 0);
2461
2462 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2463 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2464 uint64_t carry = IS_SET (C);
2465 uint64_t result = value1 - value2 - 1 + carry;
2466
2467 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2468 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2469 set_flags_for_sub64 (cpu, value1, value2 + 1 - carry);
2470 }
2471
2472 static void
2473 dexAddSubtractWithCarry (sim_cpu *cpu)
2474 {
2475 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2476 instr[30] = op : 0 ==> ADC, 1 ==> SBC
2477 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2478 instr[28,21] = 1 1010 000
2479 instr[20,16] = Rm
2480 instr[15,10] = op2 : 000000 ==> ok, otherwise ==> UNALLOC
2481 instr[9,5] = Rn
2482 instr[4,0] = Rd */
2483
2484 uint32_t op2 = INSTR (15, 10);
2485
2486 NYI_assert (28, 21, 0xD0);
2487
2488 if (op2 != 0)
2489 HALT_UNALLOC;
2490
2491 /* Dispatch on size:op:set?. */
2492 switch (INSTR (31, 29))
2493 {
2494 case 0: adc32 (cpu); break;
2495 case 1: adcs32 (cpu); break;
2496 case 2: sbc32 (cpu); break;
2497 case 3: sbcs32 (cpu); break;
2498 case 4: adc64 (cpu); break;
2499 case 5: adcs64 (cpu); break;
2500 case 6: sbc64 (cpu); break;
2501 case 7: sbcs64 (cpu); break;
2502 }
2503 }
2504
2505 static uint32_t
2506 testConditionCode (sim_cpu *cpu, CondCode cc)
2507 {
2508 /* This should be reducible to branchless logic
2509 by some careful testing of bits in CC followed
2510 by the requisite masking and combining of bits
2511 from the flag register.
2512
2513 For now we do it with a switch. */
2514 int res;
2515
2516 switch (cc)
2517 {
2518 case EQ: res = IS_SET (Z); break;
2519 case NE: res = IS_CLEAR (Z); break;
2520 case CS: res = IS_SET (C); break;
2521 case CC: res = IS_CLEAR (C); break;
2522 case MI: res = IS_SET (N); break;
2523 case PL: res = IS_CLEAR (N); break;
2524 case VS: res = IS_SET (V); break;
2525 case VC: res = IS_CLEAR (V); break;
2526 case HI: res = IS_SET (C) && IS_CLEAR (Z); break;
2527 case LS: res = IS_CLEAR (C) || IS_SET (Z); break;
2528 case GE: res = IS_SET (N) == IS_SET (V); break;
2529 case LT: res = IS_SET (N) != IS_SET (V); break;
2530 case GT: res = IS_CLEAR (Z) && (IS_SET (N) == IS_SET (V)); break;
2531 case LE: res = IS_SET (Z) || (IS_SET (N) != IS_SET (V)); break;
2532 case AL:
2533 case NV:
2534 default:
2535 res = 1;
2536 break;
2537 }
2538 return res;
2539 }
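/* Example: after a compare of equal operands Z is set and N == V, so
EQ, GE and LE hold here while NE, LT and GT do not. */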
2540
2541 static void
2542 CondCompare (sim_cpu *cpu) /* aka: ccmp and ccmn */
2543 {
2544 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2545 instr[30] = compare with positive (1) or negative value (0)
2546 instr[29,21] = 1 1101 0010
2547 instr[20,16] = Rm or const
2548 instr[15,12] = cond
2549 instr[11] = compare reg (0) or const (1)
2550 instr[10] = 0
2551 instr[9,5] = Rn
2552 instr[4] = 0
2553 instr[3,0] = value for CPSR bits if the comparison does not take place. */
2554 signed int negate;
2555 unsigned rm;
2556 unsigned rn;
2557
2558 NYI_assert (29, 21, 0x1d2);
2559 NYI_assert (10, 10, 0);
2560 NYI_assert (4, 4, 0);
2561
2562 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2563 if (! testConditionCode (cpu, INSTR (15, 12)))
2564 {
2565 aarch64_set_CPSR (cpu, INSTR (3, 0));
2566 return;
2567 }
2568
2569 negate = INSTR (30, 30) ? 1 : -1;
2570 rm = INSTR (20, 16);
2571 rn = INSTR ( 9, 5);
2572
2573 if (INSTR (31, 31))
2574 {
2575 if (INSTR (11, 11))
2576 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2577 negate * (uint64_t) rm);
2578 else
2579 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2580 negate * aarch64_get_reg_u64 (cpu, rm, SP_OK));
2581 }
2582 else
2583 {
2584 if (INSTR (11, 11))
2585 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2586 negate * rm);
2587 else
2588 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2589 negate * aarch64_get_reg_u32 (cpu, rm, SP_OK));
2590 }
2591 }
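/* Example (illustrative): for CCMP X1, X2, #0xC, NE the NE condition
is tested first; if it fails the flags are simply set to
NZCV == 0b1100, otherwise they are set as for CMP X1, X2. */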
2592
2593 static void
2594 do_vec_MOV_whole_vector (sim_cpu *cpu)
2595 {
2596 /* MOV Vd.T, Vs.T (alias for ORR Vd.T, Vn.T, Vm.T where Vn == Vm)
2597
2598 instr[31] = 0
2599 instr[30] = half(0)/full(1)
2600 instr[29,21] = 001110101
2601 instr[20,16] = Vs
2602 instr[15,10] = 000111
2603 instr[9,5] = Vs
2604 instr[4,0] = Vd */
2605
2606 unsigned vs = INSTR (9, 5);
2607 unsigned vd = INSTR (4, 0);
2608
2609 NYI_assert (29, 21, 0x075);
2610 NYI_assert (15, 10, 0x07);
2611
2612 if (INSTR (20, 16) != vs)
2613 HALT_NYI;
2614
2615 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2616 if (INSTR (30, 30))
2617 aarch64_set_vec_u64 (cpu, vd, 1, aarch64_get_vec_u64 (cpu, vs, 1));
2618
2619 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vs, 0));
2620 }
2621
2622 static void
2623 do_vec_MOV_into_scalar (sim_cpu *cpu)
2624 {
2625 /* instr[31] = 0
2626 instr[30] = word(0)/long(1)
2627 instr[29,21] = 00 1110 000
2628 instr[20,18] = element size and index
2629 instr[17,10] = 00 0011 11
2630 instr[9,5] = V source
2631 instr[4,0] = R dest */
2632
2633 unsigned vs = INSTR (9, 5);
2634 unsigned rd = INSTR (4, 0);
2635
2636 NYI_assert (29, 21, 0x070);
2637 NYI_assert (17, 10, 0x0F);
2638
2639 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2640 switch (INSTR (20, 18))
2641 {
2642 case 0x2:
2643 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, vs, 0));
2644 break;
2645
2646 case 0x6:
2647 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, vs, 1));
2648 break;
2649
2650 case 0x1:
2651 case 0x3:
2652 case 0x5:
2653 case 0x7:
2654 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u32
2655 (cpu, vs, INSTR (20, 19)));
2656 break;
2657
2658 default:
2659 HALT_NYI;
2660 }
2661 }
2662
2663 static void
2664 do_vec_INS (sim_cpu *cpu)
2665 {
2666 /* instr[31,21] = 01001110000
2667 instr[20,16] = element size and index
2668 instr[15,10] = 000111
2669 instr[9,5] = W source
2670 instr[4,0] = V dest */
2671
2672 int index;
2673 unsigned rs = INSTR (9, 5);
2674 unsigned vd = INSTR (4, 0);
2675
2676 NYI_assert (31, 21, 0x270);
2677 NYI_assert (15, 10, 0x07);
2678
2679 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2680 if (INSTR (16, 16))
2681 {
2682 index = INSTR (20, 17);
2683 aarch64_set_vec_u8 (cpu, vd, index,
2684 aarch64_get_reg_u8 (cpu, rs, NO_SP));
2685 }
2686 else if (INSTR (17, 17))
2687 {
2688 index = INSTR (20, 18);
2689 aarch64_set_vec_u16 (cpu, vd, index,
2690 aarch64_get_reg_u16 (cpu, rs, NO_SP));
2691 }
2692 else if (INSTR (18, 18))
2693 {
2694 index = INSTR (20, 19);
2695 aarch64_set_vec_u32 (cpu, vd, index,
2696 aarch64_get_reg_u32 (cpu, rs, NO_SP));
2697 }
2698 else if (INSTR (19, 19))
2699 {
2700 index = INSTR (20, 20);
2701 aarch64_set_vec_u64 (cpu, vd, index,
2702 aarch64_get_reg_u64 (cpu, rs, NO_SP));
2703 }
2704 else
2705 HALT_NYI;
2706 }
2707
2708 static void
2709 do_vec_DUP_vector_into_vector (sim_cpu *cpu)
2710 {
2711 /* instr[31] = 0
2712 instr[30] = half(0)/full(1)
2713 instr[29,21] = 00 1110 000
2714 instr[20,16] = element size and index
2715 instr[15,10] = 0000 01
2716 instr[9,5] = V source
2717 instr[4,0] = V dest. */
2718
2719 unsigned full = INSTR (30, 30);
2720 unsigned vs = INSTR (9, 5);
2721 unsigned vd = INSTR (4, 0);
2722 int i, index;
2723
2724 NYI_assert (29, 21, 0x070);
2725 NYI_assert (15, 10, 0x01);
2726
2727 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2728 if (INSTR (16, 16))
2729 {
2730 index = INSTR (20, 17);
2731
2732 for (i = 0; i < (full ? 16 : 8); i++)
2733 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vs, index));
2734 }
2735 else if (INSTR (17, 17))
2736 {
2737 index = INSTR (20, 18);
2738
2739 for (i = 0; i < (full ? 8 : 4); i++)
2740 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, index));
2741 }
2742 else if (INSTR (18, 18))
2743 {
2744 index = INSTR (20, 19);
2745
2746 for (i = 0; i < (full ? 4 : 2); i++)
2747 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, index));
2748 }
2749 else
2750 {
2751 if (INSTR (19, 19) == 0)
2752 HALT_UNALLOC;
2753
2754 if (! full)
2755 HALT_UNALLOC;
2756
2757 index = INSTR (20, 20);
2758
2759 for (i = 0; i < 2; i++)
2760 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, index));
2761 }
2762 }
2763
2764 static void
2765 do_vec_TBL (sim_cpu *cpu)
2766 {
2767 /* instr[31] = 0
2768 instr[30] = half(0)/full(1)
2769 instr[29,21] = 00 1110 000
2770 instr[20,16] = Vm
2771 instr[15] = 0
2772 instr[14,13] = vec length
2773 instr[12,10] = 000
2774 instr[9,5] = V start
2775 instr[4,0] = V dest */
2776
2777 int full = INSTR (30, 30);
2778 int len = INSTR (14, 13) + 1;
2779 unsigned vm = INSTR (20, 16);
2780 unsigned vn = INSTR (9, 5);
2781 unsigned vd = INSTR (4, 0);
2782 unsigned i;
2783
2784 NYI_assert (29, 21, 0x070);
2785 NYI_assert (12, 10, 0);
2786
2787 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2788 for (i = 0; i < (full ? 16 : 8); i++)
2789 {
2790 unsigned int selector = aarch64_get_vec_u8 (cpu, vm, i);
2791 uint8_t val;
2792
2793 if (selector < 16)
2794 val = aarch64_get_vec_u8 (cpu, vn, selector);
2795 else if (selector < 32)
2796 val = len < 2 ? 0 : aarch64_get_vec_u8 (cpu, vn + 1, selector - 16);
2797 else if (selector < 48)
2798 val = len < 3 ? 0 : aarch64_get_vec_u8 (cpu, vn + 2, selector - 32);
2799 else if (selector < 64)
2800 val = len < 4 ? 0 : aarch64_get_vec_u8 (cpu, vn + 3, selector - 48);
2801 else
2802 val = 0;
2803
2804 aarch64_set_vec_u8 (cpu, vd, i, val);
2805 }
2806 }
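/* Example (illustrative): with len == 2 a selector byte of 17 fetches
byte 1 of register vn + 1, while selector bytes of 32 or more are
out of range for the two-register table and yield 0. */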
2807
2808 static void
2809 do_vec_TRN (sim_cpu *cpu)
2810 {
2811 /* instr[31] = 0
2812 instr[30] = half(0)/full(1)
2813 instr[29,24] = 00 1110
2814 instr[23,22] = size
2815 instr[21] = 0
2816 instr[20,16] = Vm
2817 instr[15] = 0
2818 instr[14] = TRN1 (0) / TRN2 (1)
2819 instr[13,10] = 1010
2820 instr[9,5] = V source
2821 instr[4,0] = V dest. */
2822
2823 int full = INSTR (30, 30);
2824 int second = INSTR (14, 14);
2825 unsigned vm = INSTR (20, 16);
2826 unsigned vn = INSTR (9, 5);
2827 unsigned vd = INSTR (4, 0);
2828 unsigned i;
2829
2830 NYI_assert (29, 24, 0x0E);
2831 NYI_assert (13, 10, 0xA);
2832
2833 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2834 switch (INSTR (23, 22))
2835 {
2836 case 0:
2837 for (i = 0; i < (full ? 8 : 4); i++)
2838 {
2839 aarch64_set_vec_u8
2840 (cpu, vd, i * 2,
2841 aarch64_get_vec_u8 (cpu, second ? vm : vn, i * 2));
2842 aarch64_set_vec_u8
2843 (cpu, vd, i * 2 + 1,
2844 aarch64_get_vec_u8 (cpu, second ? vn : vm, i * 2 + 1));
2845 }
2846 break;
2847
2848 case 1:
2849 for (i = 0; i < (full ? 4 : 2); i++)
2850 {
2851 aarch64_set_vec_u16
2852 (cpu, vd, i * 2,
2853 aarch64_get_vec_u16 (cpu, second ? vm : vn, i * 2));
2854 aarch64_set_vec_u16
2855 (cpu, vd, i * 2 + 1,
2856 aarch64_get_vec_u16 (cpu, second ? vn : vm, i * 2 + 1));
2857 }
2858 break;
2859
2860 case 2:
2861 aarch64_set_vec_u32
2862 (cpu, vd, 0, aarch64_get_vec_u32 (cpu, second ? vm : vn, 0));
2863 aarch64_set_vec_u32
2864 (cpu, vd, 1, aarch64_get_vec_u32 (cpu, second ? vn : vm, 1));
2865 aarch64_set_vec_u32
2866 (cpu, vd, 2, aarch64_get_vec_u32 (cpu, second ? vm : vn, 2));
2867 aarch64_set_vec_u32
2868 (cpu, vd, 3, aarch64_get_vec_u32 (cpu, second ? vn : vm, 3));
2869 break;
2870
2871 case 3:
2872 if (! full)
2873 HALT_UNALLOC;
2874
2875 aarch64_set_vec_u64 (cpu, vd, 0,
2876 aarch64_get_vec_u64 (cpu, second ? vm : vn, 0));
2877 aarch64_set_vec_u64 (cpu, vd, 1,
2878 aarch64_get_vec_u64 (cpu, second ? vn : vm, 1));
2879 break;
2880 }
2881 }
2882
2883 static void
2884 do_vec_DUP_scalar_into_vector (sim_cpu *cpu)
2885 {
2886 /* instr[31] = 0
2887 instr[30] = 0=> zero top 64-bits, 1=> duplicate into top 64-bits
2888 [must be 1 for 64-bit xfer]
2889 instr[29,20] = 00 1110 0000
2890 instr[19,16] = element size: 0001=> 8-bits, 0010=> 16-bits,
2891 0100=> 32-bits. 1000=>64-bits
2892 instr[15,10] = 0000 11
2893 instr[9,5] = W source
2894 instr[4,0] = V dest. */
2895
2896 unsigned i;
2897 unsigned Vd = INSTR (4, 0);
2898 unsigned Rs = INSTR (9, 5);
2899 int both = INSTR (30, 30);
2900
2901 NYI_assert (29, 20, 0x0E0);
2902 NYI_assert (15, 10, 0x03);
2903
2904 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2905 switch (INSTR (19, 16))
2906 {
2907 case 1:
2908 for (i = 0; i < (both ? 16 : 8); i++)
2909 aarch64_set_vec_u8 (cpu, Vd, i, aarch64_get_reg_u8 (cpu, Rs, NO_SP));
2910 break;
2911
2912 case 2:
2913 for (i = 0; i < (both ? 8 : 4); i++)
2914 aarch64_set_vec_u16 (cpu, Vd, i, aarch64_get_reg_u16 (cpu, Rs, NO_SP));
2915 break;
2916
2917 case 4:
2918 for (i = 0; i < (both ? 4 : 2); i++)
2919 aarch64_set_vec_u32 (cpu, Vd, i, aarch64_get_reg_u32 (cpu, Rs, NO_SP));
2920 break;
2921
2922 case 8:
2923 if (!both)
2924 HALT_NYI;
2925 aarch64_set_vec_u64 (cpu, Vd, 0, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
2926 aarch64_set_vec_u64 (cpu, Vd, 1, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
2927 break;
2928
2929 default:
2930 HALT_NYI;
2931 }
2932 }
2933
2934 static void
2935 do_vec_UZP (sim_cpu *cpu)
2936 {
2937 /* instr[31] = 0
2938 instr[30] = half(0)/full(1)
2939 instr[29,24] = 00 1110
2940 instr[23,22] = size: byte(00), half(01), word (10), long (11)
2941 instr[21] = 0
2942 instr[20,16] = Vm
2943 instr[15] = 0
2944 instr[14] = lower (0) / upper (1)
2945 instr[13,10] = 0110
2946 instr[9,5] = Vn
2947 instr[4,0] = Vd. */
2948
2949 int full = INSTR (30, 30);
2950 int upper = INSTR (14, 14);
2951
2952 unsigned vm = INSTR (20, 16);
2953 unsigned vn = INSTR (9, 5);
2954 unsigned vd = INSTR (4, 0);
2955
2956 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
2957 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
2958 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
2959 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
2960
2961 uint64_t val1;
2962 uint64_t val2;
2963
2964 uint64_t input2 = full ? val_n2 : val_m1;
2965
2966 NYI_assert (29, 24, 0x0E);
2967 NYI_assert (21, 21, 0);
2968 NYI_assert (15, 15, 0);
2969 NYI_assert (13, 10, 6);
2970
2971 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2972 switch (INSTR (23, 22))
2973 {
2974 case 0:
2975 val1 = (val_n1 >> (upper * 8)) & 0xFFULL;
2976 val1 |= (val_n1 >> ((upper * 8) + 8)) & 0xFF00ULL;
2977 val1 |= (val_n1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
2978 val1 |= (val_n1 >> ((upper * 8) + 24)) & 0xFF000000ULL;
2979
2980 val1 |= (input2 << (32 - (upper * 8))) & 0xFF00000000ULL;
2981 val1 |= (input2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
2982 val1 |= (input2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
2983 val1 |= (input2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
2984
2985 if (full)
2986 {
2987 val2 = (val_m1 >> (upper * 8)) & 0xFFULL;
2988 val2 |= (val_m1 >> ((upper * 8) + 8)) & 0xFF00ULL;
2989 val2 |= (val_m1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
2990 val2 |= (val_m1 >> ((upper * 8) + 24)) & 0xFF000000ULL;
2991
2992 val2 |= (val_m2 << (32 - (upper * 8))) & 0xFF00000000ULL;
2993 val2 |= (val_m2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
2994 val2 |= (val_m2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
2995 val2 |= (val_m2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
2996 }
2997 break;
2998
2999 case 1:
3000 val1 = (val_n1 >> (upper * 16)) & 0xFFFFULL;
3001 val1 |= (val_n1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;
3002
3003 val1 |= (input2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;
3004 val1 |= (input2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
3005
3006 if (full)
3007 {
3008 val2 = (val_m1 >> (upper * 16)) & 0xFFFFULL;
3009 val2 |= (val_m1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;
3010
3011 val2 |= (val_m2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;
3012 val2 |= (val_m2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
3013 }
3014 break;
3015
3016 case 2:
3017 val1 = (val_n1 >> (upper * 32)) & 0xFFFFFFFF;
3018 val1 |= (input2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
3019
3020 if (full)
3021 {
3022 val2 = (val_m1 >> (upper * 32)) & 0xFFFFFFFF;
3023 val2 |= (val_m2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
3024 }
3025 break;
3026
3027 case 3:
3028 if (! full)
3029 HALT_UNALLOC;
3030
3031 val1 = upper ? val_n2 : val_n1;
3032 val2 = upper ? val_m2 : val_m1;
3033 break;
3034 }
3035
3036 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3037 if (full)
3038 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3039 }
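/* Example: for bytes, UZP1 concatenates the even-indexed elements of
Vn then Vm, i.e. { n0, n2, ..., m0, m2, ... }, while UZP2 (upper
set) takes the odd-indexed elements. */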
3040
3041 static void
3042 do_vec_ZIP (sim_cpu *cpu)
3043 {
3044 /* instr[31] = 0
3045 instr[30] = half(0)/full(1)
3046 instr[29,24] = 00 1110
3047 instr[23,22] = size: byte(00), half(01), word (10), long (11)
3048 instr[21] = 0
3049 instr[20,16] = Vm
3050 instr[15] = 0
3051 instr[14] = lower (0) / upper (1)
3052 instr[13,10] = 1110
3053 instr[9,5] = Vn
3054 instr[4,0] = Vd. */
3055
3056 int full = INSTR (30, 30);
3057 int upper = INSTR (14, 14);
3058
3059 unsigned vm = INSTR (20, 16);
3060 unsigned vn = INSTR (9, 5);
3061 unsigned vd = INSTR (4, 0);
3062
3063 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
3064 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
3065 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
3066 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
3067
3068 uint64_t val1 = 0;
3069 uint64_t val2 = 0;
3070
3071 uint64_t input1 = upper ? (full ? val_n2 : val_n1 >> 32) : val_n1;
3072 uint64_t input2 = upper ? (full ? val_m2 : val_m1 >> 32) : val_m1;
3073
3074 NYI_assert (29, 24, 0x0E);
3075 NYI_assert (21, 21, 0);
3076 NYI_assert (15, 15, 0);
3077 NYI_assert (13, 10, 0xE);
3078
3079 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3080 switch (INSTR (23, 22))
3081 {
3082 case 0:
3083 val1 =
3084 ((input1 << 0) & (0xFF << 0))
3085 | ((input2 << 8) & (0xFF << 8))
3086 | ((input1 << 8) & (0xFF << 16))
3087 | ((input2 << 16) & (0xFF << 24))
3088 | ((input1 << 16) & (0xFFULL << 32))
3089 | ((input2 << 24) & (0xFFULL << 40))
3090 | ((input1 << 24) & (0xFFULL << 48))
3091 | ((input2 << 32) & (0xFFULL << 56));
3092
3093 val2 =
3094 ((input1 >> 32) & (0xFF << 0))
3095 | ((input2 >> 24) & (0xFF << 8))
3096 | ((input1 >> 24) & (0xFF << 16))
3097 | ((input2 >> 16) & (0xFF << 24))
3098 | ((input1 >> 16) & (0xFFULL << 32))
3099 | ((input2 >> 8) & (0xFFULL << 40))
3100 | ((input1 >> 8) & (0xFFULL << 48))
3101 | ((input2 >> 0) & (0xFFULL << 56));
3102 break;
3103
3104 case 1:
3105 val1 =
3106 ((input1 << 0) & (0xFFFF << 0))
3107 | ((input2 << 16) & (0xFFFF << 16))
3108 | ((input1 << 16) & (0xFFFFULL << 32))
3109 | ((input2 << 32) & (0xFFFFULL << 48));
3110
3111 val2 =
3112 ((input1 >> 32) & (0xFFFF << 0))
3113 | ((input2 >> 16) & (0xFFFF << 16))
3114 | ((input1 >> 16) & (0xFFFFULL << 32))
3115 | ((input2 >> 0) & (0xFFFFULL << 48));
3116 break;
3117
3118 case 2:
3119 val1 = (input1 & 0xFFFFFFFFULL) | (input2 << 32);
3120 val2 = (input1 >> 32) | (input2 & 0xFFFFFFFF00000000ULL);
3121 break;
3122
3123 case 3:
3124 val1 = input1;
3125 val2 = input2;
3126 break;
3127 }
3128
3129 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3130 if (full)
3131 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3132 }
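/* Example: for bytes, ZIP1 interleaves the low halves of Vn and Vm,
giving { n0, m0, n1, m1, ... }, while ZIP2 (upper set) interleaves
the high halves. */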
3133
3134 /* Floating point immediates are encoded in 8 bits.
3135 fpimm[7] = sign bit.
3136 fpimm[6:4] = signed exponent.
3137 fpimm[3:0] = fraction (assuming leading 1).
3138 i.e. F = s * 1.f * 2^(e - b). */
3139
3140 static float
3141 fp_immediate_for_encoding_32 (uint32_t imm8)
3142 {
3143 float u;
3144 uint32_t s, e, f, i;
3145
3146 s = (imm8 >> 7) & 0x1;
3147 e = (imm8 >> 4) & 0x7;
3148 f = imm8 & 0xf;
3149
3150 /* The fp value is s * n/16 * 2^r where n is 16+f. */
3151 u = (16.0 + f) / 16.0;
3152
3153 /* N.B. exponent is signed. */
3154 if (e < 4)
3155 {
3156 int epos = e;
3157
3158 for (i = 0; i <= epos; i++)
3159 u *= 2.0;
3160 }
3161 else
3162 {
3163 int eneg = 7 - e;
3164
3165 for (i = 0; i < eneg; i++)
3166 u /= 2.0;
3167 }
3168
3169 if (s)
3170 u = - u;
3171
3172 return u;
3173 }
3174
3175 static double
3176 fp_immediate_for_encoding_64 (uint32_t imm8)
3177 {
3178 double u;
3179 uint32_t s, e, f, i;
3180
3181 s = (imm8 >> 7) & 0x1;
3182 e = (imm8 >> 4) & 0x7;
3183 f = imm8 & 0xf;
3184
3185 /* The fp value is s * n/16 * 2^r where n is 16+f. */
3186 u = (16.0 + f) / 16.0;
3187
3188 /* N.B. exponent is signed. */
3189 if (e < 4)
3190 {
3191 int epos = e;
3192
3193 for (i = 0; i <= epos; i++)
3194 u *= 2.0;
3195 }
3196 else
3197 {
3198 int eneg = 7 - e;
3199
3200 for (i = 0; i < eneg; i++)
3201 u /= 2.0;
3202 }
3203
3204 if (s)
3205 u = - u;
3206
3207 return u;
3208 }
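/* Example: imm8 == 0x70 decodes as s == 0, e == 7, f == 0, giving
(16 + 0) / 16 == 1.0 with no scaling applied, so imm8 == 0xF0 is
-1.0. */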
3209
3210 static void
3211 do_vec_MOV_immediate (sim_cpu *cpu)
3212 {
3213 /* instr[31] = 0
3214 instr[30] = full/half selector
3215 instr[29,19] = 00111100000
3216 instr[18,16] = high 3 bits of uimm8
3217 instr[15,12] = size & shift:
3218 0000 => 32-bit
3219 0010 => 32-bit + LSL#8
3220 0100 => 32-bit + LSL#16
3221 0110 => 32-bit + LSL#24
3222 1010 => 16-bit + LSL#8
3223 1000 => 16-bit
3224 1101 => 32-bit + MSL#16
3225 1100 => 32-bit + MSL#8
3226 1110 => 8-bit
3227 1111 => double
3228 instr[11,10] = 01
3229 instr[9,5] = low 5-bits of uimm8
3230 instr[4,0] = Vd. */
3231
3232 int full = INSTR (30, 30);
3233 unsigned vd = INSTR (4, 0);
3234 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3235 unsigned i;
3236
3237 NYI_assert (29, 19, 0x1E0);
3238 NYI_assert (11, 10, 1);
3239
3240 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3241 switch (INSTR (15, 12))
3242 {
3243 case 0x0: /* 32-bit, no shift. */
3244 case 0x2: /* 32-bit, shift by 8. */
3245 case 0x4: /* 32-bit, shift by 16. */
3246 case 0x6: /* 32-bit, shift by 24. */
3247 val <<= (8 * INSTR (14, 13));
3248 for (i = 0; i < (full ? 4 : 2); i++)
3249 aarch64_set_vec_u32 (cpu, vd, i, val);
3250 break;
3251
3252 case 0xa: /* 16-bit, shift by 8. */
3253 val <<= 8;
3254 /* Fall through. */
3255 case 0x8: /* 16-bit, no shift. */
3256 for (i = 0; i < (full ? 8 : 4); i++)
3257 aarch64_set_vec_u16 (cpu, vd, i, val);
3258 break;
3259
3260 case 0xd: /* 32-bit, mask shift by 16. */
3261 val <<= 8;
3262 val |= 0xFF;
3263 /* Fall through. */
3264 case 0xc: /* 32-bit, mask shift by 8. */
3265 val <<= 8;
3266 val |= 0xFF;
3267 for (i = 0; i < (full ? 4 : 2); i++)
3268 aarch64_set_vec_u32 (cpu, vd, i, val);
3269 break;
3270
3271 case 0xe: /* 8-bit, no shift. */
3272 for (i = 0; i < (full ? 16 : 8); i++)
3273 aarch64_set_vec_u8 (cpu, vd, i, val);
3274 break;
3275
3276 case 0xf: /* FMOV Vs.{2|4}S, #fpimm. */
3277 {
3278 float u = fp_immediate_for_encoding_32 (val);
3279 for (i = 0; i < (full ? 4 : 2); i++)
3280 aarch64_set_vec_float (cpu, vd, i, u);
3281 break;
3282 }
3283
3284 default:
3285 HALT_NYI;
3286 }
3287 }
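/* Example (illustrative): with cmode 1100 (MSL #8) an immediate of
0xAB expands to 0xABFF in each 32-bit lane; with cmode 1101
(MSL #16) it expands to 0xABFFFF. */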
3288
3289 static void
3290 do_vec_MVNI (sim_cpu *cpu)
3291 {
3292 /* instr[31] = 0
3293 instr[30] = full/half selector
3294 instr[29,19] = 10111100000
3295 instr[18,16] = high 3 bits of uimm8
3296 instr[15,12] = selector
3297 instr[11,10] = 01
3298 instr[9,5] = low 5-bits of uimm8
3299 instr[4,0] = Vd. */
3300
3301 int full = INSTR (30, 30);
3302 unsigned vd = INSTR (4, 0);
3303 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3304 unsigned i;
3305
3306 NYI_assert (29, 19, 0x5E0);
3307 NYI_assert (11, 10, 1);
3308
3309 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3310 switch (INSTR (15, 12))
3311 {
3312 case 0x0: /* 32-bit, no shift. */
3313 case 0x2: /* 32-bit, shift by 8. */
3314 case 0x4: /* 32-bit, shift by 16. */
3315 case 0x6: /* 32-bit, shift by 24. */
3316 val <<= (8 * INSTR (14, 13));
3317 val = ~ val;
3318 for (i = 0; i < (full ? 4 : 2); i++)
3319 aarch64_set_vec_u32 (cpu, vd, i, val);
3320 return;
3321
3322 case 0xa: /* 16-bit, 8 bit shift. */
3323 val <<= 8; /* Fall through. */
3324 case 0x8: /* 16-bit, no shift. */
3325 val = ~ val;
3326 for (i = 0; i < (full ? 8 : 4); i++)
3327 aarch64_set_vec_u16 (cpu, vd, i, val);
3328 return;
3329
3330 case 0xd: /* 32-bit, mask shift by 16. */
3331 val <<= 8;
3332 val |= 0xFF; /* Fall through. */
3333 case 0xc: /* 32-bit, mask shift by 8. */
3334 val <<= 8;
3335 val |= 0xFF;
3336 val = ~ val;
3337 for (i = 0; i < (full ? 4 : 2); i++)
3338 aarch64_set_vec_u32 (cpu, vd, i, val);
3339 return;
3340
3341 case 0xE: /* MOVI Dn, #mask64 */
3342 {
3343 uint64_t mask = 0;
3344
3345 for (i = 0; i < 8; i++)
3346 if (val & (1 << i))
3347 mask |= (0xFFUL << (i * 8));
3348 aarch64_set_vec_u64 (cpu, vd, 0, mask);
3349 aarch64_set_vec_u64 (cpu, vd, 1, full ? mask : 0);
3350 return;
3351 }
3352
3353 case 0xf: /* FMOV Vd.2D, #fpimm. */
3354 {
3355 double u = fp_immediate_for_encoding_64 (val);
3356
3357 if (! full)
3358 HALT_UNALLOC;
3359
3360 aarch64_set_vec_double (cpu, vd, 0, u);
3361 aarch64_set_vec_double (cpu, vd, 1, u);
3362 return;
3363 }
3364
3365 default:
3366 HALT_NYI;
3367 }
3368 }
3369
3370 #define ABS(A) ((A) < 0 ? - (A) : (A))
3371
3372 static void
3373 do_vec_ABS (sim_cpu *cpu)
3374 {
3375 /* instr[31] = 0
3376 instr[30] = half(0)/full(1)
3377 instr[29,24] = 00 1110
3378 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3379 instr[21,10] = 10 0000 1011 10
3380 instr[9,5] = Vn
3381 instr[4,0] = Vd. */
3382
3383 unsigned vn = INSTR (9, 5);
3384 unsigned vd = INSTR (4, 0);
3385 unsigned full = INSTR (30, 30);
3386 unsigned i;
3387
3388 NYI_assert (29, 24, 0x0E);
3389 NYI_assert (21, 10, 0x82E);
3390
3391 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3392 switch (INSTR (23, 22))
3393 {
3394 case 0:
3395 for (i = 0; i < (full ? 16 : 8); i++)
3396 aarch64_set_vec_s8 (cpu, vd, i,
3397 ABS (aarch64_get_vec_s8 (cpu, vn, i)));
3398 break;
3399
3400 case 1:
3401 for (i = 0; i < (full ? 8 : 4); i++)
3402 aarch64_set_vec_s16 (cpu, vd, i,
3403 ABS (aarch64_get_vec_s16 (cpu, vn, i)));
3404 break;
3405
3406 case 2:
3407 for (i = 0; i < (full ? 4 : 2); i++)
3408 aarch64_set_vec_s32 (cpu, vd, i,
3409 ABS (aarch64_get_vec_s32 (cpu, vn, i)));
3410 break;
3411
3412 case 3:
3413 if (! full)
3414 HALT_NYI;
3415 for (i = 0; i < 2; i++)
3416 aarch64_set_vec_s64 (cpu, vd, i,
3417 ABS (aarch64_get_vec_s64 (cpu, vn, i)));
3418 break;
3419 }
3420 }
3421
3422 static void
3423 do_vec_ADDV (sim_cpu *cpu)
3424 {
3425 /* instr[31] = 0
3426 instr[30] = full/half selector
3427 instr[29,24] = 00 1110
3428 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3429 instr[21,10] = 11 0001 1011 10
3430 instr[9,5] = Vm
3431 instr[4,0] = Rd. */
3432
3433 unsigned vm = INSTR (9, 5);
3434 unsigned rd = INSTR (4, 0);
3435 unsigned i;
3436 int full = INSTR (30, 30);
3437
3438 NYI_assert (29, 24, 0x0E);
3439 NYI_assert (21, 10, 0xC6E);
3440
3441 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3442 switch (INSTR (23, 22))
3443 {
3444 case 0:
3445 {
3446 uint8_t val = 0;
3447 for (i = 0; i < (full ? 16 : 8); i++)
3448 val += aarch64_get_vec_u8 (cpu, vm, i);
3449 aarch64_set_vec_u64 (cpu, rd, 0, val);
3450 return;
3451 }
3452
3453 case 1:
3454 {
3455 uint16_t val = 0;
3456 for (i = 0; i < (full ? 8 : 4); i++)
3457 val += aarch64_get_vec_u16 (cpu, vm, i);
3458 aarch64_set_vec_u64 (cpu, rd, 0, val);
3459 return;
3460 }
3461
3462 case 2:
3463 {
3464 uint32_t val = 0;
3465 if (! full)
3466 HALT_UNALLOC;
3467 for (i = 0; i < 4; i++)
3468 val += aarch64_get_vec_u32 (cpu, vm, i);
3469 aarch64_set_vec_u64 (cpu, rd, 0, val);
3470 return;
3471 }
3472
3473 case 3:
3474 HALT_UNALLOC;
3475 }
3476 }
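/* N.B. the accumulators above deliberately have the element width:
ADDV B0, V1.16B with every lane holding 0x20 sums to 512, which is
truncated to 0 in the byte-wide destination. */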
3477
3478 static void
3479 do_vec_ins_2 (sim_cpu *cpu)
3480 {
3481 /* instr[31,21] = 01001110000
3482 instr[20,18] = size & element selector
3483 instr[17,14] = 0000
3484 instr[13] = direction: to vec(0), from vec (1)
3485 instr[12,10] = 111
3486 instr[9,5] = Vm
3487 instr[4,0] = Vd. */
3488
3489 unsigned elem;
3490 unsigned vm = INSTR (9, 5);
3491 unsigned vd = INSTR (4, 0);
3492
3493 NYI_assert (31, 21, 0x270);
3494 NYI_assert (17, 14, 0);
3495 NYI_assert (12, 10, 7);
3496
3497 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3498 if (INSTR (13, 13) == 1)
3499 {
3500 if (INSTR (18, 18) == 1)
3501 {
3502 /* 32-bit moves. */
3503 elem = INSTR (20, 19);
3504 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3505 aarch64_get_vec_u32 (cpu, vm, elem));
3506 }
3507 else
3508 {
3509 /* 64-bit moves. */
3510 if (INSTR (19, 19) != 1)
3511 HALT_NYI;
3512
3513 elem = INSTR (20, 20);
3514 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3515 aarch64_get_vec_u64 (cpu, vm, elem));
3516 }
3517 }
3518 else
3519 {
3520 if (INSTR (18, 18) == 1)
3521 {
3522 /* 32-bit moves. */
3523 elem = INSTR (20, 19);
3524 aarch64_set_vec_u32 (cpu, vd, elem,
3525 aarch64_get_reg_u32 (cpu, vm, NO_SP));
3526 }
3527 else
3528 {
3529 /* 64-bit moves. */
3530 if (INSTR (19, 19) != 1)
3531 HALT_NYI;
3532
3533 elem = INSTR (20, 20);
3534 aarch64_set_vec_u64 (cpu, vd, elem,
3535 aarch64_get_reg_u64 (cpu, vm, NO_SP));
3536 }
3537 }
3538 }
3539
3540 #define DO_VEC_WIDENING_MUL(N, DST_TYPE, READ_TYPE, WRITE_TYPE) \
3541 do \
3542 { \
3543 DST_TYPE a[N], b[N]; \
3544 \
3545 for (i = 0; i < (N); i++) \
3546 { \
3547 a[i] = aarch64_get_vec_##READ_TYPE (cpu, vn, i + bias); \
3548 b[i] = aarch64_get_vec_##READ_TYPE (cpu, vm, i + bias); \
3549 } \
3550 for (i = 0; i < (N); i++) \
3551 aarch64_set_vec_##WRITE_TYPE (cpu, vd, i, a[i] * b[i]); \
3552 } \
3553 while (0)
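/* For example, DO_VEC_WIDENING_MUL (8, uint16_t, u8, u16) reads eight
u8 lanes of vn and vm (starting at lane 'bias') into temporaries
and then stores the eight u16 products to vd; the temporaries
matter when vd overlaps one of the source vectors. */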
3554
3555 static void
3556 do_vec_mull (sim_cpu *cpu)
3557 {
3558 /* instr[31] = 0
3559 instr[30] = lower(0)/upper(1) selector
3560 instr[29] = signed(0)/unsigned(1)
3561 instr[28,24] = 0 1110
3562 instr[23,22] = size: 8-bit (00), 16-bit (01), 32-bit (10)
3563 instr[21] = 1
3564 instr[20,16] = Vm
3565 instr[15,10] = 11 0000
3566 instr[9,5] = Vn
3567 instr[4,0] = Vd. */
3568
3569 int unsign = INSTR (29, 29);
3570 int bias = INSTR (30, 30);
3571 unsigned vm = INSTR (20, 16);
3572 unsigned vn = INSTR ( 9, 5);
3573 unsigned vd = INSTR ( 4, 0);
3574 unsigned i;
3575
3576 NYI_assert (28, 24, 0x0E);
3577 NYI_assert (15, 10, 0x30);
3578
3579 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3580 /* NB: Read source values before writing results, in case
3581 the source and destination vectors are the same. */
3582 switch (INSTR (23, 22))
3583 {
3584 case 0:
3585 if (bias)
3586 bias = 8;
3587 if (unsign)
3588 DO_VEC_WIDENING_MUL (8, uint16_t, u8, u16);
3589 else
3590 DO_VEC_WIDENING_MUL (8, int16_t, s8, s16);
3591 return;
3592
3593 case 1:
3594 if (bias)
3595 bias = 4;
3596 if (unsign)
3597 DO_VEC_WIDENING_MUL (4, uint32_t, u16, u32);
3598 else
3599 DO_VEC_WIDENING_MUL (4, int32_t, s16, s32);
3600 return;
3601
3602 case 2:
3603 if (bias)
3604 bias = 2;
3605 if (unsign)
3606 DO_VEC_WIDENING_MUL (2, uint64_t, u32, u64);
3607 else
3608 DO_VEC_WIDENING_MUL (2, int64_t, s32, s64);
3609 return;
3610
3611 case 3:
3612 HALT_NYI;
3613 }
3614 }
3615
3616 static void
3617 do_vec_fadd (sim_cpu *cpu)
3618 {
3619 /* instr[31] = 0
3620 instr[30] = half(0)/full(1)
3621 instr[29,24] = 001110
3622 instr[23] = FADD(0)/FSUB(1)
3623 instr[22] = float (0)/double(1)
3624 instr[21] = 1
3625 instr[20,16] = Vm
3626 instr[15,10] = 110101
3627 instr[9,5] = Vn
3628 instr[4,0] = Vd. */
3629
3630 unsigned vm = INSTR (20, 16);
3631 unsigned vn = INSTR (9, 5);
3632 unsigned vd = INSTR (4, 0);
3633 unsigned i;
3634 int full = INSTR (30, 30);
3635
3636 NYI_assert (29, 24, 0x0E);
3637 NYI_assert (21, 21, 1);
3638 NYI_assert (15, 10, 0x35);
3639
3640 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3641 if (INSTR (23, 23))
3642 {
3643 if (INSTR (22, 22))
3644 {
3645 if (! full)
3646 HALT_NYI;
3647
3648 for (i = 0; i < 2; i++)
3649 aarch64_set_vec_double (cpu, vd, i,
3650 aarch64_get_vec_double (cpu, vn, i)
3651 - aarch64_get_vec_double (cpu, vm, i));
3652 }
3653 else
3654 {
3655 for (i = 0; i < (full ? 4 : 2); i++)
3656 aarch64_set_vec_float (cpu, vd, i,
3657 aarch64_get_vec_float (cpu, vn, i)
3658 - aarch64_get_vec_float (cpu, vm, i));
3659 }
3660 }
3661 else
3662 {
3663 if (INSTR (22, 22))
3664 {
3665 if (! full)
3666 HALT_NYI;
3667
3668 for (i = 0; i < 2; i++)
3669 aarch64_set_vec_double (cpu, vd, i,
3670 aarch64_get_vec_double (cpu, vm, i)
3671 + aarch64_get_vec_double (cpu, vn, i));
3672 }
3673 else
3674 {
3675 for (i = 0; i < (full ? 4 : 2); i++)
3676 aarch64_set_vec_float (cpu, vd, i,
3677 aarch64_get_vec_float (cpu, vm, i)
3678 + aarch64_get_vec_float (cpu, vn, i));
3679 }
3680 }
3681 }
3682
3683 static void
3684 do_vec_add (sim_cpu *cpu)
3685 {
3686 /* instr[31] = 0
3687 instr[30] = full/half selector
3688 instr[29,24] = 001110
3689 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3690 instr[21] = 1
3691 instr[20,16] = Vm
3692 instr[15,10] = 100001
3693 instr[9,5] = Vn
3694 instr[4,0] = Vd. */
3695
3696 unsigned vm = INSTR (20, 16);
3697 unsigned vn = INSTR (9, 5);
3698 unsigned vd = INSTR (4, 0);
3699 unsigned i;
3700 int full = INSTR (30, 30);
3701
3702 NYI_assert (29, 24, 0x0E);
3703 NYI_assert (21, 21, 1);
3704 NYI_assert (15, 10, 0x21);
3705
3706 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3707 switch (INSTR (23, 22))
3708 {
3709 case 0:
3710 for (i = 0; i < (full ? 16 : 8); i++)
3711 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
3712 + aarch64_get_vec_u8 (cpu, vm, i));
3713 return;
3714
3715 case 1:
3716 for (i = 0; i < (full ? 8 : 4); i++)
3717 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
3718 + aarch64_get_vec_u16 (cpu, vm, i));
3719 return;
3720
3721 case 2:
3722 for (i = 0; i < (full ? 4 : 2); i++)
3723 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
3724 + aarch64_get_vec_u32 (cpu, vm, i));
3725 return;
3726
3727 case 3:
3728 if (! full)
3729 HALT_UNALLOC;
3730 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vn, 0)
3731 + aarch64_get_vec_u64 (cpu, vm, 0));
3732 aarch64_set_vec_u64 (cpu, vd, 1,
3733 aarch64_get_vec_u64 (cpu, vn, 1)
3734 + aarch64_get_vec_u64 (cpu, vm, 1));
3735 return;
3736 }
3737 }
3738
3739 static void
3740 do_vec_mul (sim_cpu *cpu)
3741 {
3742 /* instr[31] = 0
3743 instr[30] = full/half selector
3744 instr[29,24] = 00 1110
3745 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3746 instr[21] = 1
3747 instr[20,16] = Vm
3748 instr[15,10] = 10 0111
3749 instr[9,5] = Vn
3750 instr[4,0] = Vd. */
3751
3752 unsigned vm = INSTR (20, 16);
3753 unsigned vn = INSTR (9, 5);
3754 unsigned vd = INSTR (4, 0);
3755 unsigned i;
3756 int full = INSTR (30, 30);
3757 int bias = 0;
3758
3759 NYI_assert (29, 24, 0x0E);
3760 NYI_assert (21, 21, 1);
3761 NYI_assert (15, 10, 0x27);
3762
3763 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3764 switch (INSTR (23, 22))
3765 {
3766 case 0:
3767 DO_VEC_WIDENING_MUL (full ? 16 : 8, uint8_t, u8, u8);
3768 return;
3769
3770 case 1:
3771 DO_VEC_WIDENING_MUL (full ? 8 : 4, uint16_t, u16, u16);
3772 return;
3773
3774 case 2:
3775 DO_VEC_WIDENING_MUL (full ? 4 : 2, uint32_t, u32, u32);
3776 return;
3777
3778 case 3:
3779 HALT_UNALLOC;
3780 }
3781 }
3782
3783 static void
3784 do_vec_MLA (sim_cpu *cpu)
3785 {
3786 /* instr[31] = 0
3787 instr[30] = full/half selector
3788 instr[29,24] = 00 1110
3789 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3790 instr[21] = 1
3791 instr[20,16] = Vm
3792 instr[15,10] = 1001 01
3793 instr[9,5] = Vn
3794 instr[4,0] = Vd. */
3795
3796 unsigned vm = INSTR (20, 16);
3797 unsigned vn = INSTR (9, 5);
3798 unsigned vd = INSTR (4, 0);
3799 unsigned i;
3800 int full = INSTR (30, 30);
3801
3802 NYI_assert (29, 24, 0x0E);
3803 NYI_assert (21, 21, 1);
3804 NYI_assert (15, 10, 0x25);
3805
3806 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3807 switch (INSTR (23, 22))
3808 {
3809 case 0:
3810 for (i = 0; i < (full ? 16 : 8); i++)
3811 aarch64_set_vec_u8 (cpu, vd, i,
3812 aarch64_get_vec_u8 (cpu, vd, i)
3813 + (aarch64_get_vec_u8 (cpu, vn, i)
3814 * aarch64_get_vec_u8 (cpu, vm, i)));
3815 return;
3816
3817 case 1:
3818 for (i = 0; i < (full ? 8 : 4); i++)
3819 aarch64_set_vec_u16 (cpu, vd, i,
3820 aarch64_get_vec_u16 (cpu, vd, i)
3821 + (aarch64_get_vec_u16 (cpu, vn, i)
3822 * aarch64_get_vec_u16 (cpu, vm, i)));
3823 return;
3824
3825 case 2:
3826 for (i = 0; i < (full ? 4 : 2); i++)
3827 aarch64_set_vec_u32 (cpu, vd, i,
3828 aarch64_get_vec_u32 (cpu, vd, i)
3829 + (aarch64_get_vec_u32 (cpu, vn, i)
3830 * aarch64_get_vec_u32 (cpu, vm, i)));
3831 return;
3832
3833 default:
3834 HALT_UNALLOC;
3835 }
3836 }
3837
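/* Helpers giving the FMAXNM/FMINNM "number" semantics: when exactly
one operand is a NaN the other operand is returned, e.g.
fmaxnm (NaN, 5.0f) is 5.0f; a NaN results only when both operands
are NaNs. */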
3838 static float
3839 fmaxnm (float a, float b)
3840 {
3841 if (! isnan (a))
3842 {
3843 if (! isnan (b))
3844 return a > b ? a : b;
3845 return a;
3846 }
3847 else if (! isnan (b))
3848 return b;
3849 return a;
3850 }
3851
3852 static float
3853 fminnm (float a, float b)
3854 {
3855 if (! isnan (a))
3856 {
3857 if (! isnan (b))
3858 return a < b ? a : b;
3859 return a;
3860 }
3861 else if (! isnan (b))
3862 return b;
3863 return a;
3864 }
3865
3866 static double
3867 dmaxnm (double a, double b)
3868 {
3869 if (! isnan (a))
3870 {
3871 if (! isnan (b))
3872 return a > b ? a : b;
3873 return a;
3874 }
3875 else if (! isnan (b))
3876 return b;
3877 return a;
3878 }
3879
3880 static double
3881 dminnm (double a, double b)
3882 {
3883 if (! isnan (a))
3884 {
3885 if (! isnan (b))
3886 return a < b ? a : b;
3887 return a;
3888 }
3889 else if (! isnan (b))
3890 return b;
3891 return a;
3892 }
3893
3894 static void
3895 do_vec_FminmaxNMP (sim_cpu *cpu)
3896 {
3897 /* instr [31] = 0
3898 instr [30] = half (0)/full (1)
3899 instr [29,24] = 10 1110
3900 instr [23] = max(0)/min(1)
3901 instr [22] = float (0)/double (1)
3902 instr [21] = 1
3903 instr [20,16] = Vm
3904 instr [15,10] = 1100 01
3905 instr [9,5] = Vn
3906 instr [4,0] = Vd. */
3907
3908 unsigned vm = INSTR (20, 16);
3909 unsigned vn = INSTR (9, 5);
3910 unsigned vd = INSTR (4, 0);
3911 int full = INSTR (30, 30);
3912
3913 NYI_assert (29, 24, 0x2E);
3914 NYI_assert (21, 21, 1);
3915 NYI_assert (15, 10, 0x31);
3916
3917 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3918 if (INSTR (22, 22))
3919 {
3920 double (* fn)(double, double) = INSTR (23, 23)
3921 ? dminnm : dmaxnm;
3922
3923 if (! full)
3924 HALT_NYI;
3925 aarch64_set_vec_double (cpu, vd, 0,
3926 fn (aarch64_get_vec_double (cpu, vn, 0),
3927 aarch64_get_vec_double (cpu, vn, 1)));
3928 aarch64_set_vec_double (cpu, vd, 1,
3929 fn (aarch64_get_vec_double (cpu, vm, 0),
3930 aarch64_get_vec_double (cpu, vm, 1)));
3931 }
3932 else
3933 {
3934 float (* fn)(float, float) = INSTR (23, 23)
3935 ? fminnm : fmaxnm;
3936
3937 aarch64_set_vec_float (cpu, vd, 0,
3938 fn (aarch64_get_vec_float (cpu, vn, 0),
3939 aarch64_get_vec_float (cpu, vn, 1)));
3940 if (full)
3941 aarch64_set_vec_float (cpu, vd, 1,
3942 fn (aarch64_get_vec_float (cpu, vn, 2),
3943 aarch64_get_vec_float (cpu, vn, 3)));
3944
3945 aarch64_set_vec_float (cpu, vd, (full ? 2 : 1),
3946 fn (aarch64_get_vec_float (cpu, vm, 0),
3947 aarch64_get_vec_float (cpu, vm, 1)));
3948 if (full)
3949 aarch64_set_vec_float (cpu, vd, 3,
3950 fn (aarch64_get_vec_float (cpu, vm, 2),
3951 aarch64_get_vec_float (cpu, vm, 3)));
3952 }
3953 }
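
/* Pairwise lane layout for the routine above, sketched on plain arrays:
   the Vn pair feeds result lane 0 and the Vm pair feeds result lane 1,
   which is why the second store in the double branch targets index 1.
   Assumes the dmaxnm helper defined earlier; illustrative only.  */
#if 0
static void
fmaxnmp_2d_example (void)
{
  double vn[2] = { 1.0, 5.0 };
  double vm[2] = { -3.0, -7.0 };
  double vd[2];

  vd[0] = dmaxnm (vn[0], vn[1]);  /* 5.0  */
  vd[1] = dmaxnm (vm[0], vm[1]);  /* -3.0 */
}
#endif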
3954
3955 static void
3956 do_vec_AND (sim_cpu *cpu)
3957 {
3958 /* instr[31] = 0
3959 instr[30] = half (0)/full (1)
3960 instr[29,21] = 001110001
3961 instr[20,16] = Vm
3962 instr[15,10] = 000111
3963 instr[9,5] = Vn
3964      instr[4,0] = Vd.  */
3965
3966 unsigned vm = INSTR (20, 16);
3967 unsigned vn = INSTR (9, 5);
3968 unsigned vd = INSTR (4, 0);
3969 unsigned i;
3970 int full = INSTR (30, 30);
3971
3972 NYI_assert (29, 21, 0x071);
3973 NYI_assert (15, 10, 0x07);
3974
3975 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3976 for (i = 0; i < (full ? 4 : 2); i++)
3977 aarch64_set_vec_u32 (cpu, vd, i,
3978 aarch64_get_vec_u32 (cpu, vn, i)
3979 & aarch64_get_vec_u32 (cpu, vm, i));
3980 }
3981
3982 static void
3983 do_vec_BSL (sim_cpu *cpu)
3984 {
3985 /* instr[31] = 0
3986 instr[30] = half (0)/full (1)
3987 instr[29,21] = 101110011
3988 instr[20,16] = Vm
3989 instr[15,10] = 000111
3990 instr[9,5] = Vn
3991      instr[4,0] = Vd.  */
3992
3993 unsigned vm = INSTR (20, 16);
3994 unsigned vn = INSTR (9, 5);
3995 unsigned vd = INSTR (4, 0);
3996 unsigned i;
3997 int full = INSTR (30, 30);
3998
3999 NYI_assert (29, 21, 0x173);
4000 NYI_assert (15, 10, 0x07);
4001
4002 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4003 for (i = 0; i < (full ? 16 : 8); i++)
4004 aarch64_set_vec_u8 (cpu, vd, i,
4005 ( aarch64_get_vec_u8 (cpu, vd, i)
4006 & aarch64_get_vec_u8 (cpu, vn, i))
4007 | ((~ aarch64_get_vec_u8 (cpu, vd, i))
4008 & aarch64_get_vec_u8 (cpu, vm, i)));
4009 }
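
/* The select rule above, bit by bit: where the old Vd (the mask) has a
   1 take the bit from Vn, otherwise take it from Vm.  A minimal sketch
   on plain integers; illustrative only.  */
#if 0
#include <stdint.h>

static uint8_t
bsl_example (void)
{
  uint8_t mask = 0xF0;  /* Old contents of Vd.  */
  uint8_t vn = 0xAA;
  uint8_t vm = 0x55;

  /* High nibble from vn (0xA), low nibble from vm (0x5): 0xA5.  */
  return (mask & vn) | (~mask & vm);
}
#endif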
4010
4011 static void
4012 do_vec_EOR (sim_cpu *cpu)
4013 {
4014 /* instr[31] = 0
4015 instr[30] = half (0)/full (1)
4016 instr[29,21] = 10 1110 001
4017 instr[20,16] = Vm
4018 instr[15,10] = 000111
4019 instr[9,5] = Vn
4020      instr[4,0] = Vd.  */
4021
4022 unsigned vm = INSTR (20, 16);
4023 unsigned vn = INSTR (9, 5);
4024 unsigned vd = INSTR (4, 0);
4025 unsigned i;
4026 int full = INSTR (30, 30);
4027
4028 NYI_assert (29, 21, 0x171);
4029 NYI_assert (15, 10, 0x07);
4030
4031 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4032 for (i = 0; i < (full ? 4 : 2); i++)
4033 aarch64_set_vec_u32 (cpu, vd, i,
4034 aarch64_get_vec_u32 (cpu, vn, i)
4035 ^ aarch64_get_vec_u32 (cpu, vm, i));
4036 }
4037
4038 static void
4039 do_vec_bit (sim_cpu *cpu)
4040 {
4041 /* instr[31] = 0
4042 instr[30] = half (0)/full (1)
4043 instr[29,23] = 10 1110 1
4044 instr[22] = BIT (0) / BIF (1)
4045 instr[21] = 1
4046 instr[20,16] = Vm
4047 instr[15,10] = 0001 11
4048 instr[9,5] = Vn
4049      instr[4,0] = Vd.  */
4050
4051 unsigned vm = INSTR (20, 16);
4052 unsigned vn = INSTR (9, 5);
4053 unsigned vd = INSTR (4, 0);
4054 unsigned full = INSTR (30, 30);
4055 unsigned test_false = INSTR (22, 22);
4056 unsigned i;
4057
4058 NYI_assert (29, 23, 0x5D);
4059 NYI_assert (21, 21, 1);
4060 NYI_assert (15, 10, 0x07);
4061
4062 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4063 for (i = 0; i < (full ? 4 : 2); i++)
4064 {
4065 uint32_t vd_val = aarch64_get_vec_u32 (cpu, vd, i);
4066 uint32_t vn_val = aarch64_get_vec_u32 (cpu, vn, i);
4067 uint32_t vm_val = aarch64_get_vec_u32 (cpu, vm, i);
4068 if (test_false)
4069 aarch64_set_vec_u32 (cpu, vd, i,
4070 (vd_val & vm_val) | (vn_val & ~vm_val));
4071 else
4072 aarch64_set_vec_u32 (cpu, vd, i,
4073 (vd_val & ~vm_val) | (vn_val & vm_val));
4074 }
4075 }
4076
4077 static void
4078 do_vec_ORN (sim_cpu *cpu)
4079 {
4080 /* instr[31] = 0
4081 instr[30] = half (0)/full (1)
4082 instr[29,21] = 00 1110 111
4083 instr[20,16] = Vm
4084 instr[15,10] = 00 0111
4085 instr[9,5] = Vn
4086      instr[4,0] = Vd.  */
4087
4088 unsigned vm = INSTR (20, 16);
4089 unsigned vn = INSTR (9, 5);
4090 unsigned vd = INSTR (4, 0);
4091 unsigned i;
4092 int full = INSTR (30, 30);
4093
4094 NYI_assert (29, 21, 0x077);
4095 NYI_assert (15, 10, 0x07);
4096
4097 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4098 for (i = 0; i < (full ? 16 : 8); i++)
4099 aarch64_set_vec_u8 (cpu, vd, i,
4100 aarch64_get_vec_u8 (cpu, vn, i)
4101 | ~ aarch64_get_vec_u8 (cpu, vm, i));
4102 }
4103
4104 static void
4105 do_vec_ORR (sim_cpu *cpu)
4106 {
4107 /* instr[31] = 0
4108 instr[30] = half (0)/full (1)
4109 instr[29,21] = 00 1110 101
4110 instr[20,16] = Vm
4111 instr[15,10] = 0001 11
4112 instr[9,5] = Vn
4113      instr[4,0] = Vd.  */
4114
4115 unsigned vm = INSTR (20, 16);
4116 unsigned vn = INSTR (9, 5);
4117 unsigned vd = INSTR (4, 0);
4118 unsigned i;
4119 int full = INSTR (30, 30);
4120
4121 NYI_assert (29, 21, 0x075);
4122 NYI_assert (15, 10, 0x07);
4123
4124 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4125 for (i = 0; i < (full ? 16 : 8); i++)
4126 aarch64_set_vec_u8 (cpu, vd, i,
4127 aarch64_get_vec_u8 (cpu, vn, i)
4128 | aarch64_get_vec_u8 (cpu, vm, i));
4129 }
4130
4131 static void
4132 do_vec_BIC (sim_cpu *cpu)
4133 {
4134 /* instr[31] = 0
4135 instr[30] = half (0)/full (1)
4136 instr[29,21] = 00 1110 011
4137 instr[20,16] = Vm
4138 instr[15,10] = 00 0111
4139 instr[9,5] = Vn
4140      instr[4,0] = Vd.  */
4141
4142 unsigned vm = INSTR (20, 16);
4143 unsigned vn = INSTR (9, 5);
4144 unsigned vd = INSTR (4, 0);
4145 unsigned i;
4146 int full = INSTR (30, 30);
4147
4148 NYI_assert (29, 21, 0x073);
4149 NYI_assert (15, 10, 0x07);
4150
4151 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4152 for (i = 0; i < (full ? 16 : 8); i++)
4153 aarch64_set_vec_u8 (cpu, vd, i,
4154 aarch64_get_vec_u8 (cpu, vn, i)
4155 & ~ aarch64_get_vec_u8 (cpu, vm, i));
4156 }
4157
4158 static void
4159 do_vec_XTN (sim_cpu *cpu)
4160 {
4161 /* instr[31] = 0
4162 instr[30] = first part (0)/ second part (1)
4163 instr[29,24] = 00 1110
4164 instr[23,22] = size: byte(00), half(01), word (10)
4165 instr[21,10] = 1000 0100 1010
4166 instr[9,5] = Vs
4167 instr[4,0] = Vd. */
4168
4169 unsigned vs = INSTR (9, 5);
4170 unsigned vd = INSTR (4, 0);
4171 unsigned bias = INSTR (30, 30);
4172 unsigned i;
4173
4174 NYI_assert (29, 24, 0x0E);
4175 NYI_assert (21, 10, 0x84A);
4176
4177 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4178 switch (INSTR (23, 22))
4179 {
4180 case 0:
4181 for (i = 0; i < 8; i++)
4182 aarch64_set_vec_u8 (cpu, vd, i + (bias * 8),
4183 aarch64_get_vec_u16 (cpu, vs, i));
4184 return;
4185
4186 case 1:
4187 for (i = 0; i < 4; i++)
4188 aarch64_set_vec_u16 (cpu, vd, i + (bias * 4),
4189 aarch64_get_vec_u32 (cpu, vs, i));
4190 return;
4191
4192 case 2:
4193 for (i = 0; i < 2; i++)
4194 aarch64_set_vec_u32 (cpu, vd, i + (bias * 2),
4195 aarch64_get_vec_u64 (cpu, vs, i));
4196 return;
4197 }
4198 }
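
/* Lane placement for the narrowing move above, sketched on plain arrays
   for the byte case: XTN (bias 0) fills the low half of Vd, XTN2
   (bias 1) fills the high half and leaves the low half alone.
   Illustrative only.  */
#if 0
#include <stdint.h>

static void
xtn2_example (void)
{
  uint16_t vs[8] = { 0x1100, 0x2211, 0x3322, 0x4433,
                     0x5544, 0x6655, 0x7766, 0x8877 };
  uint8_t vd[16] = { 0 };
  unsigned bias = 1;  /* 1 selects the XTN2 form; 0 selects XTN.  */
  unsigned i;

  /* The low byte of each vs element lands in vd[8..15].  */
  for (i = 0; i < 8; i++)
    vd[i + bias * 8] = (uint8_t) vs[i];
}
#endif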
4199
4200 static void
4201 do_vec_maxv (sim_cpu *cpu)
4202 {
4203 /* instr[31] = 0
4204 instr[30] = half(0)/full(1)
4205 instr[29] = signed (0)/unsigned(1)
4206 instr[28,24] = 0 1110
4207 instr[23,22] = size: byte(00), half(01), word (10)
4208 instr[21] = 1
4209 instr[20,17] = 1 000
4210 instr[16] = max(0)/min(1)
4211 instr[15,10] = 1010 10
4212 instr[9,5] = V source
4213      instr[4,0] = R dest.  */
4214
4215 unsigned vs = INSTR (9, 5);
4216 unsigned rd = INSTR (4, 0);
4217 unsigned full = INSTR (30, 30);
4218 unsigned i;
4219
4220 NYI_assert (28, 24, 0x0E);
4221 NYI_assert (21, 21, 1);
4222 NYI_assert (20, 17, 8);
4223 NYI_assert (15, 10, 0x2A);
4224
4225 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4226 switch ((INSTR (29, 29) << 1) | INSTR (16, 16))
4227 {
4228 case 0: /* SMAXV. */
4229 {
4230 int64_t smax;
4231 switch (INSTR (23, 22))
4232 {
4233 case 0:
4234 smax = aarch64_get_vec_s8 (cpu, vs, 0);
4235 for (i = 1; i < (full ? 16 : 8); i++)
4236 smax = max (smax, aarch64_get_vec_s8 (cpu, vs, i));
4237 break;
4238 case 1:
4239 smax = aarch64_get_vec_s16 (cpu, vs, 0);
4240 for (i = 1; i < (full ? 8 : 4); i++)
4241 smax = max (smax, aarch64_get_vec_s16 (cpu, vs, i));
4242 break;
4243 case 2:
4244 smax = aarch64_get_vec_s32 (cpu, vs, 0);
4245 for (i = 1; i < (full ? 4 : 2); i++)
4246 smax = max (smax, aarch64_get_vec_s32 (cpu, vs, i));
4247 break;
4248 case 3:
4249 HALT_UNALLOC;
4250 }
4251 aarch64_set_reg_s64 (cpu, rd, NO_SP, smax);
4252 return;
4253 }
4254
4255 case 1: /* SMINV. */
4256 {
4257 int64_t smin;
4258 switch (INSTR (23, 22))
4259 {
4260 case 0:
4261 smin = aarch64_get_vec_s8 (cpu, vs, 0);
4262 for (i = 1; i < (full ? 16 : 8); i++)
4263 smin = min (smin, aarch64_get_vec_s8 (cpu, vs, i));
4264 break;
4265 case 1:
4266 smin = aarch64_get_vec_s16 (cpu, vs, 0);
4267 for (i = 1; i < (full ? 8 : 4); i++)
4268 smin = min (smin, aarch64_get_vec_s16 (cpu, vs, i));
4269 break;
4270 case 2:
4271 smin = aarch64_get_vec_s32 (cpu, vs, 0);
4272 for (i = 1; i < (full ? 4 : 2); i++)
4273 smin = min (smin, aarch64_get_vec_s32 (cpu, vs, i));
4274 break;
4275
4276 case 3:
4277 HALT_UNALLOC;
4278 }
4279 aarch64_set_reg_s64 (cpu, rd, NO_SP, smin);
4280 return;
4281 }
4282
4283 case 2: /* UMAXV. */
4284 {
4285 uint64_t umax;
4286 switch (INSTR (23, 22))
4287 {
4288 case 0:
4289 umax = aarch64_get_vec_u8 (cpu, vs, 0);
4290 for (i = 1; i < (full ? 16 : 8); i++)
4291 umax = max (umax, aarch64_get_vec_u8 (cpu, vs, i));
4292 break;
4293 case 1:
4294 umax = aarch64_get_vec_u16 (cpu, vs, 0);
4295 for (i = 1; i < (full ? 8 : 4); i++)
4296 umax = max (umax, aarch64_get_vec_u16 (cpu, vs, i));
4297 break;
4298 case 2:
4299 umax = aarch64_get_vec_u32 (cpu, vs, 0);
4300 for (i = 1; i < (full ? 4 : 2); i++)
4301 umax = max (umax, aarch64_get_vec_u32 (cpu, vs, i));
4302 break;
4303
4304 case 3:
4305 HALT_UNALLOC;
4306 }
4307 aarch64_set_reg_u64 (cpu, rd, NO_SP, umax);
4308 return;
4309 }
4310
4311 case 3: /* UMINV. */
4312 {
4313 uint64_t umin;
4314 switch (INSTR (23, 22))
4315 {
4316 case 0:
4317 umin = aarch64_get_vec_u8 (cpu, vs, 0);
4318 for (i = 1; i < (full ? 16 : 8); i++)
4319 umin = min (umin, aarch64_get_vec_u8 (cpu, vs, i));
4320 break;
4321 case 1:
4322 umin = aarch64_get_vec_u16 (cpu, vs, 0);
4323 for (i = 1; i < (full ? 8 : 4); i++)
4324 umin = min (umin, aarch64_get_vec_u16 (cpu, vs, i));
4325 break;
4326 case 2:
4327 umin = aarch64_get_vec_u32 (cpu, vs, 0);
4328 for (i = 1; i < (full ? 4 : 2); i++)
4329 umin = min (umin, aarch64_get_vec_u32 (cpu, vs, i));
4330 break;
4331
4332 case 3:
4333 HALT_UNALLOC;
4334 }
4335 aarch64_set_reg_u64 (cpu, rd, NO_SP, umin);
4336 return;
4337 }
4338 }
4339 }
4340
4341 static void
4342 do_vec_fminmaxV (sim_cpu *cpu)
4343 {
4344 /* instr[31,24] = 0110 1110
4345 instr[23] = max(0)/min(1)
4346 instr[22,14] = 011 0000 11
4347 instr[13,12] = nm(00)/normal(11)
4348 instr[11,10] = 10
4349 instr[9,5] = V source
4350      instr[4,0] = R dest.  */
4351
4352 unsigned vs = INSTR (9, 5);
4353 unsigned rd = INSTR (4, 0);
4354 unsigned i;
4355 float res = aarch64_get_vec_float (cpu, vs, 0);
4356
4357 NYI_assert (31, 24, 0x6E);
4358 NYI_assert (22, 14, 0x0C3);
4359 NYI_assert (11, 10, 2);
4360
4361 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4362 if (INSTR (23, 23))
4363 {
4364 switch (INSTR (13, 12))
4365 {
4366 	case 0: /* FMINNMV.  */
4367 for (i = 1; i < 4; i++)
4368 res = fminnm (res, aarch64_get_vec_float (cpu, vs, i));
4369 break;
4370
4371 case 3: /* FMINV. */
4372 for (i = 1; i < 4; i++)
4373 res = min (res, aarch64_get_vec_float (cpu, vs, i));
4374 break;
4375
4376 default:
4377 HALT_NYI;
4378 }
4379 }
4380 else
4381 {
4382 switch (INSTR (13, 12))
4383 {
4384 	case 0: /* FMAXNMV.  */
4385 for (i = 1; i < 4; i++)
4386 res = fmaxnm (res, aarch64_get_vec_float (cpu, vs, i));
4387 break;
4388
4389 case 3: /* FMAXV. */
4390 for (i = 1; i < 4; i++)
4391 res = max (res, aarch64_get_vec_float (cpu, vs, i));
4392 break;
4393
4394 default:
4395 HALT_NYI;
4396 }
4397 }
4398
4399 aarch64_set_FP_float (cpu, rd, res);
4400 }
4401
4402 static void
4403 do_vec_Fminmax (sim_cpu *cpu)
4404 {
4405 /* instr[31] = 0
4406 instr[30] = half(0)/full(1)
4407 instr[29,24] = 00 1110
4408 instr[23] = max(0)/min(1)
4409 instr[22] = float(0)/double(1)
4410 instr[21] = 1
4411 instr[20,16] = Vm
4412 instr[15,14] = 11
4413 instr[13,12] = nm(00)/normal(11)
4414 instr[11,10] = 01
4415 instr[9,5] = Vn
4416 instr[4,0] = Vd. */
4417
4418 unsigned vm = INSTR (20, 16);
4419 unsigned vn = INSTR (9, 5);
4420 unsigned vd = INSTR (4, 0);
4421 unsigned full = INSTR (30, 30);
4422 unsigned min = INSTR (23, 23);
4423 unsigned i;
4424
4425 NYI_assert (29, 24, 0x0E);
4426 NYI_assert (21, 21, 1);
4427 NYI_assert (15, 14, 3);
4428 NYI_assert (11, 10, 1);
4429
4430 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4431 if (INSTR (22, 22))
4432 {
4433 double (* func)(double, double);
4434
4435 if (! full)
4436 HALT_NYI;
4437
4438 if (INSTR (13, 12) == 0)
4439 func = min ? dminnm : dmaxnm;
4440 else if (INSTR (13, 12) == 3)
4441 func = min ? fmin : fmax;
4442 else
4443 HALT_NYI;
4444
4445 for (i = 0; i < 2; i++)
4446 aarch64_set_vec_double (cpu, vd, i,
4447 func (aarch64_get_vec_double (cpu, vn, i),
4448 aarch64_get_vec_double (cpu, vm, i)));
4449 }
4450 else
4451 {
4452 float (* func)(float, float);
4453
4454 if (INSTR (13, 12) == 0)
4455 func = min ? fminnm : fmaxnm;
4456 else if (INSTR (13, 12) == 3)
4457 func = min ? fminf : fmaxf;
4458 else
4459 HALT_NYI;
4460
4461 for (i = 0; i < (full ? 4 : 2); i++)
4462 aarch64_set_vec_float (cpu, vd, i,
4463 func (aarch64_get_vec_float (cpu, vn, i),
4464 aarch64_get_vec_float (cpu, vm, i)));
4465 }
4466 }
4467
4468 static void
4469 do_vec_SCVTF (sim_cpu *cpu)
4470 {
4471 /* instr[31] = 0
4472 instr[30] = Q
4473 instr[29,23] = 00 1110 0
4474 instr[22] = float(0)/double(1)
4475 instr[21,10] = 10 0001 1101 10
4476 instr[9,5] = Vn
4477 instr[4,0] = Vd. */
4478
4479 unsigned vn = INSTR (9, 5);
4480 unsigned vd = INSTR (4, 0);
4481 unsigned full = INSTR (30, 30);
4482 unsigned size = INSTR (22, 22);
4483 unsigned i;
4484
4485 NYI_assert (29, 23, 0x1C);
4486 NYI_assert (21, 10, 0x876);
4487
4488 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4489 if (size)
4490 {
4491 if (! full)
4492 HALT_UNALLOC;
4493
4494 for (i = 0; i < 2; i++)
4495 {
4496 	  double val = (double) aarch64_get_vec_s64 (cpu, vn, i);
4497 aarch64_set_vec_double (cpu, vd, i, val);
4498 }
4499 }
4500 else
4501 {
4502 for (i = 0; i < (full ? 4 : 2); i++)
4503 {
4504 	  float val = (float) aarch64_get_vec_s32 (cpu, vn, i);
4505 aarch64_set_vec_float (cpu, vd, i, val);
4506 }
4507 }
4508 }
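
/* Why the signed accessors above matter: SCVTF converts *signed*
   integer lanes, so a lane holding -1 must become -1.0, not 2**32 - 1.
   Sketch; illustrative only.  */
#if 0
#include <stdint.h>

static void
scvtf_sign_example (void)
{
  int32_t lane = -1;
  float signed_result = (float) lane;              /* -1.0f  */
  float unsigned_result = (float) (uint32_t) lane; /* ~4.29e9f  */

  (void) signed_result;
  (void) unsigned_result;
}
#endif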
4509
4510 #define VEC_CMP(SOURCE, CMP) \
4511 do \
4512 { \
4513 switch (size) \
4514 { \
4515 case 0: \
4516 for (i = 0; i < (full ? 16 : 8); i++) \
4517 aarch64_set_vec_u8 (cpu, vd, i, \
4518 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4519 CMP \
4520 aarch64_get_vec_##SOURCE##8 (cpu, vm, i) \
4521 ? -1 : 0); \
4522 return; \
4523 case 1: \
4524 for (i = 0; i < (full ? 8 : 4); i++) \
4525 aarch64_set_vec_u16 (cpu, vd, i, \
4526 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4527 CMP \
4528 aarch64_get_vec_##SOURCE##16 (cpu, vm, i) \
4529 ? -1 : 0); \
4530 return; \
4531 case 2: \
4532 for (i = 0; i < (full ? 4 : 2); i++) \
4533 aarch64_set_vec_u32 (cpu, vd, i, \
4534 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4535 CMP \
4536 aarch64_get_vec_##SOURCE##32 (cpu, vm, i) \
4537 ? -1 : 0); \
4538 return; \
4539 case 3: \
4540 if (! full) \
4541 HALT_UNALLOC; \
4542 for (i = 0; i < 2; i++) \
4543 aarch64_set_vec_u64 (cpu, vd, i, \
4544 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4545 CMP \
4546 aarch64_get_vec_##SOURCE##64 (cpu, vm, i) \
4547 ? -1ULL : 0); \
4548 return; \
4549 } \
4550 } \
4551 while (0)
4552
4553 #define VEC_CMP0(SOURCE, CMP) \
4554 do \
4555 { \
4556 switch (size) \
4557 { \
4558 case 0: \
4559 for (i = 0; i < (full ? 16 : 8); i++) \
4560 aarch64_set_vec_u8 (cpu, vd, i, \
4561 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4562 CMP 0 ? -1 : 0); \
4563 return; \
4564 case 1: \
4565 for (i = 0; i < (full ? 8 : 4); i++) \
4566 aarch64_set_vec_u16 (cpu, vd, i, \
4567 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4568 CMP 0 ? -1 : 0); \
4569 return; \
4570 case 2: \
4571 for (i = 0; i < (full ? 4 : 2); i++) \
4572 aarch64_set_vec_u32 (cpu, vd, i, \
4573 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4574 CMP 0 ? -1 : 0); \
4575 return; \
4576 case 3: \
4577 if (! full) \
4578 HALT_UNALLOC; \
4579 for (i = 0; i < 2; i++) \
4580 aarch64_set_vec_u64 (cpu, vd, i, \
4581 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4582 CMP 0 ? -1ULL : 0); \
4583 return; \
4584 } \
4585 } \
4586 while (0)
4587
4588 #define VEC_FCMP0(CMP) \
4589 do \
4590 { \
4591 if (vm != 0) \
4592 HALT_NYI; \
4593 if (INSTR (22, 22)) \
4594 { \
4595 if (! full) \
4596 HALT_NYI; \
4597 for (i = 0; i < 2; i++) \
4598 aarch64_set_vec_u64 (cpu, vd, i, \
4599 aarch64_get_vec_double (cpu, vn, i) \
4600 CMP 0.0 ? -1 : 0); \
4601 } \
4602 else \
4603 { \
4604 for (i = 0; i < (full ? 4 : 2); i++) \
4605 aarch64_set_vec_u32 (cpu, vd, i, \
4606 aarch64_get_vec_float (cpu, vn, i) \
4607 CMP 0.0 ? -1 : 0); \
4608 } \
4609 return; \
4610 } \
4611 while (0)
4612
4613 #define VEC_FCMP(CMP) \
4614 do \
4615 { \
4616 if (INSTR (22, 22)) \
4617 { \
4618 if (! full) \
4619 HALT_NYI; \
4620 for (i = 0; i < 2; i++) \
4621 aarch64_set_vec_u64 (cpu, vd, i, \
4622 aarch64_get_vec_double (cpu, vn, i) \
4623 CMP \
4624 aarch64_get_vec_double (cpu, vm, i) \
4625 ? -1 : 0); \
4626 } \
4627 else \
4628 { \
4629 for (i = 0; i < (full ? 4 : 2); i++) \
4630 aarch64_set_vec_u32 (cpu, vd, i, \
4631 aarch64_get_vec_float (cpu, vn, i) \
4632 CMP \
4633 aarch64_get_vec_float (cpu, vm, i) \
4634 ? -1 : 0); \
4635 } \
4636 return; \
4637 } \
4638 while (0)
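
/* The comparison macros above produce lane-wide masks rather than
   booleans: every bit of a matching lane is set.  Sketch of the values
   involved; illustrative only.  */
#if 0
#include <stdint.h>

static void
vec_cmp_mask_example (void)
{
  int32_t n = 7, m = 3;
  uint32_t gt = (n > m) ? -1 : 0;   /* 0xFFFFFFFF: lane-wide true.  */
  uint32_t eq = (n == m) ? -1 : 0;  /* 0x00000000: lane-wide false.  */

  /* Masks of this shape combine directly with BSL/BIT/BIF selects.  */
  (void) gt;
  (void) eq;
}
#endif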
4639
4640 static void
4641 do_vec_compare (sim_cpu *cpu)
4642 {
4643 /* instr[31] = 0
4644 instr[30] = half(0)/full(1)
4645 instr[29] = part-of-comparison-type
4646 instr[28,24] = 0 1110
4647 instr[23,22] = size of integer compares: byte(00), half(01), word (10), long (11)
4648 type of float compares: single (-0) / double (-1)
4649 instr[21] = 1
4650 instr[20,16] = Vm or 00000 (compare vs 0)
4651 instr[15,10] = part-of-comparison-type
4652 instr[9,5] = Vn
4653      instr[4,0] = Vd.  */
4654
4655 int full = INSTR (30, 30);
4656 int size = INSTR (23, 22);
4657 unsigned vm = INSTR (20, 16);
4658 unsigned vn = INSTR (9, 5);
4659 unsigned vd = INSTR (4, 0);
4660 unsigned i;
4661
4662 NYI_assert (28, 24, 0x0E);
4663 NYI_assert (21, 21, 1);
4664
4665 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4666 if ((INSTR (11, 11)
4667 && INSTR (14, 14))
4668 || ((INSTR (11, 11) == 0
4669 && INSTR (10, 10) == 0)))
4670 {
4671 /* A compare vs 0. */
4672 if (vm != 0)
4673 {
4674 if (INSTR (15, 10) == 0x2A)
4675 do_vec_maxv (cpu);
4676 else if (INSTR (15, 10) == 0x32
4677 || INSTR (15, 10) == 0x3E)
4678 do_vec_fminmaxV (cpu);
4679 else if (INSTR (29, 23) == 0x1C
4680 && INSTR (21, 10) == 0x876)
4681 do_vec_SCVTF (cpu);
4682 else
4683 HALT_NYI;
4684 return;
4685 }
4686 }
4687
4688 if (INSTR (14, 14))
4689 {
4690 /* A floating point compare. */
4691 unsigned decode = (INSTR (29, 29) << 5) | (INSTR (23, 23) << 4)
4692 | INSTR (13, 10);
4693
4694 NYI_assert (15, 15, 1);
4695
4696 switch (decode)
4697 {
4698 case /* 0b010010: GT#0 */ 0x12: VEC_FCMP0 (>);
4699 case /* 0b110010: GE#0 */ 0x32: VEC_FCMP0 (>=);
4700 case /* 0b010110: EQ#0 */ 0x16: VEC_FCMP0 (==);
4701 case /* 0b110110: LE#0 */ 0x36: VEC_FCMP0 (<=);
4702 case /* 0b011010: LT#0 */ 0x1A: VEC_FCMP0 (<);
4703 case /* 0b111001: GT */ 0x39: VEC_FCMP (>);
4704 case /* 0b101001: GE */ 0x29: VEC_FCMP (>=);
4705 case /* 0b001001: EQ */ 0x09: VEC_FCMP (==);
4706
4707 default:
4708 HALT_NYI;
4709 }
4710 }
4711 else
4712 {
4713 unsigned decode = (INSTR (29, 29) << 6) | INSTR (15, 10);
4714
4715 switch (decode)
4716 {
4717 case 0x0D: /* 0001101 GT */ VEC_CMP (s, > );
4718 case 0x0F: /* 0001111 GE */ VEC_CMP (s, >= );
4719 case 0x22: /* 0100010 GT #0 */ VEC_CMP0 (s, > );
4720 case 0x23: /* 0100011 TST */ VEC_CMP (u, & );
4721 case 0x26: /* 0100110 EQ #0 */ VEC_CMP0 (s, == );
4722 case 0x2A: /* 0101010 LT #0 */ VEC_CMP0 (s, < );
4723 case 0x4D: /* 1001101 HI */ VEC_CMP (u, > );
4724 case 0x4F: /* 1001111 HS */ VEC_CMP (u, >= );
4725 case 0x62: /* 1100010 GE #0 */ VEC_CMP0 (s, >= );
4726 case 0x63: /* 1100011 EQ */ VEC_CMP (u, == );
4727 case 0x66: /* 1100110 LE #0 */ VEC_CMP0 (s, <= );
4728 default:
4729 if (vm == 0)
4730 HALT_NYI;
4731 do_vec_maxv (cpu);
4732 }
4733 }
4734 }
4735
4736 static void
4737 do_vec_SSHL (sim_cpu *cpu)
4738 {
4739 /* instr[31] = 0
4740 instr[30] = first part (0)/ second part (1)
4741 instr[29,24] = 00 1110
4742 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4743 instr[21] = 1
4744 instr[20,16] = Vm
4745 instr[15,10] = 0100 01
4746 instr[9,5] = Vn
4747 instr[4,0] = Vd. */
4748
4749 unsigned full = INSTR (30, 30);
4750 unsigned vm = INSTR (20, 16);
4751 unsigned vn = INSTR (9, 5);
4752 unsigned vd = INSTR (4, 0);
4753 unsigned i;
4754 signed int shift;
4755
4756 NYI_assert (29, 24, 0x0E);
4757 NYI_assert (21, 21, 1);
4758 NYI_assert (15, 10, 0x11);
4759
4760   /* FIXME: What is a signed shift left in this context?  */
4761
4762 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4763 switch (INSTR (23, 22))
4764 {
4765 case 0:
4766 for (i = 0; i < (full ? 16 : 8); i++)
4767 {
4768 shift = aarch64_get_vec_s8 (cpu, vm, i);
4769 if (shift >= 0)
4770 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4771 << shift);
4772 else
4773 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4774 >> - shift);
4775 }
4776 return;
4777
4778 case 1:
4779 for (i = 0; i < (full ? 8 : 4); i++)
4780 {
4781 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
4782 if (shift >= 0)
4783 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4784 << shift);
4785 else
4786 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4787 >> - shift);
4788 }
4789 return;
4790
4791 case 2:
4792 for (i = 0; i < (full ? 4 : 2); i++)
4793 {
4794 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
4795 if (shift >= 0)
4796 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4797 << shift);
4798 else
4799 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4800 >> - shift);
4801 }
4802 return;
4803
4804 case 3:
4805 if (! full)
4806 HALT_UNALLOC;
4807 for (i = 0; i < 2; i++)
4808 {
4809 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
4810 if (shift >= 0)
4811 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4812 << shift);
4813 else
4814 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4815 >> - shift);
4816 }
4817 return;
4818 }
4819 }
4820
4821 static void
4822 do_vec_USHL (sim_cpu *cpu)
4823 {
4824 /* instr[31] = 0
4825 instr[30] = first part (0)/ second part (1)
4826 instr[29,24] = 10 1110
4827 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4828 instr[21] = 1
4829 instr[20,16] = Vm
4830 instr[15,10] = 0100 01
4831 instr[9,5] = Vn
4832 instr[4,0] = Vd */
4833
4834 unsigned full = INSTR (30, 30);
4835 unsigned vm = INSTR (20, 16);
4836 unsigned vn = INSTR (9, 5);
4837 unsigned vd = INSTR (4, 0);
4838 unsigned i;
4839 signed int shift;
4840
4841 NYI_assert (29, 24, 0x2E);
4842 NYI_assert (15, 10, 0x11);
4843
4844 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4845 switch (INSTR (23, 22))
4846 {
4847 case 0:
4848 for (i = 0; i < (full ? 16 : 8); i++)
4849 {
4850 shift = aarch64_get_vec_s8 (cpu, vm, i);
4851 if (shift >= 0)
4852 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
4853 << shift);
4854 else
4855 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
4856 >> - shift);
4857 }
4858 return;
4859
4860 case 1:
4861 for (i = 0; i < (full ? 8 : 4); i++)
4862 {
4863 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
4864 if (shift >= 0)
4865 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
4866 << shift);
4867 else
4868 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
4869 >> - shift);
4870 }
4871 return;
4872
4873 case 2:
4874 for (i = 0; i < (full ? 4 : 2); i++)
4875 {
4876 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
4877 if (shift >= 0)
4878 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
4879 << shift);
4880 else
4881 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
4882 >> - shift);
4883 }
4884 return;
4885
4886 case 3:
4887 if (! full)
4888 HALT_UNALLOC;
4889 for (i = 0; i < 2; i++)
4890 {
4891 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
4892 if (shift >= 0)
4893 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
4894 << shift);
4895 else
4896 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
4897 >> - shift);
4898 }
4899 return;
4900 }
4901 }
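
/* In both shift routines above the per-lane shift count is the signed
   low byte of the corresponding Vm element: positive shifts left,
   negative shifts right.  Sketch of one byte lane; illustrative
   only.  */
#if 0
#include <stdint.h>

static int8_t
sshl_lane_example (int8_t value, int8_t count)
{
  /* Mirrors the byte-element case of do_vec_SSHL.  */
  return (count >= 0) ? (int8_t) (value << count)
                      : (int8_t) (value >> -count);
}

/* sshl_lane_example (16, 2)  == 64 (left by two).
   sshl_lane_example (16, -2) == 4  (right by two).  */
#endif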
4902
4903 static void
4904 do_vec_FMLA (sim_cpu *cpu)
4905 {
4906 /* instr[31] = 0
4907 instr[30] = full/half selector
4908 instr[29,23] = 0011100
4909 instr[22] = size: 0=>float, 1=>double
4910 instr[21] = 1
4911      instr[20,16] = Vm
4912      instr[15,10] = 1100 11
4913      instr[9,5] = Vn
4914      instr[4,0] = Vd.  */
4915
4916 unsigned vm = INSTR (20, 16);
4917 unsigned vn = INSTR (9, 5);
4918 unsigned vd = INSTR (4, 0);
4919 unsigned i;
4920 int full = INSTR (30, 30);
4921
4922 NYI_assert (29, 23, 0x1C);
4923 NYI_assert (21, 21, 1);
4924 NYI_assert (15, 10, 0x33);
4925
4926 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4927 if (INSTR (22, 22))
4928 {
4929 if (! full)
4930 HALT_UNALLOC;
4931 for (i = 0; i < 2; i++)
4932 aarch64_set_vec_double (cpu, vd, i,
4933 aarch64_get_vec_double (cpu, vn, i) *
4934 aarch64_get_vec_double (cpu, vm, i) +
4935 aarch64_get_vec_double (cpu, vd, i));
4936 }
4937 else
4938 {
4939 for (i = 0; i < (full ? 4 : 2); i++)
4940 aarch64_set_vec_float (cpu, vd, i,
4941 aarch64_get_vec_float (cpu, vn, i) *
4942 aarch64_get_vec_float (cpu, vm, i) +
4943 aarch64_get_vec_float (cpu, vd, i));
4944 }
4945 }
4946
4947 static void
4948 do_vec_max (sim_cpu *cpu)
4949 {
4950 /* instr[31] = 0
4951 instr[30] = full/half selector
4952 instr[29] = SMAX (0) / UMAX (1)
4953 instr[28,24] = 0 1110
4954 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
4955 instr[21] = 1
4956      instr[20,16] = Vm
4957      instr[15,10] = 0110 01
4958      instr[9,5] = Vn
4959      instr[4,0] = Vd.  */
4960
4961 unsigned vm = INSTR (20, 16);
4962 unsigned vn = INSTR (9, 5);
4963 unsigned vd = INSTR (4, 0);
4964 unsigned i;
4965 int full = INSTR (30, 30);
4966
4967 NYI_assert (28, 24, 0x0E);
4968 NYI_assert (21, 21, 1);
4969 NYI_assert (15, 10, 0x19);
4970
4971 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4972 if (INSTR (29, 29))
4973 {
4974 switch (INSTR (23, 22))
4975 {
4976 case 0:
4977 for (i = 0; i < (full ? 16 : 8); i++)
4978 aarch64_set_vec_u8 (cpu, vd, i,
4979 aarch64_get_vec_u8 (cpu, vn, i)
4980 > aarch64_get_vec_u8 (cpu, vm, i)
4981 ? aarch64_get_vec_u8 (cpu, vn, i)
4982 : aarch64_get_vec_u8 (cpu, vm, i));
4983 return;
4984
4985 case 1:
4986 for (i = 0; i < (full ? 8 : 4); i++)
4987 aarch64_set_vec_u16 (cpu, vd, i,
4988 aarch64_get_vec_u16 (cpu, vn, i)
4989 > aarch64_get_vec_u16 (cpu, vm, i)
4990 ? aarch64_get_vec_u16 (cpu, vn, i)
4991 : aarch64_get_vec_u16 (cpu, vm, i));
4992 return;
4993
4994 case 2:
4995 for (i = 0; i < (full ? 4 : 2); i++)
4996 aarch64_set_vec_u32 (cpu, vd, i,
4997 aarch64_get_vec_u32 (cpu, vn, i)
4998 > aarch64_get_vec_u32 (cpu, vm, i)
4999 ? aarch64_get_vec_u32 (cpu, vn, i)
5000 : aarch64_get_vec_u32 (cpu, vm, i));
5001 return;
5002
5003 case 3:
5004 HALT_UNALLOC;
5005 }
5006 }
5007 else
5008 {
5009 switch (INSTR (23, 22))
5010 {
5011 case 0:
5012 for (i = 0; i < (full ? 16 : 8); i++)
5013 aarch64_set_vec_s8 (cpu, vd, i,
5014 aarch64_get_vec_s8 (cpu, vn, i)
5015 > aarch64_get_vec_s8 (cpu, vm, i)
5016 ? aarch64_get_vec_s8 (cpu, vn, i)
5017 : aarch64_get_vec_s8 (cpu, vm, i));
5018 return;
5019
5020 case 1:
5021 for (i = 0; i < (full ? 8 : 4); i++)
5022 aarch64_set_vec_s16 (cpu, vd, i,
5023 aarch64_get_vec_s16 (cpu, vn, i)
5024 > aarch64_get_vec_s16 (cpu, vm, i)
5025 ? aarch64_get_vec_s16 (cpu, vn, i)
5026 : aarch64_get_vec_s16 (cpu, vm, i));
5027 return;
5028
5029 case 2:
5030 for (i = 0; i < (full ? 4 : 2); i++)
5031 aarch64_set_vec_s32 (cpu, vd, i,
5032 aarch64_get_vec_s32 (cpu, vn, i)
5033 > aarch64_get_vec_s32 (cpu, vm, i)
5034 ? aarch64_get_vec_s32 (cpu, vn, i)
5035 : aarch64_get_vec_s32 (cpu, vm, i));
5036 return;
5037
5038 case 3:
5039 HALT_UNALLOC;
5040 }
5041 }
5042 }
5043
5044 static void
5045 do_vec_min (sim_cpu *cpu)
5046 {
5047 /* instr[31] = 0
5048 instr[30] = full/half selector
5049 instr[29] = SMIN (0) / UMIN (1)
5050 instr[28,24] = 0 1110
5051 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
5052 instr[21] = 1
5053      instr[20,16] = Vm
5054      instr[15,10] = 0110 11
5055      instr[9,5] = Vn
5056      instr[4,0] = Vd.  */
5057
5058 unsigned vm = INSTR (20, 16);
5059 unsigned vn = INSTR (9, 5);
5060 unsigned vd = INSTR (4, 0);
5061 unsigned i;
5062 int full = INSTR (30, 30);
5063
5064 NYI_assert (28, 24, 0x0E);
5065 NYI_assert (21, 21, 1);
5066 NYI_assert (15, 10, 0x1B);
5067
5068 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5069 if (INSTR (29, 29))
5070 {
5071 switch (INSTR (23, 22))
5072 {
5073 case 0:
5074 for (i = 0; i < (full ? 16 : 8); i++)
5075 aarch64_set_vec_u8 (cpu, vd, i,
5076 aarch64_get_vec_u8 (cpu, vn, i)
5077 < aarch64_get_vec_u8 (cpu, vm, i)
5078 ? aarch64_get_vec_u8 (cpu, vn, i)
5079 : aarch64_get_vec_u8 (cpu, vm, i));
5080 return;
5081
5082 case 1:
5083 for (i = 0; i < (full ? 8 : 4); i++)
5084 aarch64_set_vec_u16 (cpu, vd, i,
5085 aarch64_get_vec_u16 (cpu, vn, i)
5086 < aarch64_get_vec_u16 (cpu, vm, i)
5087 ? aarch64_get_vec_u16 (cpu, vn, i)
5088 : aarch64_get_vec_u16 (cpu, vm, i));
5089 return;
5090
5091 case 2:
5092 for (i = 0; i < (full ? 4 : 2); i++)
5093 aarch64_set_vec_u32 (cpu, vd, i,
5094 aarch64_get_vec_u32 (cpu, vn, i)
5095 < aarch64_get_vec_u32 (cpu, vm, i)
5096 ? aarch64_get_vec_u32 (cpu, vn, i)
5097 : aarch64_get_vec_u32 (cpu, vm, i));
5098 return;
5099
5100 case 3:
5101 HALT_UNALLOC;
5102 }
5103 }
5104 else
5105 {
5106 switch (INSTR (23, 22))
5107 {
5108 case 0:
5109 for (i = 0; i < (full ? 16 : 8); i++)
5110 aarch64_set_vec_s8 (cpu, vd, i,
5111 aarch64_get_vec_s8 (cpu, vn, i)
5112 < aarch64_get_vec_s8 (cpu, vm, i)
5113 ? aarch64_get_vec_s8 (cpu, vn, i)
5114 : aarch64_get_vec_s8 (cpu, vm, i));
5115 return;
5116
5117 case 1:
5118 for (i = 0; i < (full ? 8 : 4); i++)
5119 aarch64_set_vec_s16 (cpu, vd, i,
5120 aarch64_get_vec_s16 (cpu, vn, i)
5121 < aarch64_get_vec_s16 (cpu, vm, i)
5122 ? aarch64_get_vec_s16 (cpu, vn, i)
5123 : aarch64_get_vec_s16 (cpu, vm, i));
5124 return;
5125
5126 case 2:
5127 for (i = 0; i < (full ? 4 : 2); i++)
5128 aarch64_set_vec_s32 (cpu, vd, i,
5129 aarch64_get_vec_s32 (cpu, vn, i)
5130 < aarch64_get_vec_s32 (cpu, vm, i)
5131 ? aarch64_get_vec_s32 (cpu, vn, i)
5132 : aarch64_get_vec_s32 (cpu, vm, i));
5133 return;
5134
5135 case 3:
5136 HALT_UNALLOC;
5137 }
5138 }
5139 }
5140
5141 static void
5142 do_vec_sub_long (sim_cpu *cpu)
5143 {
5144 /* instr[31] = 0
5145 instr[30] = lower (0) / upper (1)
5146 instr[29] = signed (0) / unsigned (1)
5147 instr[28,24] = 0 1110
5148 instr[23,22] = size: bytes (00), half (01), word (10)
5149 instr[21] = 1
5150      instr[20,16] = Vm
5151 instr[15,10] = 0010 00
5152 instr[9,5] = Vn
5153 instr[4,0] = V dest. */
5154
5155 unsigned size = INSTR (23, 22);
5156 unsigned vm = INSTR (20, 16);
5157 unsigned vn = INSTR (9, 5);
5158 unsigned vd = INSTR (4, 0);
5159 unsigned bias = 0;
5160 unsigned i;
5161
5162 NYI_assert (28, 24, 0x0E);
5163 NYI_assert (21, 21, 1);
5164 NYI_assert (15, 10, 0x08);
5165
5166 if (size == 3)
5167 HALT_UNALLOC;
5168
5169 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5170 switch (INSTR (30, 29))
5171 {
5172 case 2: /* SSUBL2. */
5173       bias = 2; /* Fall through.  */
5174 case 0: /* SSUBL. */
5175 switch (size)
5176 {
5177 case 0:
5178 	  bias *= 4;
5179 for (i = 0; i < 8; i++)
5180 aarch64_set_vec_s16 (cpu, vd, i,
5181 aarch64_get_vec_s8 (cpu, vn, i + bias)
5182 - aarch64_get_vec_s8 (cpu, vm, i + bias));
5183 break;
5184
5185 case 1:
5186 bias *= 2;
5187 for (i = 0; i < 4; i++)
5188 aarch64_set_vec_s32 (cpu, vd, i,
5189 aarch64_get_vec_s16 (cpu, vn, i + bias)
5190 - aarch64_get_vec_s16 (cpu, vm, i + bias));
5191 break;
5192
5193 case 2:
5194 for (i = 0; i < 2; i++)
5195 aarch64_set_vec_s64 (cpu, vd, i,
5196 aarch64_get_vec_s32 (cpu, vn, i + bias)
5197 - aarch64_get_vec_s32 (cpu, vm, i + bias));
5198 break;
5199
5200 default:
5201 HALT_UNALLOC;
5202 }
5203 break;
5204
5205 case 3: /* USUBL2. */
5206       bias = 2; /* Fall through.  */
5207 case 1: /* USUBL. */
5208 switch (size)
5209 {
5210 case 0:
5211 	  bias *= 4;
5212 for (i = 0; i < 8; i++)
5213 aarch64_set_vec_u16 (cpu, vd, i,
5214 aarch64_get_vec_u8 (cpu, vn, i + bias)
5215 - aarch64_get_vec_u8 (cpu, vm, i + bias));
5216 break;
5217
5218 case 1:
5219 bias *= 2;
5220 for (i = 0; i < 4; i++)
5221 aarch64_set_vec_u32 (cpu, vd, i,
5222 aarch64_get_vec_u16 (cpu, vn, i + bias)
5223 - aarch64_get_vec_u16 (cpu, vm, i + bias));
5224 break;
5225
5226 case 2:
5227 for (i = 0; i < 2; i++)
5228 aarch64_set_vec_u64 (cpu, vd, i,
5229 aarch64_get_vec_u32 (cpu, vn, i + bias)
5230 - aarch64_get_vec_u32 (cpu, vm, i + bias));
5231 break;
5232
5233 default:
5234 HALT_UNALLOC;
5235 }
5236 break;
5237 }
5238 }
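
/* How the widening subtract above picks its source lanes: the "2"
   variants (SSUBL2/USUBL2) read the upper half of each source vector,
   which is what the bias arithmetic selects (e.g. bias 2 * 4 == 8 for
   byte elements).  Sketch on plain arrays; illustrative only.  */
#if 0
#include <stdint.h>

static void
ssubl2_example (void)
{
  int8_t vn[16], vm[16];
  int16_t vd[8];
  unsigned bias = 8;  /* Element offset of the upper half.  */
  unsigned i;

  for (i = 0; i < 16; i++)
    {
      vn[i] = (int8_t) i;
      vm[i] = (int8_t) (2 * i);
    }

  /* vd.8h = vn.b[8..15] - vm.b[8..15], widened to 16 bits.  */
  for (i = 0; i < 8; i++)
    vd[i] = (int16_t) vn[i + bias] - (int16_t) vm[i + bias];
}
#endif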
5239
5240 static void
5241 do_vec_ADDP (sim_cpu *cpu)
5242 {
5243 /* instr[31] = 0
5244 instr[30] = half(0)/full(1)
5245 instr[29,24] = 00 1110
5246 instr[23,22] = size: bytes (00), half (01), word (10), long (11)
5247 instr[21] = 1
5248      instr[20,16] = Vm
5249 instr[15,10] = 1011 11
5250 instr[9,5] = Vn
5251 instr[4,0] = V dest. */
5252
5253 FRegister copy_vn;
5254 FRegister copy_vm;
5255 unsigned full = INSTR (30, 30);
5256 unsigned size = INSTR (23, 22);
5257 unsigned vm = INSTR (20, 16);
5258 unsigned vn = INSTR (9, 5);
5259 unsigned vd = INSTR (4, 0);
5260 unsigned i, range;
5261
5262 NYI_assert (29, 24, 0x0E);
5263 NYI_assert (21, 21, 1);
5264 NYI_assert (15, 10, 0x2F);
5265
5266 /* Make copies of the source registers in case vd == vn/vm. */
5267 copy_vn = cpu->fr[vn];
5268 copy_vm = cpu->fr[vm];
5269
5270 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5271 switch (size)
5272 {
5273 case 0:
5274 range = full ? 8 : 4;
5275 for (i = 0; i < range; i++)
5276 {
5277 aarch64_set_vec_u8 (cpu, vd, i,
5278 copy_vn.b[i * 2] + copy_vn.b[i * 2 + 1]);
5279 aarch64_set_vec_u8 (cpu, vd, i + range,
5280 copy_vm.b[i * 2] + copy_vm.b[i * 2 + 1]);
5281 }
5282 return;
5283
5284 case 1:
5285 range = full ? 4 : 2;
5286 for (i = 0; i < range; i++)
5287 {
5288 aarch64_set_vec_u16 (cpu, vd, i,
5289 copy_vn.h[i * 2] + copy_vn.h[i * 2 + 1]);
5290 aarch64_set_vec_u16 (cpu, vd, i + range,
5291 copy_vm.h[i * 2] + copy_vm.h[i * 2 + 1]);
5292 }
5293 return;
5294
5295 case 2:
5296 range = full ? 2 : 1;
5297 for (i = 0; i < range; i++)
5298 {
5299 aarch64_set_vec_u32 (cpu, vd, i,
5300 copy_vn.w[i * 2] + copy_vn.w[i * 2 + 1]);
5301 aarch64_set_vec_u32 (cpu, vd, i + range,
5302 copy_vm.w[i * 2] + copy_vm.w[i * 2 + 1]);
5303 }
5304 return;
5305
5306 case 3:
5307 if (! full)
5308 HALT_UNALLOC;
5309 aarch64_set_vec_u64 (cpu, vd, 0, copy_vn.v[0] + copy_vn.v[1]);
5310 aarch64_set_vec_u64 (cpu, vd, 1, copy_vm.v[0] + copy_vm.v[1]);
5311 return;
5312 }
5313 }
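
/* Why the register copies above matter: the pairwise sums are written
   into Vd while Vn and Vm are still being consumed, so Vd == Vn or
   Vd == Vm would otherwise corrupt later pairs.  Lane layout for the
   4S form; illustrative only.  */
#if 0
#include <stdint.h>

static void
addp_4s_example (void)
{
  uint32_t vn[4] = { 1, 2, 3, 4 };
  uint32_t vm[4] = { 10, 20, 30, 40 };
  uint32_t vd[4];

  vd[0] = vn[0] + vn[1];  /* 3  */
  vd[1] = vn[2] + vn[3];  /* 7  */
  vd[2] = vm[0] + vm[1];  /* 30 */
  vd[3] = vm[2] + vm[3];  /* 70 */
}
#endif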
5314
5315 static void
5316 do_vec_UMOV (sim_cpu *cpu)
5317 {
5318 /* instr[31] = 0
5319 instr[30] = 32-bit(0)/64-bit(1)
5320 instr[29,21] = 00 1110 000
5321      instr[20,16] = size & index
5322 instr[15,10] = 0011 11
5323 instr[9,5] = V source
5324 instr[4,0] = R dest. */
5325
5326 unsigned vs = INSTR (9, 5);
5327 unsigned rd = INSTR (4, 0);
5328 unsigned index;
5329
5330 NYI_assert (29, 21, 0x070);
5331 NYI_assert (15, 10, 0x0F);
5332
5333 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5334 if (INSTR (16, 16))
5335 {
5336 /* Byte transfer. */
5337 index = INSTR (20, 17);
5338 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5339 aarch64_get_vec_u8 (cpu, vs, index));
5340 }
5341 else if (INSTR (17, 17))
5342 {
5343 index = INSTR (20, 18);
5344 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5345 aarch64_get_vec_u16 (cpu, vs, index));
5346 }
5347 else if (INSTR (18, 18))
5348 {
5349 index = INSTR (20, 19);
5350 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5351 aarch64_get_vec_u32 (cpu, vs, index));
5352 }
5353 else
5354 {
5355 if (INSTR (30, 30) != 1)
5356 HALT_UNALLOC;
5357
5358 index = INSTR (20, 20);
5359 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5360 aarch64_get_vec_u64 (cpu, vs, index));
5361 }
5362 }
5363
5364 static void
5365 do_vec_FABS (sim_cpu *cpu)
5366 {
5367 /* instr[31] = 0
5368 instr[30] = half(0)/full(1)
5369 instr[29,23] = 00 1110 1
5370 instr[22] = float(0)/double(1)
5371 instr[21,16] = 10 0000
5372 instr[15,10] = 1111 10
5373 instr[9,5] = Vn
5374 instr[4,0] = Vd. */
5375
5376 unsigned vn = INSTR (9, 5);
5377 unsigned vd = INSTR (4, 0);
5378 unsigned full = INSTR (30, 30);
5379 unsigned i;
5380
5381 NYI_assert (29, 23, 0x1D);
5382 NYI_assert (21, 10, 0x83E);
5383
5384 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5385 if (INSTR (22, 22))
5386 {
5387 if (! full)
5388 HALT_NYI;
5389
5390 for (i = 0; i < 2; i++)
5391 aarch64_set_vec_double (cpu, vd, i,
5392 fabs (aarch64_get_vec_double (cpu, vn, i)));
5393 }
5394 else
5395 {
5396 for (i = 0; i < (full ? 4 : 2); i++)
5397 aarch64_set_vec_float (cpu, vd, i,
5398 fabsf (aarch64_get_vec_float (cpu, vn, i)));
5399 }
5400 }
5401
5402 static void
5403 do_vec_FCVTZS (sim_cpu *cpu)
5404 {
5405 /* instr[31] = 0
5406 instr[30] = half (0) / all (1)
5407 instr[29,23] = 00 1110 1
5408 instr[22] = single (0) / double (1)
5409 instr[21,10] = 10 0001 1011 10
5410 instr[9,5] = Rn
5411 instr[4,0] = Rd. */
5412
5413 unsigned rn = INSTR (9, 5);
5414 unsigned rd = INSTR (4, 0);
5415 unsigned full = INSTR (30, 30);
5416 unsigned i;
5417
5418 NYI_assert (31, 31, 0);
5419 NYI_assert (29, 23, 0x1D);
5420 NYI_assert (21, 10, 0x86E);
5421
5422 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5423 if (INSTR (22, 22))
5424 {
5425 if (! full)
5426 HALT_UNALLOC;
5427
5428 for (i = 0; i < 2; i++)
5429 aarch64_set_vec_s64 (cpu, rd, i,
5430 (int64_t) aarch64_get_vec_double (cpu, rn, i));
5431 }
5432 else
5433 for (i = 0; i < (full ? 4 : 2); i++)
5434 aarch64_set_vec_s32 (cpu, rd, i,
5435 (int32_t) aarch64_get_vec_float (cpu, rn, i));
5436 }
5437
5438 static void
5439 do_vec_REV64 (sim_cpu *cpu)
5440 {
5441 /* instr[31] = 0
5442 instr[30] = full/half
5443 instr[29,24] = 00 1110
5444 instr[23,22] = size
5445 instr[21,10] = 10 0000 0000 10
5446 instr[9,5] = Rn
5447 instr[4,0] = Rd. */
5448
5449 unsigned rn = INSTR (9, 5);
5450 unsigned rd = INSTR (4, 0);
5451 unsigned size = INSTR (23, 22);
5452 unsigned full = INSTR (30, 30);
5453 unsigned i;
5454 FRegister val;
5455
5456 NYI_assert (29, 24, 0x0E);
5457 NYI_assert (21, 10, 0x802);
5458
5459 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5460 switch (size)
5461 {
5462 case 0:
5463 for (i = 0; i < (full ? 16 : 8); i++)
5464 val.b[i ^ 0x7] = aarch64_get_vec_u8 (cpu, rn, i);
5465 break;
5466
5467 case 1:
5468 for (i = 0; i < (full ? 8 : 4); i++)
5469 val.h[i ^ 0x3] = aarch64_get_vec_u16 (cpu, rn, i);
5470 break;
5471
5472 case 2:
5473 for (i = 0; i < (full ? 4 : 2); i++)
5474 val.w[i ^ 0x1] = aarch64_get_vec_u32 (cpu, rn, i);
5475 break;
5476
5477 case 3:
5478 HALT_UNALLOC;
5479 }
5480
5481 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5482 if (full)
5483 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5484 }
5485
5486 static void
5487 do_vec_REV16 (sim_cpu *cpu)
5488 {
5489 /* instr[31] = 0
5490 instr[30] = full/half
5491 instr[29,24] = 00 1110
5492 instr[23,22] = size
5493 instr[21,10] = 10 0000 0001 10
5494 instr[9,5] = Rn
5495 instr[4,0] = Rd. */
5496
5497 unsigned rn = INSTR (9, 5);
5498 unsigned rd = INSTR (4, 0);
5499 unsigned size = INSTR (23, 22);
5500 unsigned full = INSTR (30, 30);
5501 unsigned i;
5502 FRegister val;
5503
5504 NYI_assert (29, 24, 0x0E);
5505 NYI_assert (21, 10, 0x806);
5506
5507 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5508 switch (size)
5509 {
5510 case 0:
5511 for (i = 0; i < (full ? 16 : 8); i++)
5512 val.b[i ^ 0x1] = aarch64_get_vec_u8 (cpu, rn, i);
5513 break;
5514
5515 default:
5516 HALT_UNALLOC;
5517 }
5518
5519 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5520 if (full)
5521 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5522 }
5523
5524 static void
5525 do_vec_op1 (sim_cpu *cpu)
5526 {
5527 /* instr[31] = 0
5528 instr[30] = half/full
5529 instr[29,24] = 00 1110
5530 instr[23,21] = ???
5531 instr[20,16] = Vm
5532 instr[15,10] = sub-opcode
5533 instr[9,5] = Vn
5534 instr[4,0] = Vd */
5535 NYI_assert (29, 24, 0x0E);
5536
5537 if (INSTR (21, 21) == 0)
5538 {
5539 if (INSTR (23, 22) == 0)
5540 {
5541 if (INSTR (30, 30) == 1
5542 && INSTR (17, 14) == 0
5543 && INSTR (12, 10) == 7)
5544 return do_vec_ins_2 (cpu);
5545
5546 switch (INSTR (15, 10))
5547 {
5548 case 0x01: do_vec_DUP_vector_into_vector (cpu); return;
5549 case 0x03: do_vec_DUP_scalar_into_vector (cpu); return;
5550 case 0x07: do_vec_INS (cpu); return;
5551 case 0x0A: do_vec_TRN (cpu); return;
5552
5553 case 0x0F:
5554 if (INSTR (17, 16) == 0)
5555 {
5556 do_vec_MOV_into_scalar (cpu);
5557 return;
5558 }
5559 break;
5560
5561 case 0x00:
5562 case 0x08:
5563 case 0x10:
5564 case 0x18:
5565 do_vec_TBL (cpu); return;
5566
5567 case 0x06:
5568 case 0x16:
5569 do_vec_UZP (cpu); return;
5570
5571 case 0x0E:
5572 case 0x1E:
5573 do_vec_ZIP (cpu); return;
5574
5575 default:
5576 HALT_NYI;
5577 }
5578 }
5579
5580 switch (INSTR (13, 10))
5581 {
5582 case 0x6: do_vec_UZP (cpu); return;
5583 case 0xE: do_vec_ZIP (cpu); return;
5584 case 0xA: do_vec_TRN (cpu); return;
5585 case 0xF: do_vec_UMOV (cpu); return;
5586 default: HALT_NYI;
5587 }
5588 }
5589
5590 switch (INSTR (15, 10))
5591 {
5592 case 0x02: do_vec_REV64 (cpu); return;
5593 case 0x06: do_vec_REV16 (cpu); return;
5594
5595 case 0x07:
5596 switch (INSTR (23, 21))
5597 {
5598 case 1: do_vec_AND (cpu); return;
5599 case 3: do_vec_BIC (cpu); return;
5600 case 5: do_vec_ORR (cpu); return;
5601 case 7: do_vec_ORN (cpu); return;
5602 default: HALT_NYI;
5603 }
5604
5605 case 0x08: do_vec_sub_long (cpu); return;
5606 case 0x0a: do_vec_XTN (cpu); return;
5607 case 0x11: do_vec_SSHL (cpu); return;
5608 case 0x19: do_vec_max (cpu); return;
5609 case 0x1B: do_vec_min (cpu); return;
5610 case 0x21: do_vec_add (cpu); return;
5611 case 0x25: do_vec_MLA (cpu); return;
5612 case 0x27: do_vec_mul (cpu); return;
5613 case 0x2F: do_vec_ADDP (cpu); return;
5614 case 0x30: do_vec_mull (cpu); return;
5615 case 0x33: do_vec_FMLA (cpu); return;
5616 case 0x35: do_vec_fadd (cpu); return;
5617
5618 case 0x2E:
5619 switch (INSTR (20, 16))
5620 {
5621 case 0x00: do_vec_ABS (cpu); return;
5622 case 0x01: do_vec_FCVTZS (cpu); return;
5623 case 0x11: do_vec_ADDV (cpu); return;
5624 default: HALT_NYI;
5625 }
5626
5627 case 0x31:
5628 case 0x3B:
5629 do_vec_Fminmax (cpu); return;
5630
5631 case 0x0D:
5632 case 0x0F:
5633 case 0x22:
5634 case 0x23:
5635 case 0x26:
5636 case 0x2A:
5637 case 0x32:
5638 case 0x36:
5639 case 0x39:
5640 case 0x3A:
5641 do_vec_compare (cpu); return;
5642
5643 case 0x3E:
5644 do_vec_FABS (cpu); return;
5645
5646 default:
5647 HALT_NYI;
5648 }
5649 }
5650
5651 static void
5652 do_vec_xtl (sim_cpu *cpu)
5653 {
5654 /* instr[31] = 0
5655 instr[30,29] = SXTL (00), UXTL (01), SXTL2 (10), UXTL2 (11)
5656 instr[28,22] = 0 1111 00
5657 instr[21,16] = size & shift (USHLL, SSHLL, USHLL2, SSHLL2)
5658 instr[15,10] = 1010 01
5659 instr[9,5] = V source
5660 instr[4,0] = V dest. */
5661
5662 unsigned vs = INSTR (9, 5);
5663 unsigned vd = INSTR (4, 0);
5664 unsigned i, shift, bias = 0;
5665
5666 NYI_assert (28, 22, 0x3C);
5667 NYI_assert (15, 10, 0x29);
5668
5669 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5670 switch (INSTR (30, 29))
5671 {
5672 case 2: /* SXTL2, SSHLL2. */
5673       bias = 2; /* Fall through.  */
5674 case 0: /* SXTL, SSHLL. */
5675 if (INSTR (21, 21))
5676 {
5677 int64_t val1, val2;
5678
5679 shift = INSTR (20, 16);
5680 /* Get the source values before setting the destination values
5681 in case the source and destination are the same. */
5682 val1 = aarch64_get_vec_s32 (cpu, vs, bias) << shift;
5683 val2 = aarch64_get_vec_s32 (cpu, vs, bias + 1) << shift;
5684 aarch64_set_vec_s64 (cpu, vd, 0, val1);
5685 aarch64_set_vec_s64 (cpu, vd, 1, val2);
5686 }
5687 else if (INSTR (20, 20))
5688 {
5689 int32_t v[4];
5691
5692 shift = INSTR (19, 16);
5693 bias *= 2;
5694 for (i = 0; i < 4; i++)
5695 v[i] = aarch64_get_vec_s16 (cpu, vs, bias + i) << shift;
5696 for (i = 0; i < 4; i++)
5697 aarch64_set_vec_s32 (cpu, vd, i, v[i]);
5698 }
5699 else
5700 {
5701 int16_t v[8];
5702 NYI_assert (19, 19, 1);
5703
5704 shift = INSTR (18, 16);
5705 bias *= 4;
5706 for (i = 0; i < 8; i++)
5707 v[i] = aarch64_get_vec_s8 (cpu, vs, i + bias) << shift;
5708 for (i = 0; i < 8; i++)
5709 aarch64_set_vec_s16 (cpu, vd, i, v[i]);
5710 }
5711 return;
5712
5713 case 3: /* UXTL2, USHLL2. */
5714       bias = 2; /* Fall through.  */
5715 case 1: /* UXTL, USHLL. */
5716 if (INSTR (21, 21))
5717 {
5718 uint64_t v1, v2;
5719 shift = INSTR (20, 16);
5720 v1 = aarch64_get_vec_u32 (cpu, vs, bias) << shift;
5721 v2 = aarch64_get_vec_u32 (cpu, vs, bias + 1) << shift;
5722 aarch64_set_vec_u64 (cpu, vd, 0, v1);
5723 aarch64_set_vec_u64 (cpu, vd, 1, v2);
5724 }
5725 else if (INSTR (20, 20))
5726 {
5727 uint32_t v[4];
5728 shift = INSTR (19, 16);
5729 bias *= 2;
5730 for (i = 0; i < 4; i++)
5731 v[i] = aarch64_get_vec_u16 (cpu, vs, i + bias) << shift;
5732 for (i = 0; i < 4; i++)
5733 aarch64_set_vec_u32 (cpu, vd, i, v[i]);
5734 }
5735 else
5736 {
5737 uint16_t v[8];
5738 NYI_assert (19, 19, 1);
5739
5740 shift = INSTR (18, 16);
5741 bias *= 4;
5742 for (i = 0; i < 8; i++)
5743 v[i] = aarch64_get_vec_u8 (cpu, vs, i + bias) << shift;
5744 for (i = 0; i < 8; i++)
5745 aarch64_set_vec_u16 (cpu, vd, i, v[i]);
5746 }
5747 return;
5748 }
5749 }
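
/* The temporaries above are the point of this routine: SXTL/UXTL may
   name the same register as source and destination, so every source
   element must be read before any destination element is written.
   Sketch of the failure mode on a little-endian host; illustrative
   only.  */
#if 0
#include <stdint.h>

static void
sxtl_inplace_example (void)
{
  /* One buffer standing in for Vd == Vs: 4 x s16 in, 4 x s32 out.  */
  union { int16_t h[8]; int32_t w[4]; } reg = { .h = { 1, 2, 3, 4 } };
  int32_t tmp[4];
  unsigned i;

  for (i = 0; i < 4; i++)  /* Read everything first...  */
    tmp[i] = reg.h[i];
  for (i = 0; i < 4; i++)  /* ...then widen in place safely.  */
    reg.w[i] = tmp[i];

  /* Writing reg.w[0] first would clobber reg.h[1] (w[0] spans h[0] and
     h[1]), so a later read of h[1] would yield 0 rather than 2.  */
}
#endif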
5750
5751 static void
5752 do_vec_SHL (sim_cpu *cpu)
5753 {
5754 /* instr [31] = 0
5755 instr [30] = half(0)/full(1)
5756 instr [29,23] = 001 1110
5757 instr [22,16] = size and shift amount
5758 instr [15,10] = 01 0101
5759 instr [9, 5] = Vs
5760 instr [4, 0] = Vd. */
5761
5762 int shift;
5763 int full = INSTR (30, 30);
5764 unsigned vs = INSTR (9, 5);
5765 unsigned vd = INSTR (4, 0);
5766 unsigned i;
5767
5768 NYI_assert (29, 23, 0x1E);
5769 NYI_assert (15, 10, 0x15);
5770
5771 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5772 if (INSTR (22, 22))
5773 {
5774 shift = INSTR (21, 16);
5775
5776 if (full == 0)
5777 HALT_UNALLOC;
5778
5779 for (i = 0; i < 2; i++)
5780 {
5781 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5782 aarch64_set_vec_u64 (cpu, vd, i, val << shift);
5783 }
5784
5785 return;
5786 }
5787
5788 if (INSTR (21, 21))
5789 {
5790 shift = INSTR (20, 16);
5791
5792 for (i = 0; i < (full ? 4 : 2); i++)
5793 {
5794 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5795 aarch64_set_vec_u32 (cpu, vd, i, val << shift);
5796 }
5797
5798 return;
5799 }
5800
5801 if (INSTR (20, 20))
5802 {
5803 shift = INSTR (19, 16);
5804
5805 for (i = 0; i < (full ? 8 : 4); i++)
5806 {
5807 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5808 aarch64_set_vec_u16 (cpu, vd, i, val << shift);
5809 }
5810
5811 return;
5812 }
5813
5814 if (INSTR (19, 19) == 0)
5815 HALT_UNALLOC;
5816
5817 shift = INSTR (18, 16);
5818
5819 for (i = 0; i < (full ? 16 : 8); i++)
5820 {
5821 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5822 aarch64_set_vec_u8 (cpu, vd, i, val << shift);
5823 }
5824 }
5825
5826 static void
5827 do_vec_SSHR_USHR (sim_cpu *cpu)
5828 {
5829 /* instr [31] = 0
5830 instr [30] = half(0)/full(1)
5831 instr [29] = signed(0)/unsigned(1)
5832 instr [28,23] = 0 1111 0
5833 instr [22,16] = size and shift amount
5834 instr [15,10] = 0000 01
5835 instr [9, 5] = Vs
5836 instr [4, 0] = Vd. */
5837
5838 int full = INSTR (30, 30);
5839 int sign = ! INSTR (29, 29);
5840 unsigned shift = INSTR (22, 16);
5841 unsigned vs = INSTR (9, 5);
5842 unsigned vd = INSTR (4, 0);
5843 unsigned i;
5844
5845 NYI_assert (28, 23, 0x1E);
5846 NYI_assert (15, 10, 0x01);
5847
5848 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5849 if (INSTR (22, 22))
5850 {
5851 shift = 128 - shift;
5852
5853 if (full == 0)
5854 HALT_UNALLOC;
5855
5856 if (sign)
5857 for (i = 0; i < 2; i++)
5858 {
5859 int64_t val = aarch64_get_vec_s64 (cpu, vs, i);
5860 aarch64_set_vec_s64 (cpu, vd, i, val >> shift);
5861 }
5862 else
5863 for (i = 0; i < 2; i++)
5864 {
5865 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5866 aarch64_set_vec_u64 (cpu, vd, i, val >> shift);
5867 }
5868
5869 return;
5870 }
5871
5872 if (INSTR (21, 21))
5873 {
5874 shift = 64 - shift;
5875
5876 if (sign)
5877 for (i = 0; i < (full ? 4 : 2); i++)
5878 {
5879 int32_t val = aarch64_get_vec_s32 (cpu, vs, i);
5880 aarch64_set_vec_s32 (cpu, vd, i, val >> shift);
5881 }
5882 else
5883 for (i = 0; i < (full ? 4 : 2); i++)
5884 {
5885 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5886 aarch64_set_vec_u32 (cpu, vd, i, val >> shift);
5887 }
5888
5889 return;
5890 }
5891
5892 if (INSTR (20, 20))
5893 {
5894 shift = 32 - shift;
5895
5896 if (sign)
5897 for (i = 0; i < (full ? 8 : 4); i++)
5898 {
5899 int16_t val = aarch64_get_vec_s16 (cpu, vs, i);
5900 aarch64_set_vec_s16 (cpu, vd, i, val >> shift);
5901 }
5902 else
5903 for (i = 0; i < (full ? 8 : 4); i++)
5904 {
5905 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5906 aarch64_set_vec_u16 (cpu, vd, i, val >> shift);
5907 }
5908
5909 return;
5910 }
5911
5912 if (INSTR (19, 19) == 0)
5913 HALT_UNALLOC;
5914
5915 shift = 16 - shift;
5916
5917 if (sign)
5918 for (i = 0; i < (full ? 16 : 8); i++)
5919 {
5920 int8_t val = aarch64_get_vec_s8 (cpu, vs, i);
5921 aarch64_set_vec_s8 (cpu, vd, i, val >> shift);
5922 }
5923 else
5924 for (i = 0; i < (full ? 16 : 8); i++)
5925 {
5926 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5927 aarch64_set_vec_u8 (cpu, vd, i, val >> shift);
5928 }
5929 }
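
/* The immediate decode above: right shifts are encoded as
   (2 * esize) - (immh:immb), so the routine recovers the count by
   subtracting from 128, 64, 32 or 16 according to element size.
   Sketch for 32-bit elements; illustrative only.  */
#if 0
static unsigned
ushr_decode_example (unsigned immh_immb)
{
  /* 32-bit lanes: immh = 01xx, i.e. immh:immb in [32,63].  */
  return 64 - immh_immb;  /* e.g. 62 decodes to a shift of 2.  */
}
#endif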
5930
5931 static void
5932 do_vec_MUL_by_element (sim_cpu *cpu)
5933 {
5934 /* instr[31] = 0
5935 instr[30] = half/full
5936 instr[29,24] = 00 1111
5937 instr[23,22] = size
5938 instr[21] = L
5939 instr[20] = M
5940 instr[19,16] = m
5941 instr[15,12] = 1000
5942 instr[11] = H
5943 instr[10] = 0
5944 instr[9,5] = Vn
5945 instr[4,0] = Vd */
5946
5947 unsigned full = INSTR (30, 30);
5948 unsigned L = INSTR (21, 21);
5949 unsigned H = INSTR (11, 11);
5950 unsigned vn = INSTR (9, 5);
5951 unsigned vd = INSTR (4, 0);
5952 unsigned size = INSTR (23, 22);
5953 unsigned index;
5954 unsigned vm;
5955 unsigned e;
5956
5957 NYI_assert (29, 24, 0x0F);
5958 NYI_assert (15, 12, 0x8);
5959 NYI_assert (10, 10, 0);
5960
5961 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5962 switch (size)
5963 {
5964 case 1:
5965 {
5966 /* 16 bit products. */
5967 uint16_t product;
5968 uint16_t element1;
5969 uint16_t element2;
5970
5971 index = (H << 2) | (L << 1) | INSTR (20, 20);
5972 vm = INSTR (19, 16);
5973 element2 = aarch64_get_vec_u16 (cpu, vm, index);
5974
5975 for (e = 0; e < (full ? 8 : 4); e ++)
5976 {
5977 element1 = aarch64_get_vec_u16 (cpu, vn, e);
5978 product = element1 * element2;
5979 aarch64_set_vec_u16 (cpu, vd, e, product);
5980 }
5981 }
5982 break;
5983
5984 case 2:
5985 {
5986 /* 32 bit products. */
5987 uint32_t product;
5988 uint32_t element1;
5989 uint32_t element2;
5990
5991 index = (H << 1) | L;
5992 vm = INSTR (20, 16);
5993 element2 = aarch64_get_vec_u32 (cpu, vm, index);
5994
5995 for (e = 0; e < (full ? 4 : 2); e ++)
5996 {
5997 element1 = aarch64_get_vec_u32 (cpu, vn, e);
5998 product = element1 * element2;
5999 aarch64_set_vec_u32 (cpu, vd, e, product);
6000 }
6001 }
6002 break;
6003
6004 default:
6005 HALT_UNALLOC;
6006 }
6007 }
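
/* Index construction for the by-element form above: the element number
   is assembled from bits scattered across the instruction (H:L:M for
   16-bit lanes, H:L for 32-bit lanes).  Sketch; illustrative only.  */
#if 0
static unsigned
mul_elem_index_example (unsigned H, unsigned L, unsigned M, int is_16bit)
{
  return is_16bit ? ((H << 2) | (L << 1) | M)  /* 0..7  */
                  : ((H << 1) | L);            /* 0..3  */
}
#endif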
6008
6009 static void
6010 do_FMLA_by_element (sim_cpu *cpu)
6011 {
6012 /* instr[31] = 0
6013 instr[30] = half/full
6014 instr[29,23] = 00 1111 1
6015 instr[22] = size
6016 instr[21] = L
6017 instr[20,16] = m
6018 instr[15,12] = 0001
6019 instr[11] = H
6020 instr[10] = 0
6021 instr[9,5] = Vn
6022 instr[4,0] = Vd */
6023
6024 unsigned full = INSTR (30, 30);
6025 unsigned size = INSTR (22, 22);
6026 unsigned L = INSTR (21, 21);
6027 unsigned vm = INSTR (20, 16);
6028 unsigned H = INSTR (11, 11);
6029 unsigned vn = INSTR (9, 5);
6030 unsigned vd = INSTR (4, 0);
6031 unsigned e;
6032
6033 NYI_assert (29, 23, 0x1F);
6034 NYI_assert (15, 12, 0x1);
6035 NYI_assert (10, 10, 0);
6036
6037 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6038 if (size)
6039 {
6040 double element1, element2;
6041
6042 if (! full || L)
6043 HALT_UNALLOC;
6044
6045 element2 = aarch64_get_vec_double (cpu, vm, H);
6046
6047 for (e = 0; e < 2; e++)
6048 {
6049 element1 = aarch64_get_vec_double (cpu, vn, e);
6050 element1 *= element2;
6051 element1 += aarch64_get_vec_double (cpu, vd, e);
6052 aarch64_set_vec_double (cpu, vd, e, element1);
6053 }
6054 }
6055 else
6056 {
6057 float element1;
6058 float element2 = aarch64_get_vec_float (cpu, vm, (H << 1) | L);
6059
6060 for (e = 0; e < (full ? 4 : 2); e++)
6061 {
6062 element1 = aarch64_get_vec_float (cpu, vn, e);
6063 element1 *= element2;
6064 element1 += aarch64_get_vec_float (cpu, vd, e);
6065 aarch64_set_vec_float (cpu, vd, e, element1);
6066 }
6067 }
6068 }
6069
6070 static void
6071 do_vec_op2 (sim_cpu *cpu)
6072 {
6073 /* instr[31] = 0
6074 instr[30] = half/full
6075 instr[29,24] = 00 1111
6076 instr[23] = ?
6077 instr[22,16] = element size & index
6078 instr[15,10] = sub-opcode
6079 instr[9,5] = Vm
6080 instr[4,0] = Vd */
6081
6082 NYI_assert (29, 24, 0x0F);
6083
6084 if (INSTR (23, 23) != 0)
6085 {
6086 switch (INSTR (15, 10))
6087 {
6088 case 0x04:
6089 case 0x06:
6090 do_FMLA_by_element (cpu);
6091 return;
6092
6093 case 0x20:
6094 case 0x22:
6095 do_vec_MUL_by_element (cpu);
6096 return;
6097
6098 default:
6099 HALT_NYI;
6100 }
6101 }
6102 else
6103 {
6104 switch (INSTR (15, 10))
6105 {
6106 case 0x01: do_vec_SSHR_USHR (cpu); return;
6107 case 0x15: do_vec_SHL (cpu); return;
6108 case 0x20:
6109 case 0x22: do_vec_MUL_by_element (cpu); return;
6110 case 0x29: do_vec_xtl (cpu); return;
6111 default: HALT_NYI;
6112 }
6113 }
6114 }
6115
6116 static void
6117 do_vec_neg (sim_cpu *cpu)
6118 {
6119 /* instr[31] = 0
6120 instr[30] = full(1)/half(0)
6121 instr[29,24] = 10 1110
6122 instr[23,22] = size: byte(00), half (01), word (10), long (11)
6123 instr[21,10] = 1000 0010 1110
6124 instr[9,5] = Vs
6125 instr[4,0] = Vd */
6126
6127 int full = INSTR (30, 30);
6128 unsigned vs = INSTR (9, 5);
6129 unsigned vd = INSTR (4, 0);
6130 unsigned i;
6131
6132 NYI_assert (29, 24, 0x2E);
6133 NYI_assert (21, 10, 0x82E);
6134
6135 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6136 switch (INSTR (23, 22))
6137 {
6138 case 0:
6139 for (i = 0; i < (full ? 16 : 8); i++)
6140 aarch64_set_vec_s8 (cpu, vd, i, - aarch64_get_vec_s8 (cpu, vs, i));
6141 return;
6142
6143 case 1:
6144 for (i = 0; i < (full ? 8 : 4); i++)
6145 aarch64_set_vec_s16 (cpu, vd, i, - aarch64_get_vec_s16 (cpu, vs, i));
6146 return;
6147
6148 case 2:
6149 for (i = 0; i < (full ? 4 : 2); i++)
6150 aarch64_set_vec_s32 (cpu, vd, i, - aarch64_get_vec_s32 (cpu, vs, i));
6151 return;
6152
6153 case 3:
6154 if (! full)
6155 HALT_NYI;
6156 for (i = 0; i < 2; i++)
6157 aarch64_set_vec_s64 (cpu, vd, i, - aarch64_get_vec_s64 (cpu, vs, i));
6158 return;
6159 }
6160 }
6161
6162 static void
6163 do_vec_sqrt (sim_cpu *cpu)
6164 {
6165 /* instr[31] = 0
6166 instr[30] = full(1)/half(0)
6167 instr[29,23] = 101 1101
6168 instr[22] = single(0)/double(1)
6169 instr[21,10] = 1000 0111 1110
6170 instr[9,5] = Vs
6171 instr[4,0] = Vd. */
6172
6173 int full = INSTR (30, 30);
6174 unsigned vs = INSTR (9, 5);
6175 unsigned vd = INSTR (4, 0);
6176 unsigned i;
6177
6178 NYI_assert (29, 23, 0x5B);
6179 NYI_assert (21, 10, 0x87E);
6180
6181 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6182 if (INSTR (22, 22) == 0)
6183 for (i = 0; i < (full ? 4 : 2); i++)
6184 aarch64_set_vec_float (cpu, vd, i,
6185 sqrtf (aarch64_get_vec_float (cpu, vs, i)));
6186 else
6187 for (i = 0; i < 2; i++)
6188 aarch64_set_vec_double (cpu, vd, i,
6189 sqrt (aarch64_get_vec_double (cpu, vs, i)));
6190 }
6191
6192 static void
6193 do_vec_mls_indexed (sim_cpu *cpu)
6194 {
6195 /* instr[31] = 0
6196 instr[30] = half(0)/full(1)
6197 instr[29,24] = 10 1111
6198 instr[23,22] = 16-bit(01)/32-bit(10)
6199      instr[21,20],instr[11] = index (if 16-bit)
6200      instr[21],instr[11] = index (if 32-bit)
6201 instr[20,16] = Vm
6202 instr[15,12] = 0100
6203 instr[11] = part of index
6204 instr[10] = 0
6205 instr[9,5] = Vs
6206 instr[4,0] = Vd. */
6207
6208 int full = INSTR (30, 30);
6209 unsigned vs = INSTR (9, 5);
6210 unsigned vd = INSTR (4, 0);
6211 unsigned vm = INSTR (20, 16);
6212 unsigned i;
6213
6214 NYI_assert (15, 12, 4);
6215 NYI_assert (10, 10, 0);
6216
6217 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6218 switch (INSTR (23, 22))
6219 {
6220 case 1:
6221 {
6222 unsigned elem;
6223 uint32_t val;
6224
6225 if (vm > 15)
6226 HALT_NYI;
6227
6228 elem = (INSTR (21, 20) << 1) | INSTR (11, 11);
6229 val = aarch64_get_vec_u16 (cpu, vm, elem);
6230
6231 	  for (i = 0; i < (full ? 8 : 4); i++)
6232 	    aarch64_set_vec_u16 (cpu, vd, i,
6233 				 aarch64_get_vec_u16 (cpu, vd, i)
6234 				 - (aarch64_get_vec_u16 (cpu, vs, i) * val));
6235 return;
6236 }
6237
6238 case 2:
6239 {
6240 unsigned elem = (INSTR (21, 21) << 1) | INSTR (11, 11);
6241 	uint32_t val = aarch64_get_vec_u32 (cpu, vm, elem);
6242
6243 	for (i = 0; i < (full ? 4 : 2); i++)
6244 	  aarch64_set_vec_u32 (cpu, vd, i,
6245 			       aarch64_get_vec_u32 (cpu, vd, i)
6246 			       - (aarch64_get_vec_u32 (cpu, vs, i) * val));
6247 return;
6248 }
6249
6250 case 0:
6251 case 3:
6252 default:
6253 HALT_NYI;
6254 }
6255 }
6256
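/* Worked example for the by-element index decode above (illustrative
   only; this helper is not part of the simulator and the values are
   hypothetical).  The index is assembled from non-contiguous bits:
   3 bits (instr[21,20]:instr[11]) for 16-bit lanes and 2 bits
   (instr[21]:instr[11]) for 32-bit lanes.  */
#if 0
static unsigned
sketch_mls_by_element_index (unsigned bit_21, unsigned bit_20,
			     unsigned bit_11, int sixteen_bit)
{
  if (sixteen_bit)
    return (bit_21 << 2) | (bit_20 << 1) | bit_11; /* e.g. 1:0:1 => Vm.h[5].  */
  return (bit_21 << 1) | bit_11;		   /* e.g. 1:0   => Vm.s[2].  */
}
#endif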
6257 static void
6258 do_vec_SUB (sim_cpu *cpu)
6259 {
6260 /* instr [31] = 0
6261 instr [30] = half(0)/full(1)
6262 instr [29,24] = 10 1110
6263      instr [23,22] = size: byte(00), half(01), word(10), long(11)
6264 instr [21] = 1
6265 instr [20,16] = Vm
6266 instr [15,10] = 10 0001
6267 instr [9, 5] = Vn
6268 instr [4, 0] = Vd. */
6269
6270 unsigned full = INSTR (30, 30);
6271 unsigned vm = INSTR (20, 16);
6272 unsigned vn = INSTR (9, 5);
6273 unsigned vd = INSTR (4, 0);
6274 unsigned i;
6275
6276 NYI_assert (29, 24, 0x2E);
6277 NYI_assert (21, 21, 1);
6278 NYI_assert (15, 10, 0x21);
6279
6280 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6281 switch (INSTR (23, 22))
6282 {
6283 case 0:
6284 for (i = 0; i < (full ? 16 : 8); i++)
6285 aarch64_set_vec_s8 (cpu, vd, i,
6286 aarch64_get_vec_s8 (cpu, vn, i)
6287 - aarch64_get_vec_s8 (cpu, vm, i));
6288 return;
6289
6290 case 1:
6291 for (i = 0; i < (full ? 8 : 4); i++)
6292 aarch64_set_vec_s16 (cpu, vd, i,
6293 aarch64_get_vec_s16 (cpu, vn, i)
6294 - aarch64_get_vec_s16 (cpu, vm, i));
6295 return;
6296
6297 case 2:
6298 for (i = 0; i < (full ? 4 : 2); i++)
6299 aarch64_set_vec_s32 (cpu, vd, i,
6300 aarch64_get_vec_s32 (cpu, vn, i)
6301 - aarch64_get_vec_s32 (cpu, vm, i));
6302 return;
6303
6304 case 3:
6305 if (full == 0)
6306 HALT_UNALLOC;
6307
6308 for (i = 0; i < 2; i++)
6309 aarch64_set_vec_s64 (cpu, vd, i,
6310 aarch64_get_vec_s64 (cpu, vn, i)
6311 - aarch64_get_vec_s64 (cpu, vm, i));
6312 return;
6313 }
6314 }
6315
6316 static void
6317 do_vec_MLS (sim_cpu *cpu)
6318 {
6319 /* instr [31] = 0
6320 instr [30] = half(0)/full(1)
6321 instr [29,24] = 10 1110
6322      instr [23,22] = size: byte(00), half(01), word(10)
6323 instr [21] = 1
6324 instr [20,16] = Vm
6325 instr [15,10] = 10 0101
6326 instr [9, 5] = Vn
6327 instr [4, 0] = Vd. */
6328
6329 unsigned full = INSTR (30, 30);
6330 unsigned vm = INSTR (20, 16);
6331 unsigned vn = INSTR (9, 5);
6332 unsigned vd = INSTR (4, 0);
6333 unsigned i;
6334
6335 NYI_assert (29, 24, 0x2E);
6336 NYI_assert (21, 21, 1);
6337 NYI_assert (15, 10, 0x25);
6338
6339 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6340 switch (INSTR (23, 22))
6341 {
6342 case 0:
6343 for (i = 0; i < (full ? 16 : 8); i++)
6344 aarch64_set_vec_u8 (cpu, vd, i,
6345 aarch64_get_vec_u8 (cpu, vd, i)
6346 - (aarch64_get_vec_u8 (cpu, vn, i)
6347 * aarch64_get_vec_u8 (cpu, vm, i)));
6348 return;
6349
6350 case 1:
6351 for (i = 0; i < (full ? 8 : 4); i++)
6352 aarch64_set_vec_u16 (cpu, vd, i,
6353 aarch64_get_vec_u16 (cpu, vd, i)
6354 - (aarch64_get_vec_u16 (cpu, vn, i)
6355 * aarch64_get_vec_u16 (cpu, vm, i)));
6356 return;
6357
6358 case 2:
6359 for (i = 0; i < (full ? 4 : 2); i++)
6360 aarch64_set_vec_u32 (cpu, vd, i,
6361 aarch64_get_vec_u32 (cpu, vd, i)
6362 - (aarch64_get_vec_u32 (cpu, vn, i)
6363 * aarch64_get_vec_u32 (cpu, vm, i)));
6364 return;
6365
6366 default:
6367 HALT_UNALLOC;
6368 }
6369 }
6370
6371 static void
6372 do_vec_FDIV (sim_cpu *cpu)
6373 {
6374 /* instr [31] = 0
6375 instr [30] = half(0)/full(1)
6376 instr [29,23] = 10 1110 0
6377      instr [22] = float(0)/double(1)
6378 instr [21] = 1
6379 instr [20,16] = Vm
6380 instr [15,10] = 1111 11
6381 instr [9, 5] = Vn
6382 instr [4, 0] = Vd. */
6383
6384 unsigned full = INSTR (30, 30);
6385 unsigned vm = INSTR (20, 16);
6386 unsigned vn = INSTR (9, 5);
6387 unsigned vd = INSTR (4, 0);
6388 unsigned i;
6389
6390 NYI_assert (29, 23, 0x5C);
6391 NYI_assert (21, 21, 1);
6392 NYI_assert (15, 10, 0x3F);
6393
6394 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6395 if (INSTR (22, 22))
6396 {
6397 if (! full)
6398 HALT_UNALLOC;
6399
6400 for (i = 0; i < 2; i++)
6401 aarch64_set_vec_double (cpu, vd, i,
6402 aarch64_get_vec_double (cpu, vn, i)
6403 / aarch64_get_vec_double (cpu, vm, i));
6404 }
6405 else
6406 for (i = 0; i < (full ? 4 : 2); i++)
6407 aarch64_set_vec_float (cpu, vd, i,
6408 aarch64_get_vec_float (cpu, vn, i)
6409 / aarch64_get_vec_float (cpu, vm, i));
6410 }
6411
6412 static void
6413 do_vec_FMUL (sim_cpu *cpu)
6414 {
6415 /* instr [31] = 0
6416 instr [30] = half(0)/full(1)
6417 instr [29,23] = 10 1110 0
6418 instr [22] = float(0)/double(1)
6419 instr [21] = 1
6420 instr [20,16] = Vm
6421 instr [15,10] = 1101 11
6422 instr [9, 5] = Vn
6423 instr [4, 0] = Vd. */
6424
6425 unsigned full = INSTR (30, 30);
6426 unsigned vm = INSTR (20, 16);
6427 unsigned vn = INSTR (9, 5);
6428 unsigned vd = INSTR (4, 0);
6429 unsigned i;
6430
6431 NYI_assert (29, 23, 0x5C);
6432 NYI_assert (21, 21, 1);
6433 NYI_assert (15, 10, 0x37);
6434
6435 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6436 if (INSTR (22, 22))
6437 {
6438 if (! full)
6439 HALT_UNALLOC;
6440
6441 for (i = 0; i < 2; i++)
6442 aarch64_set_vec_double (cpu, vd, i,
6443 aarch64_get_vec_double (cpu, vn, i)
6444 * aarch64_get_vec_double (cpu, vm, i));
6445 }
6446 else
6447 for (i = 0; i < (full ? 4 : 2); i++)
6448 aarch64_set_vec_float (cpu, vd, i,
6449 aarch64_get_vec_float (cpu, vn, i)
6450 * aarch64_get_vec_float (cpu, vm, i));
6451 }
6452
6453 static void
6454 do_vec_FADDP (sim_cpu *cpu)
6455 {
6456 /* instr [31] = 0
6457 instr [30] = half(0)/full(1)
6458 instr [29,23] = 10 1110 0
6459 instr [22] = float(0)/double(1)
6460 instr [21] = 1
6461 instr [20,16] = Vm
6462 instr [15,10] = 1101 01
6463 instr [9, 5] = Vn
6464 instr [4, 0] = Vd. */
6465
6466 unsigned full = INSTR (30, 30);
6467 unsigned vm = INSTR (20, 16);
6468 unsigned vn = INSTR (9, 5);
6469 unsigned vd = INSTR (4, 0);
6470
6471 NYI_assert (29, 23, 0x5C);
6472 NYI_assert (21, 21, 1);
6473 NYI_assert (15, 10, 0x35);
6474
6475 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6476 if (INSTR (22, 22))
6477 {
6478       /* Extract values before adding them in case vd == vn/vm. */
6479 double tmp1 = aarch64_get_vec_double (cpu, vn, 0);
6480 double tmp2 = aarch64_get_vec_double (cpu, vn, 1);
6481 double tmp3 = aarch64_get_vec_double (cpu, vm, 0);
6482 double tmp4 = aarch64_get_vec_double (cpu, vm, 1);
6483
6484 if (! full)
6485 HALT_UNALLOC;
6486
6487 aarch64_set_vec_double (cpu, vd, 0, tmp1 + tmp2);
6488 aarch64_set_vec_double (cpu, vd, 1, tmp3 + tmp4);
6489 }
6490 else
6491 {
6492       /* Extract values before adding them in case vd == vn/vm. */
6493 float tmp1 = aarch64_get_vec_float (cpu, vn, 0);
6494 float tmp2 = aarch64_get_vec_float (cpu, vn, 1);
6495 float tmp5 = aarch64_get_vec_float (cpu, vm, 0);
6496 float tmp6 = aarch64_get_vec_float (cpu, vm, 1);
6497
6498 if (full)
6499 {
6500 float tmp3 = aarch64_get_vec_float (cpu, vn, 2);
6501 float tmp4 = aarch64_get_vec_float (cpu, vn, 3);
6502 float tmp7 = aarch64_get_vec_float (cpu, vm, 2);
6503 float tmp8 = aarch64_get_vec_float (cpu, vm, 3);
6504
6505 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6506 aarch64_set_vec_float (cpu, vd, 1, tmp3 + tmp4);
6507 aarch64_set_vec_float (cpu, vd, 2, tmp5 + tmp6);
6508 aarch64_set_vec_float (cpu, vd, 3, tmp7 + tmp8);
6509 }
6510 else
6511 {
6512 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6513 aarch64_set_vec_float (cpu, vd, 1, tmp5 + tmp6);
6514 }
6515 }
6516 }
6517
6518 static void
6519 do_vec_FSQRT (sim_cpu *cpu)
6520 {
6521 /* instr[31] = 0
6522 instr[30] = half(0)/full(1)
6523 instr[29,23] = 10 1110 1
6524 instr[22] = single(0)/double(1)
6525 instr[21,10] = 10 0001 1111 10
6526 instr[9,5] = Vsrc
6527 instr[4,0] = Vdest. */
6528
6529 unsigned vn = INSTR (9, 5);
6530 unsigned vd = INSTR (4, 0);
6531 unsigned full = INSTR (30, 30);
6532 int i;
6533
6534 NYI_assert (29, 23, 0x5D);
6535 NYI_assert (21, 10, 0x87E);
6536
6537 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6538 if (INSTR (22, 22))
6539 {
6540 if (! full)
6541 HALT_UNALLOC;
6542
6543 for (i = 0; i < 2; i++)
6544 aarch64_set_vec_double (cpu, vd, i,
6545 sqrt (aarch64_get_vec_double (cpu, vn, i)));
6546 }
6547 else
6548 {
6549 for (i = 0; i < (full ? 4 : 2); i++)
6550 aarch64_set_vec_float (cpu, vd, i,
6551 sqrtf (aarch64_get_vec_float (cpu, vn, i)));
6552 }
6553 }
6554
6555 static void
6556 do_vec_FNEG (sim_cpu *cpu)
6557 {
6558 /* instr[31] = 0
6559 instr[30] = half (0)/full (1)
6560 instr[29,23] = 10 1110 1
6561 instr[22] = single (0)/double (1)
6562 instr[21,10] = 10 0000 1111 10
6563 instr[9,5] = Vsrc
6564 instr[4,0] = Vdest. */
6565
6566 unsigned vn = INSTR (9, 5);
6567 unsigned vd = INSTR (4, 0);
6568 unsigned full = INSTR (30, 30);
6569 int i;
6570
6571 NYI_assert (29, 23, 0x5D);
6572 NYI_assert (21, 10, 0x83E);
6573
6574 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6575 if (INSTR (22, 22))
6576 {
6577 if (! full)
6578 HALT_UNALLOC;
6579
6580 for (i = 0; i < 2; i++)
6581 aarch64_set_vec_double (cpu, vd, i,
6582 - aarch64_get_vec_double (cpu, vn, i));
6583 }
6584 else
6585 {
6586 for (i = 0; i < (full ? 4 : 2); i++)
6587 aarch64_set_vec_float (cpu, vd, i,
6588 - aarch64_get_vec_float (cpu, vn, i));
6589 }
6590 }
6591
6592 static void
6593 do_vec_NOT (sim_cpu *cpu)
6594 {
6595 /* instr[31] = 0
6596 instr[30] = half (0)/full (1)
6597 instr[29,10] = 10 1110 0010 0000 0101 10
6598 instr[9,5] = Vn
6599      instr[4,0] = Vd. */
6600
6601 unsigned vn = INSTR (9, 5);
6602 unsigned vd = INSTR (4, 0);
6603 unsigned i;
6604 int full = INSTR (30, 30);
6605
6606 NYI_assert (29, 10, 0xB8816);
6607
6608 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6609 for (i = 0; i < (full ? 16 : 8); i++)
6610 aarch64_set_vec_u8 (cpu, vd, i, ~ aarch64_get_vec_u8 (cpu, vn, i));
6611 }
6612
6613 static unsigned int
6614 clz (uint64_t val, unsigned size)
6615 {
6616 uint64_t mask = 1;
6617 int count;
6618
6619 mask <<= (size - 1);
6620 count = 0;
6621 do
6622 {
6623 if (val & mask)
6624 break;
6625 mask >>= 1;
6626 count ++;
6627 }
6628 while (mask);
6629
6630 return count;
6631 }
6632
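/* Spot checks for the helper above (illustrative only; not part of
   the simulator, and assumes <assert.h> if ever enabled).  */
#if 0
static void
sketch_clz_checks (void)
{
  assert (clz (0x80, 8) == 0);	/* Top bit set: no leading zeros.  */
  assert (clz (0x01, 8) == 7);	/* Only the bottom bit set.  */
  assert (clz (0x00, 8) == 8);	/* All zero: every bit is counted.  */
}
#endif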
6633 static void
6634 do_vec_CLZ (sim_cpu *cpu)
6635 {
6636 /* instr[31] = 0
6637 instr[30] = half (0)/full (1)
6638 instr[29,24] = 10 1110
6639 instr[23,22] = size
6640 instr[21,10] = 10 0000 0100 10
6641 instr[9,5] = Vn
6642      instr[4,0] = Vd. */
6643
6644 unsigned vn = INSTR (9, 5);
6645 unsigned vd = INSTR (4, 0);
6646 unsigned i;
6647 int full = INSTR (30,30);
6648
6649 NYI_assert (29, 24, 0x2E);
6650 NYI_assert (21, 10, 0x812);
6651
6652 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6653 switch (INSTR (23, 22))
6654 {
6655 case 0:
6656 for (i = 0; i < (full ? 16 : 8); i++)
6657 aarch64_set_vec_u8 (cpu, vd, i, clz (aarch64_get_vec_u8 (cpu, vn, i), 8));
6658 break;
6659 case 1:
6660 for (i = 0; i < (full ? 8 : 4); i++)
6661 aarch64_set_vec_u16 (cpu, vd, i, clz (aarch64_get_vec_u16 (cpu, vn, i), 16));
6662 break;
6663 case 2:
6664 for (i = 0; i < (full ? 4 : 2); i++)
6665 aarch64_set_vec_u32 (cpu, vd, i, clz (aarch64_get_vec_u32 (cpu, vn, i), 32));
6666 break;
6667 case 3:
6668 if (! full)
6669 HALT_UNALLOC;
6670 aarch64_set_vec_u64 (cpu, vd, 0, clz (aarch64_get_vec_u64 (cpu, vn, 0), 64));
6671 aarch64_set_vec_u64 (cpu, vd, 1, clz (aarch64_get_vec_u64 (cpu, vn, 1), 64));
6672 break;
6673 }
6674 }
6675
6676 static void
6677 do_vec_MOV_element (sim_cpu *cpu)
6678 {
6679 /* instr[31,21] = 0110 1110 000
6680 instr[20,16] = size & dest index
6681 instr[15] = 0
6682 instr[14,11] = source index
6683 instr[10] = 1
6684 instr[9,5] = Vs
6685      instr[4,0] = Vd. */
6686
6687 unsigned vs = INSTR (9, 5);
6688 unsigned vd = INSTR (4, 0);
6689 unsigned src_index;
6690 unsigned dst_index;
6691
6692 NYI_assert (31, 21, 0x370);
6693 NYI_assert (15, 15, 0);
6694 NYI_assert (10, 10, 1);
6695
6696 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6697 if (INSTR (16, 16))
6698 {
6699 /* Move a byte. */
6700 src_index = INSTR (14, 11);
6701 dst_index = INSTR (20, 17);
6702 aarch64_set_vec_u8 (cpu, vd, dst_index,
6703 aarch64_get_vec_u8 (cpu, vs, src_index));
6704 }
6705 else if (INSTR (17, 17))
6706 {
6707 /* Move 16-bits. */
6708 NYI_assert (11, 11, 0);
6709 src_index = INSTR (14, 12);
6710 dst_index = INSTR (20, 18);
6711 aarch64_set_vec_u16 (cpu, vd, dst_index,
6712 aarch64_get_vec_u16 (cpu, vs, src_index));
6713 }
6714 else if (INSTR (18, 18))
6715 {
6716 /* Move 32-bits. */
6717 NYI_assert (12, 11, 0);
6718 src_index = INSTR (14, 13);
6719 dst_index = INSTR (20, 19);
6720 aarch64_set_vec_u32 (cpu, vd, dst_index,
6721 aarch64_get_vec_u32 (cpu, vs, src_index));
6722 }
6723 else
6724 {
6725 NYI_assert (19, 19, 1);
6726 NYI_assert (13, 11, 0);
6727 src_index = INSTR (14, 14);
6728 dst_index = INSTR (20, 20);
6729 aarch64_set_vec_u64 (cpu, vd, dst_index,
6730 aarch64_get_vec_u64 (cpu, vs, src_index));
6731 }
6732 }
6733
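/* Sketch of the imm5 decode used above (illustrative only; not
   simulator code): the lowest set bit of instr[20,16] selects the
   element size, and the bits above it form the destination index.
   E.g. imm5 = 10011 is a byte move with dst_index = 1001 = 9.  */
#if 0
static void
sketch_decode_ins_imm5 (unsigned imm5, unsigned *size_log2,
			unsigned *dst_index)
{
  unsigned b;

  for (b = 0; b < 4 && ! (imm5 & (1u << b)); b++)
    ;
  *size_log2 = b;		/* 0=>byte, 1=>half, 2=>word, 3=>dword.  */
  *dst_index = imm5 >> (b + 1);
}
#endif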
6734 static void
6735 do_vec_REV32 (sim_cpu *cpu)
6736 {
6737 /* instr[31] = 0
6738 instr[30] = full/half
6739 instr[29,24] = 10 1110
6740 instr[23,22] = size
6741 instr[21,10] = 10 0000 0000 10
6742 instr[9,5] = Rn
6743 instr[4,0] = Rd. */
6744
6745 unsigned rn = INSTR (9, 5);
6746 unsigned rd = INSTR (4, 0);
6747 unsigned size = INSTR (23, 22);
6748 unsigned full = INSTR (30, 30);
6749 unsigned i;
6750 FRegister val;
6751
6752 NYI_assert (29, 24, 0x2E);
6753 NYI_assert (21, 10, 0x802);
6754
6755 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6756 switch (size)
6757 {
6758 case 0:
6759 for (i = 0; i < (full ? 16 : 8); i++)
6760 val.b[i ^ 0x3] = aarch64_get_vec_u8 (cpu, rn, i);
6761 break;
6762
6763 case 1:
6764 for (i = 0; i < (full ? 8 : 4); i++)
6765 val.h[i ^ 0x1] = aarch64_get_vec_u16 (cpu, rn, i);
6766 break;
6767
6768 default:
6769 HALT_UNALLOC;
6770 }
6771
6772 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
6773 if (full)
6774 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
6775 }
6776
6777 static void
6778 do_vec_EXT (sim_cpu *cpu)
6779 {
6780 /* instr[31] = 0
6781 instr[30] = full/half
6782 instr[29,21] = 10 1110 000
6783 instr[20,16] = Vm
6784 instr[15] = 0
6785 instr[14,11] = source index
6786 instr[10] = 0
6787 instr[9,5] = Vn
6788      instr[4,0] = Vd. */
6789
6790 unsigned vm = INSTR (20, 16);
6791 unsigned vn = INSTR (9, 5);
6792 unsigned vd = INSTR (4, 0);
6793 unsigned src_index = INSTR (14, 11);
6794 unsigned full = INSTR (30, 30);
6795 unsigned i;
6796 unsigned j;
6797 FRegister val;
6798
6799 NYI_assert (31, 21, 0x370);
6800 NYI_assert (15, 15, 0);
6801 NYI_assert (10, 10, 0);
6802
6803 if (!full && (src_index & 0x8))
6804 HALT_UNALLOC;
6805
6806 j = 0;
6807
6808 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6809 for (i = src_index; i < (full ? 16 : 8); i++)
6810 val.b[j ++] = aarch64_get_vec_u8 (cpu, vn, i);
6811 for (i = 0; i < src_index; i++)
6812 val.b[j ++] = aarch64_get_vec_u8 (cpu, vm, i);
6813
6814 aarch64_set_vec_u64 (cpu, vd, 0, val.v[0]);
6815 if (full)
6816 aarch64_set_vec_u64 (cpu, vd, 1, val.v[1]);
6817 }
6818
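/* Equivalent model of the EXT data movement above (illustrative
   only): the result is a window of 8 or 16 bytes starting
   "src_index" bytes into the Vn:Vm byte pair.  E.g. for
   EXT Vd.16B, Vn.16B, Vm.16B, #3 the result is Vn bytes 3..15
   followed by Vm bytes 0..2.  */
#if 0
static void
sketch_ext_model (uint8_t *dst, const uint8_t *vn, const uint8_t *vm,
		  unsigned idx, unsigned nbytes)
{
  unsigned i;

  for (i = 0; i < nbytes; i++)
    dst[i] = (idx + i < nbytes) ? vn[idx + i] : vm[idx + i - nbytes];
}
#endif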
6819 static void
6820 dexAdvSIMD0 (sim_cpu *cpu)
6821 {
6822 /* instr [28,25] = 0 111. */
6823   if (INSTR (15, 10) == 0x07
6824       && INSTR (9, 5) == INSTR (20, 16))
6826 {
6827 if (INSTR (31, 21) == 0x075
6828 || INSTR (31, 21) == 0x275)
6829 {
6830 do_vec_MOV_whole_vector (cpu);
6831 return;
6832 }
6833 }
6834
6835 if (INSTR (29, 19) == 0x1E0)
6836 {
6837 do_vec_MOV_immediate (cpu);
6838 return;
6839 }
6840
6841 if (INSTR (29, 19) == 0x5E0)
6842 {
6843 do_vec_MVNI (cpu);
6844 return;
6845 }
6846
6847 if (INSTR (29, 19) == 0x1C0
6848 || INSTR (29, 19) == 0x1C1)
6849 {
6850 if (INSTR (15, 10) == 0x03)
6851 {
6852 do_vec_DUP_scalar_into_vector (cpu);
6853 return;
6854 }
6855 }
6856
6857 switch (INSTR (29, 24))
6858 {
6859 case 0x0E: do_vec_op1 (cpu); return;
6860 case 0x0F: do_vec_op2 (cpu); return;
6861
6862 case 0x2E:
6863 if (INSTR (21, 21) == 1)
6864 {
6865 switch (INSTR (15, 10))
6866 {
6867 case 0x02:
6868 do_vec_REV32 (cpu);
6869 return;
6870
6871 case 0x07:
6872 switch (INSTR (23, 22))
6873 {
6874 case 0: do_vec_EOR (cpu); return;
6875 case 1: do_vec_BSL (cpu); return;
6876 case 2:
6877 case 3: do_vec_bit (cpu); return;
6878 }
6879 break;
6880
6881 case 0x08: do_vec_sub_long (cpu); return;
6882 case 0x11: do_vec_USHL (cpu); return;
6883 case 0x12: do_vec_CLZ (cpu); return;
6884 case 0x16: do_vec_NOT (cpu); return;
6885 case 0x19: do_vec_max (cpu); return;
6886 case 0x1B: do_vec_min (cpu); return;
6887 case 0x21: do_vec_SUB (cpu); return;
6888 case 0x25: do_vec_MLS (cpu); return;
6889 case 0x31: do_vec_FminmaxNMP (cpu); return;
6890 case 0x35: do_vec_FADDP (cpu); return;
6891 case 0x37: do_vec_FMUL (cpu); return;
6892 case 0x3F: do_vec_FDIV (cpu); return;
6893
6894 case 0x3E:
6895 switch (INSTR (20, 16))
6896 {
6897 case 0x00: do_vec_FNEG (cpu); return;
6898 case 0x01: do_vec_FSQRT (cpu); return;
6899 default: HALT_NYI;
6900 }
6901
6902 case 0x0D:
6903 case 0x0F:
6904 case 0x22:
6905 case 0x23:
6906 case 0x26:
6907 case 0x2A:
6908 case 0x32:
6909 case 0x36:
6910 case 0x39:
6911 case 0x3A:
6912 do_vec_compare (cpu); return;
6913
6914 default:
6915 break;
6916 }
6917 }
6918
6919 if (INSTR (31, 21) == 0x370)
6920 {
6921 if (INSTR (10, 10))
6922 do_vec_MOV_element (cpu);
6923 else
6924 do_vec_EXT (cpu);
6925 return;
6926 }
6927
6928 switch (INSTR (21, 10))
6929 {
6930 case 0x82E: do_vec_neg (cpu); return;
6931 case 0x87E: do_vec_sqrt (cpu); return;
6932 default:
6933 if (INSTR (15, 10) == 0x30)
6934 {
6935 do_vec_mull (cpu);
6936 return;
6937 }
6938 break;
6939 }
6940 break;
6941
6942     case 0x2F:
6943 switch (INSTR (15, 10))
6944 {
6945 case 0x01: do_vec_SSHR_USHR (cpu); return;
6946 case 0x10:
6947 case 0x12: do_vec_mls_indexed (cpu); return;
6948 case 0x29: do_vec_xtl (cpu); return;
6949 default:
6950 HALT_NYI;
6951 }
6952
6953 default:
6954 break;
6955 }
6956
6957 HALT_NYI;
6958 }
6959
6960 /* 3 sources. */
6961
6962 /* Float multiply add. */
6963 static void
6964 fmadds (sim_cpu *cpu)
6965 {
6966 unsigned sa = INSTR (14, 10);
6967 unsigned sm = INSTR (20, 16);
6968 unsigned sn = INSTR ( 9, 5);
6969 unsigned sd = INSTR ( 4, 0);
6970
6971 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6972 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
6973 + aarch64_get_FP_float (cpu, sn)
6974 * aarch64_get_FP_float (cpu, sm));
6975 }
6976
6977 /* Double multiply add. */
6978 static void
6979 fmaddd (sim_cpu *cpu)
6980 {
6981 unsigned sa = INSTR (14, 10);
6982 unsigned sm = INSTR (20, 16);
6983 unsigned sn = INSTR ( 9, 5);
6984 unsigned sd = INSTR ( 4, 0);
6985
6986 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6987 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
6988 + aarch64_get_FP_double (cpu, sn)
6989 * aarch64_get_FP_double (cpu, sm));
6990 }
6991
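/* Note: fmadds/fmaddd above evaluate the multiply and the add as two
   separate C operations, so the product is rounded before the sum.
   A bit-exact model of the ARM fused behaviour would use the C99 fma
   functions instead; a minimal sketch for the single-precision case
   (assuming the host libm implements fmaf correctly):  */
#if 0
  aarch64_set_FP_float (cpu, sd,
			fmaf (aarch64_get_FP_float (cpu, sn),
			      aarch64_get_FP_float (cpu, sm),
			      aarch64_get_FP_float (cpu, sa)));
#endif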
6992 /* Float multiply subtract. */
6993 static void
6994 fmsubs (sim_cpu *cpu)
6995 {
6996 unsigned sa = INSTR (14, 10);
6997 unsigned sm = INSTR (20, 16);
6998 unsigned sn = INSTR ( 9, 5);
6999 unsigned sd = INSTR ( 4, 0);
7000
7001 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7002 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
7003 - aarch64_get_FP_float (cpu, sn)
7004 * aarch64_get_FP_float (cpu, sm));
7005 }
7006
7007 /* Double multiply subtract. */
7008 static void
7009 fmsubd (sim_cpu *cpu)
7010 {
7011 unsigned sa = INSTR (14, 10);
7012 unsigned sm = INSTR (20, 16);
7013 unsigned sn = INSTR ( 9, 5);
7014 unsigned sd = INSTR ( 4, 0);
7015
7016 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7017 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
7018 - aarch64_get_FP_double (cpu, sn)
7019 * aarch64_get_FP_double (cpu, sm));
7020 }
7021
7022 /* Float negative multiply add. */
7023 static void
7024 fnmadds (sim_cpu *cpu)
7025 {
7026 unsigned sa = INSTR (14, 10);
7027 unsigned sm = INSTR (20, 16);
7028 unsigned sn = INSTR ( 9, 5);
7029 unsigned sd = INSTR ( 4, 0);
7030
7031 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7032 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7033 + (- aarch64_get_FP_float (cpu, sn))
7034 * aarch64_get_FP_float (cpu, sm));
7035 }
7036
7037 /* Double negative multiply add. */
7038 static void
7039 fnmaddd (sim_cpu *cpu)
7040 {
7041 unsigned sa = INSTR (14, 10);
7042 unsigned sm = INSTR (20, 16);
7043 unsigned sn = INSTR ( 9, 5);
7044 unsigned sd = INSTR ( 4, 0);
7045
7046 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7047 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7048 + (- aarch64_get_FP_double (cpu, sn))
7049 * aarch64_get_FP_double (cpu, sm));
7050 }
7051
7052 /* Float negative multiply subtract. */
7053 static void
7054 fnmsubs (sim_cpu *cpu)
7055 {
7056 unsigned sa = INSTR (14, 10);
7057 unsigned sm = INSTR (20, 16);
7058 unsigned sn = INSTR ( 9, 5);
7059 unsigned sd = INSTR ( 4, 0);
7060
7061 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7062 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7063 + aarch64_get_FP_float (cpu, sn)
7064 * aarch64_get_FP_float (cpu, sm));
7065 }
7066
7067 /* Double negative multiply subtract. */
7068 static void
7069 fnmsubd (sim_cpu *cpu)
7070 {
7071 unsigned sa = INSTR (14, 10);
7072 unsigned sm = INSTR (20, 16);
7073 unsigned sn = INSTR ( 9, 5);
7074 unsigned sd = INSTR ( 4, 0);
7075
7076 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7077 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7078 + aarch64_get_FP_double (cpu, sn)
7079 * aarch64_get_FP_double (cpu, sm));
7080 }
7081
7082 static void
7083 dexSimpleFPDataProc3Source (sim_cpu *cpu)
7084 {
7085 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7086 instr[30] = 0
7087 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7088 instr[28,25] = 1111
7089 instr[24] = 1
7090 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
7091 instr[21] ==> o1 : 0 ==> unnegated, 1 ==> negated
7092 instr[15] ==> o2 : 0 ==> ADD, 1 ==> SUB */
7093
7094 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7095 /* dispatch on combined type:o1:o2. */
7096 uint32_t dispatch = (INSTR (23, 21) << 1) | INSTR (15, 15);
7097
7098 if (M_S != 0)
7099 HALT_UNALLOC;
7100
7101 switch (dispatch)
7102 {
7103 case 0: fmadds (cpu); return;
7104 case 1: fmsubs (cpu); return;
7105 case 2: fnmadds (cpu); return;
7106 case 3: fnmsubs (cpu); return;
7107 case 4: fmaddd (cpu); return;
7108 case 5: fmsubd (cpu); return;
7109 case 6: fnmaddd (cpu); return;
7110 case 7: fnmsubd (cpu); return;
7111 default:
7112 /* type > 1 is currently unallocated. */
7113 HALT_UNALLOC;
7114 }
7115 }
7116
7117 static void
7118 dexSimpleFPFixedConvert (sim_cpu *cpu)
7119 {
7120 HALT_NYI;
7121 }
7122
7123 static void
7124 dexSimpleFPCondCompare (sim_cpu *cpu)
7125 {
7126 /* instr [31,23] = 0001 1110 0
7127 instr [22] = type
7128 instr [21] = 1
7129 instr [20,16] = Rm
7130 instr [15,12] = condition
7131 instr [11,10] = 01
7132 instr [9,5] = Rn
7133 instr [4] = 0
7134 instr [3,0] = nzcv */
7135
7136 unsigned rm = INSTR (20, 16);
7137 unsigned rn = INSTR (9, 5);
7138
7139 NYI_assert (31, 23, 0x3C);
7140 NYI_assert (11, 10, 0x1);
7141 NYI_assert (4, 4, 0);
7142
7143 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7144 if (! testConditionCode (cpu, INSTR (15, 12)))
7145 {
7146 aarch64_set_CPSR (cpu, INSTR (3, 0));
7147 return;
7148 }
7149
7150 if (INSTR (22, 22))
7151 {
7152 /* Double precision. */
7153 double val1 = aarch64_get_vec_double (cpu, rn, 0);
7154 double val2 = aarch64_get_vec_double (cpu, rm, 0);
7155
7156 /* FIXME: Check for NaNs. */
7157 if (val1 == val2)
7158 aarch64_set_CPSR (cpu, (Z | C));
7159 else if (val1 < val2)
7160 aarch64_set_CPSR (cpu, N);
7161 else /* val1 > val2 */
7162 aarch64_set_CPSR (cpu, C);
7163 }
7164 else
7165 {
7166 /* Single precision. */
7167 float val1 = aarch64_get_vec_float (cpu, rn, 0);
7168 float val2 = aarch64_get_vec_float (cpu, rm, 0);
7169
7170 /* FIXME: Check for NaNs. */
7171 if (val1 == val2)
7172 aarch64_set_CPSR (cpu, (Z | C));
7173 else if (val1 < val2)
7174 aarch64_set_CPSR (cpu, N);
7175 else /* val1 > val2 */
7176 aarch64_set_CPSR (cpu, C);
7177 }
7178 }
7179
7180 /* 2 sources. */
7181
7182 /* Float add. */
7183 static void
7184 fadds (sim_cpu *cpu)
7185 {
7186 unsigned sm = INSTR (20, 16);
7187 unsigned sn = INSTR ( 9, 5);
7188 unsigned sd = INSTR ( 4, 0);
7189
7190 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7191 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7192 + aarch64_get_FP_float (cpu, sm));
7193 }
7194
7195 /* Double add. */
7196 static void
7197 faddd (sim_cpu *cpu)
7198 {
7199 unsigned sm = INSTR (20, 16);
7200 unsigned sn = INSTR ( 9, 5);
7201 unsigned sd = INSTR ( 4, 0);
7202
7203 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7204 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7205 + aarch64_get_FP_double (cpu, sm));
7206 }
7207
7208 /* Float divide. */
7209 static void
7210 fdivs (sim_cpu *cpu)
7211 {
7212 unsigned sm = INSTR (20, 16);
7213 unsigned sn = INSTR ( 9, 5);
7214 unsigned sd = INSTR ( 4, 0);
7215
7216 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7217 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7218 / aarch64_get_FP_float (cpu, sm));
7219 }
7220
7221 /* Double divide. */
7222 static void
7223 fdivd (sim_cpu *cpu)
7224 {
7225 unsigned sm = INSTR (20, 16);
7226 unsigned sn = INSTR ( 9, 5);
7227 unsigned sd = INSTR ( 4, 0);
7228
7229 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7230 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7231 / aarch64_get_FP_double (cpu, sm));
7232 }
7233
7234 /* Float multiply. */
7235 static void
7236 fmuls (sim_cpu *cpu)
7237 {
7238 unsigned sm = INSTR (20, 16);
7239 unsigned sn = INSTR ( 9, 5);
7240 unsigned sd = INSTR ( 4, 0);
7241
7242 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7243 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7244 * aarch64_get_FP_float (cpu, sm));
7245 }
7246
7247 /* Double multiply. */
7248 static void
7249 fmuld (sim_cpu *cpu)
7250 {
7251 unsigned sm = INSTR (20, 16);
7252 unsigned sn = INSTR ( 9, 5);
7253 unsigned sd = INSTR ( 4, 0);
7254
7255 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7256 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7257 * aarch64_get_FP_double (cpu, sm));
7258 }
7259
7260 /* Float negate and multiply. */
7261 static void
7262 fnmuls (sim_cpu *cpu)
7263 {
7264 unsigned sm = INSTR (20, 16);
7265 unsigned sn = INSTR ( 9, 5);
7266 unsigned sd = INSTR ( 4, 0);
7267
7268 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7269 aarch64_set_FP_float (cpu, sd, - (aarch64_get_FP_float (cpu, sn)
7270 * aarch64_get_FP_float (cpu, sm)));
7271 }
7272
7273 /* Double negate and multiply. */
7274 static void
7275 fnmuld (sim_cpu *cpu)
7276 {
7277 unsigned sm = INSTR (20, 16);
7278 unsigned sn = INSTR ( 9, 5);
7279 unsigned sd = INSTR ( 4, 0);
7280
7281 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7282 aarch64_set_FP_double (cpu, sd, - (aarch64_get_FP_double (cpu, sn)
7283 * aarch64_get_FP_double (cpu, sm)));
7284 }
7285
7286 /* Float subtract. */
7287 static void
7288 fsubs (sim_cpu *cpu)
7289 {
7290 unsigned sm = INSTR (20, 16);
7291 unsigned sn = INSTR ( 9, 5);
7292 unsigned sd = INSTR ( 4, 0);
7293
7294 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7295 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7296 - aarch64_get_FP_float (cpu, sm));
7297 }
7298
7299 /* Double subtract. */
7300 static void
7301 fsubd (sim_cpu *cpu)
7302 {
7303 unsigned sm = INSTR (20, 16);
7304 unsigned sn = INSTR ( 9, 5);
7305 unsigned sd = INSTR ( 4, 0);
7306
7307 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7308 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7309 - aarch64_get_FP_double (cpu, sm));
7310 }
7311
7312 static void
7313 do_FMINNM (sim_cpu *cpu)
7314 {
7315 /* instr[31,23] = 0 0011 1100
7316 instr[22] = float(0)/double(1)
7317 instr[21] = 1
7318 instr[20,16] = Sm
7319 instr[15,10] = 01 1110
7320 instr[9,5] = Sn
7321      instr[4,0]   = Sd */
7322
7323 unsigned sm = INSTR (20, 16);
7324 unsigned sn = INSTR ( 9, 5);
7325 unsigned sd = INSTR ( 4, 0);
7326
7327 NYI_assert (31, 23, 0x03C);
7328 NYI_assert (15, 10, 0x1E);
7329
7330 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7331 if (INSTR (22, 22))
7332 aarch64_set_FP_double (cpu, sd,
7333 dminnm (aarch64_get_FP_double (cpu, sn),
7334 aarch64_get_FP_double (cpu, sm)));
7335 else
7336 aarch64_set_FP_float (cpu, sd,
7337 fminnm (aarch64_get_FP_float (cpu, sn),
7338 aarch64_get_FP_float (cpu, sm)));
7339 }
7340
7341 static void
7342 do_FMAXNM (sim_cpu *cpu)
7343 {
7344 /* instr[31,23] = 0 0011 1100
7345 instr[22] = float(0)/double(1)
7346 instr[21] = 1
7347 instr[20,16] = Sm
7348 instr[15,10] = 01 1010
7349 instr[9,5] = Sn
7350      instr[4,0]   = Sd */
7351
7352 unsigned sm = INSTR (20, 16);
7353 unsigned sn = INSTR ( 9, 5);
7354 unsigned sd = INSTR ( 4, 0);
7355
7356 NYI_assert (31, 23, 0x03C);
7357 NYI_assert (15, 10, 0x1A);
7358
7359 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7360 if (INSTR (22, 22))
7361 aarch64_set_FP_double (cpu, sd,
7362 dmaxnm (aarch64_get_FP_double (cpu, sn),
7363 aarch64_get_FP_double (cpu, sm)));
7364 else
7365 aarch64_set_FP_float (cpu, sd,
7366 fmaxnm (aarch64_get_FP_float (cpu, sn),
7367 aarch64_get_FP_float (cpu, sm)));
7368 }
7369
7370 static void
7371 dexSimpleFPDataProc2Source (sim_cpu *cpu)
7372 {
7373 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7374 instr[30] = 0
7375 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7376 instr[28,25] = 1111
7377 instr[24] = 0
7378 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
7379 instr[21] = 1
7380 instr[20,16] = Vm
7381 instr[15,12] ==> opcode : 0000 ==> FMUL, 0001 ==> FDIV
7382 0010 ==> FADD, 0011 ==> FSUB,
7383 0100 ==> FMAX, 0101 ==> FMIN
7384 0110 ==> FMAXNM, 0111 ==> FMINNM
7385 1000 ==> FNMUL, ow ==> UNALLOC
7386 instr[11,10] = 10
7387 instr[9,5] = Vn
7388 instr[4,0] = Vd */
7389
7390 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7391 uint32_t type = INSTR (23, 22);
7392 /* Dispatch on opcode. */
7393 uint32_t dispatch = INSTR (15, 12);
7394
7395 if (type > 1)
7396 HALT_UNALLOC;
7397
7398 if (M_S != 0)
7399 HALT_UNALLOC;
7400
7401 if (type)
7402 switch (dispatch)
7403 {
7404 case 0: fmuld (cpu); return;
7405 case 1: fdivd (cpu); return;
7406 case 2: faddd (cpu); return;
7407 case 3: fsubd (cpu); return;
7408 case 6: do_FMAXNM (cpu); return;
7409 case 7: do_FMINNM (cpu); return;
7410 case 8: fnmuld (cpu); return;
7411
7412 /* Have not yet implemented fmax and fmin. */
7413 case 4:
7414 case 5:
7415 HALT_NYI;
7416
7417 default:
7418 HALT_UNALLOC;
7419 }
7420 else /* type == 0 => floats. */
7421 switch (dispatch)
7422 {
7423 case 0: fmuls (cpu); return;
7424 case 1: fdivs (cpu); return;
7425 case 2: fadds (cpu); return;
7426 case 3: fsubs (cpu); return;
7427 case 6: do_FMAXNM (cpu); return;
7428 case 7: do_FMINNM (cpu); return;
7429 case 8: fnmuls (cpu); return;
7430
7431 case 4:
7432 case 5:
7433 HALT_NYI;
7434
7435 default:
7436 HALT_UNALLOC;
7437 }
7438 }
7439
7440 static void
7441 dexSimpleFPCondSelect (sim_cpu *cpu)
7442 {
7443 /* FCSEL
7444 instr[31,23] = 0 0011 1100
7445 instr[22] = 0=>single 1=>double
7446 instr[21] = 1
7447 instr[20,16] = Sm
7448 instr[15,12] = cond
7449 instr[11,10] = 11
7450 instr[9,5] = Sn
7451      instr[4,0]   = Sd */
7452 unsigned sm = INSTR (20, 16);
7453 unsigned sn = INSTR ( 9, 5);
7454 unsigned sd = INSTR ( 4, 0);
7455 uint32_t set = testConditionCode (cpu, INSTR (15, 12));
7456
7457 NYI_assert (31, 23, 0x03C);
7458 NYI_assert (11, 10, 0x3);
7459
7460 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7461 if (INSTR (22, 22))
7462 aarch64_set_FP_double (cpu, sd, (set ? aarch64_get_FP_double (cpu, sn)
7463 : aarch64_get_FP_double (cpu, sm)));
7464 else
7465 aarch64_set_FP_float (cpu, sd, (set ? aarch64_get_FP_float (cpu, sn)
7466 : aarch64_get_FP_float (cpu, sm)));
7467 }
7468
7469 /* Store 32 bit unscaled signed 9 bit. */
7470 static void
7471 fsturs (sim_cpu *cpu, int32_t offset)
7472 {
7473 unsigned int rn = INSTR (9, 5);
7474 unsigned int st = INSTR (4, 0);
7475
7476 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7477   aarch64_set_mem_u32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7478 aarch64_get_vec_u32 (cpu, st, 0));
7479 }
7480
7481 /* Store 64 bit unscaled signed 9 bit. */
7482 static void
7483 fsturd (sim_cpu *cpu, int32_t offset)
7484 {
7485 unsigned int rn = INSTR (9, 5);
7486 unsigned int st = INSTR (4, 0);
7487
7488 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7489   aarch64_set_mem_u64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7490 aarch64_get_vec_u64 (cpu, st, 0));
7491 }
7492
7493 /* Store 128 bit unscaled signed 9 bit. */
7494 static void
7495 fsturq (sim_cpu *cpu, int32_t offset)
7496 {
7497 unsigned int rn = INSTR (9, 5);
7498 unsigned int st = INSTR (4, 0);
7499 FRegister a;
7500
7501 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7502 aarch64_get_FP_long_double (cpu, st, & a);
7503 aarch64_set_mem_long_double (cpu,
7504 			       aarch64_get_reg_u64 (cpu, rn, SP_OK)
7505 + offset, a);
7506 }
7507
7508 /* TODO FP move register. */
7509
7510 /* 32 bit fp to fp move register. */
7511 static void
7512 ffmovs (sim_cpu *cpu)
7513 {
7514 unsigned int rn = INSTR (9, 5);
7515 unsigned int st = INSTR (4, 0);
7516
7517 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7518 aarch64_set_FP_float (cpu, st, aarch64_get_FP_float (cpu, rn));
7519 }
7520
7521 /* 64 bit fp to fp move register. */
7522 static void
7523 ffmovd (sim_cpu *cpu)
7524 {
7525 unsigned int rn = INSTR (9, 5);
7526 unsigned int st = INSTR (4, 0);
7527
7528 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7529 aarch64_set_FP_double (cpu, st, aarch64_get_FP_double (cpu, rn));
7530 }
7531
7532 /* 32 bit GReg to Vec move register. */
7533 static void
7534 fgmovs (sim_cpu *cpu)
7535 {
7536 unsigned int rn = INSTR (9, 5);
7537 unsigned int st = INSTR (4, 0);
7538
7539 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7540 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_reg_u32 (cpu, rn, NO_SP));
7541 }
7542
7543 /* 64 bit g to fp move register. */
7544 static void
7545 fgmovd (sim_cpu *cpu)
7546 {
7547 unsigned int rn = INSTR (9, 5);
7548 unsigned int st = INSTR (4, 0);
7549
7550 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7551 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_reg_u64 (cpu, rn, NO_SP));
7552 }
7553
7554 /* 32 bit fp to g move register. */
7555 static void
7556 gfmovs (sim_cpu *cpu)
7557 {
7558 unsigned int rn = INSTR (9, 5);
7559 unsigned int st = INSTR (4, 0);
7560
7561 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7562 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u32 (cpu, rn, 0));
7563 }
7564
7565 /* 64 bit fp to g move register. */
7566 static void
7567 gfmovd (sim_cpu *cpu)
7568 {
7569 unsigned int rn = INSTR (9, 5);
7570 unsigned int st = INSTR (4, 0);
7571
7572 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7573 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u64 (cpu, rn, 0));
7574 }
7575
7576 /* FP move immediate
7577
7578    These install an immediate 8 bit value in the target register
7579    where the 8 bits comprise 1 sign bit, a 3 bit exponent and 4
7580    bits of fraction. */
7581
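/* Illustrative sketch of the expansion (not part of the simulator;
   the real decoding is done by fp_immediate_for_encoding_32 and
   fp_immediate_for_encoding_64).  The encoded value is
   (-1)^sign * (16 + frac) / 16 * 2^exp with exp in [-3,4]:  */
#if 0
static float
sketch_expand_fmov_imm8 (uint32_t imm8)
{
  uint32_t frac = imm8 & 0xf;		/* imm8<3:0>.  */
  uint32_t b54  = (imm8 >> 4) & 3;	/* imm8<5:4>.  */
  int      exp  = (imm8 & 0x40) ? (int) b54 - 3 : (int) b54 + 1;
  float    val  = ldexpf ((16.0f + frac) / 16.0f, exp);

  return (imm8 & 0x80) ? -val : val;	/* e.g. imm8 == 0x70 => 1.0f.  */
}
#endif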
7582 static void
7583 fmovs (sim_cpu *cpu)
7584 {
7585 unsigned int sd = INSTR (4, 0);
7586 uint32_t imm = INSTR (20, 13);
7587 float f = fp_immediate_for_encoding_32 (imm);
7588
7589 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7590 aarch64_set_FP_float (cpu, sd, f);
7591 }
7592
7593 static void
7594 fmovd (sim_cpu *cpu)
7595 {
7596 unsigned int sd = INSTR (4, 0);
7597 uint32_t imm = INSTR (20, 13);
7598 double d = fp_immediate_for_encoding_64 (imm);
7599
7600 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7601 aarch64_set_FP_double (cpu, sd, d);
7602 }
7603
7604 static void
7605 dexSimpleFPImmediate (sim_cpu *cpu)
7606 {
7607   /* instr[31,23] == 0 0011 1100
7608      instr[22]    == type : single(0)/double(1)
7609      instr[21]    == 1
7610      instr[20,13] == imm8
7611      instr[12,10] == 100
7612      instr[9,5]   == imm5 : 00000 ==> OK, ow ==> UNALLOC
7613 instr[4,0] == Rd */
7614 uint32_t imm5 = INSTR (9, 5);
7615
7616 NYI_assert (31, 23, 0x3C);
7617
7618 if (imm5 != 0)
7619 HALT_UNALLOC;
7620
7621 if (INSTR (22, 22))
7622 fmovd (cpu);
7623 else
7624 fmovs (cpu);
7625 }
7626
7627 /* TODO specific decode and execute for group Load Store. */
7628
7629 /* TODO FP load/store single register (unscaled offset). */
7630
7631 /* TODO load 8 bit unscaled signed 9 bit. */
7632 /* TODO load 16 bit unscaled signed 9 bit. */
7633
7634 /* Load 32 bit unscaled signed 9 bit. */
7635 static void
7636 fldurs (sim_cpu *cpu, int32_t offset)
7637 {
7638 unsigned int rn = INSTR (9, 5);
7639 unsigned int st = INSTR (4, 0);
7640
7641 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7642 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
7643 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7644 }
7645
7646 /* Load 64 bit unscaled signed 9 bit. */
7647 static void
7648 fldurd (sim_cpu *cpu, int32_t offset)
7649 {
7650 unsigned int rn = INSTR (9, 5);
7651 unsigned int st = INSTR (4, 0);
7652
7653 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7654 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64
7655 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7656 }
7657
7658 /* Load 128 bit unscaled signed 9 bit. */
7659 static void
7660 fldurq (sim_cpu *cpu, int32_t offset)
7661 {
7662 unsigned int rn = INSTR (9, 5);
7663 unsigned int st = INSTR (4, 0);
7664 FRegister a;
7665 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
7666
7667 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7668 aarch64_get_mem_long_double (cpu, addr, & a);
7669 aarch64_set_FP_long_double (cpu, st, a);
7670 }
7671
7672 /* TODO store 8 bit unscaled signed 9 bit. */
7673 /* TODO store 16 bit unscaled signed 9 bit. */
7674
7675
7676 /* 1 source. */
7677
7678 /* Float absolute value. */
7679 static void
7680 fabss (sim_cpu *cpu)
7681 {
7682 unsigned sn = INSTR (9, 5);
7683 unsigned sd = INSTR (4, 0);
7684 float value = aarch64_get_FP_float (cpu, sn);
7685
7686 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7687 aarch64_set_FP_float (cpu, sd, fabsf (value));
7688 }
7689
7690 /* Double absolute value. */
7691 static void
7692 fabcpu (sim_cpu *cpu)
7693 {
7694 unsigned sn = INSTR (9, 5);
7695 unsigned sd = INSTR (4, 0);
7696 double value = aarch64_get_FP_double (cpu, sn);
7697
7698 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7699 aarch64_set_FP_double (cpu, sd, fabs (value));
7700 }
7701
7702 /* Float negative value. */
7703 static void
7704 fnegs (sim_cpu *cpu)
7705 {
7706 unsigned sn = INSTR (9, 5);
7707 unsigned sd = INSTR (4, 0);
7708
7709 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7710 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sn));
7711 }
7712
7713 /* Double negative value. */
7714 static void
7715 fnegd (sim_cpu *cpu)
7716 {
7717 unsigned sn = INSTR (9, 5);
7718 unsigned sd = INSTR (4, 0);
7719
7720 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7721 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sn));
7722 }
7723
7724 /* Float square root. */
7725 static void
7726 fsqrts (sim_cpu *cpu)
7727 {
7728 unsigned sn = INSTR (9, 5);
7729 unsigned sd = INSTR (4, 0);
7730
7731 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7732 aarch64_set_FP_float (cpu, sd, sqrtf (aarch64_get_FP_float (cpu, sn)));
7733 }
7734
7735 /* Double square root. */
7736 static void
7737 fsqrtd (sim_cpu *cpu)
7738 {
7739 unsigned sn = INSTR (9, 5);
7740 unsigned sd = INSTR (4, 0);
7741
7742 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7743 aarch64_set_FP_double (cpu, sd,
7744 sqrt (aarch64_get_FP_double (cpu, sn)));
7745 }
7746
7747 /* Convert double to float. */
7748 static void
7749 fcvtds (sim_cpu *cpu)
7750 {
7751 unsigned sn = INSTR (9, 5);
7752 unsigned sd = INSTR (4, 0);
7753
7754 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7755 aarch64_set_FP_float (cpu, sd, (float) aarch64_get_FP_double (cpu, sn));
7756 }
7757
7758 /* Convert float to double. */
7759 static void
7760 fcvtcpu (sim_cpu *cpu)
7761 {
7762 unsigned sn = INSTR (9, 5);
7763 unsigned sd = INSTR (4, 0);
7764
7765 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7766 aarch64_set_FP_double (cpu, sd, (double) aarch64_get_FP_float (cpu, sn));
7767 }
7768
7769 static void
7770 do_FRINT (sim_cpu *cpu)
7771 {
7772 /* instr[31,23] = 0001 1110 0
7773 instr[22] = single(0)/double(1)
7774 instr[21,18] = 1001
7775 instr[17,15] = rounding mode
7776 instr[14,10] = 10000
7777 instr[9,5] = source
7778 instr[4,0] = dest */
7779
7780 float val;
7781 unsigned rs = INSTR (9, 5);
7782 unsigned rd = INSTR (4, 0);
7783 unsigned int rmode = INSTR (17, 15);
7784
7785 NYI_assert (31, 23, 0x03C);
7786 NYI_assert (21, 18, 0x9);
7787 NYI_assert (14, 10, 0x10);
7788
7789   if (rmode == 6 || rmode == 7)
7790     /* FIXME: Add support for rmode == 6 exactness check.  The dynamic
7791        rounding mode should arguably come from the FPCR, not the FPSR. */
7792     rmode = uimm (aarch64_get_FPSR (cpu), 23, 22);
7792
7793 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7794 if (INSTR (22, 22))
7795 {
7796 double val = aarch64_get_FP_double (cpu, rs);
7797
7798 switch (rmode)
7799 {
7800 	case 0: /* mode N: nearest or even. */
7801 	  {
7802 	    double rval = round (val);
7803
7804 	    /* round () rounds ties away from zero; nudge exact
7805 	       halfway cases across to the even neighbour
7806 	       instead. */
7807 	    if (fabs (val - rval) == 0.5 && fmod (rval, 2.0) != 0.0)
7808 	      rval += (val > rval) ? 1.0 : -1.0;
7809
7810 	    aarch64_set_FP_double (cpu, rd, rval);
7811 	    return;
7812 	  }
7813
7814 	case 1: /* mode P: towards +inf. */
7815 	  if (val < 0.0)
7816 	    aarch64_set_FP_double (cpu, rd, trunc (val));
7817 	  else
7818 	    aarch64_set_FP_double (cpu, rd, ceil (val));
7819 	  return;
7820
7821 	case 2: /* mode M: towards -inf. */
7822 	  if (val < 0.0)
7823 	    aarch64_set_FP_double (cpu, rd, floor (val));
7824 	  else
7825 	    aarch64_set_FP_double (cpu, rd, trunc (val));
7826 	  return;
7827
7828 case 3: /* mode Z: towards 0. */
7829 aarch64_set_FP_double (cpu, rd, trunc (val));
7830 return;
7831
7832 case 4: /* mode A: away from 0. */
7833 aarch64_set_FP_double (cpu, rd, round (val));
7834 return;
7835
7836 case 6: /* mode X: use FPCR with exactness check. */
7837 case 7: /* mode I: use FPCR mode. */
7838 HALT_NYI;
7839
7840 default:
7841 HALT_UNALLOC;
7842 }
7843 }
7844
7845 val = aarch64_get_FP_float (cpu, rs);
7846
7847 switch (rmode)
7848 {
7849     case 0: /* mode N: nearest or even. */
7850       {
7851 	float rval = roundf (val);
7852
7853 	/* roundf () rounds ties away from zero; nudge exact
7854 	   halfway cases across to the even neighbour
7855 	   instead. */
7856 	if (fabsf (val - rval) == 0.5f && fmodf (rval, 2.0f) != 0.0f)
7857 	  rval += (val > rval) ? 1.0f : -1.0f;
7858
7859 	aarch64_set_FP_float (cpu, rd, rval);
7860 	return;
7861       }
7862
7863     case 1: /* mode P: towards +inf. */
7864       if (val < 0.0)
7865 	aarch64_set_FP_float (cpu, rd, truncf (val));
7866       else
7867 	aarch64_set_FP_float (cpu, rd, ceilf (val));
7868       return;
7869
7870     case 2: /* mode M: towards -inf. */
7871       if (val < 0.0)
7872 	aarch64_set_FP_float (cpu, rd, floorf (val));
7873       else
7874 	aarch64_set_FP_float (cpu, rd, truncf (val));
7875       return;
7876
7877 case 3: /* mode Z: towards 0. */
7878 aarch64_set_FP_float (cpu, rd, truncf (val));
7879 return;
7880
7881 case 4: /* mode A: away from 0. */
7882 aarch64_set_FP_float (cpu, rd, roundf (val));
7883 return;
7884
7885 case 6: /* mode X: use FPCR with exactness check. */
7886 case 7: /* mode I: use FPCR mode. */
7887 HALT_NYI;
7888
7889 default:
7890 HALT_UNALLOC;
7891 }
7892 }
7893
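/* Spot checks for the rounding modes above (illustrative only,
   assuming the ties-to-even adjustment in mode N):
     N:  2.5 -> 2.0,  3.5 -> 4.0,  -2.5 -> -2.0
     P:  2.3 -> 3.0,  -2.3 -> -2.0
     M:  2.3 -> 2.0,  -2.3 -> -3.0
     Z:  2.7 -> 2.0,  -2.7 -> -2.0
     A:  2.5 -> 3.0,  -2.5 -> -3.0  */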
7894 /* Convert half to float. */
7895 static void
7896 do_FCVT_half_to_single (sim_cpu *cpu)
7897 {
7898 unsigned rn = INSTR (9, 5);
7899 unsigned rd = INSTR (4, 0);
7900
7901 NYI_assert (31, 10, 0x7B890);
7902
7903 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7904 aarch64_set_FP_float (cpu, rd, (float) aarch64_get_FP_half (cpu, rn));
7905 }
7906
7907 /* Convert half to double. */
7908 static void
7909 do_FCVT_half_to_double (sim_cpu *cpu)
7910 {
7911 unsigned rn = INSTR (9, 5);
7912 unsigned rd = INSTR (4, 0);
7913
7914 NYI_assert (31, 10, 0x7B8B0);
7915
7916 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7917 aarch64_set_FP_double (cpu, rd, (double) aarch64_get_FP_half (cpu, rn));
7918 }
7919
7920 static void
7921 do_FCVT_single_to_half (sim_cpu *cpu)
7922 {
7923 unsigned rn = INSTR (9, 5);
7924 unsigned rd = INSTR (4, 0);
7925
7926 NYI_assert (31, 10, 0x788F0);
7927
7928 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7929 aarch64_set_FP_half (cpu, rd, aarch64_get_FP_float (cpu, rn));
7930 }
7931
7932 /* Convert double to half. */
7933 static void
7934 do_FCVT_double_to_half (sim_cpu *cpu)
7935 {
7936 unsigned rn = INSTR (9, 5);
7937 unsigned rd = INSTR (4, 0);
7938
7939 NYI_assert (31, 10, 0x798F0);
7940
7941 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7942 aarch64_set_FP_half (cpu, rd, (float) aarch64_get_FP_double (cpu, rn));
7943 }
7944
7945 static void
7946 dexSimpleFPDataProc1Source (sim_cpu *cpu)
7947 {
7948 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7949 instr[30] = 0
7950 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7951 instr[28,25] = 1111
7952 instr[24] = 0
7953 instr[23,22] ==> type : 00 ==> source is single,
7954 01 ==> source is double
7955 10 ==> UNALLOC
7956 11 ==> UNALLOC or source is half
7957 instr[21] = 1
7958 instr[20,15] ==> opcode : with type 00 or 01
7959 000000 ==> FMOV, 000001 ==> FABS,
7960 000010 ==> FNEG, 000011 ==> FSQRT,
7961 000100 ==> UNALLOC, 000101 ==> FCVT,(to single/double)
7962 000110 ==> UNALLOC, 000111 ==> FCVT (to half)
7963 001000 ==> FRINTN, 001001 ==> FRINTP,
7964 001010 ==> FRINTM, 001011 ==> FRINTZ,
7965 001100 ==> FRINTA, 001101 ==> UNALLOC
7966 001110 ==> FRINTX, 001111 ==> FRINTI
7967 with type 11
7968 000100 ==> FCVT (half-to-single)
7969 000101 ==> FCVT (half-to-double)
7970 instr[14,10] = 10000. */
7971
7972 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7973 uint32_t type = INSTR (23, 22);
7974 uint32_t opcode = INSTR (20, 15);
7975
7976 if (M_S != 0)
7977 HALT_UNALLOC;
7978
7979 if (type == 3)
7980 {
7981 if (opcode == 4)
7982 do_FCVT_half_to_single (cpu);
7983 else if (opcode == 5)
7984 do_FCVT_half_to_double (cpu);
7985 else
7986 HALT_UNALLOC;
7987 return;
7988 }
7989
7990 if (type == 2)
7991 HALT_UNALLOC;
7992
7993 switch (opcode)
7994 {
7995 case 0:
7996 if (type)
7997 ffmovd (cpu);
7998 else
7999 ffmovs (cpu);
8000 return;
8001
8002 case 1:
8003 if (type)
8004 fabcpu (cpu);
8005 else
8006 fabss (cpu);
8007 return;
8008
8009 case 2:
8010 if (type)
8011 fnegd (cpu);
8012 else
8013 fnegs (cpu);
8014 return;
8015
8016 case 3:
8017 if (type)
8018 fsqrtd (cpu);
8019 else
8020 fsqrts (cpu);
8021 return;
8022
8023 case 4:
8024 if (type)
8025 fcvtds (cpu);
8026 else
8027 HALT_UNALLOC;
8028 return;
8029
8030 case 5:
8031 if (type)
8032 HALT_UNALLOC;
8033 fcvtcpu (cpu);
8034 return;
8035
8036 case 8: /* FRINTN etc. */
8037 case 9:
8038 case 10:
8039 case 11:
8040 case 12:
8041 case 14:
8042 case 15:
8043 do_FRINT (cpu);
8044 return;
8045
8046 case 7:
8047 if (INSTR (22, 22))
8048 do_FCVT_double_to_half (cpu);
8049 else
8050 do_FCVT_single_to_half (cpu);
8051 return;
8052
8053 case 13:
8054 HALT_NYI;
8055
8056 default:
8057 HALT_UNALLOC;
8058 }
8059 }
8060
8061 /* 32 bit signed int to float. */
8062 static void
8063 scvtf32 (sim_cpu *cpu)
8064 {
8065 unsigned rn = INSTR (9, 5);
8066 unsigned sd = INSTR (4, 0);
8067
8068 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8069 aarch64_set_FP_float
8070 (cpu, sd, (float) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8071 }
8072
8073 /* signed int to float. */
8074 static void
8075 scvtf (sim_cpu *cpu)
8076 {
8077 unsigned rn = INSTR (9, 5);
8078 unsigned sd = INSTR (4, 0);
8079
8080 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8081 aarch64_set_FP_float
8082 (cpu, sd, (float) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8083 }
8084
8085 /* 32 bit signed int to double. */
8086 static void
8087 scvtd32 (sim_cpu *cpu)
8088 {
8089 unsigned rn = INSTR (9, 5);
8090 unsigned sd = INSTR (4, 0);
8091
8092 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8093 aarch64_set_FP_double
8094 (cpu, sd, (double) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8095 }
8096
8097 /* signed int to double. */
8098 static void
8099 scvtd (sim_cpu *cpu)
8100 {
8101 unsigned rn = INSTR (9, 5);
8102 unsigned sd = INSTR (4, 0);
8103
8104 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8105 aarch64_set_FP_double
8106 (cpu, sd, (double) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8107 }
8108
8109 static const float FLOAT_INT_MAX = (float) INT_MAX;
8110 static const float FLOAT_INT_MIN = (float) INT_MIN;
8111 static const double DOUBLE_INT_MAX = (double) INT_MAX;
8112 static const double DOUBLE_INT_MIN = (double) INT_MIN;
8113 static const float FLOAT_LONG_MAX = (float) LONG_MAX;
8114 static const float FLOAT_LONG_MIN = (float) LONG_MIN;
8115 static const double DOUBLE_LONG_MAX = (double) LONG_MAX;
8116 static const double DOUBLE_LONG_MIN = (double) LONG_MIN;
8117
8118 #define UINT_MIN 0
8119 #define ULONG_MIN 0
8120 static const float FLOAT_UINT_MAX = (float) UINT_MAX;
8121 static const float FLOAT_UINT_MIN = (float) UINT_MIN;
8122 static const double DOUBLE_UINT_MAX = (double) UINT_MAX;
8123 static const double DOUBLE_UINT_MIN = (double) UINT_MIN;
8124 static const float FLOAT_ULONG_MAX = (float) ULONG_MAX;
8125 static const float FLOAT_ULONG_MIN = (float) ULONG_MIN;
8126 static const double DOUBLE_ULONG_MAX = (double) ULONG_MAX;
8127 static const double DOUBLE_ULONG_MIN = (double) ULONG_MIN;
8128
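/* Note: the LONG/ULONG bounds above use host "long", which is only
   32 bits wide on an ILP32 host even though they guard 64-bit
   conversions.  A host-independent sketch (an alternative, not what
   this file currently uses) would derive them from <stdint.h>:  */
#if 0
static const double DOUBLE_INT64_MAX  = (double) INT64_MAX;
static const double DOUBLE_INT64_MIN  = (double) INT64_MIN;
static const double DOUBLE_UINT64_MAX = (double) UINT64_MAX;
#endif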
8129 /* Check for FP exception conditions:
8130 NaN raises IO
8131 Infinity raises IO
8132 Out of Range raises IO and IX and saturates value
8133 Denormal raises ID and IX and sets to zero. */
8134 #define RAISE_EXCEPTIONS(F, VALUE, FTYPE, ITYPE) \
8135 do \
8136 { \
8137 switch (fpclassify (F)) \
8138 { \
8139 case FP_INFINITE: \
8140 case FP_NAN: \
8141 aarch64_set_FPSR (cpu, IO); \
8142 	  if (signbit (F))						\
8143 	    VALUE = ITYPE##_MIN;					\
8144 	  else								\
8145 	    VALUE = ITYPE##_MAX;					\
8146 break; \
8147 \
8148 case FP_NORMAL: \
8149 if (F >= FTYPE##_##ITYPE##_MAX) \
8150 { \
8151 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8152 VALUE = ITYPE##_MAX; \
8153 } \
8154 else if (F <= FTYPE##_##ITYPE##_MIN) \
8155 { \
8156 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8157 VALUE = ITYPE##_MIN; \
8158 } \
8159 break; \
8160 \
8161 case FP_SUBNORMAL: \
8162 aarch64_set_FPSR_bits (cpu, IO | IX | ID, IX | ID); \
8163 VALUE = 0; \
8164 break; \
8165 \
8166 default: \
8167 case FP_ZERO: \
8168 VALUE = 0; \
8169 break; \
8170 } \
8171 } \
8172 while (0)
8173
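/* Usage illustration (hypothetical, not simulator code): converting
   an out-of-range float to a signed 32-bit integer raises IO and IX
   and saturates the result.  */
#if 0
{
  float   f     = 1.0e20f;	/* Well above INT_MAX.  */
  int32_t value = (int32_t) f;	/* Unspecified before the fix-up.  */

  RAISE_EXCEPTIONS (f, value, FLOAT, INT);
  /* Now value == INT_MAX and the FPSR has IO | IX set.  */
}
#endif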
8174 /* 32 bit convert float to signed int truncate towards zero. */
8175 static void
8176 fcvtszs32 (sim_cpu *cpu)
8177 {
8178 unsigned sn = INSTR (9, 5);
8179 unsigned rd = INSTR (4, 0);
8180 /* TODO : check that this rounds toward zero. */
8181 float f = aarch64_get_FP_float (cpu, sn);
8182 int32_t value = (int32_t) f;
8183
8184 RAISE_EXCEPTIONS (f, value, FLOAT, INT);
8185
8186 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8187 /* Avoid sign extension to 64 bit. */
8188 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8189 }
8190
8191 /* 64 bit convert float to signed int truncate towards zero. */
8192 static void
8193 fcvtszs (sim_cpu *cpu)
8194 {
8195 unsigned sn = INSTR (9, 5);
8196 unsigned rd = INSTR (4, 0);
8197 float f = aarch64_get_FP_float (cpu, sn);
8198 int64_t value = (int64_t) f;
8199
8200 RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
8201
8202 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8203 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8204 }
8205
8206 /* 32 bit convert double to signed int truncate towards zero. */
8207 static void
8208 fcvtszd32 (sim_cpu *cpu)
8209 {
8210 unsigned sn = INSTR (9, 5);
8211 unsigned rd = INSTR (4, 0);
8212 /* TODO : check that this rounds toward zero. */
8213 double d = aarch64_get_FP_double (cpu, sn);
8214 int32_t value = (int32_t) d;
8215
8216 RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
8217
8218 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8219 /* Avoid sign extension to 64 bit. */
8220 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8221 }
8222
8223 /* 64 bit convert double to signed int truncate towards zero. */
8224 static void
8225 fcvtszd (sim_cpu *cpu)
8226 {
8227 unsigned sn = INSTR (9, 5);
8228 unsigned rd = INSTR (4, 0);
8229 /* TODO : check that this rounds toward zero. */
8230 double d = aarch64_get_FP_double (cpu, sn);
8231 int64_t value;
8232
8233 value = (int64_t) d;
8234
8235 RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
8236
8237 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8238 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8239 }
8240
8241 static void
8242 do_fcvtzu (sim_cpu *cpu)
8243 {
8244 /* instr[31] = size: 32-bit (0), 64-bit (1)
8245 instr[30,23] = 00111100
8246 instr[22] = type: single (0)/ double (1)
8247      instr[21] = 1 ==> integer, 0 ==> fixed-point (precision in [15,10])
8248 instr[20,16] = 11001
8249 instr[15,10] = precision
8250 instr[9,5] = Rs
8251 instr[4,0] = Rd. */
8252
8253 unsigned rs = INSTR (9, 5);
8254 unsigned rd = INSTR (4, 0);
8255
8256 NYI_assert (30, 23, 0x3C);
8257 NYI_assert (20, 16, 0x19);
8258
8259 if (INSTR (21, 21) != 1)
8260 /* Convert to fixed point. */
8261 HALT_NYI;
8262
8263 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8264 if (INSTR (31, 31))
8265 {
8266 /* Convert to unsigned 64-bit integer. */
8267 if (INSTR (22, 22))
8268 {
8269 double d = aarch64_get_FP_double (cpu, rs);
8270 uint64_t value = (uint64_t) d;
8271
8272 /* Do not raise an exception if we have reached ULONG_MAX. */
8273 	  if (value != ((uint64_t) 1 << 63))
8274 RAISE_EXCEPTIONS (d, value, DOUBLE, ULONG);
8275
8276 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8277 }
8278 else
8279 {
8280 float f = aarch64_get_FP_float (cpu, rs);
8281 uint64_t value = (uint64_t) f;
8282
8283 /* Do not raise an exception if we have reached ULONG_MAX. */
8284 	  if (value != ((uint64_t) 1 << 63))
8285 RAISE_EXCEPTIONS (f, value, FLOAT, ULONG);
8286
8287 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8288 }
8289 }
8290 else
8291 {
8292 uint32_t value;
8293
8294 /* Convert to unsigned 32-bit integer. */
8295 if (INSTR (22, 22))
8296 {
8297 double d = aarch64_get_FP_double (cpu, rs);
8298
8299 value = (uint32_t) d;
8300 /* Do not raise an exception if we have reached UINT_MAX. */
8301 if (value != (1UL << 31))
8302 RAISE_EXCEPTIONS (d, value, DOUBLE, UINT);
8303 }
8304 else
8305 {
8306 float f = aarch64_get_FP_float (cpu, rs);
8307
8308 value = (uint32_t) f;
8309 /* Do not raise an exception if we have reached UINT_MAX. */
8310 if (value != (1UL << 31))
8311 RAISE_EXCEPTIONS (f, value, FLOAT, UINT);
8312 }
8313
8314 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8315 }
8316 }
8317
8318 static void
8319 do_UCVTF (sim_cpu *cpu)
8320 {
8321 /* instr[31] = size: 32-bit (0), 64-bit (1)
8322 instr[30,23] = 001 1110 0
8323 instr[22] = type: single (0)/ double (1)
8324      instr[21] = 1 ==> integer, 0 ==> fixed-point (precision in [15,10])
8325 instr[20,16] = 0 0011
8326 instr[15,10] = precision
8327 instr[9,5] = Rs
8328 instr[4,0] = Rd. */
8329
8330 unsigned rs = INSTR (9, 5);
8331 unsigned rd = INSTR (4, 0);
8332
8333 NYI_assert (30, 23, 0x3C);
8334 NYI_assert (20, 16, 0x03);
8335
8336 if (INSTR (21, 21) != 1)
8337 HALT_NYI;
8338
8339 /* FIXME: Add exception raising. */
8340 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8341 if (INSTR (31, 31))
8342 {
8343 uint64_t value = aarch64_get_reg_u64 (cpu, rs, NO_SP);
8344
8345 if (INSTR (22, 22))
8346 aarch64_set_FP_double (cpu, rd, (double) value);
8347 else
8348 aarch64_set_FP_float (cpu, rd, (float) value);
8349 }
8350 else
8351 {
8352 uint32_t value = aarch64_get_reg_u32 (cpu, rs, NO_SP);
8353
8354 if (INSTR (22, 22))
8355 aarch64_set_FP_double (cpu, rd, (double) value);
8356 else
8357 aarch64_set_FP_float (cpu, rd, (float) value);
8358 }
8359 }
8360
8361 static void
8362 float_vector_move (sim_cpu *cpu)
8363 {
8364 /* instr[31,17] == 100 1111 0101 0111
8365 instr[16] ==> direction 0=> to GR, 1=> from GR
8366      instr[15,10] => must be 00 0000 (ow ==> UNALLOC)
8367 instr[9,5] ==> source
8368 instr[4,0] ==> dest. */
8369
8370 unsigned rn = INSTR (9, 5);
8371 unsigned rd = INSTR (4, 0);
8372
8373 NYI_assert (31, 17, 0x4F57);
8374
8375 if (INSTR (15, 10) != 0)
8376 HALT_UNALLOC;
8377
8378 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8379 if (INSTR (16, 16))
8380 aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_reg_u64 (cpu, rn, NO_SP));
8381 else
8382 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, rn, 1));
8383 }
8384
8385 static void
8386 dexSimpleFPIntegerConvert (sim_cpu *cpu)
8387 {
8388 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
8389      instr[30]    = 0
8390 instr[29] = S : 0 ==> OK, 1 ==> UNALLOC
8391 instr[28,25] = 1111
8392 instr[24] = 0
8393 instr[23,22] = type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8394 instr[21] = 1
8395 instr[20,19] = rmode
8396 instr[18,16] = opcode
8397 instr[15,10] = 10 0000 */
8398
8399 uint32_t rmode_opcode;
8400 uint32_t size_type;
8401 uint32_t type;
8402 uint32_t size;
8403 uint32_t S;
8404
8405 if (INSTR (31, 17) == 0x4F57)
8406 {
8407 float_vector_move (cpu);
8408 return;
8409 }
8410
8411 size = INSTR (31, 31);
8412 S = INSTR (29, 29);
8413 if (S != 0)
8414 HALT_UNALLOC;
8415
8416 type = INSTR (23, 22);
8417 if (type > 1)
8418 HALT_UNALLOC;
8419
8420 rmode_opcode = INSTR (20, 16);
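/* rmode_opcode is the 5-bit rmode:opcode field; e.g. 24 (0b11000) is
   rmode 11 (round towards zero) with opcode 000, i.e. FCVTZS.  */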
8421 size_type = (size << 1) | type; /* 0==32f, 1==32d, 2==64f, 3==64d. */
8422
8423 switch (rmode_opcode)
8424 {
8425 case 2: /* SCVTF. */
8426 switch (size_type)
8427 {
8428 case 0: scvtf32 (cpu); return;
8429 case 1: scvtd32 (cpu); return;
8430 case 2: scvtf (cpu); return;
8431 case 3: scvtd (cpu); return;
8432 }
8433
8434 case 6: /* FMOV GR, Vec. */
8435 switch (size_type)
8436 {
8437 case 0: gfmovs (cpu); return;
8438 case 3: gfmovd (cpu); return;
8439 default: HALT_UNALLOC;
8440 }
8441
8442 case 7: /* FMOV vec, GR. */
8443 switch (size_type)
8444 {
8445 case 0: fgmovs (cpu); return;
8446 case 3: fgmovd (cpu); return;
8447 default: HALT_UNALLOC;
8448 }
8449
8450 case 24: /* FCVTZS. */
8451 switch (size_type)
8452 {
8453 case 0: fcvtszs32 (cpu); return;
8454 case 1: fcvtszd32 (cpu); return;
8455 case 2: fcvtszs (cpu); return;
8456 case 3: fcvtszd (cpu); return;
8457 }
8458
8459 case 25: do_fcvtzu (cpu); return;
8460 case 3: do_UCVTF (cpu); return;
8461
8462 case 0: /* FCVTNS. */
8463 case 1: /* FCVTNU. */
8464 case 4: /* FCVTAS. */
8465 case 5: /* FCVTAU. */
8466 case 8: /* FCVTPS. */
8467 case 9: /* FCVTPU. */
8468 case 16: /* FCVTMS. */
8469 case 17: /* FCVTMU. */
8470 default:
8471 HALT_NYI;
8472 }
8473 }
8474
8475 static void
8476 set_flags_for_float_compare (sim_cpu *cpu, float fvalue1, float fvalue2)
8477 {
8478 uint32_t flags;
8479
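/* The flag encodings match the AArch64 FCMP result NZCV values:
   unordered (any NaN) ==> C|V, equal ==> Z|C, less than ==> N,
   greater than ==> C.  */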
8480 /* FIXME: Add exception raising. */
8481 if (isnan (fvalue1) || isnan (fvalue2))
8482 flags = C|V;
8483 else if (isinf (fvalue1) && isinf (fvalue2))
8484 {
8485 /* Subtracting two infinities may give a NaN. We only need to compare
8486 the signs, which we can get from isinf. */
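/* N.B. this relies on isinf returning +1 for +inf and -1 for -inf
   (a glibc extension); ISO C only guarantees a nonzero result.  */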
8487 int result = isinf (fvalue1) - isinf (fvalue2);
8488
8489 if (result == 0)
8490 flags = Z|C;
8491 else if (result < 0)
8492 flags = N;
8493 else /* (result > 0). */
8494 flags = C;
8495 }
8496 else
8497 {
8498 float result = fvalue1 - fvalue2;
8499
8500 if (result == 0.0)
8501 flags = Z|C;
8502 else if (result < 0)
8503 flags = N;
8504 else /* (result > 0). */
8505 flags = C;
8506 }
8507
8508 aarch64_set_CPSR (cpu, flags);
8509 }
8510
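/* Float compare -- Invalid Operation exception only on signaling NaNs.  */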
8511 static void
8512 fcmps (sim_cpu *cpu)
8513 {
8514 unsigned sm = INSTR (20, 16);
8515 unsigned sn = INSTR ( 9, 5);
8516
8517 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8518 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8519
8520 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8521 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8522 }
8523
8524 /* Float compare to zero -- Invalid Operation exception
8525 only on signaling NaNs. */
8526 static void
8527 fcmpzs (sim_cpu *cpu)
8528 {
8529 unsigned sn = INSTR ( 9, 5);
8530 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8531
8532 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8533 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8534 }
8535
8536 /* Float compare -- Invalid Operation exception on all NaNs. */
8537 static void
8538 fcmpes (sim_cpu *cpu)
8539 {
8540 unsigned sm = INSTR (20, 16);
8541 unsigned sn = INSTR ( 9, 5);
8542
8543 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8544 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8545
8546 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8547 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8548 }
8549
8550 /* Float compare to zero -- Invalid Operation exception on all NaNs. */
8551 static void
8552 fcmpzes (sim_cpu *cpu)
8553 {
8554 unsigned sn = INSTR ( 9, 5);
8555 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8556
8557 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8558 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8559 }
8560
8561 static void
8562 set_flags_for_double_compare (sim_cpu *cpu, double dval1, double dval2)
8563 {
8564 uint32_t flags;
8565
8566 /* FIXME: Add exception raising. */
8567 if (isnan (dval1) || isnan (dval2))
8568 flags = C|V;
8569 else if (isinf (dval1) && isinf (dval2))
8570 {
8571 /* Subtracting two infinities may give a NaN. We only need to compare
8572 the signs, which we can get from isinf. */
8573 int result = isinf (dval1) - isinf (dval2);
8574
8575 if (result == 0)
8576 flags = Z|C;
8577 else if (result < 0)
8578 flags = N;
8579 else /* (result > 0). */
8580 flags = C;
8581 }
8582 else
8583 {
8584 double result = dval1 - dval2;
8585
8586 if (result == 0.0)
8587 flags = Z|C;
8588 else if (result < 0)
8589 flags = N;
8590 else /* (result > 0). */
8591 flags = C;
8592 }
8593
8594 aarch64_set_CPSR (cpu, flags);
8595 }
8596
8597 /* Double compare -- Invalid Operation exception only on signaling NaNs. */
8598 static void
8599 fcmpd (sim_cpu *cpu)
8600 {
8601 unsigned sm = INSTR (20, 16);
8602 unsigned sn = INSTR ( 9, 5);
8603
8604 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8605 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8606
8607 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8608 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8609 }
8610
8611 /* Double compare to zero -- Invalid Operation exception
8612 only on signaling NaNs. */
8613 static void
8614 fcmpzd (sim_cpu *cpu)
8615 {
8616 unsigned sn = INSTR ( 9, 5);
8617 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8618
8619 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8620 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8621 }
8622
8623 /* Double compare -- Invalid Operation exception on all NaNs. */
8624 static void
8625 fcmped (sim_cpu *cpu)
8626 {
8627 unsigned sm = INSTR (20, 16);
8628 unsigned sn = INSTR ( 9, 5);
8629
8630 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8631 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8632
8633 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8634 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8635 }
8636
8637 /* Double compare to zero -- Invalid Operation exception on all NaNs. */
8638 static void
8639 fcmpzed (sim_cpu *cpu)
8640 {
8641 unsigned sn = INSTR ( 9, 5);
8642 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8643
8644 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8645 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8646 }
8647
8648 static void
8649 dexSimpleFPCompare (sim_cpu *cpu)
8650 {
8651 /* assert instr[28,25] == 1111
8652 instr[30:24:21:13,10] = 0011000
8653 instr[31] = M : 0 ==> OK, 1 ==> UNALLOC
8654 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
8655 instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8656 instr[15,14] ==> op : 00 ==> OK, ow ==> UNALLOC
8657 instr[4,0] ==> opcode2 : 00000 ==> FCMP, 10000 ==> FCMPE,
8658 01000 ==> FCMPZ, 11000 ==> FCMPEZ,
8659 ow ==> UNALLOC */
8660 uint32_t dispatch;
8661 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
8662 uint32_t type = INSTR (23, 22);
8663 uint32_t op = INSTR (15, 14);
8664 uint32_t op2_2_0 = INSTR (2, 0);
8665
8666 if (op2_2_0 != 0)
8667 HALT_UNALLOC;
8668
8669 if (M_S != 0)
8670 HALT_UNALLOC;
8671
8672 if (type > 1)
8673 HALT_UNALLOC;
8674
8675 if (op != 0)
8676 HALT_UNALLOC;
8677
8678 /* dispatch on type and top 2 bits of opcode. */
8679 dispatch = (type << 2) | INSTR (4, 3);
8680
8681 switch (dispatch)
8682 {
8683 case 0: fcmps (cpu); return;
8684 case 1: fcmpzs (cpu); return;
8685 case 2: fcmpes (cpu); return;
8686 case 3: fcmpzes (cpu); return;
8687 case 4: fcmpd (cpu); return;
8688 case 5: fcmpzd (cpu); return;
8689 case 6: fcmped (cpu); return;
8690 case 7: fcmpzed (cpu); return;
8691 }
8692 }
8693
8694 static void
8695 do_scalar_FADDP (sim_cpu *cpu)
8696 {
8697 /* instr [31,23] = 0111 1110 0
8698 instr [22] = single(0)/double(1)
8699 instr [21,10] = 11 0000 1101 10
8700 instr [9,5] = Fn
8701 instr [4,0] = Fd. */
8702
8703 unsigned Fn = INSTR (9, 5);
8704 unsigned Fd = INSTR (4, 0);
8705
8706 NYI_assert (31, 23, 0x0FC);
8707 NYI_assert (21, 10, 0xC36);
8708
8709 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8710 if (INSTR (22, 22))
8711 {
8712 double val1 = aarch64_get_vec_double (cpu, Fn, 0);
8713 double val2 = aarch64_get_vec_double (cpu, Fn, 1);
8714
8715 aarch64_set_FP_double (cpu, Fd, val1 + val2);
8716 }
8717 else
8718 {
8719 float val1 = aarch64_get_vec_float (cpu, Fn, 0);
8720 float val2 = aarch64_get_vec_float (cpu, Fn, 1);
8721
8722 aarch64_set_FP_float (cpu, Fd, val1 + val2);
8723 }
8724 }
8725
8726 /* Floating point absolute difference. */
8727
8728 static void
8729 do_scalar_FABD (sim_cpu *cpu)
8730 {
8731 /* instr [31,23] = 0111 1110 1
8732 instr [22] = float(0)/double(1)
8733 instr [21] = 1
8734 instr [20,16] = Rm
8735 instr [15,10] = 1101 01
8736 instr [9, 5] = Rn
8737 instr [4, 0] = Rd. */
8738
8739 unsigned rm = INSTR (20, 16);
8740 unsigned rn = INSTR (9, 5);
8741 unsigned rd = INSTR (4, 0);
8742
8743 NYI_assert (31, 23, 0x0FD);
8744 NYI_assert (21, 21, 1);
8745 NYI_assert (15, 10, 0x35);
8746
8747 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8748 if (INSTR (22, 22))
8749 aarch64_set_FP_double (cpu, rd,
8750 fabs (aarch64_get_FP_double (cpu, rn)
8751 - aarch64_get_FP_double (cpu, rm)));
8752 else
8753 aarch64_set_FP_float (cpu, rd,
8754 fabsf (aarch64_get_FP_float (cpu, rn)
8755 - aarch64_get_FP_float (cpu, rm)));
8756 }
8757
8758 static void
8759 do_scalar_CMGT (sim_cpu *cpu)
8760 {
8761 /* instr [31,21] = 0101 1110 111
8762 instr [20,16] = Rm
8763 instr [15,10] = 00 1101
8764 instr [9, 5] = Rn
8765 instr [4, 0] = Rd. */
8766
8767 unsigned rm = INSTR (20, 16);
8768 unsigned rn = INSTR (9, 5);
8769 unsigned rd = INSTR (4, 0);
8770
8771 NYI_assert (31, 21, 0x2F7);
8772 NYI_assert (15, 10, 0x0D);
8773
8774 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8775 aarch64_set_vec_u64 (cpu, rd, 0,
8776 aarch64_get_vec_u64 (cpu, rn, 0) >
8777 aarch64_get_vec_u64 (cpu, rm, 0) ? -1L : 0L);
8778 }
8779
8780 static void
8781 do_scalar_USHR (sim_cpu *cpu)
8782 {
8783 /* instr [31,23] = 0111 1111 0
8784 instr [22,16] = shift amount
8785 instr [15,10] = 0000 01
8786 instr [9, 5] = Rn
8787 instr [4, 0] = Rd. */
8788
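/* immh (instr[22]) is 1 for this 64-bit scalar form, so the encoded
   immh:immb field holds 128 - shift; e.g. (illustrative) USHR D0, D1, #8
   encodes the field as 0x78.  */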
8789 unsigned amount = 128 - INSTR (22, 16);
8790 unsigned rn = INSTR (9, 5);
8791 unsigned rd = INSTR (4, 0);
8792
8793 NYI_assert (31, 23, 0x0FE);
8794 NYI_assert (15, 10, 0x01);
8795
8796 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8797 aarch64_set_vec_u64 (cpu, rd, 0,
8798 aarch64_get_vec_u64 (cpu, rn, 0) >> amount);
8799 }
8800
8801 static void
8802 do_scalar_SSHL (sim_cpu *cpu)
8803 {
8804 /* instr [31,21] = 0101 1110 111
8805 instr [20,16] = Rm
8806 instr [15,10] = 0100 01
8807 instr [9, 5] = Rn
8808 instr [4, 0] = Rd. */
8809
8810 unsigned rm = INSTR (20, 16);
8811 unsigned rn = INSTR (9, 5);
8812 unsigned rd = INSTR (4, 0);
8813 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
8814
8815 NYI_assert (31, 21, 0x2F7);
8816 NYI_assert (15, 10, 0x11);
8817
8818 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8819 if (shift >= 0)
8820 aarch64_set_vec_s64 (cpu, rd, 0,
8821 aarch64_get_vec_s64 (cpu, rn, 0) << shift);
8822 else
8823 aarch64_set_vec_s64 (cpu, rd, 0,
8824 aarch64_get_vec_s64 (cpu, rn, 0) >> - shift);
8825 }
8826
8827 static void
8828 do_scalar_shift (sim_cpu *cpu)
8829 {
8830 /* instr [31,23] = 0101 1111 0
8831 instr [22,16] = shift amount
8832 instr [15,10] = 0101 01 [SHL]
8833 instr [15,10] = 0000 01 [SSHR]
8834 instr [9, 5] = Rn
8835 instr [4, 0] = Rd. */
8836
8837 unsigned rn = INSTR (9, 5);
8838 unsigned rd = INSTR (4, 0);
8839 unsigned amount;
8840
8841 NYI_assert (31, 23, 0x0BE);
8842
8843 if (INSTR (22, 22) == 0)
8844 HALT_UNALLOC;
8845
8846 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
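/* immh<3> (instr[22]) is known to be 1 here, so these are 64-bit shifts:
   the right-shift amount is 128 - immh:immb and the left-shift amount
   is immh:immb - 64.  */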
8847 switch (INSTR (15, 10))
8848 {
8849 case 0x01: /* SSHR */
8850 amount = 128 - INSTR (22, 16);
8851 aarch64_set_vec_s64 (cpu, rd, 0,
8852 aarch64_get_vec_s64 (cpu, rn, 0) >> amount);
8853 return;
8854 case 0x15: /* SHL */
8855 amount = INSTR (22, 16) - 64;
8856 aarch64_set_vec_u64 (cpu, rd, 0,
8857 aarch64_get_vec_u64 (cpu, rn, 0) << amount);
8858 return;
8859 default:
8860 HALT_NYI;
8861 }
8862 }
8863
8864 /* FCMEQ FCMGT FCMGE. */
8865 static void
8866 do_scalar_FCM (sim_cpu *cpu)
8867 {
8868 /* instr [31,30] = 01
8869 instr [29] = U
8870 instr [28,24] = 1 1110
8871 instr [23] = E
8872 instr [22] = size
8873 instr [21] = 1
8874 instr [20,16] = Rm
8875 instr [15,12] = 1110
8876 instr [11] = AC
8877 instr [10] = 1
8878 instr [9, 5] = Rn
8879 instr [4, 0] = Rd. */
8880
8881 unsigned rm = INSTR (20, 16);
8882 unsigned rn = INSTR (9, 5);
8883 unsigned rd = INSTR (4, 0);
8884 unsigned EUac = (INSTR (23, 23) << 2) | (INSTR (29, 29) << 1) | INSTR (11, 11);
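/* EUac selects the comparison: 0 ==> FCMEQ, 2 ==> FCMGE, 3 ==> FACGE,
   6 ==> FCMGT, 7 ==> FACGT; other values are unallocated.  */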
8885 unsigned result;
8886 float val1;
8887 float val2;
8888
8889 NYI_assert (31, 30, 1);
8890 NYI_assert (28, 24, 0x1E);
8891 NYI_assert (21, 21, 1);
8892 NYI_assert (15, 12, 0xE);
8893 NYI_assert (10, 10, 1);
8894
8895 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8896 if (INSTR (22, 22))
8897 {
8898 double val1 = aarch64_get_FP_double (cpu, rn);
8899 double val2 = aarch64_get_FP_double (cpu, rm);
8900
8901 switch (EUac)
8902 {
8903 case 0: /* 000 */
8904 result = val1 == val2;
8905 break;
8906
8907 case 3: /* 011 */
8908 val1 = fabs (val1);
8909 val2 = fabs (val2);
8910 /* Fall through. */
8911 case 2: /* 010 */
8912 result = val1 >= val2;
8913 break;
8914
8915 case 7: /* 111 */
8916 val1 = fabs (val1);
8917 val2 = fabs (val2);
8918 /* Fall through. */
8919 case 6: /* 110 */
8920 result = val1 > val2;
8921 break;
8922
8923 default:
8924 HALT_UNALLOC;
8925 }
8926
8927 aarch64_set_vec_u64 (cpu, rd, 0, result ? -1 : 0);
8928 return;
8929 }
8930
8931 val1 = aarch64_get_FP_float (cpu, rn);
8932 val2 = aarch64_get_FP_float (cpu, rm);
8933
8934 switch (EUac)
8935 {
8936 case 0: /* 000 */
8937 result = val1 == val2;
8938 break;
8939
8940 case 3: /* 011 */
8941 val1 = fabsf (val1);
8942 val2 = fabsf (val2);
8943 /* Fall through. */
8944 case 2: /* 010 */
8945 result = val1 >= val2;
8946 break;
8947
8948 case 7: /* 111 */
8949 val1 = fabsf (val1);
8950 val2 = fabsf (val2);
8951 /* Fall through. */
8952 case 6: /* 110 */
8953 result = val1 > val2;
8954 break;
8955
8956 default:
8957 HALT_UNALLOC;
8958 }
8959
8960 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
8961 }
8962
8963 /* An alias of DUP. */
8964 static void
8965 do_scalar_MOV (sim_cpu *cpu)
8966 {
8967 /* instr [31,21] = 0101 1110 000
8968 instr [20,16] = imm5
8969 instr [15,10] = 0000 01
8970 instr [9, 5] = Rn
8971 instr [4, 0] = Rd. */
8972
8973 unsigned rn = INSTR (9, 5);
8974 unsigned rd = INSTR (4, 0);
8975 unsigned index;
8976
8977 NYI_assert (31, 21, 0x2F0);
8978 NYI_assert (15, 10, 0x01);
8979
8980 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8981 if (INSTR (16, 16))
8982 {
8983 /* 8-bit. */
8984 index = INSTR (20, 17);
8985 aarch64_set_vec_u8
8986 (cpu, rd, 0, aarch64_get_vec_u8 (cpu, rn, index));
8987 }
8988 else if (INSTR (17, 17))
8989 {
8990 /* 16-bit. */
8991 index = INSTR (20, 18);
8992 aarch64_set_vec_u16
8993 (cpu, rd, 0, aarch64_get_vec_u16 (cpu, rn, index));
8994 }
8995 else if (INSTR (18, 18))
8996 {
8997 /* 32-bit. */
8998 index = INSTR (20, 19);
8999 aarch64_set_vec_u32
9000 (cpu, rd, 0, aarch64_get_vec_u32 (cpu, rn, index));
9001 }
9002 else if (INSTR (19, 19))
9003 {
9004 /* 64-bit. */
9005 index = INSTR (20, 20);
9006 aarch64_set_vec_u64
9007 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, index));
9008 }
9009 else
9010 HALT_UNALLOC;
9011 }
9012
9013 static void
9014 do_scalar_NEG (sim_cpu *cpu)
9015 {
9016 /* instr [31,10] = 0111 1110 1110 0000 1011 10
9017 instr [9, 5] = Rn
9018 instr [4, 0] = Rd. */
9019
9020 unsigned rn = INSTR (9, 5);
9021 unsigned rd = INSTR (4, 0);
9022
9023 NYI_assert (31, 10, 0x1FB82E);
9024
9025 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9026 aarch64_set_vec_u64 (cpu, rd, 0, - aarch64_get_vec_u64 (cpu, rn, 0));
9027 }
9028
9029 static void
9030 do_scalar_USHL (sim_cpu *cpu)
9031 {
9032 /* instr [31,21] = 0111 1110 111
9033 instr [20,16] = Rm
9034 instr [15,10] = 0100 01
9035 instr [9, 5] = Rn
9036 instr [4, 0] = Rd. */
9037
9038 unsigned rm = INSTR (20, 16);
9039 unsigned rn = INSTR (9, 5);
9040 unsigned rd = INSTR (4, 0);
9041 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
9042
9043 NYI_assert (31, 21, 0x3F7);
9044 NYI_assert (15, 10, 0x11);
9045
9046 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9047 if (shift >= 0)
9048 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) << shift);
9049 else
9050 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) >> - shift);
9051 }
9052
9053 static void
9054 do_double_add (sim_cpu *cpu)
9055 {
9056 /* instr [31,21] = 0101 1110 111
9057 instr [20,16] = Fn
9058 instr [15,10] = 1000 01
9059 instr [9,5] = Fm
9060 instr [4,0] = Fd. */
9061 unsigned Fd;
9062 unsigned Fm;
9063 unsigned Fn;
9064 double val1;
9065 double val2;
9066
9067 NYI_assert (31, 21, 0x2F7);
9068 NYI_assert (15, 10, 0x21);
9069
9070 Fd = INSTR (4, 0);
9071 Fm = INSTR (9, 5);
9072 Fn = INSTR (20, 16);
9073
9074 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9075 val1 = aarch64_get_FP_double (cpu, Fm);
9076 val2 = aarch64_get_FP_double (cpu, Fn);
9077
9078 aarch64_set_FP_double (cpu, Fd, val1 + val2);
9079 }
9080
9081 static void
9082 do_scalar_UCVTF (sim_cpu *cpu)
9083 {
9084 /* instr [31,23] = 0111 1110 0
9085 instr [22] = single(0)/double(1)
9086 instr [21,10] = 10 0001 1101 10
9087 instr [9,5] = rn
9088 instr [4,0] = rd. */
9089
9090 unsigned rn = INSTR (9, 5);
9091 unsigned rd = INSTR (4, 0);
9092
9093 NYI_assert (31, 23, 0x0FC);
9094 NYI_assert (21, 10, 0x876);
9095
9096 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9097 if (INSTR (22, 22))
9098 {
9099 uint64_t val = aarch64_get_vec_u64 (cpu, rn, 0);
9100
9101 aarch64_set_vec_double (cpu, rd, 0, (double) val);
9102 }
9103 else
9104 {
9105 uint32_t val = aarch64_get_vec_u32 (cpu, rn, 0);
9106
9107 aarch64_set_vec_float (cpu, rd, 0, (float) val);
9108 }
9109 }
9110
9111 static void
9112 do_scalar_vec (sim_cpu *cpu)
9113 {
9114 /* instr [30] = 1. */
9115 /* instr [28,25] = 1111. */
9116 switch (INSTR (31, 23))
9117 {
9118 case 0xBC:
9119 switch (INSTR (15, 10))
9120 {
9121 case 0x01: do_scalar_MOV (cpu); return;
9122 case 0x39: do_scalar_FCM (cpu); return;
9123 case 0x3B: do_scalar_FCM (cpu); return;
9124 }
9125 break;
9126
9127 case 0xBE: do_scalar_shift (cpu); return;
9128
9129 case 0xFC:
9130 switch (INSTR (15, 10))
9131 {
9132 case 0x36:
9133 switch (INSTR (21, 16))
9134 {
9135 case 0x30: do_scalar_FADDP (cpu); return;
9136 case 0x21: do_scalar_UCVTF (cpu); return;
9137 }
9138 HALT_NYI;
9139 case 0x39: do_scalar_FCM (cpu); return;
9140 case 0x3B: do_scalar_FCM (cpu); return;
9141 }
9142 break;
9143
9144 case 0xFD:
9145 switch (INSTR (15, 10))
9146 {
9147 case 0x0D: do_scalar_CMGT (cpu); return;
9148 case 0x11: do_scalar_USHL (cpu); return;
9149 case 0x2E: do_scalar_NEG (cpu); return;
9150 case 0x35: do_scalar_FABD (cpu); return;
9151 case 0x39: do_scalar_FCM (cpu); return;
9152 case 0x3B: do_scalar_FCM (cpu); return;
9153 default:
9154 HALT_NYI;
9155 }
9156
9157 case 0xFE: do_scalar_USHR (cpu); return;
9158
9159 case 0xBD:
9160 switch (INSTR (15, 10))
9161 {
9162 case 0x21: do_double_add (cpu); return;
9163 case 0x11: do_scalar_SSHL (cpu); return;
9164 default:
9165 HALT_NYI;
9166 }
9167
9168 default:
9169 HALT_NYI;
9170 }
9171 }
9172
9173 static void
9174 dexAdvSIMD1 (sim_cpu *cpu)
9175 {
9176 /* instr [28,25] = 1 111. */
9177
9178 /* We are currently only interested in the basic
9179 scalar fp routines which all have bit 30 = 0. */
9180 if (INSTR (30, 30))
9181 do_scalar_vec (cpu);
9182
9183 /* instr[24] is set for FP data processing 3-source and clear for
9184 all other basic scalar fp instruction groups. */
9185 else if (INSTR (24, 24))
9186 dexSimpleFPDataProc3Source (cpu);
9187
9188 /* instr[21] is clear for floating <-> fixed conversions and set for
9189 all other basic scalar fp instruction groups. */
9190 else if (!INSTR (21, 21))
9191 dexSimpleFPFixedConvert (cpu);
9192
9193 /* instr[11,10] : 01 ==> cond compare, 10 ==> Data Proc 2 Source
9194 11 ==> cond select, 00 ==> other. */
9195 else
9196 switch (INSTR (11, 10))
9197 {
9198 case 1: dexSimpleFPCondCompare (cpu); return;
9199 case 2: dexSimpleFPDataProc2Source (cpu); return;
9200 case 3: dexSimpleFPCondSelect (cpu); return;
9201
9202 default:
9203 /* Now an ordered cascade of tests.
9204 FP immediate has instr [12] == 1.
9205 FP compare has instr [13] == 1.
9206 FP Data Proc 1 Source has instr [14] == 1.
9207 FP floating <--> integer conversions has instr [15] == 0. */
9208 if (INSTR (12, 12))
9209 dexSimpleFPImmediate (cpu);
9210
9211 else if (INSTR (13, 13))
9212 dexSimpleFPCompare (cpu);
9213
9214 else if (INSTR (14, 14))
9215 dexSimpleFPDataProc1Source (cpu);
9216
9217 else if (!INSTR (15, 15))
9218 dexSimpleFPIntegerConvert (cpu);
9219
9220 else
9221 /* If we get here then instr[15] == 1 which means UNALLOC. */
9222 HALT_UNALLOC;
9223 }
9224 }
9225
9226 /* PC relative addressing. */
9227
9228 static void
9229 pcadr (sim_cpu *cpu)
9230 {
9231 /* instr[31] = op : 0 ==> ADR, 1 ==> ADRP
9232 instr[30,29] = immlo
9233 instr[23,5] = immhi. */
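/* E.g. (illustrative): ADR computes PC + imm21, while ADRP computes
   (PC & ~0xfff) + (imm21 << 12); with PC = 0x400123 and
   immhi:immlo = 1, ADRP yields 0x401000.  */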
9234 uint64_t address;
9235 unsigned rd = INSTR (4, 0);
9236 uint32_t isPage = INSTR (31, 31);
9237 union { int64_t s64; uint64_t u64; } imm;
9238 uint64_t offset;
9239
9240 imm.s64 = simm64 (aarch64_get_instr (cpu), 23, 5);
9241 offset = imm.u64;
9242 offset = (offset << 2) | INSTR (30, 29);
9243
9244 address = aarch64_get_PC (cpu);
9245
9246 if (isPage)
9247 {
9248 offset <<= 12;
9249 address &= ~0xfff;
9250 }
9251
9252 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9253 aarch64_set_reg_u64 (cpu, rd, NO_SP, address + offset);
9254 }
9255
9256 /* Specific decode and execute for group Data Processing Immediate. */
9257
9258 static void
9259 dexPCRelAddressing (sim_cpu *cpu)
9260 {
9261 /* assert instr[28,24] = 10000. */
9262 pcadr (cpu);
9263 }
9264
9265 /* Immediate logical.
9266 The bimm32/64 argument is constructed by replicating a 2, 4, 8,
9267 16, 32 or 64 bit sequence pulled out at decode and possibly
9268 inverting it.
9269
9270 N.B. the output register (dest) can normally be Xn or SP;
9271 the exception is flag setting instructions, which may
9272 only use Xn for the output (dest). The input register can
9273 never be SP. */
9274
9275 /* 32 bit and immediate. */
9276 static void
9277 and32 (sim_cpu *cpu, uint32_t bimm)
9278 {
9279 unsigned rn = INSTR (9, 5);
9280 unsigned rd = INSTR (4, 0);
9281
9282 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9283 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9284 aarch64_get_reg_u32 (cpu, rn, NO_SP) & bimm);
9285 }
9286
9287 /* 64 bit and immediate. */
9288 static void
9289 and64 (sim_cpu *cpu, uint64_t bimm)
9290 {
9291 unsigned rn = INSTR (9, 5);
9292 unsigned rd = INSTR (4, 0);
9293
9294 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9295 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9296 aarch64_get_reg_u64 (cpu, rn, NO_SP) & bimm);
9297 }
9298
9299 /* 32 bit and immediate set flags. */
9300 static void
9301 ands32 (sim_cpu *cpu, uint32_t bimm)
9302 {
9303 unsigned rn = INSTR (9, 5);
9304 unsigned rd = INSTR (4, 0);
9305
9306 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9307 uint32_t value2 = bimm;
9308
9309 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9310 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9311 set_flags_for_binop32 (cpu, value1 & value2);
9312 }
9313
9314 /* 64 bit and immediate set flags. */
9315 static void
9316 ands64 (sim_cpu *cpu, uint64_t bimm)
9317 {
9318 unsigned rn = INSTR (9, 5);
9319 unsigned rd = INSTR (4, 0);
9320
9321 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9322 uint64_t value2 = bimm;
9323
9324 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9325 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9326 set_flags_for_binop64 (cpu, value1 & value2);
9327 }
9328
9329 /* 32 bit exclusive or immediate. */
9330 static void
9331 eor32 (sim_cpu *cpu, uint32_t bimm)
9332 {
9333 unsigned rn = INSTR (9, 5);
9334 unsigned rd = INSTR (4, 0);
9335
9336 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9337 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9338 aarch64_get_reg_u32 (cpu, rn, NO_SP) ^ bimm);
9339 }
9340
9341 /* 64 bit exclusive or immediate. */
9342 static void
9343 eor64 (sim_cpu *cpu, uint64_t bimm)
9344 {
9345 unsigned rn = INSTR (9, 5);
9346 unsigned rd = INSTR (4, 0);
9347
9348 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9349 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9350 aarch64_get_reg_u64 (cpu, rn, NO_SP) ^ bimm);
9351 }
9352
9353 /* 32 bit or immediate. */
9354 static void
9355 orr32 (sim_cpu *cpu, uint32_t bimm)
9356 {
9357 unsigned rn = INSTR (9, 5);
9358 unsigned rd = INSTR (4, 0);
9359
9360 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9361 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9362 aarch64_get_reg_u32 (cpu, rn, NO_SP) | bimm);
9363 }
9364
9365 /* 64 bit or immediate. */
9366 static void
9367 orr64 (sim_cpu *cpu, uint64_t bimm)
9368 {
9369 unsigned rn = INSTR (9, 5);
9370 unsigned rd = INSTR (4, 0);
9371
9372 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9373 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9374 aarch64_get_reg_u64 (cpu, rn, NO_SP) | bimm);
9375 }
9376
9377 /* Logical shifted register.
9378 These allow an optional LSL, ASR, LSR or ROR to the second source
9379 register with a count up to the register bit count.
9380 N.B. register args may not be SP. */
9381
9382 /* 32 bit AND shifted register. */
9383 static void
9384 and32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9385 {
9386 unsigned rm = INSTR (20, 16);
9387 unsigned rn = INSTR (9, 5);
9388 unsigned rd = INSTR (4, 0);
9389
9390 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9391 aarch64_set_reg_u64
9392 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9393 & shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9394 }
9395
9396 /* 64 bit AND shifted register. */
9397 static void
9398 and64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9399 {
9400 unsigned rm = INSTR (20, 16);
9401 unsigned rn = INSTR (9, 5);
9402 unsigned rd = INSTR (4, 0);
9403
9404 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9405 aarch64_set_reg_u64
9406 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9407 & shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9408 }
9409
9410 /* 32 bit AND shifted register setting flags. */
9411 static void
9412 ands32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9413 {
9414 unsigned rm = INSTR (20, 16);
9415 unsigned rn = INSTR (9, 5);
9416 unsigned rd = INSTR (4, 0);
9417
9418 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9419 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9420 shift, count);
9421
9422 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9423 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9424 set_flags_for_binop32 (cpu, value1 & value2);
9425 }
9426
9427 /* 64 bit AND shifted register setting flags. */
9428 static void
9429 ands64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9430 {
9431 unsigned rm = INSTR (20, 16);
9432 unsigned rn = INSTR (9, 5);
9433 unsigned rd = INSTR (4, 0);
9434
9435 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9436 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9437 shift, count);
9438
9439 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9440 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9441 set_flags_for_binop64 (cpu, value1 & value2);
9442 }
9443
9444 /* 32 bit BIC shifted register. */
9445 static void
9446 bic32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9447 {
9448 unsigned rm = INSTR (20, 16);
9449 unsigned rn = INSTR (9, 5);
9450 unsigned rd = INSTR (4, 0);
9451
9452 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9453 aarch64_set_reg_u64
9454 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9455 & ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9456 }
9457
9458 /* 64 bit BIC shifted register. */
9459 static void
9460 bic64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9461 {
9462 unsigned rm = INSTR (20, 16);
9463 unsigned rn = INSTR (9, 5);
9464 unsigned rd = INSTR (4, 0);
9465
9466 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9467 aarch64_set_reg_u64
9468 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9469 & ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9470 }
9471
9472 /* 32 bit BIC shifted register setting flags. */
9473 static void
9474 bics32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9475 {
9476 unsigned rm = INSTR (20, 16);
9477 unsigned rn = INSTR (9, 5);
9478 unsigned rd = INSTR (4, 0);
9479
9480 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9481 uint32_t value2 = ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9482 shift, count);
9483
9484 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9485 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9486 set_flags_for_binop32 (cpu, value1 & value2);
9487 }
9488
9489 /* 64 bit BIC shifted register setting flags. */
9490 static void
9491 bics64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9492 {
9493 unsigned rm = INSTR (20, 16);
9494 unsigned rn = INSTR (9, 5);
9495 unsigned rd = INSTR (4, 0);
9496
9497 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9498 uint64_t value2 = ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9499 shift, count);
9500
9501 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9502 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9503 set_flags_for_binop64 (cpu, value1 & value2);
9504 }
9505
9506 /* 32 bit EON shifted register. */
9507 static void
9508 eon32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9509 {
9510 unsigned rm = INSTR (20, 16);
9511 unsigned rn = INSTR (9, 5);
9512 unsigned rd = INSTR (4, 0);
9513
9514 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9515 aarch64_set_reg_u64
9516 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9517 ^ ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9518 }
9519
9520 /* 64 bit EON shifted register. */
9521 static void
9522 eon64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9523 {
9524 unsigned rm = INSTR (20, 16);
9525 unsigned rn = INSTR (9, 5);
9526 unsigned rd = INSTR (4, 0);
9527
9528 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9529 aarch64_set_reg_u64
9530 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9531 ^ ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9532 }
9533
9534 /* 32 bit EOR shifted register. */
9535 static void
9536 eor32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9537 {
9538 unsigned rm = INSTR (20, 16);
9539 unsigned rn = INSTR (9, 5);
9540 unsigned rd = INSTR (4, 0);
9541
9542 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9543 aarch64_set_reg_u64
9544 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9545 ^ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9546 }
9547
9548 /* 64 bit EOR shifted register. */
9549 static void
9550 eor64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9551 {
9552 unsigned rm = INSTR (20, 16);
9553 unsigned rn = INSTR (9, 5);
9554 unsigned rd = INSTR (4, 0);
9555
9556 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9557 aarch64_set_reg_u64
9558 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9559 ^ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9560 }
9561
9562 /* 32 bit ORR shifted register. */
9563 static void
9564 orr32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9565 {
9566 unsigned rm = INSTR (20, 16);
9567 unsigned rn = INSTR (9, 5);
9568 unsigned rd = INSTR (4, 0);
9569
9570 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9571 aarch64_set_reg_u64
9572 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9573 | shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9574 }
9575
9576 /* 64 bit ORR shifted register. */
9577 static void
9578 orr64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9579 {
9580 unsigned rm = INSTR (20, 16);
9581 unsigned rn = INSTR (9, 5);
9582 unsigned rd = INSTR (4, 0);
9583
9584 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9585 aarch64_set_reg_u64
9586 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9587 | shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9588 }
9589
9590 /* 32 bit ORN shifted register. */
9591 static void
9592 orn32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9593 {
9594 unsigned rm = INSTR (20, 16);
9595 unsigned rn = INSTR (9, 5);
9596 unsigned rd = INSTR (4, 0);
9597
9598 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9599 aarch64_set_reg_u64
9600 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9601 | ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9602 }
9603
9604 /* 64 bit ORN shifted register. */
9605 static void
9606 orn64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9607 {
9608 unsigned rm = INSTR (20, 16);
9609 unsigned rn = INSTR (9, 5);
9610 unsigned rd = INSTR (4, 0);
9611
9612 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9613 aarch64_set_reg_u64
9614 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9615 | ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9616 }
9617
9618 static void
9619 dexLogicalImmediate (sim_cpu *cpu)
9620 {
9621 /* assert instr[28,23] = 100100
9622 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9623 instr[30,29] = op : 0 ==> AND, 1 ==> ORR, 2 ==> EOR, 3 ==> ANDS
9624 instr[22] = N : used to construct immediate mask
9625 instr[21,16] = immr
9626 instr[15,10] = imms
9627 instr[9,5] = Rn
9628 instr[4,0] = Rd */
9629
9630 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
9631 uint32_t size = INSTR (31, 31);
9632 uint32_t N = INSTR (22, 22);
9633 /* uint32_t immr = INSTR (21, 16); */
9634 /* uint32_t imms = INSTR (15, 10); */
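/* The N:immr:imms bits together index a table of expanded bitmask
   immediates, pre-computed (via expand_logical_immediate) at
   initialisation; entries for invalid encodings are left zero, hence
   the !bimm64 check below.  */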
9635 uint32_t index = INSTR (22, 10);
9636 uint64_t bimm64 = LITable [index];
9637 uint32_t dispatch = INSTR (30, 29);
9638
9639 if (~size & N)
9640 HALT_UNALLOC;
9641
9642 if (!bimm64)
9643 HALT_UNALLOC;
9644
9645 if (size == 0)
9646 {
9647 uint32_t bimm = (uint32_t) bimm64;
9648
9649 switch (dispatch)
9650 {
9651 case 0: and32 (cpu, bimm); return;
9652 case 1: orr32 (cpu, bimm); return;
9653 case 2: eor32 (cpu, bimm); return;
9654 case 3: ands32 (cpu, bimm); return;
9655 }
9656 }
9657 else
9658 {
9659 switch (dispatch)
9660 {
9661 case 0: and64 (cpu, bimm64); return;
9662 case 1: orr64 (cpu, bimm64); return;
9663 case 2: eor64 (cpu, bimm64); return;
9664 case 3: ands64 (cpu, bimm64); return;
9665 }
9666 }
9667 HALT_UNALLOC;
9668 }
9669
9670 /* Immediate move.
9671 The uimm argument is a 16 bit value to be inserted into the
9672 target register. The pos argument locates the 16 bit word in the
9673 dest register, i.e. it is in {0, 1} for 32 bit and {0, 1, 2,
9674 3} for 64 bit.
9675 N.B. the register arg may not be SP, so it should be
9676 accessed using the setGZRegisterXXX accessors. */
9677
9678 /* 32 bit move 16 bit immediate zero remaining shorts. */
9679 static void
9680 movz32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9681 {
9682 unsigned rd = INSTR (4, 0);
9683
9684 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9685 aarch64_set_reg_u64 (cpu, rd, NO_SP, val << (pos * 16));
9686 }
9687
9688 /* 64 bit move 16 bit immediate zero remaining shorts. */
9689 static void
9690 movz64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9691 {
9692 unsigned rd = INSTR (4, 0);
9693
9694 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9695 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((uint64_t) val) << (pos * 16));
9696 }
9697
9698 /* 32 bit move 16 bit immediate negated. */
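/* E.g. (illustrative) MOVN W0, #0 writes 0xffffffff: the immediate is
   shifted into place and then inverted.  */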
9699 static void
9700 movn32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9701 {
9702 unsigned rd = INSTR (4, 0);
9703
9704 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9705 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((val << (pos * 16)) ^ 0xffffffffU));
9706 }
9707
9708 /* 64 bit move 16 bit immediate negated. */
9709 static void
9710 movn64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9711 {
9712 unsigned rd = INSTR (4, 0);
9713
9714 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9715 aarch64_set_reg_u64
9716 (cpu, rd, NO_SP, ((((uint64_t) val) << (pos * 16))
9717 ^ 0xffffffffffffffffULL));
9718 }
9719
9720 /* 32 bit move 16 bit immediate keep remaining shorts. */
9721 static void
9722 movk32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9723 {
9724 unsigned rd = INSTR (4, 0);
9725 uint32_t current = aarch64_get_reg_u32 (cpu, rd, NO_SP);
9726 uint32_t value = val << (pos * 16);
9727 uint32_t mask = ~(0xffffU << (pos * 16));
9728
9729 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9730 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
9731 }
9732
9733 /* 64 bit move 16 bit immediate keep remaining shorts. */
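/* E.g. (illustrative) MOVK X0, #0xbeef, LSL #16 with X0 previously
   0x0123456789abcdef leaves X0 = 0x01234567beefcdef: only the
   selected 16 bit word is replaced.  */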
9734 static void
9735 movk64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9736 {
9737 unsigned rd = INSTR (4, 0);
9738 uint64_t current = aarch64_get_reg_u64 (cpu, rd, NO_SP);
9739 uint64_t value = (uint64_t) val << (pos * 16);
9740 uint64_t mask = ~(0xffffULL << (pos * 16));
9741
9742 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9743 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
9744 }
9745
9746 static void
9747 dexMoveWideImmediate (sim_cpu *cpu)
9748 {
9749 /* assert instr[28:23] = 100101
9750 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9751 instr[30,29] = op : 0 ==> MOVN, 1 ==> UNALLOC, 2 ==> MOVZ, 3 ==> MOVK
9752 instr[22,21] = shift : 00 == LSL#0, 01 = LSL#16, 10 = LSL#32, 11 = LSL#48
9753 instr[20,5] = uimm16
9754 instr[4,0] = Rd */
9755
9756 /* N.B. the (multiple of 16) shift is applied by the called routine,
9757 we just pass the multiplier. */
9758
9759 uint32_t imm;
9760 uint32_t size = INSTR (31, 31);
9761 uint32_t op = INSTR (30, 29);
9762 uint32_t shift = INSTR (22, 21);
9763
9764 /* A 32 bit operation can only shift by 0 or 1 lot of 16;
9765 anything else is an unallocated instruction. */
9766 if (size == 0 && (shift > 1))
9767 HALT_UNALLOC;
9768
9769 if (op == 1)
9770 HALT_UNALLOC;
9771
9772 imm = INSTR (20, 5);
9773
9774 if (size == 0)
9775 {
9776 if (op == 0)
9777 movn32 (cpu, imm, shift);
9778 else if (op == 2)
9779 movz32 (cpu, imm, shift);
9780 else
9781 movk32 (cpu, imm, shift);
9782 }
9783 else
9784 {
9785 if (op == 0)
9786 movn64 (cpu, imm, shift);
9787 else if (op == 2)
9788 movz64 (cpu, imm, shift);
9789 else
9790 movk64 (cpu, imm, shift);
9791 }
9792 }
9793
9794 /* Bitfield operations.
9795 These take a pair of bit positions r and s which are in {0..31}
9796 or {0..63} depending on the instruction word size.
9797 N.B. register args may not be SP. */
9798
9799 /* OK, we start with ubfm, which just needs to pick
9800 some bits out of the source, zero the rest, and write
9801 the result to dest. Just two logical shifts are needed. */
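/* E.g. (illustrative) UBFM W0, W1, #8, #15 (r = 8, s = 15) extracts
   bits 15:8 of W1 into the bottom of W0: with W1 = 0x12345678 the
   value is shifted left by 16 and then right by 24, giving W0 = 0x56.
   N.B. LSL/LSR (immediate), UBFX, UXTB and friends are assembler
   aliases of UBFM.  */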
9802
9803 /* 32 bit bitfield move, left and right of affected zeroed
9804 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
9805 static void
9806 ubfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9807 {
9808 unsigned rd;
9809 unsigned rn = INSTR (9, 5);
9810 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9811
9812 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
9813 if (r <= s)
9814 {
9815 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
9816 We want only bits s:xxx:r at the bottom of the word
9817 so we LSL bit s up to bit 31 i.e. by 31 - s
9818 and then we LSR to bring bit 31 down to bit s - r
9819 i.e. by 31 + r - s. */
9820 value <<= 31 - s;
9821 value >>= 31 + r - s;
9822 }
9823 else
9824 {
9825 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0
9826 We want only bits s:xxx:0 starting at bit 31-(r-1)
9827 so we LSL bit s up to bit 31 i.e. by 31 - s
9828 and then we LSL to bring bit 31 down to 31-(r-1)+s
9829 i.e. by r - (s + 1). */
9830 value <<= 31 - s;
9831 value >>= r - (s + 1);
9832 }
9833
9834 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9835 rd = INSTR (4, 0);
9836 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
9837 }
9838
9839 /* 64 bit bitfield move, left and right of affected zeroed
9840 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
9841 static void
9842 ubfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9843 {
9844 unsigned rd;
9845 unsigned rn = INSTR (9, 5);
9846 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9847
9848 if (r <= s)
9849 {
9850 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
9851 We want only bits s:xxx:r at the bottom of the word.
9852 So we LSL bit s up to bit 63 i.e. by 63 - s
9853 and then we LSR to bring bit 63 down to bit s - r
9854 i.e. by 63 + r - s. */
9855 value <<= 63 - s;
9856 value >>= 63 + r - s;
9857 }
9858 else
9859 {
9860 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0.
9861 We want only bits s:xxx:0 starting at bit 63-(r-1).
9862 So we LSL bit s up to bit 63 i.e. by 63 - s
9863 and then we LSL to bring bit 63 down to 63-(r-1)+s
9864 i.e. by r - (s + 1). */
9865 value <<= 63 - s;
9866 value >>= r - (s + 1);
9867 }
9868
9869 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9870 rd = INSTR (4, 0);
9871 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
9872 }
9873
9874 /* The signed versions need to insert sign bits
9875 on the left of the inserted bit field, so we do
9876 much the same as the unsigned version except we
9877 use an arithmetic shift right -- this just means
9878 we need to operate on signed values. */
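/* E.g. (illustrative) SBFM W0, W1, #8, #15 with W1 = 0x1234f678:
   shifting left by 16 gives 0xf6780000 (negative), and the arithmetic
   shift right by 24 gives W0 = 0xfffffff6, i.e. bits 15:8 sign
   extended.  */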
9879
9880 /* 32 bit bitfield move, left of affected sign-extended, right zeroed. */
9881 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
9882 static void
9883 sbfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9884 {
9885 unsigned rd;
9886 unsigned rn = INSTR (9, 5);
9887 /* as per ubfm32 but use an ASR instead of an LSR. */
9888 int32_t value = aarch64_get_reg_s32 (cpu, rn, NO_SP);
9889
9890 if (r <= s)
9891 {
9892 value <<= 31 - s;
9893 value >>= 31 + r - s;
9894 }
9895 else
9896 {
9897 value <<= 31 - s;
9898 value >>= r - (s + 1);
9899 }
9900
9901 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9902 rd = INSTR (4, 0);
9903 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
9904 }
9905
9906 /* 64 bit bitfield move, left of affected sign-extended, right zeroed. */
9907 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
9908 static void
9909 sbfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9910 {
9911 unsigned rd;
9912 unsigned rn = INSTR (9, 5);
9913 /* as per ubfm but use an ASR instead of an LSR. */
9914 int64_t value = aarch64_get_reg_s64 (cpu, rn, NO_SP);
9915
9916 if (r <= s)
9917 {
9918 value <<= 63 - s;
9919 value >>= 63 + r - s;
9920 }
9921 else
9922 {
9923 value <<= 63 - s;
9924 value >>= r - (s + 1);
9925 }
9926
9927 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9928 rd = INSTR (4, 0);
9929 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
9930 }
9931
9932 /* Finally, these versions leave non-affected bits
9933 as is, so we need to generate the bits as per
9934 ubfm and also generate a mask to pick the
9935 bits from the original and computed values. */
9936
9937 /* 32 bit bitfield move, non-affected bits left as is.
9938 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
9939 static void
9940 bfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9941 {
9942 unsigned rn = INSTR (9, 5);
9943 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9944 uint32_t mask = -1;
9945 unsigned rd;
9946 uint32_t value2;
9947
9948 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
9949 if (r <= s)
9950 {
9951 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
9952 We want only bits s:xxx:r at the bottom of the word
9953 so we LSL bit s up to bit 31 i.e. by 31 - s
9954 and then we LSR to bring bit 31 down to bit s - r
9955 i.e. by 31 + r - s. */
9956 value <<= 31 - s;
9957 value >>= 31 + r - s;
9958 /* the mask must include the same bits. */
9959 mask <<= 31 - s;
9960 mask >>= 31 + r - s;
9961 }
9962 else
9963 {
9964 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0.
9965 We want only bits s:xxx:0 starting at bit 31-(r-1)
9966 so we LSL bit s up to bit 31 i.e. by 31 - s
9967 and then we LSL to bring bit 31 down to 31-(r-1)+s
9968 i.e. by r - (s + 1). */
9969 value <<= 31 - s;
9970 value >>= r - (s + 1);
9971 /* The mask must include the same bits. */
9972 mask <<= 31 - s;
9973 mask >>= r - (s + 1);
9974 }
9975
9976 rd = INSTR (4, 0);
9977 value2 = aarch64_get_reg_u32 (cpu, rd, NO_SP);
9978
9979 value2 &= ~mask;
9980 value2 |= value;
9981
9982 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9983 aarch64_set_reg_u64 (cpu, rd, NO_SP, value2);
9985 }
9986
9987 /* 64 bit bitfield move, non-affected bits left as is.
9988 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
9989 static void
9990 bfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9991 {
9992 unsigned rd;
9993 unsigned rn = INSTR (9, 5);
9994 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9995 uint64_t mask = 0xffffffffffffffffULL;
9996
9997 if (r <= s)
9998 {
9999 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
10000 We want only bits s:xxx:r at the bottom of the word
10001 so we LSL bit s up to bit 63 i.e. by 63 - s
10002 and then we LSR to bring bit 63 down to bit s - r
10003 i.e. by 63 + r - s. */
10004 value <<= 63 - s;
10005 value >>= 63 + r - s;
10006 /* The mask must include the same bits. */
10007 mask <<= 63 - s;
10008 mask >>= 63 + r - s;
10009 }
10010 else
10011 {
10012 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0
10013 We want only bits s:xxx:0 starting at bit 63-(r-1)
10014 so we LSL bit s up to bit 63 i.e. by 63 - s
10015 and then we LSL to bring bit 63 down to 63-(r-1)+s
10016 i.e. by r - (s + 1). */
10017 value <<= 63 - s;
10018 value >>= r - (s + 1);
10019 /* The mask must include the same bits. */
10020 mask <<= 63 - s;
10021 mask >>= r - (s + 1);
10022 }
10023
10024 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10025 rd = INSTR (4, 0);
10026 aarch64_set_reg_u64
10027 (cpu, rd, NO_SP, (aarch64_get_reg_u64 (cpu, rd, NO_SP) & ~mask) | value);
10028 }
10029
10030 static void
10031 dexBitfieldImmediate (sim_cpu *cpu)
10032 {
10033 /* assert instr[28:23] = 100110
10034 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10035 instr[30,29] = op : 0 ==> SBFM, 1 ==> BFM, 2 ==> UBFM, 3 ==> UNALLOC
10036 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit ow UNALLOC
10037 instr[21,16] = immr : 0xxxxx for 32 bit, xxxxxx for 64 bit
10038 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
10039 instr[9,5] = Rn
10040 instr[4,0] = Rd */
10041
10042 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
10043 uint32_t dispatch;
10044 uint32_t imms;
10045 uint32_t size = INSTR (31, 31);
10046 uint32_t N = INSTR (22, 22);
10047 /* 32 bit operations must have immr[5] = 0 and imms[5] = 0
10048 or else we have an UNALLOC. */
10049 uint32_t immr = INSTR (21, 16);
10050
10051 if (~size & N)
10052 HALT_UNALLOC;
10053
10054 if (!size && uimm (immr, 5, 5))
10055 HALT_UNALLOC;
10056
10057 imms = INSTR (15, 10);
10058 if (!size && uimm (imms, 5, 5))
10059 HALT_UNALLOC;
10060
10061 /* Switch on combined size and op. */
10062 dispatch = INSTR (31, 29);
10063 switch (dispatch)
10064 {
10065 case 0: sbfm32 (cpu, immr, imms); return;
10066 case 1: bfm32 (cpu, immr, imms); return;
10067 case 2: ubfm32 (cpu, immr, imms); return;
10068 case 4: sbfm (cpu, immr, imms); return;
10069 case 5: bfm (cpu, immr, imms); return;
10070 case 6: ubfm (cpu, immr, imms); return;
10071 default: HALT_UNALLOC;
10072 }
10073 }
10074
10075 static void
10076 do_EXTR_32 (sim_cpu *cpu)
10077 {
10078 /* instr[31:21] = 00010011100
10079 instr[20,16] = Rm
10080 instr[15,10] = imms : 0xxxxx for 32 bit
10081 instr[9,5] = Rn
10082 instr[4,0] = Rd */
10083 unsigned rm = INSTR (20, 16);
10084 unsigned imms = INSTR (15, 10) & 31;
10085 unsigned rn = INSTR ( 9, 5);
10086 unsigned rd = INSTR ( 4, 0);
10087 uint64_t val1;
10088 uint64_t val2;
10089
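/* EXTR Wd, Wn, Wm, #imms places the 32 bits of Wn:Wm that start at
   bit imms of Wm into Wd, i.e. Wd = (Wn:Wm) >> imms truncated to
   32 bits.  */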
10090 val1 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
10091 val1 >>= imms;
10092 val2 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
10093 val2 <<= (32 - imms);
10094
10095 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10096 aarch64_set_reg_u64 (cpu, rd, NO_SP, (val1 | val2) & 0xffffffffULL);
10097 }
10098
10099 static void
10100 do_EXTR_64 (sim_cpu *cpu)
10101 {
10102 /* instr[31:21] = 10010011100
10103 instr[20,16] = Rm
10104 instr[15,10] = imms
10105 instr[9,5] = Rn
10106 instr[4,0] = Rd */
10107 unsigned rm = INSTR (20, 16);
10108 unsigned imms = INSTR (15, 10) & 63;
10109 unsigned rn = INSTR ( 9, 5);
10110 unsigned rd = INSTR ( 4, 0);
10111 uint64_t val;
10112
10113 val = aarch64_get_reg_u64 (cpu, rm, NO_SP) >> imms;
10114 if (imms != 0) /* Shifting by 64 - 0 would be undefined. */
10115 val |= (aarch64_get_reg_u64 (cpu, rn, NO_SP) << (64 - imms));
10116
10117 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
10118 }
10119
10120 static void
10121 dexExtractImmediate (sim_cpu *cpu)
10122 {
10123 /* assert instr[28:23] = 100111
10124 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10125 instr[30,29] = op21 : 0 ==> EXTR, 1,2,3 ==> UNALLOC
10126 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit or UNALLOC
10127 instr[21] = op0 : must be 0 or UNALLOC
10128 instr[20,16] = Rm
10129 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
10130 instr[9,5] = Rn
10131 instr[4,0] = Rd */
10132
10133 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
10134 /* 64 bit operations must have N = 1 or else we have an UNALLOC. */
10135 uint32_t dispatch;
10136 uint32_t size = INSTR (31, 31);
10137 uint32_t N = INSTR (22, 22);
10138 /* 32 bit operations must have imms[5] = 0
10139 or else we have an UNALLOC. */
10140 uint32_t imms = INSTR (15, 10);
10141
10142 if (size ^ N)
10143 HALT_UNALLOC;
10144
10145 if (!size && uimm (imms, 5, 5))
10146 HALT_UNALLOC;
10147
10148 /* Switch on combined size and op. */
10149 dispatch = INSTR (31, 29);
10150
10151 if (dispatch == 0)
10152 do_EXTR_32 (cpu);
10153
10154 else if (dispatch == 4)
10155 do_EXTR_64 (cpu);
10156
10157 else if (dispatch == 1)
10158 HALT_NYI;
10159 else
10160 HALT_UNALLOC;
10161 }
10162
10163 static void
10164 dexDPImm (sim_cpu *cpu)
10165 {
10166 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
10167 assert group == GROUP_DPIMM_1000 || group == GROUP_DPIMM_1001
10168 bits [25,23] of a DPImm are the secondary dispatch vector. */
10169 uint32_t group2 = dispatchDPImm (aarch64_get_instr (cpu));
10170
10171 switch (group2)
10172 {
10173 case DPIMM_PCADR_000:
10174 case DPIMM_PCADR_001:
10175 dexPCRelAddressing (cpu);
10176 return;
10177
10178 case DPIMM_ADDSUB_010:
10179 case DPIMM_ADDSUB_011:
10180 dexAddSubtractImmediate (cpu);
10181 return;
10182
10183 case DPIMM_LOG_100:
10184 dexLogicalImmediate (cpu);
10185 return;
10186
10187 case DPIMM_MOV_101:
10188 dexMoveWideImmediate (cpu);
10189 return;
10190
10191 case DPIMM_BITF_110:
10192 dexBitfieldImmediate (cpu);
10193 return;
10194
10195 case DPIMM_EXTR_111:
10196 dexExtractImmediate (cpu);
10197 return;
10198
10199 default:
10200 /* Should never reach here. */
10201 HALT_NYI;
10202 }
10203 }
10204
10205 static void
10206 dexLoadUnscaledImmediate (sim_cpu *cpu)
10207 {
10208 /* instr[29,24] == 111_00
10209 instr[21] == 0
10210 instr[11,10] == 00
10211 instr[31,30] = size
10212 instr[26] = V
10213 instr[23,22] = opc
10214 instr[20,12] = simm9
10215 instr[9,5] = rn may be SP. */
10216 /* unsigned rt = INSTR (4, 0); */
10217 uint32_t V = INSTR (26, 26);
10218 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10219 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10220
10221 if (!V)
10222 {
10223 /* GReg operations. */
10224 switch (dispatch)
10225 {
10226 case 0: sturb (cpu, imm); return;
10227 case 1: ldurb32 (cpu, imm); return;
10228 case 2: ldursb64 (cpu, imm); return;
10229 case 3: ldursb32 (cpu, imm); return;
10230 case 4: sturh (cpu, imm); return;
10231 case 5: ldurh32 (cpu, imm); return;
10232 case 6: ldursh64 (cpu, imm); return;
10233 case 7: ldursh32 (cpu, imm); return;
10234 case 8: stur32 (cpu, imm); return;
10235 case 9: ldur32 (cpu, imm); return;
10236 case 10: ldursw (cpu, imm); return;
10237 case 12: stur64 (cpu, imm); return;
10238 case 13: ldur64 (cpu, imm); return;
10239
10240 case 14:
10241 /* PRFUM NYI. */
10242 HALT_NYI;
10243
10244 default:
10245 case 11:
10246 case 15:
10247 HALT_UNALLOC;
10248 }
10249 }
10250
10251 /* FReg operations. */
10252 switch (dispatch)
10253 {
10254 case 2: fsturq (cpu, imm); return;
10255 case 3: fldurq (cpu, imm); return;
10256 case 8: fsturs (cpu, imm); return;
10257 case 9: fldurs (cpu, imm); return;
10258 case 12: fsturd (cpu, imm); return;
10259 case 13: fldurd (cpu, imm); return;
10260
10261 case 0: /* STUR 8 bit FP. */
10262 case 1: /* LDUR 8 bit FP. */
10263 case 4: /* STUR 16 bit FP. */
10264 case 5: /* LDUR 16 bit FP. */
10265 HALT_NYI;
10266
10267 default:
10268 case 6:
10269 case 7:
10270 case 10:
10271 case 11:
10272 case 14:
10273 case 15:
10274 HALT_UNALLOC;
10275 }
10276 }
10277
10278 /* N.B. A preliminary note regarding all the ldrs<x>32
10279 instructions
10280
10281 The signed value loaded by these instructions is cast to unsigned
10282 before being assigned to aarch64_get_reg_u64 (cpu, N) i.e. to the
10283 64 bit element of the GReg union. This performs a 32 bit sign extension
10284 (as required) but avoids 64 bit sign extension, thus ensuring that the
10285 top half of the register word is zero. This is what the spec demands
10286 when a 32 bit load occurs. */
10287
10288 /* 32 bit load sign-extended byte scaled unsigned 12 bit. */
10289 static void
10290 ldrsb32_abs (sim_cpu *cpu, uint32_t offset)
10291 {
10292 unsigned int rn = INSTR (9, 5);
10293 unsigned int rt = INSTR (4, 0);
10294
10295 /* The target register may not be SP but the source may be;
10296 there is no scaling required for a byte load. */
10297 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
10298 aarch64_set_reg_u64 (cpu, rt, NO_SP,
10299 (int64_t) aarch64_get_mem_s8 (cpu, address));
10300 }
10301
10302 /* 32 bit load sign-extended byte scaled or unscaled zero-
10303 or sign-extended 32-bit register offset. */
10304 static void
10305 ldrsb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10306 {
10307 unsigned int rm = INSTR (20, 16);
10308 unsigned int rn = INSTR (9, 5);
10309 unsigned int rt = INSTR (4, 0);
10310
10311 /* rn may reference SP, rm and rt must reference ZR. */
10312
10313 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10314 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10315 extension);
10316
10317 /* There is no scaling required for a byte load. */
10318 aarch64_set_reg_u64
10319 (cpu, rt, NO_SP, (int64_t) aarch64_get_mem_s8 (cpu, address
10320 + displacement));
10321 }
10322
10323 /* 32 bit load sign-extended byte unscaled signed 9 bit with
10324 pre- or post-writeback. */
10325 static void
10326 ldrsb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10327 {
10328 uint64_t address;
10329 unsigned int rn = INSTR (9, 5);
10330 unsigned int rt = INSTR (4, 0);
10331
10332 if (rn == rt && wb != NoWriteBack)
10333 HALT_UNALLOC;
10334
10335 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10336
10337 if (wb == Pre)
10338 address += offset;
10339
10340 aarch64_set_reg_u64 (cpu, rt, NO_SP,
10341 (int64_t) aarch64_get_mem_s8 (cpu, address));
10342
10343 if (wb == Post)
10344 address += offset;
10345
10346 if (wb != NoWriteBack)
10347 aarch64_set_reg_u64 (cpu, rn, NO_SP, address);
10348 }
10349
10350 /* 8 bit store scaled. */
10351 static void
10352 fstrb_abs (sim_cpu *cpu, uint32_t offset)
10353 {
10354 unsigned st = INSTR (4, 0);
10355 unsigned rn = INSTR (9, 5);
10356
10357 aarch64_set_mem_u8 (cpu,
10358 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
10359 aarch64_get_vec_u8 (cpu, st, 0));
10360 }
10361
10362 /* 8 bit store scaled or unscaled zero- or
10363 sign-extended 8-bit register offset. */
10364 static void
10365 fstrb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10366 {
10367 unsigned rm = INSTR (20, 16);
10368 unsigned rn = INSTR (9, 5);
10369 unsigned st = INSTR (4, 0);
10370
10371 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10372 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10373 extension);
10374 uint64_t displacement = scaling == Scaled ? extended : 0;
10375
10376 aarch64_set_mem_u8
10377 (cpu, address + displacement, aarch64_get_vec_u8 (cpu, st, 0));
10378 }
10379
10380 /* 16 bit store scaled. */
10381 static void
10382 fstrh_abs (sim_cpu *cpu, uint32_t offset)
10383 {
10384 unsigned st = INSTR (4, 0);
10385 unsigned rn = INSTR (9, 5);
10386
10387 aarch64_set_mem_u16
10388 (cpu,
10389 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16),
10390 aarch64_get_vec_u16 (cpu, st, 0));
10391 }
10392
10393 /* 16 bit store scaled or unscaled zero-
10394 or sign-extended 16-bit register offset. */
10395 static void
10396 fstrh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10397 {
10398 unsigned rm = INSTR (20, 16);
10399 unsigned rn = INSTR (9, 5);
10400 unsigned st = INSTR (4, 0);
10401
10402 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10403 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10404 extension);
10405 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
10406
10407 aarch64_set_mem_u16
10408 (cpu, address + displacement, aarch64_get_vec_u16 (cpu, st, 0));
10409 }
10410
10411 /* 32 bit store scaled unsigned 12 bit. */
10412 static void
10413 fstrs_abs (sim_cpu *cpu, uint32_t offset)
10414 {
10415 unsigned st = INSTR (4, 0);
10416 unsigned rn = INSTR (9, 5);
10417
10418 aarch64_set_mem_u32
10419 (cpu,
10420 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32),
10421 aarch64_get_vec_u32 (cpu, st, 0));
10422 }
10423
10424 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
10425 static void
10426 fstrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10427 {
10428 unsigned rn = INSTR (9, 5);
10429 unsigned st = INSTR (4, 0);
10430
10431 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10432
10433 if (wb != Post)
10434 address += offset;
10435
10436 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, st, 0));
10437
10438 if (wb == Post)
10439 address += offset;
10440
10441 if (wb != NoWriteBack)
10442 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10443 }
10444
10445 /* 32 bit store scaled or unscaled zero-
10446 or sign-extended 32-bit register offset. */
10447 static void
10448 fstrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10449 {
10450 unsigned rm = INSTR (20, 16);
10451 unsigned rn = INSTR (9, 5);
10452 unsigned st = INSTR (4, 0);
10453
10454 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10455 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10456 extension);
10457 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
10458
10459 aarch64_set_mem_u32
10460 (cpu, address + displacement, aarch64_get_vec_u32 (cpu, st, 0));
10461 }
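/* Worked example (illustrative, assuming OPT_SCALE multiplies the
   offset by the element size in bytes when scaling == Scaled): for a
   32 bit store with Wm = 3 and UXTW, extend yields 3, so the
   displacement is 3 << 2 = 12 bytes when Scaled and just 3 bytes when
   Unscaled. */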
10462
10463 /* 64 bit store scaled unsigned 12 bit. */
10464 static void
10465 fstrd_abs (sim_cpu *cpu, uint32_t offset)
10466 {
10467 unsigned st = INSTR (4, 0);
10468 unsigned rn = INSTR (9, 5);
10469
10470 aarch64_set_mem_u64
10471 (cpu,
10472 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64),
10473 aarch64_get_vec_u64 (cpu, st, 0));
10474 }
10475
10476 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
10477 static void
10478 fstrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10479 {
10480 unsigned rn = INSTR (9, 5);
10481 unsigned st = INSTR (4, 0);
10482
10483 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10484
10485 if (wb != Post)
10486 address += offset;
10487
10488 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, st, 0));
10489
10490 if (wb == Post)
10491 address += offset;
10492
10493 if (wb != NoWriteBack)
10494 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10495 }
10496
10497 /* 64 bit store scaled or unscaled zero-
10498 or sign-extended 32-bit register offset. */
10499 static void
10500 fstrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10501 {
10502 unsigned rm = INSTR (20, 16);
10503 unsigned rn = INSTR (9, 5);
10504 unsigned st = INSTR (4, 0);
10505
10506 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10507 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10508 extension);
10509 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
10510
10511 aarch64_set_mem_u64
10512 (cpu, address + displacement, aarch64_get_vec_u64 (cpu, st, 0));
10513 }
10514
10515 /* 128 bit store scaled unsigned 12 bit. */
10516 static void
10517 fstrq_abs (sim_cpu *cpu, uint32_t offset)
10518 {
10519 FRegister a;
10520 unsigned st = INSTR (4, 0);
10521 unsigned rn = INSTR (9, 5);
10522 uint64_t addr;
10523
10524 aarch64_get_FP_long_double (cpu, st, & a);
10525
10526 addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
10527 aarch64_set_mem_long_double (cpu, addr, a);
10528 }
10529
10530 /* 128 bit store unscaled signed 9 bit with pre- or post-writeback. */
10531 static void
10532 fstrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10533 {
10534 FRegister a;
10535 unsigned rn = INSTR (9, 5);
10536 unsigned st = INSTR (4, 0);
10537 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10538
10539 if (wb != Post)
10540 address += offset;
10541
10542 aarch64_get_FP_long_double (cpu, st, & a);
10543 aarch64_set_mem_long_double (cpu, address, a);
10544
10545 if (wb == Post)
10546 address += offset;
10547
10548 if (wb != NoWriteBack)
10549 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10550 }
10551
10552 /* 128 bit store scaled or unscaled zero-
10553 or sign-extended 32-bit register offset. */
10554 static void
10555 fstrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10556 {
10557 unsigned rm = INSTR (20, 16);
10558 unsigned rn = INSTR (9, 5);
10559 unsigned st = INSTR (4, 0);
10560
10561 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10562 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10563 extension);
10564 uint64_t displacement = OPT_SCALE (extended, 128, scaling);
10565
10566 FRegister a;
10567
10568 aarch64_get_FP_long_double (cpu, st, & a);
10569 aarch64_set_mem_long_double (cpu, address + displacement, a);
10570 }
10571
10572 static void
10573 dexLoadImmediatePrePost (sim_cpu *cpu)
10574 {
10575 /* instr[31,30] = size
10576 instr[29,27] = 111
10577 instr[26] = V
10578 instr[25,24] = 00
10579 instr[23,22] = opc
10580 instr[21] = 0
10581 instr[20,12] = simm9
10582 instr[11] = wb : 0 ==> Post, 1 ==> Pre
10583 instr[10] = 0
10584 instr[9,5] = Rn may be SP.
10585 instr[4,0] = Rt */
10586
10587 uint32_t V = INSTR (26, 26);
10588 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10589 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10590 WriteBack wb = INSTR (11, 11);
10591
10592 if (!V)
10593 {
10594 /* GReg operations. */
10595 switch (dispatch)
10596 {
10597 case 0: strb_wb (cpu, imm, wb); return;
10598 case 1: ldrb32_wb (cpu, imm, wb); return;
10599 case 2: ldrsb_wb (cpu, imm, wb); return;
10600 case 3: ldrsb32_wb (cpu, imm, wb); return;
10601 case 4: strh_wb (cpu, imm, wb); return;
10602 case 5: ldrh32_wb (cpu, imm, wb); return;
10603 case 6: ldrsh64_wb (cpu, imm, wb); return;
10604 case 7: ldrsh32_wb (cpu, imm, wb); return;
10605 case 8: str32_wb (cpu, imm, wb); return;
10606 case 9: ldr32_wb (cpu, imm, wb); return;
10607 case 10: ldrsw_wb (cpu, imm, wb); return;
10608 case 12: str_wb (cpu, imm, wb); return;
10609 case 13: ldr_wb (cpu, imm, wb); return;
10610
10611 default:
10612 case 11:
10613 case 14:
10614 case 15:
10615 HALT_UNALLOC;
10616 }
10617 }
10618
10619 /* FReg operations. */
10620 switch (dispatch)
10621 {
10622 case 2: fstrq_wb (cpu, imm, wb); return;
10623 case 3: fldrq_wb (cpu, imm, wb); return;
10624 case 8: fstrs_wb (cpu, imm, wb); return;
10625 case 9: fldrs_wb (cpu, imm, wb); return;
10626 case 12: fstrd_wb (cpu, imm, wb); return;
10627 case 13: fldrd_wb (cpu, imm, wb); return;
10628
10629 case 0: /* STUR 8 bit FP. */
10630 case 1: /* LDUR 8 bit FP. */
10631 case 4: /* STUR 16 bit FP. */
10632 case 5: /* LDUR 16 bit FP. */
10633 HALT_NYI;
10634
10635 default:
10636 case 6:
10637 case 7:
10638 case 10:
10639 case 11:
10640 case 14:
10641 case 15:
10642 HALT_UNALLOC;
10643 }
10644 }
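/* Worked decode (illustrative): the word 0xf8408c41 is
   LDR X1, [X2, #8]!. Fields: size = 11, V = 0, opc = 01, so
   dispatch = (3 << 2) | 1 = 13; simm9 = 8 and instr[11] = 1 selects
   Pre, so the case above calls ldr_wb (cpu, 8, Pre). */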
10645
10646 static void
10647 dexLoadRegisterOffset (sim_cpu *cpu)
10648 {
10649 /* instr[31,30] = size
10650 instr[29,27] = 111
10651 instr[26] = V
10652 instr[25,24] = 00
10653 instr[23,22] = opc
10654 instr[21] = 1
10655 instr[20,16] = rm
10656 instr[15,13] = option : 010 ==> UXTW, 011 ==> UXTX/LSL,
10657 110 ==> SXTW, 111 ==> SXTX,
10658 ow ==> RESERVED
10659 instr[12] = scaled
10660 instr[11,10] = 10
10661 instr[9,5] = rn
10662 instr[4,0] = rt. */
10663
10664 uint32_t V = INSTR (26, 26);
10665 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10666 Scaling scale = INSTR (12, 12);
10667 Extension extensionType = INSTR (15, 13);
10668
10669 /* Check for illegal extension types. */
10670 if (uimm (extensionType, 1, 1) == 0)
10671 HALT_UNALLOC;
10672
10673 if (extensionType == UXTX || extensionType == SXTX)
10674 extensionType = NoExtension;
10675
10676 if (!V)
10677 {
10678 /* GReg operations. */
10679 switch (dispatch)
10680 {
10681 case 0: strb_scale_ext (cpu, scale, extensionType); return;
10682 case 1: ldrb32_scale_ext (cpu, scale, extensionType); return;
10683 case 2: ldrsb_scale_ext (cpu, scale, extensionType); return;
10684 case 3: ldrsb32_scale_ext (cpu, scale, extensionType); return;
10685 case 4: strh_scale_ext (cpu, scale, extensionType); return;
10686 case 5: ldrh32_scale_ext (cpu, scale, extensionType); return;
10687 case 6: ldrsh_scale_ext (cpu, scale, extensionType); return;
10688 case 7: ldrsh32_scale_ext (cpu, scale, extensionType); return;
10689 case 8: str32_scale_ext (cpu, scale, extensionType); return;
10690 case 9: ldr32_scale_ext (cpu, scale, extensionType); return;
10691 case 10: ldrsw_scale_ext (cpu, scale, extensionType); return;
10692 case 12: str_scale_ext (cpu, scale, extensionType); return;
10693 case 13: ldr_scale_ext (cpu, scale, extensionType); return;
10694 case 14: prfm_scale_ext (cpu, scale, extensionType); return;
10695
10696 default:
10697 case 11:
10698 case 15:
10699 HALT_UNALLOC;
10700 }
10701 }
10702
10703 /* FReg operations. */
10704 switch (dispatch)
10705 {
10706 case 1: /* LDR 8 bit FP. */
10707 HALT_NYI;
10708 case 3: fldrq_scale_ext (cpu, scale, extensionType); return;
10709 case 5: /* LDR 16 bit FP. */
10710 HALT_NYI;
10711 case 9: fldrs_scale_ext (cpu, scale, extensionType); return;
10712 case 13: fldrd_scale_ext (cpu, scale, extensionType); return;
10713
10714 case 0: fstrb_scale_ext (cpu, scale, extensionType); return;
10715 case 2: fstrq_scale_ext (cpu, scale, extensionType); return;
10716 case 4: fstrh_scale_ext (cpu, scale, extensionType); return;
10717 case 8: fstrs_scale_ext (cpu, scale, extensionType); return;
10718 case 12: fstrd_scale_ext (cpu, scale, extensionType); return;
10719
10720 default:
10721 case 6:
10722 case 7:
10723 case 10:
10724 case 11:
10725 case 14:
10726 case 15:
10727 HALT_UNALLOC;
10728 }
10729 }
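/* Worked decode (illustrative): for LDR X1, [X2, W3, UXTW #3],
   option = 010 (UXTW) passes the bit-1 check above, S = 1 gives
   Scaled, and dispatch = (3 << 2) | 1 = 13, so ldr_scale_ext applies
   an 8 byte scale to the zero-extended W3. */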
10730
10731 static void
10732 dexLoadUnsignedImmediate (sim_cpu *cpu)
10733 {
10734 /* instr[29,24] == 111_01
10735 instr[31,30] = size
10736 instr[26] = V
10737 instr[23,22] = opc
10738 instr[21,10] = uimm12 : unsigned immediate offset
10739 instr[9,5] = rn may be SP.
10740 instr[4,0] = rt. */
10741
10742 uint32_t V = INSTR (26,26);
10743 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10744 uint32_t imm = INSTR (21, 10);
10745
10746 if (!V)
10747 {
10748 /* GReg operations. */
10749 switch (dispatch)
10750 {
10751 case 0: strb_abs (cpu, imm); return;
10752 case 1: ldrb32_abs (cpu, imm); return;
10753 case 2: ldrsb_abs (cpu, imm); return;
10754 case 3: ldrsb32_abs (cpu, imm); return;
10755 case 4: strh_abs (cpu, imm); return;
10756 case 5: ldrh32_abs (cpu, imm); return;
10757 case 6: ldrsh_abs (cpu, imm); return;
10758 case 7: ldrsh32_abs (cpu, imm); return;
10759 case 8: str32_abs (cpu, imm); return;
10760 case 9: ldr32_abs (cpu, imm); return;
10761 case 10: ldrsw_abs (cpu, imm); return;
10762 case 12: str_abs (cpu, imm); return;
10763 case 13: ldr_abs (cpu, imm); return;
10764 case 14: prfm_abs (cpu, imm); return;
10765
10766 default:
10767 case 11:
10768 case 15:
10769 HALT_UNALLOC;
10770 }
10771 }
10772
10773 /* FReg operations. */
10774 switch (dispatch)
10775 {
10776 case 0: fstrb_abs (cpu, imm); return;
10777 case 4: fstrh_abs (cpu, imm); return;
10778 case 8: fstrs_abs (cpu, imm); return;
10779 case 12: fstrd_abs (cpu, imm); return;
10780 case 2: fstrq_abs (cpu, imm); return;
10781
10782 case 1: fldrb_abs (cpu, imm); return;
10783 case 5: fldrh_abs (cpu, imm); return;
10784 case 9: fldrs_abs (cpu, imm); return;
10785 case 13: fldrd_abs (cpu, imm); return;
10786 case 3: fldrq_abs (cpu, imm); return;
10787
10788 default:
10789 case 6:
10790 case 7:
10791 case 10:
10792 case 11:
10793 case 14:
10794 case 15:
10795 HALT_UNALLOC;
10796 }
10797 }
10798
10799 static void
10800 dexLoadExclusive (sim_cpu *cpu)
10801 {
10802 /* assert instr[29:24] = 001000;
10803 instr[31,30] = size
10804 instr[23] = 0 if exclusive
10805 instr[22] = L : 1 if load, 0 if store
10806 instr[21] = 1 if pair
10807 instr[20,16] = Rs
10808 instr[15] = o0 : 1 if ordered
10809 instr[14,10] = Rt2
10810 instr[9,5] = Rn
10811 instr[4,0] = Rt. */
10812
10813 switch (INSTR (22, 21))
10814 {
10815 case 2: ldxr (cpu); return;
10816 case 0: stxr (cpu); return;
10817 default: HALT_NYI;
10818 }
10819 }
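/* For example (illustrative): LDXR X0, [X1] has L = 1 and pair = 0,
   so instr[22,21] = 10 and case 2 dispatches to ldxr; STXR W2, X0, [X1]
   has L = 0 and pair = 0, selecting stxr via case 0. */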
10820
10821 static void
10822 dexLoadOther (sim_cpu *cpu)
10823 {
10824 uint32_t dispatch;
10825
10826 /* instr[29,25] = 111_0
10827 instr[24] == 0 ==> dispatch, 1 ==> ldst reg unsigned immediate
10828 instr[21] and instr[11,10] form the secondary dispatch. */
10829 if (INSTR (24, 24))
10830 {
10831 dexLoadUnsignedImmediate (cpu);
10832 return;
10833 }
10834
10835 dispatch = ((INSTR (21, 21) << 2) | INSTR (11, 10));
10836 switch (dispatch)
10837 {
10838 case 0: dexLoadUnscaledImmediate (cpu); return;
10839 case 1: dexLoadImmediatePrePost (cpu); return;
10840 case 3: dexLoadImmediatePrePost (cpu); return;
10841 case 6: dexLoadRegisterOffset (cpu); return;
10842
10843 default:
10844 case 2:
10845 case 4:
10846 case 5:
10847 case 7:
10848 HALT_NYI;
10849 }
10850 }
10851
10852 static void
10853 store_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10854 {
10855 unsigned rn = INSTR (14, 10);
10856 unsigned rd = INSTR (9, 5);
10857 unsigned rm = INSTR (4, 0);
10858 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10859
10860 if ((rn == rd || rm == rd) && wb != NoWriteBack)
10861 HALT_UNALLOC; /* Writeback to a transfer register is UNPREDICTABLE. */
10862
10863 offset <<= 2;
10864
10865 if (wb != Post)
10866 address += offset;
10867
10868 aarch64_set_mem_u32 (cpu, address,
10869 aarch64_get_reg_u32 (cpu, rm, NO_SP));
10870 aarch64_set_mem_u32 (cpu, address + 4,
10871 aarch64_get_reg_u32 (cpu, rn, NO_SP));
10872
10873 if (wb == Post)
10874 address += offset;
10875
10876 if (wb != NoWriteBack)
10877 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10878 }
10879
10880 static void
10881 store_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10882 {
10883 unsigned rn = INSTR (14, 10);
10884 unsigned rd = INSTR (9, 5);
10885 unsigned rm = INSTR (4, 0);
10886 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10887
10888 if ((rn == rd || rm == rd) && wb != NoWriteBack)
10889 HALT_UNALLOC; /* Writeback to a transfer register is UNPREDICTABLE. */
10890
10891 offset <<= 3;
10892
10893 if (wb != Post)
10894 address += offset;
10895
10896 aarch64_set_mem_u64 (cpu, address,
10897 aarch64_get_reg_u64 (cpu, rm, NO_SP));
10898 aarch64_set_mem_u64 (cpu, address + 8,
10899 aarch64_get_reg_u64 (cpu, rn, NO_SP));
10900
10901 if (wb == Post)
10902 address += offset;
10903
10904 if (wb != NoWriteBack)
10905 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10906 }
10907
10908 static void
10909 load_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10910 {
10911 unsigned rn = INSTR (14, 10);
10912 unsigned rd = INSTR (9, 5);
10913 unsigned rm = INSTR (4, 0);
10914 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10915
10916 /* Loading into the same register twice is UNPREDICTABLE; treat it as unallocated. */
10917 if (rn == rm)
10918 HALT_UNALLOC;
10919
10920 offset <<= 2;
10921
10922 if (wb != Post)
10923 address += offset;
10924
10925 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u32 (cpu, address));
10926 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u32 (cpu, address + 4));
10927
10928 if (wb == Post)
10929 address += offset;
10930
10931 if (wb != NoWriteBack)
10932 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10933 }
10934
10935 static void
10936 load_pair_s32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10937 {
10938 unsigned rn = INSTR (14, 10);
10939 unsigned rd = INSTR (9, 5);
10940 unsigned rm = INSTR (4, 0);
10941 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10942
10943 /* Loading into the same register twice is UNPREDICTABLE; treat it as unallocated. */
10944 if (rn == rm)
10945 HALT_UNALLOC;
10946
10947 offset <<= 2;
10948
10949 if (wb != Post)
10950 address += offset;
10951
10952 aarch64_set_reg_s64 (cpu, rm, SP_OK, aarch64_get_mem_s32 (cpu, address));
10953 aarch64_set_reg_s64 (cpu, rn, SP_OK, aarch64_get_mem_s32 (cpu, address + 4));
10954
10955 if (wb == Post)
10956 address += offset;
10957
10958 if (wb != NoWriteBack)
10959 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10960 }
10961
10962 static void
10963 load_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10964 {
10965 unsigned rn = INSTR (14, 10);
10966 unsigned rd = INSTR (9, 5);
10967 unsigned rm = INSTR (4, 0);
10968 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10969
10970 /* Loading into the same register twice is UNPREDICTABLE; treat it as unallocated. */
10971 if (rn == rm)
10972 HALT_UNALLOC;
10973
10974 offset <<= 3;
10975
10976 if (wb != Post)
10977 address += offset;
10978
10979 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u64 (cpu, address));
10980 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u64 (cpu, address + 8));
10981
10982 if (wb == Post)
10983 address += offset;
10984
10985 if (wb != NoWriteBack)
10986 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10987 }
10988
10989 static void
10990 dex_load_store_pair_gr (sim_cpu *cpu)
10991 {
10992 /* instr[31,30] = size (10=> 64-bit, 01=> signed 32-bit, 00=> 32-bit)
10993 instr[29,27] = instruction encoding: 101
10994 instr[26] = V : 1 if fp, 0 if gp
10995 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
10996 instr[22] = load/store (1=> load)
10997 instr[21,15] = signed, scaled, offset
10998 instr[14,10] = Rn
10999 instr[ 9, 5] = Rd
11000 instr[ 4, 0] = Rm. */
11001
11002 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11003 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11004
11005 switch (dispatch)
11006 {
11007 case 2: store_pair_u32 (cpu, offset, Post); return;
11008 case 3: load_pair_u32 (cpu, offset, Post); return;
11009 case 4: store_pair_u32 (cpu, offset, NoWriteBack); return;
11010 case 5: load_pair_u32 (cpu, offset, NoWriteBack); return;
11011 case 6: store_pair_u32 (cpu, offset, Pre); return;
11012 case 7: load_pair_u32 (cpu, offset, Pre); return;
11013
11014 case 11: load_pair_s32 (cpu, offset, Post); return;
11015 case 13: load_pair_s32 (cpu, offset, NoWriteBack); return;
11016 case 15: load_pair_s32 (cpu, offset, Pre); return;
11017
11018 case 18: store_pair_u64 (cpu, offset, Post); return;
11019 case 19: load_pair_u64 (cpu, offset, Post); return;
11020 case 20: store_pair_u64 (cpu, offset, NoWriteBack); return;
11021 case 21: load_pair_u64 (cpu, offset, NoWriteBack); return;
11022 case 22: store_pair_u64 (cpu, offset, Pre); return;
11023 case 23: load_pair_u64 (cpu, offset, Pre); return;
11024
11025 default:
11026 HALT_UNALLOC;
11027 }
11028 }
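/* Worked decode (illustrative): LDP X0, X1, [SP], #16 has size = 10,
   addressing mode 01 (post) and L = 1, so dispatch = (2 << 3) | 3 = 19
   and load_pair_u64 runs with wb = Post; the simm7 field holds 2,
   which the callee shifts left by 3 to recover the 16 byte offset. */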
11029
11030 static void
11031 store_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
11032 {
11033 unsigned rn = INSTR (14, 10);
11034 unsigned rd = INSTR (9, 5);
11035 unsigned rm = INSTR (4, 0);
11036 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11037
11038 offset <<= 2;
11039
11040 if (wb != Post)
11041 address += offset;
11042
11043 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, rm, 0));
11044 aarch64_set_mem_u32 (cpu, address + 4, aarch64_get_vec_u32 (cpu, rn, 0));
11045
11046 if (wb == Post)
11047 address += offset;
11048
11049 if (wb != NoWriteBack)
11050 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11051 }
11052
11053 static void
11054 store_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11055 {
11056 unsigned rn = INSTR (14, 10);
11057 unsigned rd = INSTR (9, 5);
11058 unsigned rm = INSTR (4, 0);
11059 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11060
11061 offset <<= 3;
11062
11063 if (wb != Post)
11064 address += offset;
11065
11066 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, rm, 0));
11067 aarch64_set_mem_u64 (cpu, address + 8, aarch64_get_vec_u64 (cpu, rn, 0));
11068
11069 if (wb == Post)
11070 address += offset;
11071
11072 if (wb != NoWriteBack)
11073 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11074 }
11075
11076 static void
11077 store_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11078 {
11079 FRegister a;
11080 unsigned rn = INSTR (14, 10);
11081 unsigned rd = INSTR (9, 5);
11082 unsigned rm = INSTR (4, 0);
11083 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11084
11085 offset <<= 4;
11086
11087 if (wb != Post)
11088 address += offset;
11089
11090 aarch64_get_FP_long_double (cpu, rm, & a);
11091 aarch64_set_mem_long_double (cpu, address, a);
11092 aarch64_get_FP_long_double (cpu, rn, & a);
11093 aarch64_set_mem_long_double (cpu, address + 16, a);
11094
11095 if (wb == Post)
11096 address += offset;
11097
11098 if (wb != NoWriteBack)
11099 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11100 }
11101
11102 static void
11103 load_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
11104 {
11105 unsigned rn = INSTR (14, 10);
11106 unsigned rd = INSTR (9, 5);
11107 unsigned rm = INSTR (4, 0);
11108 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11109
11110 if (rm == rn)
11111 HALT_UNALLOC;
11112
11113 offset <<= 2;
11114
11115 if (wb != Post)
11116 address += offset;
11117
11118 aarch64_set_vec_u32 (cpu, rm, 0, aarch64_get_mem_u32 (cpu, address));
11119 aarch64_set_vec_u32 (cpu, rn, 0, aarch64_get_mem_u32 (cpu, address + 4));
11120
11121 if (wb == Post)
11122 address += offset;
11123
11124 if (wb != NoWriteBack)
11125 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11126 }
11127
11128 static void
11129 load_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11130 {
11131 unsigned rn = INSTR (14, 10);
11132 unsigned rd = INSTR (9, 5);
11133 unsigned rm = INSTR (4, 0);
11134 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11135
11136 if (rm == rn)
11137 HALT_UNALLOC;
11138
11139 offset <<= 3;
11140
11141 if (wb != Post)
11142 address += offset;
11143
11144 aarch64_set_vec_u64 (cpu, rm, 0, aarch64_get_mem_u64 (cpu, address));
11145 aarch64_set_vec_u64 (cpu, rn, 0, aarch64_get_mem_u64 (cpu, address + 8));
11146
11147 if (wb == Post)
11148 address += offset;
11149
11150 if (wb != NoWriteBack)
11151 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11152 }
11153
11154 static void
11155 load_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11156 {
11157 FRegister a;
11158 unsigned rn = INSTR (14, 10);
11159 unsigned rd = INSTR (9, 5);
11160 unsigned rm = INSTR (4, 0);
11161 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11162
11163 if (rm == rn)
11164 HALT_UNALLOC;
11165
11166 offset <<= 4;
11167
11168 if (wb != Post)
11169 address += offset;
11170
11171 aarch64_get_mem_long_double (cpu, address, & a);
11172 aarch64_set_FP_long_double (cpu, rm, a);
11173 aarch64_get_mem_long_double (cpu, address + 16, & a);
11174 aarch64_set_FP_long_double (cpu, rn, a);
11175
11176 if (wb == Post)
11177 address += offset;
11178
11179 if (wb != NoWriteBack)
11180 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11181 }
11182
11183 static void
11184 dex_load_store_pair_fp (sim_cpu *cpu)
11185 {
11186 /* instr[31,30] = size (10=> 128-bit, 01=> 64-bit, 00=> 32-bit)
11187 instr[29,25] = instruction encoding
11188 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11189 instr[22] = load/store (1=> load)
11190 instr[21,15] = signed, scaled, offset
11191 instr[14,10] = Rn
11192 instr[ 9, 5] = Rd
11193 instr[ 4, 0] = Rm */
11194
11195 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11196 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11197
11198 switch (dispatch)
11199 {
11200 case 2: store_pair_float (cpu, offset, Post); return;
11201 case 3: load_pair_float (cpu, offset, Post); return;
11202 case 4: store_pair_float (cpu, offset, NoWriteBack); return;
11203 case 5: load_pair_float (cpu, offset, NoWriteBack); return;
11204 case 6: store_pair_float (cpu, offset, Pre); return;
11205 case 7: load_pair_float (cpu, offset, Pre); return;
11206
11207 case 10: store_pair_double (cpu, offset, Post); return;
11208 case 11: load_pair_double (cpu, offset, Post); return;
11209 case 12: store_pair_double (cpu, offset, NoWriteBack); return;
11210 case 13: load_pair_double (cpu, offset, NoWriteBack); return;
11211 case 14: store_pair_double (cpu, offset, Pre); return;
11212 case 15: load_pair_double (cpu, offset, Pre); return;
11213
11214 case 18: store_pair_long_double (cpu, offset, Post); return;
11215 case 19: load_pair_long_double (cpu, offset, Post); return;
11216 case 20: store_pair_long_double (cpu, offset, NoWriteBack); return;
11217 case 21: load_pair_long_double (cpu, offset, NoWriteBack); return;
11218 case 22: store_pair_long_double (cpu, offset, Pre); return;
11219 case 23: load_pair_long_double (cpu, offset, Pre); return;
11220
11221 default:
11222 HALT_UNALLOC;
11223 }
11224 }
11225
11226 static inline unsigned
11227 vec_reg (unsigned v, unsigned o)
11228 {
11229 return (v + o) & 0x1F; /* Register numbers wrap modulo 32. */
11230 }
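/* For example (illustrative): with the modulo-32 wrap, an LD3 whose
   first register is V30 writes V30, V31 and V0, matching the
   architectural wrap-around of consecutive register lists. */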
11231
11232 /* Load multiple N-element structures to N consecutive registers. */
11233 static void
11234 vec_load (sim_cpu *cpu, uint64_t address, unsigned N)
11235 {
11236 int all = INSTR (30, 30);
11237 unsigned size = INSTR (11, 10);
11238 unsigned vd = INSTR (4, 0);
11239 unsigned i;
11240
11241 switch (size)
11242 {
11243 case 0: /* 8-bit operations. */
11244 if (all)
11245 for (i = 0; i < (16 * N); i++)
11246 aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15,
11247 aarch64_get_mem_u8 (cpu, address + i));
11248 else
11249 for (i = 0; i < (8 * N); i++)
11250 aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7,
11251 aarch64_get_mem_u8 (cpu, address + i));
11252 return;
11253
11254 case 1: /* 16-bit operations. */
11255 if (all)
11256 for (i = 0; i < (8 * N); i++)
11257 aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7,
11258 aarch64_get_mem_u16 (cpu, address + i * 2));
11259 else
11260 for (i = 0; i < (4 * N); i++)
11261 aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3,
11262 aarch64_get_mem_u16 (cpu, address + i * 2));
11263 return;
11264
11265 case 2: /* 32-bit operations. */
11266 if (all)
11267 for (i = 0; i < (4 * N); i++)
11268 aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3,
11269 aarch64_get_mem_u32 (cpu, address + i * 4));
11270 else
11271 for (i = 0; i < (2 * N); i++)
11272 aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1,
11273 aarch64_get_mem_u32 (cpu, address + i * 4));
11274 return;
11275
11276 case 3: /* 64-bit operations. */
11277 if (all)
11278 for (i = 0; i < (2 * N); i++)
11279 aarch64_set_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1,
11280 aarch64_get_mem_u64 (cpu, address + i * 8));
11281 else
11282 for (i = 0; i < N; i++)
11283 aarch64_set_vec_u64 (cpu, vec_reg (vd, i), 0,
11284 aarch64_get_mem_u64 (cpu, address + i * 8));
11285 return;
11286 }
11287 }
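/* As written, vec_load performs a straight linear copy: for
   LD2 {V0.8B, V1.8B}, [X0] it places bytes 0-7 in V0 and bytes 8-15 in
   V1. Architecturally LD2 de-interleaves (even-indexed elements to V0,
   odd to V1), which is the discrepancy the FIXME comments below point
   out. */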
11288
11289 /* LD4: load multiple 4-element structures to four consecutive registers. */
11290 static void
11291 LD4 (sim_cpu *cpu, uint64_t address)
11292 {
11293 vec_load (cpu, address, 4);
11294 }
11295
11296 /* LD3: load multiple 3-element structures to three consecutive registers. */
11297 static void
11298 LD3 (sim_cpu *cpu, uint64_t address)
11299 {
11300 vec_load (cpu, address, 3);
11301 }
11302
11303 /* LD2: load multiple 2-element structures to two consecutive registers. */
11304 static void
11305 LD2 (sim_cpu *cpu, uint64_t address)
11306 {
11307 vec_load (cpu, address, 2);
11308 }
11309
11310 /* Load multiple 1-element structures into one register. */
11311 static void
11312 LD1_1 (sim_cpu *cpu, uint64_t address)
11313 {
11314 int all = INSTR (30, 30);
11315 unsigned size = INSTR (11, 10);
11316 unsigned vd = INSTR (4, 0);
11317 unsigned i;
11318
11319 switch (size)
11320 {
11321 case 0:
11322 /* LD1 {Vd.16b}, addr, #16 */
11323 /* LD1 {Vd.8b}, addr, #8 */
11324 for (i = 0; i < (all ? 16 : 8); i++)
11325 aarch64_set_vec_u8 (cpu, vd, i,
11326 aarch64_get_mem_u8 (cpu, address + i));
11327 return;
11328
11329 case 1:
11330 /* LD1 {Vd.8h}, addr, #16 */
11331 /* LD1 {Vd.4h}, addr, #8 */
11332 for (i = 0; i < (all ? 8 : 4); i++)
11333 aarch64_set_vec_u16 (cpu, vd, i,
11334 aarch64_get_mem_u16 (cpu, address + i * 2));
11335 return;
11336
11337 case 2:
11338 /* LD1 {Vd.4s}, addr, #16 */
11339 /* LD1 {Vd.2s}, addr, #8 */
11340 for (i = 0; i < (all ? 4 : 2); i++)
11341 aarch64_set_vec_u32 (cpu, vd, i,
11342 aarch64_get_mem_u32 (cpu, address + i * 4));
11343 return;
11344
11345 case 3:
11346 /* LD1 {Vd.2d}, addr, #16 */
11347 /* LD1 {Vd.1d}, addr, #8 */
11348 for (i = 0; i < (all ? 2 : 1); i++)
11349 aarch64_set_vec_u64 (cpu, vd, i,
11350 aarch64_get_mem_u64 (cpu, address + i * 8));
11351 return;
11352 }
11353 }
11354
11355 /* Load multiple 1-element structures into two registers. */
11356 static void
11357 LD1_2 (sim_cpu *cpu, uint64_t address)
11358 {
11359 /* FIXME: This algorithm is *exactly* the same as the LD2 version,
11360 yet LD2 is architecturally supposed to de-interleave the elements.
11361 The missing de-interleave step appears to be the real bug here. */
11362 vec_load (cpu, address, 2);
11363 }
11364
11365 /* Load multiple 1-element structures into three registers. */
11366 static void
11367 LD1_3 (sim_cpu *cpu, uint64_t address)
11368 {
11369 /* FIXME: This algorithm is *exactly* the same as the LD3 version.
11370 So why have two different instructions? There must be something
11371 wrong somewhere. */
11372 vec_load (cpu, address, 3);
11373 }
11374
11375 /* Load multiple 1-element structures into four registers. */
11376 static void
11377 LD1_4 (sim_cpu *cpu, uint64_t address)
11378 {
11379 /* FIXME: This algorithm is *exactly* the same as the LD4 version.
11380 So why have two different instructions? There must be something
11381 wrong somewhere. */
11382 vec_load (cpu, address, 4);
11383 }
11384
11385 /* Store multiple N-element structures to N consecutive registers. */
11386 static void
11387 vec_store (sim_cpu *cpu, uint64_t address, unsigned N)
11388 {
11389 int all = INSTR (30, 30);
11390 unsigned size = INSTR (11, 10);
11391 unsigned vd = INSTR (4, 0);
11392 unsigned i;
11393
11394 switch (size)
11395 {
11396 case 0: /* 8-bit operations. */
11397 if (all)
11398 for (i = 0; i < (16 * N); i++)
11399 aarch64_set_mem_u8
11400 (cpu, address + i,
11401 aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15));
11402 else
11403 for (i = 0; i < (8 * N); i++)
11404 aarch64_set_mem_u8
11405 (cpu, address + i,
11406 aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7));
11407 return;
11408
11409 case 1: /* 16-bit operations. */
11410 if (all)
11411 for (i = 0; i < (8 * N); i++)
11412 aarch64_set_mem_u16
11413 (cpu, address + i * 2,
11414 aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7));
11415 else
11416 for (i = 0; i < (4 * N); i++)
11417 aarch64_set_mem_u16
11418 (cpu, address + i * 2,
11419 aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3));
11420 return;
11421
11422 case 2: /* 32-bit operations. */
11423 if (all)
11424 for (i = 0; i < (4 * N); i++)
11425 aarch64_set_mem_u32
11426 (cpu, address + i * 4,
11427 aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3));
11428 else
11429 for (i = 0; i < (2 * N); i++)
11430 aarch64_set_mem_u32
11431 (cpu, address + i * 4,
11432 aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1));
11433 return;
11434
11435 case 3: /* 64-bit operations. */
11436 if (all)
11437 for (i = 0; i < (2 * N); i++)
11438 aarch64_set_mem_u64
11439 (cpu, address + i * 8,
11440 aarch64_get_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1));
11441 else
11442 for (i = 0; i < N; i++)
11443 aarch64_set_mem_u64
11444 (cpu, address + i * 8,
11445 aarch64_get_vec_u64 (cpu, vec_reg (vd, i), 0));
11446 return;
11447 }
11448 }
11449
11450 /* Store multiple 4-element structure to four consecutive registers. */
11451 static void
11452 ST4 (sim_cpu *cpu, uint64_t address)
11453 {
11454 vec_store (cpu, address, 4);
11455 }
11456
11457 /* Store multiple 3-element structures to three consecutive registers. */
11458 static void
11459 ST3 (sim_cpu *cpu, uint64_t address)
11460 {
11461 vec_store (cpu, address, 3);
11462 }
11463
11464 /* Store multiple 2-element structures to two consecutive registers. */
11465 static void
11466 ST2 (sim_cpu *cpu, uint64_t address)
11467 {
11468 vec_store (cpu, address, 2);
11469 }
11470
11471 /* Store multiple 1-element structures into one register. */
11472 static void
11473 ST1_1 (sim_cpu *cpu, uint64_t address)
11474 {
11475 int all = INSTR (30, 30);
11476 unsigned size = INSTR (11, 10);
11477 unsigned vd = INSTR (4, 0);
11478 unsigned i;
11479
11480 switch (size)
11481 {
11482 case 0:
11483 for (i = 0; i < (all ? 16 : 8); i++)
11484 aarch64_set_mem_u8 (cpu, address + i,
11485 aarch64_get_vec_u8 (cpu, vd, i));
11486 return;
11487
11488 case 1:
11489 for (i = 0; i < (all ? 8 : 4); i++)
11490 aarch64_set_mem_u16 (cpu, address + i * 2,
11491 aarch64_get_vec_u16 (cpu, vd, i));
11492 return;
11493
11494 case 2:
11495 for (i = 0; i < (all ? 4 : 2); i++)
11496 aarch64_set_mem_u32 (cpu, address + i * 4,
11497 aarch64_get_vec_u32 (cpu, vd, i));
11498 return;
11499
11500 case 3:
11501 for (i = 0; i < (all ? 2 : 1); i++)
11502 aarch64_set_mem_u64 (cpu, address + i * 8,
11503 aarch64_get_vec_u64 (cpu, vd, i));
11504 return;
11505 }
11506 }
11507
11508 /* Store multiple 1-element structures into two registers. */
11509 static void
11510 ST1_2 (sim_cpu *cpu, uint64_t address)
11511 {
11512 /* FIXME: This algorithm is *exactly* the same as the ST2 version,
11513 yet ST2 is architecturally supposed to interleave the elements.
11514 The missing interleave step appears to be the real bug here. */
11515 vec_store (cpu, address, 2);
11516 }
11517
11518 /* Store multiple 1-element structures into three registers. */
11519 static void
11520 ST1_3 (sim_cpu *cpu, uint64_t address)
11521 {
11522 /* FIXME: This algorithm is *exactly* the same as the ST3 version.
11523 So why have two different instructions? There must be
11524 something wrong somewhere. */
11525 vec_store (cpu, address, 3);
11526 }
11527
11528 /* Store multiple 1-element structures into four registers. */
11529 static void
11530 ST1_4 (sim_cpu *cpu, uint64_t address)
11531 {
11532 /* FIXME: This algorithm is *exactly* the same as the ST4 version.
11533 So why have two different instructions? There must be
11534 something wrong somewhere. */
11535 vec_store (cpu, address, 4);
11536 }
11537
11538 #define LDn_STn_SINGLE_LANE_AND_SIZE() \
11539 do \
11540 { \
11541 switch (INSTR (15, 14)) \
11542 { \
11543 case 0: \
11544 lane = (full << 3) | (s << 2) | size; \
11545 size = 0; \
11546 break; \
11547 \
11548 case 1: \
11549 if ((size & 1) == 1) \
11550 HALT_UNALLOC; \
11551 lane = (full << 2) | (s << 1) | (size >> 1); \
11552 size = 1; \
11553 break; \
11554 \
11555 case 2: \
11556 if ((size & 2) == 2) \
11557 HALT_UNALLOC; \
11558 \
11559 if ((size & 1) == 0) \
11560 { \
11561 lane = (full << 1) | s; \
11562 size = 2; \
11563 } \
11564 else \
11565 { \
11566 if (s) \
11567 HALT_UNALLOC; \
11568 lane = full; \
11569 size = 3; \
11570 } \
11571 break; \
11572 \
11573 default: \
11574 HALT_UNALLOC; \
11575 } \
11576 } \
11577 while (0)
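/* Worked example (illustrative): for a 16 bit single-structure access,
   instr[15,14] = 1, and with Q (full) = 1, S = 1 and size = 10 the
   macro computes lane = (1 << 2) | (1 << 1) | 1 = 7 and resets size to
   1, i.e. element 7 of a .8H register. */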
11578
11579 /* Load single structure into one lane of N registers. */
11580 static void
11581 do_vec_LDn_single (sim_cpu *cpu, uint64_t address)
11582 {
11583 /* instr[31] = 0
11584 instr[30] = element selector 0=>half, 1=>all elements
11585 instr[29,24] = 00 1101
11586 instr[23] = 0=>simple, 1=>post
11587 instr[22] = 1
11588 instr[21] = width: LD1-or-LD3 (0) / LD2-or-LD4 (1)
11589 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11590 11111 (immediate post inc)
11591 instr[15,13] = opcode
11592 instr[12] = S, used for lane number
11593 instr[11,10] = size, also used for lane number
11594 instr[9,5] = address
11595 instr[4,0] = Vd */
11596
11597 unsigned full = INSTR (30, 30);
11598 unsigned vd = INSTR (4, 0);
11599 unsigned size = INSTR (11, 10);
11600 unsigned s = INSTR (12, 12);
11601 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11602 int lane = 0;
11603 int i;
11604
11605 NYI_assert (29, 24, 0x0D);
11606 NYI_assert (22, 22, 1);
11607
11608 /* Compute the lane number first (using size), and then compute size. */
11609 LDn_STn_SINGLE_LANE_AND_SIZE ();
11610
11611 for (i = 0; i < nregs; i++)
11612 switch (size)
11613 {
11614 case 0:
11615 {
11616 uint8_t val = aarch64_get_mem_u8 (cpu, address + i);
11617 aarch64_set_vec_u8 (cpu, vd + i, lane, val);
11618 break;
11619 }
11620
11621 case 1:
11622 {
11623 uint16_t val = aarch64_get_mem_u16 (cpu, address + (i * 2));
11624 aarch64_set_vec_u16 (cpu, vd + i, lane, val);
11625 break;
11626 }
11627
11628 case 2:
11629 {
11630 uint32_t val = aarch64_get_mem_u32 (cpu, address + (i * 4));
11631 aarch64_set_vec_u32 (cpu, vd + i, lane, val);
11632 break;
11633 }
11634
11635 case 3:
11636 {
11637 uint64_t val = aarch64_get_mem_u64 (cpu, address + (i * 8));
11638 aarch64_set_vec_u64 (cpu, vd + i, lane, val);
11639 break;
11640 }
11641 }
11642 }
11643
11644 /* Store single structure from one lane from N registers. */
11645 static void
11646 do_vec_STn_single (sim_cpu *cpu, uint64_t address)
11647 {
11648 /* instr[31] = 0
11649 instr[30] = element selector 0=>half, 1=>all elements
11650 instr[29,24] = 00 1101
11651 instr[23] = 0=>simple, 1=>post
11652 instr[22] = 0
11653 instr[21] = width: LD1-or-LD3 (0) / LD2-or-LD4 (1)
11654 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11655 11111 (immediate post inc)
11656 instr[15,13] = opcode
11657 instr[12] = S, used for lane number
11658 instr[11,10] = size, also used for lane number
11659 instr[9,5] = address
11660 instr[4,0] = Vd */
11661
11662 unsigned full = INSTR (30, 30);
11663 unsigned vd = INSTR (4, 0);
11664 unsigned size = INSTR (11, 10);
11665 unsigned s = INSTR (12, 12);
11666 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11667 int lane = 0;
11668 int i;
11669
11670 NYI_assert (29, 24, 0x0D);
11671 NYI_assert (22, 22, 0);
11672
11673 /* Compute the lane number first (using size), and then compute size. */
11674 LDn_STn_SINGLE_LANE_AND_SIZE ();
11675
11676 for (i = 0; i < nregs; i++)
11677 switch (size)
11678 {
11679 case 0:
11680 {
11681 uint8_t val = aarch64_get_vec_u8 (cpu, vd + i, lane);
11682 aarch64_set_mem_u8 (cpu, address + i, val);
11683 break;
11684 }
11685
11686 case 1:
11687 {
11688 uint16_t val = aarch64_get_vec_u16 (cpu, vd + i, lane);
11689 aarch64_set_mem_u16 (cpu, address + (i * 2), val);
11690 break;
11691 }
11692
11693 case 2:
11694 {
11695 uint32_t val = aarch64_get_vec_u32 (cpu, vd + i, lane);
11696 aarch64_set_mem_u32 (cpu, address + (i * 4), val);
11697 break;
11698 }
11699
11700 case 3:
11701 {
11702 uint64_t val = aarch64_get_vec_u64 (cpu, vd + i, lane);
11703 aarch64_set_mem_u64 (cpu, address + (i * 8), val);
11704 break;
11705 }
11706 }
11707 }
11708
11709 /* Load single structure into all lanes of N registers. */
11710 static void
11711 do_vec_LDnR (sim_cpu *cpu, uint64_t address)
11712 {
11713 /* instr[31] = 0
11714 instr[30] = element selector 0=>half, 1=>all elements
11715 instr[29,24] = 00 1101
11716 instr[23] = 0=>simple, 1=>post
11717 instr[22] = 1
11718 instr[21] = width: LD1R-or-LD3R (0) / LD2R-or-LD4R (1)
11719 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11720 11111 (immediate post inc)
11721 instr[15,14] = 11
11722 instr[13] = width: LD1R-or-LD2R (0) / LD3R-or-LD4R (1)
11723 instr[12] = 0
11724 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11725 10=> word(s), 11=> double(d)
11726 instr[9,5] = address
11727 instr[4,0] = Vd */
11728
11729 unsigned full = INSTR (30, 30);
11730 unsigned vd = INSTR (4, 0);
11731 unsigned size = INSTR (11, 10);
11732 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11733 int i, n;
11734
11735 NYI_assert (29, 24, 0x0D);
11736 NYI_assert (22, 22, 1);
11737 NYI_assert (15, 14, 3);
11738 NYI_assert (12, 12, 0);
11739
11740 for (n = 0; n < nregs; n++)
11741 switch (size)
11742 {
11743 case 0:
11744 {
11745 uint8_t val = aarch64_get_mem_u8 (cpu, address + n);
11746 for (i = 0; i < (full ? 16 : 8); i++)
11747 aarch64_set_vec_u8 (cpu, vd + n, i, val);
11748 break;
11749 }
11750
11751 case 1:
11752 {
11753 uint16_t val = aarch64_get_mem_u16 (cpu, address + (n * 2));
11754 for (i = 0; i < (full ? 8 : 4); i++)
11755 aarch64_set_vec_u16 (cpu, vd + n, i, val);
11756 break;
11757 }
11758
11759 case 2:
11760 {
11761 uint32_t val = aarch64_get_mem_u32 (cpu, address + (n * 4));
11762 for (i = 0; i < (full ? 4 : 2); i++)
11763 aarch64_set_vec_u32 (cpu, vd + n, i, val);
11764 break;
11765 }
11766
11767 case 3:
11768 {
11769 uint64_t val = aarch64_get_mem_u64 (cpu, address + (n * 8));
11770 for (i = 0; i < (full ? 2 : 1); i++)
11771 aarch64_set_vec_u64 (cpu, vd + n, i, val);
11772 break;
11773 }
11774
11775 default:
11776 HALT_UNALLOC;
11777 }
11778 }
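/* For example (illustrative): LD2R {V4.4S, V5.4S}, [X1] has
   instr[13] = 0 and instr[21] = 1, so nregs = 2; two 32 bit words are
   fetched from consecutive addresses and each is replicated across all
   four lanes of V4 and V5 respectively. */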
11779
11780 static void
11781 do_vec_load_store (sim_cpu *cpu)
11782 {
11783 /* {LD|ST}<N> {Vd..Vd+N}, vaddr
11784
11785 instr[31] = 0
11786 instr[30] = element selector 0=>half, 1=>all elements
11787 instr[29,25] = 00110
11788 instr[24] = 0=>multiple struct, 1=>single struct
11789 instr[23] = 0=>simple, 1=>post
11790 instr[22] = 0=>store, 1=>load
11791 instr[21] = 0 (LDn) / small(0)-large(1) selector (LDnR)
11792 instr[20,16] = 00000 (simple), Vinc (reg-post-inc, no SP),
11793 11111 (immediate post inc)
11794 instr[15,12] = elements and destinations. eg for load:
11795 0000=>LD4 => load multiple 4-element to
11796 four consecutive registers
11797 0100=>LD3 => load multiple 3-element to
11798 three consecutive registers
11799 1000=>LD2 => load multiple 2-element to
11800 two consecutive registers
11801 0010=>LD1 => load multiple 1-element to
11802 four consecutive registers
11803 0110=>LD1 => load multiple 1-element to
11804 three consecutive registers
11805 1010=>LD1 => load multiple 1-element to
11806 two consecutive registers
11807 0111=>LD1 => load multiple 1-element to
11808 one register
11809 1100=>LD1R,LD2R
11810 1110=>LD3R,LD4R
11811 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11812 10=> word(s), 11=> double(d)
11813 instr[9,5] = Vn, can be SP
11814 instr[4,0] = Vd */
11815
11816 int single;
11817 int post;
11818 int load;
11819 unsigned vn;
11820 uint64_t address;
11821 int type;
11822
11823 if (INSTR (31, 31) != 0 || INSTR (29, 25) != 0x06)
11824 HALT_NYI;
11825
11826 single = INSTR (24, 24);
11827 post = INSTR (23, 23);
11828 load = INSTR (22, 22);
11829 type = INSTR (15, 12);
11830 vn = INSTR (9, 5);
11831 address = aarch64_get_reg_u64 (cpu, vn, SP_OK);
11832
11833 if (! single && INSTR (21, 21) != 0)
11834 HALT_UNALLOC;
11835
11836 if (post)
11837 {
11838 unsigned vm = INSTR (20, 16);
11839
11840 if (vm == R31)
11841 {
11842 unsigned sizeof_operation;
11843
11844 if (single)
11845 {
11846 if ((type >= 0) && (type <= 11))
11847 {
11848 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11849 switch (INSTR (15, 14))
11850 {
11851 case 0:
11852 sizeof_operation = nregs * 1;
11853 break;
11854 case 1:
11855 sizeof_operation = nregs * 2;
11856 break;
11857 case 2:
11858 if (INSTR (10, 10) == 0)
11859 sizeof_operation = nregs * 4;
11860 else
11861 sizeof_operation = nregs * 8;
11862 break;
11863 default:
11864 HALT_UNALLOC;
11865 }
11866 }
11867 else if (type == 0xC)
11868 {
11869 sizeof_operation = INSTR (21, 21) ? 2 : 1;
11870 sizeof_operation <<= INSTR (11, 10);
11871 }
11872 else if (type == 0xE)
11873 {
11874 sizeof_operation = INSTR (21, 21) ? 4 : 3;
11875 sizeof_operation <<= INSTR (11, 10);
11876 }
11877 else
11878 HALT_UNALLOC;
11879 }
11880 else
11881 {
11882 switch (type)
11883 {
11884 case 0: sizeof_operation = 32; break;
11885 case 4: sizeof_operation = 24; break;
11886 case 8: sizeof_operation = 16; break;
11887
11888 case 7:
11889 /* One register, immediate offset variant. */
11890 sizeof_operation = 8;
11891 break;
11892
11893 case 10:
11894 /* Two registers, immediate offset variant. */
11895 sizeof_operation = 16;
11896 break;
11897
11898 case 6:
11899 /* Three registers, immediate offset variant. */
11900 sizeof_operation = 24;
11901 break;
11902
11903 case 2:
11904 /* Four registers, immediate offset variant. */
11905 sizeof_operation = 32;
11906 break;
11907
11908 default:
11909 HALT_UNALLOC;
11910 }
11911
11912 if (INSTR (30, 30))
11913 sizeof_operation *= 2;
11914 }
11915
11916 aarch64_set_reg_u64 (cpu, vn, SP_OK, address + sizeof_operation);
11917 }
11918 else
11919 aarch64_set_reg_u64 (cpu, vn, SP_OK,
11920 address + aarch64_get_reg_u64 (cpu, vm, NO_SP));
11921 }
11922 else
11923 {
11924 NYI_assert (20, 16, 0);
11925 }
11926
11927 if (single)
11928 {
11929 if (load)
11930 {
11931 if ((type >= 0) && (type <= 11))
11932 do_vec_LDn_single (cpu, address);
11933 else if ((type == 0xC) || (type == 0xE))
11934 do_vec_LDnR (cpu, address);
11935 else
11936 HALT_UNALLOC;
11937 return;
11938 }
11939
11940 /* Stores. */
11941 if ((type >= 0) && (type <= 11))
11942 {
11943 do_vec_STn_single (cpu, address);
11944 return;
11945 }
11946
11947 HALT_UNALLOC;
11948 }
11949
11950 if (load)
11951 {
11952 switch (type)
11953 {
11954 case 0: LD4 (cpu, address); return;
11955 case 4: LD3 (cpu, address); return;
11956 case 8: LD2 (cpu, address); return;
11957 case 2: LD1_4 (cpu, address); return;
11958 case 6: LD1_3 (cpu, address); return;
11959 case 10: LD1_2 (cpu, address); return;
11960 case 7: LD1_1 (cpu, address); return;
11961
11962 default:
11963 HALT_UNALLOC;
11964 }
11965 }
11966
11967 /* Stores. */
11968 switch (type)
11969 {
11970 case 0: ST4 (cpu, address); return;
11971 case 4: ST3 (cpu, address); return;
11972 case 8: ST2 (cpu, address); return;
11973 case 2: ST1_4 (cpu, address); return;
11974 case 6: ST1_3 (cpu, address); return;
11975 case 10: ST1_2 (cpu, address); return;
11976 case 7: ST1_1 (cpu, address); return;
11977 default:
11978 HALT_UNALLOC;
11979 }
11980 }
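/* Worked example (illustrative): LD1 {V0.16B}, [X0], #16 is a
   multiple-structure load with type = 0111 and Vm = 31, so
   sizeof_operation starts at 8 (one register, immediate offset
   variant) and doubles to 16 because instr[30] selects the full
   128 bit form; X0 therefore advances by 16. */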
11981
11982 static void
11983 dexLdSt (sim_cpu *cpu)
11984 {
11985 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
11986 assert group == GROUP_LDST_0100 || group == GROUP_LDST_0110 ||
11987 group == GROUP_LDST_1100 || group == GROUP_LDST_1110
11988 bits [29,28] and [26] of a LS are the secondary dispatch vector. */
11989 uint32_t group2 = dispatchLS (aarch64_get_instr (cpu));
11990
11991 switch (group2)
11992 {
11993 case LS_EXCL_000:
11994 dexLoadExclusive (cpu); return;
11995
11996 case LS_LIT_010:
11997 case LS_LIT_011:
11998 dexLoadLiteral (cpu); return;
11999
12000 case LS_OTHER_110:
12001 case LS_OTHER_111:
12002 dexLoadOther (cpu); return;
12003
12004 case LS_ADVSIMD_001:
12005 do_vec_load_store (cpu); return;
12006
12007 case LS_PAIR_100:
12008 dex_load_store_pair_gr (cpu); return;
12009
12010 case LS_PAIR_101:
12011 dex_load_store_pair_fp (cpu); return;
12012
12013 default:
12014 /* Should never reach here. */
12015 HALT_NYI;
12016 }
12017 }
12018
12019 /* Specific decode and execute for group Data Processing Register. */
12020
12021 static void
12022 dexLogicalShiftedRegister (sim_cpu *cpu)
12023 {
12024 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12025 instr[30,29] = op
12026 instr[28,24] = 01010
12027 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> ROR
12028 instr[21] = N
12029 instr[20,16] = Rm
12030 instr[15,10] = count : must be 0xxxxx for 32 bit
12031 instr[9,5] = Rn
12032 instr[4,0] = Rd */
12033
12034 uint32_t size = INSTR (31, 31);
12035 Shift shiftType = INSTR (23, 22);
12036 uint32_t count = INSTR (15, 10);
12037
12038 /* 32 bit operations must have count[5] = 0,
12039 otherwise we have an UNALLOC. */
12040 if (size == 0 && uimm (count, 5, 5))
12041 HALT_UNALLOC;
12042
12043 /* Dispatch on size:op:N. */
12044 switch ((INSTR (31, 29) << 1) | INSTR (21, 21))
12045 {
12046 case 0: and32_shift (cpu, shiftType, count); return;
12047 case 1: bic32_shift (cpu, shiftType, count); return;
12048 case 2: orr32_shift (cpu, shiftType, count); return;
12049 case 3: orn32_shift (cpu, shiftType, count); return;
12050 case 4: eor32_shift (cpu, shiftType, count); return;
12051 case 5: eon32_shift (cpu, shiftType, count); return;
12052 case 6: ands32_shift (cpu, shiftType, count); return;
12053 case 7: bics32_shift (cpu, shiftType, count); return;
12054 case 8: and64_shift (cpu, shiftType, count); return;
12055 case 9: bic64_shift (cpu, shiftType, count); return;
12056 case 10: orr64_shift (cpu, shiftType, count); return;
12057 case 11: orn64_shift (cpu, shiftType, count); return;
12058 case 12: eor64_shift (cpu, shiftType, count); return;
12059 case 13: eon64_shift (cpu, shiftType, count); return;
12060 case 14: ands64_shift (cpu, shiftType, count); return;
12061 case 15: bics64_shift (cpu, shiftType, count); return;
12062 }
12063 }
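/* Worked decode (illustrative): BIC X0, X1, X2 has size = 1, op = 00
   and N = 1, so (INSTR (31, 29) << 1) | N = (100b << 1) | 1 = 9 and
   bic64_shift runs with shiftType LSL and count 0. */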
12064
12065 /* 32 bit conditional select. */
12066 static void
12067 csel32 (sim_cpu *cpu, CondCode cc)
12068 {
12069 unsigned rm = INSTR (20, 16);
12070 unsigned rn = INSTR (9, 5);
12071 unsigned rd = INSTR (4, 0);
12072
12073 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12074 testConditionCode (cpu, cc)
12075 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12076 : aarch64_get_reg_u32 (cpu, rm, NO_SP));
12077 }
12078
12079 /* 64 bit conditional select. */
12080 static void
12081 csel64 (sim_cpu *cpu, CondCode cc)
12082 {
12083 unsigned rm = INSTR (20, 16);
12084 unsigned rn = INSTR (9, 5);
12085 unsigned rd = INSTR (4, 0);
12086
12087 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12088 testConditionCode (cpu, cc)
12089 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12090 : aarch64_get_reg_u64 (cpu, rm, NO_SP));
12091 }
12092
12093 /* 32 bit conditional increment. */
12094 static void
12095 csinc32 (sim_cpu *cpu, CondCode cc)
12096 {
12097 unsigned rm = INSTR (20, 16);
12098 unsigned rn = INSTR (9, 5);
12099 unsigned rd = INSTR (4, 0);
12100
12101 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12102 testConditionCode (cpu, cc)
12103 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12104 : aarch64_get_reg_u32 (cpu, rm, NO_SP) + 1);
12105 }
12106
12107 /* 64 bit conditional increment. */
12108 static void
12109 csinc64 (sim_cpu *cpu, CondCode cc)
12110 {
12111 unsigned rm = INSTR (20, 16);
12112 unsigned rn = INSTR (9, 5);
12113 unsigned rd = INSTR (4, 0);
12114
12115 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12116 testConditionCode (cpu, cc)
12117 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12118 : aarch64_get_reg_u64 (cpu, rm, NO_SP) + 1);
12119 }
12120
12121 /* 32 bit conditional invert. */
12122 static void
12123 csinv32 (sim_cpu *cpu, CondCode cc)
12124 {
12125 unsigned rm = INSTR (20, 16);
12126 unsigned rn = INSTR (9, 5);
12127 unsigned rd = INSTR (4, 0);
12128
12129 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12130 testConditionCode (cpu, cc)
12131 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12132 : ~ aarch64_get_reg_u32 (cpu, rm, NO_SP));
12133 }
12134
12135 /* 64 bit conditional invert. */
12136 static void
12137 csinv64 (sim_cpu *cpu, CondCode cc)
12138 {
12139 unsigned rm = INSTR (20, 16);
12140 unsigned rn = INSTR (9, 5);
12141 unsigned rd = INSTR (4, 0);
12142
12143 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12144 testConditionCode (cpu, cc)
12145 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12146 : ~ aarch64_get_reg_u64 (cpu, rm, NO_SP));
12147 }
12148
12149 /* 32 bit conditional negate. */
12150 static void
12151 csneg32 (sim_cpu *cpu, CondCode cc)
12152 {
12153 unsigned rm = INSTR (20, 16);
12154 unsigned rn = INSTR (9, 5);
12155 unsigned rd = INSTR (4, 0);
12156
12157 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12158 testConditionCode (cpu, cc)
12159 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12160 : - aarch64_get_reg_u32 (cpu, rm, NO_SP));
12161 }
12162
12163 /* 64 bit conditional negate. */
12164 static void
12165 csneg64 (sim_cpu *cpu, CondCode cc)
12166 {
12167 unsigned rm = INSTR (20, 16);
12168 unsigned rn = INSTR (9, 5);
12169 unsigned rd = INSTR (4, 0);
12170
12171 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12172 testConditionCode (cpu, cc)
12173 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12174 : - aarch64_get_reg_u64 (cpu, rm, NO_SP));
12175 }
12176
12177 static void
12178 dexCondSelect (sim_cpu *cpu)
12179 {
12180 /* instr[28,21] = 11010100
12181 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12182 instr[30],[11,10] = op : 000 ==> CSEL, 001 ==> CSINC,
12183 100 ==> CSINV, 101 ==> CSNEG,
12184 _1_ ==> UNALLOC
12185 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12186 instr[15,12] = cond
12187 instr[20,16] = Rm, instr[9,5] = Rn, instr[4,0] = Rd. */
12188
12189 CondCode cc = INSTR (15, 12);
12190 uint32_t S = INSTR (29, 29);
12191 uint32_t op2 = INSTR (11, 10);
12192
12193 if (S == 1)
12194 HALT_UNALLOC;
12195
12196 if (op2 & 0x2)
12197 HALT_UNALLOC;
12198
12199 switch ((INSTR (31, 30) << 1) | op2)
12200 {
12201 case 0: csel32 (cpu, cc); return;
12202 case 1: csinc32 (cpu, cc); return;
12203 case 2: csinv32 (cpu, cc); return;
12204 case 3: csneg32 (cpu, cc); return;
12205 case 4: csel64 (cpu, cc); return;
12206 case 5: csinc64 (cpu, cc); return;
12207 case 6: csinv64 (cpu, cc); return;
12208 case 7: csneg64 (cpu, cc); return;
12209 }
12210 }
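/* Worked decode (illustrative): CSINC W0, W1, W2, NE has size = 0,
   op = 0 and op2 = 01, so (INSTR (31, 30) << 1) | op2 = 1 and csinc32
   runs; the common alias CINC W0, W1, EQ is the same encoding with
   Rm = Rn and the condition inverted. */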
12211
12212 /* Some helpers for counting leading 1 or 0 bits. */
12213
12214 /* Counts the number of leading bits which are the same
12215 in a 32 bit value; the result is in the range 1 to 32. */
12216 static uint32_t
12217 leading32 (uint32_t value)
12218 {
12219 int32_t mask = 0xffff0000;
12220 uint32_t count = 16; /* Counts number of bits set in mask. */
12221 uint32_t lo = 1; /* Lower bound for number of sign bits. */
12222 uint32_t hi = 32; /* Upper bound for number of sign bits. */
12223
12224 while (lo + 1 < hi)
12225 {
12226 int32_t test = (value & mask);
12227
12228 if (test == 0 || test == mask)
12229 {
12230 lo = count;
12231 count = (lo + hi) / 2;
12232 mask >>= (count - lo);
12233 }
12234 else
12235 {
12236 hi = count;
12237 count = (lo + hi) / 2;
12238 mask <<= hi - count;
12239 }
12240 }
12241
12242 if (lo != hi)
12243 {
12244 int32_t test;
12245
12246 mask >>= 1;
12247 test = (value & mask);
12248
12249 if (test == 0 || test == mask)
12250 count = hi;
12251 else
12252 count = lo;
12253 }
12254
12255 return count;
12256 }
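/* A minimal cross-check sketch (hypothetical, not used by the
   simulator): a linear version of leading32. It counts how many bits,
   starting from bit 31, match the top bit, which is the quantity the
   binary search above converges on. */
static uint32_t
leading32_linear (uint32_t value)
{
  uint32_t top = value >> 31;
  uint32_t count = 1;
  int i;

  for (i = 30; i >= 0 && ((value >> i) & 1) == top; i--)
    count++;

  return count;
}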
12257
12258 /* Counts the number of leading bits which are the same
12259 in a 64 bit value; the result is in the range 1 to 64. */
12260 static uint64_t
12261 leading64 (uint64_t value)
12262 {
12263 int64_t mask = 0xffffffff00000000LL;
12264 uint64_t count = 32; /* Counts number of bits set in mask. */
12265 uint64_t lo = 1; /* Lower bound for number of sign bits. */
12266 uint64_t hi = 64; /* Upper bound for number of sign bits. */
12267
12268 while (lo + 1 < hi)
12269 {
12270 int64_t test = (value & mask);
12271
12272 if (test == 0 || test == mask)
12273 {
12274 lo = count;
12275 count = (lo + hi) / 2;
12276 mask >>= (count - lo);
12277 }
12278 else
12279 {
12280 hi = count;
12281 count = (lo + hi) / 2;
12282 mask <<= hi - count;
12283 }
12284 }
12285
12286 if (lo != hi)
12287 {
12288 int64_t test;
12289
12290 mask >>= 1;
12291 test = (value & mask);
12292
12293 if (test == 0 || test == mask)
12294 count = hi;
12295 else
12296 count = lo;
12297 }
12298
12299 return count;
12300 }
12301
12302 /* Bit operations. */
12303 /* N.B. register args may not be SP. */
12304
12305 /* 32 bit count leading sign bits. */
12306 static void
12307 cls32 (sim_cpu *cpu)
12308 {
12309 unsigned rn = INSTR (9, 5);
12310 unsigned rd = INSTR (4, 0);
12311
12312 /* N.B. the result needs to exclude the leading bit. */
12313 aarch64_set_reg_u64
12314 (cpu, rd, NO_SP, leading32 (aarch64_get_reg_u32 (cpu, rn, NO_SP)) - 1);
12315 }
12316
12317 /* 64 bit count leading sign bits. */
12318 static void
12319 cls64 (sim_cpu *cpu)
12320 {
12321 unsigned rn = INSTR (9, 5);
12322 unsigned rd = INSTR (4, 0);
12323
12324 /* N.B. the result needs to exclude the leading bit. */
12325 aarch64_set_reg_u64
12326 (cpu, rd, NO_SP, leading64 (aarch64_get_reg_u64 (cpu, rn, NO_SP)) - 1);
12327 }
12328
12329 /* 32 bit count leading zero bits. */
12330 static void
12331 clz32 (sim_cpu *cpu)
12332 {
12333 unsigned rn = INSTR (9, 5);
12334 unsigned rd = INSTR (4, 0);
12335 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12336
12337 /* If the sign (top) bit is set then the count is 0. */
12338 if (pick32 (value, 31, 31))
12339 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12340 else
12341 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading32 (value));
12342 }
12343
12344 /* 64 bit count leading zero bits. */
12345 static void
12346 clz64 (sim_cpu *cpu)
12347 {
12348 unsigned rn = INSTR (9, 5);
12349 unsigned rd = INSTR (4, 0);
12350 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12351
12352 /* If the sign (top) bit is set then the count is 0. */
12353 if (pick64 (value, 63, 63))
12354 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12355 else
12356 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading64 (value));
12357 }
12358
12359 /* 32 bit reverse bits. */
12360 static void
12361 rbit32 (sim_cpu *cpu)
12362 {
12363 unsigned rn = INSTR (9, 5);
12364 unsigned rd = INSTR (4, 0);
12365 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12366 uint32_t result = 0;
12367 int i;
12368
12369 for (i = 0; i < 32; i++)
12370 {
12371 result <<= 1;
12372 result |= (value & 1);
12373 value >>= 1;
12374 }
12375 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12376 }
12377
12378 /* 64 bit reverse bits. */
12379 static void
12380 rbit64 (sim_cpu *cpu)
12381 {
12382 unsigned rn = INSTR (9, 5);
12383 unsigned rd = INSTR (4, 0);
12384 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12385 uint64_t result = 0;
12386 int i;
12387
12388 for (i = 0; i < 64; i++)
12389 {
12390 result <<= 1;
12391 result |= (value & 1UL);
12392 value >>= 1;
12393 }
12394 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12395 }
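
/* Quick example (editor's addition): the loops above mirror the bit
   string, so reversing 0x00000001 yields 0x80000000 and reversing
   0x80000000 yields 0x00000001; rbit64 behaves the same way over
   64 bits.  */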
12396
12397 /* 32 bit reverse bytes. */
12398 static void
12399 rev32 (sim_cpu *cpu)
12400 {
12401 unsigned rn = INSTR (9, 5);
12402 unsigned rd = INSTR (4, 0);
12403 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12404 uint32_t result = 0;
12405 int i;
12406
12407 for (i = 0; i < 4; i++)
12408 {
12409 result <<= 8;
12410 result |= (value & 0xff);
12411 value >>= 8;
12412 }
12413 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12414 }
12415
12416 /* 64 bit reverse bytes. */
12417 static void
12418 rev64 (sim_cpu *cpu)
12419 {
12420 unsigned rn = INSTR (9, 5);
12421 unsigned rd = INSTR (4, 0);
12422 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12423 uint64_t result = 0;
12424 int i;
12425
12426 for (i = 0; i < 8; i++)
12427 {
12428 result <<= 8;
12429 result |= (value & 0xffULL);
12430 value >>= 8;
12431 }
12432 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12433 }
12434
12435 /* 32 bit reverse shorts. */
12436 /* N.B. this reverses the order of the bytes in each half word.  */
12437 static void
12438 revh32 (sim_cpu *cpu)
12439 {
12440 unsigned rn = INSTR (9, 5);
12441 unsigned rd = INSTR (4, 0);
12442 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12443 uint32_t result = 0;
12444 int i;
12445
12446 for (i = 0; i < 2; i++)
12447 {
12448 result <<= 8;
12449 result |= (value & 0x00ff00ff);
12450 value >>= 8;
12451 }
12452 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12453 }
12454
12455 /* 64 bit reverse shorts. */
12456 /* N.B. this reverses the order of the bytes in each half word.  */
12457 static void
12458 revh64 (sim_cpu *cpu)
12459 {
12460 unsigned rn = INSTR (9, 5);
12461 unsigned rd = INSTR (4, 0);
12462 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12463 uint64_t result = 0;
12464 int i;
12465
12466 for (i = 0; i < 2; i++)
12467 {
12468 result <<= 8;
12469 result |= (value & 0x00ff00ff00ff00ffULL);
12470 value >>= 8;
12471 }
12472 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12473 }
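
/* Quick example (editor's addition): the 0x00ff00ff masking above swaps
   the two bytes inside each halfword, so an input of 0x11223344 becomes
   0x22114433; revh64 does the same for all four halfwords.  */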
12474
12475 static void
12476 dexDataProc1Source (sim_cpu *cpu)
12477 {
12478 /* instr[30] = 1
12479      instr[28,21] = 11010110
12480 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12481 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12482 instr[20,16] = opcode2 : 00000 ==> ok, ow ==> UNALLOC
12483 instr[15,10] = opcode : 000000 ==> RBIT, 000001 ==> REV16,
12484                             000010 ==> REV (32 bit) / REV32 (64 bit),
12484                             000011 ==> UNALLOC (32 bit) / REV (64 bit)
12485 000100 ==> CLZ, 000101 ==> CLS
12486 ow ==> UNALLOC
12487 instr[9,5] = rn : may not be SP
12488 instr[4,0] = rd : may not be SP. */
12489
12490 uint32_t S = INSTR (29, 29);
12491 uint32_t opcode2 = INSTR (20, 16);
12492 uint32_t opcode = INSTR (15, 10);
12493 uint32_t dispatch = ((INSTR (31, 31) << 3) | opcode);
12494
12495 if (S == 1)
12496 HALT_UNALLOC;
12497
12498 if (opcode2 != 0)
12499 HALT_UNALLOC;
12500
12501 if (opcode & 0x38)
12502 HALT_UNALLOC;
12503
12504 switch (dispatch)
12505 {
12506 case 0: rbit32 (cpu); return;
12507 case 1: revh32 (cpu); return;
12508 case 2: rev32 (cpu); return;
12509 case 4: clz32 (cpu); return;
12510 case 5: cls32 (cpu); return;
12511 case 8: rbit64 (cpu); return;
12512 case 9: revh64 (cpu); return;
12513     case 10: rev32 (cpu); return; /* FIXME: REV32 Xd should swap both words.  */
12514     case 11: rev64 (cpu); return;
12515     case 12: clz64 (cpu); return;
12516     case 13: cls64 (cpu); return;
12517 default: HALT_UNALLOC;
12518 }
12519 }
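
/* Dispatch example (editor's addition): CLS X0, X1 has size == 1 and
   opcode == 000101, so dispatch == (1 << 3) | 5 == 13 and cls64 above
   is selected.  */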
12520
12521 /* Variable shift.
12522 Shifts by count supplied in register.
12523    N.B. register args may not be SP.
12524 These all use the shifted auxiliary function for
12525 simplicity and clarity. Writing the actual shift
12526 inline would avoid a branch and so be faster but
12527 would also necessitate getting signs right. */
12528
12529 /* 32 bit arithmetic shift right. */
12530 static void
12531 asrv32 (sim_cpu *cpu)
12532 {
12533 unsigned rm = INSTR (20, 16);
12534 unsigned rn = INSTR (9, 5);
12535 unsigned rd = INSTR (4, 0);
12536
12537 aarch64_set_reg_u64
12538 (cpu, rd, NO_SP,
12539 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ASR,
12540 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12541 }
12542
12543 /* 64 bit arithmetic shift right. */
12544 static void
12545 asrv64 (sim_cpu *cpu)
12546 {
12547 unsigned rm = INSTR (20, 16);
12548 unsigned rn = INSTR (9, 5);
12549 unsigned rd = INSTR (4, 0);
12550
12551 aarch64_set_reg_u64
12552 (cpu, rd, NO_SP,
12553 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ASR,
12554 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12555 }
12556
12557 /* 32 bit logical shift left. */
12558 static void
12559 lslv32 (sim_cpu *cpu)
12560 {
12561 unsigned rm = INSTR (20, 16);
12562 unsigned rn = INSTR (9, 5);
12563 unsigned rd = INSTR (4, 0);
12564
12565 aarch64_set_reg_u64
12566 (cpu, rd, NO_SP,
12567 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSL,
12568 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12569 }
12570
12571 /* 64 bit logical shift left.  */
12572 static void
12573 lslv64 (sim_cpu *cpu)
12574 {
12575 unsigned rm = INSTR (20, 16);
12576 unsigned rn = INSTR (9, 5);
12577 unsigned rd = INSTR (4, 0);
12578
12579 aarch64_set_reg_u64
12580 (cpu, rd, NO_SP,
12581 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSL,
12582 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12583 }
12584
12585 /* 32 bit logical shift right. */
12586 static void
12587 lsrv32 (sim_cpu *cpu)
12588 {
12589 unsigned rm = INSTR (20, 16);
12590 unsigned rn = INSTR (9, 5);
12591 unsigned rd = INSTR (4, 0);
12592
12593 aarch64_set_reg_u64
12594 (cpu, rd, NO_SP,
12595 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSR,
12596 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12597 }
12598
12599 /* 64 bit logical shift right. */
12600 static void
12601 lsrv64 (sim_cpu *cpu)
12602 {
12603 unsigned rm = INSTR (20, 16);
12604 unsigned rn = INSTR (9, 5);
12605 unsigned rd = INSTR (4, 0);
12606
12607 aarch64_set_reg_u64
12608 (cpu, rd, NO_SP,
12609 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSR,
12610 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12611 }
12612
12613 /* 32 bit rotate right. */
12614 static void
12615 rorv32 (sim_cpu *cpu)
12616 {
12617 unsigned rm = INSTR (20, 16);
12618 unsigned rn = INSTR (9, 5);
12619 unsigned rd = INSTR (4, 0);
12620
12621 aarch64_set_reg_u64
12622 (cpu, rd, NO_SP,
12623 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ROR,
12624 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12625 }
12626
12627 /* 64 bit rotate right. */
12628 static void
12629 rorv64 (sim_cpu *cpu)
12630 {
12631 unsigned rm = INSTR (20, 16);
12632 unsigned rn = INSTR (9, 5);
12633 unsigned rd = INSTR (4, 0);
12634
12635 aarch64_set_reg_u64
12636 (cpu, rd, NO_SP,
12637 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ROR,
12638 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12639 }
12640
12641
12642 /* divide. */
12643
12644 /* 32 bit signed divide. */
12645 static void
12646 sdiv32 (sim_cpu *cpu)
12647 {
12648 unsigned rm = INSTR (20, 16);
12649 unsigned rn = INSTR (9, 5);
12650 unsigned rd = INSTR (4, 0);
12651 /* N.B. the pseudo-code does the divide using 64 bit data. */
12652 /* TODO : check that this rounds towards zero as required. */
12653 int64_t dividend = aarch64_get_reg_s32 (cpu, rn, NO_SP);
12654 int64_t divisor = aarch64_get_reg_s32 (cpu, rm, NO_SP);
12655
12656 aarch64_set_reg_s64 (cpu, rd, NO_SP,
12657 divisor ? ((int32_t) (dividend / divisor)) : 0);
12658 }
12659
12660 /* 64 bit signed divide. */
12661 static void
12662 sdiv64 (sim_cpu *cpu)
12663 {
12664 unsigned rm = INSTR (20, 16);
12665 unsigned rn = INSTR (9, 5);
12666 unsigned rd = INSTR (4, 0);
12667
12668 /* TODO : check that this rounds towards zero as required. */
12669 int64_t divisor = aarch64_get_reg_s64 (cpu, rm, NO_SP);
12670
12671 aarch64_set_reg_s64
12672 (cpu, rd, NO_SP,
12673 divisor ? (aarch64_get_reg_s64 (cpu, rn, NO_SP) / divisor) : 0);
12674 }
12675
12676 /* 32 bit unsigned divide. */
12677 static void
12678 udiv32 (sim_cpu *cpu)
12679 {
12680 unsigned rm = INSTR (20, 16);
12681 unsigned rn = INSTR (9, 5);
12682 unsigned rd = INSTR (4, 0);
12683
12684 /* N.B. the pseudo-code does the divide using 64 bit data. */
12685 uint64_t dividend = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12686 uint64_t divisor = aarch64_get_reg_u32 (cpu, rm, NO_SP);
12687
12688 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12689 divisor ? (uint32_t) (dividend / divisor) : 0);
12690 }
12691
12692 /* 64 bit unsigned divide. */
12693 static void
12694 udiv64 (sim_cpu *cpu)
12695 {
12696 unsigned rm = INSTR (20, 16);
12697 unsigned rn = INSTR (9, 5);
12698 unsigned rd = INSTR (4, 0);
12699
12700 /* TODO : check that this rounds towards zero as required. */
12701 uint64_t divisor = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12702
12703 aarch64_set_reg_u64
12704 (cpu, rd, NO_SP,
12705 divisor ? (aarch64_get_reg_u64 (cpu, rn, NO_SP) / divisor) : 0);
12706 }
12707
12708 static void
12709 dexDataProc2Source (sim_cpu *cpu)
12710 {
12711 /* assert instr[30] == 0
12712 instr[28,21] == 11010110
12713 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12714 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12715      instr[15,10] = opcode : 000010 ==> UDIV, 000011 ==> SDIV,
12716 001000 ==> LSLV, 001001 ==> LSRV
12717 001010 ==> ASRV, 001011 ==> RORV
12718 ow ==> UNALLOC. */
12719
12720 uint32_t dispatch;
12721 uint32_t S = INSTR (29, 29);
12722 uint32_t opcode = INSTR (15, 10);
12723
12724 if (S == 1)
12725 HALT_UNALLOC;
12726
12727 if (opcode & 0x34)
12728 HALT_UNALLOC;
12729
12730 dispatch = ( (INSTR (31, 31) << 3)
12731 | (uimm (opcode, 3, 3) << 2)
12732 | uimm (opcode, 1, 0));
12733 switch (dispatch)
12734 {
12735 case 2: udiv32 (cpu); return;
12736     case 3:  sdiv32 (cpu); return;
12737 case 4: lslv32 (cpu); return;
12738 case 5: lsrv32 (cpu); return;
12739 case 6: asrv32 (cpu); return;
12740 case 7: rorv32 (cpu); return;
12741 case 10: udiv64 (cpu); return;
12742     case 11: sdiv64 (cpu); return;
12743 case 12: lslv64 (cpu); return;
12744 case 13: lsrv64 (cpu); return;
12745 case 14: asrv64 (cpu); return;
12746 case 15: rorv64 (cpu); return;
12747 default: HALT_UNALLOC;
12748 }
12749 }
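
/* Dispatch example (editor's addition): UDIV X0, X1, X2 has size == 1
   and opcode == 000010, giving dispatch == (1 << 3) | 2 == 10, which
   selects udiv64 above.  */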
12750
12751
12752 /* Multiply. */
12753
12754 /* 32 bit multiply and add. */
12755 static void
12756 madd32 (sim_cpu *cpu)
12757 {
12758 unsigned rm = INSTR (20, 16);
12759 unsigned ra = INSTR (14, 10);
12760 unsigned rn = INSTR (9, 5);
12761 unsigned rd = INSTR (4, 0);
12762
12763 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12764 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12765 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12766 + aarch64_get_reg_u32 (cpu, rn, NO_SP)
12767 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12768 }
12769
12770 /* 64 bit multiply and add. */
12771 static void
12772 madd64 (sim_cpu *cpu)
12773 {
12774 unsigned rm = INSTR (20, 16);
12775 unsigned ra = INSTR (14, 10);
12776 unsigned rn = INSTR (9, 5);
12777 unsigned rd = INSTR (4, 0);
12778
12779 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12780 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12781 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12782 + (aarch64_get_reg_u64 (cpu, rn, NO_SP)
12783 * aarch64_get_reg_u64 (cpu, rm, NO_SP)));
12784 }
12785
12786 /* 32 bit multiply and sub. */
12787 static void
12788 msub32 (sim_cpu *cpu)
12789 {
12790 unsigned rm = INSTR (20, 16);
12791 unsigned ra = INSTR (14, 10);
12792 unsigned rn = INSTR (9, 5);
12793 unsigned rd = INSTR (4, 0);
12794
12795 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12796 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12797 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12798 - aarch64_get_reg_u32 (cpu, rn, NO_SP)
12799 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12800 }
12801
12802 /* 64 bit multiply and sub. */
12803 static void
12804 msub64 (sim_cpu *cpu)
12805 {
12806 unsigned rm = INSTR (20, 16);
12807 unsigned ra = INSTR (14, 10);
12808 unsigned rn = INSTR (9, 5);
12809 unsigned rd = INSTR (4, 0);
12810
12811 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12812 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12813 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12814 - aarch64_get_reg_u64 (cpu, rn, NO_SP)
12815 * aarch64_get_reg_u64 (cpu, rm, NO_SP));
12816 }
12817
12818 /* Signed multiply add long -- source, source2 : 32 bit, source3 : 64 bit. */
12819 static void
12820 smaddl (sim_cpu *cpu)
12821 {
12822 unsigned rm = INSTR (20, 16);
12823 unsigned ra = INSTR (14, 10);
12824 unsigned rn = INSTR (9, 5);
12825 unsigned rd = INSTR (4, 0);
12826
12827 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12828 obtain a 64 bit product. */
12829 aarch64_set_reg_s64
12830 (cpu, rd, NO_SP,
12831 aarch64_get_reg_s64 (cpu, ra, NO_SP)
12832 + ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
12833 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
12834 }
12835
12836 /* Signed multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
12837 static void
12838 smsubl (sim_cpu *cpu)
12839 {
12840 unsigned rm = INSTR (20, 16);
12841 unsigned ra = INSTR (14, 10);
12842 unsigned rn = INSTR (9, 5);
12843 unsigned rd = INSTR (4, 0);
12844
12845 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12846 obtain a 64 bit product. */
12847 aarch64_set_reg_s64
12848 (cpu, rd, NO_SP,
12849 aarch64_get_reg_s64 (cpu, ra, NO_SP)
12850 - ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
12851 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
12852 }
12853
12854 /* Integer Multiply/Divide. */
12855
12856 /* First some macros and a helper function. */
12857 /* Macros to test or access elements of 64 bit words. */
12858
12859 /* Mask used to access lo 32 bits of 64 bit unsigned int. */
12860 #define LOW_WORD_MASK ((1ULL << 32) - 1)
12861 /* Return the lo 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
12862 #define lowWordToU64(_value_u64) ((_value_u64) & LOW_WORD_MASK)
12863 /* Return the hi 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
12864 #define highWordToU64(_value_u64) ((_value_u64) >> 32)
12865
12866 /* Offset of sign bit in 64 bit signed integer.  */
12867 #define SIGN_SHIFT_U64 63
12868 /* The sign bit itself -- also identifies the minimum negative int value. */
12869 #define SIGN_BIT_U64 (1ULL << SIGN_SHIFT_U64)
12870 /* Return true if a 64 bit signed int presented as an unsigned int is the
12871 most negative value. */
12872 #define isMinimumU64(_value_u64) ((_value_u64) == SIGN_BIT_U64)
12873 /* Return true (non-zero) if a 64 bit signed int presented as an unsigned
12874    int has its sign bit set.  */
12875 #define isSignSetU64(_value_u64) ((_value_u64) & SIGN_BIT_U64)
12876 /* Return 1L or -1L according to whether a 64 bit signed int presented as
12877 an unsigned int has its sign bit set or not. */
12878 #define signOfU64(_value_u64) (1L + (((_value_u64) >> SIGN_SHIFT_U64) * -2L))
12879 /* Clear the sign bit of a 64 bit signed int presented as an unsigned int. */
12880 #define clearSignU64(_value_u64) ((_value_u64) &= ~SIGN_BIT_U64)
12881
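/* Example usage (editor's addition): for 0x1122334455667788ULL,
   highWordToU64 yields 0x11223344 and lowWordToU64 yields 0x55667788.  */
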
12882 /* Multiply two 64 bit ints and return
12883 the hi 64 bits of the 128 bit product. */
12884
12885 static uint64_t
12886 mul64hi (uint64_t value1, uint64_t value2)
12887 {
12888 uint64_t resultmid1;
12889 uint64_t result;
12890 uint64_t value1_lo = lowWordToU64 (value1);
12891   uint64_t value1_hi = highWordToU64 (value1);
12892 uint64_t value2_lo = lowWordToU64 (value2);
12893 uint64_t value2_hi = highWordToU64 (value2);
12894
12895 /* Cross-multiply and collect results. */
12896 uint64_t xproductlo = value1_lo * value2_lo;
12897 uint64_t xproductmid1 = value1_lo * value2_hi;
12898 uint64_t xproductmid2 = value1_hi * value2_lo;
12899 uint64_t xproducthi = value1_hi * value2_hi;
12900 uint64_t carry = 0;
12901 /* Start accumulating 64 bit results. */
12902 /* Drop bottom half of lowest cross-product. */
12903 uint64_t resultmid = xproductlo >> 32;
12904 /* Add in middle products. */
12905 resultmid = resultmid + xproductmid1;
12906
12907 /* Check for overflow. */
12908 if (resultmid < xproductmid1)
12909 /* Carry over 1 into top cross-product. */
12910 carry++;
12911
12912 resultmid1 = resultmid + xproductmid2;
12913
12914 /* Check for overflow. */
12915 if (resultmid1 < xproductmid2)
12916 /* Carry over 1 into top cross-product. */
12917 carry++;
12918
12919 /* Drop lowest 32 bits of middle cross-product. */
12920 result = resultmid1 >> 32;
12921
12922   /* Add the top cross-product and any carry.  */
12923 result += xproducthi + carry;
12924
12925 return result;
12926 }
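
/* Editor's note: on hosts with a 128 bit integer type, mul64hi can be
   cross-checked against a widening multiply.  A minimal sketch,
   assuming GCC/Clang's unsigned __int128 extension (not part of the
   original source):

   static uint64_t
   mul64hi_ref (uint64_t a, uint64_t b)
   {
     return (uint64_t) (((unsigned __int128) a * b) >> 64);
   }
*/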
12927
12928 /* Signed multiply high, source, source2 :
12929 64 bit, dest <-- high 64-bit of result. */
12930 static void
12931 smulh (sim_cpu *cpu)
12932 {
12933 uint64_t uresult;
12934 int64_t result;
12935 unsigned rm = INSTR (20, 16);
12936 unsigned rn = INSTR (9, 5);
12937 unsigned rd = INSTR (4, 0);
12938 GReg ra = INSTR (14, 10);
12939 int64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12940 int64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12941 uint64_t uvalue1;
12942 uint64_t uvalue2;
12943 int64_t signum = 1;
12944
12945 if (ra != R31)
12946 HALT_UNALLOC;
12947
12948 /* Convert to unsigned and use the unsigned mul64hi routine
12949      then fix the sign up afterwards.  */
12950 if (value1 < 0)
12951 {
12952 signum *= -1L;
12953 uvalue1 = -value1;
12954 }
12955 else
12956 {
12957 uvalue1 = value1;
12958 }
12959
12960 if (value2 < 0)
12961 {
12962 signum *= -1L;
12963 uvalue2 = -value2;
12964 }
12965 else
12966 {
12967 uvalue2 = value2;
12968 }
12969
12970 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12971 uresult = mul64hi (uvalue1, uvalue2);
12972 result = uresult;
12973 result *= signum;
12974
12975 aarch64_set_reg_s64 (cpu, rd, NO_SP, result);
12976 }
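
/* Editor's note: when value1 or value2 is INT64_MIN the negation above
   overflows a signed 64 bit int, which is formally undefined in C; on
   the usual two's complement targets the result 0x8000000000000000 is
   nevertheless the correct unsigned magnitude.  */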
12977
12978 /* Unsigned multiply add long -- source, source2 :
12979 32 bit, source3 : 64 bit. */
12980 static void
12981 umaddl (sim_cpu *cpu)
12982 {
12983 unsigned rm = INSTR (20, 16);
12984 unsigned ra = INSTR (14, 10);
12985 unsigned rn = INSTR (9, 5);
12986 unsigned rd = INSTR (4, 0);
12987
12988 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12989   /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
12990 obtain a 64 bit product. */
12991 aarch64_set_reg_u64
12992 (cpu, rd, NO_SP,
12993 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12994 + ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
12995 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
12996 }
12997
12998 /* Unsigned multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
12999 static void
13000 umsubl (sim_cpu *cpu)
13001 {
13002 unsigned rm = INSTR (20, 16);
13003 unsigned ra = INSTR (14, 10);
13004 unsigned rn = INSTR (9, 5);
13005 unsigned rd = INSTR (4, 0);
13006
13007 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13008   /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
13009 obtain a 64 bit product. */
13010 aarch64_set_reg_u64
13011 (cpu, rd, NO_SP,
13012 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13013 - ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
13014 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
13015 }
13016
13017 /* Unsigned multiply high, source, source2 :
13018 64 bit, dest <-- high 64-bit of result. */
13019 static void
13020 umulh (sim_cpu *cpu)
13021 {
13022 unsigned rm = INSTR (20, 16);
13023 unsigned rn = INSTR (9, 5);
13024 unsigned rd = INSTR (4, 0);
13025 GReg ra = INSTR (14, 10);
13026
13027 if (ra != R31)
13028 HALT_UNALLOC;
13029
13030 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13031 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13032 mul64hi (aarch64_get_reg_u64 (cpu, rn, NO_SP),
13033 aarch64_get_reg_u64 (cpu, rm, NO_SP)));
13034 }
13035
13036 static void
13037 dexDataProc3Source (sim_cpu *cpu)
13038 {
13039 /* assert instr[28,24] == 11011. */
13040 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit (for rd at least)
13041 instr[30,29] = op54 : 00 ==> ok, ow ==> UNALLOC
13042      instr[23,21] = op31 : 111 ==> UNALLOC, ow ==> ok
13043 instr[15] = o0 : 0/1 ==> ok
13044 instr[23,21:15] ==> op : 0000 ==> MADD, 0001 ==> MSUB, (32/64 bit)
13045 0010 ==> SMADDL, 0011 ==> SMSUBL, (64 bit only)
13046 0100 ==> SMULH, (64 bit only)
13047                             1010 ==> UMADDL, 1011 ==> UMSUBL, (64 bit only)
13048 1100 ==> UMULH (64 bit only)
13049 ow ==> UNALLOC. */
13050
13051 uint32_t dispatch;
13052 uint32_t size = INSTR (31, 31);
13053 uint32_t op54 = INSTR (30, 29);
13054 uint32_t op31 = INSTR (23, 21);
13055 uint32_t o0 = INSTR (15, 15);
13056
13057 if (op54 != 0)
13058 HALT_UNALLOC;
13059
13060 if (size == 0)
13061 {
13062 if (op31 != 0)
13063 HALT_UNALLOC;
13064
13065 if (o0 == 0)
13066 madd32 (cpu);
13067 else
13068 msub32 (cpu);
13069 return;
13070 }
13071
13072 dispatch = (op31 << 1) | o0;
13073
13074 switch (dispatch)
13075 {
13076 case 0: madd64 (cpu); return;
13077 case 1: msub64 (cpu); return;
13078 case 2: smaddl (cpu); return;
13079 case 3: smsubl (cpu); return;
13080 case 4: smulh (cpu); return;
13081 case 10: umaddl (cpu); return;
13082 case 11: umsubl (cpu); return;
13083 case 12: umulh (cpu); return;
13084 default: HALT_UNALLOC;
13085 }
13086 }
13087
13088 static void
13089 dexDPReg (sim_cpu *cpu)
13090 {
13091 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
13092 assert group == GROUP_DPREG_0101 || group == GROUP_DPREG_1101
13093 bits [28:24:21] of a DPReg are the secondary dispatch vector. */
13094 uint32_t group2 = dispatchDPReg (aarch64_get_instr (cpu));
13095
13096 switch (group2)
13097 {
13098 case DPREG_LOG_000:
13099 case DPREG_LOG_001:
13100 dexLogicalShiftedRegister (cpu); return;
13101
13102 case DPREG_ADDSHF_010:
13103 dexAddSubtractShiftedRegister (cpu); return;
13104
13105 case DPREG_ADDEXT_011:
13106 dexAddSubtractExtendedRegister (cpu); return;
13107
13108 case DPREG_ADDCOND_100:
13109 {
13110 /* This set bundles a variety of different operations. */
13111         /* Check for:  */
13112         /* 1) add/sub with carry.  */
13113 uint32_t mask1 = 0x1FE00000U;
13114 uint32_t val1 = 0x1A000000U;
13115 /* 2) cond compare register/immediate. */
13116 uint32_t mask2 = 0x1FE00000U;
13117 uint32_t val2 = 0x1A400000U;
13118 /* 3) cond select. */
13119 uint32_t mask3 = 0x1FE00000U;
13120 uint32_t val3 = 0x1A800000U;
13121 /* 4) data proc 1/2 source. */
13122 uint32_t mask4 = 0x1FE00000U;
13123 uint32_t val4 = 0x1AC00000U;
13124
13125 if ((aarch64_get_instr (cpu) & mask1) == val1)
13126 dexAddSubtractWithCarry (cpu);
13127
13128 else if ((aarch64_get_instr (cpu) & mask2) == val2)
13129 CondCompare (cpu);
13130
13131 else if ((aarch64_get_instr (cpu) & mask3) == val3)
13132 dexCondSelect (cpu);
13133
13134 else if ((aarch64_get_instr (cpu) & mask4) == val4)
13135 {
13136 /* Bit 30 is clear for data proc 2 source
13137 and set for data proc 1 source. */
13138 if (aarch64_get_instr (cpu) & (1U << 30))
13139 dexDataProc1Source (cpu);
13140 else
13141 dexDataProc2Source (cpu);
13142 }
13143
13144 else
13145 /* Should not reach here. */
13146 HALT_NYI;
13147
13148 return;
13149 }
13150
13151 case DPREG_3SRC_110:
13152 dexDataProc3Source (cpu); return;
13153
13154 case DPREG_UNALLOC_101:
13155 HALT_UNALLOC;
13156
13157 case DPREG_3SRC_111:
13158 dexDataProc3Source (cpu); return;
13159
13160 default:
13161 /* Should never reach here. */
13162 HALT_NYI;
13163 }
13164 }
13165
13166 /* Unconditional Branch immediate.
13167 Offset is a PC-relative byte offset in the range +/- 128MiB.
13168    The offset is assumed to be raw from the decode, i.e. the
13169    simulator is expected to scale it from a word offset to a byte offset.  */
13170
13171 /* Unconditional branch. */
13172 static void
13173 buc (sim_cpu *cpu, int32_t offset)
13174 {
13175 aarch64_set_next_PC_by_offset (cpu, offset);
13176 }
13177
13178 static unsigned stack_depth = 0;
13179
13180 /* Unconditional branch and link -- writes return PC to LR. */
13181 static void
13182 bl (sim_cpu *cpu, int32_t offset)
13183 {
13184 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13185 aarch64_save_LR (cpu);
13186 aarch64_set_next_PC_by_offset (cpu, offset);
13187
13188 if (TRACE_BRANCH_P (cpu))
13189 {
13190 ++ stack_depth;
13191 TRACE_BRANCH (cpu,
13192 " %*scall %" PRIx64 " [%s]"
13193 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13194 stack_depth, " ", aarch64_get_next_PC (cpu),
13195 aarch64_get_func (CPU_STATE (cpu),
13196 aarch64_get_next_PC (cpu)),
13197 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13198 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13199 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13200 );
13201 }
13202 }
13203
13204 /* Unconditional Branch register.
13205 Branch/return address is in source register. */
13206
13207 /* Unconditional branch. */
13208 static void
13209 br (sim_cpu *cpu)
13210 {
13211 unsigned rn = INSTR (9, 5);
13212 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13213 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13214 }
13215
13216 /* Unconditional branch and link -- writes return PC to LR. */
13217 static void
13218 blr (sim_cpu *cpu)
13219 {
13220 unsigned rn = INSTR (9, 5);
13221
13222 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13223   /* The pseudo code in the spec says we update LR before fetching
13224      the value from rn.  */
13225 aarch64_save_LR (cpu);
13226 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13227
13228 if (TRACE_BRANCH_P (cpu))
13229 {
13230 ++ stack_depth;
13231 TRACE_BRANCH (cpu,
13232 " %*scall %" PRIx64 " [%s]"
13233 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13234 stack_depth, " ", aarch64_get_next_PC (cpu),
13235 aarch64_get_func (CPU_STATE (cpu),
13236 aarch64_get_next_PC (cpu)),
13237 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13238 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13239 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13240 );
13241 }
13242 }
13243
13244 /* Return -- the assembler will default the source to LR.  This is
13245    functionally equivalent to br but, presumably, unlike br it
13246    side-effects the branch predictor.  */
13247 static void
13248 ret (sim_cpu *cpu)
13249 {
13250 unsigned rn = INSTR (9, 5);
13251 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13252
13253 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13254 if (TRACE_BRANCH_P (cpu))
13255 {
13256 TRACE_BRANCH (cpu,
13257 " %*sreturn [result: %" PRIx64 "]",
13258 stack_depth, " ", aarch64_get_reg_u64 (cpu, 0, NO_SP));
13259 -- stack_depth;
13260 }
13261 }
13262
13263 /* NOP -- we implement this and call it from the decode in case we
13264 want to intercept it later. */
13265
13266 static void
13267 nop (sim_cpu *cpu)
13268 {
13269 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13270 }
13271
13272 /* Data synchronization barrier. */
13273
13274 static void
13275 dsb (sim_cpu *cpu)
13276 {
13277 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13278 }
13279
13280 /* Data memory barrier. */
13281
13282 static void
13283 dmb (sim_cpu *cpu)
13284 {
13285 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13286 }
13287
13288 /* Instruction synchronization barrier. */
13289
13290 static void
13291 isb (sim_cpu *cpu)
13292 {
13293 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13294 }
13295
13296 static void
13297 dexBranchImmediate (sim_cpu *cpu)
13298 {
13299 /* assert instr[30,26] == 00101
13300 instr[31] ==> 0 == B, 1 == BL
13301 instr[25,0] == imm26 branch offset counted in words. */
13302
13303 uint32_t top = INSTR (31, 31);
13304   /* We have a 26 bit signed word offset which we need to pass to the
13305 execute routine as a signed byte offset. */
13306 int32_t offset = simm32 (aarch64_get_instr (cpu), 25, 0) << 2;
13307
13308 if (top)
13309 bl (cpu, offset);
13310 else
13311 buc (cpu, offset);
13312 }
13313
13314 /* Control Flow. */
13315
13316 /* Conditional branch
13317
13318    Offset is a PC-relative byte offset in the range +/- 1MiB; pos is
13319    a bit position in the range 0 .. 63.
13320
13321 cc is a CondCode enum value as pulled out of the decode
13322
13323 N.B. any offset register (source) can only be Xn or Wn. */
13324
13325 static void
13326 bcc (sim_cpu *cpu, int32_t offset, CondCode cc)
13327 {
13328 /* The test returns TRUE if CC is met. */
13329 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13330 if (testConditionCode (cpu, cc))
13331 aarch64_set_next_PC_by_offset (cpu, offset);
13332 }
13333
13334 /* 32 bit branch on register non-zero. */
13335 static void
13336 cbnz32 (sim_cpu *cpu, int32_t offset)
13337 {
13338 unsigned rt = INSTR (4, 0);
13339
13340 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13341 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) != 0)
13342 aarch64_set_next_PC_by_offset (cpu, offset);
13343 }
13344
13345 /* 64 bit branch on register non-zero.  */
13346 static void
13347 cbnz (sim_cpu *cpu, int32_t offset)
13348 {
13349 unsigned rt = INSTR (4, 0);
13350
13351 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13352 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) != 0)
13353 aarch64_set_next_PC_by_offset (cpu, offset);
13354 }
13355
13356 /* 32 bit branch on register zero.  */
13357 static void
13358 cbz32 (sim_cpu *cpu, int32_t offset)
13359 {
13360 unsigned rt = INSTR (4, 0);
13361
13362 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13363 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) == 0)
13364 aarch64_set_next_PC_by_offset (cpu, offset);
13365 }
13366
13367 /* 64 bit branch on register zero. */
13368 static void
13369 cbz (sim_cpu *cpu, int32_t offset)
13370 {
13371 unsigned rt = INSTR (4, 0);
13372
13373 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13374 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) == 0)
13375 aarch64_set_next_PC_by_offset (cpu, offset);
13376 }
13377
13378 /* Branch on register bit test non-zero -- one size fits all. */
13379 static void
13380 tbnz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13381 {
13382 unsigned rt = INSTR (4, 0);
13383
13384 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13385 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos))
13386 aarch64_set_next_PC_by_offset (cpu, offset);
13387 }
13388
13389 /* Branch on register bit test zero -- one size fits all. */
13390 static void
13391 tbz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13392 {
13393 unsigned rt = INSTR (4, 0);
13394
13395 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13396 if (!(aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos)))
13397 aarch64_set_next_PC_by_offset (cpu, offset);
13398 }
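
/* Example (editor's addition): for TBNZ X5, #63, label the decoded pos
   is 63, so the mask ((uint64_t) 1) << 63 tests the sign bit and the
   branch is taken exactly when X5 is negative.  */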
13399
13400 static void
13401 dexCompareBranchImmediate (sim_cpu *cpu)
13402 {
13403 /* instr[30,25] = 01 1010
13404 instr[31] = size : 0 ==> 32, 1 ==> 64
13405 instr[24] = op : 0 ==> CBZ, 1 ==> CBNZ
13406 instr[23,5] = simm19 branch offset counted in words
13407 instr[4,0] = rt */
13408
13409 uint32_t size = INSTR (31, 31);
13410 uint32_t op = INSTR (24, 24);
13411 int32_t offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13412
13413 if (size == 0)
13414 {
13415 if (op == 0)
13416 cbz32 (cpu, offset);
13417 else
13418 cbnz32 (cpu, offset);
13419 }
13420 else
13421 {
13422 if (op == 0)
13423 cbz (cpu, offset);
13424 else
13425 cbnz (cpu, offset);
13426 }
13427 }
13428
13429 static void
13430 dexTestBranchImmediate (sim_cpu *cpu)
13431 {
13432 /* instr[31] = b5 : bit 5 of test bit idx
13433 instr[30,25] = 01 1011
13434      instr[24]    = op : 0 ==> TBZ, 1 ==> TBNZ
13435 instr[23,19] = b40 : bits 4 to 0 of test bit idx
13436 instr[18,5] = simm14 : signed offset counted in words
13437 instr[4,0] = uimm5 */
13438
13439 uint32_t pos = ((INSTR (31, 31) << 5) | INSTR (23, 19));
13440 int32_t offset = simm32 (aarch64_get_instr (cpu), 18, 5) << 2;
13441
13442 NYI_assert (30, 25, 0x1b);
13443
13444 if (INSTR (24, 24) == 0)
13445 tbz (cpu, pos, offset);
13446 else
13447 tbnz (cpu, pos, offset);
13448 }
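
/* Decode example (editor's addition): TBZ X3, #40, label encodes
   b5 == 1 and b40 == 01000, so pos == (1 << 5) | 8 == 40, the bit
   number requested in the assembly.  */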
13449
13450 static void
13451 dexCondBranchImmediate (sim_cpu *cpu)
13452 {
13453 /* instr[31,25] = 010 1010
13454      instr[24]   = op1 : op1:op0 == 00 ==> B.cond, ow ==> UNALLOC
13455 instr[23,5] = simm19 : signed offset counted in words
13456 instr[4] = op0
13457 instr[3,0] = cond */
13458
13459 int32_t offset;
13460 uint32_t op = ((INSTR (24, 24) << 1) | INSTR (4, 4));
13461
13462 NYI_assert (31, 25, 0x2a);
13463
13464 if (op != 0)
13465 HALT_UNALLOC;
13466
13467 offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13468
13469 bcc (cpu, offset, INSTR (3, 0));
13470 }
13471
13472 static void
13473 dexBranchRegister (sim_cpu *cpu)
13474 {
13475 /* instr[31,25] = 110 1011
13476      instr[24,21] = op : 0 ==> BR, 1 ==> BLR, 2 ==> RET, 4 ==> ERET, 5 ==> DRPS
13477 instr[20,16] = op2 : must be 11111
13478 instr[15,10] = op3 : must be 000000
13479      instr[4,0]   = op4 : must be 00000.  */
13480
13481 uint32_t op = INSTR (24, 21);
13482 uint32_t op2 = INSTR (20, 16);
13483 uint32_t op3 = INSTR (15, 10);
13484 uint32_t op4 = INSTR (4, 0);
13485
13486 NYI_assert (31, 25, 0x6b);
13487
13488 if (op2 != 0x1F || op3 != 0 || op4 != 0)
13489 HALT_UNALLOC;
13490
13491 if (op == 0)
13492 br (cpu);
13493
13494 else if (op == 1)
13495 blr (cpu);
13496
13497 else if (op == 2)
13498 ret (cpu);
13499
13500 else
13501 {
13502 /* ERET and DRPS accept 0b11111 for rn = instr [4,0]. */
13503 /* anything else is unallocated. */
13504 uint32_t rn = INSTR (4, 0);
13505
13506 if (rn != 0x1f)
13507 HALT_UNALLOC;
13508
13509 if (op == 4 || op == 5)
13510 HALT_NYI;
13511
13512 HALT_UNALLOC;
13513 }
13514 }
13515
13516 /* FIXME: We should get the Angel SWI values from ../../libgloss/aarch64/svc.h
13517 but this may not be available. So instead we define the values we need
13518 here. */
13519 #define AngelSVC_Reason_Open 0x01
13520 #define AngelSVC_Reason_Close 0x02
13521 #define AngelSVC_Reason_Write 0x05
13522 #define AngelSVC_Reason_Read 0x06
13523 #define AngelSVC_Reason_IsTTY 0x09
13524 #define AngelSVC_Reason_Seek 0x0A
13525 #define AngelSVC_Reason_FLen 0x0C
13526 #define AngelSVC_Reason_Remove 0x0E
13527 #define AngelSVC_Reason_Rename 0x0F
13528 #define AngelSVC_Reason_Clock 0x10
13529 #define AngelSVC_Reason_Time 0x11
13530 #define AngelSVC_Reason_System 0x12
13531 #define AngelSVC_Reason_Errno 0x13
13532 #define AngelSVC_Reason_GetCmdLine 0x15
13533 #define AngelSVC_Reason_HeapInfo 0x16
13534 #define AngelSVC_Reason_ReportException 0x18
13535 #define AngelSVC_Reason_Elapsed 0x30
13536
13537
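/* Editor's note: a semihosting call reaches handle_halt below by
   executing HLT #0xF000 with the reason code in W0 and, for most
   calls, a parameter block pointer in X1.  An illustrative guest
   sequence ("block" is a hypothetical label, not from the original
   source):

       mov w0, #0x16        // AngelSVC_Reason_HeapInfo
       adr x1, block        // pointer to a pointer to 4 doublewords
       hlt #0xf000
*/
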
13538 static void
13539 handle_halt (sim_cpu *cpu, uint32_t val)
13540 {
13541 uint64_t result = 0;
13542
13543 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13544 if (val != 0xf000)
13545 {
13546 TRACE_SYSCALL (cpu, " HLT [0x%x]", val);
13547 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13548 sim_stopped, SIM_SIGTRAP);
13549 }
13550
13551 /* We have encountered an Angel SVC call. See if we can process it. */
13552 switch (aarch64_get_reg_u32 (cpu, 0, NO_SP))
13553 {
13554 case AngelSVC_Reason_HeapInfo:
13555 {
13556 /* Get the values. */
13557 uint64_t stack_top = aarch64_get_stack_start (cpu);
13558 uint64_t heap_base = aarch64_get_heap_start (cpu);
13559
13560 /* Get the pointer */
13561 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13562 ptr = aarch64_get_mem_u64 (cpu, ptr);
13563
13564 /* Fill in the memory block. */
13565 /* Start addr of heap. */
13566 aarch64_set_mem_u64 (cpu, ptr + 0, heap_base);
13567 /* End addr of heap. */
13568 aarch64_set_mem_u64 (cpu, ptr + 8, stack_top);
13569 /* Lowest stack addr. */
13570 aarch64_set_mem_u64 (cpu, ptr + 16, heap_base);
13571 /* Initial stack addr. */
13572 aarch64_set_mem_u64 (cpu, ptr + 24, stack_top);
13573
13574 TRACE_SYSCALL (cpu, " AngelSVC: Get Heap Info");
13575 }
13576 break;
13577
13578 case AngelSVC_Reason_Open:
13579 {
13580 /* Get the pointer */
13581         /* uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK); */
13582 /* FIXME: For now we just assume that we will only be asked
13583 to open the standard file descriptors. */
13584 static int fd = 0;
13585 result = fd ++;
13586
13587 TRACE_SYSCALL (cpu, " AngelSVC: Open file %d", fd - 1);
13588 }
13589 break;
13590
13591 case AngelSVC_Reason_Close:
13592 {
13593 uint64_t fh = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13594 TRACE_SYSCALL (cpu, " AngelSVC: Close file %d", (int) fh);
13595 result = 0;
13596 }
13597 break;
13598
13599 case AngelSVC_Reason_Errno:
13600 result = 0;
13601 TRACE_SYSCALL (cpu, " AngelSVC: Get Errno");
13602 break;
13603
13604 case AngelSVC_Reason_Clock:
13605 result =
13606 #ifdef CLOCKS_PER_SEC
13607 (CLOCKS_PER_SEC >= 100)
13608 ? (clock () / (CLOCKS_PER_SEC / 100))
13609 : ((clock () * 100) / CLOCKS_PER_SEC)
13610 #else
13611 /* Presume unix... clock() returns microseconds. */
13612 (clock () / 10000)
13613 #endif
13614 ;
13615 TRACE_SYSCALL (cpu, " AngelSVC: Get Clock");
13616 break;
13617
13618 case AngelSVC_Reason_GetCmdLine:
13619 {
13620 /* Get the pointer */
13621 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13622 ptr = aarch64_get_mem_u64 (cpu, ptr);
13623
13624 /* FIXME: No command line for now. */
13625 aarch64_set_mem_u64 (cpu, ptr, 0);
13626 TRACE_SYSCALL (cpu, " AngelSVC: Get Command Line");
13627 }
13628 break;
13629
13630 case AngelSVC_Reason_IsTTY:
13631 result = 1;
13632 TRACE_SYSCALL (cpu, " AngelSVC: IsTTY ?");
13633 break;
13634
13635 case AngelSVC_Reason_Write:
13636 {
13637 /* Get the pointer */
13638 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13639 /* Get the write control block. */
13640 uint64_t fd = aarch64_get_mem_u64 (cpu, ptr);
13641 uint64_t buf = aarch64_get_mem_u64 (cpu, ptr + 8);
13642 uint64_t len = aarch64_get_mem_u64 (cpu, ptr + 16);
13643
13644 TRACE_SYSCALL (cpu, "write of %" PRIx64 " bytes from %"
13645 PRIx64 " on descriptor %" PRIx64,
13646 len, buf, fd);
13647
13648 if (len > 1280)
13649 {
13650 TRACE_SYSCALL (cpu,
13651 " AngelSVC: Write: Suspiciously long write: %ld",
13652 (long) len);
13653 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13654 sim_stopped, SIM_SIGBUS);
13655 }
13656 else if (fd == 1)
13657 {
13658 printf ("%.*s", (int) len, aarch64_get_mem_ptr (cpu, buf));
13659 }
13660 else if (fd == 2)
13661 {
13662 TRACE (cpu, 0, "\n");
13663 sim_io_eprintf (CPU_STATE (cpu), "%.*s",
13664 (int) len, aarch64_get_mem_ptr (cpu, buf));
13665 TRACE (cpu, 0, "\n");
13666 }
13667 else
13668 {
13669 TRACE_SYSCALL (cpu,
13670 " AngelSVC: Write: Unexpected file handle: %d",
13671 (int) fd);
13672 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13673 sim_stopped, SIM_SIGABRT);
13674 }
13675 }
13676 break;
13677
13678 case AngelSVC_Reason_ReportException:
13679 {
13680 /* Get the pointer */
13681 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13682         /* ptr = aarch64_get_mem_u64 (cpu, ptr); */
13683 uint64_t type = aarch64_get_mem_u64 (cpu, ptr);
13684 uint64_t state = aarch64_get_mem_u64 (cpu, ptr + 8);
13685
13686 TRACE_SYSCALL (cpu,
13687 "Angel Exception: type 0x%" PRIx64 " state %" PRIx64,
13688 type, state);
13689
13690 if (type == 0x20026)
13691 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13692 sim_exited, state);
13693 else
13694 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13695 sim_stopped, SIM_SIGINT);
13696 }
13697 break;
13698
13699 case AngelSVC_Reason_Read:
13700 case AngelSVC_Reason_FLen:
13701 case AngelSVC_Reason_Seek:
13702 case AngelSVC_Reason_Remove:
13703 case AngelSVC_Reason_Time:
13704 case AngelSVC_Reason_System:
13705 case AngelSVC_Reason_Rename:
13706 case AngelSVC_Reason_Elapsed:
13707 default:
13708 TRACE_SYSCALL (cpu, " HLT [Unknown angel %x]",
13709 aarch64_get_reg_u32 (cpu, 0, NO_SP));
13710 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13711 sim_stopped, SIM_SIGTRAP);
13712 }
13713
13714 aarch64_set_reg_u64 (cpu, 0, NO_SP, result);
13715 }
13716
13717 static void
13718 dexExcpnGen (sim_cpu *cpu)
13719 {
13720 /* instr[31:24] = 11010100
13721 instr[23,21] = opc : 000 ==> GEN EXCPN, 001 ==> BRK
13722 010 ==> HLT, 101 ==> DBG GEN EXCPN
13723 instr[20,5] = imm16
13724 instr[4,2] = opc2 000 ==> OK, ow ==> UNALLOC
13725 instr[1,0] = LL : discriminates opc */
13726
13727 uint32_t opc = INSTR (23, 21);
13728 uint32_t imm16 = INSTR (20, 5);
13729 uint32_t opc2 = INSTR (4, 2);
13730 uint32_t LL;
13731
13732 NYI_assert (31, 24, 0xd4);
13733
13734 if (opc2 != 0)
13735 HALT_UNALLOC;
13736
13737 LL = INSTR (1, 0);
13738
13739 /* We only implement HLT and BRK for now. */
13740 if (opc == 1 && LL == 0)
13741 {
13742 TRACE_EVENTS (cpu, " BRK [0x%x]", imm16);
13743 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13744 sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
13745 }
13746
13747 if (opc == 2 && LL == 0)
13748 handle_halt (cpu, imm16);
13749
13750 else if (opc == 0 || opc == 5)
13751 HALT_NYI;
13752
13753 else
13754 HALT_UNALLOC;
13755 }
13756
13757 /* Stub for accessing system registers. */
13758
13759 static uint64_t
13760 system_get (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13761 unsigned crm, unsigned op2)
13762 {
13763 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 7)
13764 /* DCZID_EL0 - the Data Cache Zero ID register.
13765 We do not support DC ZVA at the moment, so
13766 we return a value with the disable bit set.
13767 We implement support for the DCZID register since
13768 it is used by the C library's memset function. */
13769 return ((uint64_t) 1) << 4;
13770
13771 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 1)
13772 /* Cache Type Register. */
13773 return 0x80008000UL;
13774
13775 if (crn == 13 && op1 == 3 && crm == 0 && op2 == 2)
13776 /* TPIDR_EL0 - thread pointer id. */
13777 return aarch64_get_thread_id (cpu);
13778
13779 if (op1 == 3 && crm == 4 && op2 == 0)
13780 return aarch64_get_FPCR (cpu);
13781
13782 if (op1 == 3 && crm == 4 && op2 == 1)
13783 return aarch64_get_FPSR (cpu);
13784
13785 else if (op1 == 3 && crm == 2 && op2 == 0)
13786 return aarch64_get_CPSR (cpu);
13787
13788 HALT_NYI;
13789 }
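
/* Example (editor's addition): MRS X0, DCZID_EL0 decodes to op1 == 3,
   CRn == 0, CRm == 0, op2 == 7, so system_get returns 0x10 -- bit 4 is
   the prohibit bit, advertising that DC ZVA is not supported.  */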
13790
13791 static void
13792 system_set (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13793 unsigned crm, unsigned op2, uint64_t val)
13794 {
13795 if (op1 == 3 && crm == 4 && op2 == 0)
13796 aarch64_set_FPCR (cpu, val);
13797
13798 else if (op1 == 3 && crm == 4 && op2 == 1)
13799 aarch64_set_FPSR (cpu, val);
13800
13801 else if (op1 == 3 && crm == 2 && op2 == 0)
13802 aarch64_set_CPSR (cpu, val);
13803
13804 else
13805 HALT_NYI;
13806 }
13807
13808 static void
13809 do_mrs (sim_cpu *cpu)
13810 {
13811 /* instr[31:20] = 1101 0101 0001 1
13812 instr[19] = op0
13813 instr[18,16] = op1
13814 instr[15,12] = CRn
13815 instr[11,8] = CRm
13816 instr[7,5] = op2
13817 instr[4,0] = Rt */
13818 unsigned sys_op0 = INSTR (19, 19) + 2;
13819 unsigned sys_op1 = INSTR (18, 16);
13820 unsigned sys_crn = INSTR (15, 12);
13821 unsigned sys_crm = INSTR (11, 8);
13822 unsigned sys_op2 = INSTR (7, 5);
13823 unsigned rt = INSTR (4, 0);
13824
13825 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13826 aarch64_set_reg_u64 (cpu, rt, NO_SP,
13827 system_get (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2));
13828 }
13829
13830 static void
13831 do_MSR_immediate (sim_cpu *cpu)
13832 {
13833 /* instr[31:19] = 1101 0101 0000 0
13834 instr[18,16] = op1
13835 instr[15,12] = 0100
13836 instr[11,8] = CRm
13837 instr[7,5] = op2
13838 instr[4,0] = 1 1111 */
13839
13840 unsigned op1 = INSTR (18, 16);
13841 /*unsigned crm = INSTR (11, 8);*/
13842 unsigned op2 = INSTR (7, 5);
13843
13844 NYI_assert (31, 19, 0x1AA0);
13845 NYI_assert (15, 12, 0x4);
13846 NYI_assert (4, 0, 0x1F);
13847
13848 if (op1 == 0)
13849 {
13850 if (op2 == 5)
13851 HALT_NYI; /* set SPSel. */
13852 else
13853 HALT_UNALLOC;
13854 }
13855 else if (op1 == 3)
13856 {
13857 if (op2 == 6)
13858 HALT_NYI; /* set DAIFset. */
13859 else if (op2 == 7)
13860 HALT_NYI; /* set DAIFclr. */
13861 else
13862 HALT_UNALLOC;
13863 }
13864 else
13865 HALT_UNALLOC;
13866 }
13867
13868 static void
13869 do_MSR_reg (sim_cpu *cpu)
13870 {
13871 /* instr[31:20] = 1101 0101 0001
13872 instr[19] = op0
13873 instr[18,16] = op1
13874 instr[15,12] = CRn
13875 instr[11,8] = CRm
13876 instr[7,5] = op2
13877 instr[4,0] = Rt */
13878
13879 unsigned sys_op0 = INSTR (19, 19) + 2;
13880 unsigned sys_op1 = INSTR (18, 16);
13881 unsigned sys_crn = INSTR (15, 12);
13882 unsigned sys_crm = INSTR (11, 8);
13883 unsigned sys_op2 = INSTR (7, 5);
13884 unsigned rt = INSTR (4, 0);
13885
13886 NYI_assert (31, 20, 0xD51);
13887
13888 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13889 system_set (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2,
13890 aarch64_get_reg_u64 (cpu, rt, NO_SP));
13891 }
13892
13893 static void
13894 do_SYS (sim_cpu *cpu)
13895 {
13896 /* instr[31,19] = 1101 0101 0000 1
13897 instr[18,16] = op1
13898 instr[15,12] = CRn
13899 instr[11,8] = CRm
13900 instr[7,5] = op2
13901 instr[4,0] = Rt */
13902 NYI_assert (31, 19, 0x1AA1);
13903
13904 /* FIXME: For now we just silently accept system ops. */
13905 }
13906
13907 static void
13908 dexSystem (sim_cpu *cpu)
13909 {
13910 /* instr[31:22] = 1101 01010 0
13911 instr[21] = L
13912 instr[20,19] = op0
13913 instr[18,16] = op1
13914 instr[15,12] = CRn
13915 instr[11,8] = CRm
13916 instr[7,5] = op2
13917 instr[4,0] = uimm5 */
13918
13919 /* We are interested in HINT, DSB, DMB and ISB
13920
13921 Hint #0 encodes NOOP (this is the only hint we care about)
13922 L == 0, op0 == 0, op1 = 011, CRn = 0010, Rt = 11111,
13923      CRm != 0000, OR (CRm == 0000 AND (op2 == 000 OR op2 > 101))
13924
13925      DSB, DMB, ISB are data synchronization barrier, data memory barrier
13926      and instruction synchronization barrier, respectively, where
13927
13928 L == 0, op0 == 0, op1 = 011, CRn = 0011, Rt = 11111,
13929 op2 : DSB ==> 100, DMB ==> 101, ISB ==> 110
13930 CRm<3:2> ==> domain, CRm<1:0> ==> types,
13931 domain : 00 ==> OuterShareable, 01 ==> Nonshareable,
13932               10 ==> InnerShareable, 11 ==> FullSystem
13933 types : 01 ==> Reads, 10 ==> Writes,
13934 11 ==> All, 00 ==> All (domain == FullSystem). */
13935
13936 unsigned rt = INSTR (4, 0);
13937
13938 NYI_assert (31, 22, 0x354);
13939
13940 switch (INSTR (21, 12))
13941 {
13942 case 0x032:
13943 if (rt == 0x1F)
13944 {
13945 /* NOP has CRm != 0000 OR. */
13946 /* (CRm == 0000 AND (op2 == 000 OR op2 > 101)). */
13947 uint32_t crm = INSTR (11, 8);
13948 uint32_t op2 = INSTR (7, 5);
13949
13950 if (crm != 0 || (op2 == 0 || op2 > 5))
13951 {
13952 /* Actually call nop method so we can reimplement it later. */
13953 nop (cpu);
13954 return;
13955 }
13956 }
13957 HALT_NYI;
13958
13959 case 0x033:
13960 {
13961 uint32_t op2 = INSTR (7, 5);
13962
13963 switch (op2)
13964 {
13965 case 2: HALT_NYI;
13966 case 4: dsb (cpu); return;
13967 case 5: dmb (cpu); return;
13968 case 6: isb (cpu); return;
13969 default: HALT_UNALLOC;
13970 }
13971 }
13972
13973 case 0x3B0:
13974 case 0x3B4:
13975 case 0x3BD:
13976 do_mrs (cpu);
13977 return;
13978
13979 case 0x0B7:
13980 do_SYS (cpu); /* DC is an alias of SYS. */
13981 return;
13982
13983 default:
13984 if (INSTR (21, 20) == 0x1)
13985 do_MSR_reg (cpu);
13986 else if (INSTR (21, 19) == 0 && INSTR (15, 12) == 0x4)
13987 do_MSR_immediate (cpu);
13988 else
13989 HALT_NYI;
13990 return;
13991 }
13992 }
13993
13994 static void
13995 dexBr (sim_cpu *cpu)
13996 {
13997 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
13998 assert group == GROUP_BREXSYS_1010 || group == GROUP_BREXSYS_1011
13999 bits [31,29] of a BrExSys are the secondary dispatch vector. */
14000 uint32_t group2 = dispatchBrExSys (aarch64_get_instr (cpu));
14001
14002 switch (group2)
14003 {
14004 case BR_IMM_000:
14005 return dexBranchImmediate (cpu);
14006
14007 case BR_IMMCMP_001:
14008 /* Compare has bit 25 clear while test has it set. */
14009 if (!INSTR (25, 25))
14010 dexCompareBranchImmediate (cpu);
14011 else
14012 dexTestBranchImmediate (cpu);
14013 return;
14014
14015 case BR_IMMCOND_010:
14016 /* This is a conditional branch if bit 25 is clear otherwise
14017 unallocated. */
14018 if (!INSTR (25, 25))
14019 dexCondBranchImmediate (cpu);
14020 else
14021 HALT_UNALLOC;
14022 return;
14023
14024 case BR_UNALLOC_011:
14025 HALT_UNALLOC;
14026
14027 case BR_IMM_100:
14028 dexBranchImmediate (cpu);
14029 return;
14030
14031 case BR_IMMCMP_101:
14032 /* Compare has bit 25 clear while test has it set. */
14033 if (!INSTR (25, 25))
14034 dexCompareBranchImmediate (cpu);
14035 else
14036 dexTestBranchImmediate (cpu);
14037 return;
14038
14039 case BR_REG_110:
14040 /* Unconditional branch reg has bit 25 set. */
14041 if (INSTR (25, 25))
14042 dexBranchRegister (cpu);
14043
14044 /* This includes both Excpn Gen, System and unalloc operations.
14045 We need to decode the Excpn Gen operation BRK so we can plant
14046 debugger entry points.
14047 Excpn Gen operations have instr [24] = 0.
14048 we need to decode at least one of the System operations NOP
14049 which is an alias for HINT #0.
14050 System operations have instr [24,22] = 100. */
14051 else if (INSTR (24, 24) == 0)
14052 dexExcpnGen (cpu);
14053
14054 else if (INSTR (24, 22) == 4)
14055 dexSystem (cpu);
14056
14057 else
14058 HALT_UNALLOC;
14059
14060 return;
14061
14062 case BR_UNALLOC_111:
14063 HALT_UNALLOC;
14064
14065 default:
14066 /* Should never reach here. */
14067 HALT_NYI;
14068 }
14069 }
14070
14071 static void
14072 aarch64_decode_and_execute (sim_cpu *cpu, uint64_t pc)
14073 {
14074   /* We need to check if gdb wants to break in here.  */
14075   /* checkBreak (cpu); */
14076
14077 uint64_t group = dispatchGroup (aarch64_get_instr (cpu));
14078
14079 switch (group)
14080 {
14081 case GROUP_PSEUDO_0000: dexPseudo (cpu); break;
14082 case GROUP_LDST_0100: dexLdSt (cpu); break;
14083 case GROUP_DPREG_0101: dexDPReg (cpu); break;
14084 case GROUP_LDST_0110: dexLdSt (cpu); break;
14085 case GROUP_ADVSIMD_0111: dexAdvSIMD0 (cpu); break;
14086 case GROUP_DPIMM_1000: dexDPImm (cpu); break;
14087 case GROUP_DPIMM_1001: dexDPImm (cpu); break;
14088 case GROUP_BREXSYS_1010: dexBr (cpu); break;
14089 case GROUP_BREXSYS_1011: dexBr (cpu); break;
14090 case GROUP_LDST_1100: dexLdSt (cpu); break;
14091 case GROUP_DPREG_1101: dexDPReg (cpu); break;
14092 case GROUP_LDST_1110: dexLdSt (cpu); break;
14093 case GROUP_ADVSIMD_1111: dexAdvSIMD1 (cpu); break;
14094
14095 case GROUP_UNALLOC_0001:
14096 case GROUP_UNALLOC_0010:
14097 case GROUP_UNALLOC_0011:
14098 HALT_UNALLOC;
14099
14100 default:
14101 /* Should never reach here. */
14102 HALT_NYI;
14103 }
14104 }
14105
14106 static bfd_boolean
14107 aarch64_step (sim_cpu *cpu)
14108 {
14109 uint64_t pc = aarch64_get_PC (cpu);
14110
14111 if (pc == TOP_LEVEL_RETURN_PC)
14112 return FALSE;
14113
14114 aarch64_set_next_PC (cpu, pc + 4);
14115
14116 /* Code is always little-endian. */
14117 sim_core_read_buffer (CPU_STATE (cpu), cpu, read_map,
14118 & aarch64_get_instr (cpu), pc, 4);
14119 aarch64_get_instr (cpu) = endian_le2h_4 (aarch64_get_instr (cpu));
14120
14121 TRACE_INSN (cpu, " pc = %" PRIx64 " instr = %08x", pc,
14122 aarch64_get_instr (cpu));
14123 TRACE_DISASM (cpu, pc);
14124
14125 aarch64_decode_and_execute (cpu, pc);
14126
14127 return TRUE;
14128 }
14129
14130 void
14131 aarch64_run (SIM_DESC sd)
14132 {
14133 sim_cpu *cpu = STATE_CPU (sd, 0);
14134
14135 while (aarch64_step (cpu))
14136 {
14137 aarch64_update_PC (cpu);
14138
14139 if (sim_events_tick (sd))
14140 sim_events_process (sd);
14141 }
14142
14143 sim_engine_halt (sd, cpu, NULL, aarch64_get_PC (cpu),
14144 sim_exited, aarch64_get_reg_s32 (cpu, R0, NO_SP));
14145 }
14146
14147 void
14148 aarch64_init (sim_cpu *cpu, uint64_t pc)
14149 {
14150 uint64_t sp = aarch64_get_stack_start (cpu);
14151
14152   /* Install SP, FP and PC and set LR to TOP_LEVEL_RETURN_PC
14153      so we can detect a top-level return.  */
14154 aarch64_set_reg_u64 (cpu, SP, SP_OK, sp);
14155 aarch64_set_reg_u64 (cpu, FP, SP_OK, sp);
14156 aarch64_set_reg_u64 (cpu, LR, SP_OK, TOP_LEVEL_RETURN_PC);
14157 aarch64_set_next_PC (cpu, pc);
14158 aarch64_update_PC (cpu);
14159 aarch64_init_LIT_table ();
14160 }