Add missing cnt (popcount) instruction support.
sim/aarch64/simulator.c
1 /* simulator.c -- Interface for the AArch64 simulator.
2
3 Copyright (C) 2015-2017 Free Software Foundation, Inc.
4
5 Contributed by Red Hat.
6
7 This file is part of GDB.
8
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include <stdlib.h>
24 #include <stdio.h>
25 #include <string.h>
26 #include <sys/types.h>
27 #include <math.h>
28 #include <time.h>
29 #include <limits.h>
30
31 #include "simulator.h"
32 #include "cpustate.h"
33 #include "memory.h"
34
35 #define NO_SP 0
36 #define SP_OK 1
37
38 #define TST(_flag) (aarch64_test_CPSR_bit (cpu, _flag))
39 #define IS_SET(_X) (TST (( _X )) ? 1 : 0)
40 #define IS_CLEAR(_X) (TST (( _X )) ? 0 : 1)
41
42 /* Space saver macro. */
43 #define INSTR(HIGH, LOW) uimm (aarch64_get_instr (cpu), (HIGH), (LOW))
44
45 #define HALT_UNALLOC \
46 do \
47 { \
48 TRACE_DISASM (cpu, aarch64_get_PC (cpu)); \
49 TRACE_INSN (cpu, \
50 "Unallocated instruction detected at sim line %d," \
51 " exe addr %" PRIx64, \
52 __LINE__, aarch64_get_PC (cpu)); \
53 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
54 sim_stopped, SIM_SIGILL); \
55 } \
56 while (0)
57
58 #define HALT_NYI \
59 do \
60 { \
61 TRACE_DISASM (cpu, aarch64_get_PC (cpu)); \
62 TRACE_INSN (cpu, \
63 "Unimplemented instruction detected at sim line %d," \
64 " exe addr %" PRIx64, \
65 __LINE__, aarch64_get_PC (cpu)); \
66 if (! TRACE_ANY_P (cpu)) \
67 sim_io_eprintf (CPU_STATE (cpu), "SIM Error: Unimplemented instruction: %#08x\n", \
68 aarch64_get_instr (cpu)); \
69 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
70 sim_stopped, SIM_SIGABRT); \
71 } \
72 while (0)
73
74 #define NYI_assert(HI, LO, EXPECTED) \
75 do \
76 { \
77 if (INSTR ((HI), (LO)) != (EXPECTED)) \
78 HALT_NYI; \
79 } \
80 while (0)
81
82 /* Helper functions used by expand_logical_immediate. */
83
84 /* For i = 1, ..., N: result<i-1> = 1; all other bits are zero. */
85 static inline uint64_t
86 ones (int N)
87 {
88 return (N == 64 ? (uint64_t) -1 : ((1ULL << N) - 1));
89 }
90
91 /* Return bit N of VAL as bit 0; all other result bits are zero. */
92 static inline uint64_t
93 pickbit (uint64_t val, int N)
94 {
95 return pickbits64 (val, N, N);
96 }
97
98 static uint64_t
99 expand_logical_immediate (uint32_t S, uint32_t R, uint32_t N)
100 {
101 uint64_t mask;
102 uint64_t imm;
103 unsigned simd_size;
104
105 /* The immediate value consists of S+1 bits set to 1, left rotated
106 by simd_size - R (in other words, right rotated by R), then replicated. */
107 if (N != 0)
108 {
109 simd_size = 64;
110 mask = 0xffffffffffffffffull;
111 }
112 else
113 {
114 switch (S)
115 {
116 case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32; break;
117 case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; S &= 0xf; break;
118 case 0x30 ... 0x37: /* 110xxx */ simd_size = 8; S &= 0x7; break;
119 case 0x38 ... 0x3b: /* 1110xx */ simd_size = 4; S &= 0x3; break;
120 case 0x3c ... 0x3d: /* 11110x */ simd_size = 2; S &= 0x1; break;
121 default: return 0;
122 }
123 mask = (1ull << simd_size) - 1;
124 /* Top bits are IGNORED. */
125 R &= simd_size - 1;
126 }
127
128 /* NOTE: if S = simd_size - 1 we get 0xf..f which is rejected. */
129 if (S == simd_size - 1)
130 return 0;
131
132 /* S+1 consecutive bits to 1. */
133 /* NOTE: S can't be 63 due to detection above. */
134 imm = (1ull << (S + 1)) - 1;
135
136 /* Rotate to the left by simd_size - R. */
137 if (R != 0)
138 imm = ((imm << (simd_size - R)) & mask) | (imm >> R);
139
140 /* Replicate the value according to SIMD size. */
141 switch (simd_size)
142 {
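/* N.B. each case deliberately falls through to the next, doubling the
   replicated pattern until it fills the 64-bit result. */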
143 case 2: imm = (imm << 2) | imm;
144 case 4: imm = (imm << 4) | imm;
145 case 8: imm = (imm << 8) | imm;
146 case 16: imm = (imm << 16) | imm;
147 case 32: imm = (imm << 32) | imm;
148 case 64: break;
149 default: return 0;
150 }
151
152 return imm;
153 }
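/* A hand-worked example of the expansion above (a sketch, values
   computed from the rules in the function):
     N=0, immr=0, imms=1 selects simd_size 32 with S=1, so the element
     is (1 << 2) - 1 = 0x3; no rotation; replication via the switch
     yields 0x0000000300000003.
     N=0, immr=1, imms=0x30 selects simd_size 8 with S=0, so the element
     0x1 is rotated right by 1 to give 0x80, then replicated to
     0x8080808080808080.  */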
154
155 /* Instr[22,10] encodes N, immr and imms. We want a lookup table
156 for each possible combination, i.e. 13 bits worth of entries. */
157 #define LI_TABLE_SIZE (1 << 13)
158 static uint64_t LITable[LI_TABLE_SIZE];
159
160 void
161 aarch64_init_LIT_table (void)
162 {
163 unsigned index;
164
165 for (index = 0; index < LI_TABLE_SIZE; index++)
166 {
167 uint32_t N = uimm (index, 12, 12);
168 uint32_t immr = uimm (index, 11, 6);
169 uint32_t imms = uimm (index, 5, 0);
170
171 LITable [index] = expand_logical_immediate (imms, immr, N);
172 }
173 }
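/* A later decode step can then expand a logical immediate in a single
   lookup (a sketch - the actual dispatch appears further down this
   file); a zero entry flags an invalid N:immr:imms encoding, since 0 is
   itself not a representable logical immediate:

     uint64_t imm = LITable [INSTR (22, 10)];
     if (imm == 0)
       HALT_UNALLOC;
*/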
174
175 static void
176 dexNotify (sim_cpu *cpu)
177 {
178 /* instr[14,0] == type : 0 ==> method entry, 1 ==> method reentry
179 2 ==> exit Java, 3 ==> start next bytecode. */
180 uint32_t type = INSTR (14, 0);
181
182 TRACE_EVENTS (cpu, "Notify Insn encountered, type = 0x%x", type);
183
184 switch (type)
185 {
186 case 0:
187 /* aarch64_notifyMethodEntry (aarch64_get_reg_u64 (cpu, R23, 0),
188 aarch64_get_reg_u64 (cpu, R22, 0)); */
189 break;
190 case 1:
191 /* aarch64_notifyMethodReentry (aarch64_get_reg_u64 (cpu, R23, 0),
192 aarch64_get_reg_u64 (cpu, R22, 0)); */
193 break;
194 case 2:
195 /* aarch64_notifyMethodExit (); */
196 break;
197 case 3:
198 /* aarch64_notifyBCStart (aarch64_get_reg_u64 (cpu, R23, 0),
199 aarch64_get_reg_u64 (cpu, R22, 0)); */
200 break;
201 }
202 }
203
204 /* Secondary decode within top level groups. */
205
206 static void
207 dexPseudo (sim_cpu *cpu)
208 {
209 /* assert instr[28,27] = 00
210
211 We provide 2 pseudo instructions:
212
213 HALT stops execution of the simulator causing an immediate
214 return to the x86 code which entered it.
215
216 CALLOUT initiates recursive entry into x86 code. A register
217 argument holds the address of the x86 routine. Immediate
218 values in the instruction identify the number of general
219 purpose and floating point register arguments to be passed
220 and the type of any value to be returned. */
221
222 uint32_t PSEUDO_HALT = 0xE0000000U;
223 uint32_t PSEUDO_CALLOUT = 0x00018000U;
224 uint32_t PSEUDO_CALLOUTR = 0x00018001U;
225 uint32_t PSEUDO_NOTIFY = 0x00014000U;
226 uint32_t dispatch;
227
228 if (aarch64_get_instr (cpu) == PSEUDO_HALT)
229 {
230 TRACE_EVENTS (cpu, " Pseudo Halt Instruction");
231 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
232 sim_stopped, SIM_SIGTRAP);
233 }
234
235 dispatch = INSTR (31, 15);
236
237 /* We do not handle callouts at the moment. */
238 if (dispatch == PSEUDO_CALLOUT || dispatch == PSEUDO_CALLOUTR)
239 {
240 TRACE_EVENTS (cpu, " Callout");
241 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
242 sim_stopped, SIM_SIGABRT);
243 }
244
245 else if (dispatch == PSEUDO_NOTIFY)
246 dexNotify (cpu);
247
248 else
249 HALT_UNALLOC;
250 }
251
252 /* Load-store single register (unscaled offset)
253 These instructions employ a base register plus an unscaled signed
254 9 bit offset.
255
256 N.B. the base register (source) can be Xn or SP. All other
257 registers may not be SP. */
258
259 /* 32 bit load 32 bit unscaled signed 9 bit. */
260 static void
261 ldur32 (sim_cpu *cpu, int32_t offset)
262 {
263 unsigned rn = INSTR (9, 5);
264 unsigned rt = INSTR (4, 0);
265
266 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
267 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
268 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
269 + offset));
270 }
271
272 /* 64 bit load 64 bit unscaled signed 9 bit. */
273 static void
274 ldur64 (sim_cpu *cpu, int32_t offset)
275 {
276 unsigned rn = INSTR (9, 5);
277 unsigned rt = INSTR (4, 0);
278
279 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
280 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
281 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
282 + offset));
283 }
284
285 /* 32 bit load zero-extended byte unscaled signed 9 bit. */
286 static void
287 ldurb32 (sim_cpu *cpu, int32_t offset)
288 {
289 unsigned rn = INSTR (9, 5);
290 unsigned rt = INSTR (4, 0);
291
292 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
293 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8
294 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
295 + offset));
296 }
297
298 /* 32 bit load sign-extended byte unscaled signed 9 bit. */
299 static void
300 ldursb32 (sim_cpu *cpu, int32_t offset)
301 {
302 unsigned rn = INSTR (9, 5);
303 unsigned rt = INSTR (4, 0);
304
305 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
306 aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8
307 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
308 + offset));
309 }
310
311 /* 64 bit load sign-extended byte unscaled signed 9 bit. */
312 static void
313 ldursb64 (sim_cpu *cpu, int32_t offset)
314 {
315 unsigned rn = INSTR (9, 5);
316 unsigned rt = INSTR (4, 0);
317
318 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
319 aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s8
320 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
321 + offset));
322 }
323
324 /* 32 bit load zero-extended short unscaled signed 9 bit */
325 static void
326 ldurh32 (sim_cpu *cpu, int32_t offset)
327 {
328 unsigned rn = INSTR (9, 5);
329 unsigned rd = INSTR (4, 0);
330
331 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
332 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_mem_u16
333 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
334 + offset));
335 }
336
337 /* 32 bit load sign-extended short unscaled signed 9 bit */
338 static void
339 ldursh32 (sim_cpu *cpu, int32_t offset)
340 {
341 unsigned rn = INSTR (9, 5);
342 unsigned rd = INSTR (4, 0);
343
344 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
345 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s16
346 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
347 + offset));
348 }
349
350 /* 64 bit load sign-extended short unscaled signed 9 bit */
351 static void
352 ldursh64 (sim_cpu *cpu, int32_t offset)
353 {
354 unsigned rn = INSTR (9, 5);
355 unsigned rt = INSTR (4, 0);
356
357 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
358 aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s16
359 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
360 + offset));
361 }
362
363 /* 64 bit load sign-extended word unscaled signed 9 bit */
364 static void
365 ldursw (sim_cpu *cpu, int32_t offset)
366 {
367 unsigned rn = INSTR (9, 5);
368 unsigned rd = INSTR (4, 0);
369
370 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
371 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s32
372 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
373 + offset));
374 }
375
376 /* N.B. with stores the value in source is written to the address
377 identified by source2 modified by offset. */
378
379 /* 32 bit store 32 bit unscaled signed 9 bit. */
380 static void
381 stur32 (sim_cpu *cpu, int32_t offset)
382 {
383 unsigned rn = INSTR (9, 5);
384 unsigned rd = INSTR (4, 0);
385
386 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
387 aarch64_set_mem_u32 (cpu,
388 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
389 aarch64_get_reg_u32 (cpu, rd, NO_SP));
390 }
391
392 /* 64 bit store 64 bit unscaled signed 9 bit */
393 static void
394 stur64 (sim_cpu *cpu, int32_t offset)
395 {
396 unsigned rn = INSTR (9, 5);
397 unsigned rd = INSTR (4, 0);
398
399 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
400 aarch64_set_mem_u64 (cpu,
401 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
402 aarch64_get_reg_u64 (cpu, rd, NO_SP));
403 }
404
405 /* 32 bit store byte unscaled signed 9 bit */
406 static void
407 sturb (sim_cpu *cpu, int32_t offset)
408 {
409 unsigned rn = INSTR (9, 5);
410 unsigned rd = INSTR (4, 0);
411
412 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
413 aarch64_set_mem_u8 (cpu,
414 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
415 aarch64_get_reg_u8 (cpu, rd, NO_SP));
416 }
417
418 /* 32 bit store short unscaled signed 9 bit */
419 static void
420 sturh (sim_cpu *cpu, int32_t offset)
421 {
422 unsigned rn = INSTR (9, 5);
423 unsigned rd = INSTR (4, 0);
424
425 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
426 aarch64_set_mem_u16 (cpu,
427 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
428 aarch64_get_reg_u16 (cpu, rd, NO_SP));
429 }
430
431 /* Load single register pc-relative label
432 Offset is a signed 19 bit immediate count in words
433 rt may not be SP. */
434
435 /* 32 bit pc-relative load */
436 static void
437 ldr32_pcrel (sim_cpu *cpu, int32_t offset)
438 {
439 unsigned rd = INSTR (4, 0);
440
441 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
442 aarch64_set_reg_u64 (cpu, rd, NO_SP,
443 aarch64_get_mem_u32
444 (cpu, aarch64_get_PC (cpu) + offset * 4));
445 }
446
447 /* 64 bit pc-relative load */
448 static void
449 ldr_pcrel (sim_cpu *cpu, int32_t offset)
450 {
451 unsigned rd = INSTR (4, 0);
452
453 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
454 aarch64_set_reg_u64 (cpu, rd, NO_SP,
455 aarch64_get_mem_u64
456 (cpu, aarch64_get_PC (cpu) + offset * 4));
457 }
458
459 /* sign extended 32 bit pc-relative load */
460 static void
461 ldrsw_pcrel (sim_cpu *cpu, int32_t offset)
462 {
463 unsigned rd = INSTR (4, 0);
464
465 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
466 aarch64_set_reg_u64 (cpu, rd, NO_SP,
467 aarch64_get_mem_s32
468 (cpu, aarch64_get_PC (cpu) + offset * 4));
469 }
470
471 /* float pc-relative load */
472 static void
473 fldrs_pcrel (sim_cpu *cpu, int32_t offset)
474 {
475 unsigned int rd = INSTR (4, 0);
476
477 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
478 aarch64_set_vec_u32 (cpu, rd, 0,
479 aarch64_get_mem_u32
480 (cpu, aarch64_get_PC (cpu) + offset * 4));
481 }
482
483 /* double pc-relative load */
484 static void
485 fldrd_pcrel (sim_cpu *cpu, int32_t offset)
486 {
487 unsigned int st = INSTR (4, 0);
488
489 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
490 aarch64_set_vec_u64 (cpu, st, 0,
491 aarch64_get_mem_u64
492 (cpu, aarch64_get_PC (cpu) + offset * 4));
493 }
494
495 /* long double pc-relative load. */
496 static void
497 fldrq_pcrel (sim_cpu *cpu, int32_t offset)
498 {
499 unsigned int st = INSTR (4, 0);
500 uint64_t addr = aarch64_get_PC (cpu) + offset * 4;
501 FRegister a;
502
503 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
504 aarch64_get_mem_long_double (cpu, addr, & a);
505 aarch64_set_FP_long_double (cpu, st, a);
506 }
507
508 /* This can be used to scale an offset by applying
509 the requisite shift. The second argument is either
510 16, 32, 64 or 128. */
511
512 #define SCALE(_offset, _elementSize) \
513 ((_offset) << ScaleShift ## _elementSize)
514
515 /* This can be used to optionally scale a register derived offset
516 by applying the requisite shift as indicated by the Scaling
517 argument. The second argument is either 16, 32, 64 or 128.
518 The third argument is either Scaled or Unscaled.
519 N.B. when _Scaling is Scaled the offset is shifted by the
520 element's scale shift; when it is Unscaled no shift is applied. */
521
522 #define OPT_SCALE(_offset, _elementType, _Scaling) \
523 ((_offset) << (_Scaling ? ScaleShift ## _elementType : 0))
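/* For example, with the usual ScaleShift values (an assumption here -
   the ScaleShift* constants are defined elsewhere in the sim headers,
   e.g. 16 -> 1, 32 -> 2, 64 -> 3, 128 -> 4):
     SCALE (3, 32)               == 12
     OPT_SCALE (3, 32, Scaled)   == 12
     OPT_SCALE (3, 32, Unscaled) == 3  */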
524
525 /* This can be used to zero or sign extend a 32 bit register derived
526 value to a 64 bit value. The first argument must be the value as
527 a uint32_t and the second must be either UXTW or SXTW. The result
528 is returned as an int64_t. */
529
530 static inline int64_t
531 extend (uint32_t value, Extension extension)
532 {
533 union
534 {
535 uint32_t u;
536 int32_t n;
537 } x;
538
539 /* A branchless variant of this ought to be possible. */
540 if (extension == UXTW || extension == NoExtension)
541 return value;
542
543 x.u = value;
544 return x.n;
545 }
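/* For example (assuming two's complement int32_t):
     extend (0xfffffffe, UXTW) == 0x00000000fffffffe   (zero extend)
     extend (0xfffffffe, SXTW) == (int64_t) -2          (sign extend)  */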
546
547 /* Scalar Floating Point
548
549 FP load/store single register (4 addressing modes)
550
551 N.B. the base register (source) can be the stack pointer.
552 The secondary source register (source2) can only be an Xn register. */
553
554 /* Load 32 bit unscaled signed 9 bit with pre- or post-writeback. */
555 static void
556 fldrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
557 {
558 unsigned rn = INSTR (9, 5);
559 unsigned st = INSTR (4, 0);
560 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
561
562 if (wb != Post)
563 address += offset;
564
565 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
566 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32 (cpu, address));
567 if (wb == Post)
568 address += offset;
569
570 if (wb != NoWriteBack)
571 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
572 }
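/* To illustrate the writeback handling above, take rn = 0x1000 and
   offset = 8 (a hand-worked sketch; the same pattern recurs throughout
   this file):
     pre-index (wb is neither Post nor NoWriteBack):
                                access 0x1008, rn becomes 0x1008
     post-index (wb == Post):   access 0x1000, rn becomes 0x1008
     no writeback:              access 0x1008, rn is unchanged  */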
573
574 /* Load 8 bit with unsigned 12 bit offset. */
575 static void
576 fldrb_abs (sim_cpu *cpu, uint32_t offset)
577 {
578 unsigned rd = INSTR (4, 0);
579 unsigned rn = INSTR (9, 5);
580 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
581
582 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
583 aarch64_set_vec_u8 (cpu, rd, 0, aarch64_get_mem_u8 (cpu, addr));
584 }
585
586 /* Load 16 bit scaled unsigned 12 bit. */
587 static void
588 fldrh_abs (sim_cpu *cpu, uint32_t offset)
589 {
590 unsigned rd = INSTR (4, 0);
591 unsigned rn = INSTR (9, 5);
592 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16);
593
594 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
595 aarch64_set_vec_u16 (cpu, rd, 0, aarch64_get_mem_u16 (cpu, addr));
596 }
597
598 /* Load 32 bit scaled unsigned 12 bit. */
599 static void
600 fldrs_abs (sim_cpu *cpu, uint32_t offset)
601 {
602 unsigned rd = INSTR (4, 0);
603 unsigned rn = INSTR (9, 5);
604 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32);
605
606 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
607 aarch64_set_vec_u32 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr));
608 }
609
610 /* Load 64 bit scaled unsigned 12 bit. */
611 static void
612 fldrd_abs (sim_cpu *cpu, uint32_t offset)
613 {
614 unsigned rd = INSTR (4, 0);
615 unsigned rn = INSTR (9, 5);
616 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64);
617
618 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
619 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
620 }
621
622 /* Load 128 bit scaled unsigned 12 bit. */
623 static void
624 fldrq_abs (sim_cpu *cpu, uint32_t offset)
625 {
626 unsigned rd = INSTR (4, 0);
627 unsigned rn = INSTR (9, 5);
628 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
629
630 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
631 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
632 aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_mem_u64 (cpu, addr + 8));
633 }
634
635 /* Load 32 bit scaled or unscaled zero- or sign-extended
636 32-bit register offset. */
637 static void
638 fldrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
639 {
640 unsigned rm = INSTR (20, 16);
641 unsigned rn = INSTR (9, 5);
642 unsigned st = INSTR (4, 0);
643 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
644 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
645 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
646
647 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
648 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
649 (cpu, address + displacement));
650 }
651
652 /* Load 64 bit unscaled signed 9 bit with pre- or post-writeback. */
653 static void
654 fldrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
655 {
656 unsigned rn = INSTR (9, 5);
657 unsigned st = INSTR (4, 0);
658 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
659
660 if (wb != Post)
661 address += offset;
662
663 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
664 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64 (cpu, address));
665
666 if (wb == Post)
667 address += offset;
668
669 if (wb != NoWriteBack)
670 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
671 }
672
673 /* Load 64 bit scaled or unscaled zero- or sign-extended 32-bit register offset. */
674 static void
675 fldrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
676 {
677 unsigned rm = INSTR (20, 16);
678 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
679 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
680
681 fldrd_wb (cpu, displacement, NoWriteBack);
682 }
683
684 /* Load 128 bit unscaled signed 9 bit with pre- or post-writeback. */
685 static void
686 fldrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
687 {
688 FRegister a;
689 unsigned rn = INSTR (9, 5);
690 unsigned st = INSTR (4, 0);
691 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
692
693 if (wb != Post)
694 address += offset;
695
696 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
697 aarch64_get_mem_long_double (cpu, address, & a);
698 aarch64_set_FP_long_double (cpu, st, a);
699
700 if (wb == Post)
701 address += offset;
702
703 if (wb != NoWriteBack)
704 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
705 }
706
707 /* Load 128 bit scaled or unscaled zero- or sign-extended 32-bit register offset */
708 static void
709 fldrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
710 {
711 unsigned rm = INSTR (20, 16);
712 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
713 uint64_t displacement = OPT_SCALE (extended, 128, scaling);
714
715 fldrq_wb (cpu, displacement, NoWriteBack);
716 }
717
718 /* Memory Access
719
720 load-store single register
721 There are four addressing modes available here which all employ a
722 64 bit source (base) register.
723
724 N.B. the base register (source) can be the stack pointer.
725 The secondary source register (source2) can only be an Xn register.
726
727 Scaled, 12-bit, unsigned immediate offset, without pre- and
728 post-index options.
729 Unscaled, 9-bit, signed immediate offset with pre- or post-index
730 writeback.
731 Scaled or unscaled 64-bit register offset.
732 Scaled or unscaled 32-bit extended register offset.
733
734 All offsets are assumed to be raw from the decode, i.e. the
735 simulator is expected to adjust scaled offsets based on the
736 accessed data size. With register or extended register offset
737 versions the same applies, except that in the latter case the
738 operation may also require a sign extend.
739
740 A separate method is provided for each possible addressing mode. */
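/* The four modes map onto the handler-name suffixes used below:
     _abs       - scaled, 12-bit, unsigned immediate offset
     _wb        - unscaled, 9-bit, signed immediate with pre-/post-index
     _scale_ext - scaled or unscaled register offset; the extension
                  argument distinguishes 32-bit extended from plain
                  64-bit register offsets  */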
741
742 /* 32 bit load 32 bit scaled unsigned 12 bit */
743 static void
744 ldr32_abs (sim_cpu *cpu, uint32_t offset)
745 {
746 unsigned rn = INSTR (9, 5);
747 unsigned rt = INSTR (4, 0);
748
749 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
750 /* The target register may not be SP but the source may be. */
751 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
752 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
753 + SCALE (offset, 32)));
754 }
755
756 /* 32 bit load 32 bit unscaled signed 9 bit with pre- or post-writeback. */
757 static void
758 ldr32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
759 {
760 unsigned rn = INSTR (9, 5);
761 unsigned rt = INSTR (4, 0);
762 uint64_t address;
763
764 if (rn == rt && wb != NoWriteBack)
765 HALT_UNALLOC;
766
767 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
768
769 if (wb != Post)
770 address += offset;
771
772 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
773 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
774
775 if (wb == Post)
776 address += offset;
777
778 if (wb != NoWriteBack)
779 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
780 }
781
782 /* 32 bit load 32 bit scaled or unscaled
783 zero- or sign-extended 32-bit register offset */
784 static void
785 ldr32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
786 {
787 unsigned rm = INSTR (20, 16);
788 unsigned rn = INSTR (9, 5);
789 unsigned rt = INSTR (4, 0);
790 /* rn may reference SP, rm and rt must reference ZR */
791
792 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
793 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
794 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
795
796 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
797 aarch64_set_reg_u64 (cpu, rt, NO_SP,
798 aarch64_get_mem_u32 (cpu, address + displacement));
799 }
800
801 /* 64 bit load 64 bit scaled unsigned 12 bit */
802 static void
803 ldr_abs (sim_cpu *cpu, uint32_t offset)
804 {
805 unsigned rn = INSTR (9, 5);
806 unsigned rt = INSTR (4, 0);
807
808 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
809 /* The target register may not be SP but the source may be. */
810 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
811 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
812 + SCALE (offset, 64)));
813 }
814
815 /* 64 bit load 64 bit unscaled signed 9 bit with pre- or post-writeback. */
816 static void
817 ldr_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
818 {
819 unsigned rn = INSTR (9, 5);
820 unsigned rt = INSTR (4, 0);
821 uint64_t address;
822
823 if (rn == rt && wb != NoWriteBack)
824 HALT_UNALLOC;
825
826 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
827
828 if (wb != Post)
829 address += offset;
830
831 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
832 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
833
834 if (wb == Post)
835 address += offset;
836
837 if (wb != NoWriteBack)
838 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
839 }
840
841 /* 64 bit load 64 bit scaled or unscaled zero-
842 or sign-extended 32-bit register offset. */
843 static void
844 ldr_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
845 {
846 unsigned rm = INSTR (20, 16);
847 unsigned rn = INSTR (9, 5);
848 unsigned rt = INSTR (4, 0);
849 /* rn may reference SP, rm and rt must reference ZR */
850
851 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
852 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
853 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
854
855 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
856 aarch64_set_reg_u64 (cpu, rt, NO_SP,
857 aarch64_get_mem_u64 (cpu, address + displacement));
858 }
859
860 /* 32 bit load zero-extended byte scaled unsigned 12 bit. */
861 static void
862 ldrb32_abs (sim_cpu *cpu, uint32_t offset)
863 {
864 unsigned rn = INSTR (9, 5);
865 unsigned rt = INSTR (4, 0);
866
867 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
868 /* The target register may not be SP but the source may be.
869 There is no scaling required for a byte load. */
870 aarch64_set_reg_u64 (cpu, rt, NO_SP,
871 aarch64_get_mem_u8
872 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
873 }
874
875 /* 32 bit load zero-extended byte unscaled signed 9 bit with pre- or post-writeback. */
876 static void
877 ldrb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
878 {
879 unsigned rn = INSTR (9, 5);
880 unsigned rt = INSTR (4, 0);
881 uint64_t address;
882
883 if (rn == rt && wb != NoWriteBack)
884 HALT_UNALLOC;
885
886 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
887
888 if (wb != Post)
889 address += offset;
890
891 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
892 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
893
894 if (wb == Post)
895 address += offset;
896
897 if (wb != NoWriteBack)
898 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
899 }
900
901 /* 32 bit load zero-extended byte scaled or unscaled zero-
902 or sign-extended 32-bit register offset. */
903 static void
904 ldrb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
905 {
906 unsigned rm = INSTR (20, 16);
907 unsigned rn = INSTR (9, 5);
908 unsigned rt = INSTR (4, 0);
909 /* rn may reference SP, rm and rt must reference ZR */
910
911 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
912 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
913 extension);
914
915 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
916 /* There is no scaling required for a byte load. */
917 aarch64_set_reg_u64 (cpu, rt, NO_SP,
918 aarch64_get_mem_u8 (cpu, address + displacement));
919 }
920
921 /* 64 bit load sign-extended byte unscaled signed 9 bit
922 with pre- or post-writeback. */
923 static void
924 ldrsb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
925 {
926 unsigned rn = INSTR (9, 5);
927 unsigned rt = INSTR (4, 0);
928 uint64_t address;
929 int64_t val;
930
931 if (rn == rt && wb != NoWriteBack)
932 HALT_UNALLOC;
933
934 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
935
936 if (wb != Post)
937 address += offset;
938
939 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
940 val = aarch64_get_mem_s8 (cpu, address);
941 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
942
943 if (wb == Post)
944 address += offset;
945
946 if (wb != NoWriteBack)
947 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
948 }
949
950 /* 64 bit load sign-extended byte scaled unsigned 12 bit. */
951 static void
952 ldrsb_abs (sim_cpu *cpu, uint32_t offset)
953 {
954 ldrsb_wb (cpu, offset, NoWriteBack);
955 }
956
957 /* 64 bit load sign-extended byte scaled or unscaled zero-
958 or sign-extended 32-bit register offset. */
959 static void
960 ldrsb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
961 {
962 unsigned rm = INSTR (20, 16);
963 unsigned rn = INSTR (9, 5);
964 unsigned rt = INSTR (4, 0);
965 /* rn may reference SP, rm and rt must reference ZR */
966
967 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
968 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
969 extension);
970 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
971 /* There is no scaling required for a byte load. */
972 aarch64_set_reg_s64 (cpu, rt, NO_SP,
973 aarch64_get_mem_s8 (cpu, address + displacement));
974 }
975
976 /* 32 bit load zero-extended short scaled unsigned 12 bit. */
977 static void
978 ldrh32_abs (sim_cpu *cpu, uint32_t offset)
979 {
980 unsigned rn = INSTR (9, 5);
981 unsigned rt = INSTR (4, 0);
982 uint32_t val;
983
984 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
985 /* The target register may not be SP but the source may be. */
986 val = aarch64_get_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
987 + SCALE (offset, 16));
988 aarch64_set_reg_u32 (cpu, rt, NO_SP, val);
989 }
990
991 /* 32 bit load zero-extended short unscaled signed 9 bit
992 with pre- or post-writeback. */
993 static void
994 ldrh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
995 {
996 unsigned rn = INSTR (9, 5);
997 unsigned rt = INSTR (4, 0);
998 uint64_t address;
999
1000 if (rn == rt && wb != NoWriteBack)
1001 HALT_UNALLOC;
1002
1003 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1004
1005 if (wb != Post)
1006 address += offset;
1007
1008 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1009 aarch64_set_reg_u32 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
1010
1011 if (wb == Post)
1012 address += offset;
1013
1014 if (wb != NoWriteBack)
1015 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1016 }
1017
1018 /* 32 bit load zero-extended short scaled or unscaled zero-
1019 or sign-extended 32-bit register offset. */
1020 static void
1021 ldrh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1022 {
1023 unsigned rm = INSTR (20, 16);
1024 unsigned rn = INSTR (9, 5);
1025 unsigned rt = INSTR (4, 0);
1026 /* rn may reference SP, rm and rt must reference ZR */
1027
1028 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1029 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1030 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1031
1032 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1033 aarch64_set_reg_u32 (cpu, rt, NO_SP,
1034 aarch64_get_mem_u16 (cpu, address + displacement));
1035 }
1036
1037 /* 32 bit load sign-extended short scaled unsigned 12 bit. */
1038 static void
1039 ldrsh32_abs (sim_cpu *cpu, uint32_t offset)
1040 {
1041 unsigned rn = INSTR (9, 5);
1042 unsigned rt = INSTR (4, 0);
1043 int32_t val;
1044
1045 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1046 /* The target register may not be SP but the source may be. */
1047 val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1048 + SCALE (offset, 16));
1049 aarch64_set_reg_s32 (cpu, rt, NO_SP, val);
1050 }
1051
1052 /* 32 bit load sign-extended short unscaled signed 9 bit
1053 with pre- or post-writeback. */
1054 static void
1055 ldrsh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1056 {
1057 unsigned rn = INSTR (9, 5);
1058 unsigned rt = INSTR (4, 0);
1059 uint64_t address;
1060
1061 if (rn == rt && wb != NoWriteBack)
1062 HALT_UNALLOC;
1063
1064 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1065
1066 if (wb != Post)
1067 address += offset;
1068
1069 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1070 aarch64_set_reg_s32 (cpu, rt, NO_SP,
1071 (int32_t) aarch64_get_mem_s16 (cpu, address));
1072
1073 if (wb == Post)
1074 address += offset;
1075
1076 if (wb != NoWriteBack)
1077 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1078 }
1079
1080 /* 32 bit load sign-extended short scaled or unscaled zero-
1081 or sign-extended 32-bit register offset. */
1082 static void
1083 ldrsh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1084 {
1085 unsigned rm = INSTR (20, 16);
1086 unsigned rn = INSTR (9, 5);
1087 unsigned rt = INSTR (4, 0);
1088 /* rn may reference SP, rm and rt must reference ZR */
1089
1090 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1091 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1092 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1093
1094 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1095 aarch64_set_reg_s32 (cpu, rt, NO_SP,
1096 (int32_t) aarch64_get_mem_s16
1097 (cpu, address + displacement));
1098 }
1099
1100 /* 64 bit load sign-extended short scaled unsigned 12 bit. */
1101 static void
1102 ldrsh_abs (sim_cpu *cpu, uint32_t offset)
1103 {
1104 unsigned rn = INSTR (9, 5);
1105 unsigned rt = INSTR (4, 0);
1106 int64_t val;
1107
1108 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1109 /* The target register may not be SP but the source may be. */
1110 val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1111 + SCALE (offset, 16));
1112 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1113 }
1114
1115 /* 64 bit load sign-extended short unscaled signed 9 bit
1116 with pre- or post-writeback. */
1117 static void
1118 ldrsh64_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1119 {
1120 unsigned rn = INSTR (9, 5);
1121 unsigned rt = INSTR (4, 0);
1122 uint64_t address;
1123 int64_t val;
1124
1125 if (rn == rt && wb != NoWriteBack)
1126 HALT_UNALLOC;
1127
1128 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1129 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1130
1131 if (wb != Post)
1132 address += offset;
1133
1134 val = aarch64_get_mem_s16 (cpu, address);
1135 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1136
1137 if (wb == Post)
1138 address += offset;
1139
1140 if (wb != NoWriteBack)
1141 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1142 }
1143
1144 /* 64 bit load sign-extended short scaled or unscaled zero-
1145 or sign-extended 32-bit register offset. */
1146 static void
1147 ldrsh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1148 {
1149 unsigned rm = INSTR (20, 16);
1150 unsigned rn = INSTR (9, 5);
1151 unsigned rt = INSTR (4, 0);
1152
1153 /* rn may reference SP, rm and rt must reference ZR */
1154
1155 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1156 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1157 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1158 int64_t val;
1159
1160 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1161 val = aarch64_get_mem_s16 (cpu, address + displacement);
1162 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1163 }
1164
1165 /* 64 bit load sign-extended 32 bit scaled unsigned 12 bit. */
1166 static void
1167 ldrsw_abs (sim_cpu *cpu, uint32_t offset)
1168 {
1169 unsigned rn = INSTR (9, 5);
1170 unsigned rt = INSTR (4, 0);
1171 int64_t val;
1172
1173 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1174 val = aarch64_get_mem_s32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1175 + SCALE (offset, 32));
1176 /* The target register may not be SP but the source may be. */
1177 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1178 }
1179
1180 /* 64 bit load sign-extended 32 bit unscaled signed 9 bit
1181 with pre- or post-writeback. */
1182 static void
1183 ldrsw_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1184 {
1185 unsigned rn = INSTR (9, 5);
1186 unsigned rt = INSTR (4, 0);
1187 uint64_t address;
1188
1189 if (rn == rt && wb != NoWriteBack)
1190 HALT_UNALLOC;
1191
1192 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1193
1194 if (wb != Post)
1195 address += offset;
1196
1197 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1198 aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32 (cpu, address));
1199
1200 if (wb == Post)
1201 address += offset;
1202
1203 if (wb != NoWriteBack)
1204 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1205 }
1206
1207 /* 64 bit load sign-extended 32 bit scaled or unscaled zero-
1208 or sign-extended 32-bit register offset. */
1209 static void
1210 ldrsw_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1211 {
1212 unsigned rm = INSTR (20, 16);
1213 unsigned rn = INSTR (9, 5);
1214 unsigned rt = INSTR (4, 0);
1215 /* rn may reference SP, rm and rt must reference ZR */
1216
1217 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1218 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1219 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
1220
1221 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1222 aarch64_set_reg_s64 (cpu, rt, NO_SP,
1223 aarch64_get_mem_s32 (cpu, address + displacement));
1224 }
1225
1226 /* N.B. with stores the value in source is written to the
1227 address identified by source2 modified by source3/offset. */
1228
1229 /* 32 bit store scaled unsigned 12 bit. */
1230 static void
1231 str32_abs (sim_cpu *cpu, uint32_t offset)
1232 {
1233 unsigned rn = INSTR (9, 5);
1234 unsigned rt = INSTR (4, 0);
1235
1236 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1237 /* The target register may not be SP but the source may be. */
1238 aarch64_set_mem_u32 (cpu, (aarch64_get_reg_u64 (cpu, rn, SP_OK)
1239 + SCALE (offset, 32)),
1240 aarch64_get_reg_u32 (cpu, rt, NO_SP));
1241 }
1242
1243 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
1244 static void
1245 str32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1246 {
1247 unsigned rn = INSTR (9, 5);
1248 unsigned rt = INSTR (4, 0);
1249 uint64_t address;
1250
1251 if (rn == rt && wb != NoWriteBack)
1252 HALT_UNALLOC;
1253
1254 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1255 if (wb != Post)
1256 address += offset;
1257
1258 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1259 aarch64_set_mem_u32 (cpu, address, aarch64_get_reg_u32 (cpu, rt, NO_SP));
1260
1261 if (wb == Post)
1262 address += offset;
1263
1264 if (wb != NoWriteBack)
1265 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1266 }
1267
1268 /* 32 bit store scaled or unscaled zero- or
1269 sign-extended 32-bit register offset. */
1270 static void
1271 str32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1272 {
1273 unsigned rm = INSTR (20, 16);
1274 unsigned rn = INSTR (9, 5);
1275 unsigned rt = INSTR (4, 0);
1276
1277 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1278 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1279 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
1280
1281 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1282 aarch64_set_mem_u32 (cpu, address + displacement,
1283 aarch64_get_reg_u32 (cpu, rt, NO_SP));
1284 }
1285
1286 /* 64 bit store scaled unsigned 12 bit. */
1287 static void
1288 str_abs (sim_cpu *cpu, uint32_t offset)
1289 {
1290 unsigned rn = INSTR (9, 5);
1291 unsigned rt = INSTR (4, 0);
1292
1293 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1294 aarch64_set_mem_u64 (cpu,
1295 aarch64_get_reg_u64 (cpu, rn, SP_OK)
1296 + SCALE (offset, 64),
1297 aarch64_get_reg_u64 (cpu, rt, NO_SP));
1298 }
1299
1300 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
1301 static void
1302 str_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1303 {
1304 unsigned rn = INSTR (9, 5);
1305 unsigned rt = INSTR (4, 0);
1306 uint64_t address;
1307
1308 if (rn == rt && wb != NoWriteBack)
1309 HALT_UNALLOC;
1310
1311 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1312
1313 if (wb != Post)
1314 address += offset;
1315
1316 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1317 aarch64_set_mem_u64 (cpu, address, aarch64_get_reg_u64 (cpu, rt, NO_SP));
1318
1319 if (wb == Post)
1320 address += offset;
1321
1322 if (wb != NoWriteBack)
1323 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1324 }
1325
1326 /* 64 bit store scaled or unscaled zero-
1327 or sign-extended 32-bit register offset. */
1328 static void
1329 str_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1330 {
1331 unsigned rm = INSTR (20, 16);
1332 unsigned rn = INSTR (9, 5);
1333 unsigned rt = INSTR (4, 0);
1334 /* rn may reference SP, rm and rt must reference ZR */
1335
1336 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1337 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1338 extension);
1339 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
1340
1341 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1342 aarch64_set_mem_u64 (cpu, address + displacement,
1343 aarch64_get_reg_u64 (cpu, rt, NO_SP));
1344 }
1345
1346 /* 32 bit store byte scaled unsigned 12 bit. */
1347 static void
1348 strb_abs (sim_cpu *cpu, uint32_t offset)
1349 {
1350 unsigned rn = INSTR (9, 5);
1351 unsigned rt = INSTR (4, 0);
1352
1353 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1354 /* The target register may not be SP but the source may be.
1355 There is no scaling required for a byte load. */
1356 aarch64_set_mem_u8 (cpu,
1357 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
1358 aarch64_get_reg_u8 (cpu, rt, NO_SP));
1359 }
1360
1361 /* 32 bit store byte unscaled signed 9 bit with pre- or post-writeback. */
1362 static void
1363 strb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1364 {
1365 unsigned rn = INSTR (9, 5);
1366 unsigned rt = INSTR (4, 0);
1367 uint64_t address;
1368
1369 if (rn == rt && wb != NoWriteBack)
1370 HALT_UNALLOC;
1371
1372 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1373
1374 if (wb != Post)
1375 address += offset;
1376
1377 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1378 aarch64_set_mem_u8 (cpu, address, aarch64_get_reg_u8 (cpu, rt, NO_SP));
1379
1380 if (wb == Post)
1381 address += offset;
1382
1383 if (wb != NoWriteBack)
1384 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1385 }
1386
1387 /* 32 bit store byte scaled or unscaled zero-
1388 or sign-extended 32-bit register offset. */
1389 static void
1390 strb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1391 {
1392 unsigned rm = INSTR (20, 16);
1393 unsigned rn = INSTR (9, 5);
1394 unsigned rt = INSTR (4, 0);
1395 /* rn may reference SP, rm and rt must reference ZR */
1396
1397 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1398 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1399 extension);
1400
1401 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1402 /* There is no scaling required for a byte load. */
1403 aarch64_set_mem_u8 (cpu, address + displacement,
1404 aarch64_get_reg_u8 (cpu, rt, NO_SP));
1405 }
1406
1407 /* 32 bit store short scaled unsigned 12 bit. */
1408 static void
1409 strh_abs (sim_cpu *cpu, uint32_t offset)
1410 {
1411 unsigned rn = INSTR (9, 5);
1412 unsigned rt = INSTR (4, 0);
1413
1414 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1415 /* The target register may not be SP but the source may be. */
1416 aarch64_set_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1417 + SCALE (offset, 16),
1418 aarch64_get_reg_u16 (cpu, rt, NO_SP));
1419 }
1420
1421 /* 32 bit store short unscaled signed 9 bit with pre- or post-writeback. */
1422 static void
1423 strh_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1424 {
1425 unsigned rn = INSTR (9, 5);
1426 unsigned rt = INSTR (4, 0);
1427 uint64_t address;
1428
1429 if (rn == rt && wb != NoWriteBack)
1430 HALT_UNALLOC;
1431
1432 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1433
1434 if (wb != Post)
1435 address += offset;
1436
1437 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1438 aarch64_set_mem_u16 (cpu, address, aarch64_get_reg_u16 (cpu, rt, NO_SP));
1439
1440 if (wb == Post)
1441 address += offset;
1442
1443 if (wb != NoWriteBack)
1444 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1445 }
1446
1447 /* 32 bit store short scaled or unscaled zero-
1448 or sign-extended 32-bit register offset. */
1449 static void
1450 strh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1451 {
1452 unsigned rm = INSTR (20, 16);
1453 unsigned rn = INSTR (9, 5);
1454 unsigned rt = INSTR (4, 0);
1455 /* rn may reference SP, rm and rt must reference ZR */
1456
1457 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1458 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1459 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1460
1461 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1462 aarch64_set_mem_u16 (cpu, address + displacement,
1463 aarch64_get_reg_u16 (cpu, rt, NO_SP));
1464 }
1465
1466 /* Prefetch unsigned 12 bit. */
1467 static void
1468 prfm_abs (sim_cpu *cpu, uint32_t offset)
1469 {
1470 /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
1471 00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
1472 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
1473 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
1474 10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
1475 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
1476 ow ==> UNALLOC
1477 PrfOp prfop = prfop (instr, 4, 0);
1478 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK)
1479 + SCALE (offset, 64). */
1480
1481 /* TODO : implement prefetch of address. */
1482 }
1483
1484 /* Prefetch scaled or unscaled zero- or sign-extended 32-bit register offset. */
1485 static void
1486 prfm_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1487 {
1488 /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
1489 00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
1490 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
1491 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
1492 10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
1493 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
1494 ow ==> UNALLOC
1495 rn may reference SP, rm may only reference ZR
1496 PrfOp prfop = prfop (instr, 4, 0);
1497 uint64_t base = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1498 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1499 extension);
1500 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
1501 uint64_t address = base + displacement. */
1502
1503 /* TODO : implement prefetch of address */
1504 }
1505
1506 /* 64 bit pc-relative prefetch. */
1507 static void
1508 prfm_pcrel (sim_cpu *cpu, int32_t offset)
1509 {
1510 /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
1511 00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
1512 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
1513 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
1514 10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
1515 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
1516 ow ==> UNALLOC
1517 PrfOp prfop = prfop (instr, 4, 0);
1518 uint64_t address = aarch64_get_PC (cpu) + offset. */
1519
1520 /* TODO : implement this */
1521 }
1522
1523 /* Load-store exclusive. */
1524
1525 static void
1526 ldxr (sim_cpu *cpu)
1527 {
1528 unsigned rn = INSTR (9, 5);
1529 unsigned rt = INSTR (4, 0);
1530 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1531 int size = INSTR (31, 30);
1532 /* int ordered = INSTR (15, 15); */
1533 /* int exclusive = ! INSTR (23, 23); */
1534
1535 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1536 switch (size)
1537 {
1538 case 0:
1539 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
1540 break;
1541 case 1:
1542 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
1543 break;
1544 case 2:
1545 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
1546 break;
1547 case 3:
1548 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
1549 break;
1550 }
1551 }
1552
1553 static void
1554 stxr (sim_cpu *cpu)
1555 {
1556 unsigned rn = INSTR (9, 5);
1557 unsigned rt = INSTR (4, 0);
1558 unsigned rs = INSTR (20, 16);
1559 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1560 int size = INSTR (31, 30);
1561 uint64_t data = aarch64_get_reg_u64 (cpu, rt, NO_SP);
1562
1563 switch (size)
1564 {
1565 case 0: aarch64_set_mem_u8 (cpu, address, data); break;
1566 case 1: aarch64_set_mem_u16 (cpu, address, data); break;
1567 case 2: aarch64_set_mem_u32 (cpu, address, data); break;
1568 case 3: aarch64_set_mem_u64 (cpu, address, data); break;
1569 }
1570
1571 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1572 aarch64_set_reg_u64 (cpu, rs, NO_SP, 0); /* Status 0 - the store always succeeds; the sim models a single processor. */
1573 }
1574
1575 static void
1576 dexLoadLiteral (sim_cpu *cpu)
1577 {
1578 /* instr[29,27] == 011
1579 instr[25,24] == 00
1580 instr[31,30:26] = opc: 000 ==> LDRW, 001 ==> FLDRS
1581 010 ==> LDRX, 011 ==> FLDRD
1582 100 ==> LDRSW, 101 ==> FLDRQ
1583 110 ==> PRFM, 111 ==> UNALLOC
1584 instr[26] ==> V : 0 ==> GReg, 1 ==> FReg
1585 instr[23, 5] == simm19 */
1586
1587 /* unsigned rt = INSTR (4, 0); */
1588 uint32_t dispatch = (INSTR (31, 30) << 1) | INSTR (26, 26);
1589 int32_t imm = simm32 (aarch64_get_instr (cpu), 23, 5);
1590
1591 switch (dispatch)
1592 {
1593 case 0: ldr32_pcrel (cpu, imm); break;
1594 case 1: fldrs_pcrel (cpu, imm); break;
1595 case 2: ldr_pcrel (cpu, imm); break;
1596 case 3: fldrd_pcrel (cpu, imm); break;
1597 case 4: ldrsw_pcrel (cpu, imm); break;
1598 case 5: fldrq_pcrel (cpu, imm); break;
1599 case 6: prfm_pcrel (cpu, imm); break;
1600 case 7:
1601 default:
1602 HALT_UNALLOC;
1603 }
1604 }
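/* A hand-worked example of the dispatch above (a sketch): the word
   0x58000020 encodes LDR X0, <label at PC+4> with opc = 01 and V = 0,
   so dispatch = (1 << 1) | 0 = 2 and ldr_pcrel is called with imm = 1,
   i.e. a 64-bit load from PC + 4 into X0.  */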
1605
1606 /* Immediate arithmetic
1607 The aimm argument is a 12 bit unsigned value or a 12 bit unsigned
1608 value left shifted by 12 bits (done at decode).
1609
1610 N.B. the register args (dest, source) can normally be Xn or SP.
1611 The exception occurs for flag setting instructions which may
1612 only use Xn for the output (dest). */
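/* For example, "add w0, w1, #1, lsl #12" reaches add32 with
   aimm == 0x1000: the optional 12-bit left shift has already been
   applied by the decoder, as noted above.  */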
1613
1614 /* 32 bit add immediate. */
1615 static void
1616 add32 (sim_cpu *cpu, uint32_t aimm)
1617 {
1618 unsigned rn = INSTR (9, 5);
1619 unsigned rd = INSTR (4, 0);
1620
1621 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1622 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1623 aarch64_get_reg_u32 (cpu, rn, SP_OK) + aimm);
1624 }
1625
1626 /* 64 bit add immediate. */
1627 static void
1628 add64 (sim_cpu *cpu, uint32_t aimm)
1629 {
1630 unsigned rn = INSTR (9, 5);
1631 unsigned rd = INSTR (4, 0);
1632
1633 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1634 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1635 aarch64_get_reg_u64 (cpu, rn, SP_OK) + aimm);
1636 }
1637
1638 static void
1639 set_flags_for_add32 (sim_cpu *cpu, int32_t value1, int32_t value2)
1640 {
1641 int32_t result = (int32_t) ((uint32_t) value1 + (uint32_t) value2);
1642 int64_t sresult = (int64_t) value1 + (int64_t) value2;
1643 uint64_t uresult = (uint64_t)(uint32_t) value1
1644 + (uint64_t)(uint32_t) value2;
1645 uint32_t flags = 0;
1646
1647 if (result == 0)
1648 flags |= Z;
1649
1650 if (result & (1U << 31))
1651 flags |= N;
1652
1653 if (uresult != (uint32_t) result)
1654 flags |= C;
1655
1656 if (sresult != result)
1657 flags |= V;
1658
1659 aarch64_set_CPSR (cpu, flags);
1660 }
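/* Worked example: value1 = 0x7fffffff, value2 = 1 gives result
   0x80000000, setting N; sresult (+2^31) differs from the sign-extended
   result (-2^31), setting V; uresult equals the 32-bit result, so C
   stays clear. Flags = N|V, matching the architectural ADDS result.  */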
1661
1662 #define NEG(a) (((a) & signbit) == signbit)
1663 #define POS(a) (((a) & signbit) == 0)
1664
1665 static void
1666 set_flags_for_add64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
1667 {
1668 uint64_t result = value1 + value2;
1669 uint32_t flags = 0;
1670 uint64_t signbit = 1ULL << 63;
1671
1672 if (result == 0)
1673 flags |= Z;
1674
1675 if (NEG (result))
1676 flags |= N;
1677
1678 if ( (NEG (value1) && NEG (value2))
1679 || (NEG (value1) && POS (result))
1680 || (NEG (value2) && POS (result)))
1681 flags |= C;
1682
1683 if ( (NEG (value1) && NEG (value2) && POS (result))
1684 || (POS (value1) && POS (value2) && NEG (result)))
1685 flags |= V;
1686
1687 aarch64_set_CPSR (cpu, flags);
1688 }
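/* Worked example: 0x8000000000000000 + 0x8000000000000000 wraps to 0,
   setting Z; both operands are negative, so the carry out sets C; two
   negatives producing a non-negative result also sets V: flags = Z|C|V.  */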
1689
1690 static void
1691 set_flags_for_sub32 (sim_cpu *cpu, uint32_t value1, uint32_t value2)
1692 {
1693 uint32_t result = value1 - value2;
1694 uint32_t flags = 0;
1695 uint32_t signbit = 1U << 31;
1696
1697 if (result == 0)
1698 flags |= Z;
1699
1700 if (NEG (result))
1701 flags |= N;
1702
1703 if ( (NEG (value1) && POS (value2))
1704 || (NEG (value1) && POS (result))
1705 || (POS (value2) && POS (result)))
1706 flags |= C;
1707
1708 if ( (NEG (value1) && POS (value2) && POS (result))
1709 || (POS (value1) && NEG (value2) && NEG (result)))
1710 flags |= V;
1711
1712 aarch64_set_CPSR (cpu, flags);
1713 }
1714
1715 static void
1716 set_flags_for_sub64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
1717 {
1718 uint64_t result = value1 - value2;
1719 uint32_t flags = 0;
1720 uint64_t signbit = 1ULL << 63;
1721
1722 if (result == 0)
1723 flags |= Z;
1724
1725 if (NEG (result))
1726 flags |= N;
1727
1728 if ( (NEG (value1) && POS (value2))
1729 || (NEG (value1) && POS (result))
1730 || (POS (value2) && POS (result)))
1731 flags |= C;
1732
1733 if ( (NEG (value1) && POS (value2) && POS (result))
1734 || (POS (value1) && NEG (value2) && NEG (result)))
1735 flags |= V;
1736
1737 aarch64_set_CPSR (cpu, flags);
1738 }
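/* For subtraction C means "no borrow": e.g. 5 - 5 gives 0 with
   flags = Z|C (the POS (value2) && POS (result) term holds), while
   0 - 1 wraps to all ones with only N set, since the borrow leaves C
   clear.  */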
1739
1740 static void
1741 set_flags_for_binop32 (sim_cpu *cpu, uint32_t result)
1742 {
1743 uint32_t flags = 0;
1744
1745 if (result == 0)
1746 flags |= Z;
1747 else
1748 flags &= ~ Z;
1749
1750 if (result & (1U << 31))
1751 flags |= N;
1752 else
1753 flags &= ~ N;
1754
1755 aarch64_set_CPSR (cpu, flags);
1756 }
1757
1758 static void
1759 set_flags_for_binop64 (sim_cpu *cpu, uint64_t result)
1760 {
1761 uint32_t flags = 0;
1762
1763 if (result == 0)
1764 flags |= Z;
1765 else
1766 flags &= ~ Z;
1767
1768 if (result & (1ULL << 63))
1769 flags |= N;
1770 else
1771 flags &= ~ N;
1772
1773 aarch64_set_CPSR (cpu, flags);
1774 }
1775
1776 /* 32 bit add immediate set flags. */
1777 static void
1778 adds32 (sim_cpu *cpu, uint32_t aimm)
1779 {
1780 unsigned rn = INSTR (9, 5);
1781 unsigned rd = INSTR (4, 0);
1782 /* TODO : do we need to worry about signs here? */
1783 int32_t value1 = aarch64_get_reg_s32 (cpu, rn, SP_OK);
1784
1785 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1786 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + aimm);
1787 set_flags_for_add32 (cpu, value1, aimm);
1788 }
1789
1790 /* 64 bit add immediate set flags. */
1791 static void
1792 adds64 (sim_cpu *cpu, uint32_t aimm)
1793 {
1794 unsigned rn = INSTR (9, 5);
1795 unsigned rd = INSTR (4, 0);
1796 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1797 uint64_t value2 = aimm;
1798
1799 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1800 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1801 set_flags_for_add64 (cpu, value1, value2);
1802 }
1803
1804 /* 32 bit sub immediate. */
1805 static void
1806 sub32 (sim_cpu *cpu, uint32_t aimm)
1807 {
1808 unsigned rn = INSTR (9, 5);
1809 unsigned rd = INSTR (4, 0);
1810
1811 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1812 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1813 aarch64_get_reg_u32 (cpu, rn, SP_OK) - aimm);
1814 }
1815
1816 /* 64 bit sub immediate. */
1817 static void
1818 sub64 (sim_cpu *cpu, uint32_t aimm)
1819 {
1820 unsigned rn = INSTR (9, 5);
1821 unsigned rd = INSTR (4, 0);
1822
1823 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1824 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1825 aarch64_get_reg_u64 (cpu, rn, SP_OK) - aimm);
1826 }
1827
1828 /* 32 bit sub immediate set flags. */
1829 static void
1830 subs32 (sim_cpu *cpu, uint32_t aimm)
1831 {
1832 unsigned rn = INSTR (9, 5);
1833 unsigned rd = INSTR (4, 0);
1834 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
1835 uint32_t value2 = aimm;
1836
1837 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1838 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1839 set_flags_for_sub32 (cpu, value1, value2);
1840 }
1841
1842 /* 64 bit sub immediate set flags. */
1843 static void
1844 subs64 (sim_cpu *cpu, uint32_t aimm)
1845 {
1846 unsigned rn = INSTR (9, 5);
1847 unsigned rd = INSTR (4, 0);
1848 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1849 uint64_t value2 = aimm;
1850
1851 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1852 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1853 set_flags_for_sub64 (cpu, value1, value2);
1854 }
1855
1856 /* Data Processing Register. */
1857
1858 /* First two helpers to perform the shift operations. */
1859
1860 static inline uint32_t
1861 shifted32 (uint32_t value, Shift shift, uint32_t count)
1862 {
1863 switch (shift)
1864 {
1865 default:
1866 case LSL:
1867 return (value << count);
1868 case LSR:
1869 return (value >> count);
1870 case ASR:
1871 {
1872 int32_t svalue = value;
1873 return (svalue >> count);
1874 }
1875 case ROR:
1876 {
1877 uint32_t top = value >> count;
1878 uint32_t bottom = value << (32 - count);
1879 return (bottom | top);
1880 }
1881 }
1882 }
1883
1884 static inline uint64_t
1885 shifted64 (uint64_t value, Shift shift, uint32_t count)
1886 {
1887 switch (shift)
1888 {
1889 default:
1890 case LSL:
1891 return (value << count);
1892 case LSR:
1893 return (value >> count);
1894 case ASR:
1895 {
1896 int64_t svalue = value;
1897 return (svalue >> count);
1898 }
1899 case ROR:
1900 {
1901 uint64_t top = value >> count;
1902 uint64_t bottom = value << (64 - count);
1903 return (bottom | top);
1904 }
1905 }
1906 }
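
/* Worked example (illustrative): shifted32 (0x80000001, ROR, 1) yields
   top = 0x40000000 and bottom = 0x80000000, i.e. 0xC0000000.  N.B. a
   ROR count of zero would shift by the full register width, which is
   undefined behaviour in C; the decode paths are assumed never to pass
   a zero rotate here.  */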
1907
1908 /* Arithmetic shifted register.
1909 These allow an optional LSL, ASR or LSR to the second source
1910 register with a count up to the register bit count.
1911
1912 N.B register args may not be SP. */
1913
1914 /* 32 bit ADD shifted register. */
1915 static void
1916 add32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1917 {
1918 unsigned rm = INSTR (20, 16);
1919 unsigned rn = INSTR (9, 5);
1920 unsigned rd = INSTR (4, 0);
1921
1922 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1923 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1924 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1925 + shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1926 shift, count));
1927 }
1928
1929 /* 64 bit ADD shifted register. */
1930 static void
1931 add64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1932 {
1933 unsigned rm = INSTR (20, 16);
1934 unsigned rn = INSTR (9, 5);
1935 unsigned rd = INSTR (4, 0);
1936
1937 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1938 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1939 aarch64_get_reg_u64 (cpu, rn, NO_SP)
1940 + shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1941 shift, count));
1942 }
1943
1944 /* 32 bit ADD shifted register setting flags. */
1945 static void
1946 adds32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1947 {
1948 unsigned rm = INSTR (20, 16);
1949 unsigned rn = INSTR (9, 5);
1950 unsigned rd = INSTR (4, 0);
1951
1952 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
1953 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1954 shift, count);
1955
1956 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1957 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1958 set_flags_for_add32 (cpu, value1, value2);
1959 }
1960
1961 /* 64 bit ADD shifted register setting flags. */
1962 static void
1963 adds64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1964 {
1965 unsigned rm = INSTR (20, 16);
1966 unsigned rn = INSTR (9, 5);
1967 unsigned rd = INSTR (4, 0);
1968
1969 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
1970 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1971 shift, count);
1972
1973 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1974 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1975 set_flags_for_add64 (cpu, value1, value2);
1976 }
1977
1978 /* 32 bit SUB shifted register. */
1979 static void
1980 sub32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1981 {
1982 unsigned rm = INSTR (20, 16);
1983 unsigned rn = INSTR (9, 5);
1984 unsigned rd = INSTR (4, 0);
1985
1986 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1987 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1988 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1989 - shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1990 shift, count));
1991 }
1992
1993 /* 64 bit SUB shifted register. */
1994 static void
1995 sub64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1996 {
1997 unsigned rm = INSTR (20, 16);
1998 unsigned rn = INSTR (9, 5);
1999 unsigned rd = INSTR (4, 0);
2000
2001 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2002 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2003 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2004 - shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
2005 shift, count));
2006 }
2007
2008 /* 32 bit SUB shifted register setting flags. */
2009 static void
2010 subs32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2011 {
2012 unsigned rm = INSTR (20, 16);
2013 unsigned rn = INSTR (9, 5);
2014 unsigned rd = INSTR (4, 0);
2015
2016 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2017 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
2018 shift, count);
2019
2020 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2021 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2022 set_flags_for_sub32 (cpu, value1, value2);
2023 }
2024
2025 /* 64 bit SUB shifted register setting flags. */
2026 static void
2027 subs64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2028 {
2029 unsigned rm = INSTR (20, 16);
2030 unsigned rn = INSTR (9, 5);
2031 unsigned rd = INSTR (4, 0);
2032
2033 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2034 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
2035 shift, count);
2036
2037 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2038 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2039 set_flags_for_sub64 (cpu, value1, value2);
2040 }
2041
2042 /* First a couple more helpers to fetch the
2043 relevant source register element either
2044 sign or zero extended as required by the
2045 extension value. */
2046
2047 static uint32_t
2048 extreg32 (sim_cpu *cpu, unsigned int lo, Extension extension)
2049 {
2050 switch (extension)
2051 {
2052 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2053 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2054 case UXTW: /* Fall through. */
2055 case UXTX: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2056 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2057 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2058 case SXTW: /* Fall through. */
2059 case SXTX: /* Fall through. */
2060 default: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2061 }
2062 }
2063
2064 static uint64_t
2065 extreg64 (sim_cpu *cpu, unsigned int lo, Extension extension)
2066 {
2067 switch (extension)
2068 {
2069 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2070 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2071 case UXTW: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2072 case UXTX: return aarch64_get_reg_u64 (cpu, lo, NO_SP);
2073 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2074 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2075 case SXTW: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2076 case SXTX:
2077 default: return aarch64_get_reg_s64 (cpu, lo, NO_SP);
2078 }
2079 }
2080
2081 /* Arithmetic extending register
2082 These allow an optional sign extension of some portion of the
2083 second source register followed by an optional left shift of
2084 between 0 and 4 bits.
2085
2086 N.B output (dest) and first input arg (source) may normally be Xn
2087 or SP. However, for flag setting operations dest can only be
2088 Xn. Second input registers are always Xn. */
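
/* Worked example (illustrative): ADD X0, X1, W2, UXTB #3 extracts the
   low byte of W2, zero extends it and shifts it left by 3, so with
   W2 = 0x1FF the second operand becomes 0xFF << 3 = 0x7F8.  */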
2089
2090 /* 32 bit ADD extending register. */
2091 static void
2092 add32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2093 {
2094 unsigned rm = INSTR (20, 16);
2095 unsigned rn = INSTR (9, 5);
2096 unsigned rd = INSTR (4, 0);
2097
2098 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2099 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2100 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2101 + (extreg32 (cpu, rm, extension) << shift));
2102 }
2103
2104 /* 64 bit ADD extending register.
2105 N.B. This subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2106 static void
2107 add64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2108 {
2109 unsigned rm = INSTR (20, 16);
2110 unsigned rn = INSTR (9, 5);
2111 unsigned rd = INSTR (4, 0);
2112
2113 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2114 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2115 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2116 + (extreg64 (cpu, rm, extension) << shift));
2117 }
2118
2119 /* 32 bit ADD extending register setting flags. */
2120 static void
2121 adds32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2122 {
2123 unsigned rm = INSTR (20, 16);
2124 unsigned rn = INSTR (9, 5);
2125 unsigned rd = INSTR (4, 0);
2126
2127 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2128 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2129
2130 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2131 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2132 set_flags_for_add32 (cpu, value1, value2);
2133 }
2134
2135 /* 64 bit ADD extending register setting flags */
2136 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2137 static void
2138 adds64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2139 {
2140 unsigned rm = INSTR (20, 16);
2141 unsigned rn = INSTR (9, 5);
2142 unsigned rd = INSTR (4, 0);
2143
2144 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2145 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2146
2147 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2148 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2149 set_flags_for_add64 (cpu, value1, value2);
2150 }
2151
2152 /* 32 bit SUB extending register. */
2153 static void
2154 sub32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2155 {
2156 unsigned rm = INSTR (20, 16);
2157 unsigned rn = INSTR (9, 5);
2158 unsigned rd = INSTR (4, 0);
2159
2160 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2161 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2162 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2163 - (extreg32 (cpu, rm, extension) << shift));
2164 }
2165
2166 /* 64 bit SUB extending register. */
2167 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2168 static void
2169 sub64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2170 {
2171 unsigned rm = INSTR (20, 16);
2172 unsigned rn = INSTR (9, 5);
2173 unsigned rd = INSTR (4, 0);
2174
2175 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2176 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2177 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2178 - (extreg64 (cpu, rm, extension) << shift));
2179 }
2180
2181 /* 32 bit SUB extending register setting flags. */
2182 static void
2183 subs32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2184 {
2185 unsigned rm = INSTR (20, 16);
2186 unsigned rn = INSTR (9, 5);
2187 unsigned rd = INSTR (4, 0);
2188
2189 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2190 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2191
2192 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2193 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2194 set_flags_for_sub32 (cpu, value1, value2);
2195 }
2196
2197 /* 64 bit SUB extending register setting flags */
2198 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2199 static void
2200 subs64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2201 {
2202 unsigned rm = INSTR (20, 16);
2203 unsigned rn = INSTR (9, 5);
2204 unsigned rd = INSTR (4, 0);
2205
2206 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2207 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2208
2209 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2210 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2211 set_flags_for_sub64 (cpu, value1, value2);
2212 }
2213
2214 static void
2215 dexAddSubtractImmediate (sim_cpu *cpu)
2216 {
2217 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2218 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2219 instr[29] = set : 0 ==> no flags, 1 ==> set flags
2220 instr[28,24] = 10001
2221 instr[23,22] = shift : 00 ==> LSL#0, 01 ==> LSL#12, 1x ==> UNALLOC
2222 instr[21,10] = uimm12
2223 instr[9,5] = Rn
2224 instr[4,0] = Rd */
2225
2226 /* N.B. the shift is applied at decode before calling the add/sub routine. */
2227 uint32_t shift = INSTR (23, 22);
2228 uint32_t imm = INSTR (21, 10);
2229 uint32_t dispatch = INSTR (31, 29);
2230
2231 NYI_assert (28, 24, 0x11);
2232
2233 if (shift > 1)
2234 HALT_UNALLOC;
2235
2236 if (shift)
2237 imm <<= 12;
2238
2239 switch (dispatch)
2240 {
2241 case 0: add32 (cpu, imm); break;
2242 case 1: adds32 (cpu, imm); break;
2243 case 2: sub32 (cpu, imm); break;
2244 case 3: subs32 (cpu, imm); break;
2245 case 4: add64 (cpu, imm); break;
2246 case 5: adds64 (cpu, imm); break;
2247 case 6: sub64 (cpu, imm); break;
2248 case 7: subs64 (cpu, imm); break;
2249 }
2250 }
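
/* Decode example (illustrative): SUBS W0, W1, #1, LSL #12 has
   size:op:set = 0b011 and shift = 01, so imm is widened to 0x1000 and
   dispatched to subs32 above.  */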
2251
2252 static void
2253 dexAddSubtractShiftedRegister (sim_cpu *cpu)
2254 {
2255 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2256 instr[30,29] = op : 00 ==> ADD, 01 ==> ADDS, 10 ==> SUB, 11 ==> SUBS
2257 instr[28,24] = 01011
2258 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> UNALLOC
2259 instr[21] = 0
2260 instr[20,16] = Rm
2261 instr[15,10] = count : must be 0xxxxx for 32 bit
2262 instr[9,5] = Rn
2263 instr[4,0] = Rd */
2264
2265 uint32_t size = INSTR (31, 31);
2266 uint32_t count = INSTR (15, 10);
2267 Shift shiftType = INSTR (23, 22);
2268
2269 NYI_assert (28, 24, 0x0B);
2270 NYI_assert (21, 21, 0);
2271
2272 /* Shift encoded as ROR is unallocated. */
2273 if (shiftType == ROR)
2274 HALT_UNALLOC;
2275
2276 /* 32 bit operations must have count[5] = 0
2277 or else we have an UNALLOC. */
2278 if (size == 0 && uimm (count, 5, 5))
2279 HALT_UNALLOC;
2280
2281 /* Dispatch on size:op i.e instr [31,29]. */
2282 switch (INSTR (31, 29))
2283 {
2284 case 0: add32_shift (cpu, shiftType, count); break;
2285 case 1: adds32_shift (cpu, shiftType, count); break;
2286 case 2: sub32_shift (cpu, shiftType, count); break;
2287 case 3: subs32_shift (cpu, shiftType, count); break;
2288 case 4: add64_shift (cpu, shiftType, count); break;
2289 case 5: adds64_shift (cpu, shiftType, count); break;
2290 case 6: sub64_shift (cpu, shiftType, count); break;
2291 case 7: subs64_shift (cpu, shiftType, count); break;
2292 }
2293 }
2294
2295 static void
2296 dexAddSubtractExtendedRegister (sim_cpu *cpu)
2297 {
2298 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2299 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2300 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2301 instr[28,24] = 01011
2302 instr[23,22] = opt : 0 ==> ok, 1,2,3 ==> UNALLOC
2303 instr[21] = 1
2304 instr[20,16] = Rm
2305 instr[15,13] = option : 000 ==> UXTB, 001 ==> UXTH,
2306 010 ==> LSL|UXTW, 011 ==> UXTX,
2307 100 ==> SXTB, 101 ==> SXTH,
2308 110 ==> SXTW, 111 ==> SXTX,
2309 instr[12,10] = shift : 0,1,2,3,4 ==> ok, 5,6,7 ==> UNALLOC
2310 instr[9,5] = Rn
2311 instr[4,0] = Rd */
2312
2313 Extension extensionType = INSTR (15, 13);
2314 uint32_t shift = INSTR (12, 10);
2315
2316 NYI_assert (28, 24, 0x0B);
2317 NYI_assert (21, 21, 1);
2318
2319 /* Shift may not exceed 4. */
2320 if (shift > 4)
2321 HALT_UNALLOC;
2322
2323 /* Dispatch on size:op:set?. */
2324 switch (INSTR (31, 29))
2325 {
2326 case 0: add32_ext (cpu, extensionType, shift); break;
2327 case 1: adds32_ext (cpu, extensionType, shift); break;
2328 case 2: sub32_ext (cpu, extensionType, shift); break;
2329 case 3: subs32_ext (cpu, extensionType, shift); break;
2330 case 4: add64_ext (cpu, extensionType, shift); break;
2331 case 5: adds64_ext (cpu, extensionType, shift); break;
2332 case 6: sub64_ext (cpu, extensionType, shift); break;
2333 case 7: subs64_ext (cpu, extensionType, shift); break;
2334 }
2335 }
2336
2337 /* Conditional data processing
2338 Condition register is implicit 3rd source. */
2339
2340 /* 32 bit add with carry. */
2341 /* N.B register args may not be SP. */
2342
2343 static void
2344 adc32 (sim_cpu *cpu)
2345 {
2346 unsigned rm = INSTR (20, 16);
2347 unsigned rn = INSTR (9, 5);
2348 unsigned rd = INSTR (4, 0);
2349
2350 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2351 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2352 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2353 + aarch64_get_reg_u32 (cpu, rm, NO_SP)
2354 + IS_SET (C));
2355 }
2356
2357 /* 64 bit add with carry */
2358 static void
2359 adc64 (sim_cpu *cpu)
2360 {
2361 unsigned rm = INSTR (20, 16);
2362 unsigned rn = INSTR (9, 5);
2363 unsigned rd = INSTR (4, 0);
2364
2365 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2366 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2367 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2368 + aarch64_get_reg_u64 (cpu, rm, NO_SP)
2369 + IS_SET (C));
2370 }
2371
2372 /* 32 bit add with carry setting flags. */
2373 static void
2374 adcs32 (sim_cpu *cpu)
2375 {
2376 unsigned rm = INSTR (20, 16);
2377 unsigned rn = INSTR (9, 5);
2378 unsigned rd = INSTR (4, 0);
2379
2380 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2381 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2382 uint32_t carry = IS_SET (C);
2383
2384 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2385 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2386 set_flags_for_add32 (cpu, value1, value2 + carry);
2387 }
2388
2389 /* 64 bit add with carry setting flags. */
2390 static void
2391 adcs64 (sim_cpu *cpu)
2392 {
2393 unsigned rm = INSTR (20, 16);
2394 unsigned rn = INSTR (9, 5);
2395 unsigned rd = INSTR (4, 0);
2396
2397 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2398 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2399 uint64_t carry = IS_SET (C);
2400
2401 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2402 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2403 set_flags_for_add64 (cpu, value1, value2 + carry);
2404 }
2405
2406 /* 32 bit sub with carry. */
2407 static void
2408 sbc32 (sim_cpu *cpu)
2409 {
2410 unsigned rm = INSTR (20, 16);
2411 unsigned rn = INSTR (9, 5); /* ngc iff rn == 31. */
2412 unsigned rd = INSTR (4, 0);
2413
2414 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2415 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2416 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2417 - aarch64_get_reg_u32 (cpu, rm, NO_SP)
2418 - 1 + IS_SET (C));
2419 }
2420
2421 /* 64 bit sub with carry */
2422 static void
2423 sbc64 (sim_cpu *cpu)
2424 {
2425 unsigned rm = INSTR (20, 16);
2426 unsigned rn = INSTR (9, 5);
2427 unsigned rd = INSTR (4, 0);
2428
2429 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2430 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2431 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2432 - aarch64_get_reg_u64 (cpu, rm, NO_SP)
2433 - 1 + IS_SET (C));
2434 }
2435
2436 /* 32 bit sub with carry setting flags */
2437 static void
2438 sbcs32 (sim_cpu *cpu)
2439 {
2440 unsigned rm = INSTR (20, 16);
2441 unsigned rn = INSTR (9, 5);
2442 unsigned rd = INSTR (4, 0);
2443
2444 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2445 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2446 uint32_t carry = IS_SET (C);
2447 uint32_t result = value1 - value2 - 1 + carry;
2448
2449 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2450 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2451 set_flags_for_sub32 (cpu, value1, value2 + 1 - carry);
2452 }
2453
2454 /* 64 bit sub with carry setting flags */
2455 static void
2456 sbcs64 (sim_cpu *cpu)
2457 {
2458 unsigned rm = INSTR (20, 16);
2459 unsigned rn = INSTR (9, 5);
2460 unsigned rd = INSTR (4, 0);
2461
2462 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2463 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2464 uint64_t carry = IS_SET (C);
2465 uint64_t result = value1 - value2 - 1 + carry;
2466
2467 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2468 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2469 set_flags_for_sub64 (cpu, value1, value2 + 1 - carry);
2470 }
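
/* Worked example (illustrative): SBCS X0, X1, X2 with X1 = 5, X2 = 3
   and C clear computes 5 - 3 - 1 = 1; the flags are derived from the
   effective subtrahend 3 + 1 - 0 = 4, so C ends up set (5 >= 4).  */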
2471
2472 static void
2473 dexAddSubtractWithCarry (sim_cpu *cpu)
2474 {
2475 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2476 instr[30] = op : 0 ==> ADC, 1 ==> SBC
2477 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2478 instr[28,21] = 1 1010 000
2479 instr[20,16] = Rm
2480 instr[15,10] = op2 : 00000 ==> ok, ow ==> UNALLOC
2481 instr[9,5] = Rn
2482 instr[4,0] = Rd */
2483
2484 uint32_t op2 = INSTR (15, 10);
2485
2486 NYI_assert (28, 21, 0xD0);
2487
2488 if (op2 != 0)
2489 HALT_UNALLOC;
2490
2491 /* Dispatch on size:op:set?. */
2492 switch (INSTR (31, 29))
2493 {
2494 case 0: adc32 (cpu); break;
2495 case 1: adcs32 (cpu); break;
2496 case 2: sbc32 (cpu); break;
2497 case 3: sbcs32 (cpu); break;
2498 case 4: adc64 (cpu); break;
2499 case 5: adcs64 (cpu); break;
2500 case 6: sbc64 (cpu); break;
2501 case 7: sbcs64 (cpu); break;
2502 }
2503 }
2504
2505 static uint32_t
2506 testConditionCode (sim_cpu *cpu, CondCode cc)
2507 {
2508 /* This should be reducible to branchless logic
2509 by some careful testing of bits in CC followed
2510 by the requisite masking and combining of bits
2511 from the flag register.
2512
2513 For now we do it with a switch. */
2514 int res;
2515
2516 switch (cc)
2517 {
2518 case EQ: res = IS_SET (Z); break;
2519 case NE: res = IS_CLEAR (Z); break;
2520 case CS: res = IS_SET (C); break;
2521 case CC: res = IS_CLEAR (C); break;
2522 case MI: res = IS_SET (N); break;
2523 case PL: res = IS_CLEAR (N); break;
2524 case VS: res = IS_SET (V); break;
2525 case VC: res = IS_CLEAR (V); break;
2526 case HI: res = IS_SET (C) && IS_CLEAR (Z); break;
2527 case LS: res = IS_CLEAR (C) || IS_SET (Z); break;
2528 case GE: res = IS_SET (N) == IS_SET (V); break;
2529 case LT: res = IS_SET (N) != IS_SET (V); break;
2530 case GT: res = IS_CLEAR (Z) && (IS_SET (N) == IS_SET (V)); break;
2531 case LE: res = IS_SET (Z) || (IS_SET (N) != IS_SET (V)); break;
2532 case AL:
2533 case NV:
2534 default:
2535 res = 1;
2536 break;
2537 }
2538 return res;
2539 }
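
/* Example (illustrative): after CMP X0, X1 with equal operands, Z is
   set and N == V, so EQ, GE and LE all hold while NE, GT and LT fail.  */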
2540
2541 static void
2542 CondCompare (sim_cpu *cpu) /* aka: ccmp and ccmn */
2543 {
2544 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2545 instr[30] = compare with positive (1) or negative value (0)
2546 instr[29,21] = 1 1101 0010
2547 instr[20,16] = Rm or const
2548 instr[15,12] = cond
2549 instr[11] = compare reg (0) or const (1)
2550 instr[10] = 0
2551 instr[9,5] = Rn
2552 instr[4] = 0
2553 instr[3,0] = value for CPSR bits if the comparison does not take place. */
2554 signed int negate;
2555 unsigned rm;
2556 unsigned rn;
2557
2558 NYI_assert (29, 21, 0x1d2);
2559 NYI_assert (10, 10, 0);
2560 NYI_assert (4, 4, 0);
2561
2562 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2563 if (! testConditionCode (cpu, INSTR (15, 12)))
2564 {
2565 aarch64_set_CPSR (cpu, INSTR (3, 0));
2566 return;
2567 }
2568
2569 negate = INSTR (30, 30) ? 1 : -1;
2570 rm = INSTR (20, 16);
2571 rn = INSTR ( 9, 5);
2572
2573 if (INSTR (31, 31))
2574 {
2575 if (INSTR (11, 11))
2576 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2577 negate * (uint64_t) rm);
2578 else
2579 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2580 negate * aarch64_get_reg_u64 (cpu, rm, SP_OK));
2581 }
2582 else
2583 {
2584 if (INSTR (11, 11))
2585 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2586 negate * rm);
2587 else
2588 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2589 negate * aarch64_get_reg_u32 (cpu, rm, SP_OK));
2590 }
2591 }
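
/* Example (illustrative): CCMP X1, #5, #0, EQ re-tests the Z flag; if
   EQ holds the flags are recomputed from X1 - 5 via the immediate path
   (instr[11] set), otherwise NZCV is loaded from the literal 0.  */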
2592
2593 static void
2594 do_vec_MOV_whole_vector (sim_cpu *cpu)
2595 {
2596 /* MOV Vd.T, Vs.T (alias for ORR Vd.T, Vn.T, Vm.T where Vn == Vm)
2597
2598 instr[31] = 0
2599 instr[30] = half(0)/full(1)
2600 instr[29,21] = 001110101
2601 instr[20,16] = Vs
2602 instr[15,10] = 000111
2603 instr[9,5] = Vs
2604 instr[4,0] = Vd */
2605
2606 unsigned vs = INSTR (9, 5);
2607 unsigned vd = INSTR (4, 0);
2608
2609 NYI_assert (29, 21, 0x075);
2610 NYI_assert (15, 10, 0x07);
2611
2612 if (INSTR (20, 16) != vs)
2613 HALT_NYI;
2614
2615 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2616 if (INSTR (30, 30))
2617 aarch64_set_vec_u64 (cpu, vd, 1, aarch64_get_vec_u64 (cpu, vs, 1));
2618
2619 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vs, 0));
2620 }
2621
2622 static void
2623 do_vec_MOV_into_scalar (sim_cpu *cpu)
2624 {
2625 /* instr[31] = 0
2626 instr[30] = word(0)/long(1)
2627 instr[29,21] = 00 1110 000
2628 instr[20,18] = element size and index
2629 instr[17,10] = 00 0011 11
2630 instr[9,5] = V source
2631 instr[4,0] = R dest */
2632
2633 unsigned vs = INSTR (9, 5);
2634 unsigned rd = INSTR (4, 0);
2635
2636 NYI_assert (29, 21, 0x070);
2637 NYI_assert (17, 10, 0x0F);
2638
2639 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2640 switch (INSTR (20, 18))
2641 {
2642 case 0x2:
2643 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, vs, 0));
2644 break;
2645
2646 case 0x6:
2647 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, vs, 1));
2648 break;
2649
2650 case 0x1:
2651 case 0x3:
2652 case 0x5:
2653 case 0x7:
2654 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u32
2655 (cpu, vs, INSTR (20, 19)));
2656 break;
2657
2658 default:
2659 HALT_NYI;
2660 }
2661 }
2662
2663 static void
2664 do_vec_INS (sim_cpu *cpu)
2665 {
2666 /* instr[31,21] = 01001110000
2667 instr[20,16] = element size and index
2668 instr[15,10] = 000111
2669 instr[9,5] = W source
2670 instr[4,0] = V dest */
2671
2672 int index;
2673 unsigned rs = INSTR (9, 5);
2674 unsigned vd = INSTR (4, 0);
2675
2676 NYI_assert (31, 21, 0x270);
2677 NYI_assert (15, 10, 0x07);
2678
2679 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2680 if (INSTR (16, 16))
2681 {
2682 index = INSTR (20, 17);
2683 aarch64_set_vec_u8 (cpu, vd, index,
2684 aarch64_get_reg_u8 (cpu, rs, NO_SP));
2685 }
2686 else if (INSTR (17, 17))
2687 {
2688 index = INSTR (20, 18);
2689 aarch64_set_vec_u16 (cpu, vd, index,
2690 aarch64_get_reg_u16 (cpu, rs, NO_SP));
2691 }
2692 else if (INSTR (18, 18))
2693 {
2694 index = INSTR (20, 19);
2695 aarch64_set_vec_u32 (cpu, vd, index,
2696 aarch64_get_reg_u32 (cpu, rs, NO_SP));
2697 }
2698 else if (INSTR (19, 19))
2699 {
2700 index = INSTR (20, 20);
2701 aarch64_set_vec_u64 (cpu, vd, index,
2702 aarch64_get_reg_u64 (cpu, rs, NO_SP));
2703 }
2704 else
2705 HALT_NYI;
2706 }
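
/* Encoding example (illustrative): INS Vd.S[2], Wn places its element
   size and index in instr[20,16] as 10100 -- bit 18 selects 32-bit
   elements and instr[20,19] = 0b10 gives index 2.  */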
2707
2708 static void
2709 do_vec_DUP_vector_into_vector (sim_cpu *cpu)
2710 {
2711 /* instr[31] = 0
2712 instr[30] = half(0)/full(1)
2713 instr[29,21] = 00 1110 000
2714 instr[20,16] = element size and index
2715 instr[15,10] = 0000 01
2716 instr[9,5] = V source
2717 instr[4,0] = V dest. */
2718
2719 unsigned full = INSTR (30, 30);
2720 unsigned vs = INSTR (9, 5);
2721 unsigned vd = INSTR (4, 0);
2722 int i, index;
2723
2724 NYI_assert (29, 21, 0x070);
2725 NYI_assert (15, 10, 0x01);
2726
2727 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2728 if (INSTR (16, 16))
2729 {
2730 index = INSTR (20, 17);
2731
2732 for (i = 0; i < (full ? 16 : 8); i++)
2733 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vs, index));
2734 }
2735 else if (INSTR (17, 17))
2736 {
2737 index = INSTR (20, 18);
2738
2739 for (i = 0; i < (full ? 8 : 4); i++)
2740 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, index));
2741 }
2742 else if (INSTR (18, 18))
2743 {
2744 index = INSTR (20, 19);
2745
2746 for (i = 0; i < (full ? 4 : 2); i++)
2747 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, index));
2748 }
2749 else
2750 {
2751 if (INSTR (19, 19) == 0)
2752 HALT_UNALLOC;
2753
2754 if (! full)
2755 HALT_UNALLOC;
2756
2757 index = INSTR (20, 20);
2758
2759 for (i = 0; i < 2; i++)
2760 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, index));
2761 }
2762 }
2763
2764 static void
2765 do_vec_TBL (sim_cpu *cpu)
2766 {
2767 /* instr[31] = 0
2768 instr[30] = half(0)/full(1)
2769 instr[29,21] = 00 1110 000
2770 instr[20,16] = Vm
2771 instr[15] = 0
2772 instr[14,13] = vec length
2773 instr[12,10] = 000
2774 instr[9,5] = V start
2775 instr[4,0] = V dest */
2776
2777 int full = INSTR (30, 30);
2778 int len = INSTR (14, 13) + 1;
2779 unsigned vm = INSTR (20, 16);
2780 unsigned vn = INSTR (9, 5);
2781 unsigned vd = INSTR (4, 0);
2782 unsigned i;
2783
2784 NYI_assert (29, 21, 0x070);
2785 NYI_assert (12, 10, 0);
2786
2787 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2788 for (i = 0; i < (full ? 16 : 8); i++)
2789 {
2790 unsigned int selector = aarch64_get_vec_u8 (cpu, vm, i);
2791 uint8_t val;
2792
2793 if (selector < 16)
2794 val = aarch64_get_vec_u8 (cpu, vn, selector);
2795 else if (selector < 32)
2796 val = len < 2 ? 0 : aarch64_get_vec_u8 (cpu, vn + 1, selector - 16);
2797 else if (selector < 48)
2798 val = len < 3 ? 0 : aarch64_get_vec_u8 (cpu, vn + 2, selector - 32);
2799 else if (selector < 64)
2800 val = len < 4 ? 0 : aarch64_get_vec_u8 (cpu, vn + 3, selector - 48);
2801 else
2802 val = 0;
2803
2804 aarch64_set_vec_u8 (cpu, vd, i, val);
2805 }
2806 }
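
/* Example (illustrative): with len = 2 a selector byte of 17 fetches
   element 1 of register vn + 1, while any selector >= 32 is out of
   range for a two register table and yields zero.  */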
2807
2808 static void
2809 do_vec_TRN (sim_cpu *cpu)
2810 {
2811 /* instr[31] = 0
2812 instr[30] = half(0)/full(1)
2813 instr[29,24] = 00 1110
2814 instr[23,22] = size
2815 instr[21] = 0
2816 instr[20,16] = Vm
2817 instr[15] = 0
2818 instr[14] = TRN1 (0) / TRN2 (1)
2819 instr[13,10] = 1010
2820 instr[9,5] = V source
2821 instr[4,0] = V dest. */
2822
2823 int full = INSTR (30, 30);
2824 int second = INSTR (14, 14);
2825 unsigned vm = INSTR (20, 16);
2826 unsigned vn = INSTR (9, 5);
2827 unsigned vd = INSTR (4, 0);
2828 unsigned i;
2829
2830 NYI_assert (29, 24, 0x0E);
2831 NYI_assert (13, 10, 0xA);
2832
2833 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2834 switch (INSTR (23, 22))
2835 {
2836 case 0:
2837 for (i = 0; i < (full ? 8 : 4); i++)
2838 {
2839 aarch64_set_vec_u8
2840 (cpu, vd, i * 2,
2841 aarch64_get_vec_u8 (cpu, second ? vm : vn, i * 2));
2842 aarch64_set_vec_u8
2843 (cpu, vd, i * 2 + 1,
2844 aarch64_get_vec_u8 (cpu, second ? vn : vm, i * 2 + 1));
2845 }
2846 break;
2847
2848 case 1:
2849 for (i = 0; i < (full ? 4 : 2); i++)
2850 {
2851 aarch64_set_vec_u16
2852 (cpu, vd, i * 2,
2853 aarch64_get_vec_u16 (cpu, second ? vm : vn, i * 2));
2854 aarch64_set_vec_u16
2855 (cpu, vd, i * 2 + 1,
2856 aarch64_get_vec_u16 (cpu, second ? vn : vm, i * 2 + 1));
2857 }
2858 break;
2859
2860 case 2:
2861 aarch64_set_vec_u32
2862 (cpu, vd, 0, aarch64_get_vec_u32 (cpu, second ? vm : vn, 0));
2863 aarch64_set_vec_u32
2864 (cpu, vd, 1, aarch64_get_vec_u32 (cpu, second ? vn : vm, 1));
2865 aarch64_set_vec_u32
2866 (cpu, vd, 2, aarch64_get_vec_u32 (cpu, second ? vm : vn, 2));
2867 aarch64_set_vec_u32
2868 (cpu, vd, 3, aarch64_get_vec_u32 (cpu, second ? vn : vm, 3));
2869 break;
2870
2871 case 3:
2872 if (! full)
2873 HALT_UNALLOC;
2874
2875 aarch64_set_vec_u64 (cpu, vd, 0,
2876 aarch64_get_vec_u64 (cpu, second ? vm : vn, 0));
2877 aarch64_set_vec_u64 (cpu, vd, 1,
2878 aarch64_get_vec_u64 (cpu, second ? vn : vm, 1));
2879 break;
2880 }
2881 }
2882
2883 static void
2884 do_vec_DUP_scalar_into_vector (sim_cpu *cpu)
2885 {
2886 /* instr[31] = 0
2887 instr[30] = 0=> zero top 64-bits, 1=> duplicate into top 64-bits
2888 [must be 1 for 64-bit xfer]
2889 instr[29,20] = 00 1110 0000
2890 instr[19,16] = element size: 0001=> 8-bits, 0010=> 16-bits,
2891 0100=> 32-bits, 1000=> 64-bits
2892 instr[15,10] = 0000 11
2893 instr[9,5] = W source
2894 instr[4,0] = V dest. */
2895
2896 unsigned i;
2897 unsigned Vd = INSTR (4, 0);
2898 unsigned Rs = INSTR (9, 5);
2899 int both = INSTR (30, 30);
2900
2901 NYI_assert (29, 20, 0x0E0);
2902 NYI_assert (15, 10, 0x03);
2903
2904 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2905 switch (INSTR (19, 16))
2906 {
2907 case 1:
2908 for (i = 0; i < (both ? 16 : 8); i++)
2909 aarch64_set_vec_u8 (cpu, Vd, i, aarch64_get_reg_u8 (cpu, Rs, NO_SP));
2910 break;
2911
2912 case 2:
2913 for (i = 0; i < (both ? 8 : 4); i++)
2914 aarch64_set_vec_u16 (cpu, Vd, i, aarch64_get_reg_u16 (cpu, Rs, NO_SP));
2915 break;
2916
2917 case 4:
2918 for (i = 0; i < (both ? 4 : 2); i++)
2919 aarch64_set_vec_u32 (cpu, Vd, i, aarch64_get_reg_u32 (cpu, Rs, NO_SP));
2920 break;
2921
2922 case 8:
2923 if (!both)
2924 HALT_NYI;
2925 aarch64_set_vec_u64 (cpu, Vd, 0, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
2926 aarch64_set_vec_u64 (cpu, Vd, 1, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
2927 break;
2928
2929 default:
2930 HALT_NYI;
2931 }
2932 }
2933
2934 static void
2935 do_vec_UZP (sim_cpu *cpu)
2936 {
2937 /* instr[31] = 0
2938 instr[30] = half(0)/full(1)
2939 instr[29,24] = 00 1110
2940 instr[23,22] = size: byte(00), half(01), word (10), long (11)
2941 instr[21] = 0
2942 instr[20,16] = Vm
2943 instr[15] = 0
2944 instr[14] = lower (0) / upper (1)
2945 instr[13,10] = 0110
2946 instr[9,5] = Vn
2947 instr[4,0] = Vd. */
2948
2949 int full = INSTR (30, 30);
2950 int upper = INSTR (14, 14);
2951
2952 unsigned vm = INSTR (20, 16);
2953 unsigned vn = INSTR (9, 5);
2954 unsigned vd = INSTR (4, 0);
2955
2956 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
2957 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
2958 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
2959 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
2960
2961 uint64_t val1;
2962 uint64_t val2;
2963
2964 uint64_t input2 = full ? val_n2 : val_m1;
2965
2966 NYI_assert (29, 24, 0x0E);
2967 NYI_assert (21, 21, 0);
2968 NYI_assert (15, 15, 0);
2969 NYI_assert (13, 10, 6);
2970
2971 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2972 switch (INSTR (23, 22))
2973 {
2974 case 0:
2975 val1 = (val_n1 >> (upper * 8)) & 0xFFULL;
2976 val1 |= (val_n1 >> ((upper * 8) + 8)) & 0xFF00ULL;
2977 val1 |= (val_n1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
2978 val1 |= (val_n1 >> ((upper * 8) + 24)) & 0xFF000000ULL;
2979
2980 val1 |= (input2 << (32 - (upper * 8))) & 0xFF00000000ULL;
2981 val1 |= (input2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
2982 val1 |= (input2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
2983 val1 |= (input2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
2984
2985 if (full)
2986 {
2987 val2 = (val_m1 >> (upper * 8)) & 0xFFULL;
2988 val2 |= (val_m1 >> ((upper * 8) + 8)) & 0xFF00ULL;
2989 val2 |= (val_m1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
2990 val2 |= (val_m1 >> ((upper * 8) + 24)) & 0xFF000000ULL;
2991
2992 val2 |= (val_m2 << (32 - (upper * 8))) & 0xFF00000000ULL;
2993 val2 |= (val_m2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
2994 val2 |= (val_m2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
2995 val2 |= (val_m2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
2996 }
2997 break;
2998
2999 case 1:
3000 val1 = (val_n1 >> (upper * 16)) & 0xFFFFULL;
3001 val1 |= (val_n1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;
3002
3003 val1 |= (input2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;
3004 val1 |= (input2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
3005
3006 if (full)
3007 {
3008 val2 = (val_m1 >> (upper * 16)) & 0xFFFFULL;
3009 val2 |= (val_m1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;
3010
3011 val2 |= (val_m2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;
3012 val2 |= (val_m2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
3013 }
3014 break;
3015
3016 case 2:
3017 val1 = (val_n1 >> (upper * 32)) & 0xFFFFFFFF;
3018 val1 |= (input2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
3019
3020 if (full)
3021 {
3022 val2 = (val_m1 >> (upper * 32)) & 0xFFFFFFFF;
3023 val2 |= (val_m2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
3024 }
3025 break;
3026
3027 case 3:
3028 if (! full)
3029 HALT_UNALLOC;
3030
3031 val1 = upper ? val_n2 : val_n1;
3032 val2 = upper ? val_m2 : val_m1;
3033 break;
3034 }
3035
3036 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3037 if (full)
3038 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3039 }
3040
3041 static void
3042 do_vec_ZIP (sim_cpu *cpu)
3043 {
3044 /* instr[31] = 0
3045 instr[30] = half(0)/full(1)
3046 instr[29,24] = 00 1110
3047 instr[23,22] = size: byte(00), half(01), word (10), long (11)
3048 instr[21] = 0
3049 instr[20,16] = Vm
3050 instr[15] = 0
3051 instr[14] = lower (0) / upper (1)
3052 instr[13,10] = 1110
3053 instr[9,5] = Vn
3054 instr[4,0] = Vd. */
3055
3056 int full = INSTR (30, 30);
3057 int upper = INSTR (14, 14);
3058
3059 unsigned vm = INSTR (20, 16);
3060 unsigned vn = INSTR (9, 5);
3061 unsigned vd = INSTR (4, 0);
3062
3063 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
3064 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
3065 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
3066 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
3067
3068 uint64_t val1 = 0;
3069 uint64_t val2 = 0;
3070
3071 uint64_t input1 = upper ? (full ? val_n2 : (val_n1 >> 32)) : val_n1;
3072 uint64_t input2 = upper ? (full ? val_m2 : (val_m1 >> 32)) : val_m1;
3073
3074 NYI_assert (29, 24, 0x0E);
3075 NYI_assert (21, 21, 0);
3076 NYI_assert (15, 15, 0);
3077 NYI_assert (13, 10, 0xE);
3078
3079 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3080 switch (INSTR (23, 22))
3081 {
3082 case 0:
3083 val1 =
3084 ((input1 << 0) & (0xFF << 0))
3085 | ((input2 << 8) & (0xFF << 8))
3086 | ((input1 << 8) & (0xFF << 16))
3087 | ((input2 << 16) & (0xFFULL << 24))
3088 | ((input1 << 16) & (0xFFULL << 32))
3089 | ((input2 << 24) & (0xFFULL << 40))
3090 | ((input1 << 24) & (0xFFULL << 48))
3091 | ((input2 << 32) & (0xFFULL << 56));
3092
3093 val2 =
3094 ((input1 >> 32) & (0xFF << 0))
3095 | ((input2 >> 24) & (0xFF << 8))
3096 | ((input1 >> 24) & (0xFF << 16))
3097 | ((input2 >> 16) & (0xFFULL << 24))
3098 | ((input1 >> 16) & (0xFFULL << 32))
3099 | ((input2 >> 8) & (0xFFULL << 40))
3100 | ((input1 >> 8) & (0xFFULL << 48))
3101 | ((input2 >> 0) & (0xFFULL << 56));
3102 break;
3103
3104 case 1:
3105 val1 =
3106 ((input1 << 0) & (0xFFFF << 0))
3107 | ((input2 << 16) & (0xFFFFULL << 16))
3108 | ((input1 << 16) & (0xFFFFULL << 32))
3109 | ((input2 << 32) & (0xFFFFULL << 48));
3110
3111 val2 =
3112 ((input1 >> 32) & (0xFFFF << 0))
3113 | ((input2 >> 16) & (0xFFFFULL << 16))
3114 | ((input1 >> 16) & (0xFFFFULL << 32))
3115 | ((input2 >> 0) & (0xFFFFULL << 48));
3116 break;
3117
3118 case 2:
3119 val1 = (input1 & 0xFFFFFFFFULL) | (input2 << 32);
3120 val2 = (input1 >> 32) | (input2 & 0xFFFFFFFF00000000ULL);
3121 break;
3122
3123 case 3:
3124 val1 = input1;
3125 val2 = input2;
3126 break;
3127 }
3128
3129 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3130 if (full)
3131 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3132 }
3133
3134 /* Floating point immediates are encoded in 8 bits.
3135 fpimm[7] = sign bit.
3136 fpimm[6:4] = signed exponent.
3137 fpimm[3:0] = fraction (assuming leading 1).
3138 i.e. F = s * 1.f * 2^(e - b). */
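
/* Worked examples (illustrative): encoding 0x70 (s = 0, e = 7, f = 0)
   expands to 1.0, and encoding 0x00 (s = 0, e = 0, f = 0) expands to
   2.0, matching the expansion loops below.  */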
3139
3140 static float
3141 fp_immediate_for_encoding_32 (uint32_t imm8)
3142 {
3143 float u;
3144 uint32_t s, e, f, i;
3145
3146 s = (imm8 >> 7) & 0x1;
3147 e = (imm8 >> 4) & 0x7;
3148 f = imm8 & 0xf;
3149
3150 /* The fp value is s * n/16 * 2^r where n is 16+f and r is the signed exponent. */
3151 u = (16.0 + f) / 16.0;
3152
3153 /* N.B. exponent is signed. */
3154 if (e < 4)
3155 {
3156 int epos = e;
3157
3158 for (i = 0; i <= epos; i++)
3159 u *= 2.0;
3160 }
3161 else
3162 {
3163 int eneg = 7 - e;
3164
3165 for (i = 0; i < eneg; i++)
3166 u /= 2.0;
3167 }
3168
3169 if (s)
3170 u = - u;
3171
3172 return u;
3173 }
3174
3175 static double
3176 fp_immediate_for_encoding_64 (uint32_t imm8)
3177 {
3178 double u;
3179 uint32_t s, e, f, i;
3180
3181 s = (imm8 >> 7) & 0x1;
3182 e = (imm8 >> 4) & 0x7;
3183 f = imm8 & 0xf;
3184
3185 /* The fp value is s * n/16 * 2^r where n is 16+f and r is the signed exponent. */
3186 u = (16.0 + f) / 16.0;
3187
3188 /* N.B. exponent is signed. */
3189 if (e < 4)
3190 {
3191 int epos = e;
3192
3193 for (i = 0; i <= epos; i++)
3194 u *= 2.0;
3195 }
3196 else
3197 {
3198 int eneg = 7 - e;
3199
3200 for (i = 0; i < eneg; i++)
3201 u /= 2.0;
3202 }
3203
3204 if (s)
3205 u = - u;
3206
3207 return u;
3208 }
3209
3210 static void
3211 do_vec_MOV_immediate (sim_cpu *cpu)
3212 {
3213 /* instr[31] = 0
3214 instr[30] = full/half selector
3215 instr[29,19] = 00111100000
3216 instr[18,16] = high 3 bits of uimm8
3217 instr[15,12] = size & shift:
3218 0000 => 32-bit
3219 0010 => 32-bit + LSL#8
3220 0100 => 32-bit + LSL#16
3221 0110 => 32-bit + LSL#24
3222 1010 => 16-bit + LSL#8
3223 1000 => 16-bit
3224 1101 => 32-bit + MSL#16
3225 1100 => 32-bit + MSL#8
3226 1110 => 8-bit
3227 1111 => double
3228 instr[11,10] = 01
3229 instr[9,5] = low 5-bits of uimm8
3230 instr[4,0] = Vd. */
3231
3232 int full = INSTR (30, 30);
3233 unsigned vd = INSTR (4, 0);
3234 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3235 unsigned i;
3236
3237 NYI_assert (29, 19, 0x1E0);
3238 NYI_assert (11, 10, 1);
3239
3240 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3241 switch (INSTR (15, 12))
3242 {
3243 case 0x0: /* 32-bit, no shift. */
3244 case 0x2: /* 32-bit, shift by 8. */
3245 case 0x4: /* 32-bit, shift by 16. */
3246 case 0x6: /* 32-bit, shift by 24. */
3247 val <<= (8 * INSTR (14, 13));
3248 for (i = 0; i < (full ? 4 : 2); i++)
3249 aarch64_set_vec_u32 (cpu, vd, i, val);
3250 break;
3251
3252 case 0xa: /* 16-bit, shift by 8. */
3253 val <<= 8;
3254 /* Fall through. */
3255 case 0x8: /* 16-bit, no shift. */
3256 for (i = 0; i < (full ? 8 : 4); i++)
3257 aarch64_set_vec_u16 (cpu, vd, i, val);
3258 break;
3259
3260 case 0xd: /* 32-bit, mask shift by 16. */
3261 val <<= 8;
3262 val |= 0xFF;
3263 /* Fall through. */
3264 case 0xc: /* 32-bit, mask shift by 8. */
3265 val <<= 8;
3266 val |= 0xFF;
3267 for (i = 0; i < (full ? 4 : 2); i++)
3268 aarch64_set_vec_u32 (cpu, vd, i, val);
3269 break;
3270
3271 case 0xe: /* 8-bit, no shift. */
3272 for (i = 0; i < (full ? 16 : 8); i++)
3273 aarch64_set_vec_u8 (cpu, vd, i, val);
3274 break;
3275
3276 case 0xf: /* FMOV Vs.{2|4}S, #fpimm. */
3277 {
3278 float u = fp_immediate_for_encoding_32 (val);
3279 for (i = 0; i < (full ? 4 : 2); i++)
3280 aarch64_set_vec_float (cpu, vd, i, u);
3281 break;
3282 }
3283
3284 default:
3285 HALT_NYI;
3286 }
3287 }
3288
3289 static void
3290 do_vec_MVNI (sim_cpu *cpu)
3291 {
3292 /* instr[31] = 0
3293 instr[30] = full/half selector
3294 instr[29,19] = 10111100000
3295 instr[18,16] = high 3 bits of uimm8
3296 instr[15,12] = selector
3297 instr[11,10] = 01
3298 instr[9,5] = low 5-bits of uimm8
3299 instr[4,0] = Vd. */
3300
3301 int full = INSTR (30, 30);
3302 unsigned vd = INSTR (4, 0);
3303 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3304 unsigned i;
3305
3306 NYI_assert (29, 19, 0x5E0);
3307 NYI_assert (11, 10, 1);
3308
3309 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3310 switch (INSTR (15, 12))
3311 {
3312 case 0x0: /* 32-bit, no shift. */
3313 case 0x2: /* 32-bit, shift by 8. */
3314 case 0x4: /* 32-bit, shift by 16. */
3315 case 0x6: /* 32-bit, shift by 24. */
3316 val <<= (8 * INSTR (14, 13));
3317 val = ~ val;
3318 for (i = 0; i < (full ? 4 : 2); i++)
3319 aarch64_set_vec_u32 (cpu, vd, i, val);
3320 return;
3321
3322 case 0xa: /* 16-bit, 8 bit shift. */
3323 val <<= 8;
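/* Fall through. */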
3324 case 0x8: /* 16-bit, no shift. */
3325 val = ~ val;
3326 for (i = 0; i < (full ? 8 : 4); i++)
3327 aarch64_set_vec_u16 (cpu, vd, i, val);
3328 return;
3329
3330 case 0xd: /* 32-bit, mask shift by 16. */
3331 val <<= 8;
3332 val |= 0xFF;
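/* Fall through. */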
3333 case 0xc: /* 32-bit, mask shift by 8. */
3334 val <<= 8;
3335 val |= 0xFF;
3336 val = ~ val;
3337 for (i = 0; i < (full ? 4 : 2); i++)
3338 aarch64_set_vec_u32 (cpu, vd, i, val);
3339 return;
3340
3341 case 0xE: /* MOVI Dn, #mask64 */
3342 {
3343 uint64_t mask = 0;
3344
3345 for (i = 0; i < 8; i++)
3346 if (val & (1 << i))
3347 mask |= (0xFFULL << (i * 8));
3348 aarch64_set_vec_u64 (cpu, vd, 0, mask);
3349 aarch64_set_vec_u64 (cpu, vd, 1, mask);
3350 return;
3351 }
3352
3353 case 0xf: /* FMOV Vd.2D, #fpimm. */
3354 {
3355 double u = fp_immediate_for_encoding_64 (val);
3356
3357 if (! full)
3358 HALT_UNALLOC;
3359
3360 aarch64_set_vec_double (cpu, vd, 0, u);
3361 aarch64_set_vec_double (cpu, vd, 1, u);
3362 return;
3363 }
3364
3365 default:
3366 HALT_NYI;
3367 }
3368 }
3369
3370 #define ABS(A) ((A) < 0 ? - (A) : (A))
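
/* N.B. ABS of the most negative representable value wraps to itself;
   that matches the AArch64 ABS result for that input (e.g. an 8-bit
   element 0x80 stays 0x80), so no special case is needed below.  */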
3371
3372 static void
3373 do_vec_ABS (sim_cpu *cpu)
3374 {
3375 /* instr[31] = 0
3376 instr[30] = half(0)/full(1)
3377 instr[29,24] = 00 1110
3378 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3379 instr[21,10] = 10 0000 1011 10
3380 instr[9,5] = Vn
3381 instr[4,0] = Vd. */
3382
3383 unsigned vn = INSTR (9, 5);
3384 unsigned vd = INSTR (4, 0);
3385 unsigned full = INSTR (30, 30);
3386 unsigned i;
3387
3388 NYI_assert (29, 24, 0x0E);
3389 NYI_assert (21, 10, 0x82E);
3390
3391 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3392 switch (INSTR (23, 22))
3393 {
3394 case 0:
3395 for (i = 0; i < (full ? 16 : 8); i++)
3396 aarch64_set_vec_s8 (cpu, vd, i,
3397 ABS (aarch64_get_vec_s8 (cpu, vn, i)));
3398 break;
3399
3400 case 1:
3401 for (i = 0; i < (full ? 8 : 4); i++)
3402 aarch64_set_vec_s16 (cpu, vd, i,
3403 ABS (aarch64_get_vec_s16 (cpu, vn, i)));
3404 break;
3405
3406 case 2:
3407 for (i = 0; i < (full ? 4 : 2); i++)
3408 aarch64_set_vec_s32 (cpu, vd, i,
3409 ABS (aarch64_get_vec_s32 (cpu, vn, i)));
3410 break;
3411
3412 case 3:
3413 if (! full)
3414 HALT_NYI;
3415 for (i = 0; i < 2; i++)
3416 aarch64_set_vec_s64 (cpu, vd, i,
3417 ABS (aarch64_get_vec_s64 (cpu, vn, i)));
3418 break;
3419 }
3420 }
3421
3422 static void
3423 do_vec_ADDV (sim_cpu *cpu)
3424 {
3425 /* instr[31] = 0
3426 instr[30] = full/half selector
3427 instr[29,24] = 00 1110
3428 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3429 instr[21,10] = 11 0001 1011 10
3430 instr[9,5] = Vm
3431 instr[4,0] = Rd. */
3432
3433 unsigned vm = INSTR (9, 5);
3434 unsigned rd = INSTR (4, 0);
3435 unsigned i;
3436 int full = INSTR (30, 30);
3437
3438 NYI_assert (29, 24, 0x0E);
3439 NYI_assert (21, 10, 0xC6E);
3440
3441 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3442 switch (INSTR (23, 22))
3443 {
3444 case 0:
3445 {
3446 uint8_t val = 0;
3447 for (i = 0; i < (full ? 16 : 8); i++)
3448 val += aarch64_get_vec_u8 (cpu, vm, i);
3449 aarch64_set_vec_u64 (cpu, rd, 0, val);
3450 return;
3451 }
3452
3453 case 1:
3454 {
3455 uint16_t val = 0;
3456 for (i = 0; i < (full ? 8 : 4); i++)
3457 val += aarch64_get_vec_u16 (cpu, vm, i);
3458 aarch64_set_vec_u64 (cpu, rd, 0, val);
3459 return;
3460 }
3461
3462 case 2:
3463 {
3464 uint32_t val = 0;
3465 if (! full)
3466 HALT_UNALLOC;
3467 for (i = 0; i < 4; i++)
3468 val += aarch64_get_vec_u32 (cpu, vm, i);
3469 aarch64_set_vec_u64 (cpu, rd, 0, val);
3470 return;
3471 }
3472
3473 case 3:
3474 HALT_UNALLOC;
3475 }
3476 }
3477
3478 static void
3479 do_vec_ins_2 (sim_cpu *cpu)
3480 {
3481 /* instr[31,21] = 01001110000
3482 instr[20,18] = size & element selector
3483 instr[17,14] = 0000
3484 instr[13] = direction: to vec(0), from vec (1)
3485 instr[12,10] = 111
3486 instr[9,5] = Vm
3487 instr[4,0] = Vd. */
3488
3489 unsigned elem;
3490 unsigned vm = INSTR (9, 5);
3491 unsigned vd = INSTR (4, 0);
3492
3493 NYI_assert (31, 21, 0x270);
3494 NYI_assert (17, 14, 0);
3495 NYI_assert (12, 10, 7);
3496
3497 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3498 if (INSTR (13, 13) == 1)
3499 {
3500 if (INSTR (18, 18) == 1)
3501 {
3502 /* 32-bit moves. */
3503 elem = INSTR (20, 19);
3504 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3505 aarch64_get_vec_u32 (cpu, vm, elem));
3506 }
3507 else
3508 {
3509 /* 64-bit moves. */
3510 if (INSTR (19, 19) != 1)
3511 HALT_NYI;
3512
3513 elem = INSTR (20, 20);
3514 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3515 aarch64_get_vec_u64 (cpu, vm, elem));
3516 }
3517 }
3518 else
3519 {
3520 if (INSTR (18, 18) == 1)
3521 {
3522 /* 32-bit moves. */
3523 elem = INSTR (20, 19);
3524 aarch64_set_vec_u32 (cpu, vd, elem,
3525 aarch64_get_reg_u32 (cpu, vm, NO_SP));
3526 }
3527 else
3528 {
3529 /* 64-bit moves. */
3530 if (INSTR (19, 19) != 1)
3531 HALT_NYI;
3532
3533 elem = INSTR (20, 20);
3534 aarch64_set_vec_u64 (cpu, vd, elem,
3535 aarch64_get_reg_u64 (cpu, vm, NO_SP));
3536 }
3537 }
3538 }
3539
3540 #define DO_VEC_WIDENING_MUL(N, DST_TYPE, READ_TYPE, WRITE_TYPE) \
3541 do \
3542 { \
3543 DST_TYPE a[N], b[N]; \
3544 \
3545 for (i = 0; i < (N); i++) \
3546 { \
3547 a[i] = aarch64_get_vec_##READ_TYPE (cpu, vn, i + bias); \
3548 b[i] = aarch64_get_vec_##READ_TYPE (cpu, vm, i + bias); \
3549 } \
3550 for (i = 0; i < (N); i++) \
3551 aarch64_set_vec_##WRITE_TYPE (cpu, vd, i, a[i] * b[i]); \
3552 } \
3553 while (0)
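
/* Usage sketch (illustrative): for UMULL2 Vd.8H, Vn.16B, Vm.16B the
   caller sets bias to 8 so the macro reads source elements 8..15,
   widening each u8 product into a u16 result element.  */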
3554
3555 static void
3556 do_vec_mull (sim_cpu *cpu)
3557 {
3558 /* instr[31] = 0
3559 instr[30] = lower(0)/upper(1) selector
3560 instr[29] = signed(0)/unsigned(1)
3561 instr[28,24] = 0 1110
3562 instr[23,22] = size: 8-bit (00), 16-bit (01), 32-bit (10)
3563 instr[21] = 1
3564 instr[20,16] = Vm
3565 instr[15,10] = 11 0000
3566 instr[9,5] = Vn
3567 instr[4,0] = Vd. */
3568
3569 int unsign = INSTR (29, 29);
3570 int bias = INSTR (30, 30);
3571 unsigned vm = INSTR (20, 16);
3572 unsigned vn = INSTR ( 9, 5);
3573 unsigned vd = INSTR ( 4, 0);
3574 unsigned i;
3575
3576 NYI_assert (28, 24, 0x0E);
3577 NYI_assert (15, 10, 0x30);
3578
3579 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3580 /* NB: Read source values before writing results, in case
3581 the source and destination vectors are the same. */
3582 switch (INSTR (23, 22))
3583 {
3584 case 0:
3585 if (bias)
3586 bias = 8;
3587 if (unsign)
3588 DO_VEC_WIDENING_MUL (8, uint16_t, u8, u16);
3589 else
3590 DO_VEC_WIDENING_MUL (8, int16_t, s8, s16);
3591 return;
3592
3593 case 1:
3594 if (bias)
3595 bias = 4;
3596 if (unsign)
3597 DO_VEC_WIDENING_MUL (4, uint32_t, u16, u32);
3598 else
3599 DO_VEC_WIDENING_MUL (4, int32_t, s16, s32);
3600 return;
3601
3602 case 2:
3603 if (bias)
3604 bias = 2;
3605 if (unsign)
3606 DO_VEC_WIDENING_MUL (2, uint64_t, u32, u64);
3607 else
3608 DO_VEC_WIDENING_MUL (2, int64_t, s32, s64);
3609 return;
3610
3611 case 3:
3612 HALT_NYI;
3613 }
3614 }
3615
3616 static void
3617 do_vec_fadd (sim_cpu *cpu)
3618 {
3619 /* instr[31] = 0
3620 instr[30] = half(0)/full(1)
3621 instr[29,24] = 001110
3622 instr[23] = FADD(0)/FSUB(1)
3623 instr[22] = float (0)/double(1)
3624 instr[21] = 1
3625 instr[20,16] = Vm
3626 instr[15,10] = 110101
3627 instr[9,5] = Vn
3628 instr[4,0] = Vd. */
3629
3630 unsigned vm = INSTR (20, 16);
3631 unsigned vn = INSTR (9, 5);
3632 unsigned vd = INSTR (4, 0);
3633 unsigned i;
3634 int full = INSTR (30, 30);
3635
3636 NYI_assert (29, 24, 0x0E);
3637 NYI_assert (21, 21, 1);
3638 NYI_assert (15, 10, 0x35);
3639
3640 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3641 if (INSTR (23, 23))
3642 {
3643 if (INSTR (22, 22))
3644 {
3645 if (! full)
3646 HALT_NYI;
3647
3648 for (i = 0; i < 2; i++)
3649 aarch64_set_vec_double (cpu, vd, i,
3650 aarch64_get_vec_double (cpu, vn, i)
3651 - aarch64_get_vec_double (cpu, vm, i));
3652 }
3653 else
3654 {
3655 for (i = 0; i < (full ? 4 : 2); i++)
3656 aarch64_set_vec_float (cpu, vd, i,
3657 aarch64_get_vec_float (cpu, vn, i)
3658 - aarch64_get_vec_float (cpu, vm, i));
3659 }
3660 }
3661 else
3662 {
3663 if (INSTR (22, 22))
3664 {
3665 if (! full)
3666 HALT_NYI;
3667
3668 for (i = 0; i < 2; i++)
3669 aarch64_set_vec_double (cpu, vd, i,
3670 aarch64_get_vec_double (cpu, vm, i)
3671 + aarch64_get_vec_double (cpu, vn, i));
3672 }
3673 else
3674 {
3675 for (i = 0; i < (full ? 4 : 2); i++)
3676 aarch64_set_vec_float (cpu, vd, i,
3677 aarch64_get_vec_float (cpu, vm, i)
3678 + aarch64_get_vec_float (cpu, vn, i));
3679 }
3680 }
3681 }
3682
3683 static void
3684 do_vec_add (sim_cpu *cpu)
3685 {
3686 /* instr[31] = 0
3687 instr[30] = full/half selector
3688 instr[29,24] = 001110
3689 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3690 instr[21] = 1
3691 instr[20,16] = Vm
3692 instr[15,10] = 100001
3693 instr[9,5] = Vn
3694 instr[4,0] = Vd. */
3695
3696 unsigned vm = INSTR (20, 16);
3697 unsigned vn = INSTR (9, 5);
3698 unsigned vd = INSTR (4, 0);
3699 unsigned i;
3700 int full = INSTR (30, 30);
3701
3702 NYI_assert (29, 24, 0x0E);
3703 NYI_assert (21, 21, 1);
3704 NYI_assert (15, 10, 0x21);
3705
3706 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3707 switch (INSTR (23, 22))
3708 {
3709 case 0:
3710 for (i = 0; i < (full ? 16 : 8); i++)
3711 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
3712 + aarch64_get_vec_u8 (cpu, vm, i));
3713 return;
3714
3715 case 1:
3716 for (i = 0; i < (full ? 8 : 4); i++)
3717 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
3718 + aarch64_get_vec_u16 (cpu, vm, i));
3719 return;
3720
3721 case 2:
3722 for (i = 0; i < (full ? 4 : 2); i++)
3723 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
3724 + aarch64_get_vec_u32 (cpu, vm, i));
3725 return;
3726
3727 case 3:
3728 if (! full)
3729 HALT_UNALLOC;
3730 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vn, 0)
3731 + aarch64_get_vec_u64 (cpu, vm, 0));
3732 aarch64_set_vec_u64 (cpu, vd, 1,
3733 aarch64_get_vec_u64 (cpu, vn, 1)
3734 + aarch64_get_vec_u64 (cpu, vm, 1));
3735 return;
3736 }
3737 }
3738
3739 static void
3740 do_vec_mul (sim_cpu *cpu)
3741 {
3742 /* instr[31] = 0
3743 instr[30] = full/half selector
3744 instr[29,24] = 00 1110
3745 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3746 instr[21] = 1
3747 instr[20,16] = Vm
3748 instr[15,10] = 10 0111
3749 instr[9,5] = Vn
3750 instr[4,0] = Vd. */
3751
3752 unsigned vm = INSTR (20, 16);
3753 unsigned vn = INSTR (9, 5);
3754 unsigned vd = INSTR (4, 0);
3755 unsigned i;
3756 int full = INSTR (30, 30);
3757 int bias = 0;
3758
3759 NYI_assert (29, 24, 0x0E);
3760 NYI_assert (21, 21, 1);
3761 NYI_assert (15, 10, 0x27);
3762
3763 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3764 switch (INSTR (23, 22))
3765 {
3766 case 0:
3767 DO_VEC_WIDENING_MUL (full ? 16 : 8, uint8_t, u8, u8);
3768 return;
3769
3770 case 1:
3771 DO_VEC_WIDENING_MUL (full ? 8 : 4, uint16_t, u16, u16);
3772 return;
3773
3774 case 2:
3775 DO_VEC_WIDENING_MUL (full ? 4 : 2, uint32_t, u32, u32);
3776 return;
3777
3778 case 3:
3779 HALT_UNALLOC;
3780 }
3781 }
3782
3783 static void
3784 do_vec_MLA (sim_cpu *cpu)
3785 {
3786 /* instr[31] = 0
3787 instr[30] = full/half selector
3788 instr[29,24] = 00 1110
3789 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3790 instr[21] = 1
3791 instr[20,16] = Vm
3792 instr[15,10] = 1001 01
3793 instr[9,5] = Vn
3794 instr[4,0] = Vd. */
3795
3796 unsigned vm = INSTR (20, 16);
3797 unsigned vn = INSTR (9, 5);
3798 unsigned vd = INSTR (4, 0);
3799 unsigned i;
3800 int full = INSTR (30, 30);
3801
3802 NYI_assert (29, 24, 0x0E);
3803 NYI_assert (21, 21, 1);
3804 NYI_assert (15, 10, 0x25);
3805
3806 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3807 switch (INSTR (23, 22))
3808 {
3809 case 0:
3810 for (i = 0; i < (full ? 16 : 8); i++)
3811 aarch64_set_vec_u8 (cpu, vd, i,
3812 aarch64_get_vec_u8 (cpu, vd, i)
3813 + (aarch64_get_vec_u8 (cpu, vn, i)
3814 * aarch64_get_vec_u8 (cpu, vm, i)));
3815 return;
3816
3817 case 1:
3818 for (i = 0; i < (full ? 8 : 4); i++)
3819 aarch64_set_vec_u16 (cpu, vd, i,
3820 aarch64_get_vec_u16 (cpu, vd, i)
3821 + (aarch64_get_vec_u16 (cpu, vn, i)
3822 * aarch64_get_vec_u16 (cpu, vm, i)));
3823 return;
3824
3825 case 2:
3826 for (i = 0; i < (full ? 4 : 2); i++)
3827 aarch64_set_vec_u32 (cpu, vd, i,
3828 aarch64_get_vec_u32 (cpu, vd, i)
3829 + (aarch64_get_vec_u32 (cpu, vn, i)
3830 * aarch64_get_vec_u32 (cpu, vm, i)));
3831 return;
3832
3833 default:
3834 HALT_UNALLOC;
3835 }
3836 }
3837
3838 static float
3839 fmaxnm (float a, float b)
3840 {
3841 if (! isnan (a))
3842 {
3843 if (! isnan (b))
3844 return a > b ? a : b;
3845 return a;
3846 }
3847 else if (! isnan (b))
3848 return b;
3849 return a;
3850 }
3851
3852 static float
3853 fminnm (float a, float b)
3854 {
3855 if (! isnan (a))
3856 {
3857 if (! isnan (b))
3858 return a < b ? a : b;
3859 return a;
3860 }
3861 else if (! isnan (b))
3862 return b;
3863 return a;
3864 }
3865
3866 static double
3867 dmaxnm (double a, double b)
3868 {
3869 if (! isnan (a))
3870 {
3871 if (! isnan (b))
3872 return a > b ? a : b;
3873 return a;
3874 }
3875 else if (! isnan (b))
3876 return b;
3877 return a;
3878 }
3879
3880 static double
3881 dminnm (double a, double b)
3882 {
3883 if (! isnan (a))
3884 {
3885 if (! isnan (b))
3886 return a < b ? a : b;
3887 return a;
3888 }
3889 else if (! isnan (b))
3890 return b;
3891 return a;
3892 }
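
/* Examples (illustrative): fmaxnm (2.0f, NAN) and fmaxnm (NAN, 2.0f)
   both return 2.0f, since a NaN operand is ignored when the other
   operand is a number; only fmaxnm (NAN, NAN) returns a NaN.  */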
3893
3894 static void
3895 do_vec_FminmaxNMP (sim_cpu *cpu)
3896 {
3897 /* instr [31] = 0
3898 instr [30] = half (0)/full (1)
3899 instr [29,24] = 10 1110
3900 instr [23] = max(0)/min(1)
3901 instr [22] = float (0)/double (1)
3902 instr [21] = 1
3903 instr [20,16] = Vm
3904 instr [15,10] = 1100 01
3905 instr [9,5] = Vn
3906 instr [4,0] = Vd. */
3907
3908 unsigned vm = INSTR (20, 16);
3909 unsigned vn = INSTR (9, 5);
3910 unsigned vd = INSTR (4, 0);
3911 int full = INSTR (30, 30);
3912
3913 NYI_assert (29, 24, 0x2E);
3914 NYI_assert (21, 21, 1);
3915 NYI_assert (15, 10, 0x31);
3916
3917 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3918 if (INSTR (22, 22))
3919 {
3920 double (* fn)(double, double) = INSTR (23, 23)
3921 ? dminnm : dmaxnm;
3922
3923 if (! full)
3924 HALT_NYI;
3925 aarch64_set_vec_double (cpu, vd, 0,
3926 fn (aarch64_get_vec_double (cpu, vn, 0),
3927 aarch64_get_vec_double (cpu, vn, 1)));
3928 aarch64_set_vec_double (cpu, vd, 1,
3929 fn (aarch64_get_vec_double (cpu, vm, 0),
3930 aarch64_get_vec_double (cpu, vm, 1)));
3931 }
3932 else
3933 {
3934 float (* fn)(float, float) = INSTR (23, 23)
3935 ? fminnm : fmaxnm;
3936
3937 aarch64_set_vec_float (cpu, vd, 0,
3938 fn (aarch64_get_vec_float (cpu, vn, 0),
3939 aarch64_get_vec_float (cpu, vn, 1)));
3940 if (full)
3941 aarch64_set_vec_float (cpu, vd, 1,
3942 fn (aarch64_get_vec_float (cpu, vn, 2),
3943 aarch64_get_vec_float (cpu, vn, 3)));
3944
3945 aarch64_set_vec_float (cpu, vd, (full ? 2 : 1),
3946 fn (aarch64_get_vec_float (cpu, vm, 0),
3947 aarch64_get_vec_float (cpu, vm, 1)));
3948 if (full)
3949 aarch64_set_vec_float (cpu, vd, 3,
3950 fn (aarch64_get_vec_float (cpu, vm, 2),
3951 aarch64_get_vec_float (cpu, vm, 3)));
3952 }
3953 }
3954
3955 static void
3956 do_vec_AND (sim_cpu *cpu)
3957 {
3958 /* instr[31] = 0
3959 instr[30] = half (0)/full (1)
3960 instr[29,21] = 001110001
3961 instr[20,16] = Vm
3962 instr[15,10] = 000111
3963 instr[9,5] = Vn
3964     instr[4,0] = Vd. */
3965
3966 unsigned vm = INSTR (20, 16);
3967 unsigned vn = INSTR (9, 5);
3968 unsigned vd = INSTR (4, 0);
3969 unsigned i;
3970 int full = INSTR (30, 30);
3971
3972 NYI_assert (29, 21, 0x071);
3973 NYI_assert (15, 10, 0x07);
3974
3975 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3976 for (i = 0; i < (full ? 4 : 2); i++)
3977 aarch64_set_vec_u32 (cpu, vd, i,
3978 aarch64_get_vec_u32 (cpu, vn, i)
3979 & aarch64_get_vec_u32 (cpu, vm, i));
3980 }
3981
3982 static void
3983 do_vec_BSL (sim_cpu *cpu)
3984 {
3985 /* instr[31] = 0
3986 instr[30] = half (0)/full (1)
3987 instr[29,21] = 101110011
3988 instr[20,16] = Vm
3989 instr[15,10] = 000111
3990 instr[9,5] = Vn
3991     instr[4,0] = Vd. */
3992
3993 unsigned vm = INSTR (20, 16);
3994 unsigned vn = INSTR (9, 5);
3995 unsigned vd = INSTR (4, 0);
3996 unsigned i;
3997 int full = INSTR (30, 30);
3998
3999 NYI_assert (29, 21, 0x173);
4000 NYI_assert (15, 10, 0x07);
4001
4002 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
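  /* Bitwise select: each result bit comes from Vn where the
     corresponding Vd bit is set and from Vm where it is clear,
     i.e. (Vd & Vn) | (~Vd & Vm), computed a byte at a time.  */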
4003 for (i = 0; i < (full ? 16 : 8); i++)
4004 aarch64_set_vec_u8 (cpu, vd, i,
4005 ( aarch64_get_vec_u8 (cpu, vd, i)
4006 & aarch64_get_vec_u8 (cpu, vn, i))
4007 | ((~ aarch64_get_vec_u8 (cpu, vd, i))
4008 & aarch64_get_vec_u8 (cpu, vm, i)));
4009 }
4010
4011 static void
4012 do_vec_EOR (sim_cpu *cpu)
4013 {
4014 /* instr[31] = 0
4015 instr[30] = half (0)/full (1)
4016 instr[29,21] = 10 1110 001
4017 instr[20,16] = Vm
4018 instr[15,10] = 000111
4019 instr[9,5] = Vn
4020     instr[4,0] = Vd. */
4021
4022 unsigned vm = INSTR (20, 16);
4023 unsigned vn = INSTR (9, 5);
4024 unsigned vd = INSTR (4, 0);
4025 unsigned i;
4026 int full = INSTR (30, 30);
4027
4028 NYI_assert (29, 21, 0x171);
4029 NYI_assert (15, 10, 0x07);
4030
4031 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4032 for (i = 0; i < (full ? 4 : 2); i++)
4033 aarch64_set_vec_u32 (cpu, vd, i,
4034 aarch64_get_vec_u32 (cpu, vn, i)
4035 ^ aarch64_get_vec_u32 (cpu, vm, i));
4036 }
4037
4038 static void
4039 do_vec_bit (sim_cpu *cpu)
4040 {
4041 /* instr[31] = 0
4042 instr[30] = half (0)/full (1)
4043 instr[29,23] = 10 1110 1
4044 instr[22] = BIT (0) / BIF (1)
4045 instr[21] = 1
4046 instr[20,16] = Vm
4047 instr[15,10] = 0001 11
4048 instr[9,5] = Vn
4049     instr[4,0] = Vd. */
4050
4051 unsigned vm = INSTR (20, 16);
4052 unsigned vn = INSTR (9, 5);
4053 unsigned vd = INSTR (4, 0);
4054 unsigned full = INSTR (30, 30);
4055 unsigned test_false = INSTR (22, 22);
4056 unsigned i;
4057
4058 NYI_assert (29, 23, 0x5D);
4059 NYI_assert (21, 21, 1);
4060 NYI_assert (15, 10, 0x07);
4061
4062 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4063 for (i = 0; i < (full ? 4 : 2); i++)
4064 {
4065 uint32_t vd_val = aarch64_get_vec_u32 (cpu, vd, i);
4066 uint32_t vn_val = aarch64_get_vec_u32 (cpu, vn, i);
4067 uint32_t vm_val = aarch64_get_vec_u32 (cpu, vm, i);
4068 if (test_false)
4069 aarch64_set_vec_u32 (cpu, vd, i,
4070 (vd_val & vm_val) | (vn_val & ~vm_val));
4071 else
4072 aarch64_set_vec_u32 (cpu, vd, i,
4073 (vd_val & ~vm_val) | (vn_val & vm_val));
4074 }
4075 }
4076
4077 static void
4078 do_vec_ORN (sim_cpu *cpu)
4079 {
4080 /* instr[31] = 0
4081 instr[30] = half (0)/full (1)
4082 instr[29,21] = 00 1110 111
4083 instr[20,16] = Vm
4084 instr[15,10] = 00 0111
4085 instr[9,5] = Vn
4086     instr[4,0] = Vd. */
4087
4088 unsigned vm = INSTR (20, 16);
4089 unsigned vn = INSTR (9, 5);
4090 unsigned vd = INSTR (4, 0);
4091 unsigned i;
4092 int full = INSTR (30, 30);
4093
4094 NYI_assert (29, 21, 0x077);
4095 NYI_assert (15, 10, 0x07);
4096
4097 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4098 for (i = 0; i < (full ? 16 : 8); i++)
4099 aarch64_set_vec_u8 (cpu, vd, i,
4100 aarch64_get_vec_u8 (cpu, vn, i)
4101 | ~ aarch64_get_vec_u8 (cpu, vm, i));
4102 }
4103
4104 static void
4105 do_vec_ORR (sim_cpu *cpu)
4106 {
4107 /* instr[31] = 0
4108 instr[30] = half (0)/full (1)
4109 instr[29,21] = 00 1110 101
4110 instr[20,16] = Vm
4111 instr[15,10] = 0001 11
4112 instr[9,5] = Vn
4113     instr[4,0] = Vd. */
4114
4115 unsigned vm = INSTR (20, 16);
4116 unsigned vn = INSTR (9, 5);
4117 unsigned vd = INSTR (4, 0);
4118 unsigned i;
4119 int full = INSTR (30, 30);
4120
4121 NYI_assert (29, 21, 0x075);
4122 NYI_assert (15, 10, 0x07);
4123
4124 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4125 for (i = 0; i < (full ? 16 : 8); i++)
4126 aarch64_set_vec_u8 (cpu, vd, i,
4127 aarch64_get_vec_u8 (cpu, vn, i)
4128 | aarch64_get_vec_u8 (cpu, vm, i));
4129 }
4130
4131 static void
4132 do_vec_BIC (sim_cpu *cpu)
4133 {
4134 /* instr[31] = 0
4135 instr[30] = half (0)/full (1)
4136 instr[29,21] = 00 1110 011
4137 instr[20,16] = Vm
4138 instr[15,10] = 00 0111
4139 instr[9,5] = Vn
4140     instr[4,0] = Vd. */
4141
4142 unsigned vm = INSTR (20, 16);
4143 unsigned vn = INSTR (9, 5);
4144 unsigned vd = INSTR (4, 0);
4145 unsigned i;
4146 int full = INSTR (30, 30);
4147
4148 NYI_assert (29, 21, 0x073);
4149 NYI_assert (15, 10, 0x07);
4150
4151 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4152 for (i = 0; i < (full ? 16 : 8); i++)
4153 aarch64_set_vec_u8 (cpu, vd, i,
4154 aarch64_get_vec_u8 (cpu, vn, i)
4155 & ~ aarch64_get_vec_u8 (cpu, vm, i));
4156 }
4157
4158 static void
4159 do_vec_XTN (sim_cpu *cpu)
4160 {
4161 /* instr[31] = 0
4162 instr[30] = first part (0)/ second part (1)
4163 instr[29,24] = 00 1110
4164 instr[23,22] = size: byte(00), half(01), word (10)
4165 instr[21,10] = 1000 0100 1010
4166 instr[9,5] = Vs
4167 instr[4,0] = Vd. */
4168
4169 unsigned vs = INSTR (9, 5);
4170 unsigned vd = INSTR (4, 0);
4171 unsigned bias = INSTR (30, 30);
4172 unsigned i;
4173
4174 NYI_assert (29, 24, 0x0E);
4175 NYI_assert (21, 10, 0x84A);
4176
4177 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
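  /* Narrow each source element to half its width; XTN fills the
     lower half of Vd and XTN2 (bias set) the upper half.  */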
4178 switch (INSTR (23, 22))
4179 {
4180 case 0:
4181 for (i = 0; i < 8; i++)
4182 aarch64_set_vec_u8 (cpu, vd, i + (bias * 8),
4183 aarch64_get_vec_u16 (cpu, vs, i));
4184 return;
4185
4186 case 1:
4187 for (i = 0; i < 4; i++)
4188 aarch64_set_vec_u16 (cpu, vd, i + (bias * 4),
4189 aarch64_get_vec_u32 (cpu, vs, i));
4190 return;
4191
4192 case 2:
4193 for (i = 0; i < 2; i++)
4194 aarch64_set_vec_u32 (cpu, vd, i + (bias * 2),
4195 aarch64_get_vec_u64 (cpu, vs, i));
4196 return;
4197 }
4198 }
4199
4200 /* Return the number of bits set in the input value. */
4201 #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
4202 # define popcount __builtin_popcount
4203 #else
4204 static int
4205 popcount (unsigned char x)
4206 {
4207 static const unsigned char popcnt[16] =
4208 {
4209 0, 1, 1, 2,
4210 1, 2, 2, 3,
4211 1, 2, 2, 3,
4212 2, 3, 3, 4
4213 };
4214
4215 /* Only counts the low 8 bits of the input as that is all we need. */
4216 return popcnt[x % 16] + popcnt[x / 16];
4217 }
4218 #endif
4219
4220 static void
4221 do_vec_CNT (sim_cpu *cpu)
4222 {
4223 /* instr[31] = 0
4224 instr[30] = half (0)/ full (1)
4225 instr[29,24] = 00 1110
4226 instr[23,22] = size: byte(00)
4227 instr[21,10] = 1000 0001 0110
4228 instr[9,5] = Vs
4229 instr[4,0] = Vd. */
4230
4231 unsigned vs = INSTR (9, 5);
4232 unsigned vd = INSTR (4, 0);
4233 int full = INSTR (30, 30);
4234 int size = INSTR (23, 22);
4235 int i;
4236
4237 NYI_assert (29, 24, 0x0E);
4238 NYI_assert (21, 10, 0x816);
4239
4240 if (size != 0)
4241 HALT_UNALLOC;
4242
4243 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4244
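  /* Each destination byte receives the number of bits set in the
     corresponding source byte; CNT is only defined for bytes.  */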
4245 for (i = 0; i < (full ? 16 : 8); i++)
4246 aarch64_set_vec_u8 (cpu, vd, i,
4247 popcount (aarch64_get_vec_u8 (cpu, vs, i)));
4248 }
4249
4250 static void
4251 do_vec_maxv (sim_cpu *cpu)
4252 {
4253 /* instr[31] = 0
4254 instr[30] = half(0)/full(1)
4255 instr[29] = signed (0)/unsigned(1)
4256 instr[28,24] = 0 1110
4257 instr[23,22] = size: byte(00), half(01), word (10)
4258 instr[21] = 1
4259 instr[20,17] = 1 000
4260 instr[16] = max(0)/min(1)
4261 instr[15,10] = 1010 10
4262 instr[9,5] = V source
4263     instr[4,0] = R dest. */
4264
4265 unsigned vs = INSTR (9, 5);
4266 unsigned rd = INSTR (4, 0);
4267 unsigned full = INSTR (30, 30);
4268 unsigned i;
4269
4270 NYI_assert (28, 24, 0x0E);
4271 NYI_assert (21, 21, 1);
4272 NYI_assert (20, 17, 8);
4273 NYI_assert (15, 10, 0x2A);
4274
4275 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
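  /* Bit 29 selects signed (0) vs unsigned (1) and bit 16 selects
     max (0) vs min (1), giving SMAXV, SMINV, UMAXV and UMINV.  */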
4276 switch ((INSTR (29, 29) << 1) | INSTR (16, 16))
4277 {
4278 case 0: /* SMAXV. */
4279 {
4280 int64_t smax;
4281 switch (INSTR (23, 22))
4282 {
4283 case 0:
4284 smax = aarch64_get_vec_s8 (cpu, vs, 0);
4285 for (i = 1; i < (full ? 16 : 8); i++)
4286 smax = max (smax, aarch64_get_vec_s8 (cpu, vs, i));
4287 break;
4288 case 1:
4289 smax = aarch64_get_vec_s16 (cpu, vs, 0);
4290 for (i = 1; i < (full ? 8 : 4); i++)
4291 smax = max (smax, aarch64_get_vec_s16 (cpu, vs, i));
4292 break;
4293 case 2:
4294 smax = aarch64_get_vec_s32 (cpu, vs, 0);
4295 for (i = 1; i < (full ? 4 : 2); i++)
4296 smax = max (smax, aarch64_get_vec_s32 (cpu, vs, i));
4297 break;
4298 case 3:
4299 HALT_UNALLOC;
4300 }
4301 aarch64_set_reg_s64 (cpu, rd, NO_SP, smax);
4302 return;
4303 }
4304
4305 case 1: /* SMINV. */
4306 {
4307 int64_t smin;
4308 switch (INSTR (23, 22))
4309 {
4310 case 0:
4311 smin = aarch64_get_vec_s8 (cpu, vs, 0);
4312 for (i = 1; i < (full ? 16 : 8); i++)
4313 smin = min (smin, aarch64_get_vec_s8 (cpu, vs, i));
4314 break;
4315 case 1:
4316 smin = aarch64_get_vec_s16 (cpu, vs, 0);
4317 for (i = 1; i < (full ? 8 : 4); i++)
4318 smin = min (smin, aarch64_get_vec_s16 (cpu, vs, i));
4319 break;
4320 case 2:
4321 smin = aarch64_get_vec_s32 (cpu, vs, 0);
4322 for (i = 1; i < (full ? 4 : 2); i++)
4323 smin = min (smin, aarch64_get_vec_s32 (cpu, vs, i));
4324 break;
4325
4326 case 3:
4327 HALT_UNALLOC;
4328 }
4329 aarch64_set_reg_s64 (cpu, rd, NO_SP, smin);
4330 return;
4331 }
4332
4333 case 2: /* UMAXV. */
4334 {
4335 uint64_t umax;
4336 switch (INSTR (23, 22))
4337 {
4338 case 0:
4339 umax = aarch64_get_vec_u8 (cpu, vs, 0);
4340 for (i = 1; i < (full ? 16 : 8); i++)
4341 umax = max (umax, aarch64_get_vec_u8 (cpu, vs, i));
4342 break;
4343 case 1:
4344 umax = aarch64_get_vec_u16 (cpu, vs, 0);
4345 for (i = 1; i < (full ? 8 : 4); i++)
4346 umax = max (umax, aarch64_get_vec_u16 (cpu, vs, i));
4347 break;
4348 case 2:
4349 umax = aarch64_get_vec_u32 (cpu, vs, 0);
4350 for (i = 1; i < (full ? 4 : 2); i++)
4351 umax = max (umax, aarch64_get_vec_u32 (cpu, vs, i));
4352 break;
4353
4354 case 3:
4355 HALT_UNALLOC;
4356 }
4357 aarch64_set_reg_u64 (cpu, rd, NO_SP, umax);
4358 return;
4359 }
4360
4361 case 3: /* UMINV. */
4362 {
4363 uint64_t umin;
4364 switch (INSTR (23, 22))
4365 {
4366 case 0:
4367 umin = aarch64_get_vec_u8 (cpu, vs, 0);
4368 for (i = 1; i < (full ? 16 : 8); i++)
4369 umin = min (umin, aarch64_get_vec_u8 (cpu, vs, i));
4370 break;
4371 case 1:
4372 umin = aarch64_get_vec_u16 (cpu, vs, 0);
4373 for (i = 1; i < (full ? 8 : 4); i++)
4374 umin = min (umin, aarch64_get_vec_u16 (cpu, vs, i));
4375 break;
4376 case 2:
4377 umin = aarch64_get_vec_u32 (cpu, vs, 0);
4378 for (i = 1; i < (full ? 4 : 2); i++)
4379 umin = min (umin, aarch64_get_vec_u32 (cpu, vs, i));
4380 break;
4381
4382 case 3:
4383 HALT_UNALLOC;
4384 }
4385 aarch64_set_reg_u64 (cpu, rd, NO_SP, umin);
4386 return;
4387 }
4388 }
4389 }
4390
4391 static void
4392 do_vec_fminmaxV (sim_cpu *cpu)
4393 {
4394 /* instr[31,24] = 0110 1110
4395 instr[23] = max(0)/min(1)
4396 instr[22,14] = 011 0000 11
4397 instr[13,12] = nm(00)/normal(11)
4398 instr[11,10] = 10
4399 instr[9,5] = V source
4400     instr[4,0] = R dest. */
4401
4402 unsigned vs = INSTR (9, 5);
4403 unsigned rd = INSTR (4, 0);
4404 unsigned i;
4405 float res = aarch64_get_vec_float (cpu, vs, 0);
4406
4407 NYI_assert (31, 24, 0x6E);
4408 NYI_assert (22, 14, 0x0C3);
4409 NYI_assert (11, 10, 2);
4410
4411 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4412 if (INSTR (23, 23))
4413 {
4414 switch (INSTR (13, 12))
4415 {
4416 	case 0: /* FMINNMV. */
4417 for (i = 1; i < 4; i++)
4418 res = fminnm (res, aarch64_get_vec_float (cpu, vs, i));
4419 break;
4420
4421 case 3: /* FMINV. */
4422 for (i = 1; i < 4; i++)
4423 res = min (res, aarch64_get_vec_float (cpu, vs, i));
4424 break;
4425
4426 default:
4427 HALT_NYI;
4428 }
4429 }
4430 else
4431 {
4432 switch (INSTR (13, 12))
4433 {
4434 	case 0: /* FMAXNMV. */
4435 for (i = 1; i < 4; i++)
4436 res = fmaxnm (res, aarch64_get_vec_float (cpu, vs, i));
4437 break;
4438
4439 case 3: /* FMAXV. */
4440 for (i = 1; i < 4; i++)
4441 res = max (res, aarch64_get_vec_float (cpu, vs, i));
4442 break;
4443
4444 default:
4445 HALT_NYI;
4446 }
4447 }
4448
4449 aarch64_set_FP_float (cpu, rd, res);
4450 }
4451
4452 static void
4453 do_vec_Fminmax (sim_cpu *cpu)
4454 {
4455 /* instr[31] = 0
4456 instr[30] = half(0)/full(1)
4457 instr[29,24] = 00 1110
4458 instr[23] = max(0)/min(1)
4459 instr[22] = float(0)/double(1)
4460 instr[21] = 1
4461 instr[20,16] = Vm
4462 instr[15,14] = 11
4463 instr[13,12] = nm(00)/normal(11)
4464 instr[11,10] = 01
4465 instr[9,5] = Vn
4466 instr[4,0] = Vd. */
4467
4468 unsigned vm = INSTR (20, 16);
4469 unsigned vn = INSTR (9, 5);
4470 unsigned vd = INSTR (4, 0);
4471 unsigned full = INSTR (30, 30);
4472 unsigned min = INSTR (23, 23);
4473 unsigned i;
4474
4475 NYI_assert (29, 24, 0x0E);
4476 NYI_assert (21, 21, 1);
4477 NYI_assert (15, 14, 3);
4478 NYI_assert (11, 10, 1);
4479
4480 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4481 if (INSTR (22, 22))
4482 {
4483 double (* func)(double, double);
4484
4485 if (! full)
4486 HALT_NYI;
4487
4488 if (INSTR (13, 12) == 0)
4489 func = min ? dminnm : dmaxnm;
4490 else if (INSTR (13, 12) == 3)
4491 func = min ? fmin : fmax;
4492 else
4493 HALT_NYI;
4494
4495 for (i = 0; i < 2; i++)
4496 aarch64_set_vec_double (cpu, vd, i,
4497 func (aarch64_get_vec_double (cpu, vn, i),
4498 aarch64_get_vec_double (cpu, vm, i)));
4499 }
4500 else
4501 {
4502 float (* func)(float, float);
4503
4504 if (INSTR (13, 12) == 0)
4505 func = min ? fminnm : fmaxnm;
4506 else if (INSTR (13, 12) == 3)
4507 func = min ? fminf : fmaxf;
4508 else
4509 HALT_NYI;
4510
4511 for (i = 0; i < (full ? 4 : 2); i++)
4512 aarch64_set_vec_float (cpu, vd, i,
4513 func (aarch64_get_vec_float (cpu, vn, i),
4514 aarch64_get_vec_float (cpu, vm, i)));
4515 }
4516 }
4517
4518 static void
4519 do_vec_SCVTF (sim_cpu *cpu)
4520 {
4521 /* instr[31] = 0
4522 instr[30] = Q
4523 instr[29,23] = 00 1110 0
4524 instr[22] = float(0)/double(1)
4525 instr[21,10] = 10 0001 1101 10
4526 instr[9,5] = Vn
4527 instr[4,0] = Vd. */
4528
4529 unsigned vn = INSTR (9, 5);
4530 unsigned vd = INSTR (4, 0);
4531 unsigned full = INSTR (30, 30);
4532 unsigned size = INSTR (22, 22);
4533 unsigned i;
4534
4535 NYI_assert (29, 23, 0x1C);
4536 NYI_assert (21, 10, 0x876);
4537
4538 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4539 if (size)
4540 {
4541 if (! full)
4542 HALT_UNALLOC;
4543
4544 for (i = 0; i < 2; i++)
4545 {
4546 	  double val = (double) aarch64_get_vec_s64 (cpu, vn, i);
4547 aarch64_set_vec_double (cpu, vd, i, val);
4548 }
4549 }
4550 else
4551 {
4552 for (i = 0; i < (full ? 4 : 2); i++)
4553 {
4554 	  float val = (float) aarch64_get_vec_s32 (cpu, vn, i);
4555 aarch64_set_vec_float (cpu, vd, i, val);
4556 }
4557 }
4558 }
4559
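/* Element-wise integer comparison of Vn against Vm: each destination
   element is set to all-ones if the comparison is true and to zero
   otherwise.  */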
4560 #define VEC_CMP(SOURCE, CMP) \
4561 do \
4562 { \
4563 switch (size) \
4564 { \
4565 case 0: \
4566 for (i = 0; i < (full ? 16 : 8); i++) \
4567 aarch64_set_vec_u8 (cpu, vd, i, \
4568 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4569 CMP \
4570 aarch64_get_vec_##SOURCE##8 (cpu, vm, i) \
4571 ? -1 : 0); \
4572 return; \
4573 case 1: \
4574 for (i = 0; i < (full ? 8 : 4); i++) \
4575 aarch64_set_vec_u16 (cpu, vd, i, \
4576 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4577 CMP \
4578 aarch64_get_vec_##SOURCE##16 (cpu, vm, i) \
4579 ? -1 : 0); \
4580 return; \
4581 case 2: \
4582 for (i = 0; i < (full ? 4 : 2); i++) \
4583 aarch64_set_vec_u32 (cpu, vd, i, \
4584 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4585 CMP \
4586 aarch64_get_vec_##SOURCE##32 (cpu, vm, i) \
4587 ? -1 : 0); \
4588 return; \
4589 case 3: \
4590 if (! full) \
4591 HALT_UNALLOC; \
4592 for (i = 0; i < 2; i++) \
4593 aarch64_set_vec_u64 (cpu, vd, i, \
4594 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4595 CMP \
4596 aarch64_get_vec_##SOURCE##64 (cpu, vm, i) \
4597 ? -1ULL : 0); \
4598 return; \
4599 } \
4600 } \
4601 while (0)
4602
4603 #define VEC_CMP0(SOURCE, CMP) \
4604 do \
4605 { \
4606 switch (size) \
4607 { \
4608 case 0: \
4609 for (i = 0; i < (full ? 16 : 8); i++) \
4610 aarch64_set_vec_u8 (cpu, vd, i, \
4611 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4612 CMP 0 ? -1 : 0); \
4613 return; \
4614 case 1: \
4615 for (i = 0; i < (full ? 8 : 4); i++) \
4616 aarch64_set_vec_u16 (cpu, vd, i, \
4617 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4618 CMP 0 ? -1 : 0); \
4619 return; \
4620 case 2: \
4621 for (i = 0; i < (full ? 4 : 2); i++) \
4622 aarch64_set_vec_u32 (cpu, vd, i, \
4623 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4624 CMP 0 ? -1 : 0); \
4625 return; \
4626 case 3: \
4627 if (! full) \
4628 HALT_UNALLOC; \
4629 for (i = 0; i < 2; i++) \
4630 aarch64_set_vec_u64 (cpu, vd, i, \
4631 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4632 CMP 0 ? -1ULL : 0); \
4633 return; \
4634 } \
4635 } \
4636 while (0)
4637
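/* Floating point comparison of each element of Vn against zero; the
   Vm field must be zero for these encodings.  */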
4638 #define VEC_FCMP0(CMP) \
4639 do \
4640 { \
4641 if (vm != 0) \
4642 HALT_NYI; \
4643 if (INSTR (22, 22)) \
4644 { \
4645 if (! full) \
4646 HALT_NYI; \
4647 for (i = 0; i < 2; i++) \
4648 aarch64_set_vec_u64 (cpu, vd, i, \
4649 aarch64_get_vec_double (cpu, vn, i) \
4650 CMP 0.0 ? -1 : 0); \
4651 } \
4652 else \
4653 { \
4654 for (i = 0; i < (full ? 4 : 2); i++) \
4655 aarch64_set_vec_u32 (cpu, vd, i, \
4656 aarch64_get_vec_float (cpu, vn, i) \
4657 CMP 0.0 ? -1 : 0); \
4658 } \
4659 return; \
4660 } \
4661 while (0)
4662
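/* Floating point comparison of corresponding elements of Vn and Vm,
   writing all-ones to the destination element on success and zero on
   failure.  */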
4663 #define VEC_FCMP(CMP) \
4664 do \
4665 { \
4666 if (INSTR (22, 22)) \
4667 { \
4668 if (! full) \
4669 HALT_NYI; \
4670 for (i = 0; i < 2; i++) \
4671 aarch64_set_vec_u64 (cpu, vd, i, \
4672 aarch64_get_vec_double (cpu, vn, i) \
4673 CMP \
4674 aarch64_get_vec_double (cpu, vm, i) \
4675 ? -1 : 0); \
4676 } \
4677 else \
4678 { \
4679 for (i = 0; i < (full ? 4 : 2); i++) \
4680 aarch64_set_vec_u32 (cpu, vd, i, \
4681 aarch64_get_vec_float (cpu, vn, i) \
4682 CMP \
4683 aarch64_get_vec_float (cpu, vm, i) \
4684 ? -1 : 0); \
4685 } \
4686 return; \
4687 } \
4688 while (0)
4689
4690 static void
4691 do_vec_compare (sim_cpu *cpu)
4692 {
4693 /* instr[31] = 0
4694 instr[30] = half(0)/full(1)
4695 instr[29] = part-of-comparison-type
4696 instr[28,24] = 0 1110
4697 instr[23,22] = size of integer compares: byte(00), half(01), word (10), long (11)
4698 type of float compares: single (-0) / double (-1)
4699 instr[21] = 1
4700 instr[20,16] = Vm or 00000 (compare vs 0)
4701 instr[15,10] = part-of-comparison-type
4702 instr[9,5] = Vn
4703     instr[4,0] = Vd. */
4704
4705 int full = INSTR (30, 30);
4706 int size = INSTR (23, 22);
4707 unsigned vm = INSTR (20, 16);
4708 unsigned vn = INSTR (9, 5);
4709 unsigned vd = INSTR (4, 0);
4710 unsigned i;
4711
4712 NYI_assert (28, 24, 0x0E);
4713 NYI_assert (21, 21, 1);
4714
4715 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4716 if ((INSTR (11, 11)
4717 && INSTR (14, 14))
4718 || ((INSTR (11, 11) == 0
4719 && INSTR (10, 10) == 0)))
4720 {
4721 /* A compare vs 0. */
4722 if (vm != 0)
4723 {
4724 if (INSTR (15, 10) == 0x2A)
4725 do_vec_maxv (cpu);
4726 else if (INSTR (15, 10) == 0x32
4727 || INSTR (15, 10) == 0x3E)
4728 do_vec_fminmaxV (cpu);
4729 else if (INSTR (29, 23) == 0x1C
4730 && INSTR (21, 10) == 0x876)
4731 do_vec_SCVTF (cpu);
4732 else
4733 HALT_NYI;
4734 return;
4735 }
4736 }
4737
4738 if (INSTR (14, 14))
4739 {
4740 /* A floating point compare. */
4741 unsigned decode = (INSTR (29, 29) << 5) | (INSTR (23, 23) << 4)
4742 | INSTR (13, 10);
4743
4744 NYI_assert (15, 15, 1);
4745
4746 switch (decode)
4747 {
4748 case /* 0b010010: GT#0 */ 0x12: VEC_FCMP0 (>);
4749 case /* 0b110010: GE#0 */ 0x32: VEC_FCMP0 (>=);
4750 case /* 0b010110: EQ#0 */ 0x16: VEC_FCMP0 (==);
4751 case /* 0b110110: LE#0 */ 0x36: VEC_FCMP0 (<=);
4752 case /* 0b011010: LT#0 */ 0x1A: VEC_FCMP0 (<);
4753 case /* 0b111001: GT */ 0x39: VEC_FCMP (>);
4754 case /* 0b101001: GE */ 0x29: VEC_FCMP (>=);
4755 case /* 0b001001: EQ */ 0x09: VEC_FCMP (==);
4756
4757 default:
4758 HALT_NYI;
4759 }
4760 }
4761 else
4762 {
4763 unsigned decode = (INSTR (29, 29) << 6) | INSTR (15, 10);
4764
4765 switch (decode)
4766 {
4767 case 0x0D: /* 0001101 GT */ VEC_CMP (s, > );
4768 case 0x0F: /* 0001111 GE */ VEC_CMP (s, >= );
4769 case 0x22: /* 0100010 GT #0 */ VEC_CMP0 (s, > );
4770 case 0x23: /* 0100011 TST */ VEC_CMP (u, & );
4771 case 0x26: /* 0100110 EQ #0 */ VEC_CMP0 (s, == );
4772 case 0x2A: /* 0101010 LT #0 */ VEC_CMP0 (s, < );
4773 case 0x4D: /* 1001101 HI */ VEC_CMP (u, > );
4774 case 0x4F: /* 1001111 HS */ VEC_CMP (u, >= );
4775 case 0x62: /* 1100010 GE #0 */ VEC_CMP0 (s, >= );
4776 case 0x63: /* 1100011 EQ */ VEC_CMP (u, == );
4777 case 0x66: /* 1100110 LE #0 */ VEC_CMP0 (s, <= );
4778 default:
4779 if (vm == 0)
4780 HALT_NYI;
4781 do_vec_maxv (cpu);
4782 }
4783 }
4784 }
4785
4786 static void
4787 do_vec_SSHL (sim_cpu *cpu)
4788 {
4789 /* instr[31] = 0
4790 instr[30] = first part (0)/ second part (1)
4791 instr[29,24] = 00 1110
4792 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4793 instr[21] = 1
4794 instr[20,16] = Vm
4795 instr[15,10] = 0100 01
4796 instr[9,5] = Vn
4797 instr[4,0] = Vd. */
4798
4799 unsigned full = INSTR (30, 30);
4800 unsigned vm = INSTR (20, 16);
4801 unsigned vn = INSTR (9, 5);
4802 unsigned vd = INSTR (4, 0);
4803 unsigned i;
4804 signed int shift;
4805
4806 NYI_assert (29, 24, 0x0E);
4807 NYI_assert (21, 21, 1);
4808 NYI_assert (15, 10, 0x11);
4809
4810   /* FIXME: What is a signed shift left in this context?  */
4811
4812 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
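  /* Each element of Vn is shifted by the signed count in the low byte
     of the corresponding Vm element: left for a positive count,
     arithmetic right for a negative one.  */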
4813 switch (INSTR (23, 22))
4814 {
4815 case 0:
4816 for (i = 0; i < (full ? 16 : 8); i++)
4817 {
4818 shift = aarch64_get_vec_s8 (cpu, vm, i);
4819 if (shift >= 0)
4820 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4821 << shift);
4822 else
4823 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4824 >> - shift);
4825 }
4826 return;
4827
4828 case 1:
4829 for (i = 0; i < (full ? 8 : 4); i++)
4830 {
4831 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
4832 if (shift >= 0)
4833 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4834 << shift);
4835 else
4836 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4837 >> - shift);
4838 }
4839 return;
4840
4841 case 2:
4842 for (i = 0; i < (full ? 4 : 2); i++)
4843 {
4844 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
4845 if (shift >= 0)
4846 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4847 << shift);
4848 else
4849 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4850 >> - shift);
4851 }
4852 return;
4853
4854 case 3:
4855 if (! full)
4856 HALT_UNALLOC;
4857 for (i = 0; i < 2; i++)
4858 {
4859 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
4860 if (shift >= 0)
4861 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4862 << shift);
4863 else
4864 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4865 >> - shift);
4866 }
4867 return;
4868 }
4869 }
4870
4871 static void
4872 do_vec_USHL (sim_cpu *cpu)
4873 {
4874 /* instr[31] = 0
4875 instr[30] = first part (0)/ second part (1)
4876 instr[29,24] = 10 1110
4877 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4878 instr[21] = 1
4879 instr[20,16] = Vm
4880 instr[15,10] = 0100 01
4881 instr[9,5] = Vn
4882 instr[4,0] = Vd */
4883
4884 unsigned full = INSTR (30, 30);
4885 unsigned vm = INSTR (20, 16);
4886 unsigned vn = INSTR (9, 5);
4887 unsigned vd = INSTR (4, 0);
4888 unsigned i;
4889 signed int shift;
4890
4891 NYI_assert (29, 24, 0x2E);
4892 NYI_assert (15, 10, 0x11);
4893
4894 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
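  /* As for SSHL, except that a negative count produces a logical
     (zero filling) right shift, the elements being unsigned.  */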
4895 switch (INSTR (23, 22))
4896 {
4897 case 0:
4898 for (i = 0; i < (full ? 16 : 8); i++)
4899 {
4900 shift = aarch64_get_vec_s8 (cpu, vm, i);
4901 if (shift >= 0)
4902 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
4903 << shift);
4904 else
4905 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
4906 >> - shift);
4907 }
4908 return;
4909
4910 case 1:
4911 for (i = 0; i < (full ? 8 : 4); i++)
4912 {
4913 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
4914 if (shift >= 0)
4915 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
4916 << shift);
4917 else
4918 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
4919 >> - shift);
4920 }
4921 return;
4922
4923 case 2:
4924 for (i = 0; i < (full ? 4 : 2); i++)
4925 {
4926 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
4927 if (shift >= 0)
4928 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
4929 << shift);
4930 else
4931 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
4932 >> - shift);
4933 }
4934 return;
4935
4936 case 3:
4937 if (! full)
4938 HALT_UNALLOC;
4939 for (i = 0; i < 2; i++)
4940 {
4941 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
4942 if (shift >= 0)
4943 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
4944 << shift);
4945 else
4946 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
4947 >> - shift);
4948 }
4949 return;
4950 }
4951 }
4952
4953 static void
4954 do_vec_FMLA (sim_cpu *cpu)
4955 {
4956 /* instr[31] = 0
4957 instr[30] = full/half selector
4958 instr[29,23] = 0011100
4959 instr[22] = size: 0=>float, 1=>double
4960 instr[21] = 1
4961     instr[20,16] = Vm
4962     instr[15,10] = 1100 11
4963     instr[9,5] = Vn
4964     instr[4,0] = Vd. */
4965
4966 unsigned vm = INSTR (20, 16);
4967 unsigned vn = INSTR (9, 5);
4968 unsigned vd = INSTR (4, 0);
4969 unsigned i;
4970 int full = INSTR (30, 30);
4971
4972 NYI_assert (29, 23, 0x1C);
4973 NYI_assert (21, 21, 1);
4974 NYI_assert (15, 10, 0x33);
4975
4976 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4977 if (INSTR (22, 22))
4978 {
4979 if (! full)
4980 HALT_UNALLOC;
4981 for (i = 0; i < 2; i++)
4982 aarch64_set_vec_double (cpu, vd, i,
4983 aarch64_get_vec_double (cpu, vn, i) *
4984 aarch64_get_vec_double (cpu, vm, i) +
4985 aarch64_get_vec_double (cpu, vd, i));
4986 }
4987 else
4988 {
4989 for (i = 0; i < (full ? 4 : 2); i++)
4990 aarch64_set_vec_float (cpu, vd, i,
4991 aarch64_get_vec_float (cpu, vn, i) *
4992 aarch64_get_vec_float (cpu, vm, i) +
4993 aarch64_get_vec_float (cpu, vd, i));
4994 }
4995 }
4996
4997 static void
4998 do_vec_max (sim_cpu *cpu)
4999 {
5000 /* instr[31] = 0
5001 instr[30] = full/half selector
5002 instr[29] = SMAX (0) / UMAX (1)
5003 instr[28,24] = 0 1110
5004 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
5005 instr[21] = 1
5006     instr[20,16] = Vm
5007     instr[15,10] = 0110 01
5008     instr[9,5] = Vn
5009     instr[4,0] = Vd. */
5010
5011 unsigned vm = INSTR (20, 16);
5012 unsigned vn = INSTR (9, 5);
5013 unsigned vd = INSTR (4, 0);
5014 unsigned i;
5015 int full = INSTR (30, 30);
5016
5017 NYI_assert (28, 24, 0x0E);
5018 NYI_assert (21, 21, 1);
5019 NYI_assert (15, 10, 0x19);
5020
5021 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5022 if (INSTR (29, 29))
5023 {
5024 switch (INSTR (23, 22))
5025 {
5026 case 0:
5027 for (i = 0; i < (full ? 16 : 8); i++)
5028 aarch64_set_vec_u8 (cpu, vd, i,
5029 aarch64_get_vec_u8 (cpu, vn, i)
5030 > aarch64_get_vec_u8 (cpu, vm, i)
5031 ? aarch64_get_vec_u8 (cpu, vn, i)
5032 : aarch64_get_vec_u8 (cpu, vm, i));
5033 return;
5034
5035 case 1:
5036 for (i = 0; i < (full ? 8 : 4); i++)
5037 aarch64_set_vec_u16 (cpu, vd, i,
5038 aarch64_get_vec_u16 (cpu, vn, i)
5039 > aarch64_get_vec_u16 (cpu, vm, i)
5040 ? aarch64_get_vec_u16 (cpu, vn, i)
5041 : aarch64_get_vec_u16 (cpu, vm, i));
5042 return;
5043
5044 case 2:
5045 for (i = 0; i < (full ? 4 : 2); i++)
5046 aarch64_set_vec_u32 (cpu, vd, i,
5047 aarch64_get_vec_u32 (cpu, vn, i)
5048 > aarch64_get_vec_u32 (cpu, vm, i)
5049 ? aarch64_get_vec_u32 (cpu, vn, i)
5050 : aarch64_get_vec_u32 (cpu, vm, i));
5051 return;
5052
5053 case 3:
5054 HALT_UNALLOC;
5055 }
5056 }
5057 else
5058 {
5059 switch (INSTR (23, 22))
5060 {
5061 case 0:
5062 for (i = 0; i < (full ? 16 : 8); i++)
5063 aarch64_set_vec_s8 (cpu, vd, i,
5064 aarch64_get_vec_s8 (cpu, vn, i)
5065 > aarch64_get_vec_s8 (cpu, vm, i)
5066 ? aarch64_get_vec_s8 (cpu, vn, i)
5067 : aarch64_get_vec_s8 (cpu, vm, i));
5068 return;
5069
5070 case 1:
5071 for (i = 0; i < (full ? 8 : 4); i++)
5072 aarch64_set_vec_s16 (cpu, vd, i,
5073 aarch64_get_vec_s16 (cpu, vn, i)
5074 > aarch64_get_vec_s16 (cpu, vm, i)
5075 ? aarch64_get_vec_s16 (cpu, vn, i)
5076 : aarch64_get_vec_s16 (cpu, vm, i));
5077 return;
5078
5079 case 2:
5080 for (i = 0; i < (full ? 4 : 2); i++)
5081 aarch64_set_vec_s32 (cpu, vd, i,
5082 aarch64_get_vec_s32 (cpu, vn, i)
5083 > aarch64_get_vec_s32 (cpu, vm, i)
5084 ? aarch64_get_vec_s32 (cpu, vn, i)
5085 : aarch64_get_vec_s32 (cpu, vm, i));
5086 return;
5087
5088 case 3:
5089 HALT_UNALLOC;
5090 }
5091 }
5092 }
5093
5094 static void
5095 do_vec_min (sim_cpu *cpu)
5096 {
5097 /* instr[31] = 0
5098 instr[30] = full/half selector
5099 instr[29] = SMIN (0) / UMIN (1)
5100 instr[28,24] = 0 1110
5101 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
5102 instr[21] = 1
5103     instr[20,16] = Vm
5104     instr[15,10] = 0110 11
5105     instr[9,5] = Vn
5106     instr[4,0] = Vd. */
5107
5108 unsigned vm = INSTR (20, 16);
5109 unsigned vn = INSTR (9, 5);
5110 unsigned vd = INSTR (4, 0);
5111 unsigned i;
5112 int full = INSTR (30, 30);
5113
5114 NYI_assert (28, 24, 0x0E);
5115 NYI_assert (21, 21, 1);
5116 NYI_assert (15, 10, 0x1B);
5117
5118 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5119 if (INSTR (29, 29))
5120 {
5121 switch (INSTR (23, 22))
5122 {
5123 case 0:
5124 for (i = 0; i < (full ? 16 : 8); i++)
5125 aarch64_set_vec_u8 (cpu, vd, i,
5126 aarch64_get_vec_u8 (cpu, vn, i)
5127 < aarch64_get_vec_u8 (cpu, vm, i)
5128 ? aarch64_get_vec_u8 (cpu, vn, i)
5129 : aarch64_get_vec_u8 (cpu, vm, i));
5130 return;
5131
5132 case 1:
5133 for (i = 0; i < (full ? 8 : 4); i++)
5134 aarch64_set_vec_u16 (cpu, vd, i,
5135 aarch64_get_vec_u16 (cpu, vn, i)
5136 < aarch64_get_vec_u16 (cpu, vm, i)
5137 ? aarch64_get_vec_u16 (cpu, vn, i)
5138 : aarch64_get_vec_u16 (cpu, vm, i));
5139 return;
5140
5141 case 2:
5142 for (i = 0; i < (full ? 4 : 2); i++)
5143 aarch64_set_vec_u32 (cpu, vd, i,
5144 aarch64_get_vec_u32 (cpu, vn, i)
5145 < aarch64_get_vec_u32 (cpu, vm, i)
5146 ? aarch64_get_vec_u32 (cpu, vn, i)
5147 : aarch64_get_vec_u32 (cpu, vm, i));
5148 return;
5149
5150 case 3:
5151 HALT_UNALLOC;
5152 }
5153 }
5154 else
5155 {
5156 switch (INSTR (23, 22))
5157 {
5158 case 0:
5159 for (i = 0; i < (full ? 16 : 8); i++)
5160 aarch64_set_vec_s8 (cpu, vd, i,
5161 aarch64_get_vec_s8 (cpu, vn, i)
5162 < aarch64_get_vec_s8 (cpu, vm, i)
5163 ? aarch64_get_vec_s8 (cpu, vn, i)
5164 : aarch64_get_vec_s8 (cpu, vm, i));
5165 return;
5166
5167 case 1:
5168 for (i = 0; i < (full ? 8 : 4); i++)
5169 aarch64_set_vec_s16 (cpu, vd, i,
5170 aarch64_get_vec_s16 (cpu, vn, i)
5171 < aarch64_get_vec_s16 (cpu, vm, i)
5172 ? aarch64_get_vec_s16 (cpu, vn, i)
5173 : aarch64_get_vec_s16 (cpu, vm, i));
5174 return;
5175
5176 case 2:
5177 for (i = 0; i < (full ? 4 : 2); i++)
5178 aarch64_set_vec_s32 (cpu, vd, i,
5179 aarch64_get_vec_s32 (cpu, vn, i)
5180 < aarch64_get_vec_s32 (cpu, vm, i)
5181 ? aarch64_get_vec_s32 (cpu, vn, i)
5182 : aarch64_get_vec_s32 (cpu, vm, i));
5183 return;
5184
5185 case 3:
5186 HALT_UNALLOC;
5187 }
5188 }
5189 }
5190
5191 static void
5192 do_vec_sub_long (sim_cpu *cpu)
5193 {
5194 /* instr[31] = 0
5195 instr[30] = lower (0) / upper (1)
5196 instr[29] = signed (0) / unsigned (1)
5197 instr[28,24] = 0 1110
5198 instr[23,22] = size: bytes (00), half (01), word (10)
5199 instr[21] = 1
5200     instr[20,16] = Vm
5201 instr[15,10] = 0010 00
5202 instr[9,5] = Vn
5203 instr[4,0] = V dest. */
5204
5205 unsigned size = INSTR (23, 22);
5206 unsigned vm = INSTR (20, 16);
5207 unsigned vn = INSTR (9, 5);
5208 unsigned vd = INSTR (4, 0);
5209 unsigned bias = 0;
5210 unsigned i;
5211
5212 NYI_assert (28, 24, 0x0E);
5213 NYI_assert (21, 21, 1);
5214 NYI_assert (15, 10, 0x08);
5215
5216 if (size == 3)
5217 HALT_UNALLOC;
5218
5219 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5220 switch (INSTR (30, 29))
5221 {
5222 case 2: /* SSUBL2. */
5223       bias = 2; /* Fall through.  */
5224 case 0: /* SSUBL. */
5225 switch (size)
5226 {
5227 case 0:
5228 	  bias *= 4; /* The upper half starts at byte element 8.  */
5229 for (i = 0; i < 8; i++)
5230 aarch64_set_vec_s16 (cpu, vd, i,
5231 aarch64_get_vec_s8 (cpu, vn, i + bias)
5232 - aarch64_get_vec_s8 (cpu, vm, i + bias));
5233 break;
5234
5235 case 1:
5236 bias *= 2;
5237 for (i = 0; i < 4; i++)
5238 aarch64_set_vec_s32 (cpu, vd, i,
5239 aarch64_get_vec_s16 (cpu, vn, i + bias)
5240 - aarch64_get_vec_s16 (cpu, vm, i + bias));
5241 break;
5242
5243 case 2:
5244 for (i = 0; i < 2; i++)
5245 aarch64_set_vec_s64 (cpu, vd, i,
5246 aarch64_get_vec_s32 (cpu, vn, i + bias)
5247 - aarch64_get_vec_s32 (cpu, vm, i + bias));
5248 break;
5249
5250 default:
5251 HALT_UNALLOC;
5252 }
5253 break;
5254
5255 case 3: /* USUBL2. */
5256       bias = 2; /* Fall through.  */
5257 case 1: /* USUBL. */
5258 switch (size)
5259 {
5260 case 0:
5261 	  bias *= 4; /* The upper half starts at byte element 8.  */
5262 for (i = 0; i < 8; i++)
5263 aarch64_set_vec_u16 (cpu, vd, i,
5264 aarch64_get_vec_u8 (cpu, vn, i + bias)
5265 - aarch64_get_vec_u8 (cpu, vm, i + bias));
5266 break;
5267
5268 case 1:
5269 bias *= 2;
5270 for (i = 0; i < 4; i++)
5271 aarch64_set_vec_u32 (cpu, vd, i,
5272 aarch64_get_vec_u16 (cpu, vn, i + bias)
5273 - aarch64_get_vec_u16 (cpu, vm, i + bias));
5274 break;
5275
5276 case 2:
5277 for (i = 0; i < 2; i++)
5278 aarch64_set_vec_u64 (cpu, vd, i,
5279 aarch64_get_vec_u32 (cpu, vn, i + bias)
5280 - aarch64_get_vec_u32 (cpu, vm, i + bias));
5281 break;
5282
5283 default:
5284 HALT_UNALLOC;
5285 }
5286 break;
5287 }
5288 }
5289
5290 static void
5291 do_vec_ADDP (sim_cpu *cpu)
5292 {
5293 /* instr[31] = 0
5294 instr[30] = half(0)/full(1)
5295 instr[29,24] = 00 1110
5296 instr[23,22] = size: bytes (00), half (01), word (10), long (11)
5297 instr[21] = 1
5298     instr[20,16] = Vm
5299 instr[15,10] = 1011 11
5300 instr[9,5] = Vn
5301 instr[4,0] = V dest. */
5302
5303 FRegister copy_vn;
5304 FRegister copy_vm;
5305 unsigned full = INSTR (30, 30);
5306 unsigned size = INSTR (23, 22);
5307 unsigned vm = INSTR (20, 16);
5308 unsigned vn = INSTR (9, 5);
5309 unsigned vd = INSTR (4, 0);
5310 unsigned i, range;
5311
5312 NYI_assert (29, 24, 0x0E);
5313 NYI_assert (21, 21, 1);
5314 NYI_assert (15, 10, 0x2F);
5315
5316 /* Make copies of the source registers in case vd == vn/vm. */
5317 copy_vn = cpu->fr[vn];
5318 copy_vm = cpu->fr[vm];
5319
5320 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
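  /* Pairwise add: sums of adjacent Vn elements fill the lower half
     of Vd and sums of adjacent Vm elements the upper half.  */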
5321 switch (size)
5322 {
5323 case 0:
5324 range = full ? 8 : 4;
5325 for (i = 0; i < range; i++)
5326 {
5327 aarch64_set_vec_u8 (cpu, vd, i,
5328 copy_vn.b[i * 2] + copy_vn.b[i * 2 + 1]);
5329 aarch64_set_vec_u8 (cpu, vd, i + range,
5330 copy_vm.b[i * 2] + copy_vm.b[i * 2 + 1]);
5331 }
5332 return;
5333
5334 case 1:
5335 range = full ? 4 : 2;
5336 for (i = 0; i < range; i++)
5337 {
5338 aarch64_set_vec_u16 (cpu, vd, i,
5339 copy_vn.h[i * 2] + copy_vn.h[i * 2 + 1]);
5340 aarch64_set_vec_u16 (cpu, vd, i + range,
5341 copy_vm.h[i * 2] + copy_vm.h[i * 2 + 1]);
5342 }
5343 return;
5344
5345 case 2:
5346 range = full ? 2 : 1;
5347 for (i = 0; i < range; i++)
5348 {
5349 aarch64_set_vec_u32 (cpu, vd, i,
5350 copy_vn.w[i * 2] + copy_vn.w[i * 2 + 1]);
5351 aarch64_set_vec_u32 (cpu, vd, i + range,
5352 copy_vm.w[i * 2] + copy_vm.w[i * 2 + 1]);
5353 }
5354 return;
5355
5356 case 3:
5357 if (! full)
5358 HALT_UNALLOC;
5359 aarch64_set_vec_u64 (cpu, vd, 0, copy_vn.v[0] + copy_vn.v[1]);
5360 aarch64_set_vec_u64 (cpu, vd, 1, copy_vm.v[0] + copy_vm.v[1]);
5361 return;
5362 }
5363 }
5364
5365 static void
5366 do_vec_UMOV (sim_cpu *cpu)
5367 {
5368 /* instr[31] = 0
5369 instr[30] = 32-bit(0)/64-bit(1)
5370 instr[29,21] = 00 1110 000
5371     instr[20,16] = size & index
5372 instr[15,10] = 0011 11
5373 instr[9,5] = V source
5374 instr[4,0] = R dest. */
5375
5376 unsigned vs = INSTR (9, 5);
5377 unsigned rd = INSTR (4, 0);
5378 unsigned index;
5379
5380 NYI_assert (29, 21, 0x070);
5381 NYI_assert (15, 10, 0x0F);
5382
5383 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
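  /* The position of the lowest set bit in instr[20,16] selects the
     element size; the bits above it hold the element index.  */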
5384 if (INSTR (16, 16))
5385 {
5386 /* Byte transfer. */
5387 index = INSTR (20, 17);
5388 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5389 aarch64_get_vec_u8 (cpu, vs, index));
5390 }
5391 else if (INSTR (17, 17))
5392 {
5393 index = INSTR (20, 18);
5394 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5395 aarch64_get_vec_u16 (cpu, vs, index));
5396 }
5397 else if (INSTR (18, 18))
5398 {
5399 index = INSTR (20, 19);
5400 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5401 aarch64_get_vec_u32 (cpu, vs, index));
5402 }
5403 else
5404 {
5405 if (INSTR (30, 30) != 1)
5406 HALT_UNALLOC;
5407
5408 index = INSTR (20, 20);
5409 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5410 aarch64_get_vec_u64 (cpu, vs, index));
5411 }
5412 }
5413
5414 static void
5415 do_vec_FABS (sim_cpu *cpu)
5416 {
5417 /* instr[31] = 0
5418 instr[30] = half(0)/full(1)
5419 instr[29,23] = 00 1110 1
5420 instr[22] = float(0)/double(1)
5421 instr[21,16] = 10 0000
5422 instr[15,10] = 1111 10
5423 instr[9,5] = Vn
5424 instr[4,0] = Vd. */
5425
5426 unsigned vn = INSTR (9, 5);
5427 unsigned vd = INSTR (4, 0);
5428 unsigned full = INSTR (30, 30);
5429 unsigned i;
5430
5431 NYI_assert (29, 23, 0x1D);
5432 NYI_assert (21, 10, 0x83E);
5433
5434 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5435 if (INSTR (22, 22))
5436 {
5437 if (! full)
5438 HALT_NYI;
5439
5440 for (i = 0; i < 2; i++)
5441 aarch64_set_vec_double (cpu, vd, i,
5442 fabs (aarch64_get_vec_double (cpu, vn, i)));
5443 }
5444 else
5445 {
5446 for (i = 0; i < (full ? 4 : 2); i++)
5447 aarch64_set_vec_float (cpu, vd, i,
5448 fabsf (aarch64_get_vec_float (cpu, vn, i)));
5449 }
5450 }
5451
5452 static void
5453 do_vec_FCVTZS (sim_cpu *cpu)
5454 {
5455 /* instr[31] = 0
5456 instr[30] = half (0) / all (1)
5457 instr[29,23] = 00 1110 1
5458 instr[22] = single (0) / double (1)
5459 instr[21,10] = 10 0001 1011 10
5460 instr[9,5] = Rn
5461 instr[4,0] = Rd. */
5462
5463 unsigned rn = INSTR (9, 5);
5464 unsigned rd = INSTR (4, 0);
5465 unsigned full = INSTR (30, 30);
5466 unsigned i;
5467
5468 NYI_assert (31, 31, 0);
5469 NYI_assert (29, 23, 0x1D);
5470 NYI_assert (21, 10, 0x86E);
5471
5472 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5473 if (INSTR (22, 22))
5474 {
5475 if (! full)
5476 HALT_UNALLOC;
5477
5478 for (i = 0; i < 2; i++)
5479 aarch64_set_vec_s64 (cpu, rd, i,
5480 (int64_t) aarch64_get_vec_double (cpu, rn, i));
5481 }
5482 else
5483 for (i = 0; i < (full ? 4 : 2); i++)
5484 aarch64_set_vec_s32 (cpu, rd, i,
5485 (int32_t) aarch64_get_vec_float (cpu, rn, i));
5486 }
5487
5488 static void
5489 do_vec_REV64 (sim_cpu *cpu)
5490 {
5491 /* instr[31] = 0
5492 instr[30] = full/half
5493 instr[29,24] = 00 1110
5494 instr[23,22] = size
5495 instr[21,10] = 10 0000 0000 10
5496 instr[9,5] = Rn
5497 instr[4,0] = Rd. */
5498
5499 unsigned rn = INSTR (9, 5);
5500 unsigned rd = INSTR (4, 0);
5501 unsigned size = INSTR (23, 22);
5502 unsigned full = INSTR (30, 30);
5503 unsigned i;
5504 FRegister val;
5505
5506 NYI_assert (29, 24, 0x0E);
5507 NYI_assert (21, 10, 0x802);
5508
5509 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
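  /* Reversing the elements within each 64-bit doubleword is done by
     XOR-ing the element index: with 7 for bytes, 3 for halfwords and
     1 for words.  */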
5510 switch (size)
5511 {
5512 case 0:
5513 for (i = 0; i < (full ? 16 : 8); i++)
5514 val.b[i ^ 0x7] = aarch64_get_vec_u8 (cpu, rn, i);
5515 break;
5516
5517 case 1:
5518 for (i = 0; i < (full ? 8 : 4); i++)
5519 val.h[i ^ 0x3] = aarch64_get_vec_u16 (cpu, rn, i);
5520 break;
5521
5522 case 2:
5523 for (i = 0; i < (full ? 4 : 2); i++)
5524 val.w[i ^ 0x1] = aarch64_get_vec_u32 (cpu, rn, i);
5525 break;
5526
5527 case 3:
5528 HALT_UNALLOC;
5529 }
5530
5531 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5532 if (full)
5533 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5534 }
5535
5536 static void
5537 do_vec_REV16 (sim_cpu *cpu)
5538 {
5539 /* instr[31] = 0
5540 instr[30] = full/half
5541 instr[29,24] = 00 1110
5542 instr[23,22] = size
5543 instr[21,10] = 10 0000 0001 10
5544 instr[9,5] = Rn
5545 instr[4,0] = Rd. */
5546
5547 unsigned rn = INSTR (9, 5);
5548 unsigned rd = INSTR (4, 0);
5549 unsigned size = INSTR (23, 22);
5550 unsigned full = INSTR (30, 30);
5551 unsigned i;
5552 FRegister val;
5553
5554 NYI_assert (29, 24, 0x0E);
5555 NYI_assert (21, 10, 0x806);
5556
5557 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5558 switch (size)
5559 {
5560 case 0:
5561 for (i = 0; i < (full ? 16 : 8); i++)
5562 val.b[i ^ 0x1] = aarch64_get_vec_u8 (cpu, rn, i);
5563 break;
5564
5565 default:
5566 HALT_UNALLOC;
5567 }
5568
5569 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5570 if (full)
5571 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5572 }
5573
5574 static void
5575 do_vec_op1 (sim_cpu *cpu)
5576 {
5577 /* instr[31] = 0
5578 instr[30] = half/full
5579 instr[29,24] = 00 1110
5580 instr[23,21] = ???
5581 instr[20,16] = Vm
5582 instr[15,10] = sub-opcode
5583 instr[9,5] = Vn
5584 instr[4,0] = Vd */
5585 NYI_assert (29, 24, 0x0E);
5586
5587 if (INSTR (21, 21) == 0)
5588 {
5589 if (INSTR (23, 22) == 0)
5590 {
5591 if (INSTR (30, 30) == 1
5592 && INSTR (17, 14) == 0
5593 && INSTR (12, 10) == 7)
5594 return do_vec_ins_2 (cpu);
5595
5596 switch (INSTR (15, 10))
5597 {
5598 case 0x01: do_vec_DUP_vector_into_vector (cpu); return;
5599 case 0x03: do_vec_DUP_scalar_into_vector (cpu); return;
5600 case 0x07: do_vec_INS (cpu); return;
5601 case 0x0A: do_vec_TRN (cpu); return;
5602
5603 case 0x0F:
5604 if (INSTR (17, 16) == 0)
5605 {
5606 do_vec_MOV_into_scalar (cpu);
5607 return;
5608 }
5609 break;
5610
5611 case 0x00:
5612 case 0x08:
5613 case 0x10:
5614 case 0x18:
5615 do_vec_TBL (cpu); return;
5616
5617 case 0x06:
5618 case 0x16:
5619 do_vec_UZP (cpu); return;
5620
5621 case 0x0E:
5622 case 0x1E:
5623 do_vec_ZIP (cpu); return;
5624
5625 default:
5626 HALT_NYI;
5627 }
5628 }
5629
5630 switch (INSTR (13, 10))
5631 {
5632 case 0x6: do_vec_UZP (cpu); return;
5633 case 0xE: do_vec_ZIP (cpu); return;
5634 case 0xA: do_vec_TRN (cpu); return;
5635 case 0xF: do_vec_UMOV (cpu); return;
5636 default: HALT_NYI;
5637 }
5638 }
5639
5640 switch (INSTR (15, 10))
5641 {
5642 case 0x02: do_vec_REV64 (cpu); return;
5643 case 0x06: do_vec_REV16 (cpu); return;
5644
5645 case 0x07:
5646 switch (INSTR (23, 21))
5647 {
5648 case 1: do_vec_AND (cpu); return;
5649 case 3: do_vec_BIC (cpu); return;
5650 case 5: do_vec_ORR (cpu); return;
5651 case 7: do_vec_ORN (cpu); return;
5652 default: HALT_NYI;
5653 }
5654
5655 case 0x08: do_vec_sub_long (cpu); return;
5656 case 0x0a: do_vec_XTN (cpu); return;
5657 case 0x11: do_vec_SSHL (cpu); return;
5658 case 0x16: do_vec_CNT (cpu); return;
5659 case 0x19: do_vec_max (cpu); return;
5660 case 0x1B: do_vec_min (cpu); return;
5661 case 0x21: do_vec_add (cpu); return;
5662 case 0x25: do_vec_MLA (cpu); return;
5663 case 0x27: do_vec_mul (cpu); return;
5664 case 0x2F: do_vec_ADDP (cpu); return;
5665 case 0x30: do_vec_mull (cpu); return;
5666 case 0x33: do_vec_FMLA (cpu); return;
5667 case 0x35: do_vec_fadd (cpu); return;
5668
5669 case 0x2E:
5670 switch (INSTR (20, 16))
5671 {
5672 case 0x00: do_vec_ABS (cpu); return;
5673 case 0x01: do_vec_FCVTZS (cpu); return;
5674 case 0x11: do_vec_ADDV (cpu); return;
5675 default: HALT_NYI;
5676 }
5677
5678 case 0x31:
5679 case 0x3B:
5680 do_vec_Fminmax (cpu); return;
5681
5682 case 0x0D:
5683 case 0x0F:
5684 case 0x22:
5685 case 0x23:
5686 case 0x26:
5687 case 0x2A:
5688 case 0x32:
5689 case 0x36:
5690 case 0x39:
5691 case 0x3A:
5692 do_vec_compare (cpu); return;
5693
5694 case 0x3E:
5695 do_vec_FABS (cpu); return;
5696
5697 default:
5698 HALT_NYI;
5699 }
5700 }
5701
5702 static void
5703 do_vec_xtl (sim_cpu *cpu)
5704 {
5705 /* instr[31] = 0
5706 instr[30,29] = SXTL (00), UXTL (01), SXTL2 (10), UXTL2 (11)
5707 instr[28,22] = 0 1111 00
5708 instr[21,16] = size & shift (USHLL, SSHLL, USHLL2, SSHLL2)
5709 instr[15,10] = 1010 01
5710 instr[9,5] = V source
5711 instr[4,0] = V dest. */
5712
5713 unsigned vs = INSTR (9, 5);
5714 unsigned vd = INSTR (4, 0);
5715 unsigned i, shift, bias = 0;
5716
5717 NYI_assert (28, 22, 0x3C);
5718 NYI_assert (15, 10, 0x29);
5719
5720 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
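  /* The "2" variants (SXTL2, UXTL2, etc) take their input from the
     upper half of the source register, which is what the bias value
     selects.  */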
5721 switch (INSTR (30, 29))
5722 {
5723 case 2: /* SXTL2, SSHLL2. */
5724       bias = 2; /* Fall through.  */
5725 case 0: /* SXTL, SSHLL. */
5726 if (INSTR (21, 21))
5727 {
5728 int64_t val1, val2;
5729
5730 shift = INSTR (20, 16);
5731 /* Get the source values before setting the destination values
5732 in case the source and destination are the same. */
5733 	  val1 = (int64_t) aarch64_get_vec_s32 (cpu, vs, bias) << shift;
5734 	  val2 = (int64_t) aarch64_get_vec_s32 (cpu, vs, bias + 1) << shift;
5735 aarch64_set_vec_s64 (cpu, vd, 0, val1);
5736 aarch64_set_vec_s64 (cpu, vd, 1, val2);
5737 }
5738 else if (INSTR (20, 20))
5739 {
5740 int32_t v[4];
5741 int32_t v1,v2,v3,v4;
5742
5743 shift = INSTR (19, 16);
5744 bias *= 2;
5745 for (i = 0; i < 4; i++)
5746 v[i] = aarch64_get_vec_s16 (cpu, vs, bias + i) << shift;
5747 for (i = 0; i < 4; i++)
5748 aarch64_set_vec_s32 (cpu, vd, i, v[i]);
5749 }
5750 else
5751 {
5752 int16_t v[8];
5753 NYI_assert (19, 19, 1);
5754
5755 shift = INSTR (18, 16);
5756 bias *= 4;
5757 for (i = 0; i < 8; i++)
5758 v[i] = aarch64_get_vec_s8 (cpu, vs, i + bias) << shift;
5759 for (i = 0; i < 8; i++)
5760 aarch64_set_vec_s16 (cpu, vd, i, v[i]);
5761 }
5762 return;
5763
5764 case 3: /* UXTL2, USHLL2. */
5765       bias = 2; /* Fall through.  */
5766 case 1: /* UXTL, USHLL. */
5767 if (INSTR (21, 21))
5768 {
5769 uint64_t v1, v2;
5770 shift = INSTR (20, 16);
5771 	  v1 = (uint64_t) aarch64_get_vec_u32 (cpu, vs, bias) << shift;
5772 	  v2 = (uint64_t) aarch64_get_vec_u32 (cpu, vs, bias + 1) << shift;
5773 aarch64_set_vec_u64 (cpu, vd, 0, v1);
5774 aarch64_set_vec_u64 (cpu, vd, 1, v2);
5775 }
5776 else if (INSTR (20, 20))
5777 {
5778 uint32_t v[4];
5779 shift = INSTR (19, 16);
5780 bias *= 2;
5781 for (i = 0; i < 4; i++)
5782 v[i] = aarch64_get_vec_u16 (cpu, vs, i + bias) << shift;
5783 for (i = 0; i < 4; i++)
5784 aarch64_set_vec_u32 (cpu, vd, i, v[i]);
5785 }
5786 else
5787 {
5788 uint16_t v[8];
5789 NYI_assert (19, 19, 1);
5790
5791 shift = INSTR (18, 16);
5792 bias *= 4;
5793 for (i = 0; i < 8; i++)
5794 v[i] = aarch64_get_vec_u8 (cpu, vs, i + bias) << shift;
5795 for (i = 0; i < 8; i++)
5796 aarch64_set_vec_u16 (cpu, vd, i, v[i]);
5797 }
5798 return;
5799 }
5800 }
5801
5802 static void
5803 do_vec_SHL (sim_cpu *cpu)
5804 {
5805 /* instr [31] = 0
5806 instr [30] = half(0)/full(1)
5807 instr [29,23] = 001 1110
5808 instr [22,16] = size and shift amount
5809 instr [15,10] = 01 0101
5810 instr [9, 5] = Vs
5811 instr [4, 0] = Vd. */
5812
5813 int shift;
5814 int full = INSTR (30, 30);
5815 unsigned vs = INSTR (9, 5);
5816 unsigned vd = INSTR (4, 0);
5817 unsigned i;
5818
5819 NYI_assert (29, 23, 0x1E);
5820 NYI_assert (15, 10, 0x15);
5821
5822 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
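  /* The highest set bit in instr[22,19] selects the element size and
     the immediate bits below it give the left shift count.  */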
5823 if (INSTR (22, 22))
5824 {
5825 shift = INSTR (21, 16);
5826
5827 if (full == 0)
5828 HALT_UNALLOC;
5829
5830 for (i = 0; i < 2; i++)
5831 {
5832 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5833 aarch64_set_vec_u64 (cpu, vd, i, val << shift);
5834 }
5835
5836 return;
5837 }
5838
5839 if (INSTR (21, 21))
5840 {
5841 shift = INSTR (20, 16);
5842
5843 for (i = 0; i < (full ? 4 : 2); i++)
5844 {
5845 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5846 aarch64_set_vec_u32 (cpu, vd, i, val << shift);
5847 }
5848
5849 return;
5850 }
5851
5852 if (INSTR (20, 20))
5853 {
5854 shift = INSTR (19, 16);
5855
5856 for (i = 0; i < (full ? 8 : 4); i++)
5857 {
5858 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5859 aarch64_set_vec_u16 (cpu, vd, i, val << shift);
5860 }
5861
5862 return;
5863 }
5864
5865 if (INSTR (19, 19) == 0)
5866 HALT_UNALLOC;
5867
5868 shift = INSTR (18, 16);
5869
5870 for (i = 0; i < (full ? 16 : 8); i++)
5871 {
5872 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5873 aarch64_set_vec_u8 (cpu, vd, i, val << shift);
5874 }
5875 }
5876
5877 static void
5878 do_vec_SSHR_USHR (sim_cpu *cpu)
5879 {
5880 /* instr [31] = 0
5881 instr [30] = half(0)/full(1)
5882 instr [29] = signed(0)/unsigned(1)
5883 instr [28,23] = 0 1111 0
5884 instr [22,16] = size and shift amount
5885 instr [15,10] = 0000 01
5886 instr [9, 5] = Vs
5887 instr [4, 0] = Vd. */
5888
5889 int full = INSTR (30, 30);
5890 int sign = ! INSTR (29, 29);
5891 unsigned shift = INSTR (22, 16);
5892 unsigned vs = INSTR (9, 5);
5893 unsigned vd = INSTR (4, 0);
5894 unsigned i;
5895
5896 NYI_assert (28, 23, 0x1E);
5897 NYI_assert (15, 10, 0x01);
5898
5899 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
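  /* The same immediate layout as SHL, but the field holds twice the
     element size minus the shift, so the right shift count is
     recovered by subtracting from 128, 64, 32 or 16.  */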
5900 if (INSTR (22, 22))
5901 {
5902 shift = 128 - shift;
5903
5904 if (full == 0)
5905 HALT_UNALLOC;
5906
5907 if (sign)
5908 for (i = 0; i < 2; i++)
5909 {
5910 int64_t val = aarch64_get_vec_s64 (cpu, vs, i);
5911 aarch64_set_vec_s64 (cpu, vd, i, val >> shift);
5912 }
5913 else
5914 for (i = 0; i < 2; i++)
5915 {
5916 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5917 aarch64_set_vec_u64 (cpu, vd, i, val >> shift);
5918 }
5919
5920 return;
5921 }
5922
5923 if (INSTR (21, 21))
5924 {
5925 shift = 64 - shift;
5926
5927 if (sign)
5928 for (i = 0; i < (full ? 4 : 2); i++)
5929 {
5930 int32_t val = aarch64_get_vec_s32 (cpu, vs, i);
5931 aarch64_set_vec_s32 (cpu, vd, i, val >> shift);
5932 }
5933 else
5934 for (i = 0; i < (full ? 4 : 2); i++)
5935 {
5936 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5937 aarch64_set_vec_u32 (cpu, vd, i, val >> shift);
5938 }
5939
5940 return;
5941 }
5942
5943 if (INSTR (20, 20))
5944 {
5945 shift = 32 - shift;
5946
5947 if (sign)
5948 for (i = 0; i < (full ? 8 : 4); i++)
5949 {
5950 int16_t val = aarch64_get_vec_s16 (cpu, vs, i);
5951 aarch64_set_vec_s16 (cpu, vd, i, val >> shift);
5952 }
5953 else
5954 for (i = 0; i < (full ? 8 : 4); i++)
5955 {
5956 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5957 aarch64_set_vec_u16 (cpu, vd, i, val >> shift);
5958 }
5959
5960 return;
5961 }
5962
5963 if (INSTR (19, 19) == 0)
5964 HALT_UNALLOC;
5965
5966 shift = 16 - shift;
5967
5968 if (sign)
5969 for (i = 0; i < (full ? 16 : 8); i++)
5970 {
5971 int8_t val = aarch64_get_vec_s8 (cpu, vs, i);
5972 aarch64_set_vec_s8 (cpu, vd, i, val >> shift);
5973 }
5974 else
5975 for (i = 0; i < (full ? 16 : 8); i++)
5976 {
5977 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5978 aarch64_set_vec_u8 (cpu, vd, i, val >> shift);
5979 }
5980 }
5981
5982 static void
5983 do_vec_MUL_by_element (sim_cpu *cpu)
5984 {
5985 /* instr[31] = 0
5986 instr[30] = half/full
5987 instr[29,24] = 00 1111
5988 instr[23,22] = size
5989 instr[21] = L
5990 instr[20] = M
5991 instr[19,16] = m
5992 instr[15,12] = 1000
5993 instr[11] = H
5994 instr[10] = 0
5995 instr[9,5] = Vn
5996 instr[4,0] = Vd */
5997
5998 unsigned full = INSTR (30, 30);
5999 unsigned L = INSTR (21, 21);
6000 unsigned H = INSTR (11, 11);
6001 unsigned vn = INSTR (9, 5);
6002 unsigned vd = INSTR (4, 0);
6003 unsigned size = INSTR (23, 22);
6004 unsigned index;
6005 unsigned vm;
6006 unsigned e;
6007
6008 NYI_assert (29, 24, 0x0F);
6009 NYI_assert (15, 12, 0x8);
6010 NYI_assert (10, 10, 0);
6011
6012 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
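  /* The index of the Vm element is assembled from the H, L and M
     bits: 16-bit elements use all three and restrict Vm to V0-V15,
     while 32-bit elements use only H and L.  */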
6013 switch (size)
6014 {
6015 case 1:
6016 {
6017 /* 16 bit products. */
6018 uint16_t product;
6019 uint16_t element1;
6020 uint16_t element2;
6021
6022 index = (H << 2) | (L << 1) | INSTR (20, 20);
6023 vm = INSTR (19, 16);
6024 element2 = aarch64_get_vec_u16 (cpu, vm, index);
6025
6026 for (e = 0; e < (full ? 8 : 4); e ++)
6027 {
6028 element1 = aarch64_get_vec_u16 (cpu, vn, e);
6029 product = element1 * element2;
6030 aarch64_set_vec_u16 (cpu, vd, e, product);
6031 }
6032 }
6033 break;
6034
6035 case 2:
6036 {
6037 /* 32 bit products. */
6038 uint32_t product;
6039 uint32_t element1;
6040 uint32_t element2;
6041
6042 index = (H << 1) | L;
6043 vm = INSTR (20, 16);
6044 element2 = aarch64_get_vec_u32 (cpu, vm, index);
6045
6046 for (e = 0; e < (full ? 4 : 2); e ++)
6047 {
6048 element1 = aarch64_get_vec_u32 (cpu, vn, e);
6049 product = element1 * element2;
6050 aarch64_set_vec_u32 (cpu, vd, e, product);
6051 }
6052 }
6053 break;
6054
6055 default:
6056 HALT_UNALLOC;
6057 }
6058 }
6059
6060 static void
6061 do_FMLA_by_element (sim_cpu *cpu)
6062 {
6063 /* instr[31] = 0
6064 instr[30] = half/full
6065 instr[29,23] = 00 1111 1
6066 instr[22] = size
6067 instr[21] = L
6068 instr[20,16] = m
6069 instr[15,12] = 0001
6070 instr[11] = H
6071 instr[10] = 0
6072 instr[9,5] = Vn
6073 instr[4,0] = Vd */
6074
6075 unsigned full = INSTR (30, 30);
6076 unsigned size = INSTR (22, 22);
6077 unsigned L = INSTR (21, 21);
6078 unsigned vm = INSTR (20, 16);
6079 unsigned H = INSTR (11, 11);
6080 unsigned vn = INSTR (9, 5);
6081 unsigned vd = INSTR (4, 0);
6082 unsigned e;
6083
6084 NYI_assert (29, 23, 0x1F);
6085 NYI_assert (15, 12, 0x1);
6086 NYI_assert (10, 10, 0);
6087
6088 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6089 if (size)
6090 {
6091 double element1, element2;
6092
6093 if (! full || L)
6094 HALT_UNALLOC;
6095
6096 element2 = aarch64_get_vec_double (cpu, vm, H);
6097
6098 for (e = 0; e < 2; e++)
6099 {
6100 element1 = aarch64_get_vec_double (cpu, vn, e);
6101 element1 *= element2;
6102 element1 += aarch64_get_vec_double (cpu, vd, e);
6103 aarch64_set_vec_double (cpu, vd, e, element1);
6104 }
6105 }
6106 else
6107 {
6108 float element1;
6109 float element2 = aarch64_get_vec_float (cpu, vm, (H << 1) | L);
6110
6111 for (e = 0; e < (full ? 4 : 2); e++)
6112 {
6113 element1 = aarch64_get_vec_float (cpu, vn, e);
6114 element1 *= element2;
6115 element1 += aarch64_get_vec_float (cpu, vd, e);
6116 aarch64_set_vec_float (cpu, vd, e, element1);
6117 }
6118 }
6119 }
6120
6121 static void
6122 do_vec_op2 (sim_cpu *cpu)
6123 {
6124 /* instr[31] = 0
6125 instr[30] = half/full
6126 instr[29,24] = 00 1111
6127 instr[23] = ?
6128 instr[22,16] = element size & index
6129 instr[15,10] = sub-opcode
6130     instr[9,5] = Vn
6131 instr[4,0] = Vd */
6132
6133 NYI_assert (29, 24, 0x0F);
6134
6135 if (INSTR (23, 23) != 0)
6136 {
6137 switch (INSTR (15, 10))
6138 {
6139 case 0x04:
6140 case 0x06:
6141 do_FMLA_by_element (cpu);
6142 return;
6143
6144 case 0x20:
6145 case 0x22:
6146 do_vec_MUL_by_element (cpu);
6147 return;
6148
6149 default:
6150 HALT_NYI;
6151 }
6152 }
6153 else
6154 {
6155 switch (INSTR (15, 10))
6156 {
6157 case 0x01: do_vec_SSHR_USHR (cpu); return;
6158 case 0x15: do_vec_SHL (cpu); return;
6159 case 0x20:
6160 case 0x22: do_vec_MUL_by_element (cpu); return;
6161 case 0x29: do_vec_xtl (cpu); return;
6162 default: HALT_NYI;
6163 }
6164 }
6165 }
6166
6167 static void
6168 do_vec_neg (sim_cpu *cpu)
6169 {
6170 /* instr[31] = 0
6171 instr[30] = full(1)/half(0)
6172 instr[29,24] = 10 1110
6173 instr[23,22] = size: byte(00), half (01), word (10), long (11)
6174 instr[21,10] = 1000 0010 1110
6175 instr[9,5] = Vs
6176 instr[4,0] = Vd */
6177
6178 int full = INSTR (30, 30);
6179 unsigned vs = INSTR (9, 5);
6180 unsigned vd = INSTR (4, 0);
6181 unsigned i;
6182
6183 NYI_assert (29, 24, 0x2E);
6184 NYI_assert (21, 10, 0x82E);
6185
6186 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6187 switch (INSTR (23, 22))
6188 {
6189 case 0:
6190 for (i = 0; i < (full ? 16 : 8); i++)
6191 aarch64_set_vec_s8 (cpu, vd, i, - aarch64_get_vec_s8 (cpu, vs, i));
6192 return;
6193
6194 case 1:
6195 for (i = 0; i < (full ? 8 : 4); i++)
6196 aarch64_set_vec_s16 (cpu, vd, i, - aarch64_get_vec_s16 (cpu, vs, i));
6197 return;
6198
6199 case 2:
6200 for (i = 0; i < (full ? 4 : 2); i++)
6201 aarch64_set_vec_s32 (cpu, vd, i, - aarch64_get_vec_s32 (cpu, vs, i));
6202 return;
6203
6204 case 3:
6205 if (! full)
6206 HALT_NYI;
6207 for (i = 0; i < 2; i++)
6208 aarch64_set_vec_s64 (cpu, vd, i, - aarch64_get_vec_s64 (cpu, vs, i));
6209 return;
6210 }
6211 }
6212
6213 static void
6214 do_vec_sqrt (sim_cpu *cpu)
6215 {
6216 /* instr[31] = 0
6217 instr[30] = full(1)/half(0)
6218 instr[29,23] = 101 1101
6219 instr[22] = single(0)/double(1)
6220 instr[21,10] = 1000 0111 1110
6221 instr[9,5] = Vs
6222 instr[4,0] = Vd. */
6223
6224 int full = INSTR (30, 30);
6225 unsigned vs = INSTR (9, 5);
6226 unsigned vd = INSTR (4, 0);
6227 unsigned i;
6228
  NYI_assert (29, 23, 0x5D);
6230 NYI_assert (21, 10, 0x87E);
6231
6232 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6233 if (INSTR (22, 22) == 0)
6234 for (i = 0; i < (full ? 4 : 2); i++)
6235 aarch64_set_vec_float (cpu, vd, i,
6236 sqrtf (aarch64_get_vec_float (cpu, vs, i)));
6237 else
6238 for (i = 0; i < 2; i++)
6239 aarch64_set_vec_double (cpu, vd, i,
6240 sqrt (aarch64_get_vec_double (cpu, vs, i)));
6241 }
6242
6243 static void
6244 do_vec_mls_indexed (sim_cpu *cpu)
6245 {
6246 /* instr[31] = 0
6247 instr[30] = half(0)/full(1)
6248 instr[29,24] = 10 1111
     instr[23,22] = 16-bit(01)/32-bit(10)
     instr[11],[21],[20] = element index H:L:M (if 16-bit)
     instr[11],[21] = element index H:L (if 32-bit)
     instr[19,16] = Vm (if 16-bit)
     instr[20,16] = Vm (if 32-bit)
     instr[15,12] = 0100
     instr[10] = 0
     instr[9,5] = Vs
     instr[4,0] = Vd.  */
6258
6259 int full = INSTR (30, 30);
6260 unsigned vs = INSTR (9, 5);
6261 unsigned vd = INSTR (4, 0);
6262 unsigned vm = INSTR (20, 16);
6263 unsigned i;
6264
6265 NYI_assert (15, 12, 4);
6266 NYI_assert (10, 10, 0);
6267
6268 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6269 switch (INSTR (23, 22))
6270 {
6271 case 1:
6272 {
	unsigned elem;
	uint32_t val;

	/* For 16-bit elements the register is instr[19,16] and
	   instr[20] is the M bit of the element index H:L:M.  */
	elem = (INSTR (11, 11) << 2) | (INSTR (21, 21) << 1) | INSTR (20, 20);
	val = aarch64_get_vec_u16 (cpu, vm & 0xF, elem);

	for (i = 0; i < (full ? 8 : 4); i++)
	  aarch64_set_vec_u16 (cpu, vd, i,
			       aarch64_get_vec_u16 (cpu, vd, i) -
			       (aarch64_get_vec_u16 (cpu, vs, i) * val));
6286 return;
6287 }
6288
6289 case 2:
6290 {
	unsigned elem = (INSTR (11, 11) << 1) | INSTR (21, 21);
	uint32_t val = aarch64_get_vec_u32 (cpu, vm, elem);

	for (i = 0; i < (full ? 4 : 2); i++)
	  aarch64_set_vec_u32 (cpu, vd, i,
			       aarch64_get_vec_u32 (cpu, vd, i) -
			       (aarch64_get_vec_u32 (cpu, vs, i) * val));
6298 return;
6299 }
6300
6301 case 0:
6302 case 3:
6303 default:
6304 HALT_NYI;
6305 }
6306 }
6307
6308 static void
6309 do_vec_SUB (sim_cpu *cpu)
6310 {
6311 /* instr [31] = 0
6312 instr [30] = half(0)/full(1)
6313 instr [29,24] = 10 1110
     instr [23,22] = size: byte(00), half(01), word(10), long(11)
6315 instr [21] = 1
6316 instr [20,16] = Vm
6317 instr [15,10] = 10 0001
6318 instr [9, 5] = Vn
6319 instr [4, 0] = Vd. */
6320
6321 unsigned full = INSTR (30, 30);
6322 unsigned vm = INSTR (20, 16);
6323 unsigned vn = INSTR (9, 5);
6324 unsigned vd = INSTR (4, 0);
6325 unsigned i;
6326
6327 NYI_assert (29, 24, 0x2E);
6328 NYI_assert (21, 21, 1);
6329 NYI_assert (15, 10, 0x21);
6330
6331 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6332 switch (INSTR (23, 22))
6333 {
6334 case 0:
6335 for (i = 0; i < (full ? 16 : 8); i++)
6336 aarch64_set_vec_s8 (cpu, vd, i,
6337 aarch64_get_vec_s8 (cpu, vn, i)
6338 - aarch64_get_vec_s8 (cpu, vm, i));
6339 return;
6340
6341 case 1:
6342 for (i = 0; i < (full ? 8 : 4); i++)
6343 aarch64_set_vec_s16 (cpu, vd, i,
6344 aarch64_get_vec_s16 (cpu, vn, i)
6345 - aarch64_get_vec_s16 (cpu, vm, i));
6346 return;
6347
6348 case 2:
6349 for (i = 0; i < (full ? 4 : 2); i++)
6350 aarch64_set_vec_s32 (cpu, vd, i,
6351 aarch64_get_vec_s32 (cpu, vn, i)
6352 - aarch64_get_vec_s32 (cpu, vm, i));
6353 return;
6354
6355 case 3:
6356 if (full == 0)
6357 HALT_UNALLOC;
6358
6359 for (i = 0; i < 2; i++)
6360 aarch64_set_vec_s64 (cpu, vd, i,
6361 aarch64_get_vec_s64 (cpu, vn, i)
6362 - aarch64_get_vec_s64 (cpu, vm, i));
6363 return;
6364 }
6365 }
6366
6367 static void
6368 do_vec_MLS (sim_cpu *cpu)
6369 {
6370 /* instr [31] = 0
6371 instr [30] = half(0)/full(1)
6372 instr [29,24] = 10 1110
     instr [23,22] = size: byte(00), half(01), word(10)
6374 instr [21] = 1
6375 instr [20,16] = Vm
6376 instr [15,10] = 10 0101
6377 instr [9, 5] = Vn
6378 instr [4, 0] = Vd. */
6379
6380 unsigned full = INSTR (30, 30);
6381 unsigned vm = INSTR (20, 16);
6382 unsigned vn = INSTR (9, 5);
6383 unsigned vd = INSTR (4, 0);
6384 unsigned i;
6385
6386 NYI_assert (29, 24, 0x2E);
6387 NYI_assert (21, 21, 1);
6388 NYI_assert (15, 10, 0x25);
6389
6390 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6391 switch (INSTR (23, 22))
6392 {
6393 case 0:
6394 for (i = 0; i < (full ? 16 : 8); i++)
6395 aarch64_set_vec_u8 (cpu, vd, i,
6396 aarch64_get_vec_u8 (cpu, vd, i)
6397 - (aarch64_get_vec_u8 (cpu, vn, i)
6398 * aarch64_get_vec_u8 (cpu, vm, i)));
6399 return;
6400
6401 case 1:
6402 for (i = 0; i < (full ? 8 : 4); i++)
6403 aarch64_set_vec_u16 (cpu, vd, i,
6404 aarch64_get_vec_u16 (cpu, vd, i)
6405 - (aarch64_get_vec_u16 (cpu, vn, i)
6406 * aarch64_get_vec_u16 (cpu, vm, i)));
6407 return;
6408
6409 case 2:
6410 for (i = 0; i < (full ? 4 : 2); i++)
6411 aarch64_set_vec_u32 (cpu, vd, i,
6412 aarch64_get_vec_u32 (cpu, vd, i)
6413 - (aarch64_get_vec_u32 (cpu, vn, i)
6414 * aarch64_get_vec_u32 (cpu, vm, i)));
6415 return;
6416
6417 default:
6418 HALT_UNALLOC;
6419 }
6420 }
6421
6422 static void
6423 do_vec_FDIV (sim_cpu *cpu)
6424 {
6425 /* instr [31] = 0
6426 instr [30] = half(0)/full(1)
6427 instr [29,23] = 10 1110 0
     instr [22]    = float(0)/double(1)
6429 instr [21] = 1
6430 instr [20,16] = Vm
6431 instr [15,10] = 1111 11
6432 instr [9, 5] = Vn
6433 instr [4, 0] = Vd. */
6434
6435 unsigned full = INSTR (30, 30);
6436 unsigned vm = INSTR (20, 16);
6437 unsigned vn = INSTR (9, 5);
6438 unsigned vd = INSTR (4, 0);
6439 unsigned i;
6440
6441 NYI_assert (29, 23, 0x5C);
6442 NYI_assert (21, 21, 1);
6443 NYI_assert (15, 10, 0x3F);
6444
6445 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6446 if (INSTR (22, 22))
6447 {
6448 if (! full)
6449 HALT_UNALLOC;
6450
6451 for (i = 0; i < 2; i++)
6452 aarch64_set_vec_double (cpu, vd, i,
6453 aarch64_get_vec_double (cpu, vn, i)
6454 / aarch64_get_vec_double (cpu, vm, i));
6455 }
6456 else
6457 for (i = 0; i < (full ? 4 : 2); i++)
6458 aarch64_set_vec_float (cpu, vd, i,
6459 aarch64_get_vec_float (cpu, vn, i)
6460 / aarch64_get_vec_float (cpu, vm, i));
6461 }
6462
6463 static void
6464 do_vec_FMUL (sim_cpu *cpu)
6465 {
6466 /* instr [31] = 0
6467 instr [30] = half(0)/full(1)
6468 instr [29,23] = 10 1110 0
6469 instr [22] = float(0)/double(1)
6470 instr [21] = 1
6471 instr [20,16] = Vm
6472 instr [15,10] = 1101 11
6473 instr [9, 5] = Vn
6474 instr [4, 0] = Vd. */
6475
6476 unsigned full = INSTR (30, 30);
6477 unsigned vm = INSTR (20, 16);
6478 unsigned vn = INSTR (9, 5);
6479 unsigned vd = INSTR (4, 0);
6480 unsigned i;
6481
6482 NYI_assert (29, 23, 0x5C);
6483 NYI_assert (21, 21, 1);
6484 NYI_assert (15, 10, 0x37);
6485
6486 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6487 if (INSTR (22, 22))
6488 {
6489 if (! full)
6490 HALT_UNALLOC;
6491
6492 for (i = 0; i < 2; i++)
6493 aarch64_set_vec_double (cpu, vd, i,
6494 aarch64_get_vec_double (cpu, vn, i)
6495 * aarch64_get_vec_double (cpu, vm, i));
6496 }
6497 else
6498 for (i = 0; i < (full ? 4 : 2); i++)
6499 aarch64_set_vec_float (cpu, vd, i,
6500 aarch64_get_vec_float (cpu, vn, i)
6501 * aarch64_get_vec_float (cpu, vm, i));
6502 }
6503
6504 static void
6505 do_vec_FADDP (sim_cpu *cpu)
6506 {
6507 /* instr [31] = 0
6508 instr [30] = half(0)/full(1)
6509 instr [29,23] = 10 1110 0
6510 instr [22] = float(0)/double(1)
6511 instr [21] = 1
6512 instr [20,16] = Vm
6513 instr [15,10] = 1101 01
6514 instr [9, 5] = Vn
6515 instr [4, 0] = Vd. */
6516
6517 unsigned full = INSTR (30, 30);
6518 unsigned vm = INSTR (20, 16);
6519 unsigned vn = INSTR (9, 5);
6520 unsigned vd = INSTR (4, 0);
6521
6522 NYI_assert (29, 23, 0x5C);
6523 NYI_assert (21, 21, 1);
6524 NYI_assert (15, 10, 0x35);
6525
6526 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6527 if (INSTR (22, 22))
6528 {
      /* Extract values before adding them in case vd == vn/vm.  */
6530 double tmp1 = aarch64_get_vec_double (cpu, vn, 0);
6531 double tmp2 = aarch64_get_vec_double (cpu, vn, 1);
6532 double tmp3 = aarch64_get_vec_double (cpu, vm, 0);
6533 double tmp4 = aarch64_get_vec_double (cpu, vm, 1);
6534
6535 if (! full)
6536 HALT_UNALLOC;
6537
6538 aarch64_set_vec_double (cpu, vd, 0, tmp1 + tmp2);
6539 aarch64_set_vec_double (cpu, vd, 1, tmp3 + tmp4);
6540 }
6541 else
6542 {
      /* Extract values before adding them in case vd == vn/vm.  */
6544 float tmp1 = aarch64_get_vec_float (cpu, vn, 0);
6545 float tmp2 = aarch64_get_vec_float (cpu, vn, 1);
6546 float tmp5 = aarch64_get_vec_float (cpu, vm, 0);
6547 float tmp6 = aarch64_get_vec_float (cpu, vm, 1);
6548
6549 if (full)
6550 {
6551 float tmp3 = aarch64_get_vec_float (cpu, vn, 2);
6552 float tmp4 = aarch64_get_vec_float (cpu, vn, 3);
6553 float tmp7 = aarch64_get_vec_float (cpu, vm, 2);
6554 float tmp8 = aarch64_get_vec_float (cpu, vm, 3);
6555
6556 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6557 aarch64_set_vec_float (cpu, vd, 1, tmp3 + tmp4);
6558 aarch64_set_vec_float (cpu, vd, 2, tmp5 + tmp6);
6559 aarch64_set_vec_float (cpu, vd, 3, tmp7 + tmp8);
6560 }
6561 else
6562 {
6563 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6564 aarch64_set_vec_float (cpu, vd, 1, tmp5 + tmp6);
6565 }
6566 }
6567 }
6568
6569 static void
6570 do_vec_FSQRT (sim_cpu *cpu)
6571 {
6572 /* instr[31] = 0
6573 instr[30] = half(0)/full(1)
6574 instr[29,23] = 10 1110 1
6575 instr[22] = single(0)/double(1)
6576 instr[21,10] = 10 0001 1111 10
6577 instr[9,5] = Vsrc
6578 instr[4,0] = Vdest. */
6579
6580 unsigned vn = INSTR (9, 5);
6581 unsigned vd = INSTR (4, 0);
6582 unsigned full = INSTR (30, 30);
6583 int i;
6584
6585 NYI_assert (29, 23, 0x5D);
6586 NYI_assert (21, 10, 0x87E);
6587
6588 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6589 if (INSTR (22, 22))
6590 {
6591 if (! full)
6592 HALT_UNALLOC;
6593
6594 for (i = 0; i < 2; i++)
6595 aarch64_set_vec_double (cpu, vd, i,
6596 sqrt (aarch64_get_vec_double (cpu, vn, i)));
6597 }
6598 else
6599 {
6600 for (i = 0; i < (full ? 4 : 2); i++)
6601 aarch64_set_vec_float (cpu, vd, i,
6602 sqrtf (aarch64_get_vec_float (cpu, vn, i)));
6603 }
6604 }
6605
6606 static void
6607 do_vec_FNEG (sim_cpu *cpu)
6608 {
6609 /* instr[31] = 0
6610 instr[30] = half (0)/full (1)
6611 instr[29,23] = 10 1110 1
6612 instr[22] = single (0)/double (1)
6613 instr[21,10] = 10 0000 1111 10
6614 instr[9,5] = Vsrc
6615 instr[4,0] = Vdest. */
6616
6617 unsigned vn = INSTR (9, 5);
6618 unsigned vd = INSTR (4, 0);
6619 unsigned full = INSTR (30, 30);
6620 int i;
6621
6622 NYI_assert (29, 23, 0x5D);
6623 NYI_assert (21, 10, 0x83E);
6624
6625 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6626 if (INSTR (22, 22))
6627 {
6628 if (! full)
6629 HALT_UNALLOC;
6630
6631 for (i = 0; i < 2; i++)
6632 aarch64_set_vec_double (cpu, vd, i,
6633 - aarch64_get_vec_double (cpu, vn, i));
6634 }
6635 else
6636 {
6637 for (i = 0; i < (full ? 4 : 2); i++)
6638 aarch64_set_vec_float (cpu, vd, i,
6639 - aarch64_get_vec_float (cpu, vn, i));
6640 }
6641 }
6642
6643 static void
6644 do_vec_NOT (sim_cpu *cpu)
6645 {
6646 /* instr[31] = 0
6647 instr[30] = half (0)/full (1)
6648 instr[29,10] = 10 1110 0010 0000 0101 10
6649 instr[9,5] = Vn
     instr[4,0] = Vd.  */
6651
6652 unsigned vn = INSTR (9, 5);
6653 unsigned vd = INSTR (4, 0);
6654 unsigned i;
6655 int full = INSTR (30, 30);
6656
6657 NYI_assert (29, 10, 0xB8816);
6658
6659 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6660 for (i = 0; i < (full ? 16 : 8); i++)
6661 aarch64_set_vec_u8 (cpu, vd, i, ~ aarch64_get_vec_u8 (cpu, vn, i));
6662 }
6663
6664 static unsigned int
6665 clz (uint64_t val, unsigned size)
6666 {
6667 uint64_t mask = 1;
6668 int count;
6669
6670 mask <<= (size - 1);
6671 count = 0;
6672 do
6673 {
6674 if (val & mask)
6675 break;
6676 mask >>= 1;
6677 count ++;
6678 }
6679 while (mask);
6680
6681 return count;
6682 }
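
/* Illustrative cross-check for the helper above, not used by the
   simulator: assuming a GCC-style __builtin_clzll, the loop is
   equivalent to the branch-free form below for values that fit in
   SIZE bits.  Both return SIZE when VAL is zero, e.g.
   clz (0x80, 8) == 0 and clz (1, 64) == 63.  */
static unsigned int
clz_builtin_sketch (uint64_t val, unsigned size)
{
  /* __builtin_clzll counts leading zeros of a 64-bit word and is
     undefined for zero, hence the guard; rebase the count to the
     SIZE-bit element width.  */
  return val ? __builtin_clzll (val) - (64 - size) : size;
}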
6683
6684 static void
6685 do_vec_CLZ (sim_cpu *cpu)
6686 {
6687 /* instr[31] = 0
6688 instr[30] = half (0)/full (1)
6689 instr[29,24] = 10 1110
6690 instr[23,22] = size
6691 instr[21,10] = 10 0000 0100 10
6692 instr[9,5] = Vn
     instr[4,0] = Vd.  */
6694
6695 unsigned vn = INSTR (9, 5);
6696 unsigned vd = INSTR (4, 0);
6697 unsigned i;
6698 int full = INSTR (30,30);
6699
6700 NYI_assert (29, 24, 0x2E);
6701 NYI_assert (21, 10, 0x812);
6702
6703 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6704 switch (INSTR (23, 22))
6705 {
6706 case 0:
6707 for (i = 0; i < (full ? 16 : 8); i++)
6708 aarch64_set_vec_u8 (cpu, vd, i, clz (aarch64_get_vec_u8 (cpu, vn, i), 8));
6709 break;
6710 case 1:
6711 for (i = 0; i < (full ? 8 : 4); i++)
6712 aarch64_set_vec_u16 (cpu, vd, i, clz (aarch64_get_vec_u16 (cpu, vn, i), 16));
6713 break;
6714 case 2:
6715 for (i = 0; i < (full ? 4 : 2); i++)
6716 aarch64_set_vec_u32 (cpu, vd, i, clz (aarch64_get_vec_u32 (cpu, vn, i), 32));
6717 break;
6718 case 3:
6719 if (! full)
6720 HALT_UNALLOC;
6721 aarch64_set_vec_u64 (cpu, vd, 0, clz (aarch64_get_vec_u64 (cpu, vn, 0), 64));
6722 aarch64_set_vec_u64 (cpu, vd, 1, clz (aarch64_get_vec_u64 (cpu, vn, 1), 64));
6723 break;
6724 }
6725 }
6726
6727 static void
6728 do_vec_MOV_element (sim_cpu *cpu)
6729 {
6730 /* instr[31,21] = 0110 1110 000
6731 instr[20,16] = size & dest index
6732 instr[15] = 0
6733 instr[14,11] = source index
6734 instr[10] = 1
6735 instr[9,5] = Vs
     instr[4,0] = Vd.  */
6737
6738 unsigned vs = INSTR (9, 5);
6739 unsigned vd = INSTR (4, 0);
6740 unsigned src_index;
6741 unsigned dst_index;
6742
6743 NYI_assert (31, 21, 0x370);
6744 NYI_assert (15, 15, 0);
6745 NYI_assert (10, 10, 1);
6746
6747 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6748 if (INSTR (16, 16))
6749 {
6750 /* Move a byte. */
6751 src_index = INSTR (14, 11);
6752 dst_index = INSTR (20, 17);
6753 aarch64_set_vec_u8 (cpu, vd, dst_index,
6754 aarch64_get_vec_u8 (cpu, vs, src_index));
6755 }
6756 else if (INSTR (17, 17))
6757 {
6758 /* Move 16-bits. */
6759 NYI_assert (11, 11, 0);
6760 src_index = INSTR (14, 12);
6761 dst_index = INSTR (20, 18);
6762 aarch64_set_vec_u16 (cpu, vd, dst_index,
6763 aarch64_get_vec_u16 (cpu, vs, src_index));
6764 }
6765 else if (INSTR (18, 18))
6766 {
6767 /* Move 32-bits. */
6768 NYI_assert (12, 11, 0);
6769 src_index = INSTR (14, 13);
6770 dst_index = INSTR (20, 19);
6771 aarch64_set_vec_u32 (cpu, vd, dst_index,
6772 aarch64_get_vec_u32 (cpu, vs, src_index));
6773 }
6774 else
6775 {
6776 NYI_assert (19, 19, 1);
6777 NYI_assert (13, 11, 0);
6778 src_index = INSTR (14, 14);
6779 dst_index = INSTR (20, 20);
6780 aarch64_set_vec_u64 (cpu, vd, dst_index,
6781 aarch64_get_vec_u64 (cpu, vs, src_index));
6782 }
6783 }
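
/* The imm5 field decoded above selects both the element size and the
   destination index by the position of its lowest set bit.  A
   stand-alone restatement of the rule (hypothetical helper, not used
   by the decoder above):  */
static unsigned
mov_element_size_sketch (uint32_t imm5)
{
  if (imm5 & 0x1)
    return 8;	/* xxxx1: byte, dest index = imm5<4:1>.  */
  if (imm5 & 0x2)
    return 16;	/* xxx10: half, dest index = imm5<4:2>.  */
  if (imm5 & 0x4)
    return 32;	/* xx100: word, dest index = imm5<4:3>.  */
  return 64;	/* x1000: doubleword, dest index = imm5<4>.  */
}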
6784
6785 static void
6786 do_vec_REV32 (sim_cpu *cpu)
6787 {
6788 /* instr[31] = 0
6789 instr[30] = full/half
6790 instr[29,24] = 10 1110
6791 instr[23,22] = size
6792 instr[21,10] = 10 0000 0000 10
6793 instr[9,5] = Rn
6794 instr[4,0] = Rd. */
6795
6796 unsigned rn = INSTR (9, 5);
6797 unsigned rd = INSTR (4, 0);
6798 unsigned size = INSTR (23, 22);
6799 unsigned full = INSTR (30, 30);
6800 unsigned i;
6801 FRegister val;
6802
6803 NYI_assert (29, 24, 0x2E);
6804 NYI_assert (21, 10, 0x802);
6805
6806 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6807 switch (size)
6808 {
6809 case 0:
6810 for (i = 0; i < (full ? 16 : 8); i++)
6811 val.b[i ^ 0x3] = aarch64_get_vec_u8 (cpu, rn, i);
6812 break;
6813
6814 case 1:
6815 for (i = 0; i < (full ? 8 : 4); i++)
6816 val.h[i ^ 0x1] = aarch64_get_vec_u16 (cpu, rn, i);
6817 break;
6818
6819 default:
6820 HALT_UNALLOC;
6821 }
6822
6823 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
6824 if (full)
6825 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
6826 }
6827
6828 static void
6829 do_vec_EXT (sim_cpu *cpu)
6830 {
6831 /* instr[31] = 0
6832 instr[30] = full/half
6833 instr[29,21] = 10 1110 000
6834 instr[20,16] = Vm
6835 instr[15] = 0
6836 instr[14,11] = source index
6837 instr[10] = 0
6838 instr[9,5] = Vn
     instr[4,0] = Vd.  */
6840
6841 unsigned vm = INSTR (20, 16);
6842 unsigned vn = INSTR (9, 5);
6843 unsigned vd = INSTR (4, 0);
6844 unsigned src_index = INSTR (14, 11);
6845 unsigned full = INSTR (30, 30);
6846 unsigned i;
6847 unsigned j;
6848 FRegister val;
6849
6850 NYI_assert (31, 21, 0x370);
6851 NYI_assert (15, 15, 0);
6852 NYI_assert (10, 10, 0);
6853
6854 if (!full && (src_index & 0x8))
6855 HALT_UNALLOC;
6856
6857 j = 0;
6858
6859 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6860 for (i = src_index; i < (full ? 16 : 8); i++)
6861 val.b[j ++] = aarch64_get_vec_u8 (cpu, vn, i);
6862 for (i = 0; i < src_index; i++)
6863 val.b[j ++] = aarch64_get_vec_u8 (cpu, vm, i);
6864
6865 aarch64_set_vec_u64 (cpu, vd, 0, val.v[0]);
6866 if (full)
6867 aarch64_set_vec_u64 (cpu, vd, 1, val.v[1]);
6868 }
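
/* Worked example of the EXT extraction above as a self-contained
   sketch (hypothetical helper, not called elsewhere): EXT behaves
   like a byte-wise extract from the concatenation Vm:Vn, so with
   8-byte operands and index 3, Vd receives Vn bytes 3..7 followed by
   Vm bytes 0..2.  */
static void
ext_bytes_sketch (const uint8_t *vn, const uint8_t *vm, uint8_t *vd,
		  unsigned nbytes, unsigned index)
{
  unsigned i;

  for (i = 0; i < nbytes; i++)
    vd[i] = (index + i < nbytes) ? vn[index + i] : vm[index + i - nbytes];
}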
6869
6870 static void
6871 dexAdvSIMD0 (sim_cpu *cpu)
6872 {
6873 /* instr [28,25] = 0 111. */
  if (INSTR (15, 10) == 0x07
      && INSTR (9, 5) == INSTR (20, 16))
6877 {
6878 if (INSTR (31, 21) == 0x075
6879 || INSTR (31, 21) == 0x275)
6880 {
6881 do_vec_MOV_whole_vector (cpu);
6882 return;
6883 }
6884 }
6885
6886 if (INSTR (29, 19) == 0x1E0)
6887 {
6888 do_vec_MOV_immediate (cpu);
6889 return;
6890 }
6891
6892 if (INSTR (29, 19) == 0x5E0)
6893 {
6894 do_vec_MVNI (cpu);
6895 return;
6896 }
6897
6898 if (INSTR (29, 19) == 0x1C0
6899 || INSTR (29, 19) == 0x1C1)
6900 {
6901 if (INSTR (15, 10) == 0x03)
6902 {
6903 do_vec_DUP_scalar_into_vector (cpu);
6904 return;
6905 }
6906 }
6907
6908 switch (INSTR (29, 24))
6909 {
6910 case 0x0E: do_vec_op1 (cpu); return;
6911 case 0x0F: do_vec_op2 (cpu); return;
6912
6913 case 0x2E:
6914 if (INSTR (21, 21) == 1)
6915 {
6916 switch (INSTR (15, 10))
6917 {
6918 case 0x02:
6919 do_vec_REV32 (cpu);
6920 return;
6921
6922 case 0x07:
6923 switch (INSTR (23, 22))
6924 {
6925 case 0: do_vec_EOR (cpu); return;
6926 case 1: do_vec_BSL (cpu); return;
6927 case 2:
6928 case 3: do_vec_bit (cpu); return;
6929 }
6930 break;
6931
6932 case 0x08: do_vec_sub_long (cpu); return;
6933 case 0x11: do_vec_USHL (cpu); return;
6934 case 0x12: do_vec_CLZ (cpu); return;
6935 case 0x16: do_vec_NOT (cpu); return;
6936 case 0x19: do_vec_max (cpu); return;
6937 case 0x1B: do_vec_min (cpu); return;
6938 case 0x21: do_vec_SUB (cpu); return;
6939 case 0x25: do_vec_MLS (cpu); return;
6940 case 0x31: do_vec_FminmaxNMP (cpu); return;
6941 case 0x35: do_vec_FADDP (cpu); return;
6942 case 0x37: do_vec_FMUL (cpu); return;
6943 case 0x3F: do_vec_FDIV (cpu); return;
6944
6945 case 0x3E:
6946 switch (INSTR (20, 16))
6947 {
6948 case 0x00: do_vec_FNEG (cpu); return;
6949 case 0x01: do_vec_FSQRT (cpu); return;
6950 default: HALT_NYI;
6951 }
6952
6953 case 0x0D:
6954 case 0x0F:
6955 case 0x22:
6956 case 0x23:
6957 case 0x26:
6958 case 0x2A:
6959 case 0x32:
6960 case 0x36:
6961 case 0x39:
6962 case 0x3A:
6963 do_vec_compare (cpu); return;
6964
6965 default:
6966 break;
6967 }
6968 }
6969
6970 if (INSTR (31, 21) == 0x370)
6971 {
6972 if (INSTR (10, 10))
6973 do_vec_MOV_element (cpu);
6974 else
6975 do_vec_EXT (cpu);
6976 return;
6977 }
6978
6979 switch (INSTR (21, 10))
6980 {
6981 case 0x82E: do_vec_neg (cpu); return;
6982 case 0x87E: do_vec_sqrt (cpu); return;
6983 default:
6984 if (INSTR (15, 10) == 0x30)
6985 {
6986 do_vec_mull (cpu);
6987 return;
6988 }
6989 break;
6990 }
6991 break;
6992
6993 case 0x2f:
6994 switch (INSTR (15, 10))
6995 {
6996 case 0x01: do_vec_SSHR_USHR (cpu); return;
6997 case 0x10:
6998 case 0x12: do_vec_mls_indexed (cpu); return;
6999 case 0x29: do_vec_xtl (cpu); return;
7000 default:
7001 HALT_NYI;
7002 }
7003
7004 default:
7005 break;
7006 }
7007
7008 HALT_NYI;
7009 }
7010
7011 /* 3 sources. */
7012
7013 /* Float multiply add. */
7014 static void
7015 fmadds (sim_cpu *cpu)
7016 {
7017 unsigned sa = INSTR (14, 10);
7018 unsigned sm = INSTR (20, 16);
7019 unsigned sn = INSTR ( 9, 5);
7020 unsigned sd = INSTR ( 4, 0);
7021
7022 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7023 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
7024 + aarch64_get_FP_float (cpu, sn)
7025 * aarch64_get_FP_float (cpu, sm));
7026 }
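
/* Note that fmadds above rounds the multiply and the add separately,
   whereas the architected FMADD fuses them with a single rounding.  A
   bit-exact sketch, assuming C99 fmaf from <math.h> (already included)
   and not wired into the decoder:  */
static float
fmadds_fused_sketch (float a, float n, float m)
{
  /* fmaf computes n * m + a with a single rounding, as FMADD requires.  */
  return fmaf (n, m, a);
}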
7027
7028 /* Double multiply add. */
7029 static void
7030 fmaddd (sim_cpu *cpu)
7031 {
7032 unsigned sa = INSTR (14, 10);
7033 unsigned sm = INSTR (20, 16);
7034 unsigned sn = INSTR ( 9, 5);
7035 unsigned sd = INSTR ( 4, 0);
7036
7037 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7038 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
7039 + aarch64_get_FP_double (cpu, sn)
7040 * aarch64_get_FP_double (cpu, sm));
7041 }
7042
7043 /* Float multiply subtract. */
7044 static void
7045 fmsubs (sim_cpu *cpu)
7046 {
7047 unsigned sa = INSTR (14, 10);
7048 unsigned sm = INSTR (20, 16);
7049 unsigned sn = INSTR ( 9, 5);
7050 unsigned sd = INSTR ( 4, 0);
7051
7052 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7053 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
7054 - aarch64_get_FP_float (cpu, sn)
7055 * aarch64_get_FP_float (cpu, sm));
7056 }
7057
7058 /* Double multiply subtract. */
7059 static void
7060 fmsubd (sim_cpu *cpu)
7061 {
7062 unsigned sa = INSTR (14, 10);
7063 unsigned sm = INSTR (20, 16);
7064 unsigned sn = INSTR ( 9, 5);
7065 unsigned sd = INSTR ( 4, 0);
7066
7067 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7068 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
7069 - aarch64_get_FP_double (cpu, sn)
7070 * aarch64_get_FP_double (cpu, sm));
7071 }
7072
7073 /* Float negative multiply add. */
7074 static void
7075 fnmadds (sim_cpu *cpu)
7076 {
7077 unsigned sa = INSTR (14, 10);
7078 unsigned sm = INSTR (20, 16);
7079 unsigned sn = INSTR ( 9, 5);
7080 unsigned sd = INSTR ( 4, 0);
7081
7082 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7083 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7084 + (- aarch64_get_FP_float (cpu, sn))
7085 * aarch64_get_FP_float (cpu, sm));
7086 }
7087
7088 /* Double negative multiply add. */
7089 static void
7090 fnmaddd (sim_cpu *cpu)
7091 {
7092 unsigned sa = INSTR (14, 10);
7093 unsigned sm = INSTR (20, 16);
7094 unsigned sn = INSTR ( 9, 5);
7095 unsigned sd = INSTR ( 4, 0);
7096
7097 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7098 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7099 + (- aarch64_get_FP_double (cpu, sn))
7100 * aarch64_get_FP_double (cpu, sm));
7101 }
7102
7103 /* Float negative multiply subtract. */
7104 static void
7105 fnmsubs (sim_cpu *cpu)
7106 {
7107 unsigned sa = INSTR (14, 10);
7108 unsigned sm = INSTR (20, 16);
7109 unsigned sn = INSTR ( 9, 5);
7110 unsigned sd = INSTR ( 4, 0);
7111
7112 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7113 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7114 + aarch64_get_FP_float (cpu, sn)
7115 * aarch64_get_FP_float (cpu, sm));
7116 }
7117
7118 /* Double negative multiply subtract. */
7119 static void
7120 fnmsubd (sim_cpu *cpu)
7121 {
7122 unsigned sa = INSTR (14, 10);
7123 unsigned sm = INSTR (20, 16);
7124 unsigned sn = INSTR ( 9, 5);
7125 unsigned sd = INSTR ( 4, 0);
7126
7127 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7128 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7129 + aarch64_get_FP_double (cpu, sn)
7130 * aarch64_get_FP_double (cpu, sm));
7131 }
7132
7133 static void
7134 dexSimpleFPDataProc3Source (sim_cpu *cpu)
7135 {
7136 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7137 instr[30] = 0
7138 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7139 instr[28,25] = 1111
7140 instr[24] = 1
7141 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
7142 instr[21] ==> o1 : 0 ==> unnegated, 1 ==> negated
     instr[15] ==> o2 : 0 ==> ADD, 1 ==> SUB
     instr[14,10] = Ra
     instr[9,5] = Rn
     instr[4,0] = Rd  */
7144
7145 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7146 /* dispatch on combined type:o1:o2. */
7147 uint32_t dispatch = (INSTR (23, 21) << 1) | INSTR (15, 15);
7148
7149 if (M_S != 0)
7150 HALT_UNALLOC;
7151
7152 switch (dispatch)
7153 {
7154 case 0: fmadds (cpu); return;
7155 case 1: fmsubs (cpu); return;
7156 case 2: fnmadds (cpu); return;
7157 case 3: fnmsubs (cpu); return;
7158 case 4: fmaddd (cpu); return;
7159 case 5: fmsubd (cpu); return;
7160 case 6: fnmaddd (cpu); return;
7161 case 7: fnmsubd (cpu); return;
7162 default:
7163 /* type > 1 is currently unallocated. */
7164 HALT_UNALLOC;
7165 }
7166 }
7167
7168 static void
7169 dexSimpleFPFixedConvert (sim_cpu *cpu)
7170 {
7171 HALT_NYI;
7172 }
7173
7174 static void
7175 dexSimpleFPCondCompare (sim_cpu *cpu)
7176 {
7177 /* instr [31,23] = 0001 1110 0
7178 instr [22] = type
7179 instr [21] = 1
7180 instr [20,16] = Rm
7181 instr [15,12] = condition
7182 instr [11,10] = 01
7183 instr [9,5] = Rn
7184 instr [4] = 0
7185 instr [3,0] = nzcv */
7186
7187 unsigned rm = INSTR (20, 16);
7188 unsigned rn = INSTR (9, 5);
7189
7190 NYI_assert (31, 23, 0x3C);
7191 NYI_assert (11, 10, 0x1);
7192 NYI_assert (4, 4, 0);
7193
7194 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7195 if (! testConditionCode (cpu, INSTR (15, 12)))
7196 {
7197 aarch64_set_CPSR (cpu, INSTR (3, 0));
7198 return;
7199 }
7200
7201 if (INSTR (22, 22))
7202 {
7203 /* Double precision. */
7204 double val1 = aarch64_get_vec_double (cpu, rn, 0);
7205 double val2 = aarch64_get_vec_double (cpu, rm, 0);
7206
7207 /* FIXME: Check for NaNs. */
7208 if (val1 == val2)
7209 aarch64_set_CPSR (cpu, (Z | C));
7210 else if (val1 < val2)
7211 aarch64_set_CPSR (cpu, N);
7212 else /* val1 > val2 */
7213 aarch64_set_CPSR (cpu, C);
7214 }
7215 else
7216 {
7217 /* Single precision. */
7218 float val1 = aarch64_get_vec_float (cpu, rn, 0);
7219 float val2 = aarch64_get_vec_float (cpu, rm, 0);
7220
7221 /* FIXME: Check for NaNs. */
7222 if (val1 == val2)
7223 aarch64_set_CPSR (cpu, (Z | C));
7224 else if (val1 < val2)
7225 aarch64_set_CPSR (cpu, N);
7226 else /* val1 > val2 */
7227 aarch64_set_CPSR (cpu, C);
7228 }
7229 }
7230
7231 /* 2 sources. */
7232
7233 /* Float add. */
7234 static void
7235 fadds (sim_cpu *cpu)
7236 {
7237 unsigned sm = INSTR (20, 16);
7238 unsigned sn = INSTR ( 9, 5);
7239 unsigned sd = INSTR ( 4, 0);
7240
7241 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7242 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7243 + aarch64_get_FP_float (cpu, sm));
7244 }
7245
7246 /* Double add. */
7247 static void
7248 faddd (sim_cpu *cpu)
7249 {
7250 unsigned sm = INSTR (20, 16);
7251 unsigned sn = INSTR ( 9, 5);
7252 unsigned sd = INSTR ( 4, 0);
7253
7254 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7255 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7256 + aarch64_get_FP_double (cpu, sm));
7257 }
7258
7259 /* Float divide. */
7260 static void
7261 fdivs (sim_cpu *cpu)
7262 {
7263 unsigned sm = INSTR (20, 16);
7264 unsigned sn = INSTR ( 9, 5);
7265 unsigned sd = INSTR ( 4, 0);
7266
7267 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7268 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7269 / aarch64_get_FP_float (cpu, sm));
7270 }
7271
7272 /* Double divide. */
7273 static void
7274 fdivd (sim_cpu *cpu)
7275 {
7276 unsigned sm = INSTR (20, 16);
7277 unsigned sn = INSTR ( 9, 5);
7278 unsigned sd = INSTR ( 4, 0);
7279
7280 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7281 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7282 / aarch64_get_FP_double (cpu, sm));
7283 }
7284
7285 /* Float multiply. */
7286 static void
7287 fmuls (sim_cpu *cpu)
7288 {
7289 unsigned sm = INSTR (20, 16);
7290 unsigned sn = INSTR ( 9, 5);
7291 unsigned sd = INSTR ( 4, 0);
7292
7293 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7294 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7295 * aarch64_get_FP_float (cpu, sm));
7296 }
7297
7298 /* Double multiply. */
7299 static void
7300 fmuld (sim_cpu *cpu)
7301 {
7302 unsigned sm = INSTR (20, 16);
7303 unsigned sn = INSTR ( 9, 5);
7304 unsigned sd = INSTR ( 4, 0);
7305
7306 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7307 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7308 * aarch64_get_FP_double (cpu, sm));
7309 }
7310
7311 /* Float negate and multiply. */
7312 static void
7313 fnmuls (sim_cpu *cpu)
7314 {
7315 unsigned sm = INSTR (20, 16);
7316 unsigned sn = INSTR ( 9, 5);
7317 unsigned sd = INSTR ( 4, 0);
7318
7319 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7320 aarch64_set_FP_float (cpu, sd, - (aarch64_get_FP_float (cpu, sn)
7321 * aarch64_get_FP_float (cpu, sm)));
7322 }
7323
7324 /* Double negate and multiply. */
7325 static void
7326 fnmuld (sim_cpu *cpu)
7327 {
7328 unsigned sm = INSTR (20, 16);
7329 unsigned sn = INSTR ( 9, 5);
7330 unsigned sd = INSTR ( 4, 0);
7331
7332 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7333 aarch64_set_FP_double (cpu, sd, - (aarch64_get_FP_double (cpu, sn)
7334 * aarch64_get_FP_double (cpu, sm)));
7335 }
7336
7337 /* Float subtract. */
7338 static void
7339 fsubs (sim_cpu *cpu)
7340 {
7341 unsigned sm = INSTR (20, 16);
7342 unsigned sn = INSTR ( 9, 5);
7343 unsigned sd = INSTR ( 4, 0);
7344
7345 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7346 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7347 - aarch64_get_FP_float (cpu, sm));
7348 }
7349
7350 /* Double subtract. */
7351 static void
7352 fsubd (sim_cpu *cpu)
7353 {
7354 unsigned sm = INSTR (20, 16);
7355 unsigned sn = INSTR ( 9, 5);
7356 unsigned sd = INSTR ( 4, 0);
7357
7358 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7359 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7360 - aarch64_get_FP_double (cpu, sm));
7361 }
7362
7363 static void
7364 do_FMINNM (sim_cpu *cpu)
7365 {
7366 /* instr[31,23] = 0 0011 1100
7367 instr[22] = float(0)/double(1)
7368 instr[21] = 1
7369 instr[20,16] = Sm
7370 instr[15,10] = 01 1110
7371 instr[9,5] = Sn
     instr[4,0]   = Sd  */
7373
7374 unsigned sm = INSTR (20, 16);
7375 unsigned sn = INSTR ( 9, 5);
7376 unsigned sd = INSTR ( 4, 0);
7377
7378 NYI_assert (31, 23, 0x03C);
7379 NYI_assert (15, 10, 0x1E);
7380
7381 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7382 if (INSTR (22, 22))
7383 aarch64_set_FP_double (cpu, sd,
7384 dminnm (aarch64_get_FP_double (cpu, sn),
7385 aarch64_get_FP_double (cpu, sm)));
7386 else
7387 aarch64_set_FP_float (cpu, sd,
7388 fminnm (aarch64_get_FP_float (cpu, sn),
7389 aarch64_get_FP_float (cpu, sm)));
7390 }
7391
7392 static void
7393 do_FMAXNM (sim_cpu *cpu)
7394 {
7395 /* instr[31,23] = 0 0011 1100
7396 instr[22] = float(0)/double(1)
7397 instr[21] = 1
7398 instr[20,16] = Sm
7399 instr[15,10] = 01 1010
7400 instr[9,5] = Sn
     instr[4,0]   = Sd  */
7402
7403 unsigned sm = INSTR (20, 16);
7404 unsigned sn = INSTR ( 9, 5);
7405 unsigned sd = INSTR ( 4, 0);
7406
7407 NYI_assert (31, 23, 0x03C);
7408 NYI_assert (15, 10, 0x1A);
7409
7410 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7411 if (INSTR (22, 22))
7412 aarch64_set_FP_double (cpu, sd,
7413 dmaxnm (aarch64_get_FP_double (cpu, sn),
7414 aarch64_get_FP_double (cpu, sm)));
7415 else
7416 aarch64_set_FP_float (cpu, sd,
7417 fmaxnm (aarch64_get_FP_float (cpu, sn),
7418 aarch64_get_FP_float (cpu, sm)));
7419 }
7420
7421 static void
7422 dexSimpleFPDataProc2Source (sim_cpu *cpu)
7423 {
7424 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7425 instr[30] = 0
7426 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7427 instr[28,25] = 1111
7428 instr[24] = 0
7429 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
7430 instr[21] = 1
7431 instr[20,16] = Vm
7432 instr[15,12] ==> opcode : 0000 ==> FMUL, 0001 ==> FDIV
7433 0010 ==> FADD, 0011 ==> FSUB,
7434 0100 ==> FMAX, 0101 ==> FMIN
7435 0110 ==> FMAXNM, 0111 ==> FMINNM
7436 1000 ==> FNMUL, ow ==> UNALLOC
7437 instr[11,10] = 10
7438 instr[9,5] = Vn
7439 instr[4,0] = Vd */
7440
7441 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7442 uint32_t type = INSTR (23, 22);
7443 /* Dispatch on opcode. */
7444 uint32_t dispatch = INSTR (15, 12);
7445
7446 if (type > 1)
7447 HALT_UNALLOC;
7448
7449 if (M_S != 0)
7450 HALT_UNALLOC;
7451
7452 if (type)
7453 switch (dispatch)
7454 {
7455 case 0: fmuld (cpu); return;
7456 case 1: fdivd (cpu); return;
7457 case 2: faddd (cpu); return;
7458 case 3: fsubd (cpu); return;
7459 case 6: do_FMAXNM (cpu); return;
7460 case 7: do_FMINNM (cpu); return;
7461 case 8: fnmuld (cpu); return;
7462
7463 /* Have not yet implemented fmax and fmin. */
7464 case 4:
7465 case 5:
7466 HALT_NYI;
7467
7468 default:
7469 HALT_UNALLOC;
7470 }
7471 else /* type == 0 => floats. */
7472 switch (dispatch)
7473 {
7474 case 0: fmuls (cpu); return;
7475 case 1: fdivs (cpu); return;
7476 case 2: fadds (cpu); return;
7477 case 3: fsubs (cpu); return;
7478 case 6: do_FMAXNM (cpu); return;
7479 case 7: do_FMINNM (cpu); return;
7480 case 8: fnmuls (cpu); return;
7481
7482 case 4:
7483 case 5:
7484 HALT_NYI;
7485
7486 default:
7487 HALT_UNALLOC;
7488 }
7489 }
7490
7491 static void
7492 dexSimpleFPCondSelect (sim_cpu *cpu)
7493 {
7494 /* FCSEL
7495 instr[31,23] = 0 0011 1100
7496 instr[22] = 0=>single 1=>double
7497 instr[21] = 1
7498 instr[20,16] = Sm
7499 instr[15,12] = cond
7500 instr[11,10] = 11
7501 instr[9,5] = Sn
     instr[4,0]   = Sd  */
7503 unsigned sm = INSTR (20, 16);
7504 unsigned sn = INSTR ( 9, 5);
7505 unsigned sd = INSTR ( 4, 0);
7506 uint32_t set = testConditionCode (cpu, INSTR (15, 12));
7507
7508 NYI_assert (31, 23, 0x03C);
7509 NYI_assert (11, 10, 0x3);
7510
7511 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7512 if (INSTR (22, 22))
7513 aarch64_set_FP_double (cpu, sd, (set ? aarch64_get_FP_double (cpu, sn)
7514 : aarch64_get_FP_double (cpu, sm)));
7515 else
7516 aarch64_set_FP_float (cpu, sd, (set ? aarch64_get_FP_float (cpu, sn)
7517 : aarch64_get_FP_float (cpu, sm)));
7518 }
7519
7520 /* Store 32 bit unscaled signed 9 bit. */
7521 static void
7522 fsturs (sim_cpu *cpu, int32_t offset)
7523 {
7524 unsigned int rn = INSTR (9, 5);
7525 unsigned int st = INSTR (4, 0);
7526
7527 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7529 aarch64_get_vec_u32 (cpu, st, 0));
7530 }
7531
7532 /* Store 64 bit unscaled signed 9 bit. */
7533 static void
7534 fsturd (sim_cpu *cpu, int32_t offset)
7535 {
7536 unsigned int rn = INSTR (9, 5);
7537 unsigned int st = INSTR (4, 0);
7538
7539 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7541 aarch64_get_vec_u64 (cpu, st, 0));
7542 }
7543
7544 /* Store 128 bit unscaled signed 9 bit. */
7545 static void
7546 fsturq (sim_cpu *cpu, int32_t offset)
7547 {
7548 unsigned int rn = INSTR (9, 5);
7549 unsigned int st = INSTR (4, 0);
7550 FRegister a;
7551
7552 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7553 aarch64_get_FP_long_double (cpu, st, & a);
7554 aarch64_set_mem_long_double (cpu,
			       aarch64_get_reg_u64 (cpu, rn, SP_OK)
7556 + offset, a);
7557 }
7558
7559 /* TODO FP move register. */
7560
7561 /* 32 bit fp to fp move register. */
7562 static void
7563 ffmovs (sim_cpu *cpu)
7564 {
7565 unsigned int rn = INSTR (9, 5);
7566 unsigned int st = INSTR (4, 0);
7567
7568 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7569 aarch64_set_FP_float (cpu, st, aarch64_get_FP_float (cpu, rn));
7570 }
7571
7572 /* 64 bit fp to fp move register. */
7573 static void
7574 ffmovd (sim_cpu *cpu)
7575 {
7576 unsigned int rn = INSTR (9, 5);
7577 unsigned int st = INSTR (4, 0);
7578
7579 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7580 aarch64_set_FP_double (cpu, st, aarch64_get_FP_double (cpu, rn));
7581 }
7582
7583 /* 32 bit GReg to Vec move register. */
7584 static void
7585 fgmovs (sim_cpu *cpu)
7586 {
7587 unsigned int rn = INSTR (9, 5);
7588 unsigned int st = INSTR (4, 0);
7589
7590 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7591 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_reg_u32 (cpu, rn, NO_SP));
7592 }
7593
7594 /* 64 bit g to fp move register. */
7595 static void
7596 fgmovd (sim_cpu *cpu)
7597 {
7598 unsigned int rn = INSTR (9, 5);
7599 unsigned int st = INSTR (4, 0);
7600
7601 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7602 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_reg_u64 (cpu, rn, NO_SP));
7603 }
7604
7605 /* 32 bit fp to g move register. */
7606 static void
7607 gfmovs (sim_cpu *cpu)
7608 {
7609 unsigned int rn = INSTR (9, 5);
7610 unsigned int st = INSTR (4, 0);
7611
7612 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7613 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u32 (cpu, rn, 0));
7614 }
7615
7616 /* 64 bit fp to g move register. */
7617 static void
7618 gfmovd (sim_cpu *cpu)
7619 {
7620 unsigned int rn = INSTR (9, 5);
7621 unsigned int st = INSTR (4, 0);
7622
7623 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7624 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u64 (cpu, rn, 0));
7625 }
7626
7627 /* FP move immediate
7628
7629 These install an immediate 8 bit value in the target register
7630 where the 8 bits comprise 1 sign bit, 4 bits of fraction and a 3
7631 bit exponent. */
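
/* A sketch of that expansion for the single-precision case, assuming
   the usual AArch64 imm8 rule: sign = imm8<7>, exponent =
   NOT(imm8<6>) : Replicate(imm8<6>,5) : imm8<5:4>, fraction =
   imm8<3:0> followed by 19 zeros.  fp_immediate_for_encoding_32 is
   assumed to implement the same rule; e.g. imm8 == 0x70 expands to
   1.0f.  */
static float
fp_imm8_expand_sketch (uint32_t imm8)
{
  uint32_t sign = (imm8 >> 7) & 1;
  uint32_t b6   = (imm8 >> 6) & 1;
  uint32_t exp  = ((b6 ^ 1) << 7) | ((b6 ? 0x1F : 0x00) << 2)
    | ((imm8 >> 4) & 3);
  uint32_t bits = (sign << 31) | (exp << 23) | ((imm8 & 0xF) << 19);
  float result;

  memcpy (&result, &bits, sizeof (result));
  return result;
}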
7632
7633 static void
7634 fmovs (sim_cpu *cpu)
7635 {
7636 unsigned int sd = INSTR (4, 0);
7637 uint32_t imm = INSTR (20, 13);
7638 float f = fp_immediate_for_encoding_32 (imm);
7639
7640 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7641 aarch64_set_FP_float (cpu, sd, f);
7642 }
7643
7644 static void
7645 fmovd (sim_cpu *cpu)
7646 {
7647 unsigned int sd = INSTR (4, 0);
7648 uint32_t imm = INSTR (20, 13);
7649 double d = fp_immediate_for_encoding_64 (imm);
7650
7651 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7652 aarch64_set_FP_double (cpu, sd, d);
7653 }
7654
7655 static void
7656 dexSimpleFPImmediate (sim_cpu *cpu)
7657 {
  /* instr[31,23] == 0 0011 1100
7659 instr[22] == type : single(0)/double(1)
7660 instr[21] == 1
7661 instr[20,13] == imm8
7662 instr[12,10] == 100
     instr[9,5] == imm5 : 00000 ==> OK, ow ==> UNALLOC
7664 instr[4,0] == Rd */
7665 uint32_t imm5 = INSTR (9, 5);
7666
7667 NYI_assert (31, 23, 0x3C);
7668
7669 if (imm5 != 0)
7670 HALT_UNALLOC;
7671
7672 if (INSTR (22, 22))
7673 fmovd (cpu);
7674 else
7675 fmovs (cpu);
7676 }
7677
7678 /* TODO specific decode and execute for group Load Store. */
7679
7680 /* TODO FP load/store single register (unscaled offset). */
7681
7682 /* TODO load 8 bit unscaled signed 9 bit. */
7683 /* TODO load 16 bit unscaled signed 9 bit. */
7684
7685 /* Load 32 bit unscaled signed 9 bit. */
7686 static void
7687 fldurs (sim_cpu *cpu, int32_t offset)
7688 {
7689 unsigned int rn = INSTR (9, 5);
7690 unsigned int st = INSTR (4, 0);
7691
7692 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7693 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
7694 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7695 }
7696
7697 /* Load 64 bit unscaled signed 9 bit. */
7698 static void
7699 fldurd (sim_cpu *cpu, int32_t offset)
7700 {
7701 unsigned int rn = INSTR (9, 5);
7702 unsigned int st = INSTR (4, 0);
7703
7704 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7705 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64
7706 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7707 }
7708
7709 /* Load 128 bit unscaled signed 9 bit. */
7710 static void
7711 fldurq (sim_cpu *cpu, int32_t offset)
7712 {
7713 unsigned int rn = INSTR (9, 5);
7714 unsigned int st = INSTR (4, 0);
7715 FRegister a;
7716 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
7717
7718 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7719 aarch64_get_mem_long_double (cpu, addr, & a);
7720 aarch64_set_FP_long_double (cpu, st, a);
7721 }
7722
7723 /* TODO store 8 bit unscaled signed 9 bit. */
7724 /* TODO store 16 bit unscaled signed 9 bit. */
7725
7726
7727 /* 1 source. */
7728
7729 /* Float absolute value. */
7730 static void
7731 fabss (sim_cpu *cpu)
7732 {
7733 unsigned sn = INSTR (9, 5);
7734 unsigned sd = INSTR (4, 0);
7735 float value = aarch64_get_FP_float (cpu, sn);
7736
7737 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7738 aarch64_set_FP_float (cpu, sd, fabsf (value));
7739 }
7740
7741 /* Double absolute value. */
7742 static void
fabsd (sim_cpu *cpu)
7744 {
7745 unsigned sn = INSTR (9, 5);
7746 unsigned sd = INSTR (4, 0);
7747 double value = aarch64_get_FP_double (cpu, sn);
7748
7749 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7750 aarch64_set_FP_double (cpu, sd, fabs (value));
7751 }
7752
7753 /* Float negative value. */
7754 static void
7755 fnegs (sim_cpu *cpu)
7756 {
7757 unsigned sn = INSTR (9, 5);
7758 unsigned sd = INSTR (4, 0);
7759
7760 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7761 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sn));
7762 }
7763
7764 /* Double negative value. */
7765 static void
7766 fnegd (sim_cpu *cpu)
7767 {
7768 unsigned sn = INSTR (9, 5);
7769 unsigned sd = INSTR (4, 0);
7770
7771 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7772 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sn));
7773 }
7774
7775 /* Float square root. */
7776 static void
7777 fsqrts (sim_cpu *cpu)
7778 {
7779 unsigned sn = INSTR (9, 5);
7780 unsigned sd = INSTR (4, 0);
7781
7782 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7783 aarch64_set_FP_float (cpu, sd, sqrtf (aarch64_get_FP_float (cpu, sn)));
7784 }
7785
7786 /* Double square root. */
7787 static void
7788 fsqrtd (sim_cpu *cpu)
7789 {
7790 unsigned sn = INSTR (9, 5);
7791 unsigned sd = INSTR (4, 0);
7792
7793 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7794 aarch64_set_FP_double (cpu, sd,
7795 sqrt (aarch64_get_FP_double (cpu, sn)));
7796 }
7797
7798 /* Convert double to float. */
7799 static void
7800 fcvtds (sim_cpu *cpu)
7801 {
7802 unsigned sn = INSTR (9, 5);
7803 unsigned sd = INSTR (4, 0);
7804
7805 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7806 aarch64_set_FP_float (cpu, sd, (float) aarch64_get_FP_double (cpu, sn));
7807 }
7808
7809 /* Convert float to double. */
7810 static void
fcvtsd (sim_cpu *cpu)
7812 {
7813 unsigned sn = INSTR (9, 5);
7814 unsigned sd = INSTR (4, 0);
7815
7816 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7817 aarch64_set_FP_double (cpu, sd, (double) aarch64_get_FP_float (cpu, sn));
7818 }
7819
7820 static void
7821 do_FRINT (sim_cpu *cpu)
7822 {
7823 /* instr[31,23] = 0001 1110 0
7824 instr[22] = single(0)/double(1)
7825 instr[21,18] = 1001
7826 instr[17,15] = rounding mode
7827 instr[14,10] = 10000
7828 instr[9,5] = source
7829 instr[4,0] = dest */
7830
7831 float val;
7832 unsigned rs = INSTR (9, 5);
7833 unsigned rd = INSTR (4, 0);
7834 unsigned int rmode = INSTR (17, 15);
7835
7836 NYI_assert (31, 23, 0x03C);
7837 NYI_assert (21, 18, 0x9);
7838 NYI_assert (14, 10, 0x10);
7839
7840 if (rmode == 6 || rmode == 7)
7841 /* FIXME: Add support for rmode == 6 exactness check. */
7842 rmode = uimm (aarch64_get_FPSR (cpu), 23, 22);
7843
7844 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7845 if (INSTR (22, 22))
7846 {
7847 double val = aarch64_get_FP_double (cpu, rs);
7848
7849 switch (rmode)
7850 {
	case 0: /* mode N: nearest or even.  */
	  {
	    double rval = round (val);

	    /* round () breaks ties away from zero; pull an odd result
	       back to the even neighbour.  */
	    if (fabs (val - rval) == 0.5
		&& fmod (rval, 2.0) != 0.0)
	      rval -= copysign (1.0, val);

	    aarch64_set_FP_double (cpu, rd, rval);
	    return;
	  }
7864
	case 1: /* mode P: towards +inf.  */
	  aarch64_set_FP_double (cpu, rd, ceil (val));
	  return;

	case 2: /* mode M: towards -inf.  */
	  aarch64_set_FP_double (cpu, rd, floor (val));
	  return;
7878
7879 case 3: /* mode Z: towards 0. */
7880 aarch64_set_FP_double (cpu, rd, trunc (val));
7881 return;
7882
7883 case 4: /* mode A: away from 0. */
7884 aarch64_set_FP_double (cpu, rd, round (val));
7885 return;
7886
7887 case 6: /* mode X: use FPCR with exactness check. */
7888 case 7: /* mode I: use FPCR mode. */
7889 HALT_NYI;
7890
7891 default:
7892 HALT_UNALLOC;
7893 }
7894 }
7895
7896 val = aarch64_get_FP_float (cpu, rs);
7897
7898 switch (rmode)
7899 {
    case 0: /* mode N: nearest or even.  */
      {
	float rval = roundf (val);

	/* roundf () breaks ties away from zero; pull an odd result
	   back to the even neighbour.  */
	if (fabsf (val - rval) == 0.5f
	    && fmodf (rval, 2.0f) != 0.0f)
	  rval -= copysignf (1.0f, val);

	aarch64_set_FP_float (cpu, rd, rval);
	return;
      }
7913
    case 1: /* mode P: towards +inf.  */
      aarch64_set_FP_float (cpu, rd, ceilf (val));
      return;

    case 2: /* mode M: towards -inf.  */
      aarch64_set_FP_float (cpu, rd, floorf (val));
      return;
7927
7928 case 3: /* mode Z: towards 0. */
7929 aarch64_set_FP_float (cpu, rd, truncf (val));
7930 return;
7931
7932 case 4: /* mode A: away from 0. */
7933 aarch64_set_FP_float (cpu, rd, roundf (val));
7934 return;
7935
7936 case 6: /* mode X: use FPCR with exactness check. */
7937 case 7: /* mode I: use FPCR mode. */
7938 HALT_NYI;
7939
7940 default:
7941 HALT_UNALLOC;
7942 }
7943 }
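
/* Compact cross-check for the round-to-nearest-even cases above
   (illustrative only, not called by the simulator): C99 remainder ()
   rounds its implied quotient to the nearest integer, ties to even,
   so round-half-to-even falls out directly, e.g. 2.5 -> 2.0 and
   3.5 -> 4.0.  */
static double
frint_tieeven_sketch (double val)
{
  return val - remainder (val, 1.0);
}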
7944
7945 /* Convert half to float. */
7946 static void
7947 do_FCVT_half_to_single (sim_cpu *cpu)
7948 {
7949 unsigned rn = INSTR (9, 5);
7950 unsigned rd = INSTR (4, 0);
7951
7952 NYI_assert (31, 10, 0x7B890);
7953
7954 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7955 aarch64_set_FP_float (cpu, rd, (float) aarch64_get_FP_half (cpu, rn));
7956 }
7957
7958 /* Convert half to double. */
7959 static void
7960 do_FCVT_half_to_double (sim_cpu *cpu)
7961 {
7962 unsigned rn = INSTR (9, 5);
7963 unsigned rd = INSTR (4, 0);
7964
7965 NYI_assert (31, 10, 0x7B8B0);
7966
7967 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7968 aarch64_set_FP_double (cpu, rd, (double) aarch64_get_FP_half (cpu, rn));
7969 }
7970
7971 static void
7972 do_FCVT_single_to_half (sim_cpu *cpu)
7973 {
7974 unsigned rn = INSTR (9, 5);
7975 unsigned rd = INSTR (4, 0);
7976
7977 NYI_assert (31, 10, 0x788F0);
7978
7979 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7980 aarch64_set_FP_half (cpu, rd, aarch64_get_FP_float (cpu, rn));
7981 }
7982
7983 /* Convert double to half. */
7984 static void
7985 do_FCVT_double_to_half (sim_cpu *cpu)
7986 {
7987 unsigned rn = INSTR (9, 5);
7988 unsigned rd = INSTR (4, 0);
7989
7990 NYI_assert (31, 10, 0x798F0);
7991
7992 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7993 aarch64_set_FP_half (cpu, rd, (float) aarch64_get_FP_double (cpu, rn));
7994 }
7995
7996 static void
7997 dexSimpleFPDataProc1Source (sim_cpu *cpu)
7998 {
7999 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
8000 instr[30] = 0
8001 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
8002 instr[28,25] = 1111
8003 instr[24] = 0
8004 instr[23,22] ==> type : 00 ==> source is single,
8005 01 ==> source is double
8006 10 ==> UNALLOC
8007 11 ==> UNALLOC or source is half
8008 instr[21] = 1
8009 instr[20,15] ==> opcode : with type 00 or 01
8010 000000 ==> FMOV, 000001 ==> FABS,
8011 000010 ==> FNEG, 000011 ==> FSQRT,
8012 000100 ==> UNALLOC, 000101 ==> FCVT,(to single/double)
8013 000110 ==> UNALLOC, 000111 ==> FCVT (to half)
8014 001000 ==> FRINTN, 001001 ==> FRINTP,
8015 001010 ==> FRINTM, 001011 ==> FRINTZ,
8016 001100 ==> FRINTA, 001101 ==> UNALLOC
8017 001110 ==> FRINTX, 001111 ==> FRINTI
8018 with type 11
8019 000100 ==> FCVT (half-to-single)
8020 000101 ==> FCVT (half-to-double)
8021 instr[14,10] = 10000. */
8022
8023 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
8024 uint32_t type = INSTR (23, 22);
8025 uint32_t opcode = INSTR (20, 15);
8026
8027 if (M_S != 0)
8028 HALT_UNALLOC;
8029
8030 if (type == 3)
8031 {
8032 if (opcode == 4)
8033 do_FCVT_half_to_single (cpu);
8034 else if (opcode == 5)
8035 do_FCVT_half_to_double (cpu);
8036 else
8037 HALT_UNALLOC;
8038 return;
8039 }
8040
8041 if (type == 2)
8042 HALT_UNALLOC;
8043
8044 switch (opcode)
8045 {
8046 case 0:
8047 if (type)
8048 ffmovd (cpu);
8049 else
8050 ffmovs (cpu);
8051 return;
8052
8053 case 1:
8054 if (type)
	fabsd (cpu);
8056 else
8057 fabss (cpu);
8058 return;
8059
8060 case 2:
8061 if (type)
8062 fnegd (cpu);
8063 else
8064 fnegs (cpu);
8065 return;
8066
8067 case 3:
8068 if (type)
8069 fsqrtd (cpu);
8070 else
8071 fsqrts (cpu);
8072 return;
8073
8074 case 4:
8075 if (type)
8076 fcvtds (cpu);
8077 else
8078 HALT_UNALLOC;
8079 return;
8080
8081 case 5:
8082 if (type)
8083 HALT_UNALLOC;
      fcvtsd (cpu);
8085 return;
8086
8087 case 8: /* FRINTN etc. */
8088 case 9:
8089 case 10:
8090 case 11:
8091 case 12:
8092 case 14:
8093 case 15:
8094 do_FRINT (cpu);
8095 return;
8096
8097 case 7:
8098 if (INSTR (22, 22))
8099 do_FCVT_double_to_half (cpu);
8100 else
8101 do_FCVT_single_to_half (cpu);
8102 return;
8103
8104 case 13:
8105 HALT_NYI;
8106
8107 default:
8108 HALT_UNALLOC;
8109 }
8110 }
8111
8112 /* 32 bit signed int to float. */
8113 static void
8114 scvtf32 (sim_cpu *cpu)
8115 {
8116 unsigned rn = INSTR (9, 5);
8117 unsigned sd = INSTR (4, 0);
8118
8119 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8120 aarch64_set_FP_float
8121 (cpu, sd, (float) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8122 }
8123
8124 /* signed int to float. */
8125 static void
8126 scvtf (sim_cpu *cpu)
8127 {
8128 unsigned rn = INSTR (9, 5);
8129 unsigned sd = INSTR (4, 0);
8130
8131 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8132 aarch64_set_FP_float
8133 (cpu, sd, (float) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8134 }
8135
8136 /* 32 bit signed int to double. */
8137 static void
8138 scvtd32 (sim_cpu *cpu)
8139 {
8140 unsigned rn = INSTR (9, 5);
8141 unsigned sd = INSTR (4, 0);
8142
8143 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8144 aarch64_set_FP_double
8145 (cpu, sd, (double) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8146 }
8147
8148 /* signed int to double. */
8149 static void
8150 scvtd (sim_cpu *cpu)
8151 {
8152 unsigned rn = INSTR (9, 5);
8153 unsigned sd = INSTR (4, 0);
8154
8155 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8156 aarch64_set_FP_double
8157 (cpu, sd, (double) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8158 }
8159
8160 static const float FLOAT_INT_MAX = (float) INT_MAX;
8161 static const float FLOAT_INT_MIN = (float) INT_MIN;
8162 static const double DOUBLE_INT_MAX = (double) INT_MAX;
8163 static const double DOUBLE_INT_MIN = (double) INT_MIN;
8164 static const float FLOAT_LONG_MAX = (float) LONG_MAX;
8165 static const float FLOAT_LONG_MIN = (float) LONG_MIN;
8166 static const double DOUBLE_LONG_MAX = (double) LONG_MAX;
8167 static const double DOUBLE_LONG_MIN = (double) LONG_MIN;
8168
8169 #define UINT_MIN 0
8170 #define ULONG_MIN 0
8171 static const float FLOAT_UINT_MAX = (float) UINT_MAX;
8172 static const float FLOAT_UINT_MIN = (float) UINT_MIN;
8173 static const double DOUBLE_UINT_MAX = (double) UINT_MAX;
8174 static const double DOUBLE_UINT_MIN = (double) UINT_MIN;
8175 static const float FLOAT_ULONG_MAX = (float) ULONG_MAX;
8176 static const float FLOAT_ULONG_MIN = (float) ULONG_MIN;
8177 static const double DOUBLE_ULONG_MAX = (double) ULONG_MAX;
8178 static const double DOUBLE_ULONG_MIN = (double) ULONG_MIN;
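
/* Note that some of these bounds are not exactly representable: e.g.
   (float) INT_MAX rounds up to 2^31, so the ">=" test in
   RAISE_EXCEPTIONS below also treats 2^31 itself as out of range,
   which is correct since the largest valid int is 2^31 - 1.  */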
8179
8180 /* Check for FP exception conditions:
8181 NaN raises IO
8182 Infinity raises IO
8183 Out of Range raises IO and IX and saturates value
8184 Denormal raises ID and IX and sets to zero. */
8185 #define RAISE_EXCEPTIONS(F, VALUE, FTYPE, ITYPE) \
8186 do \
8187 { \
8188 switch (fpclassify (F)) \
8189 { \
8190 case FP_INFINITE: \
8191 case FP_NAN: \
8192 aarch64_set_FPSR (cpu, IO); \
8193 if (signbit (F)) \
8194 VALUE = ITYPE##_MAX; \
8195 else \
8196 VALUE = ITYPE##_MIN; \
8197 break; \
8198 \
8199 case FP_NORMAL: \
8200 if (F >= FTYPE##_##ITYPE##_MAX) \
8201 { \
8202 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8203 VALUE = ITYPE##_MAX; \
8204 } \
8205 else if (F <= FTYPE##_##ITYPE##_MIN) \
8206 { \
8207 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8208 VALUE = ITYPE##_MIN; \
8209 } \
8210 break; \
8211 \
8212 case FP_SUBNORMAL: \
8213 aarch64_set_FPSR_bits (cpu, IO | IX | ID, IX | ID); \
8214 VALUE = 0; \
8215 break; \
8216 \
8217 default: \
8218 case FP_ZERO: \
8219 VALUE = 0; \
8220 break; \
8221 } \
8222 } \
8223 while (0)
8224
8225 /* 32 bit convert float to signed int truncate towards zero. */
8226 static void
8227 fcvtszs32 (sim_cpu *cpu)
8228 {
8229 unsigned sn = INSTR (9, 5);
8230 unsigned rd = INSTR (4, 0);
8231 /* TODO : check that this rounds toward zero. */
8232 float f = aarch64_get_FP_float (cpu, sn);
8233 int32_t value = (int32_t) f;
8234
8235 RAISE_EXCEPTIONS (f, value, FLOAT, INT);
8236
8237 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8238 /* Avoid sign extension to 64 bit. */
8239 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8240 }
8241
8242 /* 64 bit convert float to signed int truncate towards zero. */
8243 static void
8244 fcvtszs (sim_cpu *cpu)
8245 {
8246 unsigned sn = INSTR (9, 5);
8247 unsigned rd = INSTR (4, 0);
8248 float f = aarch64_get_FP_float (cpu, sn);
8249 int64_t value = (int64_t) f;
8250
8251 RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
8252
8253 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8254 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8255 }
8256
8257 /* 32 bit convert double to signed int truncate towards zero. */
8258 static void
8259 fcvtszd32 (sim_cpu *cpu)
8260 {
8261 unsigned sn = INSTR (9, 5);
8262 unsigned rd = INSTR (4, 0);
8263 /* TODO : check that this rounds toward zero. */
8264 double d = aarch64_get_FP_double (cpu, sn);
8265 int32_t value = (int32_t) d;
8266
8267 RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
8268
8269 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8270 /* Avoid sign extension to 64 bit. */
8271 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8272 }
8273
8274 /* 64 bit convert double to signed int truncate towards zero. */
8275 static void
8276 fcvtszd (sim_cpu *cpu)
8277 {
8278 unsigned sn = INSTR (9, 5);
8279 unsigned rd = INSTR (4, 0);
8280 /* TODO : check that this rounds toward zero. */
8281 double d = aarch64_get_FP_double (cpu, sn);
8282 int64_t value;
8283
8284 value = (int64_t) d;
8285
8286 RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
8287
8288 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8289 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8290 }
8291
8292 static void
8293 do_fcvtzu (sim_cpu *cpu)
8294 {
8295 /* instr[31] = size: 32-bit (0), 64-bit (1)
8296 instr[30,23] = 00111100
8297 instr[22] = type: single (0)/ double (1)
     instr[21] = 1 ==> integer, 0 ==> fixed-point (scaled by instr[15,10])
8299 instr[20,16] = 11001
8300 instr[15,10] = precision
8301 instr[9,5] = Rs
8302 instr[4,0] = Rd. */
8303
8304 unsigned rs = INSTR (9, 5);
8305 unsigned rd = INSTR (4, 0);
8306
8307 NYI_assert (30, 23, 0x3C);
8308 NYI_assert (20, 16, 0x19);
8309
8310 if (INSTR (21, 21) != 1)
8311 /* Convert to fixed point. */
8312 HALT_NYI;
8313
8314 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8315 if (INSTR (31, 31))
8316 {
8317 /* Convert to unsigned 64-bit integer. */
8318 if (INSTR (22, 22))
8319 {
8320 double d = aarch64_get_FP_double (cpu, rs);
8321 uint64_t value = (uint64_t) d;
8322
8323 /* Do not raise an exception if we have reached ULONG_MAX. */
8324 if (value != (1UL << 63))
8325 RAISE_EXCEPTIONS (d, value, DOUBLE, ULONG);
8326
8327 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8328 }
8329 else
8330 {
8331 float f = aarch64_get_FP_float (cpu, rs);
8332 uint64_t value = (uint64_t) f;
8333
8334 /* Do not raise an exception if we have reached ULONG_MAX. */
8335 if (value != (1UL << 63))
8336 RAISE_EXCEPTIONS (f, value, FLOAT, ULONG);
8337
8338 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8339 }
8340 }
8341 else
8342 {
8343 uint32_t value;
8344
8345 /* Convert to unsigned 32-bit integer. */
8346 if (INSTR (22, 22))
8347 {
8348 double d = aarch64_get_FP_double (cpu, rs);
8349
8350 value = (uint32_t) d;
8351 /* Do not raise an exception if we have reached UINT_MAX. */
8352 if (value != (1UL << 31))
8353 RAISE_EXCEPTIONS (d, value, DOUBLE, UINT);
8354 }
8355 else
8356 {
8357 float f = aarch64_get_FP_float (cpu, rs);
8358
8359 value = (uint32_t) f;
8360 /* Do not raise an exception if we have reached UINT_MAX. */
8361 if (value != (1UL << 31))
8362 RAISE_EXCEPTIONS (f, value, FLOAT, UINT);
8363 }
8364
8365 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8366 }
8367 }
8368
8369 static void
8370 do_UCVTF (sim_cpu *cpu)
8371 {
8372 /* instr[31] = size: 32-bit (0), 64-bit (1)
8373 instr[30,23] = 001 1110 0
8374 instr[22] = type: single (0)/ double (1)
     instr[21] = 1 ==> integer, 0 ==> fixed-point (scaled by instr[15,10])
8376 instr[20,16] = 0 0011
8377 instr[15,10] = precision
8378 instr[9,5] = Rs
8379 instr[4,0] = Rd. */
8380
8381 unsigned rs = INSTR (9, 5);
8382 unsigned rd = INSTR (4, 0);
8383
8384 NYI_assert (30, 23, 0x3C);
8385 NYI_assert (20, 16, 0x03);
8386
8387 if (INSTR (21, 21) != 1)
8388 HALT_NYI;
8389
8390 /* FIXME: Add exception raising. */
8391 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8392 if (INSTR (31, 31))
8393 {
8394 uint64_t value = aarch64_get_reg_u64 (cpu, rs, NO_SP);
8395
8396 if (INSTR (22, 22))
8397 aarch64_set_FP_double (cpu, rd, (double) value);
8398 else
8399 aarch64_set_FP_float (cpu, rd, (float) value);
8400 }
8401 else
8402 {
8403 uint32_t value = aarch64_get_reg_u32 (cpu, rs, NO_SP);
8404
8405 if (INSTR (22, 22))
8406 aarch64_set_FP_double (cpu, rd, (double) value);
8407 else
8408 aarch64_set_FP_float (cpu, rd, (float) value);
8409 }
8410 }
8411
static void
float_vector_move (sim_cpu *cpu)
{
  /* instr[31,17] == 100 1111 0101 0111
     instr[16]    ==> direction 0=> to GR, 1=> from GR
     instr[15,10] => ???
     instr[9,5]   ==> source
     instr[4,0]   ==> dest.  */

  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  NYI_assert (31, 17, 0x4F57);

  if (INSTR (15, 10) != 0)
    HALT_UNALLOC;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (INSTR (16, 16))
    aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_reg_u64 (cpu, rn, NO_SP));
  else
    aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, rn, 1));
}

static void
dexSimpleFPIntegerConvert (sim_cpu *cpu)
{
  /* instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
     instr[30]    = 0
     instr[29]    = S : 0 ==> OK, 1 ==> UNALLOC
     instr[28,25] = 1111
     instr[24]    = 0
     instr[23,22] = type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
     instr[21]    = 1
     instr[20,19] = rmode
     instr[18,16] = opcode
     instr[15,10] = 10 0000.  */

  uint32_t rmode_opcode;
  uint32_t size_type;
  uint32_t type;
  uint32_t size;
  uint32_t S;

  if (INSTR (31, 17) == 0x4F57)
    {
      float_vector_move (cpu);
      return;
    }

  size = INSTR (31, 31);
  S = INSTR (29, 29);
  if (S != 0)
    HALT_UNALLOC;

  type = INSTR (23, 22);
  if (type > 1)
    HALT_UNALLOC;

  rmode_opcode = INSTR (20, 16);
  size_type = (size << 1) | type; /* 0==32f, 1==32d, 2==64f, 3==64d.  */

  switch (rmode_opcode)
    {
    case 2: /* SCVTF.  */
      switch (size_type)
        {
        case 0: scvtf32 (cpu); return;
        case 1: scvtd32 (cpu); return;
        case 2: scvtf (cpu); return;
        case 3: scvtd (cpu); return;
        }

    case 6: /* FMOV GR, Vec.  */
      switch (size_type)
        {
        case 0: gfmovs (cpu); return;
        case 3: gfmovd (cpu); return;
        default: HALT_UNALLOC;
        }

    case 7: /* FMOV vec, GR.  */
      switch (size_type)
        {
        case 0: fgmovs (cpu); return;
        case 3: fgmovd (cpu); return;
        default: HALT_UNALLOC;
        }

    case 24: /* FCVTZS.  */
      switch (size_type)
        {
        case 0: fcvtszs32 (cpu); return;
        case 1: fcvtszd32 (cpu); return;
        case 2: fcvtszs (cpu); return;
        case 3: fcvtszd (cpu); return;
        }

    case 25: do_fcvtzu (cpu); return;
    case 3: do_UCVTF (cpu); return;

    case 0: /* FCVTNS.  */
    case 1: /* FCVTNU.  */
    case 4: /* FCVTAS.  */
    case 5: /* FCVTAU.  */
    case 8: /* FCVTPS.  */
    case 9: /* FCVTPU.  */
    case 16: /* FCVTMS.  */
    case 17: /* FCVTMU.  */
    default:
      HALT_NYI;
    }
}

static void
set_flags_for_float_compare (sim_cpu *cpu, float fvalue1, float fvalue2)
{
  uint32_t flags;

  /* FIXME: Add exception raising.  */
  if (isnan (fvalue1) || isnan (fvalue2))
    flags = C|V;
  else if (isinf (fvalue1) && isinf (fvalue2))
    {
      /* Subtracting two infinities may give a NaN.  We only need to compare
         the signs, which we can get from isinf.  */
      int result = isinf (fvalue1) - isinf (fvalue2);

      if (result == 0)
        flags = Z|C;
      else if (result < 0)
        flags = N;
      else /* (result > 0).  */
        flags = C;
    }
  else
    {
      float result = fvalue1 - fvalue2;

      if (result == 0.0)
        flags = Z|C;
      else if (result < 0)
        flags = N;
      else /* (result > 0).  */
        flags = C;
    }

  aarch64_set_CPSR (cpu, flags);
}
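
/* For reference, the NZCV encoding produced above follows the AArch64
   FCMP convention:
     equal      ==> Z|C
     less than  ==> N
     greater    ==> C
     unordered  ==> C|V
   Below is a minimal self-contained sketch of the same classification,
   useful when checking the simulator against hardware.  It is
   illustrative only and is not called from any decode path.  */
static const char *
fp_compare_class_example (double v1, double v2)
{
  if (isnan (v1) || isnan (v2))
    return "unordered (C|V)";
  if (v1 == v2)                 /* Also true for two like-signed infinities.  */
    return "equal (Z|C)";
  return v1 < v2 ? "less than (N)" : "greater (C)";
}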

static void
fcmps (sim_cpu *cpu)
{
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);

  float fvalue1 = aarch64_get_FP_float (cpu, sn);
  float fvalue2 = aarch64_get_FP_float (cpu, sm);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  set_flags_for_float_compare (cpu, fvalue1, fvalue2);
}

/* Float compare to zero -- Invalid Operation exception
   only on signaling NaNs.  */
static void
fcmpzs (sim_cpu *cpu)
{
  unsigned sn = INSTR ( 9, 5);
  float fvalue1 = aarch64_get_FP_float (cpu, sn);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  set_flags_for_float_compare (cpu, fvalue1, 0.0f);
}

/* Float compare -- Invalid Operation exception on all NaNs.  */
static void
fcmpes (sim_cpu *cpu)
{
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);

  float fvalue1 = aarch64_get_FP_float (cpu, sn);
  float fvalue2 = aarch64_get_FP_float (cpu, sm);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  set_flags_for_float_compare (cpu, fvalue1, fvalue2);
}

/* Float compare to zero -- Invalid Operation exception on all NaNs.  */
static void
fcmpzes (sim_cpu *cpu)
{
  unsigned sn = INSTR ( 9, 5);
  float fvalue1 = aarch64_get_FP_float (cpu, sn);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  set_flags_for_float_compare (cpu, fvalue1, 0.0f);
}

static void
set_flags_for_double_compare (sim_cpu *cpu, double dval1, double dval2)
{
  uint32_t flags;

  /* FIXME: Add exception raising.  */
  if (isnan (dval1) || isnan (dval2))
    flags = C|V;
  else if (isinf (dval1) && isinf (dval2))
    {
      /* Subtracting two infinities may give a NaN.  We only need to compare
         the signs, which we can get from isinf.  */
      int result = isinf (dval1) - isinf (dval2);

      if (result == 0)
        flags = Z|C;
      else if (result < 0)
        flags = N;
      else /* (result > 0).  */
        flags = C;
    }
  else
    {
      double result = dval1 - dval2;

      if (result == 0.0)
        flags = Z|C;
      else if (result < 0)
        flags = N;
      else /* (result > 0).  */
        flags = C;
    }

  aarch64_set_CPSR (cpu, flags);
}

/* Double compare -- Invalid Operation exception only on signaling NaNs.  */
static void
fcmpd (sim_cpu *cpu)
{
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);

  double dvalue1 = aarch64_get_FP_double (cpu, sn);
  double dvalue2 = aarch64_get_FP_double (cpu, sm);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  set_flags_for_double_compare (cpu, dvalue1, dvalue2);
}

/* Double compare to zero -- Invalid Operation exception
   only on signaling NaNs.  */
static void
fcmpzd (sim_cpu *cpu)
{
  unsigned sn = INSTR ( 9, 5);
  double dvalue1 = aarch64_get_FP_double (cpu, sn);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  set_flags_for_double_compare (cpu, dvalue1, 0.0);
}

/* Double compare -- Invalid Operation exception on all NaNs.  */
static void
fcmped (sim_cpu *cpu)
{
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);

  double dvalue1 = aarch64_get_FP_double (cpu, sn);
  double dvalue2 = aarch64_get_FP_double (cpu, sm);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  set_flags_for_double_compare (cpu, dvalue1, dvalue2);
}

/* Double compare to zero -- Invalid Operation exception on all NaNs.  */
static void
fcmpzed (sim_cpu *cpu)
{
  unsigned sn = INSTR ( 9, 5);
  double dvalue1 = aarch64_get_FP_double (cpu, sn);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  set_flags_for_double_compare (cpu, dvalue1, 0.0);
}

static void
dexSimpleFPCompare (sim_cpu *cpu)
{
  /* assert instr[28,25] == 1111
     instr[30:24:21:13,10] = 0011000
     instr[31]    = M : 0 ==> OK, 1 ==> UNALLOC
     instr[29]    ==> S : 0 ==> OK, 1 ==> UNALLOC
     instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
     instr[15,14] ==> op : 00 ==> OK, ow ==> UNALLOC
     instr[4,0]   ==> opcode2 : 00000 ==> FCMP, 10000 ==> FCMPE,
                                01000 ==> FCMPZ, 11000 ==> FCMPEZ,
                                ow ==> UNALLOC  */
  uint32_t dispatch;
  uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
  uint32_t type = INSTR (23, 22);
  uint32_t op = INSTR (15, 14);
  uint32_t op2_2_0 = INSTR (2, 0);

  if (op2_2_0 != 0)
    HALT_UNALLOC;

  if (M_S != 0)
    HALT_UNALLOC;

  if (type > 1)
    HALT_UNALLOC;

  if (op != 0)
    HALT_UNALLOC;

  /* Dispatch on type and top 2 bits of opcode.  */
  dispatch = (type << 2) | INSTR (4, 3);

  switch (dispatch)
    {
    case 0: fcmps (cpu); return;
    case 1: fcmpzs (cpu); return;
    case 2: fcmpes (cpu); return;
    case 3: fcmpzes (cpu); return;
    case 4: fcmpd (cpu); return;
    case 5: fcmpzd (cpu); return;
    case 6: fcmped (cpu); return;
    case 7: fcmpzed (cpu); return;
    }
}

static void
do_scalar_FADDP (sim_cpu *cpu)
{
  /* instr [31,23] = 0111 1110 0
     instr [22]    = single(0)/double(1)
     instr [21,10] = 11 0000 1101 10
     instr [9,5]   = Fn
     instr [4,0]   = Fd.  */

  unsigned Fn = INSTR (9, 5);
  unsigned Fd = INSTR (4, 0);

  NYI_assert (31, 23, 0x0FC);
  NYI_assert (21, 10, 0xC36);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (INSTR (22, 22))
    {
      double val1 = aarch64_get_vec_double (cpu, Fn, 0);
      double val2 = aarch64_get_vec_double (cpu, Fn, 1);

      aarch64_set_FP_double (cpu, Fd, val1 + val2);
    }
  else
    {
      float val1 = aarch64_get_vec_float (cpu, Fn, 0);
      float val2 = aarch64_get_vec_float (cpu, Fn, 1);

      aarch64_set_FP_float (cpu, Fd, val1 + val2);
    }
}

/* Floating point absolute difference.  */

static void
do_scalar_FABD (sim_cpu *cpu)
{
  /* instr [31,23] = 0111 1110 1
     instr [22]    = float(0)/double(1)
     instr [21]    = 1
     instr [20,16] = Rm
     instr [15,10] = 1101 01
     instr [9, 5]  = Rn
     instr [4, 0]  = Rd.  */

  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  NYI_assert (31, 23, 0x0FD);
  NYI_assert (21, 21, 1);
  NYI_assert (15, 10, 0x35);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (INSTR (22, 22))
    aarch64_set_FP_double (cpu, rd,
                           fabs (aarch64_get_FP_double (cpu, rn)
                                 - aarch64_get_FP_double (cpu, rm)));
  else
    aarch64_set_FP_float (cpu, rd,
                          fabsf (aarch64_get_FP_float (cpu, rn)
                                 - aarch64_get_FP_float (cpu, rm)));
}

static void
do_scalar_CMGT (sim_cpu *cpu)
{
  /* instr [31,21] = 0101 1110 111
     instr [20,16] = Rm
     instr [15,10] = 00 1101
     instr [9, 5]  = Rn
     instr [4, 0]  = Rd.  */

  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  NYI_assert (31, 21, 0x2F7);
  NYI_assert (15, 10, 0x0D);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, rd, 0,
                       aarch64_get_vec_u64 (cpu, rn, 0) >
                       aarch64_get_vec_u64 (cpu, rm, 0) ? -1L : 0L);
}

static void
do_scalar_USHR (sim_cpu *cpu)
{
  /* instr [31,23] = 0111 1111 0
     instr [22,16] = shift amount
     instr [15,10] = 0000 01
     instr [9, 5]  = Rn
     instr [4, 0]  = Rd.  */

  unsigned amount = 128 - INSTR (22, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  NYI_assert (31, 23, 0x0FE);
  NYI_assert (15, 10, 0x01);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, rd, 0,
                       aarch64_get_vec_u64 (cpu, rn, 0) >> amount);
}

static void
do_scalar_SSHL (sim_cpu *cpu)
{
  /* instr [31,21] = 0101 1110 111
     instr [20,16] = Rm
     instr [15,10] = 0100 01
     instr [9, 5]  = Rn
     instr [4, 0]  = Rd.  */

  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);

  NYI_assert (31, 21, 0x2F7);
  NYI_assert (15, 10, 0x11);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (shift >= 0)
    aarch64_set_vec_s64 (cpu, rd, 0,
                         aarch64_get_vec_s64 (cpu, rn, 0) << shift);
  else
    aarch64_set_vec_s64 (cpu, rd, 0,
                         aarch64_get_vec_s64 (cpu, rn, 0) >> - shift);
}

static void
do_scalar_shift (sim_cpu *cpu)
{
  /* instr [31,23] = 0101 1111 0
     instr [22,16] = shift amount
     instr [15,10] = 0101 01   [SHL]
     instr [15,10] = 0000 01   [SSHR]
     instr [9, 5]  = Rn
     instr [4, 0]  = Rd.  */

  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  unsigned amount;

  NYI_assert (31, 23, 0x0BE);

  if (INSTR (22, 22) == 0)
    HALT_UNALLOC;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (INSTR (15, 10))
    {
    case 0x01: /* SSHR */
      amount = 128 - INSTR (22, 16);
      aarch64_set_vec_s64 (cpu, rd, 0,
                           aarch64_get_vec_s64 (cpu, rn, 0) >> amount);
      return;
    case 0x15: /* SHL */
      amount = INSTR (22, 16) - 64;
      aarch64_set_vec_u64 (cpu, rd, 0,
                           aarch64_get_vec_u64 (cpu, rn, 0) << amount);
      return;
    default:
      HALT_NYI;
    }
}
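
/* The shift immediates above come from instr[22,16] (immh:immb), with
   immh[3] set for these 64-bit forms (hence the INSTR (22, 22) check).
   A right shift amount is 128 - immh:immb and a left shift amount is
   immh:immb - 64; e.g. immh:immb = 120 (0x78) encodes SSHR #8 or
   SHL #56.  A standalone sketch of that decoding, illustrative only
   and not used by the decoder:  */
static unsigned
scalar_shift_amount_example (unsigned immhb, int is_left_shift)
{
  /* Caller guarantees immh[3] is set, i.e. 64 <= immhb <= 127.  */
  return is_left_shift ? immhb - 64 : 128 - immhb;
}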

/* FCMEQ FCMGT FCMGE.  */
static void
do_scalar_FCM (sim_cpu *cpu)
{
  /* instr [31,30] = 01
     instr [29]    = U
     instr [28,24] = 1 1110
     instr [23]    = E
     instr [22]    = size
     instr [21]    = 1
     instr [20,16] = Rm
     instr [15,12] = 1110
     instr [11]    = AC
     instr [10]    = 1
     instr [9, 5]  = Rn
     instr [4, 0]  = Rd.  */

  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  unsigned EUac = (INSTR (23, 23) << 2) | (INSTR (29, 29) << 1) | INSTR (11, 11);
  unsigned result;
  float val1;
  float val2;

  NYI_assert (31, 30, 1);
  NYI_assert (28, 24, 0x1E);
  NYI_assert (21, 21, 1);
  NYI_assert (15, 12, 0xE);
  NYI_assert (10, 10, 1);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (INSTR (22, 22))
    {
      double val1 = aarch64_get_FP_double (cpu, rn);
      double val2 = aarch64_get_FP_double (cpu, rm);

      switch (EUac)
        {
        case 0: /* 000 */
          result = val1 == val2;
          break;

        case 3: /* 011 */
          val1 = fabs (val1);
          val2 = fabs (val2);
          /* Fall through.  */
        case 2: /* 010 */
          result = val1 >= val2;
          break;

        case 7: /* 111 */
          val1 = fabs (val1);
          val2 = fabs (val2);
          /* Fall through.  */
        case 6: /* 110 */
          result = val1 > val2;
          break;

        default:
          HALT_UNALLOC;
        }

      aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
      return;
    }

  val1 = aarch64_get_FP_float (cpu, rn);
  val2 = aarch64_get_FP_float (cpu, rm);

  switch (EUac)
    {
    case 0: /* 000 */
      result = val1 == val2;
      break;

    case 3: /* 011 */
      val1 = fabsf (val1);
      val2 = fabsf (val2);
      /* Fall through.  */
    case 2: /* 010 */
      result = val1 >= val2;
      break;

    case 7: /* 111 */
      val1 = fabsf (val1);
      val2 = fabsf (val2);
      /* Fall through.  */
    case 6: /* 110 */
      result = val1 > val2;
      break;

    default:
      HALT_UNALLOC;
    }

  aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
}

/* An alias of DUP.  */
static void
do_scalar_MOV (sim_cpu *cpu)
{
  /* instr [31,21] = 0101 1110 000
     instr [20,16] = imm5
     instr [15,10] = 0000 01
     instr [9, 5]  = Rn
     instr [4, 0]  = Rd.  */

  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  unsigned index;

  NYI_assert (31, 21, 0x2F0);
  NYI_assert (15, 10, 0x01);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (INSTR (16, 16))
    {
      /* 8-bit.  */
      index = INSTR (20, 17);
      aarch64_set_vec_u8
        (cpu, rd, 0, aarch64_get_vec_u8 (cpu, rn, index));
    }
  else if (INSTR (17, 17))
    {
      /* 16-bit.  */
      index = INSTR (20, 18);
      aarch64_set_vec_u16
        (cpu, rd, 0, aarch64_get_vec_u16 (cpu, rn, index));
    }
  else if (INSTR (18, 18))
    {
      /* 32-bit.  */
      index = INSTR (20, 19);
      aarch64_set_vec_u32
        (cpu, rd, 0, aarch64_get_vec_u32 (cpu, rn, index));
    }
  else if (INSTR (19, 19))
    {
      /* 64-bit.  */
      index = INSTR (20, 20);
      aarch64_set_vec_u64
        (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, index));
    }
  else
    HALT_UNALLOC;
}
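
/* The cascade above is the standard imm5 decode: the lowest set bit of
   imm5 (instr[20,16]) selects the element size and the bits above it
   give the element index, i.e. xxxx1 ==> byte with index imm5<4:1>,
   xxx10 ==> half with index imm5<4:2>, xx100 ==> word with index
   imm5<4:3>, x1000 ==> doubleword with index imm5<4>.  A standalone
   sketch of the same decode, illustrative only:  */
static int
dup_element_decode_example (unsigned imm5, unsigned *size, unsigned *index)
{
  unsigned shift;

  for (shift = 0; shift < 4; shift++)
    if (imm5 & (1u << shift))
      {
        *size = 8u << shift;           /* Element size in bits.  */
        *index = imm5 >> (shift + 1);  /* Element index.  */
        return 0;
      }
  return -1;  /* imm5<3:0> == 0000 is unallocated.  */
}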

static void
do_scalar_NEG (sim_cpu *cpu)
{
  /* instr [31,10] = 0111 1110 1110 0000 1011 10
     instr [9, 5]  = Rn
     instr [4, 0]  = Rd.  */

  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  NYI_assert (31, 10, 0x1FB82E);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, rd, 0, - aarch64_get_vec_u64 (cpu, rn, 0));
}

static void
do_scalar_USHL (sim_cpu *cpu)
{
  /* instr [31,21] = 0111 1110 111
     instr [20,16] = Rm
     instr [15,10] = 0100 01
     instr [9, 5]  = Rn
     instr [4, 0]  = Rd.  */

  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);

  NYI_assert (31, 21, 0x3F7);
  NYI_assert (15, 10, 0x11);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (shift >= 0)
    aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) << shift);
  else
    aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) >> - shift);
}
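
/* For USHL/SSHL the shift operand is the *signed* byte in the bottom
   of Vm: a positive value shifts left, a negative value shifts right
   by the absolute amount, so a shift byte of -2 turns USHL into a
   logical right shift by 2.  A standalone sketch of the unsigned
   semantics, illustrative only:  */
static uint64_t
ushl_example (uint64_t value, int8_t shift)
{
  return shift >= 0 ? value << shift : value >> - shift;
}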

static void
do_double_add (sim_cpu *cpu)
{
  /* instr [31,21] = 0101 1110 111
     instr [20,16] = Fn
     instr [15,10] = 1000 01
     instr [9,5]   = Fm
     instr [4,0]   = Fd.  */
  unsigned Fd;
  unsigned Fm;
  unsigned Fn;
  double val1;
  double val2;

  NYI_assert (31, 21, 0x2F7);
  NYI_assert (15, 10, 0x21);

  Fd = INSTR (4, 0);
  Fm = INSTR (9, 5);
  Fn = INSTR (20, 16);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val1 = aarch64_get_FP_double (cpu, Fm);
  val2 = aarch64_get_FP_double (cpu, Fn);

  aarch64_set_FP_double (cpu, Fd, val1 + val2);
}

static void
do_scalar_UCVTF (sim_cpu *cpu)
{
  /* instr [31,23] = 0111 1110 0
     instr [22]    = single(0)/double(1)
     instr [21,10] = 10 0001 1101 10
     instr [9,5]   = rn
     instr [4,0]   = rd.  */

  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  NYI_assert (31, 23, 0x0FC);
  NYI_assert (21, 10, 0x876);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (INSTR (22, 22))
    {
      uint64_t val = aarch64_get_vec_u64 (cpu, rn, 0);

      aarch64_set_vec_double (cpu, rd, 0, (double) val);
    }
  else
    {
      uint32_t val = aarch64_get_vec_u32 (cpu, rn, 0);

      aarch64_set_vec_float (cpu, rd, 0, (float) val);
    }
}

static void
do_scalar_vec (sim_cpu *cpu)
{
  /* instr [30] = 1.  */
  /* instr [28,25] = 1111.  */
  switch (INSTR (31, 23))
    {
    case 0xBC:
      switch (INSTR (15, 10))
        {
        case 0x01: do_scalar_MOV (cpu); return;
        case 0x39: do_scalar_FCM (cpu); return;
        case 0x3B: do_scalar_FCM (cpu); return;
        }
      break;

    case 0xBE: do_scalar_shift (cpu); return;

    case 0xFC:
      switch (INSTR (15, 10))
        {
        case 0x36:
          switch (INSTR (21, 16))
            {
            case 0x30: do_scalar_FADDP (cpu); return;
            case 0x21: do_scalar_UCVTF (cpu); return;
            }
          HALT_NYI;
        case 0x39: do_scalar_FCM (cpu); return;
        case 0x3B: do_scalar_FCM (cpu); return;
        }
      break;

    case 0xFD:
      switch (INSTR (15, 10))
        {
        case 0x0D: do_scalar_CMGT (cpu); return;
        case 0x11: do_scalar_USHL (cpu); return;
        case 0x2E: do_scalar_NEG (cpu); return;
        case 0x35: do_scalar_FABD (cpu); return;
        case 0x39: do_scalar_FCM (cpu); return;
        case 0x3B: do_scalar_FCM (cpu); return;
        default:
          HALT_NYI;
        }

    case 0xFE: do_scalar_USHR (cpu); return;

    case 0xBD:
      switch (INSTR (15, 10))
        {
        case 0x21: do_double_add (cpu); return;
        case 0x11: do_scalar_SSHL (cpu); return;
        default:
          HALT_NYI;
        }

    default:
      HALT_NYI;
    }
}

static void
dexAdvSIMD1 (sim_cpu *cpu)
{
  /* instr [28,25] = 1 111.  */

  /* We are currently only interested in the basic
     scalar fp routines which all have bit 30 = 0.  */
  if (INSTR (30, 30))
    do_scalar_vec (cpu);

  /* instr[24] is set for FP data processing 3-source and clear for
     all other basic scalar fp instruction groups.  */
  else if (INSTR (24, 24))
    dexSimpleFPDataProc3Source (cpu);

  /* instr[21] is clear for floating <-> fixed conversions and set for
     all other basic scalar fp instruction groups.  */
  else if (!INSTR (21, 21))
    dexSimpleFPFixedConvert (cpu);

  /* instr[11,10] : 01 ==> cond compare, 10 ==> Data Proc 2 Source
     11 ==> cond select, 00 ==> other.  */
  else
    switch (INSTR (11, 10))
      {
      case 1: dexSimpleFPCondCompare (cpu); return;
      case 2: dexSimpleFPDataProc2Source (cpu); return;
      case 3: dexSimpleFPCondSelect (cpu); return;

      default:
        /* Now an ordered cascade of tests.
           FP immediate has instr [12] == 1.
           FP compare has instr [13] == 1.
           FP Data Proc 1 Source has instr [14] == 1.
           FP floating <--> integer conversions has instr [15] == 0.  */
        if (INSTR (12, 12))
          dexSimpleFPImmediate (cpu);

        else if (INSTR (13, 13))
          dexSimpleFPCompare (cpu);

        else if (INSTR (14, 14))
          dexSimpleFPDataProc1Source (cpu);

        else if (!INSTR (15, 15))
          dexSimpleFPIntegerConvert (cpu);

        else
          /* If we get here then instr[15] == 1 which means UNALLOC.  */
          HALT_UNALLOC;
      }
}

/* PC relative addressing.  */

static void
pcadr (sim_cpu *cpu)
{
  /* instr[31]    = op : 0 ==> ADR, 1 ==> ADRP
     instr[30,29] = immlo
     instr[23,5]  = immhi.  */
  uint64_t address;
  unsigned rd = INSTR (4, 0);
  uint32_t isPage = INSTR (31, 31);
  union { int64_t s64; uint64_t u64; } imm;
  uint64_t offset;

  imm.s64 = simm64 (aarch64_get_instr (cpu), 23, 5);
  offset = imm.u64;
  offset = (offset << 2) | INSTR (30, 29);

  address = aarch64_get_PC (cpu);

  if (isPage)
    {
      offset <<= 12;
      address &= ~0xfff;
    }

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, address + offset);
}
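
/* Worked example for pcadr: with PC = 0x400123 and a combined
   immhi:immlo of 1, ADR computes 0x400123 + 1 = 0x400124, while ADRP
   computes (0x400123 & ~0xfff) + (1 << 12) = 0x401000.  A standalone
   sketch of the page-relative case, illustrative only (page_offset is
   the sign-extended immhi:immlo value):  */
static uint64_t
adrp_target_example (uint64_t pc, int64_t page_offset)
{
  return (pc & ~(uint64_t) 0xfff) + ((uint64_t) page_offset << 12);
}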

/* Specific decode and execute for group Data Processing Immediate.  */

static void
dexPCRelAddressing (sim_cpu *cpu)
{
  /* assert instr[28,24] = 10000.  */
  pcadr (cpu);
}

/* Immediate logical.
   The bimm32/64 argument is constructed by replicating a 2, 4, 8,
   16, 32 or 64 bit sequence pulled out at decode and possibly
   inverting it.

   N.B. the output register (dest) can normally be Xn or SP;
   the exception occurs for flag setting instructions which may
   only use Xn for the output (dest).  The input register can
   never be SP.  */

/* 32 bit and immediate.  */
static void
and32 (sim_cpu *cpu, uint32_t bimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
                       aarch64_get_reg_u32 (cpu, rn, NO_SP) & bimm);
}

/* 64 bit and immediate.  */
static void
and64 (sim_cpu *cpu, uint64_t bimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
                       aarch64_get_reg_u64 (cpu, rn, NO_SP) & bimm);
}

/* 32 bit and immediate set flags.  */
static void
ands32 (sim_cpu *cpu, uint32_t bimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
  uint32_t value2 = bimm;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
  set_flags_for_binop32 (cpu, value1 & value2);
}

/* 64 bit and immediate set flags.  */
static void
ands64 (sim_cpu *cpu, uint64_t bimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
  uint64_t value2 = bimm;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
  set_flags_for_binop64 (cpu, value1 & value2);
}

/* 32 bit exclusive or immediate.  */
static void
eor32 (sim_cpu *cpu, uint32_t bimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
                       aarch64_get_reg_u32 (cpu, rn, NO_SP) ^ bimm);
}

/* 64 bit exclusive or immediate.  */
static void
eor64 (sim_cpu *cpu, uint64_t bimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
                       aarch64_get_reg_u64 (cpu, rn, NO_SP) ^ bimm);
}

/* 32 bit or immediate.  */
static void
orr32 (sim_cpu *cpu, uint32_t bimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
                       aarch64_get_reg_u32 (cpu, rn, NO_SP) | bimm);
}

/* 64 bit or immediate.  */
static void
orr64 (sim_cpu *cpu, uint64_t bimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
                       aarch64_get_reg_u64 (cpu, rn, NO_SP) | bimm);
}

/* Logical shifted register.
   These allow an optional LSL, ASR, LSR or ROR to the second source
   register with a count up to the register bit count.
   N.B register args may not be SP.  */

/* 32 bit AND shifted register.  */
static void
and32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64
    (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
     & shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
}

/* 64 bit AND shifted register.  */
static void
and64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64
    (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
     & shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
}

/* 32 bit AND shifted register setting flags.  */
static void
ands32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
  uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                               shift, count);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
  set_flags_for_binop32 (cpu, value1 & value2);
}

/* 64 bit AND shifted register setting flags.  */
static void
ands64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
  uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
                               shift, count);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
  set_flags_for_binop64 (cpu, value1 & value2);
}

/* 32 bit BIC shifted register.  */
static void
bic32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64
    (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
     & ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
}

/* 64 bit BIC shifted register.  */
static void
bic64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64
    (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
     & ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
}

/* 32 bit BIC shifted register setting flags.  */
static void
bics32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
  uint32_t value2 = ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                 shift, count);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
  set_flags_for_binop32 (cpu, value1 & value2);
}

/* 64 bit BIC shifted register setting flags.  */
static void
bics64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
  uint64_t value2 = ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
                                 shift, count);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
  set_flags_for_binop64 (cpu, value1 & value2);
}

/* 32 bit EON shifted register.  */
static void
eon32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64
    (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
     ^ ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
}

/* 64 bit EON shifted register.  */
static void
eon64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64
    (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
     ^ ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
}

/* 32 bit EOR shifted register.  */
static void
eor32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64
    (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
     ^ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
}

/* 64 bit EOR shifted register.  */
static void
eor64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64
    (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
     ^ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
}

/* 32 bit ORR shifted register.  */
static void
orr32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64
    (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
     | shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
}

/* 64 bit ORR shifted register.  */
static void
orr64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64
    (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
     | shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
}

/* 32 bit ORN shifted register.  */
static void
orn32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64
    (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
     | ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
}

/* 64 bit ORN shifted register.  */
static void
orn64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64
    (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
     | ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
}

static void
dexLogicalImmediate (sim_cpu *cpu)
{
  /* assert instr[28,23] = 100100
     instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
     instr[30,29] = op : 0 ==> AND, 1 ==> ORR, 2 ==> EOR, 3 ==> ANDS
     instr[22]    = N : used to construct immediate mask
     instr[21,16] = immr
     instr[15,10] = imms
     instr[9,5]   = Rn
     instr[4,0]   = Rd  */

  /* 32 bit operations must have N = 0 or else we have an UNALLOC.  */
  uint32_t size = INSTR (31, 31);
  uint32_t N = INSTR (22, 22);
  /* uint32_t immr = INSTR (21, 16);  */
  /* uint32_t imms = INSTR (15, 10);  */
  uint32_t index = INSTR (22, 10);
  uint64_t bimm64 = LITable [index];
  uint32_t dispatch = INSTR (30, 29);

  if (~size & N)
    HALT_UNALLOC;

  if (!bimm64)
    HALT_UNALLOC;

  if (size == 0)
    {
      uint32_t bimm = (uint32_t) bimm64;

      switch (dispatch)
        {
        case 0: and32 (cpu, bimm); return;
        case 1: orr32 (cpu, bimm); return;
        case 2: eor32 (cpu, bimm); return;
        case 3: ands32 (cpu, bimm); return;
        }
    }
  else
    {
      switch (dispatch)
        {
        case 0: and64 (cpu, bimm64); return;
        case 1: orr64 (cpu, bimm64); return;
        case 2: eor64 (cpu, bimm64); return;
        case 3: ands64 (cpu, bimm64); return;
        }
    }
  HALT_UNALLOC;
}
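
/* The LITable lookup above is keyed on the 13 bits N:immr:imms taken
   straight from the instruction; combinations that do not decode to a
   valid bitmask immediate are presumed to hold zero, which is why a
   zero bimm64 is rejected as UNALLOC.  A standalone sketch of the
   index computation, illustrative only:  */
static uint32_t
logical_imm_index_example (uint32_t N, uint32_t immr, uint32_t imms)
{
  /* Equivalent to INSTR (22, 10): N is instr[22], immr is
     instr[21,16], imms is instr[15,10].  */
  return (N << 12) | (immr << 6) | imms;
}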

/* Immediate move.
   The uimm argument is a 16 bit value to be inserted into the
   target register; the pos argument locates the 16 bit word in the
   dest register i.e. it is in {0, 1} for 32 bit and {0, 1, 2,
   3} for 64 bit.
   N.B. the register arg may not be SP, so it should be
   accessed using the setGZRegisterXXX accessors.  */

/* 32 bit move 16 bit immediate zero remaining shorts.  */
static void
movz32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, val << (pos * 16));
}

/* 64 bit move 16 bit immediate zero remaining shorts.  */
static void
movz64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, ((uint64_t) val) << (pos * 16));
}

/* 32 bit move 16 bit immediate negated.  */
static void
movn32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, ((val << (pos * 16)) ^ 0xffffffffU));
}

/* 64 bit move 16 bit immediate negated.  */
static void
movn64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64
    (cpu, rd, NO_SP, ((((uint64_t) val) << (pos * 16))
                      ^ 0xffffffffffffffffULL));
}

/* 32 bit move 16 bit immediate keep remaining shorts.  */
static void
movk32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
{
  unsigned rd = INSTR (4, 0);
  uint32_t current = aarch64_get_reg_u32 (cpu, rd, NO_SP);
  uint32_t value = val << (pos * 16);
  uint32_t mask = ~(0xffffU << (pos * 16));

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
}

/* 64 bit move 16 bit immediate keep remaining shorts.  */
static void
movk64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
{
  unsigned rd = INSTR (4, 0);
  uint64_t current = aarch64_get_reg_u64 (cpu, rd, NO_SP);
  uint64_t value = (uint64_t) val << (pos * 16);
  uint64_t mask = ~(0xffffULL << (pos * 16));

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
}

static void
dexMoveWideImmediate (sim_cpu *cpu)
{
  /* assert instr[28:23] = 100101
     instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
     instr[30,29] = op : 0 ==> MOVN, 1 ==> UNALLOC, 2 ==> MOVZ, 3 ==> MOVK
     instr[22,21] = shift : 00 == LSL#0, 01 = LSL#16, 10 = LSL#32, 11 = LSL#48
     instr[20,5]  = uimm16
     instr[4,0]   = Rd  */

  /* N.B. the (multiple of 16) shift is applied by the called routine,
     we just pass the multiplier.  */

  uint32_t imm;
  uint32_t size = INSTR (31, 31);
  uint32_t op = INSTR (30, 29);
  uint32_t shift = INSTR (22, 21);

  /* 32 bit can only shift 0 or 1 lot of 16.
     Anything else is an unallocated instruction.  */
  if (size == 0 && (shift > 1))
    HALT_UNALLOC;

  if (op == 1)
    HALT_UNALLOC;

  imm = INSTR (20, 5);

  if (size == 0)
    {
      if (op == 0)
        movn32 (cpu, imm, shift);
      else if (op == 2)
        movz32 (cpu, imm, shift);
      else
        movk32 (cpu, imm, shift);
    }
  else
    {
      if (op == 0)
        movn64 (cpu, imm, shift);
      else if (op == 2)
        movz64 (cpu, imm, shift);
      else
        movk64 (cpu, imm, shift);
    }
}
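
/* A 64-bit constant is typically materialised as one MOVZ followed by
   up to three MOVKs, each depositing 16 bits at a different LSL
   position.  A standalone sketch using the same mask-and-insert
   semantics as movz64/movk64 above, illustrative only:  */
static uint64_t
build_constant_example (void)
{
  uint64_t x;

  x = (uint64_t) 0xf00d;                             /* MOVZ x, #0xf00d           */
  x = (x & ~(0xffffULL << 16)) | (0xcafeULL << 16);  /* MOVK x, #0xcafe, LSL #16  */
  x = (x & ~(0xffffULL << 32)) | (0xbeefULL << 32);  /* MOVK x, #0xbeef, LSL #32  */
  x = (x & ~(0xffffULL << 48)) | (0xdeadULL << 48);  /* MOVK x, #0xdead, LSL #48  */
  return x;                                          /* 0xdeadbeefcafef00d        */
}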

/* Bitfield operations.
   These take a pair of bit positions r and s which are in {0..31}
   or {0..63} depending on the instruction word size.
   N.B register args may not be SP.  */

/* OK, we start with ubfm which just needs to pick
   some bits out of source zero the rest and write
   the result to dest.  Just need two logical shifts.  */

/* 32 bit bitfield move, left and right of affected zeroed
   if r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>.  */
static void
ubfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
{
  unsigned rd;
  unsigned rn = INSTR (9, 5);
  uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);

  /* Pick either s+1-r or s+1 consecutive bits out of the original word.  */
  if (r <= s)
    {
      /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
         We want only bits s:xxx:r at the bottom of the word
         so we LSL bit s up to bit 31 i.e. by 31 - s
         and then we LSR to bring bit 31 down to bit s - r
         i.e. by 31 + r - s.  */
      value <<= 31 - s;
      value >>= 31 + r - s;
    }
  else
    {
      /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0
         We want only bits s:xxx:0 starting at bit 31-(r-1)
         so we LSL bit s up to bit 31 i.e. by 31 - s
         and then we LSR to bring bit 31 down to 31-(r-1)+s
         i.e. by r - (s + 1).  */
      value <<= 31 - s;
      value >>= r - (s + 1);
    }

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  rd = INSTR (4, 0);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
}
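
/* Worked example for ubfm32: with r = 8, s = 15 (the UBFX Wd, Wn, #8,
   #8 alias) and a source of 0xAABBCCDD the two shifts give
   0xAABBCCDD << 16 = 0xCCDD0000, then >> 24 = 0x000000CC, i.e. bits
   15:8 moved to the bottom of the word.  Standalone version of the
   same shifts, illustrative only:  */
static uint32_t
ubfm32_example (uint32_t value, uint32_t r, uint32_t s)
{
  if (r <= s)
    return (value << (31 - s)) >> (31 + r - s);
  return (value << (31 - s)) >> (r - (s + 1));
}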

/* 64 bit bitfield move, left and right of affected zeroed
   if r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>.  */
static void
ubfm (sim_cpu *cpu, uint32_t r, uint32_t s)
{
  unsigned rd;
  unsigned rn = INSTR (9, 5);
  uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);

  if (r <= s)
    {
      /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
         We want only bits s:xxx:r at the bottom of the word.
         So we LSL bit s up to bit 63 i.e. by 63 - s
         and then we LSR to bring bit 63 down to bit s - r
         i.e. by 63 + r - s.  */
      value <<= 63 - s;
      value >>= 63 + r - s;
    }
  else
    {
      /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0.
         We want only bits s:xxx:0 starting at bit 63-(r-1).
         So we LSL bit s up to bit 63 i.e. by 63 - s
         and then we LSR to bring bit 63 down to 63-(r-1)+s
         i.e. by r - (s + 1).  */
      value <<= 63 - s;
      value >>= r - (s + 1);
    }

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  rd = INSTR (4, 0);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
}

/* The signed versions need to insert sign bits
   on the left of the inserted bit field.  So we do
   much the same as the unsigned version except we
   use an arithmetic shift right -- this just means
   we need to operate on signed values.  */
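
/* N.B. this relies on >> of a negative signed value being an
   arithmetic shift, which ISO C leaves implementation-defined but
   which holds for the compilers GDB is built with in practice.  For
   example, extracting bits 7:0 of 0x80 as a signed field (the SBFX
   Wd, Wn, #0, #8 alias, r = 0, s = 7) yields 0xffffff80.  A
   standalone sketch of the r <= s case, mirroring the shifts used
   below and illustrative only:  */
static int32_t
sbfm32_example (int32_t value, uint32_t r, uint32_t s)
{
  return (int32_t) (value << (31 - s)) >> (31 + r - s);
}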

/* 32 bit bitfield move, left of affected sign-extended, right zeroed.  */
/* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>.  */
static void
sbfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
{
  unsigned rd;
  unsigned rn = INSTR (9, 5);
  /* As per ubfm32 but use an ASR instead of an LSR.  */
  int32_t value = aarch64_get_reg_s32 (cpu, rn, NO_SP);

  if (r <= s)
    {
      value <<= 31 - s;
      value >>= 31 + r - s;
    }
  else
    {
      value <<= 31 - s;
      value >>= r - (s + 1);
    }

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  rd = INSTR (4, 0);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
}

/* 64 bit bitfield move, left of affected sign-extended, right zeroed.  */
/* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>.  */
static void
sbfm (sim_cpu *cpu, uint32_t r, uint32_t s)
{
  unsigned rd;
  unsigned rn = INSTR (9, 5);
  /* As per ubfm but use an ASR instead of an LSR.  */
  int64_t value = aarch64_get_reg_s64 (cpu, rn, NO_SP);

  if (r <= s)
    {
      value <<= 63 - s;
      value >>= 63 + r - s;
    }
  else
    {
      value <<= 63 - s;
      value >>= r - (s + 1);
    }

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  rd = INSTR (4, 0);
  aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
}

/* Finally, these versions leave non-affected bits
   as is.  So we need to generate the bits as per
   ubfm and also generate a mask to pick the
   bits from the original and computed values.  */

/* 32 bit bitfield move, non-affected bits left as is.
   If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>.  */
static void
bfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
{
  unsigned rn = INSTR (9, 5);
  uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
  uint32_t mask = -1;
  unsigned rd;

  /* Pick either s+1-r or s+1 consecutive bits out of the original word.  */
  if (r <= s)
    {
      /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
         We want only bits s:xxx:r at the bottom of the word
         so we LSL bit s up to bit 31 i.e. by 31 - s
         and then we LSR to bring bit 31 down to bit s - r
         i.e. by 31 + r - s.  */
      value <<= 31 - s;
      value >>= 31 + r - s;
      /* The mask must include the same bits.  */
      mask <<= 31 - s;
      mask >>= 31 + r - s;
    }
  else
    {
      /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0.
         We want only bits s:xxx:0 starting at bit 31-(r-1)
         so we LSL bit s up to bit 31 i.e. by 31 - s
         and then we LSR to bring bit 31 down to 31-(r-1)+s
         i.e. by r - (s + 1).  */
      value <<= 31 - s;
      value >>= r - (s + 1);
      /* The mask must include the same bits.  */
      mask <<= 31 - s;
      mask >>= r - (s + 1);
    }

  rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64
    (cpu, rd, NO_SP, (aarch64_get_reg_u32 (cpu, rd, NO_SP) & ~mask) | value);
}

/* 64 bit bitfield move, non-affected bits left as is.
   If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>.  */
static void
bfm (sim_cpu *cpu, uint32_t r, uint32_t s)
{
  unsigned rd;
  unsigned rn = INSTR (9, 5);
  uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
  uint64_t mask = 0xffffffffffffffffULL;

  if (r <= s)
    {
      /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
         We want only bits s:xxx:r at the bottom of the word
         so we LSL bit s up to bit 63 i.e. by 63 - s
         and then we LSR to bring bit 63 down to bit s - r
         i.e. by 63 + r - s.  */
      value <<= 63 - s;
      value >>= 63 + r - s;
      /* The mask must include the same bits.  */
      mask <<= 63 - s;
      mask >>= 63 + r - s;
    }
  else
    {
      /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0
         We want only bits s:xxx:0 starting at bit 63-(r-1)
         so we LSL bit s up to bit 63 i.e. by 63 - s
         and then we LSR to bring bit 63 down to 63-(r-1)+s
         i.e. by r - (s + 1).  */
      value <<= 63 - s;
      value >>= r - (s + 1);
      /* The mask must include the same bits.  */
      mask <<= 63 - s;
      mask >>= r - (s + 1);
    }

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  rd = INSTR (4, 0);
  aarch64_set_reg_u64
    (cpu, rd, NO_SP, (aarch64_get_reg_u64 (cpu, rd, NO_SP) & ~mask) | value);
}

static void
dexBitfieldImmediate (sim_cpu *cpu)
{
  /* assert instr[28:23] = 100110
     instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
     instr[30,29] = op : 0 ==> SBFM, 1 ==> BFM, 2 ==> UBFM, 3 ==> UNALLOC
     instr[22]    = N : must be 0 for 32 bit, 1 for 64 bit ow UNALLOC
     instr[21,16] = immr : 0xxxxx for 32 bit, xxxxxx for 64 bit
     instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
     instr[9,5]   = Rn
     instr[4,0]   = Rd  */

  /* 32 bit operations must have N = 0 or else we have an UNALLOC.  */
  uint32_t dispatch;
  uint32_t imms;
  uint32_t size = INSTR (31, 31);
  uint32_t N = INSTR (22, 22);
  /* 32 bit operations must have immr[5] = 0 and imms[5] = 0
     or else we have an UNALLOC.  */
  uint32_t immr = INSTR (21, 16);

  if (~size & N)
    HALT_UNALLOC;

  if (!size && uimm (immr, 5, 5))
    HALT_UNALLOC;

  imms = INSTR (15, 10);
  if (!size && uimm (imms, 5, 5))
    HALT_UNALLOC;

  /* Switch on combined size and op.  */
  dispatch = INSTR (31, 29);
  switch (dispatch)
    {
    case 0: sbfm32 (cpu, immr, imms); return;
    case 1: bfm32 (cpu, immr, imms); return;
    case 2: ubfm32 (cpu, immr, imms); return;
    case 4: sbfm (cpu, immr, imms); return;
    case 5: bfm (cpu, immr, imms); return;
    case 6: ubfm (cpu, immr, imms); return;
    default: HALT_UNALLOC;
    }
}

static void
do_EXTR_32 (sim_cpu *cpu)
{
  /* instr[31:21] = 00010011100
     instr[20,16] = Rm
     instr[15,10] = imms : 0xxxxx for 32 bit
     instr[9,5]   = Rn
     instr[4,0]   = Rd  */
  unsigned rm   = INSTR (20, 16);
  unsigned imms = INSTR (15, 10) & 31;
  unsigned rn   = INSTR ( 9,  5);
  unsigned rd   = INSTR ( 4,  0);
  uint64_t val1;
  uint64_t val2;

  val1 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
  val1 >>= imms;
  val2 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
  val2 <<= (32 - imms);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, val1 | val2);
}

static void
do_EXTR_64 (sim_cpu *cpu)
{
  /* instr[31:21] = 10010011100
     instr[20,16] = Rm
     instr[15,10] = imms
     instr[9,5]   = Rn
     instr[4,0]   = Rd  */
  unsigned rm   = INSTR (20, 16);
  unsigned imms = INSTR (15, 10) & 63;
  unsigned rn   = INSTR ( 9,  5);
  unsigned rd   = INSTR ( 4,  0);
  uint64_t val;

  val = aarch64_get_reg_u64 (cpu, rm, NO_SP);
  val >>= imms;
  val |= (aarch64_get_reg_u64 (cpu, rn, NO_SP) << (64 - imms));

  aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
}
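
/* When Rn == Rm, EXTR is the ROR (immediate) alias: concatenating a
   register with itself and extracting from bit imms is a rotate right
   by imms.  A standalone sketch for the 64-bit case, illustrative
   only (the early return for lsb == 0 avoids the undefined shift by
   64 that a naive one-liner would perform):  */
static uint64_t
ror64_example (uint64_t val, unsigned lsb)
{
  if ((lsb &= 63) == 0)
    return val;
  return (val >> lsb) | (val << (64 - lsb));
}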

static void
dexExtractImmediate (sim_cpu *cpu)
{
  /* assert instr[28:23] = 100111
     instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
     instr[30,29] = op21 : 0 ==> EXTR, 1,2,3 ==> UNALLOC
     instr[22]    = N : must be 0 for 32 bit, 1 for 64 bit or UNALLOC
     instr[21]    = op0 : must be 0 or UNALLOC
     instr[20,16] = Rm
     instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
     instr[9,5]   = Rn
     instr[4,0]   = Rd  */

  /* 32 bit operations must have N = 0 or else we have an UNALLOC.  */
  /* 64 bit operations must have N = 1 or else we have an UNALLOC.  */
  uint32_t dispatch;
  uint32_t size = INSTR (31, 31);
  uint32_t N = INSTR (22, 22);
  /* 32 bit operations must have imms[5] = 0
     or else we have an UNALLOC.  */
  uint32_t imms = INSTR (15, 10);

  if (size ^ N)
    HALT_UNALLOC;

  if (!size && uimm (imms, 5, 5))
    HALT_UNALLOC;

  /* Switch on combined size and op.  */
  dispatch = INSTR (31, 29);

  if (dispatch == 0)
    do_EXTR_32 (cpu);

  else if (dispatch == 4)
    do_EXTR_64 (cpu);

  else if (dispatch == 1)
    HALT_NYI;
  else
    HALT_UNALLOC;
}

static void
dexDPImm (sim_cpu *cpu)
{
  /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
     assert group == GROUP_DPIMM_1000 || group == GROUP_DPIMM_1001
     bits [25,23] of a DPImm are the secondary dispatch vector.  */
  uint32_t group2 = dispatchDPImm (aarch64_get_instr (cpu));

  switch (group2)
    {
    case DPIMM_PCADR_000:
    case DPIMM_PCADR_001:
      dexPCRelAddressing (cpu);
      return;

    case DPIMM_ADDSUB_010:
    case DPIMM_ADDSUB_011:
      dexAddSubtractImmediate (cpu);
      return;

    case DPIMM_LOG_100:
      dexLogicalImmediate (cpu);
      return;

    case DPIMM_MOV_101:
      dexMoveWideImmediate (cpu);
      return;

    case DPIMM_BITF_110:
      dexBitfieldImmediate (cpu);
      return;

    case DPIMM_EXTR_111:
      dexExtractImmediate (cpu);
      return;

    default:
      /* Should never reach here.  */
      HALT_NYI;
    }
}

static void
dexLoadUnscaledImmediate (sim_cpu *cpu)
{
  /* instr[29,24] == 111_00
     instr[21]    == 0
     instr[11,10] == 00
     instr[31,30] = size
     instr[26]    = V
     instr[23,22] = opc
     instr[20,12] = simm9
     instr[9,5]   = rn may be SP.  */
  /* unsigned rt = INSTR (4, 0);  */
  uint32_t V = INSTR (26, 26);
  uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
  int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);

  if (!V)
    {
      /* GReg operations.  */
      switch (dispatch)
        {
        case 0:  sturb (cpu, imm); return;
        case 1:  ldurb32 (cpu, imm); return;
        case 2:  ldursb64 (cpu, imm); return;
        case 3:  ldursb32 (cpu, imm); return;
        case 4:  sturh (cpu, imm); return;
        case 5:  ldurh32 (cpu, imm); return;
        case 6:  ldursh64 (cpu, imm); return;
        case 7:  ldursh32 (cpu, imm); return;
        case 8:  stur32 (cpu, imm); return;
        case 9:  ldur32 (cpu, imm); return;
        case 10: ldursw (cpu, imm); return;
        case 12: stur64 (cpu, imm); return;
        case 13: ldur64 (cpu, imm); return;

        case 14:
          /* PRFUM NYI.  */
          HALT_NYI;

        default:
        case 11:
        case 15:
          HALT_UNALLOC;
        }
    }

  /* FReg operations.  */
  switch (dispatch)
    {
    case 2:  fsturq (cpu, imm); return;
    case 3:  fldurq (cpu, imm); return;
    case 8:  fsturs (cpu, imm); return;
    case 9:  fldurs (cpu, imm); return;
    case 12: fsturd (cpu, imm); return;
    case 13: fldurd (cpu, imm); return;

    case 0: /* STUR 8 bit FP.  */
    case 1: /* LDUR 8 bit FP.  */
    case 4: /* STUR 16 bit FP.  */
    case 5: /* LDUR 16 bit FP.  */
      HALT_NYI;

    default:
    case 6:
    case 7:
    case 10:
    case 11:
    case 14:
    case 15:
      HALT_UNALLOC;
    }
}

/* N.B. A preliminary note regarding all the ldrs<x>32
   instructions

   The signed value loaded by these instructions is cast to unsigned
   before being assigned to aarch64_get_reg_u64 (cpu, N) i.e. to the
   64 bit element of the GReg union.  This performs a 32 bit sign
   extension (as required) but avoids 64 bit sign extension, thus
   ensuring that the top half of the register word is zero.  This is
   what the spec demands when a 32 bit load occurs.  */
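
/* For example, loading the byte 0x80 into a W register this way gives
   0x00000000ffffff80 in the underlying 64-bit register: the byte is
   sign-extended to 32 bits and the unsigned cast then keeps the top
   half zero.  A standalone sketch of that double conversion,
   illustrative only:  */
static uint64_t
ldrsb32_extend_example (int8_t byte)
{
  /* int8_t -> int32_t sign-extends; the uint32_t cast keeps the top
     32 bits of the 64-bit result zero.  */
  return (uint32_t) (int32_t) byte;
}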

/* 32 bit load sign-extended byte scaled unsigned 12 bit.  */
static void
ldrsb32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned int rn = INSTR (9, 5);
  unsigned int rt = INSTR (4, 0);

  /* The target register may not be SP but the source may be.
     There is no scaling required for a byte load.  */
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       (int64_t) aarch64_get_mem_s8 (cpu, address));
}

/* 32 bit load sign-extended byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned int rm = INSTR (20, 16);
  unsigned int rn = INSTR (9, 5);
  unsigned int rt = INSTR (4, 0);

  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                 extension);

  /* There is no scaling required for a byte load.  */
  aarch64_set_reg_u64
    (cpu, rt, NO_SP, (int64_t) aarch64_get_mem_s8 (cpu, address
                                                   + displacement));
}

/* 32 bit load sign-extended byte unscaled signed 9 bit with
   pre- or post-writeback.  */
static void
ldrsb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  uint64_t address;
  unsigned int rn = INSTR (9, 5);
  unsigned int rt = INSTR (4, 0);

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb == Pre)
    address += offset;

  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       (int64_t) aarch64_get_mem_s8 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, NO_SP, address);
}
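
/* Pre-indexing adds the offset before the access and writes the new
   base back; post-indexing accesses at the old base and writes the
   updated base back afterwards.  E.g. base = 0x1000, offset = -16
   accesses 0xff0 when pre-indexed but 0x1000 when post-indexed, and
   both leave the base register holding 0xff0.  A standalone sketch of
   the addressing used above, illustrative only:  */
static uint64_t
writeback_ea_example (uint64_t *base, int32_t offset, WriteBack wb)
{
  uint64_t address = *base;

  if (wb == Pre)
    address += offset;
  if (wb != NoWriteBack)
    *base = address + (wb == Post ? offset : 0);
  return address;  /* The address actually accessed.  */
}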
10400
10401 /* 8 bit store scaled. */
10402 static void
10403 fstrb_abs (sim_cpu *cpu, uint32_t offset)
10404 {
10405 unsigned st = INSTR (4, 0);
10406 unsigned rn = INSTR (9, 5);
10407
10408 aarch64_set_mem_u8 (cpu,
10409 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
10410 aarch64_get_vec_u8 (cpu, st, 0));
10411 }
10412
10413 /* 8 bit store scaled or unscaled zero- or
10414 sign-extended 8-bit register offset. */
10415 static void
10416 fstrb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10417 {
10418 unsigned rm = INSTR (20, 16);
10419 unsigned rn = INSTR (9, 5);
10420 unsigned st = INSTR (4, 0);
10421
10422 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10423 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10424 extension);
10425 uint64_t displacement = extended; /* There is no scaling for a byte store. */
10426
10427 aarch64_set_mem_u8
10428 (cpu, address + displacement, aarch64_get_vec_u8 (cpu, st, 0));
10429 }
10430
10431 /* 16 bit store scaled. */
10432 static void
10433 fstrh_abs (sim_cpu *cpu, uint32_t offset)
10434 {
10435 unsigned st = INSTR (4, 0);
10436 unsigned rn = INSTR (9, 5);
10437
10438 aarch64_set_mem_u16
10439 (cpu,
10440 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16),
10441 aarch64_get_vec_u16 (cpu, st, 0));
10442 }
10443
10444 /* 16 bit store scaled or unscaled zero-
10445 or sign-extended 16-bit register offset. */
10446 static void
10447 fstrh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10448 {
10449 unsigned rm = INSTR (20, 16);
10450 unsigned rn = INSTR (9, 5);
10451 unsigned st = INSTR (4, 0);
10452
10453 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10454 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10455 extension);
10456 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
10457
10458 aarch64_set_mem_u16
10459 (cpu, address + displacement, aarch64_get_vec_u16 (cpu, st, 0));
10460 }
10461
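/* A sketch of the OPT_SCALE contract assumed above: the extended register
   offset is shifted by log2 of the access size in bytes, but only when the
   S bit selects a scaled offset. 'opt_scale_sketch' is a hypothetical name. */
#if 0
static int64_t
opt_scale_sketch (int64_t extended, unsigned bits, Scaling scaling)
{
  unsigned shift = (bits == 16 ? 1
                    : bits == 32 ? 2
                    : bits == 64 ? 3
                    : bits == 128 ? 4 : 0);
  return scaling == Scaled ? extended << shift : extended;
}
#endif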
10462 /* 32 bit store scaled unsigned 12 bit. */
10463 static void
10464 fstrs_abs (sim_cpu *cpu, uint32_t offset)
10465 {
10466 unsigned st = INSTR (4, 0);
10467 unsigned rn = INSTR (9, 5);
10468
10469 aarch64_set_mem_u32
10470 (cpu,
10471 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32),
10472 aarch64_get_vec_u32 (cpu, st, 0));
10473 }
10474
10475 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
10476 static void
10477 fstrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10478 {
10479 unsigned rn = INSTR (9, 5);
10480 unsigned st = INSTR (4, 0);
10481
10482 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10483
10484 if (wb != Post)
10485 address += offset;
10486
10487 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, st, 0));
10488
10489 if (wb == Post)
10490 address += offset;
10491
10492 if (wb != NoWriteBack)
10493 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10494 }
10495
10496 /* 32 bit store scaled or unscaled zero-
10497 or sign-extended 32-bit register offset. */
10498 static void
10499 fstrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10500 {
10501 unsigned rm = INSTR (20, 16);
10502 unsigned rn = INSTR (9, 5);
10503 unsigned st = INSTR (4, 0);
10504
10505 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10506 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10507 extension);
10508 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
10509
10510 aarch64_set_mem_u32
10511 (cpu, address + displacement, aarch64_get_vec_u32 (cpu, st, 0));
10512 }
10513
10514 /* 64 bit store scaled unsigned 12 bit. */
10515 static void
10516 fstrd_abs (sim_cpu *cpu, uint32_t offset)
10517 {
10518 unsigned st = INSTR (4, 0);
10519 unsigned rn = INSTR (9, 5);
10520
10521 aarch64_set_mem_u64
10522 (cpu,
10523 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64),
10524 aarch64_get_vec_u64 (cpu, st, 0));
10525 }
10526
10527 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
10528 static void
10529 fstrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10530 {
10531 unsigned rn = INSTR (9, 5);
10532 unsigned st = INSTR (4, 0);
10533
10534 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10535
10536 if (wb != Post)
10537 address += offset;
10538
10539 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, st, 0));
10540
10541 if (wb == Post)
10542 address += offset;
10543
10544 if (wb != NoWriteBack)
10545 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10546 }
10547
10548 /* 64 bit store scaled or unscaled zero-
10549 or sign-extended 32-bit register offset. */
10550 static void
10551 fstrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10552 {
10553 unsigned rm = INSTR (20, 16);
10554 unsigned rn = INSTR (9, 5);
10555 unsigned st = INSTR (4, 0);
10556
10557 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10558 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10559 extension);
10560 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
10561
10562 aarch64_set_mem_u64
10563 (cpu, address + displacement, aarch64_get_vec_u64 (cpu, st, 0));
10564 }
10565
10566 /* 128 bit store scaled unsigned 12 bit. */
10567 static void
10568 fstrq_abs (sim_cpu *cpu, uint32_t offset)
10569 {
10570 FRegister a;
10571 unsigned st = INSTR (4, 0);
10572 unsigned rn = INSTR (9, 5);
10573 uint64_t addr;
10574
10575 aarch64_get_FP_long_double (cpu, st, & a);
10576
10577 addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
10578 aarch64_set_mem_long_double (cpu, addr, a);
10579 }
10580
10581 /* 128 bit store unscaled signed 9 bit with pre- or post-writeback. */
10582 static void
10583 fstrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10584 {
10585 FRegister a;
10586 unsigned rn = INSTR (9, 5);
10587 unsigned st = INSTR (4, 0);
10588 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10589
10590 if (wb != Post)
10591 address += offset;
10592
10593 aarch64_get_FP_long_double (cpu, st, & a);
10594 aarch64_set_mem_long_double (cpu, address, a);
10595
10596 if (wb == Post)
10597 address += offset;
10598
10599 if (wb != NoWriteBack)
10600 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10601 }
10602
10603 /* 128 bit store scaled or unscaled zero-
10604 or sign-extended 32-bit register offset. */
10605 static void
10606 fstrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10607 {
10608 unsigned rm = INSTR (20, 16);
10609 unsigned rn = INSTR (9, 5);
10610 unsigned st = INSTR (4, 0);
10611
10612 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10613 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10614 extension);
10615 uint64_t displacement = OPT_SCALE (extended, 128, scaling);
10616
10617 FRegister a;
10618
10619 aarch64_get_FP_long_double (cpu, st, & a);
10620 aarch64_set_mem_long_double (cpu, address + displacement, a);
10621 }
10622
10623 static void
10624 dexLoadImmediatePrePost (sim_cpu *cpu)
10625 {
10626 /* instr[31,30] = size
10627 instr[29,27] = 111
10628 instr[26] = V
10629 instr[25,24] = 00
10630 instr[23,22] = opc
10631 instr[21] = 0
10632 instr[20,12] = simm9
10633 instr[11] = wb : 0 ==> Post, 1 ==> Pre
10634 instr[10] = 0
10635 instr[9,5] = Rn may be SP.
10636 instr[4,0] = Rt */
10637
10638 uint32_t V = INSTR (26, 26);
10639 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10640 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10641 WriteBack wb = INSTR (11, 11);
10642
10643 if (!V)
10644 {
10645 /* GReg operations. */
10646 switch (dispatch)
10647 {
10648 case 0: strb_wb (cpu, imm, wb); return;
10649 case 1: ldrb32_wb (cpu, imm, wb); return;
10650 case 2: ldrsb_wb (cpu, imm, wb); return;
10651 case 3: ldrsb32_wb (cpu, imm, wb); return;
10652 case 4: strh_wb (cpu, imm, wb); return;
10653 case 5: ldrh32_wb (cpu, imm, wb); return;
10654 case 6: ldrsh64_wb (cpu, imm, wb); return;
10655 case 7: ldrsh32_wb (cpu, imm, wb); return;
10656 case 8: str32_wb (cpu, imm, wb); return;
10657 case 9: ldr32_wb (cpu, imm, wb); return;
10658 case 10: ldrsw_wb (cpu, imm, wb); return;
10659 case 12: str_wb (cpu, imm, wb); return;
10660 case 13: ldr_wb (cpu, imm, wb); return;
10661
10662 default:
10663 case 11:
10664 case 14:
10665 case 15:
10666 HALT_UNALLOC;
10667 }
10668 }
10669
10670 /* FReg operations. */
10671 switch (dispatch)
10672 {
10673 case 2: fstrq_wb (cpu, imm, wb); return;
10674 case 3: fldrq_wb (cpu, imm, wb); return;
10675 case 8: fstrs_wb (cpu, imm, wb); return;
10676 case 9: fldrs_wb (cpu, imm, wb); return;
10677 case 12: fstrd_wb (cpu, imm, wb); return;
10678 case 13: fldrd_wb (cpu, imm, wb); return;
10679
10680 case 0: /* STR 8 bit FP. */
10681 case 1: /* LDR 8 bit FP. */
10682 case 4: /* STR 16 bit FP. */
10683 case 5: /* LDR 16 bit FP. */
10684 HALT_NYI;
10685
10686 default:
10687 case 6:
10688 case 7:
10689 case 10:
10690 case 11:
10691 case 14:
10692 case 15:
10693 HALT_UNALLOC;
10694 }
10695 }
10696
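/* A worked example (standalone sketch): decoding
   LDR X1, [X2, #8]! == 0xf8408c41 with the same field extraction used by
   dexLoadImmediatePrePost above. 'bits' is a local stand-in for the
   INSTR macro. */
#if 0
#include <stdint.h>
#include <assert.h>

static uint32_t
bits (uint32_t insn, int hi, int lo)
{
  return (insn >> lo) & ((1u << (hi - lo + 1)) - 1);
}

int
main (void)
{
  uint32_t insn = 0xf8408c41;
  uint32_t dispatch = (bits (insn, 31, 30) << 2) | bits (insn, 23, 22);

  assert (dispatch == 13);            /* case 13: ldr_wb. */
  assert (bits (insn, 11, 11) == 1);  /* wb == Pre. */
  assert (bits (insn, 20, 12) == 8);  /* simm9 == +8. */
  return 0;
}
#endif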
10697 static void
10698 dexLoadRegisterOffset (sim_cpu *cpu)
10699 {
10700 /* instr[31,30] = size
10701 instr[29,27] = 111
10702 instr[26] = V
10703 instr[25,24] = 00
10704 instr[23,22] = opc
10705 instr[21] = 1
10706 instr[20,16] = rm
10707 instr[15,13] = option : 010 ==> UXTW, 011 ==> UXTX/LSL,
10708 110 ==> SXTW, 111 ==> SXTX,
10709 ow ==> RESERVED
10710 instr[12] = scaled
10711 instr[11,10] = 10
10712 instr[9,5] = rn
10713 instr[4,0] = rt. */
10714
10715 uint32_t V = INSTR (26, 26);
10716 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10717 Scaling scale = INSTR (12, 12);
10718 Extension extensionType = INSTR (15, 13);
10719
10720 /* Check for illegal extension types. */
10721 if (uimm (extensionType, 1, 1) == 0)
10722 HALT_UNALLOC;
10723
10724 if (extensionType == UXTX || extensionType == SXTX)
10725 extensionType = NoExtension;
10726
10727 if (!V)
10728 {
10729 /* GReg operations. */
10730 switch (dispatch)
10731 {
10732 case 0: strb_scale_ext (cpu, scale, extensionType); return;
10733 case 1: ldrb32_scale_ext (cpu, scale, extensionType); return;
10734 case 2: ldrsb_scale_ext (cpu, scale, extensionType); return;
10735 case 3: ldrsb32_scale_ext (cpu, scale, extensionType); return;
10736 case 4: strh_scale_ext (cpu, scale, extensionType); return;
10737 case 5: ldrh32_scale_ext (cpu, scale, extensionType); return;
10738 case 6: ldrsh_scale_ext (cpu, scale, extensionType); return;
10739 case 7: ldrsh32_scale_ext (cpu, scale, extensionType); return;
10740 case 8: str32_scale_ext (cpu, scale, extensionType); return;
10741 case 9: ldr32_scale_ext (cpu, scale, extensionType); return;
10742 case 10: ldrsw_scale_ext (cpu, scale, extensionType); return;
10743 case 12: str_scale_ext (cpu, scale, extensionType); return;
10744 case 13: ldr_scale_ext (cpu, scale, extensionType); return;
10745 case 14: prfm_scale_ext (cpu, scale, extensionType); return;
10746
10747 default:
10748 case 11:
10749 case 15:
10750 HALT_UNALLOC;
10751 }
10752 }
10753
10754 /* FReg operations. */
10755 switch (dispatch)
10756 {
10757 case 1: /* LDR 8 bit FP. */
10758 HALT_NYI;
10759 case 3: fldrq_scale_ext (cpu, scale, extensionType); return;
10760 case 5: /* LDR 16 bit FP. */
10761 HALT_NYI;
10762 case 9: fldrs_scale_ext (cpu, scale, extensionType); return;
10763 case 13: fldrd_scale_ext (cpu, scale, extensionType); return;
10764
10765 case 0: fstrb_scale_ext (cpu, scale, extensionType); return;
10766 case 2: fstrq_scale_ext (cpu, scale, extensionType); return;
10767 case 4: fstrh_scale_ext (cpu, scale, extensionType); return;
10768 case 8: fstrs_scale_ext (cpu, scale, extensionType); return;
10769 case 12: fstrd_scale_ext (cpu, scale, extensionType); return;
10770
10771 default:
10772 case 6:
10773 case 7:
10774 case 10:
10775 case 11:
10776 case 14:
10777 case 15:
10778 HALT_UNALLOC;
10779 }
10780 }
10781
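/* A standalone sketch of the zero- vs sign-extension relied on above:
   how a negative 32 bit offset register differs under the UXTW and SXTW
   options. */
#if 0
#include <stdint.h>
#include <assert.h>

int
main (void)
{
  uint32_t wm = 0xfffffffcu;               /* W register holding -4. */

  int64_t uxtw = (int64_t) (uint64_t) wm;  /* Zero-extended offset. */
  int64_t sxtw = (int64_t) (int32_t) wm;   /* Sign-extended offset. */

  assert (uxtw == 0xfffffffcLL);
  assert (sxtw == -4);
  return 0;
}
#endif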
10782 static void
10783 dexLoadUnsignedImmediate (sim_cpu *cpu)
10784 {
10785 /* instr[29,24] == 111_01
10786 instr[31,30] = size
10787 instr[26] = V
10788 instr[23,22] = opc
10789 instr[21,10] = uimm12 : unsigned immediate offset
10790 instr[9,5] = rn may be SP.
10791 instr[4,0] = rt. */
10792
10793 uint32_t V = INSTR (26,26);
10794 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10795 uint32_t imm = INSTR (21, 10);
10796
10797 if (!V)
10798 {
10799 /* GReg operations. */
10800 switch (dispatch)
10801 {
10802 case 0: strb_abs (cpu, imm); return;
10803 case 1: ldrb32_abs (cpu, imm); return;
10804 case 2: ldrsb_abs (cpu, imm); return;
10805 case 3: ldrsb32_abs (cpu, imm); return;
10806 case 4: strh_abs (cpu, imm); return;
10807 case 5: ldrh32_abs (cpu, imm); return;
10808 case 6: ldrsh_abs (cpu, imm); return;
10809 case 7: ldrsh32_abs (cpu, imm); return;
10810 case 8: str32_abs (cpu, imm); return;
10811 case 9: ldr32_abs (cpu, imm); return;
10812 case 10: ldrsw_abs (cpu, imm); return;
10813 case 12: str_abs (cpu, imm); return;
10814 case 13: ldr_abs (cpu, imm); return;
10815 case 14: prfm_abs (cpu, imm); return;
10816
10817 default:
10818 case 11:
10819 case 15:
10820 HALT_UNALLOC;
10821 }
10822 }
10823
10824 /* FReg operations. */
10825 switch (dispatch)
10826 {
10827 case 0: fstrb_abs (cpu, imm); return;
10828 case 4: fstrh_abs (cpu, imm); return;
10829 case 8: fstrs_abs (cpu, imm); return;
10830 case 12: fstrd_abs (cpu, imm); return;
10831 case 2: fstrq_abs (cpu, imm); return;
10832
10833 case 1: fldrb_abs (cpu, imm); return;
10834 case 5: fldrh_abs (cpu, imm); return;
10835 case 9: fldrs_abs (cpu, imm); return;
10836 case 13: fldrd_abs (cpu, imm); return;
10837 case 3: fldrq_abs (cpu, imm); return;
10838
10839 default:
10840 case 6:
10841 case 7:
10842 case 10:
10843 case 11:
10844 case 14:
10845 case 15:
10846 HALT_UNALLOC;
10847 }
10848 }
10849
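/* A sketch of the SCALE contract assumed by the *_abs helpers: uimm12
   counts elements, so it is shifted by log2 of the access size in bytes.
   For example LDR X0, [X1, #16] encodes uimm12 == 2, and SCALE (2, 64)
   yields 16. 'scale_sketch' is a hypothetical name. */
#if 0
static uint64_t
scale_sketch (uint32_t offset, unsigned bits)
{
  unsigned shift = (bits == 16 ? 1
                    : bits == 32 ? 2
                    : bits == 64 ? 3
                    : bits == 128 ? 4 : 0);
  return (uint64_t) offset << shift;
}
#endif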
10850 static void
10851 dexLoadExclusive (sim_cpu *cpu)
10852 {
10853 /* assert instr[29:24] = 001000;
10854 instr[31,30] = size
10855 instr[23] = 0 if exclusive
10856 instr[22] = L : 1 if load, 0 if store
10857 instr[21] = 1 if pair
10858 instr[20,16] = Rs
10859 instr[15] = o0 : 1 if ordered
10860 instr[14,10] = Rt2
10861 instr[9,5] = Rn
10862 instr[4,0] = Rt. */
10863
10864 switch (INSTR (22, 21))
10865 {
10866 case 2: ldxr (cpu); return;
10867 case 0: stxr (cpu); return;
10868 default: HALT_NYI;
10869 }
10870 }
10871
10872 static void
10873 dexLoadOther (sim_cpu *cpu)
10874 {
10875 uint32_t dispatch;
10876
10877 /* instr[29,25] = 111_0
10878 instr[24] == 0 ==> dispatch, 1 ==> ldst reg unsigned immediate
10879 instr[21] and instr[11,10] form the secondary dispatch vector. */
10880 if (INSTR (24, 24))
10881 {
10882 dexLoadUnsignedImmediate (cpu);
10883 return;
10884 }
10885
10886 dispatch = ((INSTR (21, 21) << 2) | INSTR (11, 10));
10887 switch (dispatch)
10888 {
10889 case 0: dexLoadUnscaledImmediate (cpu); return;
10890 case 1: dexLoadImmediatePrePost (cpu); return;
10891 case 3: dexLoadImmediatePrePost (cpu); return;
10892 case 6: dexLoadRegisterOffset (cpu); return;
10893
10894 default:
10895 case 2:
10896 case 4:
10897 case 5:
10898 case 7:
10899 HALT_NYI;
10900 }
10901 }
10902
10903 static void
10904 store_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10905 {
10906 unsigned rn = INSTR (14, 10);
10907 unsigned rd = INSTR (9, 5);
10908 unsigned rm = INSTR (4, 0);
10909 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10910
10911 if ((rn == rd || rm == rd) && wb != NoWriteBack)
10912 HALT_UNALLOC; /* Writeback to a transfer register is UNPREDICTABLE. */
10913
10914 offset <<= 2;
10915
10916 if (wb != Post)
10917 address += offset;
10918
10919 aarch64_set_mem_u32 (cpu, address,
10920 aarch64_get_reg_u32 (cpu, rm, NO_SP));
10921 aarch64_set_mem_u32 (cpu, address + 4,
10922 aarch64_get_reg_u32 (cpu, rn, NO_SP));
10923
10924 if (wb == Post)
10925 address += offset;
10926
10927 if (wb != NoWriteBack)
10928 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10929 }
10930
10931 static void
10932 store_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10933 {
10934 unsigned rn = INSTR (14, 10);
10935 unsigned rd = INSTR (9, 5);
10936 unsigned rm = INSTR (4, 0);
10937 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10938
10939 if ((rn == rd || rm == rd) && wb != NoWriteBack)
10940 HALT_UNALLOC; /* Writeback to a transfer register is UNPREDICTABLE. */
10941
10942 offset <<= 3;
10943
10944 if (wb != Post)
10945 address += offset;
10946
10947 aarch64_set_mem_u64 (cpu, address,
10948 aarch64_get_reg_u64 (cpu, rm, NO_SP));
10949 aarch64_set_mem_u64 (cpu, address + 8,
10950 aarch64_get_reg_u64 (cpu, rn, NO_SP));
10951
10952 if (wb == Post)
10953 address += offset;
10954
10955 if (wb != NoWriteBack)
10956 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10957 }
10958
10959 static void
10960 load_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10961 {
10962 unsigned rn = INSTR (14, 10);
10963 unsigned rd = INSTR (9, 5);
10964 unsigned rm = INSTR (4, 0);
10965 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10966
10967 /* A load pair with Rt == Rt2 is UNPREDICTABLE; treat it as unallocated. */
10968 if (rn == rm)
10969 HALT_UNALLOC;
10970
10971 offset <<= 2;
10972
10973 if (wb != Post)
10974 address += offset;
10975
10976 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u32 (cpu, address));
10977 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u32 (cpu, address + 4));
10978
10979 if (wb == Post)
10980 address += offset;
10981
10982 if (wb != NoWriteBack)
10983 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10984 }
10985
10986 static void
10987 load_pair_s32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10988 {
10989 unsigned rn = INSTR (14, 10);
10990 unsigned rd = INSTR (9, 5);
10991 unsigned rm = INSTR (4, 0);
10992 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10993
10994 /* A load pair with Rt == Rt2 is UNPREDICTABLE; treat it as unallocated. */
10995 if (rn == rm)
10996 HALT_UNALLOC;
10997
10998 offset <<= 2;
10999
11000 if (wb != Post)
11001 address += offset;
11002
11003 aarch64_set_reg_s64 (cpu, rm, SP_OK, aarch64_get_mem_s32 (cpu, address));
11004 aarch64_set_reg_s64 (cpu, rn, SP_OK, aarch64_get_mem_s32 (cpu, address + 4));
11005
11006 if (wb == Post)
11007 address += offset;
11008
11009 if (wb != NoWriteBack)
11010 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11011 }
11012
11013 static void
11014 load_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11015 {
11016 unsigned rn = INSTR (14, 10);
11017 unsigned rd = INSTR (9, 5);
11018 unsigned rm = INSTR (4, 0);
11019 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11020
11021 /* A load pair with Rt == Rt2 is UNPREDICTABLE; treat it as unallocated. */
11022 if (rn == rm)
11023 HALT_UNALLOC;
11024
11025 offset <<= 3;
11026
11027 if (wb != Post)
11028 address += offset;
11029
11030 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u64 (cpu, address));
11031 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u64 (cpu, address + 8));
11032
11033 if (wb == Post)
11034 address += offset;
11035
11036 if (wb != NoWriteBack)
11037 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11038 }
11039
11040 static void
11041 dex_load_store_pair_gr (sim_cpu *cpu)
11042 {
11043 /* instr[31,30] = size (10=> 64-bit, 01=> signed 32-bit, 00=> 32-bit)
11044 instr[29,25] = instruction encoding: 101_0
11045 instr[26] = V : 1 if fp 0 if gp
11046 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11047 instr[22] = load/store (1=> load)
11048 instr[21,15] = signed, scaled, offset
11049 instr[14,10] = Rn
11050 instr[ 9, 5] = Rd
11051 instr[ 4, 0] = Rm. */
11052
11053 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11054 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11055
11056 switch (dispatch)
11057 {
11058 case 2: store_pair_u32 (cpu, offset, Post); return;
11059 case 3: load_pair_u32 (cpu, offset, Post); return;
11060 case 4: store_pair_u32 (cpu, offset, NoWriteBack); return;
11061 case 5: load_pair_u32 (cpu, offset, NoWriteBack); return;
11062 case 6: store_pair_u32 (cpu, offset, Pre); return;
11063 case 7: load_pair_u32 (cpu, offset, Pre); return;
11064
11065 case 11: load_pair_s32 (cpu, offset, Post); return;
11066 case 13: load_pair_s32 (cpu, offset, NoWriteBack); return;
11067 case 15: load_pair_s32 (cpu, offset, Pre); return;
11068
11069 case 18: store_pair_u64 (cpu, offset, Post); return;
11070 case 19: load_pair_u64 (cpu, offset, Post); return;
11071 case 20: store_pair_u64 (cpu, offset, NoWriteBack); return;
11072 case 21: load_pair_u64 (cpu, offset, NoWriteBack); return;
11073 case 22: store_pair_u64 (cpu, offset, Pre); return;
11074 case 23: load_pair_u64 (cpu, offset, Pre); return;
11075
11076 default:
11077 HALT_UNALLOC;
11078 }
11079 }
11080
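/* A worked example (standalone sketch): STP X29, X30, [SP, #-16]! is
   0xa9bf7bfd, decoded with the same fields as dex_load_store_pair_gr
   above. 'bits' is a local stand-in for the INSTR macro; the sign
   extension assumes the usual arithmetic right shift of signed values. */
#if 0
#include <stdint.h>
#include <assert.h>

static uint32_t
bits (uint32_t insn, int hi, int lo)
{
  return (insn >> lo) & ((1u << (hi - lo + 1)) - 1);
}

int
main (void)
{
  uint32_t insn = 0xa9bf7bfd;
  uint32_t dispatch = (bits (insn, 31, 30) << 3) | bits (insn, 24, 22);
  int32_t imm7 = (int32_t) (bits (insn, 21, 15) << 25) >> 25;

  assert (dispatch == 22);           /* case 22: store_pair_u64, Pre. */
  assert (imm7 == -2);               /* Scaled by 8 in store_pair_u64: -16. */
  assert (bits (insn, 9, 5) == 31);  /* Rd field: the base is SP. */
  return 0;
}
#endif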
11081 static void
11082 store_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
11083 {
11084 unsigned rn = INSTR (14, 10);
11085 unsigned rd = INSTR (9, 5);
11086 unsigned rm = INSTR (4, 0);
11087 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11088
11089 offset <<= 2;
11090
11091 if (wb != Post)
11092 address += offset;
11093
11094 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, rm, 0));
11095 aarch64_set_mem_u32 (cpu, address + 4, aarch64_get_vec_u32 (cpu, rn, 0));
11096
11097 if (wb == Post)
11098 address += offset;
11099
11100 if (wb != NoWriteBack)
11101 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11102 }
11103
11104 static void
11105 store_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11106 {
11107 unsigned rn = INSTR (14, 10);
11108 unsigned rd = INSTR (9, 5);
11109 unsigned rm = INSTR (4, 0);
11110 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11111
11112 offset <<= 3;
11113
11114 if (wb != Post)
11115 address += offset;
11116
11117 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, rm, 0));
11118 aarch64_set_mem_u64 (cpu, address + 8, aarch64_get_vec_u64 (cpu, rn, 0));
11119
11120 if (wb == Post)
11121 address += offset;
11122
11123 if (wb != NoWriteBack)
11124 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11125 }
11126
11127 static void
11128 store_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11129 {
11130 FRegister a;
11131 unsigned rn = INSTR (14, 10);
11132 unsigned rd = INSTR (9, 5);
11133 unsigned rm = INSTR (4, 0);
11134 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11135
11136 offset <<= 4;
11137
11138 if (wb != Post)
11139 address += offset;
11140
11141 aarch64_get_FP_long_double (cpu, rm, & a);
11142 aarch64_set_mem_long_double (cpu, address, a);
11143 aarch64_get_FP_long_double (cpu, rn, & a);
11144 aarch64_set_mem_long_double (cpu, address + 16, a);
11145
11146 if (wb == Post)
11147 address += offset;
11148
11149 if (wb != NoWriteBack)
11150 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11151 }
11152
11153 static void
11154 load_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
11155 {
11156 unsigned rn = INSTR (14, 10);
11157 unsigned rd = INSTR (9, 5);
11158 unsigned rm = INSTR (4, 0);
11159 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11160
11161 if (rm == rn)
11162 HALT_UNALLOC;
11163
11164 offset <<= 2;
11165
11166 if (wb != Post)
11167 address += offset;
11168
11169 aarch64_set_vec_u32 (cpu, rm, 0, aarch64_get_mem_u32 (cpu, address));
11170 aarch64_set_vec_u32 (cpu, rn, 0, aarch64_get_mem_u32 (cpu, address + 4));
11171
11172 if (wb == Post)
11173 address += offset;
11174
11175 if (wb != NoWriteBack)
11176 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11177 }
11178
11179 static void
11180 load_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11181 {
11182 unsigned rn = INSTR (14, 10);
11183 unsigned rd = INSTR (9, 5);
11184 unsigned rm = INSTR (4, 0);
11185 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11186
11187 if (rm == rn)
11188 HALT_UNALLOC;
11189
11190 offset <<= 3;
11191
11192 if (wb != Post)
11193 address += offset;
11194
11195 aarch64_set_vec_u64 (cpu, rm, 0, aarch64_get_mem_u64 (cpu, address));
11196 aarch64_set_vec_u64 (cpu, rn, 0, aarch64_get_mem_u64 (cpu, address + 8));
11197
11198 if (wb == Post)
11199 address += offset;
11200
11201 if (wb != NoWriteBack)
11202 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11203 }
11204
11205 static void
11206 load_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11207 {
11208 FRegister a;
11209 unsigned rn = INSTR (14, 10);
11210 unsigned rd = INSTR (9, 5);
11211 unsigned rm = INSTR (4, 0);
11212 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11213
11214 if (rm == rn)
11215 HALT_UNALLOC;
11216
11217 offset <<= 4;
11218
11219 if (wb != Post)
11220 address += offset;
11221
11222 aarch64_get_mem_long_double (cpu, address, & a);
11223 aarch64_set_FP_long_double (cpu, rm, a);
11224 aarch64_get_mem_long_double (cpu, address + 16, & a);
11225 aarch64_set_FP_long_double (cpu, rn, a);
11226
11227 if (wb == Post)
11228 address += offset;
11229
11230 if (wb != NoWriteBack)
11231 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11232 }
11233
11234 static void
11235 dex_load_store_pair_fp (sim_cpu *cpu)
11236 {
11237 /* instr[31,30] = size (10=> 128-bit, 01=> 64-bit, 00=> 32-bit)
11238 instr[29,25] = instruction encoding
11239 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11240 instr[22] = load/store (1=> load)
11241 instr[21,15] = signed, scaled, offset
11242 instr[14,10] = Rn
11243 instr[ 9, 5] = Rd
11244 instr[ 4, 0] = Rm */
11245
11246 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11247 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11248
11249 switch (dispatch)
11250 {
11251 case 2: store_pair_float (cpu, offset, Post); return;
11252 case 3: load_pair_float (cpu, offset, Post); return;
11253 case 4: store_pair_float (cpu, offset, NoWriteBack); return;
11254 case 5: load_pair_float (cpu, offset, NoWriteBack); return;
11255 case 6: store_pair_float (cpu, offset, Pre); return;
11256 case 7: load_pair_float (cpu, offset, Pre); return;
11257
11258 case 10: store_pair_double (cpu, offset, Post); return;
11259 case 11: load_pair_double (cpu, offset, Post); return;
11260 case 12: store_pair_double (cpu, offset, NoWriteBack); return;
11261 case 13: load_pair_double (cpu, offset, NoWriteBack); return;
11262 case 14: store_pair_double (cpu, offset, Pre); return;
11263 case 15: load_pair_double (cpu, offset, Pre); return;
11264
11265 case 18: store_pair_long_double (cpu, offset, Post); return;
11266 case 19: load_pair_long_double (cpu, offset, Post); return;
11267 case 20: store_pair_long_double (cpu, offset, NoWriteBack); return;
11268 case 21: load_pair_long_double (cpu, offset, NoWriteBack); return;
11269 case 22: store_pair_long_double (cpu, offset, Pre); return;
11270 case 23: load_pair_long_double (cpu, offset, Pre); return;
11271
11272 default:
11273 HALT_UNALLOC;
11274 }
11275 }
11276
11277 static inline unsigned
11278 vec_reg (unsigned v, unsigned o)
11279 {
11280 return (v + o) & 0x1F; /* Vector register numbers wrap modulo 32 (V0..V31). */
11281 }
11282
11283 /* Load multiple N-element structures to N consecutive registers. */
11284 static void
11285 vec_load (sim_cpu *cpu, uint64_t address, unsigned N)
11286 {
11287 int all = INSTR (30, 30);
11288 unsigned size = INSTR (11, 10);
11289 unsigned vd = INSTR (4, 0);
11290 unsigned i;
11291
11292 switch (size)
11293 {
11294 case 0: /* 8-bit operations. */
11295 if (all)
11296 for (i = 0; i < (16 * N); i++)
11297 aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15,
11298 aarch64_get_mem_u8 (cpu, address + i));
11299 else
11300 for (i = 0; i < (8 * N); i++)
11301 aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7,
11302 aarch64_get_mem_u8 (cpu, address + i));
11303 return;
11304
11305 case 1: /* 16-bit operations. */
11306 if (all)
11307 for (i = 0; i < (8 * N); i++)
11308 aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7,
11309 aarch64_get_mem_u16 (cpu, address + i * 2));
11310 else
11311 for (i = 0; i < (4 * N); i++)
11312 aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3,
11313 aarch64_get_mem_u16 (cpu, address + i * 2));
11314 return;
11315
11316 case 2: /* 32-bit operations. */
11317 if (all)
11318 for (i = 0; i < (4 * N); i++)
11319 aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3,
11320 aarch64_get_mem_u32 (cpu, address + i * 4));
11321 else
11322 for (i = 0; i < (2 * N); i++)
11323 aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1,
11324 aarch64_get_mem_u32 (cpu, address + i * 4));
11325 return;
11326
11327 case 3: /* 64-bit operations. */
11328 if (all)
11329 for (i = 0; i < (2 * N); i++)
11330 aarch64_set_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1,
11331 aarch64_get_mem_u64 (cpu, address + i * 8));
11332 else
11333 for (i = 0; i < N; i++)
11334 aarch64_set_vec_u64 (cpu, vec_reg (vd, i), 0,
11335 aarch64_get_mem_u64 (cpu, address + i * 8));
11336 return;
11337 }
11338 }
11339
11340 /* LD4: load multiple 4-element to four consecutive registers. */
11341 static void
11342 LD4 (sim_cpu *cpu, uint64_t address)
11343 {
11344 vec_load (cpu, address, 4);
11345 }
11346
11347 /* LD3: load multiple 3-element structures to three consecutive registers. */
11348 static void
11349 LD3 (sim_cpu *cpu, uint64_t address)
11350 {
11351 vec_load (cpu, address, 3);
11352 }
11353
11354 /* LD2: load multiple 2-element structures to two consecutive registers. */
11355 static void
11356 LD2 (sim_cpu *cpu, uint64_t address)
11357 {
11358 vec_load (cpu, address, 2);
11359 }
11360
11361 /* Load multiple 1-element structures into one register. */
11362 static void
11363 LD1_1 (sim_cpu *cpu, uint64_t address)
11364 {
11365 int all = INSTR (30, 30);
11366 unsigned size = INSTR (11, 10);
11367 unsigned vd = INSTR (4, 0);
11368 unsigned i;
11369
11370 switch (size)
11371 {
11372 case 0:
11373 /* LD1 {Vd.16b}, addr, #16 */
11374 /* LD1 {Vd.8b}, addr, #8 */
11375 for (i = 0; i < (all ? 16 : 8); i++)
11376 aarch64_set_vec_u8 (cpu, vd, i,
11377 aarch64_get_mem_u8 (cpu, address + i));
11378 return;
11379
11380 case 1:
11381 /* LD1 {Vd.8h}, addr, #16 */
11382 /* LD1 {Vd.4h}, addr, #8 */
11383 for (i = 0; i < (all ? 8 : 4); i++)
11384 aarch64_set_vec_u16 (cpu, vd, i,
11385 aarch64_get_mem_u16 (cpu, address + i * 2));
11386 return;
11387
11388 case 2:
11389 /* LD1 {Vd.4s}, addr, #16 */
11390 /* LD1 {Vd.2s}, addr, #8 */
11391 for (i = 0; i < (all ? 4 : 2); i++)
11392 aarch64_set_vec_u32 (cpu, vd, i,
11393 aarch64_get_mem_u32 (cpu, address + i * 4));
11394 return;
11395
11396 case 3:
11397 /* LD1 {Vd.2d}, addr, #16 */
11398 /* LD1 {Vd.1d}, addr, #8 */
11399 for (i = 0; i < (all ? 2 : 1); i++)
11400 aarch64_set_vec_u64 (cpu, vd, i,
11401 aarch64_get_mem_u64 (cpu, address + i * 8));
11402 return;
11403 }
11404 }
11405
11406 /* Load multiple 1-element structures into two registers. */
11407 static void
11408 LD1_2 (sim_cpu *cpu, uint64_t address)
11409 {
11410 /* FIXME: This is the same code as the LD2 version, yet LD2 should
11411 de-interleave elements across the registers while LD1 fills them
11412 sequentially; vec_load implements only the LD1 behaviour (see below). */
11413 vec_load (cpu, address, 2);
11414 }
11415
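/* A sketch of the de-interleave that a faithful LD2 would perform for the
   8 bit, 128 bit variant (this is NOT what vec_load above does): the
   bytes of each two-element structure alternate between Vd and Vd+1. */
#if 0
static void
ld2_deinterleave_sketch (sim_cpu *cpu, uint64_t address)
{
  unsigned vd = INSTR (4, 0);
  unsigned i;

  for (i = 0; i < 16; i++)
    {
      aarch64_set_vec_u8 (cpu, vd, i,
                          aarch64_get_mem_u8 (cpu, address + i * 2));
      aarch64_set_vec_u8 (cpu, vec_reg (vd, 1), i,
                          aarch64_get_mem_u8 (cpu, address + i * 2 + 1));
    }
}
#endif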
11416 /* Load multiple 1-element structures into three registers. */
11417 static void
11418 LD1_3 (sim_cpu *cpu, uint64_t address)
11419 {
11420 /* FIXME: As with LD1_2, this shares vec_load with LD3, which ought
11421 to de-interleave across three registers; only the LD1 behaviour is
11422 implemented. */
11423 vec_load (cpu, address, 3);
11424 }
11425
11426 /* Load multiple 1-element structures into four registers. */
11427 static void
11428 LD1_4 (sim_cpu *cpu, uint64_t address)
11429 {
11430 /* FIXME: As with LD1_2, this shares vec_load with LD4, which ought
11431 to de-interleave across four registers; only the LD1 behaviour is
11432 implemented. */
11433 vec_load (cpu, address, 4);
11434 }
11435
11436 /* Store multiple N-element structures to N consecutive registers. */
11437 static void
11438 vec_store (sim_cpu *cpu, uint64_t address, unsigned N)
11439 {
11440 int all = INSTR (30, 30);
11441 unsigned size = INSTR (11, 10);
11442 unsigned vd = INSTR (4, 0);
11443 unsigned i;
11444
11445 switch (size)
11446 {
11447 case 0: /* 8-bit operations. */
11448 if (all)
11449 for (i = 0; i < (16 * N); i++)
11450 aarch64_set_mem_u8
11451 (cpu, address + i,
11452 aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15));
11453 else
11454 for (i = 0; i < (8 * N); i++)
11455 aarch64_set_mem_u8
11456 (cpu, address + i,
11457 aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7));
11458 return;
11459
11460 case 1: /* 16-bit operations. */
11461 if (all)
11462 for (i = 0; i < (8 * N); i++)
11463 aarch64_set_mem_u16
11464 (cpu, address + i * 2,
11465 aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7));
11466 else
11467 for (i = 0; i < (4 * N); i++)
11468 aarch64_set_mem_u16
11469 (cpu, address + i * 2,
11470 aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3));
11471 return;
11472
11473 case 2: /* 32-bit operations. */
11474 if (all)
11475 for (i = 0; i < (4 * N); i++)
11476 aarch64_set_mem_u32
11477 (cpu, address + i * 4,
11478 aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3));
11479 else
11480 for (i = 0; i < (2 * N); i++)
11481 aarch64_set_mem_u32
11482 (cpu, address + i * 4,
11483 aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1));
11484 return;
11485
11486 case 3: /* 64-bit operations. */
11487 if (all)
11488 for (i = 0; i < (2 * N); i++)
11489 aarch64_set_mem_u64
11490 (cpu, address + i * 8,
11491 aarch64_get_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1));
11492 else
11493 for (i = 0; i < N; i++)
11494 aarch64_set_mem_u64
11495 (cpu, address + i * 8,
11496 aarch64_get_vec_u64 (cpu, vec_reg (vd, i), 0));
11497 return;
11498 }
11499 }
11500
11501 /* Store multiple 4-element structure to four consecutive registers. */
11502 static void
11503 ST4 (sim_cpu *cpu, uint64_t address)
11504 {
11505 vec_store (cpu, address, 4);
11506 }
11507
11508 /* Store multiple 3-element structures to three consecutive registers. */
11509 static void
11510 ST3 (sim_cpu *cpu, uint64_t address)
11511 {
11512 vec_store (cpu, address, 3);
11513 }
11514
11515 /* Store multiple 2-element structures to two consecutive registers. */
11516 static void
11517 ST2 (sim_cpu *cpu, uint64_t address)
11518 {
11519 vec_store (cpu, address, 2);
11520 }
11521
11522 /* Store multiple 1-element structures into one register. */
11523 static void
11524 ST1_1 (sim_cpu *cpu, uint64_t address)
11525 {
11526 int all = INSTR (30, 30);
11527 unsigned size = INSTR (11, 10);
11528 unsigned vd = INSTR (4, 0);
11529 unsigned i;
11530
11531 switch (size)
11532 {
11533 case 0:
11534 for (i = 0; i < (all ? 16 : 8); i++)
11535 aarch64_set_mem_u8 (cpu, address + i,
11536 aarch64_get_vec_u8 (cpu, vd, i));
11537 return;
11538
11539 case 1:
11540 for (i = 0; i < (all ? 8 : 4); i++)
11541 aarch64_set_mem_u16 (cpu, address + i * 2,
11542 aarch64_get_vec_u16 (cpu, vd, i));
11543 return;
11544
11545 case 2:
11546 for (i = 0; i < (all ? 4 : 2); i++)
11547 aarch64_set_mem_u32 (cpu, address + i * 4,
11548 aarch64_get_vec_u32 (cpu, vd, i));
11549 return;
11550
11551 case 3:
11552 for (i = 0; i < (all ? 2 : 1); i++)
11553 aarch64_set_mem_u64 (cpu, address + i * 8,
11554 aarch64_get_vec_u64 (cpu, vd, i));
11555 return;
11556 }
11557 }
11558
11559 /* Store multiple 1-element structures into two registers. */
11560 static void
11561 ST1_2 (sim_cpu *cpu, uint64_t address)
11562 {
11563 /* FIXME: This is the same code as the ST2 version, yet ST2 should
11564 interleave elements from the two registers into memory; vec_store
11565 implements only the ST1 behaviour. */
11566 vec_store (cpu, address, 2);
11567 }
11568
11569 /* Store multiple 1-element structures into three registers. */
11570 static void
11571 ST1_3 (sim_cpu *cpu, uint64_t address)
11572 {
11573 /* FIXME: As with ST1_2, this shares vec_store with ST3, which ought
11574 to interleave across three registers; only the ST1 behaviour is
11575 implemented. */
11576 vec_store (cpu, address, 3);
11577 }
11578
11579 /* Store multiple 1-element structures into four registers. */
11580 static void
11581 ST1_4 (sim_cpu *cpu, uint64_t address)
11582 {
11583 /* FIXME: As with ST1_2, this shares vec_store with ST4, which ought
11584 to interleave across four registers; only the ST1 behaviour is
11585 implemented. */
11586 vec_store (cpu, address, 4);
11587 }
11588
11589 #define LDn_STn_SINGLE_LANE_AND_SIZE() \
11590 do \
11591 { \
11592 switch (INSTR (15, 14)) \
11593 { \
11594 case 0: \
11595 lane = (full << 3) | (s << 2) | size; \
11596 size = 0; \
11597 break; \
11598 \
11599 case 1: \
11600 if ((size & 1) == 1) \
11601 HALT_UNALLOC; \
11602 lane = (full << 2) | (s << 1) | (size >> 1); \
11603 size = 1; \
11604 break; \
11605 \
11606 case 2: \
11607 if ((size & 2) == 2) \
11608 HALT_UNALLOC; \
11609 \
11610 if ((size & 1) == 0) \
11611 { \
11612 lane = (full << 1) | s; \
11613 size = 2; \
11614 } \
11615 else \
11616 { \
11617 if (s) \
11618 HALT_UNALLOC; \
11619 lane = full; \
11620 size = 3; \
11621 } \
11622 break; \
11623 \
11624 default: \
11625 HALT_UNALLOC; \
11626 } \
11627 } \
11628 while (0)
11629
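/* A worked example (standalone sketch) of the lane/size computation in
   the macro above, for LD1 {Vd.B}[9]: opcode(15,14) == 0, Q/full == 1,
   S == 0 and size == 01 give byte lane 9, with size reset to 0. */
#if 0
#include <assert.h>

int
main (void)
{
  unsigned full = 1, s = 0, size = 1;   /* Fields from the encoding. */
  unsigned lane = (full << 3) | (s << 2) | size;

  assert (lane == 9);                   /* Lane 9 of a byte access. */
  return 0;
}
#endif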
11630 /* Load single structure into one lane of N registers. */
11631 static void
11632 do_vec_LDn_single (sim_cpu *cpu, uint64_t address)
11633 {
11634 /* instr[31] = 0
11635 instr[30] = element selector 0=>half, 1=>all elements
11636 instr[29,24] = 00 1101
11637 instr[23] = 0=>simple, 1=>post
11638 instr[22] = 1
11639 instr[21] = width: LD1-or-LD3 (0) / LD2-or-LD4 (1)
11640 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11641 11111 (immediate post inc)
11642 instr[15,13] = opcode
11643 instr[12] = S, used for lane number
11644 instr[11,10] = size, also used for lane number
11645 instr[9,5] = address
11646 instr[4,0] = Vd */
11647
11648 unsigned full = INSTR (30, 30);
11649 unsigned vd = INSTR (4, 0);
11650 unsigned size = INSTR (11, 10);
11651 unsigned s = INSTR (12, 12);
11652 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11653 int lane = 0;
11654 int i;
11655
11656 NYI_assert (29, 24, 0x0D);
11657 NYI_assert (22, 22, 1);
11658
11659 /* Compute the lane number first (using size), and then compute size. */
11660 LDn_STn_SINGLE_LANE_AND_SIZE ();
11661
11662 for (i = 0; i < nregs; i++)
11663 switch (size)
11664 {
11665 case 0:
11666 {
11667 uint8_t val = aarch64_get_mem_u8 (cpu, address + i);
11668 aarch64_set_vec_u8 (cpu, vd + i, lane, val);
11669 break;
11670 }
11671
11672 case 1:
11673 {
11674 uint16_t val = aarch64_get_mem_u16 (cpu, address + (i * 2));
11675 aarch64_set_vec_u16 (cpu, vd + i, lane, val);
11676 break;
11677 }
11678
11679 case 2:
11680 {
11681 uint32_t val = aarch64_get_mem_u32 (cpu, address + (i * 4));
11682 aarch64_set_vec_u32 (cpu, vd + i, lane, val);
11683 break;
11684 }
11685
11686 case 3:
11687 {
11688 uint64_t val = aarch64_get_mem_u64 (cpu, address + (i * 8));
11689 aarch64_set_vec_u64 (cpu, vd + i, lane, val);
11690 break;
11691 }
11692 }
11693 }
11694
11695 /* Store single structure from one lane from N registers. */
11696 static void
11697 do_vec_STn_single (sim_cpu *cpu, uint64_t address)
11698 {
11699 /* instr[31] = 0
11700 instr[30] = element selector 0=>half, 1=>all elements
11701 instr[29,24] = 00 1101
11702 instr[23] = 0=>simple, 1=>post
11703 instr[22] = 0
11704 instr[21] = width: LD1-or-LD3 (0) / LD2-or-LD4 (1)
11705 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11706 11111 (immediate post inc)
11707 instr[15,13] = opcode
11708 instr[12] = S, used for lane number
11709 instr[11,10] = size, also used for lane number
11710 instr[9,5] = address
11711 instr[4,0] = Vd */
11712
11713 unsigned full = INSTR (30, 30);
11714 unsigned vd = INSTR (4, 0);
11715 unsigned size = INSTR (11, 10);
11716 unsigned s = INSTR (12, 12);
11717 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11718 int lane = 0;
11719 int i;
11720
11721 NYI_assert (29, 24, 0x0D);
11722 NYI_assert (22, 22, 0);
11723
11724 /* Compute the lane number first (using size), and then compute size. */
11725 LDn_STn_SINGLE_LANE_AND_SIZE ();
11726
11727 for (i = 0; i < nregs; i++)
11728 switch (size)
11729 {
11730 case 0:
11731 {
11732 uint8_t val = aarch64_get_vec_u8 (cpu, vd + i, lane);
11733 aarch64_set_mem_u8 (cpu, address + i, val);
11734 break;
11735 }
11736
11737 case 1:
11738 {
11739 uint16_t val = aarch64_get_vec_u16 (cpu, vd + i, lane);
11740 aarch64_set_mem_u16 (cpu, address + (i * 2), val);
11741 break;
11742 }
11743
11744 case 2:
11745 {
11746 uint32_t val = aarch64_get_vec_u32 (cpu, vd + i, lane);
11747 aarch64_set_mem_u32 (cpu, address + (i * 4), val);
11748 break;
11749 }
11750
11751 case 3:
11752 {
11753 uint64_t val = aarch64_get_vec_u64 (cpu, vd + i, lane);
11754 aarch64_set_mem_u64 (cpu, address + (i * 8), val);
11755 break;
11756 }
11757 }
11758 }
11759
11760 /* Load single structure into all lanes of N registers. */
11761 static void
11762 do_vec_LDnR (sim_cpu *cpu, uint64_t address)
11763 {
11764 /* instr[31] = 0
11765 instr[30] = element selector 0=>half, 1=>all elements
11766 instr[29,24] = 00 1101
11767 instr[23] = 0=>simple, 1=>post
11768 instr[22] = 1
11769 instr[21] = width: LD1R-or-LD3R (0) / LD2R-or-LD4R (1)
11770 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11771 11111 (immediate post inc)
11772 instr[15,14] = 11
11773 instr[13] = width: LD1R-or-LD2R (0) / LD3R-or-LD4R (1)
11774 instr[12] = 0
11775 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11776 10=> word(s), 11=> double(d)
11777 instr[9,5] = address
11778 instr[4,0] = Vd */
11779
11780 unsigned full = INSTR (30, 30);
11781 unsigned vd = INSTR (4, 0);
11782 unsigned size = INSTR (11, 10);
11783 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11784 int i, n;
11785
11786 NYI_assert (29, 24, 0x0D);
11787 NYI_assert (22, 22, 1);
11788 NYI_assert (15, 14, 3);
11789 NYI_assert (12, 12, 0);
11790
11791 for (n = 0; n < nregs; n++)
11792 switch (size)
11793 {
11794 case 0:
11795 {
11796 uint8_t val = aarch64_get_mem_u8 (cpu, address + n);
11797 for (i = 0; i < (full ? 16 : 8); i++)
11798 aarch64_set_vec_u8 (cpu, vd + n, i, val);
11799 break;
11800 }
11801
11802 case 1:
11803 {
11804 uint16_t val = aarch64_get_mem_u16 (cpu, address + (n * 2));
11805 for (i = 0; i < (full ? 8 : 4); i++)
11806 aarch64_set_vec_u16 (cpu, vd + n, i, val);
11807 break;
11808 }
11809
11810 case 2:
11811 {
11812 uint32_t val = aarch64_get_mem_u32 (cpu, address + (n * 4));
11813 for (i = 0; i < (full ? 4 : 2); i++)
11814 aarch64_set_vec_u32 (cpu, vd + n, i, val);
11815 break;
11816 }
11817
11818 case 3:
11819 {
11820 uint64_t val = aarch64_get_mem_u64 (cpu, address + (n * 8));
11821 for (i = 0; i < (full ? 2 : 1); i++)
11822 aarch64_set_vec_u64 (cpu, vd + n, i, val);
11823 break;
11824 }
11825
11826 default:
11827 HALT_UNALLOC;
11828 }
11829 }
11830
11831 static void
11832 do_vec_load_store (sim_cpu *cpu)
11833 {
11834 /* {LD|ST}<N> {Vd..Vd+N}, vaddr
11835
11836 instr[31] = 0
11837 instr[30] = element selector 0=>half, 1=>all elements
11838 instr[29,25] = 00110
11839 instr[24] = 0=>multiple struct, 1=>single struct
11840 instr[23] = 0=>simple, 1=>post
11841 instr[22] = 0=>store, 1=>load
11842 instr[21] = 0 (LDn) / small(0)-large(1) selector (LDnR)
11843 instr[20,16] = 00000 (simple), Vinc (reg-post-inc, no SP),
11844 11111 (immediate post inc)
11845 instr[15,12] = elements and destinations. eg for load:
11846 0000=>LD4 => load multiple 4-element to
11847 four consecutive registers
11848 0100=>LD3 => load multiple 3-element to
11849 three consecutive registers
11850 1000=>LD2 => load multiple 2-element to
11851 two consecutive registers
11852 0010=>LD1 => load multiple 1-element to
11853 four consecutive registers
11854 0110=>LD1 => load multiple 1-element to
11855 three consecutive registers
11856 1010=>LD1 => load multiple 1-element to
11857 two consecutive registers
11858 0111=>LD1 => load multiple 1-element to
11859 one register
11860 1100=>LD1R,LD2R
11861 1110=>LD3R,LD4R
11862 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11863 10=> word(s), 11=> double(d)
11864 instr[9,5] = Vn, can be SP
11865 instr[4,0] = Vd */
11866
11867 int single;
11868 int post;
11869 int load;
11870 unsigned vn;
11871 uint64_t address;
11872 int type;
11873
11874 if (INSTR (31, 31) != 0 || INSTR (29, 25) != 0x06)
11875 HALT_NYI;
11876
11877 single = INSTR (24, 24);
11878 post = INSTR (23, 23);
11879 load = INSTR (22, 22);
11880 type = INSTR (15, 12);
11881 vn = INSTR (9, 5);
11882 address = aarch64_get_reg_u64 (cpu, vn, SP_OK);
11883
11884 if (! single && INSTR (21, 21) != 0)
11885 HALT_UNALLOC;
11886
11887 if (post)
11888 {
11889 unsigned vm = INSTR (20, 16);
11890
11891 if (vm == R31)
11892 {
11893 unsigned sizeof_operation;
11894
11895 if (single)
11896 {
11897 if ((type >= 0) && (type <= 11))
11898 {
11899 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11900 switch (INSTR (15, 14))
11901 {
11902 case 0:
11903 sizeof_operation = nregs * 1;
11904 break;
11905 case 1:
11906 sizeof_operation = nregs * 2;
11907 break;
11908 case 2:
11909 if (INSTR (10, 10) == 0)
11910 sizeof_operation = nregs * 4;
11911 else
11912 sizeof_operation = nregs * 8;
11913 break;
11914 default:
11915 HALT_UNALLOC;
11916 }
11917 }
11918 else if (type == 0xC)
11919 {
11920 sizeof_operation = INSTR (21, 21) ? 2 : 1;
11921 sizeof_operation <<= INSTR (11, 10);
11922 }
11923 else if (type == 0xE)
11924 {
11925 sizeof_operation = INSTR (21, 21) ? 4 : 3;
11926 sizeof_operation <<= INSTR (11, 10);
11927 }
11928 else
11929 HALT_UNALLOC;
11930 }
11931 else
11932 {
11933 switch (type)
11934 {
11935 case 0: sizeof_operation = 32; break;
11936 case 4: sizeof_operation = 24; break;
11937 case 8: sizeof_operation = 16; break;
11938
11939 case 7:
11940 /* One register, immediate offset variant. */
11941 sizeof_operation = 8;
11942 break;
11943
11944 case 10:
11945 /* Two registers, immediate offset variant. */
11946 sizeof_operation = 16;
11947 break;
11948
11949 case 6:
11950 /* Three registers, immediate offset variant. */
11951 sizeof_operation = 24;
11952 break;
11953
11954 case 2:
11955 /* Four registers, immediate offset variant. */
11956 sizeof_operation = 32;
11957 break;
11958
11959 default:
11960 HALT_UNALLOC;
11961 }
11962
11963 if (INSTR (30, 30))
11964 sizeof_operation *= 2;
11965 }
11966
11967 aarch64_set_reg_u64 (cpu, vn, SP_OK, address + sizeof_operation);
11968 }
11969 else
11970 aarch64_set_reg_u64 (cpu, vn, SP_OK,
11971 address + aarch64_get_reg_u64 (cpu, vm, NO_SP));
11972 }
11973 else
11974 {
11975 NYI_assert (20, 16, 0);
11976 }
11977
11978 if (single)
11979 {
11980 if (load)
11981 {
11982 if ((type >= 0) && (type <= 11))
11983 do_vec_LDn_single (cpu, address);
11984 else if ((type == 0xC) || (type == 0xE))
11985 do_vec_LDnR (cpu, address);
11986 else
11987 HALT_UNALLOC;
11988 return;
11989 }
11990
11991 /* Stores. */
11992 if ((type >= 0) && (type <= 11))
11993 {
11994 do_vec_STn_single (cpu, address);
11995 return;
11996 }
11997
11998 HALT_UNALLOC;
11999 }
12000
12001 if (load)
12002 {
12003 switch (type)
12004 {
12005 case 0: LD4 (cpu, address); return;
12006 case 4: LD3 (cpu, address); return;
12007 case 8: LD2 (cpu, address); return;
12008 case 2: LD1_4 (cpu, address); return;
12009 case 6: LD1_3 (cpu, address); return;
12010 case 10: LD1_2 (cpu, address); return;
12011 case 7: LD1_1 (cpu, address); return;
12012
12013 default:
12014 HALT_UNALLOC;
12015 }
12016 }
12017
12018 /* Stores. */
12019 switch (type)
12020 {
12021 case 0: ST4 (cpu, address); return;
12022 case 4: ST3 (cpu, address); return;
12023 case 8: ST2 (cpu, address); return;
12024 case 2: ST1_4 (cpu, address); return;
12025 case 6: ST1_3 (cpu, address); return;
12026 case 10: ST1_2 (cpu, address); return;
12027 case 7: ST1_1 (cpu, address); return;
12028 default:
12029 HALT_UNALLOC;
12030 }
12031 }
12032
12033 static void
12034 dexLdSt (sim_cpu *cpu)
12035 {
12036 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
12037 assert group == GROUP_LDST_0100 || group == GROUP_LDST_0110 ||
12038 group == GROUP_LDST_1100 || group == GROUP_LDST_1110
12039 bits [29,28:26] of a LS are the secondary dispatch vector. */
12040 uint32_t group2 = dispatchLS (aarch64_get_instr (cpu));
12041
12042 switch (group2)
12043 {
12044 case LS_EXCL_000:
12045 dexLoadExclusive (cpu); return;
12046
12047 case LS_LIT_010:
12048 case LS_LIT_011:
12049 dexLoadLiteral (cpu); return;
12050
12051 case LS_OTHER_110:
12052 case LS_OTHER_111:
12053 dexLoadOther (cpu); return;
12054
12055 case LS_ADVSIMD_001:
12056 do_vec_load_store (cpu); return;
12057
12058 case LS_PAIR_100:
12059 dex_load_store_pair_gr (cpu); return;
12060
12061 case LS_PAIR_101:
12062 dex_load_store_pair_fp (cpu); return;
12063
12064 default:
12065 /* Should never reach here. */
12066 HALT_NYI;
12067 }
12068 }
12069
12070 /* Specific decode and execute for group Data Processing Register. */
12071
12072 static void
12073 dexLogicalShiftedRegister (sim_cpu *cpu)
12074 {
12075 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12076 instr[30,29] = op
12077 instr[28,24] = 01010
12078 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> ROR
12079 instr[21] = N
12080 instr[20,16] = Rm
12081 instr[15,10] = count : must be 0xxxxx for 32 bit
12082 instr[9,5] = Rn
12083 instr[4,0] = Rd */
12084
12085 uint32_t size = INSTR (31, 31);
12086 Shift shiftType = INSTR (23, 22);
12087 uint32_t count = INSTR (15, 10);
12088
12089 /* 32 bit operations must have count[5] = 0,
12090 otherwise the encoding is UNALLOC. */
12091 if (size == 0 && uimm (count, 5, 5))
12092 HALT_UNALLOC;
12093
12094 /* Dispatch on size:op:N. */
12095 switch ((INSTR (31, 29) << 1) | INSTR (21, 21))
12096 {
12097 case 0: and32_shift (cpu, shiftType, count); return;
12098 case 1: bic32_shift (cpu, shiftType, count); return;
12099 case 2: orr32_shift (cpu, shiftType, count); return;
12100 case 3: orn32_shift (cpu, shiftType, count); return;
12101 case 4: eor32_shift (cpu, shiftType, count); return;
12102 case 5: eon32_shift (cpu, shiftType, count); return;
12103 case 6: ands32_shift (cpu, shiftType, count); return;
12104 case 7: bics32_shift (cpu, shiftType, count); return;
12105 case 8: and64_shift (cpu, shiftType, count); return;
12106 case 9: bic64_shift (cpu, shiftType, count); return;
12107 case 10: orr64_shift (cpu, shiftType, count); return;
12108 case 11: orn64_shift (cpu, shiftType, count); return;
12109 case 12: eor64_shift (cpu, shiftType, count); return;
12110 case 13: eon64_shift (cpu, shiftType, count); return;
12111 case 14: ands64_shift (cpu, shiftType, count); return;
12112 case 15: bics64_shift (cpu, shiftType, count); return;
12113 }
12114 }
12115
12116 /* 32 bit conditional select. */
12117 static void
12118 csel32 (sim_cpu *cpu, CondCode cc)
12119 {
12120 unsigned rm = INSTR (20, 16);
12121 unsigned rn = INSTR (9, 5);
12122 unsigned rd = INSTR (4, 0);
12123
12124 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12125 testConditionCode (cpu, cc)
12126 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12127 : aarch64_get_reg_u32 (cpu, rm, NO_SP));
12128 }
12129
12130 /* 64 bit conditional select. */
12131 static void
12132 csel64 (sim_cpu *cpu, CondCode cc)
12133 {
12134 unsigned rm = INSTR (20, 16);
12135 unsigned rn = INSTR (9, 5);
12136 unsigned rd = INSTR (4, 0);
12137
12138 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12139 testConditionCode (cpu, cc)
12140 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12141 : aarch64_get_reg_u64 (cpu, rm, NO_SP));
12142 }
12143
12144 /* 32 bit conditional increment. */
12145 static void
12146 csinc32 (sim_cpu *cpu, CondCode cc)
12147 {
12148 unsigned rm = INSTR (20, 16);
12149 unsigned rn = INSTR (9, 5);
12150 unsigned rd = INSTR (4, 0);
12151
12152 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12153 testConditionCode (cpu, cc)
12154 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12155 : aarch64_get_reg_u32 (cpu, rm, NO_SP) + 1);
12156 }
12157
12158 /* 64 bit conditional increment. */
12159 static void
12160 csinc64 (sim_cpu *cpu, CondCode cc)
12161 {
12162 unsigned rm = INSTR (20, 16);
12163 unsigned rn = INSTR (9, 5);
12164 unsigned rd = INSTR (4, 0);
12165
12166 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12167 testConditionCode (cpu, cc)
12168 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12169 : aarch64_get_reg_u64 (cpu, rm, NO_SP) + 1);
12170 }
12171
12172 /* 32 bit conditional invert. */
12173 static void
12174 csinv32 (sim_cpu *cpu, CondCode cc)
12175 {
12176 unsigned rm = INSTR (20, 16);
12177 unsigned rn = INSTR (9, 5);
12178 unsigned rd = INSTR (4, 0);
12179
12180 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12181 testConditionCode (cpu, cc)
12182 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12183 : ~ aarch64_get_reg_u32 (cpu, rm, NO_SP));
12184 }
12185
12186 /* 64 bit conditional invert. */
12187 static void
12188 csinv64 (sim_cpu *cpu, CondCode cc)
12189 {
12190 unsigned rm = INSTR (20, 16);
12191 unsigned rn = INSTR (9, 5);
12192 unsigned rd = INSTR (4, 0);
12193
12194 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12195 testConditionCode (cpu, cc)
12196 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12197 : ~ aarch64_get_reg_u64 (cpu, rm, NO_SP));
12198 }
12199
12200 /* 32 bit conditional negate. */
12201 static void
12202 csneg32 (sim_cpu *cpu, CondCode cc)
12203 {
12204 unsigned rm = INSTR (20, 16);
12205 unsigned rn = INSTR (9, 5);
12206 unsigned rd = INSTR (4, 0);
12207
12208 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12209 testConditionCode (cpu, cc)
12210 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12211 : - aarch64_get_reg_u32 (cpu, rm, NO_SP));
12212 }
12213
12214 /* 64 bit conditional negate. */
12215 static void
12216 csneg64 (sim_cpu *cpu, CondCode cc)
12217 {
12218 unsigned rm = INSTR (20, 16);
12219 unsigned rn = INSTR (9, 5);
12220 unsigned rd = INSTR (4, 0);
12221
12222 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12223 testConditionCode (cpu, cc)
12224 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12225 : - aarch64_get_reg_u64 (cpu, rm, NO_SP));
12226 }
12227
12228 static void
12229 dexCondSelect (sim_cpu *cpu)
12230 {
12231 /* instr[28,21] = 11011011
12232 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12233 instr[30] and instr[11,10] = op : 000 ==> CSEL, 001 ==> CSINC,
12234 100 ==> CSINV, 101 ==> CSNEG,
12235 _1_ ==> UNALLOC
12236 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12237 instr[15,12] = cond */
12239
12240 CondCode cc = INSTR (15, 12);
12241 uint32_t S = INSTR (29, 29);
12242 uint32_t op2 = INSTR (11, 10);
12243
12244 if (S == 1)
12245 HALT_UNALLOC;
12246
12247 if (op2 & 0x2)
12248 HALT_UNALLOC;
12249
12250 switch ((INSTR (31, 30) << 1) | op2)
12251 {
12252 case 0: csel32 (cpu, cc); return;
12253 case 1: csinc32 (cpu, cc); return;
12254 case 2: csinv32 (cpu, cc); return;
12255 case 3: csneg32 (cpu, cc); return;
12256 case 4: csel64 (cpu, cc); return;
12257 case 5: csinc64 (cpu, cc); return;
12258 case 6: csinv64 (cpu, cc); return;
12259 case 7: csneg64 (cpu, cc); return;
12260 }
12261 }
12262
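/* A worked example (standalone sketch): CSET W0, EQ is the alias
   CSINC W0, WZR, WZR, NE == 0x1a9f17e0, decoded with the same fields
   as dexCondSelect above. 'bits' is a local stand-in for INSTR. */
#if 0
#include <stdint.h>
#include <assert.h>

static uint32_t
bits (uint32_t insn, int hi, int lo)
{
  return (insn >> lo) & ((1u << (hi - lo + 1)) - 1);
}

int
main (void)
{
  uint32_t insn = 0x1a9f17e0;

  assert (bits (insn, 29, 29) == 0);            /* S == 0: allocated. */
  assert (((bits (insn, 31, 30) << 1)
           | bits (insn, 11, 10)) == 1);        /* case 1: csinc32. */
  assert (bits (insn, 15, 12) == 1);            /* cond == NE. */
  return 0;
}
#endif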
12263 /* Some helpers for counting leading 1 or 0 bits. */
12264
12265 /* Counts the number of leading bits which are the same
12266 in a 32 bit value in the range 1 to 32. */
12267 static uint32_t
12268 leading32 (uint32_t value)
12269 {
12270 int32_t mask = 0xffff0000;
12271 uint32_t count = 16; /* Counts number of bits set in mask. */
12272 uint32_t lo = 1; /* Lower bound for number of sign bits. */
12273 uint32_t hi = 32; /* Upper bound for number of sign bits. */
12274
12275 while (lo + 1 < hi)
12276 {
12277 int32_t test = (value & mask);
12278
12279 if (test == 0 || test == mask)
12280 {
12281 lo = count;
12282 count = (lo + hi) / 2;
12283 mask >>= (count - lo);
12284 }
12285 else
12286 {
12287 hi = count;
12288 count = (lo + hi) / 2;
12289 mask <<= hi - count;
12290 }
12291 }
12292
12293 if (lo != hi)
12294 {
12295 int32_t test;
12296
12297 mask >>= 1;
12298 test = (value & mask);
12299
12300 if (test == 0 || test == mask)
12301 count = hi;
12302 else
12303 count = lo;
12304 }
12305
12306 return count;
12307 }
12308
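/* A naive reference for leading32 (standalone sketch): count how many
   copies of the top bit appear before the first bit that differs, giving
   a result in the range 1 to 32, the same contract as the binary search
   above. 'leading32_ref' is a hypothetical name. */
#if 0
#include <stdint.h>

static uint32_t
leading32_ref (uint32_t value)
{
  uint32_t top = value >> 31;     /* The sign (top) bit. */
  uint32_t count = 1;
  int i;

  for (i = 30; i >= 0 && ((value >> i) & 1) == top; i--)
    count++;
  return count;
}
#endif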
12309 /* Counts the number of leading bits which are the same
12310 in a 64 bit value in the range 1 to 64. */
12311 static uint64_t
12312 leading64 (uint64_t value)
12313 {
12314 int64_t mask = 0xffffffff00000000LL;
12315 uint64_t count = 32; /* Counts number of bits set in mask. */
12316 uint64_t lo = 1; /* Lower bound for number of sign bits. */
12317 uint64_t hi = 64; /* Upper bound for number of sign bits. */
12318
12319 while (lo + 1 < hi)
12320 {
12321 int64_t test = (value & mask);
12322
12323 if (test == 0 || test == mask)
12324 {
12325 lo = count;
12326 count = (lo + hi) / 2;
12327 mask >>= (count - lo);
12328 }
12329 else
12330 {
12331 hi = count;
12332 count = (lo + hi) / 2;
12333 mask <<= hi - count;
12334 }
12335 }
12336
12337 if (lo != hi)
12338 {
12339 int64_t test;
12340
12341 mask >>= 1;
12342 test = (value & mask);
12343
12344 if (test == 0 || test == mask)
12345 count = hi;
12346 else
12347 count = lo;
12348 }
12349
12350 return count;
12351 }
12352
12353 /* Bit operations. */
12354 /* N.B register args may not be SP. */
12355
12356 /* 32 bit count leading sign bits. */
12357 static void
12358 cls32 (sim_cpu *cpu)
12359 {
12360 unsigned rn = INSTR (9, 5);
12361 unsigned rd = INSTR (4, 0);
12362
12363 /* N.B. the result needs to exclude the leading bit. */
12364 aarch64_set_reg_u64
12365 (cpu, rd, NO_SP, leading32 (aarch64_get_reg_u32 (cpu, rn, NO_SP)) - 1);
12366 }
12367
12368 /* 64 bit count leading sign bits. */
12369 static void
12370 cls64 (sim_cpu *cpu)
12371 {
12372 unsigned rn = INSTR (9, 5);
12373 unsigned rd = INSTR (4, 0);
12374
12375 /* N.B. the result needs to exclude the leading bit. */
12376 aarch64_set_reg_u64
12377 (cpu, rd, NO_SP, leading64 (aarch64_get_reg_u64 (cpu, rn, NO_SP)) - 1);
12378 }
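
/* Example: for the 32 bit value 0xffff0000 the top sixteen bits all
   match the sign bit, so leading32 returns 16 and CLS yields 15.  On
   GCC/Clang hosts (an assumption, not ISO C) the same value is given
   by __builtin_clrsb (0xffff0000).  */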
12379
12380 /* 32 bit count leading zero bits. */
12381 static void
12382 clz32 (sim_cpu *cpu)
12383 {
12384 unsigned rn = INSTR (9, 5);
12385 unsigned rd = INSTR (4, 0);
12386 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12387
12388 /* if the sign (top) bit is set then the count is 0. */
12389 if (pick32 (value, 31, 31))
12390 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12391 else
12392 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading32 (value));
12393 }
12394
12395 /* 64 bit count leading zero bits. */
12396 static void
12397 clz64 (sim_cpu *cpu)
12398 {
12399 unsigned rn = INSTR (9, 5);
12400 unsigned rd = INSTR (4, 0);
12401 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12402
12403 /* if the sign (top) bit is set then the count is 0. */
12404 if (pick64 (value, 63, 63))
12405 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12406 else
12407 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading64 (value));
12408 }
12409
12410 /* 32 bit reverse bits. */
12411 static void
12412 rbit32 (sim_cpu *cpu)
12413 {
12414 unsigned rn = INSTR (9, 5);
12415 unsigned rd = INSTR (4, 0);
12416 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12417 uint32_t result = 0;
12418 int i;
12419
12420 for (i = 0; i < 32; i++)
12421 {
12422 result <<= 1;
12423 result |= (value & 1);
12424 value >>= 1;
12425 }
12426 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12427 }
12428
12429 /* 64 bit reverse bits. */
12430 static void
12431 rbit64 (sim_cpu *cpu)
12432 {
12433 unsigned rn = INSTR (9, 5);
12434 unsigned rd = INSTR (4, 0);
12435 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12436 uint64_t result = 0;
12437 int i;
12438
12439 for (i = 0; i < 64; i++)
12440 {
12441 result <<= 1;
12442 result |= (value & 1UL);
12443 value >>= 1;
12444 }
12445 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12446 }
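
/* Example: rbit32 maps 0x00000001 to 0x80000000 and 0x12345678 to
   0x1e6a2c48.  */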
12447
12448 /* 32 bit reverse bytes. */
12449 static void
12450 rev32 (sim_cpu *cpu)
12451 {
12452 unsigned rn = INSTR (9, 5);
12453 unsigned rd = INSTR (4, 0);
12454 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12455 uint32_t result = 0;
12456 int i;
12457
12458 for (i = 0; i < 4; i++)
12459 {
12460 result <<= 8;
12461 result |= (value & 0xff);
12462 value >>= 8;
12463 }
12464 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12465 }
12466
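/* 64 bit reverse bytes within each 32 bit word (REV32).  */
static void
revw64 (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
  uint64_t result = 0;
  int i;

  for (i = 0; i < 8; i++)
    /* Move each byte to the mirrored position within its own word.  */
    result |= ((value >> (i * 8)) & 0xffULL) << ((i ^ 3) * 8);

  aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
}
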
12467 /* 64 bit reverse bytes. */
12468 static void
12469 rev64 (sim_cpu *cpu)
12470 {
12471 unsigned rn = INSTR (9, 5);
12472 unsigned rd = INSTR (4, 0);
12473 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12474 uint64_t result = 0;
12475 int i;
12476
12477 for (i = 0; i < 8; i++)
12478 {
12479 result <<= 8;
12480 result |= (value & 0xffULL);
12481 value >>= 8;
12482 }
12483 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12484 }
12485
12486 /* 32 bit reverse shorts. */
/* N.B. this reverses the order of the bytes in each half word.  */
12488 static void
12489 revh32 (sim_cpu *cpu)
12490 {
12491 unsigned rn = INSTR (9, 5);
12492 unsigned rd = INSTR (4, 0);
12493 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12494 uint32_t result = 0;
12495 int i;
12496
12497 for (i = 0; i < 2; i++)
12498 {
12499 result <<= 8;
12500 result |= (value & 0x00ff00ff);
12501 value >>= 8;
12502 }
12503 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12504 }
12505
12506 /* 64 bit reverse shorts. */
/* N.B. this reverses the order of the bytes in each half word.  */
12508 static void
12509 revh64 (sim_cpu *cpu)
12510 {
12511 unsigned rn = INSTR (9, 5);
12512 unsigned rd = INSTR (4, 0);
12513 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12514 uint64_t result = 0;
12515 int i;
12516
12517 for (i = 0; i < 2; i++)
12518 {
12519 result <<= 8;
12520 result |= (value & 0x00ff00ff00ff00ffULL);
12521 value >>= 8;
12522 }
12523 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12524 }
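
/* Example: revh32 maps 0x11223344 to 0x22114433 -- the two bytes inside
   each 16 bit half are swapped but the halves themselves stay put.  */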
12525
12526 static void
12527 dexDataProc1Source (sim_cpu *cpu)
12528 {
12529 /* instr[30] = 1
     instr[28,21] = 11010110
12531 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12532 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12533 instr[20,16] = opcode2 : 00000 ==> ok, ow ==> UNALLOC
12534 instr[15,10] = opcode : 000000 ==> RBIT, 000001 ==> REV16,
                          000010 ==> REV (REV32 when size == 1),
                          000011 ==> UNALLOC (REV when size == 1),
12536 000100 ==> CLZ, 000101 ==> CLS
12537 ow ==> UNALLOC
12538 instr[9,5] = rn : may not be SP
12539 instr[4,0] = rd : may not be SP. */
12540
12541 uint32_t S = INSTR (29, 29);
12542 uint32_t opcode2 = INSTR (20, 16);
12543 uint32_t opcode = INSTR (15, 10);
12544 uint32_t dispatch = ((INSTR (31, 31) << 3) | opcode);
12545
12546 if (S == 1)
12547 HALT_UNALLOC;
12548
12549 if (opcode2 != 0)
12550 HALT_UNALLOC;
12551
12552 if (opcode & 0x38)
12553 HALT_UNALLOC;
12554
12555 switch (dispatch)
12556 {
12557 case 0: rbit32 (cpu); return;
12558 case 1: revh32 (cpu); return;
12559 case 2: rev32 (cpu); return;
12560 case 4: clz32 (cpu); return;
12561 case 5: cls32 (cpu); return;
12562 case 8: rbit64 (cpu); return;
12563 case 9: revh64 (cpu); return;
    case 10: revw64 (cpu); return;
    case 11: rev64 (cpu); return;
    case 12: clz64 (cpu); return;
    case 13: cls64 (cpu); return;
12568 default: HALT_UNALLOC;
12569 }
12570 }
12571
12572 /* Variable shift.
12573 Shifts by count supplied in register.
   N.B. register args may not be SP.
12575 These all use the shifted auxiliary function for
12576 simplicity and clarity. Writing the actual shift
12577 inline would avoid a branch and so be faster but
12578 would also necessitate getting signs right. */
12579
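/* As a minimal sketch of the sign handling the note above refers to,
   an inline 32 bit arithmetic shift right might look like the code
   below (kept under #if 0 so it plays no part in the build; shifting a
   negative signed value right is implementation-defined in ISO C, so
   the sketch shifts the complement instead).  */
#if 0
static uint32_t
asr32_inline (uint32_t value, unsigned count)
{
  if (value & 0x80000000u)
    return ~(~value >> count);	/* Negative: shift ones in at the top.  */
  return value >> count;	/* Non-negative: plain logical shift.  */
}
#endif
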
12580 /* 32 bit arithmetic shift right. */
12581 static void
12582 asrv32 (sim_cpu *cpu)
12583 {
12584 unsigned rm = INSTR (20, 16);
12585 unsigned rn = INSTR (9, 5);
12586 unsigned rd = INSTR (4, 0);
12587
12588 aarch64_set_reg_u64
12589 (cpu, rd, NO_SP,
12590 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ASR,
12591 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12592 }
12593
12594 /* 64 bit arithmetic shift right. */
12595 static void
12596 asrv64 (sim_cpu *cpu)
12597 {
12598 unsigned rm = INSTR (20, 16);
12599 unsigned rn = INSTR (9, 5);
12600 unsigned rd = INSTR (4, 0);
12601
12602 aarch64_set_reg_u64
12603 (cpu, rd, NO_SP,
12604 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ASR,
12605 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12606 }
12607
12608 /* 32 bit logical shift left. */
12609 static void
12610 lslv32 (sim_cpu *cpu)
12611 {
12612 unsigned rm = INSTR (20, 16);
12613 unsigned rn = INSTR (9, 5);
12614 unsigned rd = INSTR (4, 0);
12615
12616 aarch64_set_reg_u64
12617 (cpu, rd, NO_SP,
12618 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSL,
12619 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12620 }
12621
/* 64 bit logical shift left.  */
12623 static void
12624 lslv64 (sim_cpu *cpu)
12625 {
12626 unsigned rm = INSTR (20, 16);
12627 unsigned rn = INSTR (9, 5);
12628 unsigned rd = INSTR (4, 0);
12629
12630 aarch64_set_reg_u64
12631 (cpu, rd, NO_SP,
12632 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSL,
12633 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12634 }
12635
12636 /* 32 bit logical shift right. */
12637 static void
12638 lsrv32 (sim_cpu *cpu)
12639 {
12640 unsigned rm = INSTR (20, 16);
12641 unsigned rn = INSTR (9, 5);
12642 unsigned rd = INSTR (4, 0);
12643
12644 aarch64_set_reg_u64
12645 (cpu, rd, NO_SP,
12646 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSR,
12647 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12648 }
12649
12650 /* 64 bit logical shift right. */
12651 static void
12652 lsrv64 (sim_cpu *cpu)
12653 {
12654 unsigned rm = INSTR (20, 16);
12655 unsigned rn = INSTR (9, 5);
12656 unsigned rd = INSTR (4, 0);
12657
12658 aarch64_set_reg_u64
12659 (cpu, rd, NO_SP,
12660 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSR,
12661 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12662 }
12663
12664 /* 32 bit rotate right. */
12665 static void
12666 rorv32 (sim_cpu *cpu)
12667 {
12668 unsigned rm = INSTR (20, 16);
12669 unsigned rn = INSTR (9, 5);
12670 unsigned rd = INSTR (4, 0);
12671
12672 aarch64_set_reg_u64
12673 (cpu, rd, NO_SP,
12674 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ROR,
12675 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12676 }
12677
12678 /* 64 bit rotate right. */
12679 static void
12680 rorv64 (sim_cpu *cpu)
12681 {
12682 unsigned rm = INSTR (20, 16);
12683 unsigned rn = INSTR (9, 5);
12684 unsigned rd = INSTR (4, 0);
12685
12686 aarch64_set_reg_u64
12687 (cpu, rd, NO_SP,
12688 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ROR,
12689 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12690 }
12691
12692
/* Divide.  */
12694
12695 /* 32 bit signed divide. */
12696 static void
sdiv32 (sim_cpu *cpu)
12698 {
12699 unsigned rm = INSTR (20, 16);
12700 unsigned rn = INSTR (9, 5);
12701 unsigned rd = INSTR (4, 0);
12702 /* N.B. the pseudo-code does the divide using 64 bit data. */
  /* N.B. C division truncates towards zero, as required.  */
12704 int64_t dividend = aarch64_get_reg_s32 (cpu, rn, NO_SP);
12705 int64_t divisor = aarch64_get_reg_s32 (cpu, rm, NO_SP);
12706
12707 aarch64_set_reg_s64 (cpu, rd, NO_SP,
12708 divisor ? ((int32_t) (dividend / divisor)) : 0);
12709 }
12710
12711 /* 64 bit signed divide. */
12712 static void
sdiv64 (sim_cpu *cpu)
12714 {
12715 unsigned rm = INSTR (20, 16);
12716 unsigned rn = INSTR (9, 5);
12717 unsigned rd = INSTR (4, 0);
12718
  int64_t dividend = aarch64_get_reg_s64 (cpu, rn, NO_SP);
  int64_t divisor = aarch64_get_reg_s64 (cpu, rm, NO_SP);

  /* C division truncates towards zero, as required, but INT64_MIN / -1
     overflows (undefined behaviour in C); the architecture defines that
     quotient to wrap back to INT64_MIN.  */
  if (dividend == INT64_MIN && divisor == -1)
    aarch64_set_reg_s64 (cpu, rd, NO_SP, INT64_MIN);
  else
    aarch64_set_reg_s64
      (cpu, rd, NO_SP, divisor ? (dividend / divisor) : 0);
12725 }
12726
12727 /* 32 bit unsigned divide. */
12728 static void
12729 udiv32 (sim_cpu *cpu)
12730 {
12731 unsigned rm = INSTR (20, 16);
12732 unsigned rn = INSTR (9, 5);
12733 unsigned rd = INSTR (4, 0);
12734
12735 /* N.B. the pseudo-code does the divide using 64 bit data. */
12736 uint64_t dividend = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12737 uint64_t divisor = aarch64_get_reg_u32 (cpu, rm, NO_SP);
12738
12739 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12740 divisor ? (uint32_t) (dividend / divisor) : 0);
12741 }
12742
12743 /* 64 bit unsigned divide. */
12744 static void
12745 udiv64 (sim_cpu *cpu)
12746 {
12747 unsigned rm = INSTR (20, 16);
12748 unsigned rn = INSTR (9, 5);
12749 unsigned rd = INSTR (4, 0);
12750
  /* N.B. unsigned C division truncates towards zero, as required.  */
12752 uint64_t divisor = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12753
12754 aarch64_set_reg_u64
12755 (cpu, rd, NO_SP,
12756 divisor ? (aarch64_get_reg_u64 (cpu, rn, NO_SP) / divisor) : 0);
12757 }
12758
12759 static void
12760 dexDataProc2Source (sim_cpu *cpu)
12761 {
12762 /* assert instr[30] == 0
12763 instr[28,21] == 11010110
12764 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12765 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
     instr[15,10] = opcode : 000010 ==> UDIV, 000011 ==> SDIV,
12767 001000 ==> LSLV, 001001 ==> LSRV
12768 001010 ==> ASRV, 001011 ==> RORV
12769 ow ==> UNALLOC. */
12770
12771 uint32_t dispatch;
12772 uint32_t S = INSTR (29, 29);
12773 uint32_t opcode = INSTR (15, 10);
12774
12775 if (S == 1)
12776 HALT_UNALLOC;
12777
12778 if (opcode & 0x34)
12779 HALT_UNALLOC;
12780
12781 dispatch = ( (INSTR (31, 31) << 3)
12782 | (uimm (opcode, 3, 3) << 2)
12783 | uimm (opcode, 1, 0));
12784 switch (dispatch)
12785 {
12786 case 2: udiv32 (cpu); return;
    case 3: sdiv32 (cpu); return;
12788 case 4: lslv32 (cpu); return;
12789 case 5: lsrv32 (cpu); return;
12790 case 6: asrv32 (cpu); return;
12791 case 7: rorv32 (cpu); return;
12792 case 10: udiv64 (cpu); return;
    case 11: sdiv64 (cpu); return;
12794 case 12: lslv64 (cpu); return;
12795 case 13: lsrv64 (cpu); return;
12796 case 14: asrv64 (cpu); return;
12797 case 15: rorv64 (cpu); return;
12798 default: HALT_UNALLOC;
12799 }
12800 }
12801
12802
12803 /* Multiply. */
12804
12805 /* 32 bit multiply and add. */
12806 static void
12807 madd32 (sim_cpu *cpu)
12808 {
12809 unsigned rm = INSTR (20, 16);
12810 unsigned ra = INSTR (14, 10);
12811 unsigned rn = INSTR (9, 5);
12812 unsigned rd = INSTR (4, 0);
12813
12814 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12815 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12816 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12817 + aarch64_get_reg_u32 (cpu, rn, NO_SP)
12818 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12819 }
12820
12821 /* 64 bit multiply and add. */
12822 static void
12823 madd64 (sim_cpu *cpu)
12824 {
12825 unsigned rm = INSTR (20, 16);
12826 unsigned ra = INSTR (14, 10);
12827 unsigned rn = INSTR (9, 5);
12828 unsigned rd = INSTR (4, 0);
12829
12830 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12831 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12832 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12833 + (aarch64_get_reg_u64 (cpu, rn, NO_SP)
12834 * aarch64_get_reg_u64 (cpu, rm, NO_SP)));
12835 }
12836
12837 /* 32 bit multiply and sub. */
12838 static void
12839 msub32 (sim_cpu *cpu)
12840 {
12841 unsigned rm = INSTR (20, 16);
12842 unsigned ra = INSTR (14, 10);
12843 unsigned rn = INSTR (9, 5);
12844 unsigned rd = INSTR (4, 0);
12845
12846 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12847 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12848 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12849 - aarch64_get_reg_u32 (cpu, rn, NO_SP)
12850 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12851 }
12852
12853 /* 64 bit multiply and sub. */
12854 static void
12855 msub64 (sim_cpu *cpu)
12856 {
12857 unsigned rm = INSTR (20, 16);
12858 unsigned ra = INSTR (14, 10);
12859 unsigned rn = INSTR (9, 5);
12860 unsigned rd = INSTR (4, 0);
12861
12862 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12863 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12864 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12865 - aarch64_get_reg_u64 (cpu, rn, NO_SP)
12866 * aarch64_get_reg_u64 (cpu, rm, NO_SP));
12867 }
12868
12869 /* Signed multiply add long -- source, source2 : 32 bit, source3 : 64 bit. */
12870 static void
12871 smaddl (sim_cpu *cpu)
12872 {
12873 unsigned rm = INSTR (20, 16);
12874 unsigned ra = INSTR (14, 10);
12875 unsigned rn = INSTR (9, 5);
12876 unsigned rd = INSTR (4, 0);
12877
12878 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12879 obtain a 64 bit product. */
12880 aarch64_set_reg_s64
12881 (cpu, rd, NO_SP,
12882 aarch64_get_reg_s64 (cpu, ra, NO_SP)
12883 + ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
12884 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
12885 }
12886
12887 /* Signed multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
12888 static void
12889 smsubl (sim_cpu *cpu)
12890 {
12891 unsigned rm = INSTR (20, 16);
12892 unsigned ra = INSTR (14, 10);
12893 unsigned rn = INSTR (9, 5);
12894 unsigned rd = INSTR (4, 0);
12895
12896 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12897 obtain a 64 bit product. */
12898 aarch64_set_reg_s64
12899 (cpu, rd, NO_SP,
12900 aarch64_get_reg_s64 (cpu, ra, NO_SP)
12901 - ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
12902 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
12903 }
12904
12905 /* Integer Multiply/Divide. */
12906
12907 /* First some macros and a helper function. */
12908 /* Macros to test or access elements of 64 bit words. */
12909
12910 /* Mask used to access lo 32 bits of 64 bit unsigned int. */
12911 #define LOW_WORD_MASK ((1ULL << 32) - 1)
12912 /* Return the lo 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
12913 #define lowWordToU64(_value_u64) ((_value_u64) & LOW_WORD_MASK)
12914 /* Return the hi 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
12915 #define highWordToU64(_value_u64) ((_value_u64) >> 32)
12916
/* Offset of sign bit in 64 bit signed integer.  */
12918 #define SIGN_SHIFT_U64 63
12919 /* The sign bit itself -- also identifies the minimum negative int value. */
12920 #define SIGN_BIT_U64 (1UL << SIGN_SHIFT_U64)
12921 /* Return true if a 64 bit signed int presented as an unsigned int is the
12922 most negative value. */
12923 #define isMinimumU64(_value_u64) ((_value_u64) == SIGN_BIT_U64)
/* Return true (non-zero) if a 64 bit signed int presented as an unsigned
   int has its sign bit set.  */
12926 #define isSignSetU64(_value_u64) ((_value_u64) & SIGN_BIT_U64)
12927 /* Return 1L or -1L according to whether a 64 bit signed int presented as
12928 an unsigned int has its sign bit set or not. */
#define signOfU64(_value_u64) (1L + (((_value_u64) >> SIGN_SHIFT_U64) * -2L))
12930 /* Clear the sign bit of a 64 bit signed int presented as an unsigned int. */
12931 #define clearSignU64(_value_u64) ((_value_u64) &= ~SIGN_BIT_U64)
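
/* Example: for the 64 bit value 0x1122334455667788ULL, lowWordToU64
   yields 0x55667788 and highWordToU64 yields 0x11223344.  */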
12932
/* Multiply two 64 bit ints and return
   the hi 64 bits of the 128 bit product.  */
12935
12936 static uint64_t
12937 mul64hi (uint64_t value1, uint64_t value2)
12938 {
12939 uint64_t resultmid1;
12940 uint64_t result;
12941 uint64_t value1_lo = lowWordToU64 (value1);
  uint64_t value1_hi = highWordToU64 (value1);
12943 uint64_t value2_lo = lowWordToU64 (value2);
12944 uint64_t value2_hi = highWordToU64 (value2);
12945
12946 /* Cross-multiply and collect results. */
12947 uint64_t xproductlo = value1_lo * value2_lo;
12948 uint64_t xproductmid1 = value1_lo * value2_hi;
12949 uint64_t xproductmid2 = value1_hi * value2_lo;
12950 uint64_t xproducthi = value1_hi * value2_hi;
12951 uint64_t carry = 0;
12952 /* Start accumulating 64 bit results. */
12953 /* Drop bottom half of lowest cross-product. */
12954 uint64_t resultmid = xproductlo >> 32;
12955 /* Add in middle products. */
12956 resultmid = resultmid + xproductmid1;
12957
12958 /* Check for overflow. */
12959 if (resultmid < xproductmid1)
12960 /* Carry over 1 into top cross-product. */
12961 carry++;
12962
12963 resultmid1 = resultmid + xproductmid2;
12964
12965 /* Check for overflow. */
12966 if (resultmid1 < xproductmid2)
12967 /* Carry over 1 into top cross-product. */
12968 carry++;
12969
12970 /* Drop lowest 32 bits of middle cross-product. */
12971 result = resultmid1 >> 32;
12972
  /* Add the top cross-product and any carry.  */
12974 result += xproducthi + carry;
12975
12976 return result;
12977 }
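
/* On GCC/Clang hosts that provide unsigned __int128 (an assumption, not
   ISO C) the function above can be cross-checked with a one-liner, kept
   under #if 0 so it plays no part in the build.  */
#if 0
static uint64_t
mul64hi_ref (uint64_t value1, uint64_t value2)
{
  return (uint64_t) (((unsigned __int128) value1 * value2) >> 64);
}
#endif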
12978
12979 /* Signed multiply high, source, source2 :
12980 64 bit, dest <-- high 64-bit of result. */
12981 static void
12982 smulh (sim_cpu *cpu)
12983 {
12984 uint64_t uresult;
12985 int64_t result;
12986 unsigned rm = INSTR (20, 16);
12987 unsigned rn = INSTR (9, 5);
12988 unsigned rd = INSTR (4, 0);
12989 GReg ra = INSTR (14, 10);
12990 int64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12991 int64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12992 uint64_t uvalue1;
12993 uint64_t uvalue2;
12994 int64_t signum = 1;
12995
12996 if (ra != R31)
12997 HALT_UNALLOC;
12998
  /* Convert to unsigned and use the unsigned mul64hi routine,
     then fix the sign up afterwards.  */
13001 if (value1 < 0)
13002 {
13003 signum *= -1L;
13004 uvalue1 = -value1;
13005 }
13006 else
13007 {
13008 uvalue1 = value1;
13009 }
13010
13011 if (value2 < 0)
13012 {
13013 signum *= -1L;
13014 uvalue2 = -value2;
13015 }
13016 else
13017 {
13018 uvalue2 = value2;
13019 }
13020
13021 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  uresult = mul64hi (uvalue1, uvalue2);
  result = uresult;
  if (signum < 0)
    /* Negate the 128 bit product: invert the high half, adding one when
       the low half is zero.  E.g. smulh (-1, 1) must yield -1, not
       -mul64hi (1, 1) == 0.  */
    result = ~result + ((uvalue1 * uvalue2) == 0);
13025
13026 aarch64_set_reg_s64 (cpu, rd, NO_SP, result);
13027 }
13028
13029 /* Unsigned multiply add long -- source, source2 :
13030 32 bit, source3 : 64 bit. */
13031 static void
13032 umaddl (sim_cpu *cpu)
13033 {
13034 unsigned rm = INSTR (20, 16);
13035 unsigned ra = INSTR (14, 10);
13036 unsigned rn = INSTR (9, 5);
13037 unsigned rd = INSTR (4, 0);
13038
13039 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
     obtain a 64 bit product.  */
13042 aarch64_set_reg_u64
13043 (cpu, rd, NO_SP,
13044 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13045 + ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
13046 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
13047 }
13048
13049 /* Unsigned multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
13050 static void
13051 umsubl (sim_cpu *cpu)
13052 {
13053 unsigned rm = INSTR (20, 16);
13054 unsigned ra = INSTR (14, 10);
13055 unsigned rn = INSTR (9, 5);
13056 unsigned rd = INSTR (4, 0);
13057
13058 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
     obtain a 64 bit product.  */
13061 aarch64_set_reg_u64
13062 (cpu, rd, NO_SP,
13063 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13064 - ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
13065 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
13066 }
13067
13068 /* Unsigned multiply high, source, source2 :
13069 64 bit, dest <-- high 64-bit of result. */
13070 static void
13071 umulh (sim_cpu *cpu)
13072 {
13073 unsigned rm = INSTR (20, 16);
13074 unsigned rn = INSTR (9, 5);
13075 unsigned rd = INSTR (4, 0);
13076 GReg ra = INSTR (14, 10);
13077
13078 if (ra != R31)
13079 HALT_UNALLOC;
13080
13081 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13082 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13083 mul64hi (aarch64_get_reg_u64 (cpu, rn, NO_SP),
13084 aarch64_get_reg_u64 (cpu, rm, NO_SP)));
13085 }
13086
13087 static void
13088 dexDataProc3Source (sim_cpu *cpu)
13089 {
13090 /* assert instr[28,24] == 11011. */
13091 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit (for rd at least)
13092 instr[30,29] = op54 : 00 ==> ok, ow ==> UNALLOC
     instr[23,21] = op31 : 111 ==> UNALLOC, ow ==> ok
13094 instr[15] = o0 : 0/1 ==> ok
13095 instr[23,21:15] ==> op : 0000 ==> MADD, 0001 ==> MSUB, (32/64 bit)
13096 0010 ==> SMADDL, 0011 ==> SMSUBL, (64 bit only)
13097 0100 ==> SMULH, (64 bit only)
                            1010 ==> UMADDL, 1011 ==> UMSUBL, (64 bit only)
13099 1100 ==> UMULH (64 bit only)
13100 ow ==> UNALLOC. */
13101
13102 uint32_t dispatch;
13103 uint32_t size = INSTR (31, 31);
13104 uint32_t op54 = INSTR (30, 29);
13105 uint32_t op31 = INSTR (23, 21);
13106 uint32_t o0 = INSTR (15, 15);
13107
13108 if (op54 != 0)
13109 HALT_UNALLOC;
13110
13111 if (size == 0)
13112 {
13113 if (op31 != 0)
13114 HALT_UNALLOC;
13115
13116 if (o0 == 0)
13117 madd32 (cpu);
13118 else
13119 msub32 (cpu);
13120 return;
13121 }
13122
13123 dispatch = (op31 << 1) | o0;
13124
13125 switch (dispatch)
13126 {
13127 case 0: madd64 (cpu); return;
13128 case 1: msub64 (cpu); return;
13129 case 2: smaddl (cpu); return;
13130 case 3: smsubl (cpu); return;
13131 case 4: smulh (cpu); return;
13132 case 10: umaddl (cpu); return;
13133 case 11: umsubl (cpu); return;
13134 case 12: umulh (cpu); return;
13135 default: HALT_UNALLOC;
13136 }
13137 }
13138
13139 static void
13140 dexDPReg (sim_cpu *cpu)
13141 {
13142 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
13143 assert group == GROUP_DPREG_0101 || group == GROUP_DPREG_1101
13144 bits [28:24:21] of a DPReg are the secondary dispatch vector. */
13145 uint32_t group2 = dispatchDPReg (aarch64_get_instr (cpu));
13146
13147 switch (group2)
13148 {
13149 case DPREG_LOG_000:
13150 case DPREG_LOG_001:
13151 dexLogicalShiftedRegister (cpu); return;
13152
13153 case DPREG_ADDSHF_010:
13154 dexAddSubtractShiftedRegister (cpu); return;
13155
13156 case DPREG_ADDEXT_011:
13157 dexAddSubtractExtendedRegister (cpu); return;
13158
13159 case DPREG_ADDCOND_100:
13160 {
13161 /* This set bundles a variety of different operations. */
	/* Check for one of:  */
13163 /* 1) add/sub w carry. */
13164 uint32_t mask1 = 0x1FE00000U;
13165 uint32_t val1 = 0x1A000000U;
13166 /* 2) cond compare register/immediate. */
13167 uint32_t mask2 = 0x1FE00000U;
13168 uint32_t val2 = 0x1A400000U;
13169 /* 3) cond select. */
13170 uint32_t mask3 = 0x1FE00000U;
13171 uint32_t val3 = 0x1A800000U;
13172 /* 4) data proc 1/2 source. */
13173 uint32_t mask4 = 0x1FE00000U;
13174 uint32_t val4 = 0x1AC00000U;
13175
13176 if ((aarch64_get_instr (cpu) & mask1) == val1)
13177 dexAddSubtractWithCarry (cpu);
13178
13179 else if ((aarch64_get_instr (cpu) & mask2) == val2)
13180 CondCompare (cpu);
13181
13182 else if ((aarch64_get_instr (cpu) & mask3) == val3)
13183 dexCondSelect (cpu);
13184
13185 else if ((aarch64_get_instr (cpu) & mask4) == val4)
13186 {
13187 /* Bit 30 is clear for data proc 2 source
13188 and set for data proc 1 source. */
13189 if (aarch64_get_instr (cpu) & (1U << 30))
13190 dexDataProc1Source (cpu);
13191 else
13192 dexDataProc2Source (cpu);
13193 }
13194
13195 else
13196 /* Should not reach here. */
13197 HALT_NYI;
13198
13199 return;
13200 }
13201
13202 case DPREG_3SRC_110:
13203 dexDataProc3Source (cpu); return;
13204
13205 case DPREG_UNALLOC_101:
13206 HALT_UNALLOC;
13207
13208 case DPREG_3SRC_111:
13209 dexDataProc3Source (cpu); return;
13210
13211 default:
13212 /* Should never reach here. */
13213 HALT_NYI;
13214 }
13215 }
13216
13217 /* Unconditional Branch immediate.
13218 Offset is a PC-relative byte offset in the range +/- 128MiB.
   The offset is assumed to be raw from the decode, i.e. the
   simulator is expected to scale it from a word offset to a byte offset.  */
13221
13222 /* Unconditional branch. */
13223 static void
13224 buc (sim_cpu *cpu, int32_t offset)
13225 {
13226 aarch64_set_next_PC_by_offset (cpu, offset);
13227 }
13228
13229 static unsigned stack_depth = 0;
13230
13231 /* Unconditional branch and link -- writes return PC to LR. */
13232 static void
13233 bl (sim_cpu *cpu, int32_t offset)
13234 {
13235 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13236 aarch64_save_LR (cpu);
13237 aarch64_set_next_PC_by_offset (cpu, offset);
13238
13239 if (TRACE_BRANCH_P (cpu))
13240 {
13241 ++ stack_depth;
13242 TRACE_BRANCH (cpu,
13243 " %*scall %" PRIx64 " [%s]"
13244 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13245 stack_depth, " ", aarch64_get_next_PC (cpu),
13246 aarch64_get_func (CPU_STATE (cpu),
13247 aarch64_get_next_PC (cpu)),
13248 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13249 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13250 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13251 );
13252 }
13253 }
13254
13255 /* Unconditional Branch register.
13256 Branch/return address is in source register. */
13257
13258 /* Unconditional branch. */
13259 static void
13260 br (sim_cpu *cpu)
13261 {
13262 unsigned rn = INSTR (9, 5);
13263 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13264 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13265 }
13266
13267 /* Unconditional branch and link -- writes return PC to LR. */
13268 static void
13269 blr (sim_cpu *cpu)
13270 {
13271 unsigned rn = INSTR (9, 5);
13272
13273 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The pseudo code in the spec says we update LR before fetching
     the value from rn.  */
13276 aarch64_save_LR (cpu);
13277 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13278
13279 if (TRACE_BRANCH_P (cpu))
13280 {
13281 ++ stack_depth;
13282 TRACE_BRANCH (cpu,
13283 " %*scall %" PRIx64 " [%s]"
13284 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13285 stack_depth, " ", aarch64_get_next_PC (cpu),
13286 aarch64_get_func (CPU_STATE (cpu),
13287 aarch64_get_next_PC (cpu)),
13288 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13289 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13290 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13291 );
13292 }
13293 }
13294
/* Return -- the assembler will default the source to LR.  This is
   functionally equivalent to br but, presumably, unlike br it side
   effects the branch predictor.  */
13298 static void
13299 ret (sim_cpu *cpu)
13300 {
13301 unsigned rn = INSTR (9, 5);
13302 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13303
13304 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13305 if (TRACE_BRANCH_P (cpu))
13306 {
13307 TRACE_BRANCH (cpu,
13308 " %*sreturn [result: %" PRIx64 "]",
13309 stack_depth, " ", aarch64_get_reg_u64 (cpu, 0, NO_SP));
13310 -- stack_depth;
13311 }
13312 }
13313
13314 /* NOP -- we implement this and call it from the decode in case we
13315 want to intercept it later. */
13316
13317 static void
13318 nop (sim_cpu *cpu)
13319 {
13320 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13321 }
13322
13323 /* Data synchronization barrier. */
13324
13325 static void
13326 dsb (sim_cpu *cpu)
13327 {
13328 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13329 }
13330
13331 /* Data memory barrier. */
13332
13333 static void
13334 dmb (sim_cpu *cpu)
13335 {
13336 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13337 }
13338
13339 /* Instruction synchronization barrier. */
13340
13341 static void
13342 isb (sim_cpu *cpu)
13343 {
13344 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13345 }
13346
13347 static void
13348 dexBranchImmediate (sim_cpu *cpu)
13349 {
13350 /* assert instr[30,26] == 00101
13351 instr[31] ==> 0 == B, 1 == BL
13352 instr[25,0] == imm26 branch offset counted in words. */
13353
13354 uint32_t top = INSTR (31, 31);
  /* We have a 26 bit signed word offset which we need to pass to the
     execute routine as a signed byte offset.  */
13357 int32_t offset = simm32 (aarch64_get_instr (cpu), 25, 0) << 2;
13358
13359 if (top)
13360 bl (cpu, offset);
13361 else
13362 buc (cpu, offset);
13363 }
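
/* Example: an imm26 field of 0x3ffffff sign-extends to -1 word and
   scales to a byte offset of -4 -- a branch to the previous
   instruction.  The maximum reach is +/- 2^25 words, i.e. +/- 128MiB.  */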
13364
13365 /* Control Flow. */
13366
13367 /* Conditional branch
13368
   Offset is a PC-relative byte offset in the range +/- 1MiB.  Pos is
   a bit position in the range 0 .. 63.
13371
13372 cc is a CondCode enum value as pulled out of the decode
13373
13374 N.B. any offset register (source) can only be Xn or Wn. */
13375
13376 static void
13377 bcc (sim_cpu *cpu, int32_t offset, CondCode cc)
13378 {
13379 /* The test returns TRUE if CC is met. */
13380 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13381 if (testConditionCode (cpu, cc))
13382 aarch64_set_next_PC_by_offset (cpu, offset);
13383 }
13384
13385 /* 32 bit branch on register non-zero. */
13386 static void
13387 cbnz32 (sim_cpu *cpu, int32_t offset)
13388 {
13389 unsigned rt = INSTR (4, 0);
13390
13391 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13392 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) != 0)
13393 aarch64_set_next_PC_by_offset (cpu, offset);
13394 }
13395
/* 64 bit branch on register non-zero.  */
13397 static void
13398 cbnz (sim_cpu *cpu, int32_t offset)
13399 {
13400 unsigned rt = INSTR (4, 0);
13401
13402 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13403 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) != 0)
13404 aarch64_set_next_PC_by_offset (cpu, offset);
13405 }
13406
/* 32 bit branch on register zero.  */
13408 static void
13409 cbz32 (sim_cpu *cpu, int32_t offset)
13410 {
13411 unsigned rt = INSTR (4, 0);
13412
13413 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13414 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) == 0)
13415 aarch64_set_next_PC_by_offset (cpu, offset);
13416 }
13417
13418 /* 64 bit branch on register zero. */
13419 static void
13420 cbz (sim_cpu *cpu, int32_t offset)
13421 {
13422 unsigned rt = INSTR (4, 0);
13423
13424 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13425 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) == 0)
13426 aarch64_set_next_PC_by_offset (cpu, offset);
13427 }
13428
13429 /* Branch on register bit test non-zero -- one size fits all. */
13430 static void
13431 tbnz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13432 {
13433 unsigned rt = INSTR (4, 0);
13434
13435 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13436 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos))
13437 aarch64_set_next_PC_by_offset (cpu, offset);
13438 }
13439
13440 /* Branch on register bit test zero -- one size fits all. */
13441 static void
13442 tbz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13443 {
13444 unsigned rt = INSTR (4, 0);
13445
13446 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13447 if (!(aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos)))
13448 aarch64_set_next_PC_by_offset (cpu, offset);
13449 }
13450
13451 static void
13452 dexCompareBranchImmediate (sim_cpu *cpu)
13453 {
13454 /* instr[30,25] = 01 1010
13455 instr[31] = size : 0 ==> 32, 1 ==> 64
13456 instr[24] = op : 0 ==> CBZ, 1 ==> CBNZ
13457 instr[23,5] = simm19 branch offset counted in words
13458 instr[4,0] = rt */
13459
13460 uint32_t size = INSTR (31, 31);
13461 uint32_t op = INSTR (24, 24);
13462 int32_t offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13463
13464 if (size == 0)
13465 {
13466 if (op == 0)
13467 cbz32 (cpu, offset);
13468 else
13469 cbnz32 (cpu, offset);
13470 }
13471 else
13472 {
13473 if (op == 0)
13474 cbz (cpu, offset);
13475 else
13476 cbnz (cpu, offset);
13477 }
13478 }
13479
13480 static void
13481 dexTestBranchImmediate (sim_cpu *cpu)
13482 {
13483 /* instr[31] = b5 : bit 5 of test bit idx
13484 instr[30,25] = 01 1011
13485 instr[24] = op : 0 ==> TBZ, 1 == TBNZ
13486 instr[23,19] = b40 : bits 4 to 0 of test bit idx
13487 instr[18,5] = simm14 : signed offset counted in words
13488 instr[4,0] = uimm5 */
13489
13490 uint32_t pos = ((INSTR (31, 31) << 5) | INSTR (23, 19));
13491 int32_t offset = simm32 (aarch64_get_instr (cpu), 18, 5) << 2;
13492
13493 NYI_assert (30, 25, 0x1b);
13494
13495 if (INSTR (24, 24) == 0)
13496 tbz (cpu, pos, offset);
13497 else
13498 tbnz (cpu, pos, offset);
13499 }
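
/* Example: a test of bit 40 encodes the index as b5:b40 = 1:01000 (b5
   is set because 40 >= 32), so pos above reassembles to 40.  */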
13500
13501 static void
13502 dexCondBranchImmediate (sim_cpu *cpu)
13503 {
13504 /* instr[31,25] = 010 1010
     instr[24] = op1 : 0 ==> B.cond, 1 ==> UNALLOC
     instr[23,5] = simm19 : signed offset counted in words
     instr[4] = op0 : 0 ==> B.cond, 1 ==> UNALLOC
13508 instr[3,0] = cond */
13509
13510 int32_t offset;
13511 uint32_t op = ((INSTR (24, 24) << 1) | INSTR (4, 4));
13512
13513 NYI_assert (31, 25, 0x2a);
13514
13515 if (op != 0)
13516 HALT_UNALLOC;
13517
13518 offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13519
13520 bcc (cpu, offset, INSTR (3, 0));
13521 }
13522
13523 static void
13524 dexBranchRegister (sim_cpu *cpu)
13525 {
13526 /* instr[31,25] = 110 1011
13527 instr[24,21] = op : 0 ==> BR, 1 => BLR, 2 => RET, 3 => ERET, 4 => DRPS
13528 instr[20,16] = op2 : must be 11111
13529 instr[15,10] = op3 : must be 000000
     instr[4,0]   = op4 : must be 00000.  */
13531
13532 uint32_t op = INSTR (24, 21);
13533 uint32_t op2 = INSTR (20, 16);
13534 uint32_t op3 = INSTR (15, 10);
13535 uint32_t op4 = INSTR (4, 0);
13536
13537 NYI_assert (31, 25, 0x6b);
13538
13539 if (op2 != 0x1F || op3 != 0 || op4 != 0)
13540 HALT_UNALLOC;
13541
13542 if (op == 0)
13543 br (cpu);
13544
13545 else if (op == 1)
13546 blr (cpu);
13547
13548 else if (op == 2)
13549 ret (cpu);
13550
13551 else
13552 {
      /* ERET and DRPS accept 0b11111 for rn = instr [4,0];
	 anything else is unallocated.  */
13555 uint32_t rn = INSTR (4, 0);
13556
13557 if (rn != 0x1f)
13558 HALT_UNALLOC;
13559
13560 if (op == 4 || op == 5)
13561 HALT_NYI;
13562
13563 HALT_UNALLOC;
13564 }
13565 }
13566
13567 /* FIXME: We should get the Angel SWI values from ../../libgloss/aarch64/svc.h
13568 but this may not be available. So instead we define the values we need
13569 here. */
13570 #define AngelSVC_Reason_Open 0x01
13571 #define AngelSVC_Reason_Close 0x02
13572 #define AngelSVC_Reason_Write 0x05
13573 #define AngelSVC_Reason_Read 0x06
13574 #define AngelSVC_Reason_IsTTY 0x09
13575 #define AngelSVC_Reason_Seek 0x0A
13576 #define AngelSVC_Reason_FLen 0x0C
13577 #define AngelSVC_Reason_Remove 0x0E
13578 #define AngelSVC_Reason_Rename 0x0F
13579 #define AngelSVC_Reason_Clock 0x10
13580 #define AngelSVC_Reason_Time 0x11
13581 #define AngelSVC_Reason_System 0x12
13582 #define AngelSVC_Reason_Errno 0x13
13583 #define AngelSVC_Reason_GetCmdLine 0x15
13584 #define AngelSVC_Reason_HeapInfo 0x16
13585 #define AngelSVC_Reason_ReportException 0x18
13586 #define AngelSVC_Reason_Elapsed 0x30
13587
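/* A guest reaches these services via the A64 semihosting trap: the
   reason code goes in W0, a parameter-block pointer in X1, and the
   program executes HLT #0xf000.  An illustrative fragment (the label
   "param_block" is made up for the example):

	mov	w0, #0x18		// AngelSVC_Reason_ReportException
	adr	x1, param_block		// {type, state} pair
	hlt	#0xf000			// semihosting trap

   Any result comes back in X0.  */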
13588
13589 static void
13590 handle_halt (sim_cpu *cpu, uint32_t val)
13591 {
13592 uint64_t result = 0;
13593
13594 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13595 if (val != 0xf000)
13596 {
13597 TRACE_SYSCALL (cpu, " HLT [0x%x]", val);
13598 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13599 sim_stopped, SIM_SIGTRAP);
13600 }
13601
13602 /* We have encountered an Angel SVC call. See if we can process it. */
13603 switch (aarch64_get_reg_u32 (cpu, 0, NO_SP))
13604 {
13605 case AngelSVC_Reason_HeapInfo:
13606 {
13607 /* Get the values. */
13608 uint64_t stack_top = aarch64_get_stack_start (cpu);
13609 uint64_t heap_base = aarch64_get_heap_start (cpu);
13610
13611 /* Get the pointer */
13612 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13613 ptr = aarch64_get_mem_u64 (cpu, ptr);
13614
13615 /* Fill in the memory block. */
13616 /* Start addr of heap. */
13617 aarch64_set_mem_u64 (cpu, ptr + 0, heap_base);
13618 /* End addr of heap. */
13619 aarch64_set_mem_u64 (cpu, ptr + 8, stack_top);
13620 /* Lowest stack addr. */
13621 aarch64_set_mem_u64 (cpu, ptr + 16, heap_base);
13622 /* Initial stack addr. */
13623 aarch64_set_mem_u64 (cpu, ptr + 24, stack_top);
13624
13625 TRACE_SYSCALL (cpu, " AngelSVC: Get Heap Info");
13626 }
13627 break;
13628
13629 case AngelSVC_Reason_Open:
13630 {
13631 /* Get the pointer */
13632 /* uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);. */
13633 /* FIXME: For now we just assume that we will only be asked
13634 to open the standard file descriptors. */
13635 static int fd = 0;
13636 result = fd ++;
13637
13638 TRACE_SYSCALL (cpu, " AngelSVC: Open file %d", fd - 1);
13639 }
13640 break;
13641
13642 case AngelSVC_Reason_Close:
13643 {
13644 uint64_t fh = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13645 TRACE_SYSCALL (cpu, " AngelSVC: Close file %d", (int) fh);
13646 result = 0;
13647 }
13648 break;
13649
13650 case AngelSVC_Reason_Errno:
13651 result = 0;
13652 TRACE_SYSCALL (cpu, " AngelSVC: Get Errno");
13653 break;
13654
13655 case AngelSVC_Reason_Clock:
13656 result =
13657 #ifdef CLOCKS_PER_SEC
13658 (CLOCKS_PER_SEC >= 100)
13659 ? (clock () / (CLOCKS_PER_SEC / 100))
13660 : ((clock () * 100) / CLOCKS_PER_SEC)
13661 #else
13662 /* Presume unix... clock() returns microseconds. */
13663 (clock () / 10000)
13664 #endif
13665 ;
13666 TRACE_SYSCALL (cpu, " AngelSVC: Get Clock");
13667 break;
13668
13669 case AngelSVC_Reason_GetCmdLine:
13670 {
13671 /* Get the pointer */
13672 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13673 ptr = aarch64_get_mem_u64 (cpu, ptr);
13674
13675 /* FIXME: No command line for now. */
13676 aarch64_set_mem_u64 (cpu, ptr, 0);
13677 TRACE_SYSCALL (cpu, " AngelSVC: Get Command Line");
13678 }
13679 break;
13680
13681 case AngelSVC_Reason_IsTTY:
13682 result = 1;
13683 TRACE_SYSCALL (cpu, " AngelSVC: IsTTY ?");
13684 break;
13685
13686 case AngelSVC_Reason_Write:
13687 {
13688 /* Get the pointer */
13689 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13690 /* Get the write control block. */
13691 uint64_t fd = aarch64_get_mem_u64 (cpu, ptr);
13692 uint64_t buf = aarch64_get_mem_u64 (cpu, ptr + 8);
13693 uint64_t len = aarch64_get_mem_u64 (cpu, ptr + 16);
13694
13695 TRACE_SYSCALL (cpu, "write of %" PRIx64 " bytes from %"
13696 PRIx64 " on descriptor %" PRIx64,
13697 len, buf, fd);
13698
13699 if (len > 1280)
13700 {
13701 TRACE_SYSCALL (cpu,
13702 " AngelSVC: Write: Suspiciously long write: %ld",
13703 (long) len);
13704 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13705 sim_stopped, SIM_SIGBUS);
13706 }
13707 else if (fd == 1)
13708 {
13709 printf ("%.*s", (int) len, aarch64_get_mem_ptr (cpu, buf));
13710 }
13711 else if (fd == 2)
13712 {
13713 TRACE (cpu, 0, "\n");
13714 sim_io_eprintf (CPU_STATE (cpu), "%.*s",
13715 (int) len, aarch64_get_mem_ptr (cpu, buf));
13716 TRACE (cpu, 0, "\n");
13717 }
13718 else
13719 {
13720 TRACE_SYSCALL (cpu,
13721 " AngelSVC: Write: Unexpected file handle: %d",
13722 (int) fd);
13723 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13724 sim_stopped, SIM_SIGABRT);
13725 }
13726 }
13727 break;
13728
13729 case AngelSVC_Reason_ReportException:
13730 {
13731 /* Get the pointer */
13732 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13733 /*ptr = aarch64_get_mem_u64 (cpu, ptr);. */
13734 uint64_t type = aarch64_get_mem_u64 (cpu, ptr);
13735 uint64_t state = aarch64_get_mem_u64 (cpu, ptr + 8);
13736
13737 TRACE_SYSCALL (cpu,
13738 "Angel Exception: type 0x%" PRIx64 " state %" PRIx64,
13739 type, state);
13740
13741 if (type == 0x20026)
13742 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13743 sim_exited, state);
13744 else
13745 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13746 sim_stopped, SIM_SIGINT);
13747 }
13748 break;
13749
13750 case AngelSVC_Reason_Read:
13751 case AngelSVC_Reason_FLen:
13752 case AngelSVC_Reason_Seek:
13753 case AngelSVC_Reason_Remove:
13754 case AngelSVC_Reason_Time:
13755 case AngelSVC_Reason_System:
13756 case AngelSVC_Reason_Rename:
13757 case AngelSVC_Reason_Elapsed:
13758 default:
13759 TRACE_SYSCALL (cpu, " HLT [Unknown angel %x]",
13760 aarch64_get_reg_u32 (cpu, 0, NO_SP));
13761 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13762 sim_stopped, SIM_SIGTRAP);
13763 }
13764
13765 aarch64_set_reg_u64 (cpu, 0, NO_SP, result);
13766 }
13767
13768 static void
13769 dexExcpnGen (sim_cpu *cpu)
13770 {
13771 /* instr[31:24] = 11010100
13772 instr[23,21] = opc : 000 ==> GEN EXCPN, 001 ==> BRK
13773 010 ==> HLT, 101 ==> DBG GEN EXCPN
13774 instr[20,5] = imm16
13775 instr[4,2] = opc2 000 ==> OK, ow ==> UNALLOC
13776 instr[1,0] = LL : discriminates opc */
13777
13778 uint32_t opc = INSTR (23, 21);
13779 uint32_t imm16 = INSTR (20, 5);
13780 uint32_t opc2 = INSTR (4, 2);
13781 uint32_t LL;
13782
13783 NYI_assert (31, 24, 0xd4);
13784
13785 if (opc2 != 0)
13786 HALT_UNALLOC;
13787
13788 LL = INSTR (1, 0);
13789
13790 /* We only implement HLT and BRK for now. */
13791 if (opc == 1 && LL == 0)
13792 {
13793 TRACE_EVENTS (cpu, " BRK [0x%x]", imm16);
13794 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13795 sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
13796 }
13797
13798 if (opc == 2 && LL == 0)
13799 handle_halt (cpu, imm16);
13800
13801 else if (opc == 0 || opc == 5)
13802 HALT_NYI;
13803
13804 else
13805 HALT_UNALLOC;
13806 }
13807
13808 /* Stub for accessing system registers. */
13809
13810 static uint64_t
13811 system_get (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13812 unsigned crm, unsigned op2)
13813 {
13814 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 7)
13815 /* DCZID_EL0 - the Data Cache Zero ID register.
13816 We do not support DC ZVA at the moment, so
13817 we return a value with the disable bit set.
13818 We implement support for the DCZID register since
13819 it is used by the C library's memset function. */
13820 return ((uint64_t) 1) << 4;
13821
13822 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 1)
13823 /* Cache Type Register. */
13824 return 0x80008000UL;
13825
13826 if (crn == 13 && op1 == 3 && crm == 0 && op2 == 2)
13827 /* TPIDR_EL0 - thread pointer id. */
13828 return aarch64_get_thread_id (cpu);
13829
13830 if (op1 == 3 && crm == 4 && op2 == 0)
13831 return aarch64_get_FPCR (cpu);
13832
13833 if (op1 == 3 && crm == 4 && op2 == 1)
13834 return aarch64_get_FPSR (cpu);
13835
13836 else if (op1 == 3 && crm == 2 && op2 == 0)
13837 return aarch64_get_CPSR (cpu);
13838
13839 HALT_NYI;
13840 }
13841
13842 static void
13843 system_set (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13844 unsigned crm, unsigned op2, uint64_t val)
13845 {
13846 if (op1 == 3 && crm == 4 && op2 == 0)
13847 aarch64_set_FPCR (cpu, val);
13848
13849 else if (op1 == 3 && crm == 4 && op2 == 1)
13850 aarch64_set_FPSR (cpu, val);
13851
13852 else if (op1 == 3 && crm == 2 && op2 == 0)
13853 aarch64_set_CPSR (cpu, val);
13854
13855 else
13856 HALT_NYI;
13857 }
13858
13859 static void
13860 do_mrs (sim_cpu *cpu)
13861 {
  /* instr[31:20] = 1101 0101 0011
13863 instr[19] = op0
13864 instr[18,16] = op1
13865 instr[15,12] = CRn
13866 instr[11,8] = CRm
13867 instr[7,5] = op2
13868 instr[4,0] = Rt */
13869 unsigned sys_op0 = INSTR (19, 19) + 2;
13870 unsigned sys_op1 = INSTR (18, 16);
13871 unsigned sys_crn = INSTR (15, 12);
13872 unsigned sys_crm = INSTR (11, 8);
13873 unsigned sys_op2 = INSTR (7, 5);
13874 unsigned rt = INSTR (4, 0);
13875
13876 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13877 aarch64_set_reg_u64 (cpu, rt, NO_SP,
13878 system_get (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2));
13879 }
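
/* Example: "mrs x0, tpidr_el0" encodes op0 = 3, op1 = 3, CRn = 13,
   CRm = 0, op2 = 2, so the lookup above returns the simulated thread
   pointer from system_get.  */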
13880
13881 static void
13882 do_MSR_immediate (sim_cpu *cpu)
13883 {
13884 /* instr[31:19] = 1101 0101 0000 0
13885 instr[18,16] = op1
13886 instr[15,12] = 0100
13887 instr[11,8] = CRm
13888 instr[7,5] = op2
13889 instr[4,0] = 1 1111 */
13890
13891 unsigned op1 = INSTR (18, 16);
13892 /*unsigned crm = INSTR (11, 8);*/
13893 unsigned op2 = INSTR (7, 5);
13894
13895 NYI_assert (31, 19, 0x1AA0);
13896 NYI_assert (15, 12, 0x4);
13897 NYI_assert (4, 0, 0x1F);
13898
13899 if (op1 == 0)
13900 {
13901 if (op2 == 5)
13902 HALT_NYI; /* set SPSel. */
13903 else
13904 HALT_UNALLOC;
13905 }
13906 else if (op1 == 3)
13907 {
13908 if (op2 == 6)
13909 HALT_NYI; /* set DAIFset. */
13910 else if (op2 == 7)
13911 HALT_NYI; /* set DAIFclr. */
13912 else
13913 HALT_UNALLOC;
13914 }
13915 else
13916 HALT_UNALLOC;
13917 }
13918
13919 static void
13920 do_MSR_reg (sim_cpu *cpu)
13921 {
13922 /* instr[31:20] = 1101 0101 0001
13923 instr[19] = op0
13924 instr[18,16] = op1
13925 instr[15,12] = CRn
13926 instr[11,8] = CRm
13927 instr[7,5] = op2
13928 instr[4,0] = Rt */
13929
13930 unsigned sys_op0 = INSTR (19, 19) + 2;
13931 unsigned sys_op1 = INSTR (18, 16);
13932 unsigned sys_crn = INSTR (15, 12);
13933 unsigned sys_crm = INSTR (11, 8);
13934 unsigned sys_op2 = INSTR (7, 5);
13935 unsigned rt = INSTR (4, 0);
13936
13937 NYI_assert (31, 20, 0xD51);
13938
13939 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13940 system_set (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2,
13941 aarch64_get_reg_u64 (cpu, rt, NO_SP));
13942 }
13943
13944 static void
13945 do_SYS (sim_cpu *cpu)
13946 {
13947 /* instr[31,19] = 1101 0101 0000 1
13948 instr[18,16] = op1
13949 instr[15,12] = CRn
13950 instr[11,8] = CRm
13951 instr[7,5] = op2
13952 instr[4,0] = Rt */
13953 NYI_assert (31, 19, 0x1AA1);
13954
13955 /* FIXME: For now we just silently accept system ops. */
13956 }
13957
13958 static void
13959 dexSystem (sim_cpu *cpu)
13960 {
13961 /* instr[31:22] = 1101 01010 0
13962 instr[21] = L
13963 instr[20,19] = op0
13964 instr[18,16] = op1
13965 instr[15,12] = CRn
13966 instr[11,8] = CRm
13967 instr[7,5] = op2
13968 instr[4,0] = uimm5 */
13969
13970 /* We are interested in HINT, DSB, DMB and ISB
13971
     Hint #0 encodes NOOP (this is the only hint we care about);
     L == 0, op0 == 0, op1 = 011, CRn = 0010, Rt = 11111, and either
     CRm:op2 == 0000 000 (a true NOP) or an unallocated hint
     (CRm != 0000, or CRm == 0000 and op2 > 101), which we also ignore.
13975
     DSB, DMB, ISB are data synchronization barrier, data memory
     barrier and instruction synchronization barrier, respectively, where
13978
13979 L == 0, op0 == 0, op1 = 011, CRn = 0011, Rt = 11111,
13980 op2 : DSB ==> 100, DMB ==> 101, ISB ==> 110
13981 CRm<3:2> ==> domain, CRm<1:0> ==> types,
13982 domain : 00 ==> OuterShareable, 01 ==> Nonshareable,
              10 ==> InnerShareable, 11 ==> FullSystem
13984 types : 01 ==> Reads, 10 ==> Writes,
13985 11 ==> All, 00 ==> All (domain == FullSystem). */
13986
13987 unsigned rt = INSTR (4, 0);
13988
13989 NYI_assert (31, 22, 0x354);
13990
13991 switch (INSTR (21, 12))
13992 {
13993 case 0x032:
13994 if (rt == 0x1F)
13995 {
	  /* NOP has CRm != 0000, OR
	     (CRm == 0000 AND (op2 == 000 OR op2 > 101)).  */
13998 uint32_t crm = INSTR (11, 8);
13999 uint32_t op2 = INSTR (7, 5);
14000
14001 if (crm != 0 || (op2 == 0 || op2 > 5))
14002 {
14003 /* Actually call nop method so we can reimplement it later. */
14004 nop (cpu);
14005 return;
14006 }
14007 }
14008 HALT_NYI;
14009
14010 case 0x033:
14011 {
14012 uint32_t op2 = INSTR (7, 5);
14013
14014 switch (op2)
14015 {
14016 case 2: HALT_NYI;
14017 case 4: dsb (cpu); return;
14018 case 5: dmb (cpu); return;
14019 case 6: isb (cpu); return;
14020 default: HALT_UNALLOC;
14021 }
14022 }
14023
14024 case 0x3B0:
14025 case 0x3B4:
14026 case 0x3BD:
14027 do_mrs (cpu);
14028 return;
14029
14030 case 0x0B7:
14031 do_SYS (cpu); /* DC is an alias of SYS. */
14032 return;
14033
14034 default:
14035 if (INSTR (21, 20) == 0x1)
14036 do_MSR_reg (cpu);
14037 else if (INSTR (21, 19) == 0 && INSTR (15, 12) == 0x4)
14038 do_MSR_immediate (cpu);
14039 else
14040 HALT_NYI;
14041 return;
14042 }
14043 }
14044
14045 static void
14046 dexBr (sim_cpu *cpu)
14047 {
14048 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
14049 assert group == GROUP_BREXSYS_1010 || group == GROUP_BREXSYS_1011
14050 bits [31,29] of a BrExSys are the secondary dispatch vector. */
14051 uint32_t group2 = dispatchBrExSys (aarch64_get_instr (cpu));
14052
14053 switch (group2)
14054 {
14055 case BR_IMM_000:
14056 return dexBranchImmediate (cpu);
14057
14058 case BR_IMMCMP_001:
14059 /* Compare has bit 25 clear while test has it set. */
14060 if (!INSTR (25, 25))
14061 dexCompareBranchImmediate (cpu);
14062 else
14063 dexTestBranchImmediate (cpu);
14064 return;
14065
14066 case BR_IMMCOND_010:
14067 /* This is a conditional branch if bit 25 is clear otherwise
14068 unallocated. */
14069 if (!INSTR (25, 25))
14070 dexCondBranchImmediate (cpu);
14071 else
14072 HALT_UNALLOC;
14073 return;
14074
14075 case BR_UNALLOC_011:
14076 HALT_UNALLOC;
14077
14078 case BR_IMM_100:
14079 dexBranchImmediate (cpu);
14080 return;
14081
14082 case BR_IMMCMP_101:
14083 /* Compare has bit 25 clear while test has it set. */
14084 if (!INSTR (25, 25))
14085 dexCompareBranchImmediate (cpu);
14086 else
14087 dexTestBranchImmediate (cpu);
14088 return;
14089
14090 case BR_REG_110:
14091 /* Unconditional branch reg has bit 25 set. */
14092 if (INSTR (25, 25))
14093 dexBranchRegister (cpu);
14094
14095 /* This includes both Excpn Gen, System and unalloc operations.
14096 We need to decode the Excpn Gen operation BRK so we can plant
14097 debugger entry points.
14098 Excpn Gen operations have instr [24] = 0.
14099 we need to decode at least one of the System operations NOP
14100 which is an alias for HINT #0.
14101 System operations have instr [24,22] = 100. */
14102 else if (INSTR (24, 24) == 0)
14103 dexExcpnGen (cpu);
14104
14105 else if (INSTR (24, 22) == 4)
14106 dexSystem (cpu);
14107
14108 else
14109 HALT_UNALLOC;
14110
14111 return;
14112
14113 case BR_UNALLOC_111:
14114 HALT_UNALLOC;
14115
14116 default:
14117 /* Should never reach here. */
14118 HALT_NYI;
14119 }
14120 }
14121
14122 static void
14123 aarch64_decode_and_execute (sim_cpu *cpu, uint64_t pc)
14124 {
  /* We need to check whether gdb wants control here.  */
14126 /* checkBreak (cpu);. */
14127
14128 uint64_t group = dispatchGroup (aarch64_get_instr (cpu));
14129
14130 switch (group)
14131 {
14132 case GROUP_PSEUDO_0000: dexPseudo (cpu); break;
14133 case GROUP_LDST_0100: dexLdSt (cpu); break;
14134 case GROUP_DPREG_0101: dexDPReg (cpu); break;
14135 case GROUP_LDST_0110: dexLdSt (cpu); break;
14136 case GROUP_ADVSIMD_0111: dexAdvSIMD0 (cpu); break;
14137 case GROUP_DPIMM_1000: dexDPImm (cpu); break;
14138 case GROUP_DPIMM_1001: dexDPImm (cpu); break;
14139 case GROUP_BREXSYS_1010: dexBr (cpu); break;
14140 case GROUP_BREXSYS_1011: dexBr (cpu); break;
14141 case GROUP_LDST_1100: dexLdSt (cpu); break;
14142 case GROUP_DPREG_1101: dexDPReg (cpu); break;
14143 case GROUP_LDST_1110: dexLdSt (cpu); break;
14144 case GROUP_ADVSIMD_1111: dexAdvSIMD1 (cpu); break;
14145
14146 case GROUP_UNALLOC_0001:
14147 case GROUP_UNALLOC_0010:
14148 case GROUP_UNALLOC_0011:
14149 HALT_UNALLOC;
14150
14151 default:
14152 /* Should never reach here. */
14153 HALT_NYI;
14154 }
14155 }
14156
14157 static bfd_boolean
14158 aarch64_step (sim_cpu *cpu)
14159 {
14160 uint64_t pc = aarch64_get_PC (cpu);
14161
14162 if (pc == TOP_LEVEL_RETURN_PC)
14163 return FALSE;
14164
14165 aarch64_set_next_PC (cpu, pc + 4);
14166
14167 /* Code is always little-endian. */
14168 sim_core_read_buffer (CPU_STATE (cpu), cpu, read_map,
14169 & aarch64_get_instr (cpu), pc, 4);
14170 aarch64_get_instr (cpu) = endian_le2h_4 (aarch64_get_instr (cpu));
14171
14172 TRACE_INSN (cpu, " pc = %" PRIx64 " instr = %08x", pc,
14173 aarch64_get_instr (cpu));
14174 TRACE_DISASM (cpu, pc);
14175
14176 aarch64_decode_and_execute (cpu, pc);
14177
14178 return TRUE;
14179 }
14180
14181 void
14182 aarch64_run (SIM_DESC sd)
14183 {
14184 sim_cpu *cpu = STATE_CPU (sd, 0);
14185
14186 while (aarch64_step (cpu))
14187 {
14188 aarch64_update_PC (cpu);
14189
14190 if (sim_events_tick (sd))
14191 sim_events_process (sd);
14192 }
14193
14194 sim_engine_halt (sd, cpu, NULL, aarch64_get_PC (cpu),
14195 sim_exited, aarch64_get_reg_s32 (cpu, R0, NO_SP));
14196 }
14197
14198 void
14199 aarch64_init (sim_cpu *cpu, uint64_t pc)
14200 {
14201 uint64_t sp = aarch64_get_stack_start (cpu);
14202
14203 /* Install SP, FP and PC and set LR to -20
14204 so we can detect a top-level return. */
14205 aarch64_set_reg_u64 (cpu, SP, SP_OK, sp);
14206 aarch64_set_reg_u64 (cpu, FP, SP_OK, sp);
14207 aarch64_set_reg_u64 (cpu, LR, SP_OK, TOP_LEVEL_RETURN_PC);
14208 aarch64_set_next_PC (cpu, pc);
14209 aarch64_update_PC (cpu);
14210 aarch64_init_LIT_table ();
14211 }