/* simulator.c -- Interface for the AArch64 simulator.

   Copyright (C) 2015-2016 Free Software Foundation, Inc.

   Contributed by Red Hat.

   This file is part of GDB.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <math.h>
#include <time.h>
#include <limits.h>

#include "simulator.h"
#include "cpustate.h"
#include "memory.h"

#define NO_SP 0
#define SP_OK 1

#define TST(_flag)   (aarch64_test_CPSR_bit (cpu, _flag))
#define IS_SET(_X)   (TST (( _X )) ? 1 : 0)
#define IS_CLEAR(_X) (TST (( _X )) ? 0 : 1)

/* Space saver macro.  */
#define INSTR(HIGH, LOW) uimm (aarch64_get_instr (cpu), (HIGH), (LOW))

#define HALT_UNALLOC                                                    \
  do                                                                    \
    {                                                                   \
      TRACE_DISASM (cpu, aarch64_get_PC (cpu));                         \
      TRACE_INSN (cpu,                                                  \
                  "Unallocated instruction detected at sim line %d,"    \
                  " exe addr %" PRIx64,                                 \
                  __LINE__, aarch64_get_PC (cpu));                      \
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
                       sim_stopped, SIM_SIGILL);                        \
    }                                                                   \
  while (0)

#define HALT_NYI                                                        \
  do                                                                    \
    {                                                                   \
      TRACE_DISASM (cpu, aarch64_get_PC (cpu));                         \
      TRACE_INSN (cpu,                                                  \
                  "Unimplemented instruction detected at sim line %d,"  \
                  " exe addr %" PRIx64,                                 \
                  __LINE__, aarch64_get_PC (cpu));                      \
      if (! TRACE_ANY_P (cpu))                                          \
        {                                                               \
          sim_io_eprintf (CPU_STATE (cpu), "SIM Error: Unimplemented instruction: "); \
          trace_disasm (CPU_STATE (cpu), cpu, aarch64_get_PC (cpu));    \
        }                                                               \
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
                       sim_stopped, SIM_SIGABRT);                       \
    }                                                                   \
  while (0)

#define NYI_assert(HI, LO, EXPECTED)                                    \
  do                                                                    \
    {                                                                   \
      if (INSTR ((HI), (LO)) != (EXPECTED))                             \
        HALT_NYI;                                                       \
    }                                                                   \
  while (0)
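
/* Illustrative use only: a decoder that has already matched an
   instruction class can verify the remaining fixed bits with
   NYI_assert (29, 24, 0x0E), halting via HALT_NYI if bits [29,24]
   of the instruction word are not 0x0E.  */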

/* Helper functions used by expand_logical_immediate.  */

/* For i = 1, ..., N set result<i-1> = 1; all other bits are zero.  */
static inline uint64_t
ones (int N)
{
  return (N == 64 ? (uint64_t)-1UL : ((1UL << N) - 1));
}

/* Return val<N>, i.e. bit N of VAL, in result<0>.  */
static inline uint64_t
pickbit (uint64_t val, int N)
{
  return pickbits64 (val, N, N);
}
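
/* For illustration: ones (3) yields 0x7, ones (64) yields
   0xffffffffffffffff, and pickbit (0x10, 4) yields 1.  */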

static uint64_t
expand_logical_immediate (uint32_t S, uint32_t R, uint32_t N)
{
  uint64_t mask;
  uint64_t imm;
  unsigned simd_size;

  /* The immediate value consists of S+1 bits set to 1, left rotated
     by SIMDsize - R (in other words, right rotated by R), then
     replicated.  */
  if (N != 0)
    {
      simd_size = 64;
      mask = 0xffffffffffffffffull;
    }
  else
    {
      switch (S)
        {
        case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32;           break;
        case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; S &= 0xf; break;
        case 0x30 ... 0x37: /* 110xxx */ simd_size =  8; S &= 0x7; break;
        case 0x38 ... 0x3b: /* 1110xx */ simd_size =  4; S &= 0x3; break;
        case 0x3c ... 0x3d: /* 11110x */ simd_size =  2; S &= 0x1; break;
        default: return 0;
        }
      mask = (1ull << simd_size) - 1;
      /* Top bits are IGNORED.  */
      R &= simd_size - 1;
    }

  /* NOTE: if S = simd_size - 1 we get 0xf..f which is rejected.  */
  if (S == simd_size - 1)
    return 0;

  /* S+1 consecutive bits to 1.  */
  /* NOTE: S can't be 63 due to detection above.  */
  imm = (1ull << (S + 1)) - 1;

  /* Rotate to the left by simd_size - R.  */
  if (R != 0)
    imm = ((imm << (simd_size - R)) & mask) | (imm >> R);

  /* Replicate the value according to SIMD size.  Each case falls
     through to the next, doubling the pattern's width each time.  */
  switch (simd_size)
    {
    case  2: imm = (imm <<  2) | imm; /* Fall through.  */
    case  4: imm = (imm <<  4) | imm; /* Fall through.  */
    case  8: imm = (imm <<  8) | imm; /* Fall through.  */
    case 16: imm = (imm << 16) | imm; /* Fall through.  */
    case 32: imm = (imm << 32) | imm; /* Fall through.  */
    case 64: break;
    default: return 0;
    }

  return imm;
}
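
/* Worked example (illustrative only): N=0, imms=0x3c, immr=0 selects
   a 2-bit element (S reduces to 0), so imm starts as 0b01 and the
   fall-through replication above expands it to 0x5555555555555555.
   With N=1, imms=0, immr=1 the single set bit is rotated right by
   one, giving 0x8000000000000000.  */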

/* Instr[22,10] encodes N, immr and imms.  We want a lookup table
   for each possible combination, i.e. 13 bits worth of entries.  */
#define LI_TABLE_SIZE (1 << 13)
static uint64_t LITable[LI_TABLE_SIZE];

void
aarch64_init_LIT_table (void)
{
  unsigned index;

  for (index = 0; index < LI_TABLE_SIZE; index++)
    {
      uint32_t N    = uimm (index, 12, 12);
      uint32_t immr = uimm (index, 11,  6);
      uint32_t imms = uimm (index,  5,  0);

      LITable [index] = expand_logical_immediate (imms, immr, N);
    }
}
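
/* A sketch of the intended use (the logical-immediate decoder itself
   is not part of this excerpt): a decoder extracts the 13 bits
   N:immr:imms with INSTR (22, 10) and indexes LITable with them,
   treating a stored value of 0 as an unallocated encoding.  */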

static void
dexNotify (sim_cpu *cpu)
{
  /* instr[14,0] == type : 0 ==> method entry, 1 ==> method reentry
                           2 ==> exit Java, 3 ==> start next bytecode.  */
  uint32_t type = INSTR (14, 0);

  TRACE_EVENTS (cpu, "Notify Insn encountered, type = 0x%x", type);

  switch (type)
    {
    case 0:
      /* aarch64_notifyMethodEntry (aarch64_get_reg_u64 (cpu, R23, 0),
                                    aarch64_get_reg_u64 (cpu, R22, 0));  */
      break;
    case 1:
      /* aarch64_notifyMethodReentry (aarch64_get_reg_u64 (cpu, R23, 0),
                                      aarch64_get_reg_u64 (cpu, R22, 0));  */
      break;
    case 2:
      /* aarch64_notifyMethodExit ();  */
      break;
    case 3:
      /* aarch64_notifyBCStart (aarch64_get_reg_u64 (cpu, R23, 0),
                                aarch64_get_reg_u64 (cpu, R22, 0));  */
      break;
    }
}

/* Secondary decode within top level groups.  */

static void
dexPseudo (sim_cpu *cpu)
{
  /* assert instr[28,27] = 00

     We provide 2 pseudo instructions:

     HALT stops execution of the simulator causing an immediate
     return to the x86 code which entered it.

     CALLOUT initiates recursive entry into x86 code.  A register
     argument holds the address of the x86 routine.  Immediate
     values in the instruction identify the number of general
     purpose and floating point register arguments to be passed
     and the type of any value to be returned.  */

  uint32_t PSEUDO_HALT     = 0xE0000000U;
  uint32_t PSEUDO_CALLOUT  = 0x00018000U;
  uint32_t PSEUDO_CALLOUTR = 0x00018001U;
  uint32_t PSEUDO_NOTIFY   = 0x00014000U;
  uint32_t dispatch;

  if (aarch64_get_instr (cpu) == PSEUDO_HALT)
    {
      TRACE_EVENTS (cpu, " Pseudo Halt Instruction");
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                       sim_stopped, SIM_SIGTRAP);
    }

  dispatch = INSTR (31, 15);

  /* We do not handle callouts at the moment.  */
  if (dispatch == PSEUDO_CALLOUT || dispatch == PSEUDO_CALLOUTR)
    {
      TRACE_EVENTS (cpu, " Callout");
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                       sim_stopped, SIM_SIGABRT);
    }

  else if (dispatch == PSEUDO_NOTIFY)
    dexNotify (cpu);

  else
    HALT_UNALLOC;
}

/* Load-store single register (unscaled offset)
   These instructions employ a base register plus an unscaled signed
   9 bit offset.

   N.B. the base register (source) can be Xn or SP.  All other
   registers may not be SP.  */

/* 32 bit load 32 bit unscaled signed 9 bit.  */
static void
ldur32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 64 bit load 64 bit unscaled signed 9 bit.  */
static void
ldur64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 32 bit load zero-extended byte unscaled signed 9 bit.  */
static void
ldurb32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 32 bit load sign-extended byte unscaled signed 9 bit.  */
static void
ldursb32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 64 bit load sign-extended byte unscaled signed 9 bit.  */
static void
ldursb64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s8
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 32 bit load zero-extended short unscaled signed 9 bit.  */
static void
ldurh32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_mem_u16
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 32 bit load sign-extended short unscaled signed 9 bit.  */
static void
ldursh32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s16
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 64 bit load sign-extended short unscaled signed 9 bit.  */
static void
ldursh64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s16
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 64 bit load sign-extended word unscaled signed 9 bit.  */
static void
ldursw (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  /* The loaded word must be sign-extended to 64 bits, so use the
     signed setter and do not truncate to 32 bits first.  */
  aarch64_set_reg_s64 (cpu, rd, NO_SP, aarch64_get_mem_s32
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* N.B. with stores the value in source is written to the address
   identified by source2 modified by offset.  */

/* 32 bit store 32 bit unscaled signed 9 bit.  */
static void
stur32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  aarch64_set_mem_u32 (cpu,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                       aarch64_get_reg_u32 (cpu, rd, NO_SP));
}

/* 64 bit store 64 bit unscaled signed 9 bit.  */
static void
stur64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  aarch64_set_mem_u64 (cpu,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                       aarch64_get_reg_u64 (cpu, rd, NO_SP));
}

/* 32 bit store byte unscaled signed 9 bit.  */
static void
sturb (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  aarch64_set_mem_u8 (cpu,
                      aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                      aarch64_get_reg_u8 (cpu, rd, NO_SP));
}

/* 32 bit store short unscaled signed 9 bit.  */
static void
sturh (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  aarch64_set_mem_u16 (cpu,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                       aarch64_get_reg_u16 (cpu, rd, NO_SP));
}

/* Load single register pc-relative label
   Offset is a signed 19 bit immediate count in words
   rt may not be SP.  */

/* 32 bit pc-relative load.  */
static void
ldr32_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_mem_u32
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* 64 bit pc-relative load.  */
static void
ldr_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_mem_u64
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* Sign extended 32 bit pc-relative load.  */
static void
ldrsw_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_mem_s32
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* Float pc-relative load.  */
static void
fldrs_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int rd = INSTR (4, 0);

  aarch64_set_vec_u32 (cpu, rd, 0,
                       aarch64_get_mem_u32
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* Double pc-relative load.  */
static void
fldrd_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int st = INSTR (4, 0);

  aarch64_set_vec_u64 (cpu, st, 0,
                       aarch64_get_mem_u64
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* Long double pc-relative load.  */
static void
fldrq_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int st = INSTR (4, 0);
  uint64_t addr = aarch64_get_PC (cpu) + offset * 4;
  FRegister a;

  aarch64_get_mem_long_double (cpu, addr, & a);
  aarch64_set_FP_long_double (cpu, st, a);
}

/* This can be used to scale an offset by applying
   the requisite shift.  The second argument is either
   16, 32 or 64.  */

#define SCALE(_offset, _elementSize) \
    ((_offset) << ScaleShift ## _elementSize)

/* This can be used to optionally scale a register derived offset
   by applying the requisite shift as indicated by the Scaling
   argument.  The second argument is either Byte, Short, Word
   or Long.  The third argument is either Scaled or Unscaled.
   N.B. when _Scaling is Scaled the shift gets ANDed with
   all 1s while when it is Unscaled it gets ANDed with 0.  */

#define OPT_SCALE(_offset, _elementType, _Scaling) \
  ((_offset) << (_Scaling ? ScaleShift ## _elementType : 0))
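
/* For example, with a raw immediate of 3 a 32-bit access computes
   SCALE (3, 32) == 3 << ScaleShift32, i.e. a byte offset of 12,
   assuming the ScaleShift constants are log2 of the element's byte
   size, as their use throughout this file implies.  */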

/* This can be used to zero or sign extend a 32 bit register derived
   value to a 64 bit value.  The first argument must be the value as
   a uint32_t and the second must be either UXTW or SXTW.  The result
   is returned as an int64_t.  */

static inline int64_t
extend (uint32_t value, Extension extension)
{
  union
  {
    uint32_t u;
    int32_t  n;
  } x;

  /* A branchless variant of this ought to be possible.  */
  if (extension == UXTW || extension == NoExtension)
    return value;

  x.u = value;
  return x.n;
}
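
/* For illustration: extend (0xffffff80, SXTW) reinterprets the value
   as the signed 32-bit integer -128 and so returns
   0xffffffffffffff80, while extend (0xffffff80, UXTW) returns
   0x00000000ffffff80 unchanged.  */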

/* Scalar Floating Point

   FP load/store single register (4 addressing modes)

   N.B. the base register (source) can be the stack pointer.
   The secondary source register (source2) can only be an Xn register.  */

/* Load 32 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32 (cpu, address));
  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 8 bit with unsigned 12 bit offset.  */
static void
fldrb_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;

  /* An 8 bit load must read 8 bits from memory, not 32.  */
  aarch64_set_vec_u8 (cpu, rd, 0, aarch64_get_mem_u8 (cpu, addr));
}

/* Load 16 bit scaled unsigned 12 bit.  */
static void
fldrh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16);

  aarch64_set_vec_u16 (cpu, rd, 0, aarch64_get_mem_u16 (cpu, addr));
}

/* Load 32 bit scaled unsigned 12 bit.  */
static void
fldrs_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32);

  aarch64_set_vec_u32 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr));
}

/* Load 64 bit scaled unsigned 12 bit.  */
static void
fldrd_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64);

  aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
}

/* Load 128 bit scaled unsigned 12 bit.  */
static void
fldrq_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);

  aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
  aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_mem_u64 (cpu, addr + 8));
}

/* Load 32 bit scaled or unscaled zero- or sign-extended
   32-bit register offset.  */
static void
fldrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
                       (cpu, address + displacement));
}

/* Load 64 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 64 bit scaled or unscaled zero- or sign-extended
   32-bit register offset.  */
static void
fldrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  fldrd_wb (cpu, displacement, NoWriteBack);
}

/* Load 128 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  FRegister a;
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  aarch64_get_mem_long_double (cpu, address, & a);
  aarch64_set_FP_long_double (cpu, st, a);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 128 bit scaled or unscaled zero- or sign-extended
   32-bit register offset.  */
static void
fldrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 128, scaling);

  fldrq_wb (cpu, displacement, NoWriteBack);
}

/* Memory Access

   load-store single register
   There are four addressing modes available here which all employ a
   64 bit source (base) register.

   N.B. the base register (source) can be the stack pointer.
   The secondary source register (source2) can only be an Xn register.

   Scaled, 12-bit, unsigned immediate offset, without pre- and
   post-index options.
   Unscaled, 9-bit, signed immediate offset with pre- or post-index
   writeback.
   Scaled or unscaled 64-bit register offset.
   Scaled or unscaled 32-bit extended register offset.

   All offsets are assumed to be raw from the decode, i.e. the
   simulator is expected to adjust scaled offsets based on the
   accessed data size.  The same applies to the register and extended
   register offset versions, except that in the latter case the
   operation may also require a sign extend.

   A separate method is provided for each possible addressing mode.  */

/* 32 bit load 32 bit scaled unsigned 12 bit.  */
static void
ldr32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + SCALE (offset, 32)));
}

/* 32 bit load 32 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
ldr32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load 32 bit scaled or unscaled
   zero- or sign-extended 32-bit register offset.  */
static void
ldr32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       aarch64_get_mem_u32 (cpu, address + displacement));
}

/* 64 bit load 64 bit scaled unsigned 12 bit.  */
static void
ldr_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + SCALE (offset, 64)));
}

/* 64 bit load 64 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
ldr_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load 64 bit scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldr_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       aarch64_get_mem_u64 (cpu, address + displacement));
}

/* 32 bit load zero-extended byte scaled unsigned 12 bit.  */
static void
ldrb32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  /* The target register may not be SP but the source may be.
     There is no scaling required for a byte load.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       aarch64_get_mem_u8
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
}

/* 32 bit load zero-extended byte unscaled signed 9 bit with
   pre- or post-writeback.  */
static void
ldrb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load zero-extended byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                 extension);

  /* There is no scaling required for a byte load.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       aarch64_get_mem_u8 (cpu, address + displacement));
}

/* 64 bit load sign-extended byte unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;
  int64_t val;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  val = aarch64_get_mem_s8 (cpu, address);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended byte scaled unsigned 12 bit.  */
static void
ldrsb_abs (sim_cpu *cpu, uint32_t offset)
{
  ldrsb_wb (cpu, offset, NoWriteBack);
}

/* 64 bit load sign-extended byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                 extension);
  /* There is no scaling required for a byte load.  */
  aarch64_set_reg_s64 (cpu, rt, NO_SP,
                       aarch64_get_mem_s8 (cpu, address + displacement));
}

/* 32 bit load zero-extended short scaled unsigned 12 bit.  */
static void
ldrh32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint32_t val;

  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 16));
  aarch64_set_reg_u32 (cpu, rt, NO_SP, val);
}

/* 32 bit load zero-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  aarch64_set_reg_u32 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load zero-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  aarch64_set_reg_u32 (cpu, rt, NO_SP,
                       aarch64_get_mem_u16 (cpu, address + displacement));
}

/* 32 bit load sign-extended short scaled unsigned 12 bit.  */
static void
ldrsh32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int32_t val;

  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 16));
  aarch64_set_reg_s32 (cpu, rt, NO_SP, val);
}

/* 32 bit load sign-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  aarch64_set_reg_s32 (cpu, rt, NO_SP,
                       (int32_t) aarch64_get_mem_s16 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load sign-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  aarch64_set_reg_s32 (cpu, rt, NO_SP,
                       (int32_t) aarch64_get_mem_s16
                       (cpu, address + displacement));
}

/* 64 bit load sign-extended short scaled unsigned 12 bit.  */
static void
ldrsh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int64_t val;

  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 16));
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsh64_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;
  int64_t val;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  val = aarch64_get_mem_s16 (cpu, address);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);
  int64_t val;

  val = aarch64_get_mem_s16 (cpu, address + displacement);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended 32 bit scaled unsigned 12 bit.  */
static void
ldrsw_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int64_t val;

  val = aarch64_get_mem_s32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 32));
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended 32 bit unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsw_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended 32 bit scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsw_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  aarch64_set_reg_s64 (cpu, rt, NO_SP,
                       aarch64_get_mem_s32 (cpu, address + displacement));
}

/* N.B. with stores the value in source is written to the
   address identified by source2 modified by source3/offset.  */

/* 32 bit store scaled unsigned 12 bit.  */
static void
str32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  /* The target register may not be SP but the source may be.  */
  aarch64_set_mem_u32 (cpu, (aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 32)),
                       aarch64_get_reg_u32 (cpu, rt, NO_SP));
}

/* 32 bit store unscaled signed 9 bit with pre- or post-writeback.  */
static void
str32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  if (wb != Post)
    address += offset;

  aarch64_set_mem_u32 (cpu, address, aarch64_get_reg_u32 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store scaled or unscaled zero- or
   sign-extended 32-bit register offset.  */
static void
str32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  /* A 32 bit store only needs the low word of the source register.  */
  aarch64_set_mem_u32 (cpu, address + displacement,
                       aarch64_get_reg_u32 (cpu, rt, NO_SP));
}

/* 64 bit store scaled unsigned 12 bit.  */
static void
str_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  aarch64_set_mem_u64 (cpu,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK)
                       + SCALE (offset, 64),
                       aarch64_get_reg_u64 (cpu, rt, NO_SP));
}

/* 64 bit store unscaled signed 9 bit with pre- or post-writeback.  */
static void
str_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  aarch64_set_mem_u64 (cpu, address, aarch64_get_reg_u64 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit store scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
str_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                             extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  aarch64_set_mem_u64 (cpu, address + displacement,
                       aarch64_get_reg_u64 (cpu, rt, NO_SP));
}

/* 32 bit store byte scaled unsigned 12 bit.  */
static void
strb_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  /* The target register may not be SP but the source may be.
     There is no scaling required for a byte store.  */
  aarch64_set_mem_u8 (cpu,
                      aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                      aarch64_get_reg_u8 (cpu, rt, NO_SP));
}

/* 32 bit store byte unscaled signed 9 bit with pre- or post-writeback.  */
static void
strb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  aarch64_set_mem_u8 (cpu, address, aarch64_get_reg_u8 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
strb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                 extension);

  /* There is no scaling required for a byte store.  */
  aarch64_set_mem_u8 (cpu, address + displacement,
                      aarch64_get_reg_u8 (cpu, rt, NO_SP));
}

/* 32 bit store short scaled unsigned 12 bit.  */
static void
strh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  /* The target register may not be SP but the source may be.  */
  aarch64_set_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                       + SCALE (offset, 16),
                       aarch64_get_reg_u16 (cpu, rt, NO_SP));
}

/* 32 bit store short unscaled signed 9 bit with pre- or post-writeback.  */
static void
strh_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  aarch64_set_mem_u16 (cpu, address, aarch64_get_reg_u16 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
strh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  aarch64_set_mem_u16 (cpu, address + displacement,
                       aarch64_get_reg_u16 (cpu, rt, NO_SP));
}

/* Prefetch unsigned 12 bit.  */
static void
prfm_abs (sim_cpu *cpu, uint32_t offset)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK)
     + SCALE (offset, 64).  */

  /* TODO : implement prefetch of address.  */
}

/* Prefetch scaled or unscaled zero- or sign-extended 32-bit register offset.  */
static void
prfm_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     rn may reference SP, rm may only reference ZR
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t base = aarch64_get_reg_u64 (cpu, rn, SP_OK);
     int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                extension);
     uint64_t displacement = OPT_SCALE (extended, 64, scaling);
     uint64_t address = base + displacement.  */

  /* TODO : implement prefetch of address.  */
}

/* 64 bit pc-relative prefetch.  */
static void
prfm_pcrel (sim_cpu *cpu, int32_t offset)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t address = aarch64_get_PC (cpu) + offset.  */

  /* TODO : implement this.  */
}

/* Load-store exclusive.  */

static void
ldxr (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int size = INSTR (31, 30);
  /* int ordered = INSTR (15, 15);  */
  /* int exclusive = ! INSTR (23, 23);  */

  switch (size)
    {
    case 0:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
      break;
    case 1:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
      break;
    case 2:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
      break;
    case 3:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
      break;
    }
}

static void
stxr (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  unsigned rs = INSTR (20, 16);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int size = INSTR (31, 30);
  uint64_t data = aarch64_get_reg_u64 (cpu, rt, NO_SP);

  switch (size)
    {
    case 0: aarch64_set_mem_u8 (cpu, address, data); break;
    case 1: aarch64_set_mem_u16 (cpu, address, data); break;
    case 2: aarch64_set_mem_u32 (cpu, address, data); break;
    case 3: aarch64_set_mem_u64 (cpu, address, data); break;
    }

  aarch64_set_reg_u64 (cpu, rs, NO_SP, 0); /* Always exclusive...  */
}

static void
dexLoadLiteral (sim_cpu *cpu)
{
  /* instr[29,27] == 011
     instr[25,24] == 00
     instr[31,30:26] = opc: 000 ==> LDRW,  001 ==> FLDRS
                            010 ==> LDRX,  011 ==> FLDRD
                            100 ==> LDRSW, 101 ==> FLDRQ
                            110 ==> PRFM,  111 ==> UNALLOC
     instr[26] ==> V : 0 ==> GReg, 1 ==> FReg
     instr[23, 5] == simm19  */

  /* unsigned rt = INSTR (4, 0);  */
  uint32_t dispatch = (INSTR (31, 30) << 1) | INSTR (26, 26);
  int32_t imm = simm32 (aarch64_get_instr (cpu), 23, 5);

  switch (dispatch)
    {
    case 0: ldr32_pcrel (cpu, imm); break;
    case 1: fldrs_pcrel (cpu, imm); break;
    case 2: ldr_pcrel   (cpu, imm); break;
    case 3: fldrd_pcrel (cpu, imm); break;
    case 4: ldrsw_pcrel (cpu, imm); break;
    case 5: fldrq_pcrel (cpu, imm); break;
    case 6: prfm_pcrel  (cpu, imm); break;
    case 7:
    default:
      HALT_UNALLOC;
    }
}
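
/* Worked example (illustrative only): the instruction word
   0x18000041 has instr[31,30] = 00 and instr[26] = 0, so dispatch
   is 0 and simm19 is 2; ldr32_pcrel therefore loads W1 from
   PC + 8.  */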

/* Immediate arithmetic
   The aimm argument is a 12 bit unsigned value or a 12 bit unsigned
   value left shifted by 12 bits (done at decode).

   N.B. the register args (dest, source) can normally be Xn or SP.
   The exception occurs for flag setting instructions which may
   only use Xn for the output (dest).  */

/* 32 bit add immediate.  */
static void
add32 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  aarch64_set_reg_u64 (cpu, rd, SP_OK,
                       aarch64_get_reg_u32 (cpu, rn, SP_OK) + aimm);
}

/* 64 bit add immediate.  */
static void
add64 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  aarch64_set_reg_u64 (cpu, rd, SP_OK,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + aimm);
}

static void
set_flags_for_add32 (sim_cpu *cpu, int32_t value1, int32_t value2)
{
  int32_t result = value1 + value2;
  int64_t sresult = (int64_t) value1 + (int64_t) value2;
  uint64_t uresult = (uint64_t)(uint32_t) value1
    + (uint64_t)(uint32_t) value2;
  uint32_t flags = 0;

  if (result == 0)
    flags |= Z;

  if (result & (1 << 31))
    flags |= N;

  /* Carry out of bit 31: compare against the zero-extended result,
     otherwise a negative result would spuriously set C.  */
  if (uresult != (uint32_t) result)
    flags |= C;

  if (sresult != result)
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}
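
/* For illustration: adding 0x7fffffff and 1 gives result 0x80000000,
   so N and V are set but C is clear, while adding 0xffffffff and 1
   gives a 33-bit unsigned sum of 0x100000000, so Z and C are set.  */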

static void
set_flags_for_add64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
{
  uint64_t result = value1 + value2;
  uint32_t flags = 0;

  if (result == 0)
    flags |= Z;

  if (result & (1ULL << 63))
    flags |= N;

  /* A carry out of bit 63 occurred iff the unsigned sum wrapped,
     i.e. iff the truncated result is smaller than an operand.  */
  if (result < value1)
    flags |= C;

  /* Signed overflow occurs iff both operands have the same sign and
     the result's sign differs from it.  */
  if (((value1 ^ result) & (value2 ^ result)) >> 63)
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

#define NEG(a) (((a) & signbit) == signbit)
#define POS(a) (((a) & signbit) == 0)

static void
set_flags_for_sub32 (sim_cpu *cpu, uint32_t value1, uint32_t value2)
{
  uint32_t result = value1 - value2;
  uint32_t flags = 0;
  uint32_t signbit = 1U << 31;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if (   (NEG (value1) && POS (value2))
      || (NEG (value1) && POS (result))
      || (POS (value2) && POS (result)))
    flags |= C;

  if (   (NEG (value1) && POS (value2) && POS (result))
      || (POS (value1) && NEG (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}
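
/* For illustration: subtracting 1 from 0 gives result 0xffffffff, so
   N is set and C is clear (a borrow occurred), whereas subtracting 1
   from 1 gives 0, setting Z and C.  */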

static void
set_flags_for_sub64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
{
  uint64_t result = value1 - value2;
  uint32_t flags = 0;
  uint64_t signbit = 1ULL << 63;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if (   (NEG (value1) && POS (value2))
      || (NEG (value1) && POS (result))
      || (POS (value2) && POS (result)))
    flags |= C;

  if (   (NEG (value1) && POS (value2) && POS (result))
      || (POS (value1) && NEG (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_binop32 (sim_cpu *cpu, uint32_t result)
{
  uint32_t flags = 0;

  if (result == 0)
    flags |= Z;
  else
    flags &= ~ Z;

  if (result & (1 << 31))
    flags |= N;
  else
    flags &= ~ N;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_binop64 (sim_cpu *cpu, uint64_t result)
{
  uint32_t flags = 0;

  if (result == 0)
    flags |= Z;
  else
    flags &= ~ Z;

  if (result & (1ULL << 63))
    flags |= N;
  else
    flags &= ~ N;

  aarch64_set_CPSR (cpu, flags);
}

/* 32 bit add immediate set flags.  */
static void
adds32 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  /* TODO : do we need to worry about signs here?  */
  int32_t value1 = aarch64_get_reg_s32 (cpu, rn, SP_OK);

  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + aimm);
  set_flags_for_add32 (cpu, value1, aimm);
}

/* 64 bit add immediate set flags.  */
static void
adds64 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  uint64_t value2 = aimm;

  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
  set_flags_for_add64 (cpu, value1, value2);
}

/* 32 bit sub immediate.  */
static void
sub32 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  aarch64_set_reg_u64 (cpu, rd, SP_OK,
                       aarch64_get_reg_u32 (cpu, rn, SP_OK) - aimm);
}

/* 64 bit sub immediate.  */
static void
sub64 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  aarch64_set_reg_u64 (cpu, rd, SP_OK,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) - aimm);
}

/* 32 bit sub immediate set flags.  */
static void
subs32 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  uint32_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  uint32_t value2 = aimm;

  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
  set_flags_for_sub32 (cpu, value1, value2);
}

/* 64 bit sub immediate set flags.  */
static void
subs64 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  uint32_t value2 = aimm;

  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
  set_flags_for_sub64 (cpu, value1, value2);
}

/* Data Processing Register.  */

/* First two helpers to perform the shift operations.  */

static inline uint32_t
shifted32 (uint32_t value, Shift shift, uint32_t count)
{
  switch (shift)
    {
    default:
    case LSL:
      return (value << count);
    case LSR:
      return (value >> count);
    case ASR:
      {
        int32_t svalue = value;
        return (svalue >> count);
      }
    case ROR:
      {
        uint32_t top;
        uint32_t bottom;

        /* A rotation by zero bits is a no-op; guarding it also avoids
           the undefined shift by 32 below.  */
        if (count == 0)
          return value;
        top = value >> count;
        bottom = value << (32 - count);
        return (bottom | top);
      }
    }
}

static inline uint64_t
shifted64 (uint64_t value, Shift shift, uint32_t count)
{
  switch (shift)
    {
    default:
    case LSL:
      return (value << count);
    case LSR:
      return (value >> count);
    case ASR:
      {
        int64_t svalue = value;
        return (svalue >> count);
      }
    case ROR:
      {
        uint64_t top;
        uint64_t bottom;

        /* A rotation by zero bits is a no-op; guarding it also avoids
           the undefined shift by 64 below.  */
        if (count == 0)
          return value;
        top = value >> count;
        bottom = value << (64 - count);
        return (bottom | top);
      }
    }
}
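
/* For illustration: shifted32 (0x80000001, ROR, 1) rotates the low
   bit around to bit 31, giving 0xc0000000; shifted32 (0x80000001,
   ASR, 1) preserves the sign bit, also giving 0xc0000000, whereas
   LSR by 1 gives 0x40000000.  */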

/* Arithmetic shifted register.
   These allow an optional LSL, ASR or LSR to the second source
   register with a count up to the register bit count.

   N.B. register args may not be SP.  */

/* 32 bit ADD shifted register.  */
static void
add32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_reg_u32 (cpu, rn, NO_SP)
                       + shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                    shift, count));
}

/* 64 bit ADD shifted register.  */
static void
add64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_reg_u64 (cpu, rn, NO_SP)
                       + shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
                                    shift, count));
}

/* 32 bit ADD shifted register setting flags.  */
static void
adds32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
  uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                               shift, count);

  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
  set_flags_for_add32 (cpu, value1, value2);
}

/* 64 bit ADD shifted register setting flags.  */
static void
adds64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
  uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
                               shift, count);

  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
  set_flags_for_add64 (cpu, value1, value2);
}

/* 32 bit SUB shifted register.  */
static void
sub32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_reg_u32 (cpu, rn, NO_SP)
                       - shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                    shift, count));
}

/* 64 bit SUB shifted register.  */
static void
sub64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_reg_u64 (cpu, rn, NO_SP)
                       - shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
                                    shift, count));
}

/* 32 bit SUB shifted register setting flags.  */
static void
subs32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
  uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                               shift, count);

  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
  set_flags_for_sub32 (cpu, value1, value2);
}

/* 64 bit SUB shifted register setting flags.  */
static void
subs64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
  uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
                               shift, count);

  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
  set_flags_for_sub64 (cpu, value1, value2);
}
1983
1984 /* First a couple more helpers to fetch the
1985 relevant source register element either
1986 sign or zero extended as required by the
1987 extension value. */
1988
1989 static uint32_t
1990 extreg32 (sim_cpu *cpu, unsigned int lo, Extension extension)
1991 {
1992 switch (extension)
1993 {
1994 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
1995 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
1996 case UXTW: /* Fall through. */
1997 case UXTX: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
1998 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
1999 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2000 case SXTW: /* Fall through. */
2001 case SXTX: /* Fall through. */
2002 default: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2003 }
2004 }
2005
2006 static uint64_t
2007 extreg64 (sim_cpu *cpu, unsigned int lo, Extension extension)
2008 {
2009 switch (extension)
2010 {
2011 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2012 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2013 case UXTW: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2014 case UXTX: return aarch64_get_reg_u64 (cpu, lo, NO_SP);
2015 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2016 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2017 case SXTW: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2018 case SXTX:
2019 default: return aarch64_get_reg_s64 (cpu, lo, NO_SP);
2020 }
2021 }
2022
2023 /* Arithmetic extending register
2024 These allow an optional sign or zero extension of some portion of the
2025 second source register followed by an optional left shift of
2026 between 0 and 4 bits.
2027
2028 N.B. output (dest) and first input arg (source) may normally be Xn
2029 or SP. However, for flag setting operations dest can only be
2030 Xn. Second input registers are always Xn. */
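/* For example, ADD X0, SP, W1, UXTB #2 computes
   X0 = SP + ((W1 & 0xFF) << 2).  */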
2031
2032 /* 32 bit ADD extending register. */
2033 static void
2034 add32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2035 {
2036 unsigned rm = INSTR (20, 16);
2037 unsigned rn = INSTR (9, 5);
2038 unsigned rd = INSTR (4, 0);
2039
2040 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2041 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2042 + (extreg32 (cpu, rm, extension) << shift));
2043 }
2044
2045 /* 64 bit ADD extending register.
2046 N.B. This subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2047 static void
2048 add64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2049 {
2050 unsigned rm = INSTR (20, 16);
2051 unsigned rn = INSTR (9, 5);
2052 unsigned rd = INSTR (4, 0);
2053
2054 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2055 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2056 + (extreg64 (cpu, rm, extension) << shift));
2057 }
2058
2059 /* 32 bit ADD extending register setting flags. */
2060 static void
2061 adds32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2062 {
2063 unsigned rm = INSTR (20, 16);
2064 unsigned rn = INSTR (9, 5);
2065 unsigned rd = INSTR (4, 0);
2066
2067 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2068 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2069
2070 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2071 set_flags_for_add32 (cpu, value1, value2);
2072 }
2073
2074 /* 64 bit ADD extending register setting flags */
2075 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2076 static void
2077 adds64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2078 {
2079 unsigned rm = INSTR (20, 16);
2080 unsigned rn = INSTR (9, 5);
2081 unsigned rd = INSTR (4, 0);
2082
2083 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2084 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2085
2086 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2087 set_flags_for_add64 (cpu, value1, value2);
2088 }
2089
2090 /* 32 bit SUB extending register. */
2091 static void
2092 sub32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2093 {
2094 unsigned rm = INSTR (20, 16);
2095 unsigned rn = INSTR (9, 5);
2096 unsigned rd = INSTR (4, 0);
2097
2098 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2099 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2100 - (extreg32 (cpu, rm, extension) << shift));
2101 }
2102
2103 /* 64 bit SUB extending register. */
2104 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2105 static void
2106 sub64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2107 {
2108 unsigned rm = INSTR (20, 16);
2109 unsigned rn = INSTR (9, 5);
2110 unsigned rd = INSTR (4, 0);
2111
2112 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2113 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2114 - (extreg64 (cpu, rm, extension) << shift));
2115 }
2116
2117 /* 32 bit SUB extending register setting flags. */
2118 static void
2119 subs32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2120 {
2121 unsigned rm = INSTR (20, 16);
2122 unsigned rn = INSTR (9, 5);
2123 unsigned rd = INSTR (4, 0);
2124
2125 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2126 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2127
2128 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2129 set_flags_for_sub32 (cpu, value1, value2);
2130 }
2131
2132 /* 64 bit SUB extending register setting flags */
2133 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2134 static void
2135 subs64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2136 {
2137 unsigned rm = INSTR (20, 16);
2138 unsigned rn = INSTR (9, 5);
2139 unsigned rd = INSTR (4, 0);
2140
2141 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2142 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2143
2144 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2145 set_flags_for_sub64 (cpu, value1, value2);
2146 }
2147
2148 static void
2149 dexAddSubtractImmediate (sim_cpu *cpu)
2150 {
2151 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2152 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2153 instr[29] = set : 0 ==> no flags, 1 ==> set flags
2154 instr[28,24] = 10001
2155 instr[23,22] = shift : 00 ==> LSL#0, 01 ==> LSL#12, 1x ==> UNALLOC
2156 instr[21,10] = uimm12
2157 instr[9,5] = Rn
2158 instr[4,0] = Rd */
2159
2160 /* N.B. the shift is applied at decode before calling the add/sub routine. */
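  /* For example, ADD X0, X1, #5, LSL #12 is encoded with shift = 1 and
     uimm12 = 5, and adds 0x5000 to X1.  */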
2161 uint32_t shift = INSTR (23, 22);
2162 uint32_t imm = INSTR (21, 10);
2163 uint32_t dispatch = INSTR (31, 29);
2164
2165 NYI_assert (28, 24, 0x11);
2166
2167 if (shift > 1)
2168 HALT_UNALLOC;
2169
2170 if (shift)
2171 imm <<= 12;
2172
2173 switch (dispatch)
2174 {
2175 case 0: add32 (cpu, imm); break;
2176 case 1: adds32 (cpu, imm); break;
2177 case 2: sub32 (cpu, imm); break;
2178 case 3: subs32 (cpu, imm); break;
2179 case 4: add64 (cpu, imm); break;
2180 case 5: adds64 (cpu, imm); break;
2181 case 6: sub64 (cpu, imm); break;
2182 case 7: subs64 (cpu, imm); break;
2183 }
2184 }
2185
2186 static void
2187 dexAddSubtractShiftedRegister (sim_cpu *cpu)
2188 {
2189 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2190 instr[30,29] = op : 00 ==> ADD, 01 ==> ADDS, 10 ==> SUB, 11 ==> SUBS
2191 instr[28,24] = 01011
2192 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> UNALLOC
2193 instr[21] = 0
2194 instr[20,16] = Rm
2195 instr[15,10] = count : must be 0xxxxx for 32 bit
2196 instr[9,5] = Rn
2197 instr[4,0] = Rd */
2198
2199 uint32_t size = INSTR (31, 31);
2200 uint32_t count = INSTR (15, 10);
2201 Shift shiftType = INSTR (23, 22);
2202
2203 NYI_assert (28, 24, 0x0B);
2204 NYI_assert (21, 21, 0);
2205
2206 /* Shift encoded as ROR is unallocated. */
2207 if (shiftType == ROR)
2208 HALT_UNALLOC;
2209
2210 /* 32 bit operations must have count[5] = 0
2211 or else we have an UNALLOC. */
2212 if (size == 0 && uimm (count, 5, 5))
2213 HALT_UNALLOC;
2214
2215 /* Dispatch on size:op i.e instr [31,29]. */
2216 switch (INSTR (31, 29))
2217 {
2218 case 0: add32_shift (cpu, shiftType, count); break;
2219 case 1: adds32_shift (cpu, shiftType, count); break;
2220 case 2: sub32_shift (cpu, shiftType, count); break;
2221 case 3: subs32_shift (cpu, shiftType, count); break;
2222 case 4: add64_shift (cpu, shiftType, count); break;
2223 case 5: adds64_shift (cpu, shiftType, count); break;
2224 case 6: sub64_shift (cpu, shiftType, count); break;
2225 case 7: subs64_shift (cpu, shiftType, count); break;
2226 }
2227 }
2228
2229 static void
2230 dexAddSubtractExtendedRegister (sim_cpu *cpu)
2231 {
2232 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2233 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2234 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2235 instr[28,24] = 01011
2236 instr[23,22] = opt : 0 ==> ok, 1,2,3 ==> UNALLOC
2237 instr[21] = 1
2238 instr[20,16] = Rm
2239 instr[15,13] = option : 000 ==> UXTB, 001 ==> UXTH,
2240 010 ==> UXTW|LSL, 011 ==> UXTX,
2241 100 ==> SXTB, 101 ==> SXTH,
2242 110 ==> SXTW, 111 ==> SXTX
2243 instr[12,10] = shift : 0,1,2,3,4 ==> ok, 5,6,7 ==> UNALLOC
2244 instr[9,5] = Rn
2245 instr[4,0] = Rd */
2246
2247 Extension extensionType = INSTR (15, 13);
2248 uint32_t shift = INSTR (12, 10);
2249
2250 NYI_assert (28, 24, 0x0B);
2251 NYI_assert (21, 21, 1);
2252
2253 /* Shift may not exceed 4. */
2254 if (shift > 4)
2255 HALT_UNALLOC;
2256
2257 /* Dispatch on size:op:set?. */
2258 switch (INSTR (31, 29))
2259 {
2260 case 0: add32_ext (cpu, extensionType, shift); break;
2261 case 1: adds32_ext (cpu, extensionType, shift); break;
2262 case 2: sub32_ext (cpu, extensionType, shift); break;
2263 case 3: subs32_ext (cpu, extensionType, shift); break;
2264 case 4: add64_ext (cpu, extensionType, shift); break;
2265 case 5: adds64_ext (cpu, extensionType, shift); break;
2266 case 6: sub64_ext (cpu, extensionType, shift); break;
2267 case 7: subs64_ext (cpu, extensionType, shift); break;
2268 }
2269 }
2270
2271 /* Conditional data processing
2272 Condition register is implicit 3rd source. */
2273
2274 /* 32 bit add with carry. */
2275 /* N.B. register args may not be SP. */
2276
2277 static void
2278 adc32 (sim_cpu *cpu)
2279 {
2280 unsigned rm = INSTR (20, 16);
2281 unsigned rn = INSTR (9, 5);
2282 unsigned rd = INSTR (4, 0);
2283
2284 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2285 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2286 + aarch64_get_reg_u32 (cpu, rm, NO_SP)
2287 + IS_SET (C));
2288 }
2289
2290 /* 64 bit add with carry */
2291 static void
2292 adc64 (sim_cpu *cpu)
2293 {
2294 unsigned rm = INSTR (20, 16);
2295 unsigned rn = INSTR (9, 5);
2296 unsigned rd = INSTR (4, 0);
2297
2298 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2299 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2300 + aarch64_get_reg_u64 (cpu, rm, NO_SP)
2301 + IS_SET (C));
2302 }
2303
2304 /* 32 bit add with carry setting flags. */
2305 static void
2306 adcs32 (sim_cpu *cpu)
2307 {
2308 unsigned rm = INSTR (20, 16);
2309 unsigned rn = INSTR (9, 5);
2310 unsigned rd = INSTR (4, 0);
2311
2312 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2313 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2314 uint32_t carry = IS_SET (C);
2315
2316 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2317 set_flags_for_add32 (cpu, value1, value2 + carry);
2318 }
2319
2320 /* 64 bit add with carry setting flags. */
2321 static void
2322 adcs64 (sim_cpu *cpu)
2323 {
2324 unsigned rm = INSTR (20, 16);
2325 unsigned rn = INSTR (9, 5);
2326 unsigned rd = INSTR (4, 0);
2327
2328 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2329 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2330 uint64_t carry = IS_SET (C);
2331
2332 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2333 set_flags_for_add64 (cpu, value1, value2 + carry);
2334 }
2335
2336 /* 32 bit sub with carry. */
2337 static void
2338 sbc32 (sim_cpu *cpu)
2339 {
2340 unsigned rm = INSTR (20, 16);
2341 unsigned rn = INSTR (9, 5); /* ngc iff rn == 31. */
2342 unsigned rd = INSTR (4, 0);
2343
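  /* The AArch64 carry flag is a NOT-borrow after a subtract, so SBC
     computes Rn - Rm - 1 + C, i.e. Rn + NOT (Rm) + C.  */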
2344 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2345 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2346 - aarch64_get_reg_u32 (cpu, rm, NO_SP)
2347 - 1 + IS_SET (C));
2348 }
2349
2350 /* 64 bit sub with carry */
2351 static void
2352 sbc64 (sim_cpu *cpu)
2353 {
2354 unsigned rm = INSTR (20, 16);
2355 unsigned rn = INSTR (9, 5);
2356 unsigned rd = INSTR (4, 0);
2357
2358 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2359 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2360 - aarch64_get_reg_u64 (cpu, rm, NO_SP)
2361 - 1 + IS_SET (C));
2362 }
2363
2364 /* 32 bit sub with carry setting flags */
2365 static void
2366 sbcs32 (sim_cpu *cpu)
2367 {
2368 unsigned rm = INSTR (20, 16);
2369 unsigned rn = INSTR (9, 5);
2370 unsigned rd = INSTR (4, 0);
2371
2372 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2373 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2374 uint32_t carry = IS_SET (C);
2375 uint32_t result = value1 - value2 - 1 + carry;
2376
2377 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2378 set_flags_for_sub32 (cpu, value1, value2 + 1 - carry);
2379 }
2380
2381 /* 64 bit sub with carry setting flags */
2382 static void
2383 sbcs64 (sim_cpu *cpu)
2384 {
2385 unsigned rm = INSTR (20, 16);
2386 unsigned rn = INSTR (9, 5);
2387 unsigned rd = INSTR (4, 0);
2388
2389 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2390 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2391 uint64_t carry = IS_SET (C);
2392 uint64_t result = value1 - value2 - 1 + carry;
2393
2394 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2395 set_flags_for_sub64 (cpu, value1, value2 + 1 - carry);
2396 }
2397
2398 static void
2399 dexAddSubtractWithCarry (sim_cpu *cpu)
2400 {
2401 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2402 instr[30] = op : 0 ==> ADC, 1 ==> SBC
2403 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2404 instr[28,21] = 1 1010 000
2405 instr[20,16] = Rm
2406 instr[15,10] = op2 : 000000 ==> ok, otherwise ==> UNALLOC
2407 instr[9,5] = Rn
2408 instr[4,0] = Rd */
2409
2410 uint32_t op2 = INSTR (15, 10);
2411
2412 NYI_assert (28, 21, 0xD0);
2413
2414 if (op2 != 0)
2415 HALT_UNALLOC;
2416
2417 /* Dispatch on size:op:set?. */
2418 switch (INSTR (31, 29))
2419 {
2420 case 0: adc32 (cpu); break;
2421 case 1: adcs32 (cpu); break;
2422 case 2: sbc32 (cpu); break;
2423 case 3: sbcs32 (cpu); break;
2424 case 4: adc64 (cpu); break;
2425 case 5: adcs64 (cpu); break;
2426 case 6: sbc64 (cpu); break;
2427 case 7: sbcs64 (cpu); break;
2428 }
2429 }
2430
2431 static uint32_t
2432 testConditionCode (sim_cpu *cpu, CondCode cc)
2433 {
2434 /* This should be reducible to branchless logic
2435 by some careful testing of bits in CC followed
2436 by the requisite masking and combining of bits
2437 from the flag register.
2438
2439 For now we do it with a switch. */
2440 int res;
2441
2442 switch (cc)
2443 {
2444 case EQ: res = IS_SET (Z); break;
2445 case NE: res = IS_CLEAR (Z); break;
2446 case CS: res = IS_SET (C); break;
2447 case CC: res = IS_CLEAR (C); break;
2448 case MI: res = IS_SET (N); break;
2449 case PL: res = IS_CLEAR (N); break;
2450 case VS: res = IS_SET (V); break;
2451 case VC: res = IS_CLEAR (V); break;
2452 case HI: res = IS_SET (C) && IS_CLEAR (Z); break;
2453 case LS: res = IS_CLEAR (C) || IS_SET (Z); break;
2454 case GE: res = IS_SET (N) == IS_SET (V); break;
2455 case LT: res = IS_SET (N) != IS_SET (V); break;
2456 case GT: res = IS_CLEAR (Z) && (IS_SET (N) == IS_SET (V)); break;
2457 case LE: res = IS_SET (Z) || (IS_SET (N) != IS_SET (V)); break;
2458 case AL:
2459 case NV:
2460 default:
2461 res = 1;
2462 break;
2463 }
2464 return res;
2465 }
2466
2467 static void
2468 CondCompare (sim_cpu *cpu) /* aka: ccmp and ccmn */
2469 {
2470 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2471 instr[30] = compare with positive (1) or negative value (0)
2472 instr[29,21] = 1 1101 0010
2473 instr[20,16] = Rm or const
2474 instr[15,12] = cond
2475 instr[11] = compare reg (0) or const (1)
2476 instr[10] = 0
2477 instr[9,5] = Rn
2478 instr[4] = 0
2479 instr[3,0] = value for CPSR bits if the comparison does not take place. */
2480 signed int negate;
2481 unsigned rm;
2482 unsigned rn;
2483
2484 NYI_assert (29, 21, 0x1d2);
2485 NYI_assert (10, 10, 0);
2486 NYI_assert (4, 4, 0);
2487
2488 if (! testConditionCode (cpu, INSTR (15, 12)))
2489 {
2490 aarch64_set_CPSR (cpu, INSTR (3, 0));
2491 return;
2492 }
2493
2494 negate = INSTR (30, 30) ? 1 : -1;
2495 rm = INSTR (20, 16);
2496 rn = INSTR ( 9, 5);
2497
2498 if (INSTR (31, 31))
2499 {
2500 if (INSTR (11, 11))
2501 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2502 negate * (uint64_t) rm);
2503 else
2504 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2505 negate * aarch64_get_reg_u64 (cpu, rm, SP_OK));
2506 }
2507 else
2508 {
2509 if (INSTR (11, 11))
2510 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2511 negate * rm);
2512 else
2513 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2514 negate * aarch64_get_reg_u32 (cpu, rm, SP_OK));
2515 }
2516 }
2517
2518 static void
2519 do_vec_MOV_whole_vector (sim_cpu *cpu)
2520 {
2521 /* MOV Vd.T, Vs.T (alias for ORR Vd.T, Vn.T, Vm.T where Vn == Vm)
2522
2523 instr[31] = 0
2524 instr[30] = half(0)/full(1)
2525 instr[29,21] = 001110101
2526 instr[20,16] = Vs
2527 instr[15,10] = 000111
2528 instr[9,5] = Vs
2529 instr[4,0] = Vd */
2530
2531 unsigned vs = INSTR (9, 5);
2532 unsigned vd = INSTR (4, 0);
2533
2534 NYI_assert (29, 21, 0x075);
2535 NYI_assert (15, 10, 0x07);
2536
2537 if (INSTR (20, 16) != vs)
2538 HALT_NYI;
2539
2540 if (INSTR (30, 30))
2541 aarch64_set_vec_u64 (cpu, vd, 1, aarch64_get_vec_u64 (cpu, vs, 1));
2542
2543 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vs, 0));
2544 }
2545
2546 static void
2547 do_vec_MOV_into_scalar (sim_cpu *cpu)
2548 {
2549 /* instr[31] = 0
2550 instr[30] = word(0)/long(1)
2551 instr[29,21] = 00 1110 000
2552 instr[20,18] = element size and index
2553 instr[17,10] = 00 0011 11
2554 instr[9,5] = V source
2555 instr[4,0] = R dest */
2556
2557 unsigned vs = INSTR (9, 5);
2558 unsigned rd = INSTR (4, 0);
2559
2560 NYI_assert (29, 21, 0x070);
2561 NYI_assert (17, 10, 0x0F);
2562
2563 switch (INSTR (20, 18))
2564 {
2565 case 0x2:
2566 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, vs, 0));
2567 break;
2568
2569 case 0x6:
2570 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, vs, 1));
2571 break;
2572
2573 case 0x1:
2574 case 0x3:
2575 case 0x5:
2576 case 0x7:
2577 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u32
2578 (cpu, vs, INSTR (20, 19)));
2579 break;
2580
2581 default:
2582 HALT_NYI;
2583 }
2584 }
2585
2586 static void
2587 do_vec_INS (sim_cpu *cpu)
2588 {
2589 /* instr[31,21] = 01001110000
2590 instr[20,16] = element size and index
2591 instr[15,10] = 000111
2592 instr[9,5] = W source
2593 instr[4,0] = V dest */
2594
2595 int index;
2596 unsigned rs = INSTR (9, 5);
2597 unsigned vd = INSTR (4, 0);
2598
2599 NYI_assert (31, 21, 0x270);
2600 NYI_assert (15, 10, 0x07);
2601
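  /* The lowest set bit of instr[20,16] selects the element size; the
     bits above it give the element index.  */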
2602 if (INSTR (16, 16))
2603 {
2604 index = INSTR (20, 17);
2605 aarch64_set_vec_u8 (cpu, vd, index,
2606 aarch64_get_reg_u8 (cpu, rs, NO_SP));
2607 }
2608 else if (INSTR (17, 17))
2609 {
2610 index = INSTR (20, 18);
2611 aarch64_set_vec_u16 (cpu, vd, index,
2612 aarch64_get_reg_u16 (cpu, rs, NO_SP));
2613 }
2614 else if (INSTR (18, 18))
2615 {
2616 index = INSTR (20, 19);
2617 aarch64_set_vec_u32 (cpu, vd, index,
2618 aarch64_get_reg_u32 (cpu, rs, NO_SP));
2619 }
2620 else if (INSTR (19, 19))
2621 {
2622 index = INSTR (20, 20);
2623 aarch64_set_vec_u64 (cpu, vd, index,
2624 aarch64_get_reg_u64 (cpu, rs, NO_SP));
2625 }
2626 else
2627 HALT_NYI;
2628 }
2629
2630 static void
2631 do_vec_DUP_vector_into_vector (sim_cpu *cpu)
2632 {
2633 /* instr[31] = 0
2634 instr[30] = half(0)/full(1)
2635 instr[29,21] = 00 1110 000
2636 instr[20,16] = element size and index
2637 instr[15,10] = 0000 01
2638 instr[9,5] = V source
2639 instr[4,0] = V dest. */
2640
2641 unsigned full = INSTR (30, 30);
2642 unsigned vs = INSTR (9, 5);
2643 unsigned vd = INSTR (4, 0);
2644 int i, index;
2645
2646 NYI_assert (29, 21, 0x070);
2647 NYI_assert (15, 10, 0x01);
2648
2649 if (INSTR (16, 16))
2650 {
2651 index = INSTR (20, 17);
2652
2653 for (i = 0; i < (full ? 16 : 8); i++)
2654 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vs, index));
2655 }
2656 else if (INSTR (17, 17))
2657 {
2658 index = INSTR (20, 18);
2659
2660 for (i = 0; i < (full ? 8 : 4); i++)
2661 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, index));
2662 }
2663 else if (INSTR (18, 18))
2664 {
2665 index = INSTR (20, 19);
2666
2667 for (i = 0; i < (full ? 4 : 2); i++)
2668 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, index));
2669 }
2670 else
2671 {
2672 if (INSTR (19, 19) == 0)
2673 HALT_UNALLOC;
2674
2675 if (! full)
2676 HALT_UNALLOC;
2677
2678 index = INSTR (20, 20);
2679
2680 for (i = 0; i < 2; i++)
2681 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, index));
2682 }
2683 }
2684
2685 static void
2686 do_vec_TBL (sim_cpu *cpu)
2687 {
2688 /* instr[31] = 0
2689 instr[30] = half(0)/full(1)
2690 instr[29,21] = 00 1110 000
2691 instr[20,16] = Vm
2692 instr[15] = 0
2693 instr[14,13] = vec length
2694 instr[12,10] = 000
2695 instr[9,5] = V start
2696 instr[4,0] = V dest */
2697
2698 int full = INSTR (30, 30);
2699 int len = INSTR (14, 13) + 1;
2700 unsigned vm = INSTR (20, 16);
2701 unsigned vn = INSTR (9, 5);
2702 unsigned vd = INSTR (4, 0);
2703 unsigned i;
2704
2705 NYI_assert (29, 21, 0x070);
2706 NYI_assert (12, 10, 0);
2707
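  /* The table is 16 * len bytes wide, taken from the len consecutive
     registers starting at Vn (wrapping round at V31); selector bytes
     beyond the table yield zero.  */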
2708 for (i = 0; i < (full ? 16 : 8); i++)
2709 {
2710 unsigned int selector = aarch64_get_vec_u8 (cpu, vm, i);
2711 uint8_t val;
2712
2713 if (selector < 16)
2714 val = aarch64_get_vec_u8 (cpu, vn, selector);
2715 else if (selector < 32)
2716 val = len < 2 ? 0 : aarch64_get_vec_u8 (cpu, (vn + 1) % 32, selector - 16);
2717 else if (selector < 48)
2718 val = len < 3 ? 0 : aarch64_get_vec_u8 (cpu, (vn + 2) % 32, selector - 32);
2719 else if (selector < 64)
2720 val = len < 4 ? 0 : aarch64_get_vec_u8 (cpu, (vn + 3) % 32, selector - 48);
2721 else
2722 val = 0;
2723
2724 aarch64_set_vec_u8 (cpu, vd, i, val);
2725 }
2726 }
2727
2728 static void
2729 do_vec_TRN (sim_cpu *cpu)
2730 {
2731 /* instr[31] = 0
2732 instr[30] = half(0)/full(1)
2733 instr[29,24] = 00 1110
2734 instr[23,22] = size
2735 instr[21] = 0
2736 instr[20,16] = Vm
2737 instr[15] = 0
2738 instr[14] = TRN1 (0) / TRN2 (1)
2739 instr[13,10] = 1010
2740 instr[9,5] = V source
2741 instr[4,0] = V dest. */
2742
2743 int full = INSTR (30, 30);
2744 int second = INSTR (14, 14);
2745 unsigned vm = INSTR (20, 16);
2746 unsigned vn = INSTR (9, 5);
2747 unsigned vd = INSTR (4, 0);
2748 unsigned i;
2749
2750 NYI_assert (29, 24, 0x0E);
2751 NYI_assert (13, 10, 0xA);
2752
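  /* TRN1 interleaves the even-indexed elements of Vn and Vm, TRN2 the
     odd-indexed ones.  */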
2753 switch (INSTR (23, 22))
2754 {
2755 case 0:
2756 for (i = 0; i < (full ? 8 : 4); i++)
2757 {
2758 aarch64_set_vec_u8
2759 (cpu, vd, i * 2,
2760 aarch64_get_vec_u8 (cpu, vn, i * 2 + second));
2761 aarch64_set_vec_u8
2762 (cpu, vd, i * 2 + 1,
2763 aarch64_get_vec_u8 (cpu, vm, i * 2 + second));
2764 }
2765 break;
2766
2767 case 1:
2768 for (i = 0; i < (full ? 4 : 2); i++)
2769 {
2770 aarch64_set_vec_u16
2771 (cpu, vd, i * 2,
2772 aarch64_get_vec_u16 (cpu, vn, i * 2 + second));
2773 aarch64_set_vec_u16
2774 (cpu, vd, i * 2 + 1,
2775 aarch64_get_vec_u16 (cpu, vm, i * 2 + second));
2776 }
2777 break;
2778
2779 case 2:
2780 aarch64_set_vec_u32
2781 (cpu, vd, 0, aarch64_get_vec_u32 (cpu, vn, 0 + second));
2782 aarch64_set_vec_u32
2783 (cpu, vd, 1, aarch64_get_vec_u32 (cpu, vm, 0 + second));
2784 aarch64_set_vec_u32
2785 (cpu, vd, 2, aarch64_get_vec_u32 (cpu, vn, 2 + second));
2786 aarch64_set_vec_u32
2787 (cpu, vd, 3, aarch64_get_vec_u32 (cpu, vm, 2 + second));
2788 break;
2789
2790 case 3:
2791 if (! full)
2792 HALT_UNALLOC;
2793
2794 aarch64_set_vec_u64 (cpu, vd, 0,
2795 aarch64_get_vec_u64 (cpu, vn, second));
2796 aarch64_set_vec_u64 (cpu, vd, 1,
2797 aarch64_get_vec_u64 (cpu, vm, second));
2798 break;
2799 }
2800 }
2801
2802 static void
2803 do_vec_DUP_scalar_into_vector (sim_cpu *cpu)
2804 {
2805 /* instr[31] = 0
2806 instr[30] = 0=> zero top 64-bits, 1=> duplicate into top 64-bits
2807 [must be 1 for 64-bit xfer]
2808 instr[29,20] = 00 1110 0000
2809 instr[19,16] = element size: 0001=> 8-bits, 0010=> 16-bits,
2810 0100=> 32-bits, 1000=> 64-bits
2811 instr[15,10] = 0000 11
2812 instr[9,5] = W source
2813 instr[4,0] = V dest. */
2814
2815 unsigned i;
2816 unsigned Vd = INSTR (4, 0);
2817 unsigned Rs = INSTR (9, 5);
2818 int both = INSTR (30, 30);
2819
2820 NYI_assert (29, 20, 0x0E0);
2821 NYI_assert (15, 10, 0x03);
2822
2823 switch (INSTR (19, 16))
2824 {
2825 case 1:
2826 for (i = 0; i < (both ? 16 : 8); i++)
2827 aarch64_set_vec_u8 (cpu, Vd, i, aarch64_get_reg_u8 (cpu, Rs, NO_SP));
2828 break;
2829
2830 case 2:
2831 for (i = 0; i < (both ? 8 : 4); i++)
2832 aarch64_set_vec_u16 (cpu, Vd, i, aarch64_get_reg_u16 (cpu, Rs, NO_SP));
2833 break;
2834
2835 case 4:
2836 for (i = 0; i < (both ? 4 : 2); i++)
2837 aarch64_set_vec_u32 (cpu, Vd, i, aarch64_get_reg_u32 (cpu, Rs, NO_SP));
2838 break;
2839
2840 case 8:
2841 if (!both)
2842 HALT_NYI;
2843 aarch64_set_vec_u64 (cpu, Vd, 0, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
2844 aarch64_set_vec_u64 (cpu, Vd, 1, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
2845 break;
2846
2847 default:
2848 HALT_NYI;
2849 }
2850 }
2851
2852 static void
2853 do_vec_UZP (sim_cpu *cpu)
2854 {
2855 /* instr[31] = 0
2856 instr[30] = half(0)/full(1)
2857 instr[29,24] = 00 1110
2858 instr[23,22] = size: byte(00), half(01), word (10), long (11)
2859 instr[21] = 0
2860 instr[20,16] = Vm
2861 instr[15] = 0
2862 instr[14] = lower (0) / upper (1)
2863 instr[13,10] = 0110
2864 instr[9,5] = Vn
2865 instr[4,0] = Vd. */
2866
2867 int full = INSTR (30, 30);
2868 int upper = INSTR (14, 14);
2869
2870 unsigned vm = INSTR (20, 16);
2871 unsigned vn = INSTR (9, 5);
2872 unsigned vd = INSTR (4, 0);
2873
2874 uint64_t val1 = 0;
2875 uint64_t val2 = 0;
2876 unsigned i;
2877
2878 NYI_assert (29, 24, 0x0E);
2879 NYI_assert (21, 21, 0);
2880 NYI_assert (15, 15, 0);
2881 NYI_assert (13, 10, 6);
2882
2883 /* UZP1 concatenates the even-indexed elements of Vn and Vm,
2884 UZP2 the odd-indexed ones.  */
2885 switch (INSTR (23, 22))
2886 {
2887 case 0:
2888 for (i = 0; i < (full ? 8 : 4); i++)
2889 {
2890 val1 |= (uint64_t) aarch64_get_vec_u8 (cpu, vn, i * 2 + upper) << (i * 8);
2891 val2 |= (uint64_t) aarch64_get_vec_u8 (cpu, vm, i * 2 + upper) << (i * 8);
2892 }
2893 break;
2894
2895 case 1:
2896 for (i = 0; i < (full ? 4 : 2); i++)
2897 {
2898 val1 |= (uint64_t) aarch64_get_vec_u16 (cpu, vn, i * 2 + upper) << (i * 16);
2899 val2 |= (uint64_t) aarch64_get_vec_u16 (cpu, vm, i * 2 + upper) << (i * 16);
2900 }
2901 break;
2902
2903 case 2:
2904 for (i = 0; i < (full ? 2 : 1); i++)
2905 {
2906 val1 |= (uint64_t) aarch64_get_vec_u32 (cpu, vn, i * 2 + upper) << (i * 32);
2907 val2 |= (uint64_t) aarch64_get_vec_u32 (cpu, vm, i * 2 + upper) << (i * 32);
2908 }
2909 break;
2910
2911 case 3:
2912 if (! full)
2913 HALT_UNALLOC;
2914 val1 = aarch64_get_vec_u64 (cpu, vn, upper);
2915 val2 = aarch64_get_vec_u64 (cpu, vm, upper);
2916 break;
2917 }
2918
2919 if (full)
2920 aarch64_set_vec_u64 (cpu, vd, 1, val2);
2921 aarch64_set_vec_u64 (cpu, vd, 0, full ? val1 : (val1 | (val2 << 32)));
2922 }
2923
2924 static void
2925 do_vec_ZIP (sim_cpu *cpu)
2926 {
2927 /* instr[31] = 0
2928 instr[30] = half(0)/full(1)
2929 instr[29,24] = 00 1110
2930 instr[23,22] = size: byte(00), half(01), word (10), long (11)
2931 instr[21] = 0
2932 instr[20,16] = Vm
2933 instr[15] = 0
2934 instr[14] = lower (0) / upper (1)
2935 instr[13,10] = 1110
2936 instr[9,5] = Vn
2937 instr[4,0] = Vd. */
2938
2939 int full = INSTR (30, 30);
2940 int upper = INSTR (14, 14);
2941
2942 unsigned vm = INSTR (20, 16);
2943 unsigned vn = INSTR (9, 5);
2944 unsigned vd = INSTR (4, 0);
2945
2946 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
2947 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
2948 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
2949 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
2950
2951 uint64_t val1 = 0;
2952 uint64_t val2 = 0;
2953
2954 uint64_t input1 = upper ? val_n2 : val_n1;
2955 uint64_t input2 = upper ? val_m2 : val_m1;
2956
2957 NYI_assert (29, 24, 0x0E);
2958 NYI_assert (21, 21, 0);
2959 NYI_assert (15, 15, 0);
2960 NYI_assert (13, 10, 0xE);
2961
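  /* ZIP1 interleaves the elements of the lower halves of Vn and Vm,
     ZIP2 those of the upper halves.  */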
2962 switch (INSTR (23, 22))
2963 {
2964 case 0:
2965 val1 =
2966 ((input1 << 0) & (0xFF << 0))
2967 | ((input2 << 8) & (0xFF << 8))
2968 | ((input1 << 8) & (0xFF << 16))
2969 | ((input2 << 16) & (0xFF << 24))
2970 | ((input1 << 16) & (0xFFULL << 32))
2971 | ((input2 << 24) & (0xFFULL << 40))
2972 | ((input1 << 24) & (0xFFULL << 48))
2973 | ((input2 << 32) & (0xFFULL << 56));
2974
2975 val2 =
2976 ((input1 >> 32) & (0xFF << 0))
2977 | ((input2 >> 24) & (0xFF << 8))
2978 | ((input1 >> 24) & (0xFF << 16))
2979 | ((input2 >> 16) & (0xFF << 24))
2980 | ((input1 >> 16) & (0xFFULL << 32))
2981 | ((input2 >> 8) & (0xFFULL << 40))
2982 | ((input1 >> 8) & (0xFFULL << 48))
2983 | ((input2 >> 0) & (0xFFULL << 56));
2984 break;
2985
2986 case 1:
2987 val1 =
2988 ((input1 << 0) & (0xFFFF << 0))
2989 | ((input2 << 16) & (0xFFFF << 16))
2990 | ((input1 << 16) & (0xFFFFULL << 32))
2991 | ((input2 << 32) & (0xFFFFULL << 48));
2992
2993 val2 =
2994 ((input1 >> 32) & (0xFFFF << 0))
2995 | ((input2 >> 16) & (0xFFFF << 16))
2996 | ((input1 >> 16) & (0xFFFFULL << 32))
2997 | ((input2 >> 0) & (0xFFFFULL << 48));
2998 break;
2999
3000 case 2:
3001 val1 = (input1 & 0xFFFFFFFFULL) | (input2 << 32);
3002 val2 = ((input1 >> 32) & 0xFFFFFFFFULL) | (input2 & 0xFFFFFFFF00000000ULL);
3003 break;
3004
3005 case 3:
3006 val1 = input1;
3007 val2 = input2;
3008 break;
3009 }
3010
3011 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3012 if (full)
3013 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3014 }
3015
3016 /* Floating point immediates are encoded in 8 bits.
3017 fpimm[7] = sign bit.
3018 fpimm[6:4] = signed exponent.
3019 fpimm[3:0] = fraction (assuming leading 1).
3020 i.e. F = s * 1.f * 2^(e - b). */
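/* For example, imm8 = 0x70 (s = 0, e = 7, f = 0) encodes 1.0, and
   imm8 = 0xF0 encodes -1.0.  */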
3021
3022 static float
3023 fp_immediate_for_encoding_32 (uint32_t imm8)
3024 {
3025 float u;
3026 uint32_t s, e, f, i;
3027
3028 s = (imm8 >> 7) & 0x1;
3029 e = (imm8 >> 4) & 0x7;
3030 f = imm8 & 0xf;
3031
3032 /* The fp value is s * n/16 * 2^r where n is 16+f.  */
3033 u = (16.0 + f) / 16.0;
3034
3035 /* N.B. exponent is signed. */
3036 if (e < 4)
3037 {
3038 int epos = e;
3039
3040 for (i = 0; i <= epos; i++)
3041 u *= 2.0;
3042 }
3043 else
3044 {
3045 int eneg = 7 - e;
3046
3047 for (i = 0; i < eneg; i++)
3048 u /= 2.0;
3049 }
3050
3051 if (s)
3052 u = - u;
3053
3054 return u;
3055 }
3056
3057 static double
3058 fp_immediate_for_encoding_64 (uint32_t imm8)
3059 {
3060 double u;
3061 uint32_t s, e, f, i;
3062
3063 s = (imm8 >> 7) & 0x1;
3064 e = (imm8 >> 4) & 0x7;
3065 f = imm8 & 0xf;
3066
3067 /* The fp value is s * n/16 * 2^r where n is 16+f.  */
3068 u = (16.0 + f) / 16.0;
3069
3070 /* N.B. exponent is signed. */
3071 if (e < 4)
3072 {
3073 int epos = e;
3074
3075 for (i = 0; i <= epos; i++)
3076 u *= 2.0;
3077 }
3078 else
3079 {
3080 int eneg = 7 - e;
3081
3082 for (i = 0; i < eneg; i++)
3083 u /= 2.0;
3084 }
3085
3086 if (s)
3087 u = - u;
3088
3089 return u;
3090 }
3091
3092 static void
3093 do_vec_MOV_immediate (sim_cpu *cpu)
3094 {
3095 /* instr[31] = 0
3096 instr[30] = full/half selector
3097 instr[29,19] = 00111100000
3098 instr[18,16] = high 3 bits of uimm8
3099 instr[15,12] = size & shift:
3100 0000 => 32-bit
3101 0010 => 32-bit + LSL#8
3102 0100 => 32-bit + LSL#16
3103 0110 => 32-bit + LSL#24
3104 1010 => 16-bit + LSL#8
3105 1000 => 16-bit
3106 1101 => 32-bit + MSL#16
3107 1100 => 32-bit + MSL#8
3108 1110 => 8-bit
3109 1111 => double
3110 instr[11,10] = 01
3111 instr[9,5] = low 5-bits of uimm8
3112 instr[4,0] = Vd. */
3113
3114 int full = INSTR (30, 30);
3115 unsigned vd = INSTR (4, 0);
3116 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3117 unsigned i;
3118
3119 NYI_assert (29, 19, 0x1E0);
3120 NYI_assert (11, 10, 1);
3121
3122 switch (INSTR (15, 12))
3123 {
3124 case 0x0: /* 32-bit, no shift. */
3125 case 0x2: /* 32-bit, shift by 8. */
3126 case 0x4: /* 32-bit, shift by 16. */
3127 case 0x6: /* 32-bit, shift by 24. */
3128 val <<= (8 * INSTR (14, 13));
3129 for (i = 0; i < (full ? 4 : 2); i++)
3130 aarch64_set_vec_u32 (cpu, vd, i, val);
3131 break;
3132
3133 case 0xa: /* 16-bit, shift by 8. */
3134 val <<= 8;
3135 /* Fall through. */
3136 case 0x8: /* 16-bit, no shift. */
3137 for (i = 0; i < (full ? 8 : 4); i++)
3138 aarch64_set_vec_u16 (cpu, vd, i, val);
3139 break;
3140 case 0xd: /* 32-bit, mask shift by 16. */
3141 val <<= 8;
3142 val |= 0xFF;
3143 /* Fall through. */
3144 case 0xc: /* 32-bit, mask shift by 8. */
3145 val <<= 8;
3146 val |= 0xFF;
3147 for (i = 0; i < (full ? 4 : 2); i++)
3148 aarch64_set_vec_u32 (cpu, vd, i, val);
3149 break;
3150
3151 case 0xe: /* 8-bit, no shift. */
3152 for (i = 0; i < (full ? 16 : 8); i++)
3153 aarch64_set_vec_u8 (cpu, vd, i, val);
3154 break;
3155
3156 case 0xf: /* FMOV Vs.{2|4}S, #fpimm. */
3157 {
3158 float u = fp_immediate_for_encoding_32 (val);
3159 for (i = 0; i < (full ? 4 : 2); i++)
3160 aarch64_set_vec_float (cpu, vd, i, u);
3161 break;
3162 }
3163
3164 default:
3165 HALT_NYI;
3166 }
3167 }
3168
3169 static void
3170 do_vec_MVNI (sim_cpu *cpu)
3171 {
3172 /* instr[31] = 0
3173 instr[30] = full/half selector
3174 instr[29,19] = 10111100000
3175 instr[18,16] = high 3 bits of uimm8
3176 instr[15,12] = selector
3177 instr[11,10] = 01
3178 instr[9,5] = low 5-bits of uimm8
3179 instr[4,0] = Vd. */
3180
3181 int full = INSTR (30, 30);
3182 unsigned vd = INSTR (4, 0);
3183 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3184 unsigned i;
3185
3186 NYI_assert (29, 19, 0x5E0);
3187 NYI_assert (11, 10, 1);
3188
3189 switch (INSTR (15, 12))
3190 {
3191 case 0x0: /* 32-bit, no shift. */
3192 case 0x2: /* 32-bit, shift by 8. */
3193 case 0x4: /* 32-bit, shift by 16. */
3194 case 0x6: /* 32-bit, shift by 24. */
3195 val <<= (8 * INSTR (14, 13));
3196 val = ~ val;
3197 for (i = 0; i < (full ? 4 : 2); i++)
3198 aarch64_set_vec_u32 (cpu, vd, i, val);
3199 return;
3200
3201 case 0xa: /* 16-bit, 8 bit shift. */
3202 val <<= 8;
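      /* Fall through.  */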
3203 case 0x8: /* 16-bit, no shift. */
3204 val = ~ val;
3205 for (i = 0; i < (full ? 8 : 4); i++)
3206 aarch64_set_vec_u16 (cpu, vd, i, val);
3207 return;
3208
3209 case 0xd: /* 32-bit, mask shift by 16. */
3210 val <<= 8;
3211 val |= 0xFF;
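      /* Fall through.  */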
3212 case 0xc: /* 32-bit, mask shift by 8. */
3213 val <<= 8;
3214 val |= 0xFF;
3215 val = ~ val;
3216 for (i = 0; i < (full ? 4 : 2); i++)
3217 aarch64_set_vec_u32 (cpu, vd, i, val);
3218 return;
3219
3220 case 0xE: /* MOVI Dn, #mask64 */
3221 {
3222 uint64_t mask = 0;
3223
3224 for (i = 0; i < 8; i++)
3225 if (val & (1 << i))
3226 mask |= (0xFFUL << (i * 8));
3227 aarch64_set_vec_u64 (cpu, vd, 0, mask);
3228 aarch64_set_vec_u64 (cpu, vd, 1, full ? mask : 0);
3229 return;
3230 }
3231
3232 case 0xf: /* FMOV Vd.2D, #fpimm. */
3233 {
3234 double u = fp_immediate_for_encoding_64 (val);
3235
3236 if (! full)
3237 HALT_UNALLOC;
3238
3239 aarch64_set_vec_double (cpu, vd, 0, u);
3240 aarch64_set_vec_double (cpu, vd, 1, u);
3241 return;
3242 }
3243
3244 default:
3245 HALT_NYI;
3246 }
3247 }
3248
3249 #define ABS(A) ((A) < 0 ? - (A) : (A))
3250
3251 static void
3252 do_vec_ABS (sim_cpu *cpu)
3253 {
3254 /* instr[31] = 0
3255 instr[30] = half(0)/full(1)
3256 instr[29,24] = 00 1110
3257 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3258 instr[21,10] = 10 0000 1011 10
3259 instr[9,5] = Vn
3260 instr[4,0] = Vd. */
3261
3262 unsigned vn = INSTR (9, 5);
3263 unsigned vd = INSTR (4, 0);
3264 unsigned full = INSTR (30, 30);
3265 unsigned i;
3266
3267 NYI_assert (29, 24, 0x0E);
3268 NYI_assert (21, 10, 0x82E);
3269
3270 switch (INSTR (23, 22))
3271 {
3272 case 0:
3273 for (i = 0; i < (full ? 16 : 8); i++)
3274 aarch64_set_vec_s8 (cpu, vd, i,
3275 ABS (aarch64_get_vec_s8 (cpu, vn, i)));
3276 break;
3277
3278 case 1:
3279 for (i = 0; i < (full ? 8 : 4); i++)
3280 aarch64_set_vec_s16 (cpu, vd, i,
3281 ABS (aarch64_get_vec_s16 (cpu, vn, i)));
3282 break;
3283
3284 case 2:
3285 for (i = 0; i < (full ? 4 : 2); i++)
3286 aarch64_set_vec_s32 (cpu, vd, i,
3287 ABS (aarch64_get_vec_s32 (cpu, vn, i)));
3288 break;
3289
3290 case 3:
3291 if (! full)
3292 HALT_NYI;
3293 for (i = 0; i < 2; i++)
3294 aarch64_set_vec_s64 (cpu, vd, i,
3295 ABS (aarch64_get_vec_s64 (cpu, vn, i)));
3296 break;
3297 }
3298 }
3299
3300 static void
3301 do_vec_ADDV (sim_cpu *cpu)
3302 {
3303 /* instr[31] = 0
3304 instr[30] = full/half selector
3305 instr[29,24] = 00 1110
3306 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3307 instr[21,10] = 11 0001 1011 10
3308 instr[9,5] = Vm
3309 instr[4,0] = Rd. */
3310
3311 unsigned vm = INSTR (9, 5);
3312 unsigned rd = INSTR (4, 0);
3313 unsigned i;
3314 uint64_t val = 0;
3315 int full = INSTR (30, 30);
3316
3317 NYI_assert (29, 24, 0x0E);
3318 NYI_assert (21, 10, 0xC6E);
3319
3320 switch (INSTR (23, 22))
3321 {
3322 case 0:
3323 for (i = 0; i < (full ? 16 : 8); i++)
3324 val += aarch64_get_vec_u8 (cpu, vm, i);
3325 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
3326 return;
3327
3328 case 1:
3329 for (i = 0; i < (full ? 8 : 4); i++)
3330 val += aarch64_get_vec_u16 (cpu, vm, i);
3331 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
3332 return;
3333
3334 case 2:
3335 for (i = 0; i < (full ? 4 : 2); i++)
3336 val += aarch64_get_vec_u32 (cpu, vm, i);
3337 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
3338 return;
3339
3340 case 3:
3341 if (! full)
3342 HALT_UNALLOC;
3343 val = aarch64_get_vec_u64 (cpu, vm, 0);
3344 val += aarch64_get_vec_u64 (cpu, vm, 1);
3345 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
3346 return;
3347 }
3348 }
3349
3350 static void
3351 do_vec_ins_2 (sim_cpu *cpu)
3352 {
3353 /* instr[31,21] = 01001110000
3354 instr[20,18] = size & element selector
3355 instr[17,14] = 0000
3356 instr[13] = direction: to vec(0), from vec (1)
3357 instr[12,10] = 111
3358 instr[9,5] = Vm
3359 instr[4,0] = Vd. */
3360
3361 unsigned elem;
3362 unsigned vm = INSTR (9, 5);
3363 unsigned vd = INSTR (4, 0);
3364
3365 NYI_assert (31, 21, 0x270);
3366 NYI_assert (17, 14, 0);
3367 NYI_assert (12, 10, 7);
3368
3369 if (INSTR (13, 13) == 1)
3370 {
3371 if (INSTR (18, 18) == 1)
3372 {
3373 /* 32-bit moves. */
3374 elem = INSTR (20, 19);
3375 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3376 aarch64_get_vec_u32 (cpu, vm, elem));
3377 }
3378 else
3379 {
3380 /* 64-bit moves. */
3381 if (INSTR (19, 19) != 1)
3382 HALT_NYI;
3383
3384 elem = INSTR (20, 20);
3385 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3386 aarch64_get_vec_u64 (cpu, vm, elem));
3387 }
3388 }
3389 else
3390 {
3391 if (INSTR (18, 18) == 1)
3392 {
3393 /* 32-bit moves. */
3394 elem = INSTR (20, 19);
3395 aarch64_set_vec_u32 (cpu, vd, elem,
3396 aarch64_get_reg_u32 (cpu, vm, NO_SP));
3397 }
3398 else
3399 {
3400 /* 64-bit moves. */
3401 if (INSTR (19, 19) != 1)
3402 HALT_NYI;
3403
3404 elem = INSTR (20, 20);
3405 aarch64_set_vec_u64 (cpu, vd, elem,
3406 aarch64_get_reg_u64 (cpu, vm, NO_SP));
3407 }
3408 }
3409 }
3410
3411 #define DO_VEC_WIDENING_MUL(N, DST_TYPE, READ_TYPE, WRITE_TYPE) \
3412 do \
3413 { \
3414 DST_TYPE a[N], b[N]; \
3415 \
3416 for (i = 0; i < (N); i++) \
3417 { \
3418 a[i] = aarch64_get_vec_##READ_TYPE (cpu, vn, i + bias); \
3419 b[i] = aarch64_get_vec_##READ_TYPE (cpu, vm, i + bias); \
3420 } \
3421 for (i = 0; i < (N); i++) \
3422 aarch64_set_vec_##WRITE_TYPE (cpu, vd, i, a[i] * b[i]); \
3423 } \
3424 while (0)
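/* Note that both source vectors are read in full before any result is
   written, so the expansion is safe even when Vd overlaps Vn or Vm.  */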
3425
3426 static void
3427 do_vec_mull (sim_cpu *cpu)
3428 {
3429 /* instr[31] = 0
3430 instr[30] = lower(0)/upper(1) selector
3431 instr[29] = signed(0)/unsigned(1)
3432 instr[28,24] = 0 1110
3433 instr[23,22] = size: 8-bit (00), 16-bit (01), 32-bit (10)
3434 instr[21] = 1
3435 instr[20,16] = Vm
3436 instr[15,10] = 11 0000
3437 instr[9,5] = Vn
3438 instr[4,0] = Vd. */
3439
3440 int unsign = INSTR (29, 29);
3441 int bias = INSTR (30, 30);
3442 unsigned vm = INSTR (20, 16);
3443 unsigned vn = INSTR ( 9, 5);
3444 unsigned vd = INSTR ( 4, 0);
3445 unsigned i;
3446
3447 NYI_assert (28, 24, 0x0E);
3448 NYI_assert (15, 10, 0x30);
3449
3450 /* NB: Read source values before writing results, in case
3451 the source and destination vectors are the same. */
3452 switch (INSTR (23, 22))
3453 {
3454 case 0:
3455 if (bias)
3456 bias = 8;
3457 if (unsign)
3458 DO_VEC_WIDENING_MUL (8, uint16_t, u8, u16);
3459 else
3460 DO_VEC_WIDENING_MUL (8, int16_t, s8, s16);
3461 return;
3462
3463 case 1:
3464 if (bias)
3465 bias = 4;
3466 if (unsign)
3467 DO_VEC_WIDENING_MUL (4, uint32_t, u16, u32);
3468 else
3469 DO_VEC_WIDENING_MUL (4, int32_t, s16, s32);
3470 return;
3471
3472 case 2:
3473 if (bias)
3474 bias = 2;
3475 if (unsign)
3476 DO_VEC_WIDENING_MUL (2, uint64_t, u32, u64);
3477 else
3478 DO_VEC_WIDENING_MUL (2, int64_t, s32, s64);
3479 return;
3480
3481 case 3:
3482 HALT_NYI;
3483 }
3484 }
3485
3486 static void
3487 do_vec_fadd (sim_cpu *cpu)
3488 {
3489 /* instr[31] = 0
3490 instr[30] = half(0)/full(1)
3491 instr[29,24] = 001110
3492 instr[23] = FADD(0)/FSUB(1)
3493 instr[22] = float (0)/double(1)
3494 instr[21] = 1
3495 instr[20,16] = Vm
3496 instr[15,10] = 110101
3497 instr[9,5] = Vn
3498 instr[4,0] = Vd. */
3499
3500 unsigned vm = INSTR (20, 16);
3501 unsigned vn = INSTR (9, 5);
3502 unsigned vd = INSTR (4, 0);
3503 unsigned i;
3504 int full = INSTR (30, 30);
3505
3506 NYI_assert (29, 24, 0x0E);
3507 NYI_assert (21, 21, 1);
3508 NYI_assert (15, 10, 0x35);
3509
3510 if (INSTR (23, 23))
3511 {
3512 if (INSTR (22, 22))
3513 {
3514 if (! full)
3515 HALT_NYI;
3516
3517 for (i = 0; i < 2; i++)
3518 aarch64_set_vec_double (cpu, vd, i,
3519 aarch64_get_vec_double (cpu, vn, i)
3520 - aarch64_get_vec_double (cpu, vm, i));
3521 }
3522 else
3523 {
3524 for (i = 0; i < (full ? 4 : 2); i++)
3525 aarch64_set_vec_float (cpu, vd, i,
3526 aarch64_get_vec_float (cpu, vn, i)
3527 - aarch64_get_vec_float (cpu, vm, i));
3528 }
3529 }
3530 else
3531 {
3532 if (INSTR (22, 22))
3533 {
3534 if (! full)
3535 HALT_NYI;
3536
3537 for (i = 0; i < 2; i++)
3538 aarch64_set_vec_double (cpu, vd, i,
3539 aarch64_get_vec_double (cpu, vm, i)
3540 + aarch64_get_vec_double (cpu, vn, i));
3541 }
3542 else
3543 {
3544 for (i = 0; i < (full ? 4 : 2); i++)
3545 aarch64_set_vec_float (cpu, vd, i,
3546 aarch64_get_vec_float (cpu, vm, i)
3547 + aarch64_get_vec_float (cpu, vn, i));
3548 }
3549 }
3550 }
3551
3552 static void
3553 do_vec_add (sim_cpu *cpu)
3554 {
3555 /* instr[31] = 0
3556 instr[30] = full/half selector
3557 instr[29,24] = 001110
3558 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3559 instr[21] = 1
3560 instr[20,16] = Vm
3561 instr[15,10] = 100001
3562 instr[9,5] = Vn
3563 instr[4,0] = Vd. */
3564
3565 unsigned vm = INSTR (20, 16);
3566 unsigned vn = INSTR (9, 5);
3567 unsigned vd = INSTR (4, 0);
3568 unsigned i;
3569 int full = INSTR (30, 30);
3570
3571 NYI_assert (29, 24, 0x0E);
3572 NYI_assert (21, 21, 1);
3573 NYI_assert (15, 10, 0x21);
3574
3575 switch (INSTR (23, 22))
3576 {
3577 case 0:
3578 for (i = 0; i < (full ? 16 : 8); i++)
3579 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
3580 + aarch64_get_vec_u8 (cpu, vm, i));
3581 return;
3582
3583 case 1:
3584 for (i = 0; i < (full ? 8 : 4); i++)
3585 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
3586 + aarch64_get_vec_u16 (cpu, vm, i));
3587 return;
3588
3589 case 2:
3590 for (i = 0; i < (full ? 4 : 2); i++)
3591 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
3592 + aarch64_get_vec_u32 (cpu, vm, i));
3593 return;
3594
3595 case 3:
3596 if (! full)
3597 HALT_UNALLOC;
3598 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vn, 0)
3599 + aarch64_get_vec_u64 (cpu, vm, 0));
3600 aarch64_set_vec_u64 (cpu, vd, 1,
3601 aarch64_get_vec_u64 (cpu, vn, 1)
3602 + aarch64_get_vec_u64 (cpu, vm, 1));
3603 return;
3604 }
3605 }
3606
3607 static void
3608 do_vec_mul (sim_cpu *cpu)
3609 {
3610 /* instr[31] = 0
3611 instr[30] = full/half selector
3612 instr[29,24] = 00 1110
3613 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3614 instr[21] = 1
3615 instr[20,16] = Vm
3616 instr[15,10] = 10 0111
3617 instr[9,5] = Vn
3618 instr[4,0] = Vd. */
3619
3620 unsigned vm = INSTR (20, 16);
3621 unsigned vn = INSTR (9, 5);
3622 unsigned vd = INSTR (4, 0);
3623 unsigned i;
3624 int full = INSTR (30, 30);
3625 int bias = 0;
3626
3627 NYI_assert (29, 24, 0x0E);
3628 NYI_assert (21, 21, 1);
3629 NYI_assert (15, 10, 0x27);
3630
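  /* Vector MUL is not a widening operation: each product is truncated
     to the element size, so the widening macro is used here with equal
     read and write widths.  */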
3631 switch (INSTR (23, 22))
3632 {
3633 case 0:
3634 DO_VEC_WIDENING_MUL (full ? 16 : 8, uint8_t, u8, u8);
3635 return;
3636
3637 case 1:
3638 DO_VEC_WIDENING_MUL (full ? 8 : 4, uint16_t, u16, u16);
3639 return;
3640
3641 case 2:
3642 DO_VEC_WIDENING_MUL (full ? 4 : 2, uint32_t, u32, u32);
3643 return;
3644
3645 case 3:
3646 HALT_UNALLOC;
3647 }
3648 }
3649
3650 static void
3651 do_vec_MLA (sim_cpu *cpu)
3652 {
3653 /* instr[31] = 0
3654 instr[30] = full/half selector
3655 instr[29,24] = 00 1110
3656 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3657 instr[21] = 1
3658 instr[20,16] = Vm
3659 instr[15,10] = 1001 01
3660 instr[9,5] = Vn
3661 instr[4,0] = Vd. */
3662
3663 unsigned vm = INSTR (20, 16);
3664 unsigned vn = INSTR (9, 5);
3665 unsigned vd = INSTR (4, 0);
3666 unsigned i;
3667 int full = INSTR (30, 30);
3668
3669 NYI_assert (29, 24, 0x0E);
3670 NYI_assert (21, 21, 1);
3671 NYI_assert (15, 10, 0x25);
3672
3673 switch (INSTR (23, 22))
3674 {
3675 case 0:
3676 {
3677 uint16_t a[16], b[16];
3678
3679 for (i = 0; i < (full ? 16 : 8); i++)
3680 {
3681 a[i] = aarch64_get_vec_u8 (cpu, vn, i);
3682 b[i] = aarch64_get_vec_u8 (cpu, vm, i);
3683 }
3684
3685 for (i = 0; i < (full ? 16 : 8); i++)
3686 {
3687 uint16_t v = aarch64_get_vec_u8 (cpu, vd, i);
3688
3689 aarch64_set_vec_u8 (cpu, vd, i, v + (a[i] * b[i]));
3690 }
3691 }
3692 return;
3693
3694 case 1:
3695 {
3696 uint32_t a[8], b[8];
3697
3698 for (i = 0; i < (full ? 8 : 4); i++)
3699 {
3700 a[i] = aarch64_get_vec_u16 (cpu, vn, i);
3701 b[i] = aarch64_get_vec_u16 (cpu, vm, i);
3702 }
3703
3704 for (i = 0; i < (full ? 8 : 4); i++)
3705 {
3706 uint32_t v = aarch64_get_vec_u16 (cpu, vd, i);
3707
3708 aarch64_set_vec_u16 (cpu, vd, i, v + (a[i] * b[i]));
3709 }
3710 }
3711 return;
3712
3713 case 2:
3714 {
3715 uint64_t a[4], b[4];
3716
3717 for (i = 0; i < (full ? 4 : 2); i++)
3718 {
3719 a[i] = aarch64_get_vec_u32 (cpu, vn, i);
3720 b[i] = aarch64_get_vec_u32 (cpu, vm, i);
3721 }
3722
3723 for (i = 0; i < (full ? 4 : 2); i++)
3724 {
3725 uint64_t v = aarch64_get_vec_u32 (cpu, vd, i);
3726
3727 aarch64_set_vec_u32 (cpu, vd, i, v + (a[i] * b[i]));
3728 }
3729 }
3730 return;
3731
3732 case 3:
3733 HALT_UNALLOC;
3734 }
3735 }
3736
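/* Helpers for the FMAXNM/FMINNM family: if exactly one operand is a
   NaN the other operand is returned, otherwise the larger (smaller)
   of the two.  */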
3737 static float
3738 fmaxnm (float a, float b)
3739 {
3740 if (! isnan (a))
3741 {
3742 if (! isnan (b))
3743 return a > b ? a : b;
3744 return a;
3745 }
3746 else if (! isnan (b))
3747 return b;
3748 return a;
3749 }
3750
3751 static float
3752 fminnm (float a, float b)
3753 {
3754 if (! isnan (a))
3755 {
3756 if (! isnan (b))
3757 return a < b ? a : b;
3758 return a;
3759 }
3760 else if (! isnan (b))
3761 return b;
3762 return a;
3763 }
3764
3765 static double
3766 dmaxnm (double a, double b)
3767 {
3768 if (! isnan (a))
3769 {
3770 if (! isnan (b))
3771 return a > b ? a : b;
3772 return a;
3773 }
3774 else if (! isnan (b))
3775 return b;
3776 return a;
3777 }
3778
3779 static double
3780 dminnm (double a, double b)
3781 {
3782 if (! isnan (a))
3783 {
3784 if (! isnan (b))
3785 return a < b ? a : b;
3786 return a;
3787 }
3788 else if (! isnan (b))
3789 return b;
3790 return a;
3791 }
3792
3793 static void
3794 do_vec_FminmaxNMP (sim_cpu *cpu)
3795 {
3796 /* instr [31] = 0
3797 instr [30] = half (0)/full (1)
3798 instr [29,24] = 10 1110
3799 instr [23] = max(0)/min(1)
3800 instr [22] = float (0)/double (1)
3801 instr [21] = 1
3802 instr [20,16] = Vm
3803 instr [15,10] = 1100 01
3804 instr [9,5] = Vn
3805 instr [4,0] = Vd. */
3806
3807 unsigned vm = INSTR (20, 16);
3808 unsigned vn = INSTR (9, 5);
3809 unsigned vd = INSTR (4, 0);
3810 int full = INSTR (30, 30);
3811
3812 NYI_assert (29, 24, 0x2E);
3813 NYI_assert (21, 21, 1);
3814 NYI_assert (15, 10, 0x31);
3815
3816 if (INSTR (22, 22))
3817 {
3818 double (* fn)(double, double) = INSTR (23, 23)
3819 ? dminnm : dmaxnm;
3820
3821 if (! full)
3822 HALT_NYI;
3823 aarch64_set_vec_double (cpu, vd, 0,
3824 fn (aarch64_get_vec_double (cpu, vn, 0),
3825 aarch64_get_vec_double (cpu, vn, 1)));
3826 aarch64_set_vec_double (cpu, vd, 1,
3827 fn (aarch64_get_vec_double (cpu, vm, 0),
3828 aarch64_get_vec_double (cpu, vm, 1)));
3829 }
3830 else
3831 {
3832 float (* fn)(float, float) = INSTR (23, 23)
3833 ? fminnm : fmaxnm;
3834
3835 aarch64_set_vec_float (cpu, vd, 0,
3836 fn (aarch64_get_vec_float (cpu, vn, 0),
3837 aarch64_get_vec_float (cpu, vn, 1)));
3838 if (full)
3839 aarch64_set_vec_float (cpu, vd, 1,
3840 fn (aarch64_get_vec_float (cpu, vn, 2),
3841 aarch64_get_vec_float (cpu, vn, 3)));
3842
3843 aarch64_set_vec_float (cpu, vd, (full ? 2 : 1),
3844 fn (aarch64_get_vec_float (cpu, vm, 0),
3845 aarch64_get_vec_float (cpu, vm, 1)));
3846 if (full)
3847 aarch64_set_vec_float (cpu, vd, 3,
3848 fn (aarch64_get_vec_float (cpu, vm, 2),
3849 aarch64_get_vec_float (cpu, vm, 3)));
3850 }
3851 }
3852
3853 static void
3854 do_vec_AND (sim_cpu *cpu)
3855 {
3856 /* instr[31] = 0
3857 instr[30] = half (0)/full (1)
3858 instr[29,21] = 001110001
3859 instr[20,16] = Vm
3860 instr[15,10] = 000111
3861 instr[9,5] = Vn
3862 instr[4,0] = Vd. */
3863
3864 unsigned vm = INSTR (20, 16);
3865 unsigned vn = INSTR (9, 5);
3866 unsigned vd = INSTR (4, 0);
3867 unsigned i;
3868 int full = INSTR (30, 30);
3869
3870 NYI_assert (29, 21, 0x071);
3871 NYI_assert (15, 10, 0x07);
3872
3873 for (i = 0; i < (full ? 4 : 2); i++)
3874 aarch64_set_vec_u32 (cpu, vd, i,
3875 aarch64_get_vec_u32 (cpu, vn, i)
3876 & aarch64_get_vec_u32 (cpu, vm, i));
3877 }
3878
3879 static void
3880 do_vec_BSL (sim_cpu *cpu)
3881 {
3882 /* instr[31] = 0
3883 instr[30] = half (0)/full (1)
3884 instr[29,21] = 101110011
3885 instr[20,16] = Vm
3886 instr[15,10] = 000111
3887 instr[9,5] = Vn
3888 instr[4,0] = Vd. */
3889
3890 unsigned vm = INSTR (20, 16);
3891 unsigned vn = INSTR (9, 5);
3892 unsigned vd = INSTR (4, 0);
3893 unsigned i;
3894 int full = INSTR (30, 30);
3895
3896 NYI_assert (29, 21, 0x173);
3897 NYI_assert (15, 10, 0x07);
3898
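  /* BSL: each result bit is taken from Vn where the corresponding bit
     of the old Vd is set, and from Vm where it is clear.  */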
3899 for (i = 0; i < (full ? 16 : 8); i++)
3900 aarch64_set_vec_u8 (cpu, vd, i,
3901 ( aarch64_get_vec_u8 (cpu, vd, i)
3902 & aarch64_get_vec_u8 (cpu, vn, i))
3903 | ((~ aarch64_get_vec_u8 (cpu, vd, i))
3904 & aarch64_get_vec_u8 (cpu, vm, i)));
3905 }
3906
3907 static void
3908 do_vec_EOR (sim_cpu *cpu)
3909 {
3910 /* instr[31] = 0
3911 instr[30] = half (0)/full (1)
3912 instr[29,21] = 10 1110 001
3913 instr[20,16] = Vm
3914 instr[15,10] = 000111
3915 instr[9,5] = Vn
3916 instr[4,0] = Vd. */
3917
3918 unsigned vm = INSTR (20, 16);
3919 unsigned vn = INSTR (9, 5);
3920 unsigned vd = INSTR (4, 0);
3921 unsigned i;
3922 int full = INSTR (30, 30);
3923
3924 NYI_assert (29, 21, 0x171);
3925 NYI_assert (15, 10, 0x07);
3926
3927 for (i = 0; i < (full ? 4 : 2); i++)
3928 aarch64_set_vec_u32 (cpu, vd, i,
3929 aarch64_get_vec_u32 (cpu, vn, i)
3930 ^ aarch64_get_vec_u32 (cpu, vm, i));
3931 }
3932
3933 static void
3934 do_vec_bit (sim_cpu *cpu)
3935 {
3936 /* instr[31] = 0
3937 instr[30] = half (0)/full (1)
3938 instr[29,23] = 10 1110 1
3939 instr[22] = BIT (0) / BIF (1)
3940 instr[21] = 1
3941 instr[20,16] = Vm
3942 instr[15,10] = 0001 11
3943 instr[9,5] = Vn
3944 instr[4,0] = Vd. */
3945
3946 unsigned vm = INSTR (20, 16);
3947 unsigned vn = INSTR (9, 5);
3948 unsigned vd = INSTR (4, 0);
3949 unsigned full = INSTR (30, 30);
3950 unsigned test_false = INSTR (22, 22);
3951 unsigned i;
3952
3953 NYI_assert (29, 23, 0x5D);
3954 NYI_assert (21, 21, 1);
3955 NYI_assert (15, 10, 0x07);
3956
3957 /* BIT and BIF are bitwise inserts: BIT takes the bits of Vn selected
3958 by the set bits of Vm, BIF those selected by the clear bits.  */
3959 for (i = 0; i < (full ? 4 : 2); i++)
3960 {
3961 uint32_t vd_val = aarch64_get_vec_u32 (cpu, vd, i);
3962 uint32_t vn_val = aarch64_get_vec_u32 (cpu, vn, i);
3963 uint32_t vm_val = aarch64_get_vec_u32 (cpu, vm, i);
3964 if (test_false)
3965 aarch64_set_vec_u32 (cpu, vd, i, (vd_val & vm_val) | (vn_val & ~vm_val));
3966 else
3967 aarch64_set_vec_u32 (cpu, vd, i, (vd_val & ~vm_val) | (vn_val & vm_val));
3968 }
3969 }
3970
3971 static void
3972 do_vec_ORN (sim_cpu *cpu)
3973 {
3974 /* instr[31] = 0
3975 instr[30] = half (0)/full (1)
3976 instr[29,21] = 00 1110 111
3977 instr[20,16] = Vm
3978 instr[15,10] = 00 0111
3979 instr[9,5] = Vn
3980 instr[4,0] = Vd. */
3981
3982 unsigned vm = INSTR (20, 16);
3983 unsigned vn = INSTR (9, 5);
3984 unsigned vd = INSTR (4, 0);
3985 unsigned i;
3986 int full = INSTR (30, 30);
3987
3988 NYI_assert (29, 21, 0x077);
3989 NYI_assert (15, 10, 0x07);
3990
3991 for (i = 0; i < (full ? 16 : 8); i++)
3992 aarch64_set_vec_u8 (cpu, vd, i,
3993 aarch64_get_vec_u8 (cpu, vn, i)
3994 | ~ aarch64_get_vec_u8 (cpu, vm, i));
3995 }
3996
3997 static void
3998 do_vec_ORR (sim_cpu *cpu)
3999 {
4000 /* instr[31] = 0
4001 instr[30] = half (0)/full (1)
4002 instr[29,21] = 00 1110 101
4003 instr[20,16] = Vm
4004 instr[15,10] = 0001 11
4005 instr[9,5] = Vn
4006 instr[4,0] = Vd. */
4007
4008 unsigned vm = INSTR (20, 16);
4009 unsigned vn = INSTR (9, 5);
4010 unsigned vd = INSTR (4, 0);
4011 unsigned i;
4012 int full = INSTR (30, 30);
4013
4014 NYI_assert (29, 21, 0x075);
4015 NYI_assert (15, 10, 0x07);
4016
4017 for (i = 0; i < (full ? 16 : 8); i++)
4018 aarch64_set_vec_u8 (cpu, vd, i,
4019 aarch64_get_vec_u8 (cpu, vn, i)
4020 | aarch64_get_vec_u8 (cpu, vm, i));
4021 }
4022
4023 static void
4024 do_vec_BIC (sim_cpu *cpu)
4025 {
4026 /* instr[31] = 0
4027 instr[30] = half (0)/full (1)
4028 instr[29,21] = 00 1110 011
4029 instr[20,16] = Vm
4030 instr[15,10] = 00 0111
4031 instr[9,5] = Vn
4032 instr[4,0] = Vd. */
4033
4034 unsigned vm = INSTR (20, 16);
4035 unsigned vn = INSTR (9, 5);
4036 unsigned vd = INSTR (4, 0);
4037 unsigned i;
4038 int full = INSTR (30, 30);
4039
4040 NYI_assert (29, 21, 0x073);
4041 NYI_assert (15, 10, 0x07);
4042
4043 for (i = 0; i < (full ? 16 : 8); i++)
4044 aarch64_set_vec_u8 (cpu, vd, i,
4045 aarch64_get_vec_u8 (cpu, vn, i)
4046 & ~ aarch64_get_vec_u8 (cpu, vm, i));
4047 }
4048
4049 static void
4050 do_vec_XTN (sim_cpu *cpu)
4051 {
4052 /* instr[31] = 0
4053 instr[30] = first part (0)/ second part (1)
4054 instr[29,24] = 00 1110
4055 instr[23,22] = size: byte(00), half(01), word (10)
4056 instr[21,10] = 1000 0100 1010
4057 instr[9,5] = Vs
4058 instr[4,0] = Vd. */
4059
4060 unsigned vs = INSTR (9, 5);
4061 unsigned vd = INSTR (4, 0);
4062 unsigned bias = INSTR (30, 30);
4063 unsigned i;
4064
4065 NYI_assert (29, 24, 0x0E);
4066 NYI_assert (21, 10, 0x84A);
4067
4068 switch (INSTR (23, 22))
4069 {
4070 case 0:
4071 if (bias)
4072 for (i = 0; i < 8; i++)
4073 aarch64_set_vec_u8 (cpu, vd, i + 8,
4074 aarch64_get_vec_u16 (cpu, vs, i) >> 8);
4075 else
4076 for (i = 0; i < 8; i++)
4077 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, i));
4078 return;
4079
4080 case 1:
4081 if (bias)
4082 for (i = 0; i < 4; i++)
4083 aarch64_set_vec_u16 (cpu, vd, i + 4,
4084 aarch64_get_vec_u32 (cpu, vs, i) >> 16);
4085 else
4086 for (i = 0; i < 4; i++)
4087 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, i));
4088 return;
4089
4090 case 2:
4091 if (bias)
4092 for (i = 0; i < 2; i++)
4093 aarch64_set_vec_u32 (cpu, vd, i + 2,
4094 aarch64_get_vec_u64 (cpu, vs, i) >> 32);
4095 else
4096 for (i = 0; i < 2; i++)
4097 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, i));
4098 return;
4099 }
4100 }
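/* A worked example of the narrowing above (a sketch; the lane numbers
   assume the 128-bit register layout used throughout this file): XTN and
   XTN2 both keep the low half of each wide source element.  For word-sized
   sources the two 64-bit lanes narrow into 32-bit lanes 0-1 (XTN) or 2-3
   (XTN2), the upper half of the four-lane destination; bytes use lanes
   0-7 / 8-15 and halfwords 0-3 / 4-7 in the same way.  */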
4101
4102 static void
4103 do_vec_maxv (sim_cpu *cpu)
4104 {
4105 /* instr[31] = 0
4106 instr[30] = half(0)/full(1)
4107 instr[29] = signed (0)/unsigned(1)
4108 instr[28,24] = 0 1110
4109 instr[23,22] = size: byte(00), half(01), word (10)
4110 instr[21] = 1
4111 instr[20,17] = 1000
4112 instr[16] = max(0)/min(1)
4113 instr[15,10] = 1010 10
4114 instr[9,5] = V source
4115 instr[4,0] = R dest. */
4116
4117 unsigned vs = INSTR (9, 5);
4118 unsigned rd = INSTR (4, 0);
4119 unsigned full = INSTR (30, 30);
4120 unsigned i;
4121
4122 NYI_assert (28, 24, 0x0E);
4123 NYI_assert (21, 21, 1);
4124 NYI_assert (20, 17, 8);
4125 NYI_assert (15, 10, 0x2A);
4126
4127 switch ((INSTR (29, 29) << 1) | INSTR (16, 16))
4128 {
4129 case 0: /* SMAXV. */
4130 {
4131 int64_t smax;
4132 switch (INSTR (23, 22))
4133 {
4134 case 0:
4135 smax = aarch64_get_vec_s8 (cpu, vs, 0);
4136 for (i = 1; i < (full ? 16 : 8); i++)
4137 smax = max (smax, aarch64_get_vec_s8 (cpu, vs, i));
4138 break;
4139 case 1:
4140 smax = aarch64_get_vec_s16 (cpu, vs, 0);
4141 for (i = 1; i < (full ? 8 : 4); i++)
4142 smax = max (smax, aarch64_get_vec_s16 (cpu, vs, i));
4143 break;
4144 case 2:
4145 smax = aarch64_get_vec_s32 (cpu, vs, 0);
4146 for (i = 1; i < (full ? 4 : 2); i++)
4147 smax = max (smax, aarch64_get_vec_s32 (cpu, vs, i));
4148 break;
4149 case 3:
4150 HALT_UNALLOC;
4151 }
4152 aarch64_set_reg_s64 (cpu, rd, NO_SP, smax);
4153 return;
4154 }
4155
4156 case 1: /* SMINV. */
4157 {
4158 int64_t smin;
4159 switch (INSTR (23, 22))
4160 {
4161 case 0:
4162 smin = aarch64_get_vec_s8 (cpu, vs, 0);
4163 for (i = 1; i < (full ? 16 : 8); i++)
4164 smin = min (smin, aarch64_get_vec_s8 (cpu, vs, i));
4165 break;
4166 case 1:
4167 smin = aarch64_get_vec_s16 (cpu, vs, 0);
4168 for (i = 1; i < (full ? 8 : 4); i++)
4169 smin = min (smin, aarch64_get_vec_s16 (cpu, vs, i));
4170 break;
4171 case 2:
4172 smin = aarch64_get_vec_s32 (cpu, vs, 0);
4173 for (i = 1; i < (full ? 4 : 2); i++)
4174 smin = min (smin, aarch64_get_vec_s32 (cpu, vs, i));
4175 break;
4176
4177 case 3:
4178 HALT_UNALLOC;
4179 }
4180 aarch64_set_reg_s64 (cpu, rd, NO_SP, smin);
4181 return;
4182 }
4183
4184 case 2: /* UMAXV. */
4185 {
4186 uint64_t umax;
4187 switch (INSTR (23, 22))
4188 {
4189 case 0:
4190 umax = aarch64_get_vec_u8 (cpu, vs, 0);
4191 for (i = 1; i < (full ? 16 : 8); i++)
4192 umax = max (umax, aarch64_get_vec_u8 (cpu, vs, i));
4193 break;
4194 case 1:
4195 umax = aarch64_get_vec_u16 (cpu, vs, 0);
4196 for (i = 1; i < (full ? 8 : 4); i++)
4197 umax = max (umax, aarch64_get_vec_u16 (cpu, vs, i));
4198 break;
4199 case 2:
4200 umax = aarch64_get_vec_u32 (cpu, vs, 0);
4201 for (i = 1; i < (full ? 4 : 2); i++)
4202 umax = max (umax, aarch64_get_vec_u32 (cpu, vs, i));
4203 break;
4204
4205 case 3:
4206 HALT_UNALLOC;
4207 }
4208 aarch64_set_reg_u64 (cpu, rd, NO_SP, umax);
4209 return;
4210 }
4211
4212 case 3: /* UMINV. */
4213 {
4214 uint64_t umin;
4215 switch (INSTR (23, 22))
4216 {
4217 case 0:
4218 umin = aarch64_get_vec_u8 (cpu, vs, 0);
4219 for (i = 1; i < (full ? 16 : 8); i++)
4220 umin = min (umin, aarch64_get_vec_u8 (cpu, vs, i));
4221 break;
4222 case 1:
4223 umin = aarch64_get_vec_u16 (cpu, vs, 0);
4224 for (i = 1; i < (full ? 8 : 4); i++)
4225 umin = min (umin, aarch64_get_vec_u16 (cpu, vs, i));
4226 break;
4227 case 2:
4228 umin = aarch64_get_vec_u32 (cpu, vs, 0);
4229 for (i = 1; i < (full ? 4 : 2); i++)
4230 umin = min (umin, aarch64_get_vec_u32 (cpu, vs, i));
4231 break;
4232
4233 case 3:
4234 HALT_UNALLOC;
4235 }
4236 aarch64_set_reg_u64 (cpu, rd, NO_SP, umin);
4237 return;
4238 }
4239 }
4240 }
4241
4242 static void
4243 do_vec_fminmaxV (sim_cpu *cpu)
4244 {
4245 /* instr[31,24] = 0110 1110
4246 instr[23] = max(0)/min(1)
4247 instr[22,14] = 011 0000 11
4248 instr[13,12] = nm(00)/normal(11)
4249 instr[11,10] = 10
4250 instr[9,5] = V source
4251 instr[4,0] = R dest. */
4252
4253 unsigned vs = INSTR (9, 5);
4254 unsigned rd = INSTR (4, 0);
4255 unsigned i;
4256 float res = aarch64_get_vec_float (cpu, vs, 0);
4257
4258 NYI_assert (31, 24, 0x6E);
4259 NYI_assert (22, 14, 0x0C3);
4260 NYI_assert (11, 10, 2);
4261
4262 if (INSTR (23, 23))
4263 {
4264 switch (INSTR (13, 12))
4265 {
4266 case 0: /* FMINNMV. */
4267 for (i = 1; i < 4; i++)
4268 res = fminnm (res, aarch64_get_vec_float (cpu, vs, i));
4269 break;
4270
4271 case 3: /* FMINV. */
4272 for (i = 1; i < 4; i++)
4273 res = min (res, aarch64_get_vec_float (cpu, vs, i));
4274 break;
4275
4276 default:
4277 HALT_NYI;
4278 }
4279 }
4280 else
4281 {
4282 switch (INSTR (13, 12))
4283 {
4284 case 0: /* FMAXNMV. */
4285 for (i = 1; i < 4; i++)
4286 res = fmaxnm (res, aarch64_get_vec_float (cpu, vs, i));
4287 break;
4288
4289 case 3: /* FMAXV. */
4290 for (i = 1; i < 4; i++)
4291 res = max (res, aarch64_get_vec_float (cpu, vs, i));
4292 break;
4293
4294 default:
4295 HALT_NYI;
4296 }
4297 }
4298
4299 aarch64_set_FP_float (cpu, rd, res);
4300 }
4301
4302 static void
4303 do_vec_Fminmax (sim_cpu *cpu)
4304 {
4305 /* instr[31] = 0
4306 instr[30] = half(0)/full(1)
4307 instr[29,24] = 00 1110
4308 instr[23] = max(0)/min(1)
4309 instr[22] = float(0)/double(1)
4310 instr[21] = 1
4311 instr[20,16] = Vm
4312 instr[15,14] = 11
4313 instr[13,12] = nm(00)/normal(11)
4314 instr[11,10] = 01
4315 instr[9,5] = Vn
4316 instr[4,0] = Vd. */
4317
4318 unsigned vm = INSTR (20, 16);
4319 unsigned vn = INSTR (9, 5);
4320 unsigned vd = INSTR (4, 0);
4321 unsigned full = INSTR (30, 30);
4322 unsigned min = INSTR (23, 23);
4323 unsigned i;
4324
4325 NYI_assert (29, 24, 0x0E);
4326 NYI_assert (21, 21, 1);
4327 NYI_assert (15, 14, 3);
4328 NYI_assert (11, 10, 1);
4329
4330 if (INSTR (22, 22))
4331 {
4332 double (* func)(double, double);
4333
4334 if (! full)
4335 HALT_NYI;
4336
4337 if (INSTR (13, 12) == 0)
4338 func = min ? dminnm : dmaxnm;
4339 else if (INSTR (13, 12) == 3)
4340 func = min ? fmin : fmax;
4341 else
4342 HALT_NYI;
4343
4344 for (i = 0; i < 2; i++)
4345 aarch64_set_vec_double (cpu, vd, i,
4346 func (aarch64_get_vec_double (cpu, vn, i),
4347 aarch64_get_vec_double (cpu, vm, i)));
4348 }
4349 else
4350 {
4351 float (* func)(float, float);
4352
4353 if (INSTR (13, 12) == 0)
4354 func = min ? fminnm : fmaxnm;
4355 else if (INSTR (13, 12) == 3)
4356 func = min ? fminf : fmaxf;
4357 else
4358 HALT_NYI;
4359
4360 for (i = 0; i < (full ? 4 : 2); i++)
4361 aarch64_set_vec_float (cpu, vd, i,
4362 func (aarch64_get_vec_float (cpu, vn, i),
4363 aarch64_get_vec_float (cpu, vm, i)));
4364 }
4365 }
4366
4367 static void
4368 do_vec_SCVTF (sim_cpu *cpu)
4369 {
4370 /* instr[31] = 0
4371 instr[30] = Q
4372 instr[29,23] = 00 1110 0
4373 instr[22] = float(0)/double(1)
4374 instr[21,10] = 10 0001 1101 10
4375 instr[9,5] = Vn
4376 instr[4,0] = Vd. */
4377
4378 unsigned vn = INSTR (9, 5);
4379 unsigned vd = INSTR (4, 0);
4380 unsigned full = INSTR (30, 30);
4381 unsigned size = INSTR (22, 22);
4382 unsigned i;
4383
4384 NYI_assert (29, 23, 0x1C);
4385 NYI_assert (21, 10, 0x876);
4386
4387 if (size)
4388 {
4389 if (! full)
4390 HALT_UNALLOC;
4391
4392 for (i = 0; i < 2; i++)
4393 {
4394 double val = (double) aarch64_get_vec_s64 (cpu, vn, i);
4395 aarch64_set_vec_double (cpu, vd, i, val);
4396 }
4397 }
4398 else
4399 {
4400 for (i = 0; i < (full ? 4 : 2); i++)
4401 {
4402 float val = (float) aarch64_get_vec_s32 (cpu, vn, i);
4403 aarch64_set_vec_float (cpu, vd, i, val);
4404 }
4405 }
4406 }
4407
4408 #define VEC_CMP(SOURCE, CMP) \
4409 do \
4410 { \
4411 switch (size) \
4412 { \
4413 case 0: \
4414 for (i = 0; i < (full ? 16 : 8); i++) \
4415 aarch64_set_vec_u8 (cpu, vd, i, \
4416 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4417 CMP \
4418 aarch64_get_vec_##SOURCE##8 (cpu, vm, i) \
4419 ? -1 : 0); \
4420 return; \
4421 case 1: \
4422 for (i = 0; i < (full ? 8 : 4); i++) \
4423 aarch64_set_vec_u16 (cpu, vd, i, \
4424 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4425 CMP \
4426 aarch64_get_vec_##SOURCE##16 (cpu, vm, i) \
4427 ? -1 : 0); \
4428 return; \
4429 case 2: \
4430 for (i = 0; i < (full ? 4 : 2); i++) \
4431 aarch64_set_vec_u32 (cpu, vd, i, \
4432 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4433 CMP \
4434 aarch64_get_vec_##SOURCE##32 (cpu, vm, i) \
4435 ? -1 : 0); \
4436 return; \
4437 case 3: \
4438 if (! full) \
4439 HALT_UNALLOC; \
4440 for (i = 0; i < 2; i++) \
4441 aarch64_set_vec_u64 (cpu, vd, i, \
4442 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4443 CMP \
4444 aarch64_get_vec_##SOURCE##64 (cpu, vm, i) \
4445 ? -1ULL : 0); \
4446 return; \
4447 } \
4448 } \
4449 while (0)
4450
4451 #define VEC_CMP0(SOURCE, CMP) \
4452 do \
4453 { \
4454 switch (size) \
4455 { \
4456 case 0: \
4457 for (i = 0; i < (full ? 16 : 8); i++) \
4458 aarch64_set_vec_u8 (cpu, vd, i, \
4459 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4460 CMP 0 ? -1 : 0); \
4461 return; \
4462 case 1: \
4463 for (i = 0; i < (full ? 8 : 4); i++) \
4464 aarch64_set_vec_u16 (cpu, vd, i, \
4465 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4466 CMP 0 ? -1 : 0); \
4467 return; \
4468 case 2: \
4469 for (i = 0; i < (full ? 4 : 2); i++) \
4470 aarch64_set_vec_u32 (cpu, vd, i, \
4471 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4472 CMP 0 ? -1 : 0); \
4473 return; \
4474 case 3: \
4475 if (! full) \
4476 HALT_UNALLOC; \
4477 for (i = 0; i < 2; i++) \
4478 aarch64_set_vec_u64 (cpu, vd, i, \
4479 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4480 CMP 0 ? -1ULL : 0); \
4481 return; \
4482 } \
4483 } \
4484 while (0)
4485
4486 #define VEC_FCMP0(CMP) \
4487 do \
4488 { \
4489 if (vm != 0) \
4490 HALT_NYI; \
4491 if (INSTR (22, 22)) \
4492 { \
4493 if (! full) \
4494 HALT_NYI; \
4495 for (i = 0; i < 2; i++) \
4496 aarch64_set_vec_u64 (cpu, vd, i, \
4497 aarch64_get_vec_double (cpu, vn, i) \
4498 CMP 0.0 ? -1 : 0); \
4499 } \
4500 else \
4501 { \
4502 for (i = 0; i < (full ? 4 : 2); i++) \
4503 aarch64_set_vec_u32 (cpu, vd, i, \
4504 aarch64_get_vec_float (cpu, vn, i) \
4505 CMP 0.0 ? -1 : 0); \
4506 } \
4507 return; \
4508 } \
4509 while (0)
4510
4511 #define VEC_FCMP(CMP) \
4512 do \
4513 { \
4514 if (INSTR (22, 22)) \
4515 { \
4516 if (! full) \
4517 HALT_NYI; \
4518 for (i = 0; i < 2; i++) \
4519 aarch64_set_vec_u64 (cpu, vd, i, \
4520 aarch64_get_vec_double (cpu, vn, i) \
4521 CMP \
4522 aarch64_get_vec_double (cpu, vm, i) \
4523 ? -1 : 0); \
4524 } \
4525 else \
4526 { \
4527 for (i = 0; i < (full ? 4 : 2); i++) \
4528 aarch64_set_vec_u32 (cpu, vd, i, \
4529 aarch64_get_vec_float (cpu, vn, i) \
4530 CMP \
4531 aarch64_get_vec_float (cpu, vm, i) \
4532 ? -1 : 0); \
4533 } \
4534 return; \
4535 } \
4536 while (0)
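/* A sketch of how the comparison helpers above expand: VEC_CMP (s, >)
   with size == 2 is equivalent to

     for (i = 0; i < (full ? 4 : 2); i++)
       aarch64_set_vec_u32 (cpu, vd, i,
                            aarch64_get_vec_s32 (cpu, vn, i)
                            > aarch64_get_vec_s32 (cpu, vm, i) ? -1 : 0);

   so each destination lane becomes all-ones when the comparison holds and
   all-zeros otherwise, which is the architected CMGT result.  */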
4537
4538 static void
4539 do_vec_compare (sim_cpu *cpu)
4540 {
4541 /* instr[31] = 0
4542 instr[30] = half(0)/full(1)
4543 instr[29] = part-of-comparison-type
4544 instr[28,24] = 0 1110
4545 instr[23,22] = size of integer compares: byte(00), half(01), word (10), long (11)
4546 type of float compares: single (-0) / double (-1)
4547 instr[21] = 1
4548 instr[20,16] = Vm or 00000 (compare vs 0)
4549 instr[15,10] = part-of-comparison-type
4550 instr[9,5] = Vn
4551 instr[4,0] = Vd. */
4552
4553 int full = INSTR (30, 30);
4554 int size = INSTR (23, 22);
4555 unsigned vm = INSTR (20, 16);
4556 unsigned vn = INSTR (9, 5);
4557 unsigned vd = INSTR (4, 0);
4558 unsigned i;
4559
4560 NYI_assert (28, 24, 0x0E);
4561 NYI_assert (21, 21, 1);
4562
4563 if ((INSTR (11, 11)
4564 && INSTR (14, 14))
4565 || ((INSTR (11, 11) == 0
4566 && INSTR (10, 10) == 0)))
4567 {
4568 /* A compare vs 0. */
4569 if (vm != 0)
4570 {
4571 if (INSTR (15, 10) == 0x2A)
4572 do_vec_maxv (cpu);
4573 else if (INSTR (15, 10) == 0x32
4574 || INSTR (15, 10) == 0x3E)
4575 do_vec_fminmaxV (cpu);
4576 else if (INSTR (29, 23) == 0x1C
4577 && INSTR (21, 10) == 0x876)
4578 do_vec_SCVTF (cpu);
4579 else
4580 HALT_NYI;
4581 return;
4582 }
4583 }
4584
4585 if (INSTR (14, 14))
4586 {
4587 /* A floating point compare. */
4588 unsigned decode = (INSTR (29, 29) << 5) | (INSTR (23, 23) << 4)
4589 | INSTR (13, 10);
4590
4591 NYI_assert (15, 15, 1);
4592
4593 switch (decode)
4594 {
4595 case /* 0b010010: GT#0 */ 0x12: VEC_FCMP0 (>);
4596 case /* 0b110010: GE#0 */ 0x32: VEC_FCMP0 (>=);
4597 case /* 0b010110: EQ#0 */ 0x16: VEC_FCMP0 (==);
4598 case /* 0b110110: LE#0 */ 0x36: VEC_FCMP0 (<=);
4599 case /* 0b011010: LT#0 */ 0x1A: VEC_FCMP0 (<);
4600 case /* 0b111001: GT */ 0x39: VEC_FCMP (>);
4601 case /* 0b101001: GE */ 0x29: VEC_FCMP (>=);
4602 case /* 0b001001: EQ */ 0x09: VEC_FCMP (==);
4603
4604 default:
4605 HALT_NYI;
4606 }
4607 }
4608 else
4609 {
4610 unsigned decode = (INSTR (29, 29) << 6) | INSTR (15, 10);
4611
4612 switch (decode)
4613 {
4614 case 0x0D: /* 0001101 GT */ VEC_CMP (s, > );
4615 case 0x0F: /* 0001111 GE */ VEC_CMP (s, >= );
4616 case 0x22: /* 0100010 GT #0 */ VEC_CMP0 (s, > );
4617 case 0x26: /* 0100110 EQ #0 */ VEC_CMP0 (s, == );
4618 case 0x2A: /* 0101010 LT #0 */ VEC_CMP0 (s, < );
4619 case 0x4D: /* 1001101 HI */ VEC_CMP (u, > );
4620 case 0x4F: /* 1001111 HS */ VEC_CMP (u, >= );
4621 case 0x62: /* 1100010 GE #0 */ VEC_CMP0 (s, >= );
4622 case 0x63: /* 1100011 EQ */ VEC_CMP (u, == );
4623 case 0x66: /* 1100110 LE #0 */ VEC_CMP0 (s, <= );
4624 default:
4625 if (vm == 0)
4626 HALT_NYI;
4627 do_vec_maxv (cpu);
4628 }
4629 }
4630 }
4631
4632 static void
4633 do_vec_SSHL (sim_cpu *cpu)
4634 {
4635 /* instr[31] = 0
4636 instr[30] = first part (0)/ second part (1)
4637 instr[29,24] = 00 1110
4638 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4639 instr[21] = 1
4640 instr[20,16] = Vm
4641 instr[15,10] = 0100 01
4642 instr[9,5] = Vn
4643 instr[4,0] = Vd. */
4644
4645 unsigned full = INSTR (30, 30);
4646 unsigned vm = INSTR (20, 16);
4647 unsigned vn = INSTR (9, 5);
4648 unsigned vd = INSTR (4, 0);
4649 unsigned i;
4650 signed int shift;
4651
4652 NYI_assert (29, 24, 0x0E);
4653 NYI_assert (21, 21, 1);
4654 NYI_assert (15, 10, 0x11);
4655
4656 /* The per-lane shift count read from Vm is signed: a non-negative count shifts left, a negative count shifts right. */
4657
4658 switch (INSTR (23, 22))
4659 {
4660 case 0:
4661 for (i = 0; i < (full ? 16 : 8); i++)
4662 {
4663 shift = aarch64_get_vec_s8 (cpu, vm, i);
4664 if (shift >= 0)
4665 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4666 << shift);
4667 else
4668 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4669 >> - shift);
4670 }
4671 return;
4672
4673 case 1:
4674 for (i = 0; i < (full ? 8 : 4); i++)
4675 {
4676 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
4677 if (shift >= 0)
4678 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4679 << shift);
4680 else
4681 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4682 >> - shift);
4683 }
4684 return;
4685
4686 case 2:
4687 for (i = 0; i < (full ? 4 : 2); i++)
4688 {
4689 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
4690 if (shift >= 0)
4691 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4692 << shift);
4693 else
4694 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4695 >> - shift);
4696 }
4697 return;
4698
4699 case 3:
4700 if (! full)
4701 HALT_UNALLOC;
4702 for (i = 0; i < 2; i++)
4703 {
4704 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
4705 if (shift >= 0)
4706 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4707 << shift);
4708 else
4709 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4710 >> - shift);
4711 }
4712 return;
4713 }
4714 }
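/* Illustration of the signed-shift semantics implemented above: if byte
   lane 0 of Vm holds -2 and byte lane 0 of Vn holds 0x40, SSHL writes
   0x40 >> 2 = 0x10; if Vm holds +2 instead, 0x40 << 2 truncates to 0x00
   in an 8-bit lane.  The >> applied to a negative signed lane is an
   arithmetic shift.  */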
4715
4716 static void
4717 do_vec_USHL (sim_cpu *cpu)
4718 {
4719 /* instr[31] = 0
4720 instr[30] = first part (0)/ second part (1)
4721 instr[29,24] = 10 1110
4722 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4723 instr[21] = 1
4724 instr[20,16] = Vm
4725 instr[15,10] = 0100 01
4726 instr[9,5] = Vn
4727 instr[4,0] = Vd */
4728
4729 unsigned full = INSTR (30, 30);
4730 unsigned vm = INSTR (20, 16);
4731 unsigned vn = INSTR (9, 5);
4732 unsigned vd = INSTR (4, 0);
4733 unsigned i;
4734 signed int shift;
4735
4736 NYI_assert (29, 24, 0x2E);
4737 NYI_assert (15, 10, 0x11);
4738
4739 switch (INSTR (23, 22))
4740 {
4741 case 0:
4742 for (i = 0; i < (full ? 16 : 8); i++)
4743 {
4744 shift = aarch64_get_vec_s8 (cpu, vm, i);
4745 if (shift >= 0)
4746 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
4747 << shift);
4748 else
4749 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
4750 >> - shift);
4751 }
4752 return;
4753
4754 case 1:
4755 for (i = 0; i < (full ? 8 : 4); i++)
4756 {
4757 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
4758 if (shift >= 0)
4759 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
4760 << shift);
4761 else
4762 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
4763 >> - shift);
4764 }
4765 return;
4766
4767 case 2:
4768 for (i = 0; i < (full ? 4 : 2); i++)
4769 {
4770 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
4771 if (shift >= 0)
4772 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
4773 << shift);
4774 else
4775 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
4776 >> - shift);
4777 }
4778 return;
4779
4780 case 3:
4781 if (! full)
4782 HALT_UNALLOC;
4783 for (i = 0; i < 2; i++)
4784 {
4785 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
4786 if (shift >= 0)
4787 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
4788 << shift);
4789 else
4790 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
4791 >> - shift);
4792 }
4793 return;
4794 }
4795 }
4796
4797 static void
4798 do_vec_FMLA (sim_cpu *cpu)
4799 {
4800 /* instr[31] = 0
4801 instr[30] = full/half selector
4802 instr[29,23] = 0011100
4803 instr[22] = size: 0=>float, 1=>double
4804 instr[21] = 1
4805 instr[20,16] = Vm
4806 instr[15,10] = 1100 11
4807 instr[9,5] = Vn
4808 instr[4,0] = Vd. */
4809
4810 unsigned vm = INSTR (20, 16);
4811 unsigned vn = INSTR (9, 5);
4812 unsigned vd = INSTR (4, 0);
4813 unsigned i;
4814 int full = INSTR (30, 30);
4815
4816 NYI_assert (29, 23, 0x1C);
4817 NYI_assert (21, 21, 1);
4818 NYI_assert (15, 10, 0x33);
4819
4820 if (INSTR (22, 22))
4821 {
4822 if (! full)
4823 HALT_UNALLOC;
4824 for (i = 0; i < 2; i++)
4825 aarch64_set_vec_double (cpu, vd, i,
4826 aarch64_get_vec_double (cpu, vn, i) *
4827 aarch64_get_vec_double (cpu, vm, i) +
4828 aarch64_get_vec_double (cpu, vd, i));
4829 }
4830 else
4831 {
4832 for (i = 0; i < (full ? 4 : 2); i++)
4833 aarch64_set_vec_float (cpu, vd, i,
4834 aarch64_get_vec_float (cpu, vn, i) *
4835 aarch64_get_vec_float (cpu, vm, i) +
4836 aarch64_get_vec_float (cpu, vd, i));
4837 }
4838 }
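/* Note: the code above models FMLA as a separate multiply and add, so it
   can round twice where the hardware rounds once.  A closer sketch, using
   the C99 fused primitive from <math.h>, would be:

     aarch64_set_vec_double (cpu, vd, i,
                             fma (aarch64_get_vec_double (cpu, vn, i),
                                  aarch64_get_vec_double (cpu, vm, i),
                                  aarch64_get_vec_double (cpu, vd, i)));  */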
4839
4840 static void
4841 do_vec_max (sim_cpu *cpu)
4842 {
4843 /* instr[31] = 0
4844 instr[30] = full/half selector
4845 instr[29] = SMAX (0) / UMAX (1)
4846 instr[28,24] = 0 1110
4847 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
4848 instr[21] = 1
4849 instr[20,16] = Vm
4850 instr[15,10] = 0110 01
4851 instr[9,5] = Vn
4852 instr[4,0] = Vd. */
4853
4854 unsigned vm = INSTR (20, 16);
4855 unsigned vn = INSTR (9, 5);
4856 unsigned vd = INSTR (4, 0);
4857 unsigned i;
4858 int full = INSTR (30, 30);
4859
4860 NYI_assert (28, 24, 0x0E);
4861 NYI_assert (21, 21, 1);
4862 NYI_assert (15, 10, 0x19);
4863
4864 if (INSTR (29, 29))
4865 {
4866 switch (INSTR (23, 22))
4867 {
4868 case 0:
4869 for (i = 0; i < (full ? 16 : 8); i++)
4870 aarch64_set_vec_u8 (cpu, vd, i,
4871 aarch64_get_vec_u8 (cpu, vn, i)
4872 > aarch64_get_vec_u8 (cpu, vm, i)
4873 ? aarch64_get_vec_u8 (cpu, vn, i)
4874 : aarch64_get_vec_u8 (cpu, vm, i));
4875 return;
4876
4877 case 1:
4878 for (i = 0; i < (full ? 8 : 4); i++)
4879 aarch64_set_vec_u16 (cpu, vd, i,
4880 aarch64_get_vec_u16 (cpu, vn, i)
4881 > aarch64_get_vec_u16 (cpu, vm, i)
4882 ? aarch64_get_vec_u16 (cpu, vn, i)
4883 : aarch64_get_vec_u16 (cpu, vm, i));
4884 return;
4885
4886 case 2:
4887 for (i = 0; i < (full ? 4 : 2); i++)
4888 aarch64_set_vec_u32 (cpu, vd, i,
4889 aarch64_get_vec_u32 (cpu, vn, i)
4890 > aarch64_get_vec_u32 (cpu, vm, i)
4891 ? aarch64_get_vec_u32 (cpu, vn, i)
4892 : aarch64_get_vec_u32 (cpu, vm, i));
4893 return;
4894
4895 case 3:
4896 HALT_UNALLOC;
4897 }
4898 }
4899 else
4900 {
4901 switch (INSTR (23, 22))
4902 {
4903 case 0:
4904 for (i = 0; i < (full ? 16 : 8); i++)
4905 aarch64_set_vec_s8 (cpu, vd, i,
4906 aarch64_get_vec_s8 (cpu, vn, i)
4907 > aarch64_get_vec_s8 (cpu, vm, i)
4908 ? aarch64_get_vec_s8 (cpu, vn, i)
4909 : aarch64_get_vec_s8 (cpu, vm, i));
4910 return;
4911
4912 case 1:
4913 for (i = 0; i < (full ? 8 : 4); i++)
4914 aarch64_set_vec_s16 (cpu, vd, i,
4915 aarch64_get_vec_s16 (cpu, vn, i)
4916 > aarch64_get_vec_s16 (cpu, vm, i)
4917 ? aarch64_get_vec_s16 (cpu, vn, i)
4918 : aarch64_get_vec_s16 (cpu, vm, i));
4919 return;
4920
4921 case 2:
4922 for (i = 0; i < (full ? 4 : 2); i++)
4923 aarch64_set_vec_s32 (cpu, vd, i,
4924 aarch64_get_vec_s32 (cpu, vn, i)
4925 > aarch64_get_vec_s32 (cpu, vm, i)
4926 ? aarch64_get_vec_s32 (cpu, vn, i)
4927 : aarch64_get_vec_s32 (cpu, vm, i));
4928 return;
4929
4930 case 3:
4931 HALT_UNALLOC;
4932 }
4933 }
4934 }
4935
4936 static void
4937 do_vec_min (sim_cpu *cpu)
4938 {
4939 /* instr[31] = 0
4940 instr[30] = full/half selector
4941 instr[29] = SMIN (0) / UMIN (1)
4942 instr[28,24] = 0 1110
4943 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
4944 instr[21] = 1
4945 instr[20,16] = Vm
4946 instr[15,10] = 0110 11
4947 instr[9,5] = Vn
4948 instr[4,0] = Vd. */
4949
4950 unsigned vm = INSTR (20, 16);
4951 unsigned vn = INSTR (9, 5);
4952 unsigned vd = INSTR (4, 0);
4953 unsigned i;
4954 int full = INSTR (30, 30);
4955
4956 NYI_assert (28, 24, 0x0E);
4957 NYI_assert (21, 21, 1);
4958 NYI_assert (15, 10, 0x1B);
4959
4960 if (INSTR (29, 29))
4961 {
4962 switch (INSTR (23, 22))
4963 {
4964 case 0:
4965 for (i = 0; i < (full ? 16 : 8); i++)
4966 aarch64_set_vec_u8 (cpu, vd, i,
4967 aarch64_get_vec_u8 (cpu, vn, i)
4968 < aarch64_get_vec_u8 (cpu, vm, i)
4969 ? aarch64_get_vec_u8 (cpu, vn, i)
4970 : aarch64_get_vec_u8 (cpu, vm, i));
4971 return;
4972
4973 case 1:
4974 for (i = 0; i < (full ? 8 : 4); i++)
4975 aarch64_set_vec_u16 (cpu, vd, i,
4976 aarch64_get_vec_u16 (cpu, vn, i)
4977 < aarch64_get_vec_u16 (cpu, vm, i)
4978 ? aarch64_get_vec_u16 (cpu, vn, i)
4979 : aarch64_get_vec_u16 (cpu, vm, i));
4980 return;
4981
4982 case 2:
4983 for (i = 0; i < (full ? 4 : 2); i++)
4984 aarch64_set_vec_u32 (cpu, vd, i,
4985 aarch64_get_vec_u32 (cpu, vn, i)
4986 < aarch64_get_vec_u32 (cpu, vm, i)
4987 ? aarch64_get_vec_u32 (cpu, vn, i)
4988 : aarch64_get_vec_u32 (cpu, vm, i));
4989 return;
4990
4991 case 3:
4992 HALT_UNALLOC;
4993 }
4994 }
4995 else
4996 {
4997 switch (INSTR (23, 22))
4998 {
4999 case 0:
5000 for (i = 0; i < (full ? 16 : 8); i++)
5001 aarch64_set_vec_s8 (cpu, vd, i,
5002 aarch64_get_vec_s8 (cpu, vn, i)
5003 < aarch64_get_vec_s8 (cpu, vm, i)
5004 ? aarch64_get_vec_s8 (cpu, vn, i)
5005 : aarch64_get_vec_s8 (cpu, vm, i));
5006 return;
5007
5008 case 1:
5009 for (i = 0; i < (full ? 8 : 4); i++)
5010 aarch64_set_vec_s16 (cpu, vd, i,
5011 aarch64_get_vec_s16 (cpu, vn, i)
5012 < aarch64_get_vec_s16 (cpu, vm, i)
5013 ? aarch64_get_vec_s16 (cpu, vn, i)
5014 : aarch64_get_vec_s16 (cpu, vm, i));
5015 return;
5016
5017 case 2:
5018 for (i = 0; i < (full ? 4 : 2); i++)
5019 aarch64_set_vec_s32 (cpu, vd, i,
5020 aarch64_get_vec_s32 (cpu, vn, i)
5021 < aarch64_get_vec_s32 (cpu, vm, i)
5022 ? aarch64_get_vec_s32 (cpu, vn, i)
5023 : aarch64_get_vec_s32 (cpu, vm, i));
5024 return;
5025
5026 case 3:
5027 HALT_UNALLOC;
5028 }
5029 }
5030 }
5031
5032 static void
5033 do_vec_sub_long (sim_cpu *cpu)
5034 {
5035 /* instr[31] = 0
5036 instr[30] = lower (0) / upper (1)
5037 instr[29] = signed (0) / unsigned (1)
5038 instr[28,24] = 0 1110
5039 instr[23,22] = size: bytes (00), half (01), word (10)
5040 instr[21] = 1
5041 instr[20,16] = Vm
5042 instr[15,10] = 0010 00
5043 instr[9,5] = Vn
5044 instr[4,0] = V dest. */
5045
5046 unsigned size = INSTR (23, 22);
5047 unsigned vm = INSTR (20, 16);
5048 unsigned vn = INSTR (9, 5);
5049 unsigned vd = INSTR (4, 0);
5050 unsigned bias = 0;
5051 unsigned i;
5052
5053 NYI_assert (28, 24, 0x0E);
5054 NYI_assert (21, 21, 1);
5055 NYI_assert (15, 10, 0x08);
5056
5057 if (size == 3)
5058 HALT_UNALLOC;
5059
5060 switch (INSTR (30, 29))
5061 {
5062 case 2: /* SSUBL2. */
5063 bias = 2;
5064 case 0: /* SSUBL. */
5065 switch (size)
5066 {
5067 case 0:
5068 bias *= 4;
5069 for (i = 0; i < 8; i++)
5070 aarch64_set_vec_s16 (cpu, vd, i,
5071 aarch64_get_vec_s8 (cpu, vn, i + bias)
5072 - aarch64_get_vec_s8 (cpu, vm, i + bias));
5073 break;
5074
5075 case 1:
5076 bias *= 2;
5077 for (i = 0; i < 4; i++)
5078 aarch64_set_vec_s32 (cpu, vd, i,
5079 aarch64_get_vec_s16 (cpu, vn, i + bias)
5080 - aarch64_get_vec_s16 (cpu, vm, i + bias));
5081 break;
5082
5083 case 2:
5084 for (i = 0; i < 2; i++)
5085 aarch64_set_vec_s64 (cpu, vd, i,
5086 aarch64_get_vec_s32 (cpu, vn, i + bias)
5087 - aarch64_get_vec_s32 (cpu, vm, i + bias));
5088 break;
5089
5090 default:
5091 HALT_UNALLOC;
5092 }
5093 break;
5094
5095 case 3: /* USUBL2. */
5096 bias = 2;
5097 case 1: /* USUBL. */
5098 switch (size)
5099 {
5100 case 0:
5101 bias *= 4;
5102 for (i = 0; i < 8; i++)
5103 aarch64_set_vec_u16 (cpu, vd, i,
5104 aarch64_get_vec_u8 (cpu, vn, i + bias)
5105 - aarch64_get_vec_u8 (cpu, vm, i + bias));
5106 break;
5107
5108 case 1:
5109 bias *= 2;
5110 for (i = 0; i < 4; i++)
5111 aarch64_set_vec_u32 (cpu, vd, i,
5112 aarch64_get_vec_u16 (cpu, vn, i + bias)
5113 - aarch64_get_vec_u16 (cpu, vm, i + bias));
5114 break;
5115
5116 case 2:
5117 for (i = 0; i < 2; i++)
5118 aarch64_set_vec_u64 (cpu, vd, i,
5119 aarch64_get_vec_u32 (cpu, vn, i + bias)
5120 - aarch64_get_vec_u32 (cpu, vm, i + bias));
5121 break;
5122
5123 default:
5124 HALT_UNALLOC;
5125 }
5126 break;
5127 }
5128 }
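/* Bias arithmetic above, by example: SSUBL2 on byte lanes reads the upper
   half of the 16-byte sources, so the initial bias of 2 is scaled to
   2 * 4 = 8 and the loop reads lanes 8-15; halfwords scale to 4 (lanes
   4-7) and words keep 2 (lanes 2-3).  */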
5129
5130 static void
5131 do_vec_ADDP (sim_cpu *cpu)
5132 {
5133 /* instr[31] = 0
5134 instr[30] = half(0)/full(1)
5135 instr[29,24] = 00 1110
5136 instr[23,22] = size: bytes (00), half (01), word (10), long (11)
5137 instr[21] = 1
5138 instr[20,16] = Vm
5139 instr[15,10] = 1011 11
5140 instr[9,5] = Vn
5141 instr[4,0] = V dest. */
5142
5143 FRegister copy_vn;
5144 FRegister copy_vm;
5145 unsigned full = INSTR (30, 30);
5146 unsigned size = INSTR (23, 22);
5147 unsigned vm = INSTR (20, 16);
5148 unsigned vn = INSTR (9, 5);
5149 unsigned vd = INSTR (4, 0);
5150 unsigned i, range;
5151
5152 NYI_assert (29, 24, 0x0E);
5153 NYI_assert (21, 21, 1);
5154 NYI_assert (15, 10, 0x2F);
5155
5156 /* Make copies of the source registers in case vd == vn/vm. */
5157 copy_vn = cpu->fr[vn];
5158 copy_vm = cpu->fr[vm];
5159
5160 switch (size)
5161 {
5162 case 0:
5163 range = full ? 8 : 4;
5164 for (i = 0; i < range; i++)
5165 {
5166 aarch64_set_vec_u8 (cpu, vd, i,
5167 copy_vn.b[i * 2] + copy_vn.b[i * 2 + 1]);
5168 aarch64_set_vec_u8 (cpu, vd, i + range,
5169 copy_vm.b[i * 2] + copy_vm.b[i * 2 + 1]);
5170 }
5171 return;
5172
5173 case 1:
5174 range = full ? 4 : 2;
5175 for (i = 0; i < range; i++)
5176 {
5177 aarch64_set_vec_u16 (cpu, vd, i,
5178 copy_vn.h[i * 2] + copy_vn.h[i * 2 + 1]);
5179 aarch64_set_vec_u16 (cpu, vd, i + range,
5180 copy_vm.h[i * 2] + copy_vm.h[i * 2 + 1]);
5181 }
5182 return;
5183
5184 case 2:
5185 range = full ? 2 : 1;
5186 for (i = 0; i < range; i++)
5187 {
5188 aarch64_set_vec_u32 (cpu, vd, i,
5189 copy_vn.w[i * 2] + copy_vn.w[i * 2 + 1]);
5190 aarch64_set_vec_u32 (cpu, vd, i + range,
5191 copy_vm.w[i * 2] + copy_vm.w[i * 2 + 1]);
5192 }
5193 return;
5194
5195 case 3:
5196 if (! full)
5197 HALT_UNALLOC;
5198 aarch64_set_vec_u64 (cpu, vd, 0, copy_vn.v[0] + copy_vn.v[1]);
5199 aarch64_set_vec_u64 (cpu, vd, 1, copy_vm.v[0] + copy_vm.v[1]);
5200 return;
5201 }
5202 }
5203
5204 static void
5205 do_vec_UMOV (sim_cpu *cpu)
5206 {
5207 /* instr[31] = 0
5208 instr[30] = 32-bit(0)/64-bit(1)
5209 instr[29,21] = 00 1110 000
5210 instr[20,16] = size & index
5211 instr[15,10] = 0011 11
5212 instr[9,5] = V source
5213 instr[4,0] = R dest. */
5214
5215 unsigned vs = INSTR (9, 5);
5216 unsigned rd = INSTR (4, 0);
5217 unsigned index;
5218
5219 NYI_assert (29, 21, 0x070);
5220 NYI_assert (15, 10, 0x0F);
5221
5222 if (INSTR (16, 16))
5223 {
5224 /* Byte transfer. */
5225 index = INSTR (20, 17);
5226 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5227 aarch64_get_vec_u8 (cpu, vs, index));
5228 }
5229 else if (INSTR (17, 17))
5230 {
5231 index = INSTR (20, 18);
5232 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5233 aarch64_get_vec_u16 (cpu, vs, index));
5234 }
5235 else if (INSTR (18, 18))
5236 {
5237 index = INSTR (20, 19);
5238 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5239 aarch64_get_vec_u32 (cpu, vs, index));
5240 }
5241 else
5242 {
5243 if (INSTR (30, 30) != 1)
5244 HALT_UNALLOC;
5245
5246 index = INSTR (20, 20);
5247 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5248 aarch64_get_vec_u64 (cpu, vs, index));
5249 }
5250 }
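/* imm5 decode example for UMOV (a sketch of the chain of tests above):
   the lowest set bit of instr[20,16] selects the element size and the
   bits above it hold the index, so UMOV Wd, Vn.H[3] encodes
   instr[20,16] = 01110: bit 16 clear, bit 17 set (halfword transfer),
   index 3 in instr[20,18].  */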
5251
5252 static void
5253 do_vec_FABS (sim_cpu *cpu)
5254 {
5255 /* instr[31] = 0
5256 instr[30] = half(0)/full(1)
5257 instr[29,23] = 00 1110 1
5258 instr[22] = float(0)/double(1)
5259 instr[21,16] = 10 0000
5260 instr[15,10] = 1111 10
5261 instr[9,5] = Vn
5262 instr[4,0] = Vd. */
5263
5264 unsigned vn = INSTR (9, 5);
5265 unsigned vd = INSTR (4, 0);
5266 unsigned full = INSTR (30, 30);
5267 unsigned i;
5268
5269 NYI_assert (29, 23, 0x1D);
5270 NYI_assert (21, 10, 0x83E);
5271
5272 if (INSTR (22, 22))
5273 {
5274 if (! full)
5275 HALT_NYI;
5276
5277 for (i = 0; i < 2; i++)
5278 aarch64_set_vec_double (cpu, vd, i,
5279 fabs (aarch64_get_vec_double (cpu, vn, i)));
5280 }
5281 else
5282 {
5283 for (i = 0; i < (full ? 4 : 2); i++)
5284 aarch64_set_vec_float (cpu, vd, i,
5285 fabsf (aarch64_get_vec_float (cpu, vn, i)));
5286 }
5287 }
5288
5289 static void
5290 do_vec_FCVTZS (sim_cpu *cpu)
5291 {
5292 /* instr[31] = 0
5293 instr[30] = half (0) / all (1)
5294 instr[29,23] = 00 1110 1
5295 instr[22] = single (0) / double (1)
5296 instr[21,10] = 10 0001 1011 10
5297 instr[9,5] = Rn
5298 instr[4,0] = Rd. */
5299
5300 unsigned rn = INSTR (9, 5);
5301 unsigned rd = INSTR (4, 0);
5302 unsigned full = INSTR (30, 30);
5303 unsigned i;
5304
5305 NYI_assert (31, 31, 0);
5306 NYI_assert (29, 23, 0x1D);
5307 NYI_assert (21, 10, 0x86E);
5308
5309 if (INSTR (22, 22))
5310 {
5311 if (! full)
5312 HALT_UNALLOC;
5313
5314 for (i = 0; i < 2; i++)
5315 aarch64_set_vec_s64 (cpu, rd, i,
5316 (int64_t) aarch64_get_vec_double (cpu, rn, i));
5317 }
5318 else
5319 for (i = 0; i < (full ? 4 : 2); i++)
5320 aarch64_set_vec_s32 (cpu, rd, i,
5321 (int32_t) aarch64_get_vec_float (cpu, rn, i));
5322 }
5323
5324 static void
5325 do_vec_REV64 (sim_cpu *cpu)
5326 {
5327 /* instr[31] = 0
5328 instr[30] = full/half
5329 instr[29,24] = 00 1110
5330 instr[23,22] = size
5331 instr[21,10] = 10 0000 0000 10
5332 instr[9,5] = Rn
5333 instr[4,0] = Rd. */
5334
5335 unsigned rn = INSTR (9, 5);
5336 unsigned rd = INSTR (4, 0);
5337 unsigned size = INSTR (23, 22);
5338 unsigned full = INSTR (30, 30);
5339 unsigned i;
5340 FRegister val;
5341
5342 NYI_assert (29, 24, 0x0E);
5343 NYI_assert (21, 10, 0x802);
5344
5345 switch (size)
5346 {
5347 case 0:
5348 for (i = 0; i < (full ? 16 : 8); i++)
5349 val.b[i ^ 0x7] = aarch64_get_vec_u8 (cpu, rn, i);
5350 break;
5351
5352 case 1:
5353 for (i = 0; i < (full ? 8 : 4); i++)
5354 val.h[i ^ 0x3] = aarch64_get_vec_u16 (cpu, rn, i);
5355 break;
5356
5357 case 2:
5358 for (i = 0; i < (full ? 4 : 2); i++)
5359 val.w[i ^ 0x1] = aarch64_get_vec_u32 (cpu, rn, i);
5360 break;
5361
5362 case 3:
5363 HALT_UNALLOC;
5364 }
5365
5366 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5367 if (full)
5368 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5369 }
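/* The XOR indexing above, by example: REV64 reverses lanes within each
   64-bit container, and for byte lanes the reversed position of lane i is
   i ^ 7 (0 <-> 7, 1 <-> 6, ...); lanes 8-15 map among themselves because
   the XOR never crosses the container boundary.  Halfwords use i ^ 3 and
   words i ^ 1 for the same reason, and REV16/REV32 shrink the container
   (and hence the XOR mask) accordingly.  */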
5370
5371 static void
5372 do_vec_REV16 (sim_cpu *cpu)
5373 {
5374 /* instr[31] = 0
5375 instr[30] = full/half
5376 instr[29,24] = 00 1110
5377 instr[23,22] = size
5378 instr[21,10] = 10 0000 0001 10
5379 instr[9,5] = Rn
5380 instr[4,0] = Rd. */
5381
5382 unsigned rn = INSTR (9, 5);
5383 unsigned rd = INSTR (4, 0);
5384 unsigned size = INSTR (23, 22);
5385 unsigned full = INSTR (30, 30);
5386 unsigned i;
5387 FRegister val;
5388
5389 NYI_assert (29, 24, 0x0E);
5390 NYI_assert (21, 10, 0x806);
5391
5392 switch (size)
5393 {
5394 case 0:
5395 for (i = 0; i < (full ? 16 : 8); i++)
5396 val.b[i ^ 0x1] = aarch64_get_vec_u8 (cpu, rn, i);
5397 break;
5398
5399 default:
5400 HALT_UNALLOC;
5401 }
5402
5403 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5404 if (full)
5405 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5406 }
5407
5408 static void
5409 do_vec_op1 (sim_cpu *cpu)
5410 {
5411 /* instr[31] = 0
5412 instr[30] = half/full
5413 instr[29,24] = 00 1110
5414 instr[23,21] = ???
5415 instr[20,16] = Vm
5416 instr[15,10] = sub-opcode
5417 instr[9,5] = Vn
5418 instr[4,0] = Vd */
5419 NYI_assert (29, 24, 0x0E);
5420
5421 if (INSTR (21, 21) == 0)
5422 {
5423 if (INSTR (23, 22) == 0)
5424 {
5425 if (INSTR (30, 30) == 1
5426 && INSTR (17, 14) == 0
5427 && INSTR (12, 10) == 7)
5428 return do_vec_ins_2 (cpu);
5429
5430 switch (INSTR (15, 10))
5431 {
5432 case 0x01: do_vec_DUP_vector_into_vector (cpu); return;
5433 case 0x03: do_vec_DUP_scalar_into_vector (cpu); return;
5434 case 0x07: do_vec_INS (cpu); return;
5435 case 0x0A: do_vec_TRN (cpu); return;
5436
5437 case 0x0F:
5438 if (INSTR (17, 16) == 0)
5439 {
5440 do_vec_MOV_into_scalar (cpu);
5441 return;
5442 }
5443 break;
5444
5445 case 0x00:
5446 case 0x08:
5447 case 0x10:
5448 case 0x18:
5449 do_vec_TBL (cpu); return;
5450
5451 case 0x06:
5452 case 0x16:
5453 do_vec_UZP (cpu); return;
5454
5455 case 0x0E:
5456 case 0x1E:
5457 do_vec_ZIP (cpu); return;
5458
5459 default:
5460 HALT_NYI;
5461 }
5462 }
5463
5464 switch (INSTR (13, 10))
5465 {
5466 case 0x6: do_vec_UZP (cpu); return;
5467 case 0xE: do_vec_ZIP (cpu); return;
5468 case 0xA: do_vec_TRN (cpu); return;
5469 case 0xF: do_vec_UMOV (cpu); return;
5470 default: HALT_NYI;
5471 }
5472 }
5473
5474 switch (INSTR (15, 10))
5475 {
5476 case 0x02: do_vec_REV64 (cpu); return;
5477 case 0x06: do_vec_REV16 (cpu); return;
5478
5479 case 0x07:
5480 switch (INSTR (23, 21))
5481 {
5482 case 1: do_vec_AND (cpu); return;
5483 case 3: do_vec_BIC (cpu); return;
5484 case 5: do_vec_ORR (cpu); return;
5485 case 7: do_vec_ORN (cpu); return;
5486 default: HALT_NYI;
5487 }
5488
5489 case 0x08: do_vec_sub_long (cpu); return;
5490 case 0x0a: do_vec_XTN (cpu); return;
5491 case 0x11: do_vec_SSHL (cpu); return;
5492 case 0x19: do_vec_max (cpu); return;
5493 case 0x1B: do_vec_min (cpu); return;
5494 case 0x21: do_vec_add (cpu); return;
5495 case 0x25: do_vec_MLA (cpu); return;
5496 case 0x27: do_vec_mul (cpu); return;
5497 case 0x2F: do_vec_ADDP (cpu); return;
5498 case 0x30: do_vec_mull (cpu); return;
5499 case 0x33: do_vec_FMLA (cpu); return;
5500 case 0x35: do_vec_fadd (cpu); return;
5501
5502 case 0x2E:
5503 switch (INSTR (20, 16))
5504 {
5505 case 0x00: do_vec_ABS (cpu); return;
5506 case 0x01: do_vec_FCVTZS (cpu); return;
5507 case 0x11: do_vec_ADDV (cpu); return;
5508 default: HALT_NYI;
5509 }
5510
5511 case 0x31:
5512 case 0x3B:
5513 do_vec_Fminmax (cpu); return;
5514
5515 case 0x0D:
5516 case 0x0F:
5517 case 0x22:
5518 case 0x23:
5519 case 0x26:
5520 case 0x2A:
5521 case 0x32:
5522 case 0x36:
5523 case 0x39:
5524 case 0x3A:
5525 do_vec_compare (cpu); return;
5526
5527 case 0x3E:
5528 do_vec_FABS (cpu); return;
5529
5530 default:
5531 HALT_NYI;
5532 }
5533 }
5534
5535 static void
5536 do_vec_xtl (sim_cpu *cpu)
5537 {
5538 /* instr[31] = 0
5539 instr[30,29] = SXTL (00), UXTL (01), SXTL2 (10), UXTL2 (11)
5540 instr[28,22] = 0 1111 00
5541 instr[21,16] = size & shift (USHLL, SSHLL, USHLL2, SSHLL2)
5542 instr[15,10] = 1010 01
5543 instr[9,5] = V source
5544 instr[4,0] = V dest. */
5545
5546 unsigned vs = INSTR (9, 5);
5547 unsigned vd = INSTR (4, 0);
5548 unsigned i, shift, bias = 0;
5549
5550 NYI_assert (28, 22, 0x3C);
5551 NYI_assert (15, 10, 0x29);
5552
5553 switch (INSTR (30, 29))
5554 {
5555 case 2: /* SXTL2, SSHLL2. */
5556 bias = 2;
5557 case 0: /* SXTL, SSHLL. */
5558 if (INSTR (21, 21))
5559 {
5560 int64_t val1, val2;
5561
5562 shift = INSTR (20, 16);
5563 /* Get the source values before setting the destination values
5564 in case the source and destination are the same. */
5565 val1 = aarch64_get_vec_s32 (cpu, vs, bias) << shift;
5566 val2 = aarch64_get_vec_s32 (cpu, vs, bias + 1) << shift;
5567 aarch64_set_vec_s64 (cpu, vd, 0, val1);
5568 aarch64_set_vec_s64 (cpu, vd, 1, val2);
5569 }
5570 else if (INSTR (20, 20))
5571 {
5572 int32_t v[4];
5573 int32_t v1,v2,v3,v4;
5574
5575 shift = INSTR (19, 16);
5576 bias *= 2;
5577 for (i = 0; i < 4; i++)
5578 v[i] = aarch64_get_vec_s16 (cpu, vs, bias + i) << shift;
5579 for (i = 0; i < 4; i++)
5580 aarch64_set_vec_s32 (cpu, vd, i, v[i]);
5581 }
5582 else
5583 {
5584 int16_t v[8];
5585 NYI_assert (19, 19, 1);
5586
5587 shift = INSTR (18, 16);
5588 bias *= 4;
5589 for (i = 0; i < 8; i++)
5590 v[i] = aarch64_get_vec_s8 (cpu, vs, i + bias) << shift;
5591 for (i = 0; i < 8; i++)
5592 aarch64_set_vec_s16 (cpu, vd, i, v[i]);
5593 }
5594 return;
5595
5596 case 3: /* UXTL2, USHLL2. */
5597 bias = 2;
5598 case 1: /* UXTL, USHLL. */
5599 if (INSTR (21, 21))
5600 {
5601 uint64_t v1, v2;
5602 shift = INSTR (20, 16);
5603 v1 = aarch64_get_vec_u32 (cpu, vs, bias) << shift;
5604 v2 = aarch64_get_vec_u32 (cpu, vs, bias + 1) << shift;
5605 aarch64_set_vec_u64 (cpu, vd, 0, v1);
5606 aarch64_set_vec_u64 (cpu, vd, 1, v2);
5607 }
5608 else if (INSTR (20, 20))
5609 {
5610 uint32_t v[4];
5611 shift = INSTR (19, 16);
5612 bias *= 2;
5613 for (i = 0; i < 4; i++)
5614 v[i] = aarch64_get_vec_u16 (cpu, vs, i + bias) << shift;
5615 for (i = 0; i < 4; i++)
5616 aarch64_set_vec_u32 (cpu, vd, i, v[i]);
5617 }
5618 else
5619 {
5620 uint16_t v[8];
5621 NYI_assert (19, 19, 1);
5622
5623 shift = INSTR (18, 16);
5624 bias *= 4;
5625 for (i = 0; i < 8; i++)
5626 v[i] = aarch64_get_vec_u8 (cpu, vs, i + bias) << shift;
5627 for (i = 0; i < 8; i++)
5628 aarch64_set_vec_u16 (cpu, vd, i, v[i]);
5629 }
5630 return;
5631 }
5632 }
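/* Decode example for the widening shifts above: SSHLL Vd.8H, Vn.8B, #3
   stores esize + shift in instr[22,16], so instr[19] is set as the byte
   marker and shift = instr[18,16] = 3.  SXTL/UXTL are the shift == 0
   cases, and the "2" forms scale the bias of 2 so that the byte loop
   reads source lanes 8-15, halfwords 4-7 and words 2-3.  */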
5633
5634 static void
5635 do_vec_SHL (sim_cpu *cpu)
5636 {
5637 /* instr [31] = 0
5638 instr [30] = half(0)/full(1)
5639 instr [29,23] = 001 1110
5640 instr [22,16] = size and shift amount
5641 instr [15,10] = 01 0101
5642 instr [9, 5] = Vs
5643 instr [4, 0] = Vd. */
5644
5645 int shift;
5646 int full = INSTR (30, 30);
5647 unsigned vs = INSTR (9, 5);
5648 unsigned vd = INSTR (4, 0);
5649 unsigned i;
5650
5651 NYI_assert (29, 23, 0x1E);
5652 NYI_assert (15, 10, 0x15);
5653
5654 if (INSTR (22, 22))
5655 {
5656 shift = INSTR (21, 16);
5657
5658 if (full == 0)
5659 HALT_UNALLOC;
5660
5661 for (i = 0; i < 2; i++)
5662 {
5663 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5664 aarch64_set_vec_u64 (cpu, vd, i, val << shift);
5665 }
5666
5667 return;
5668 }
5669
5670 if (INSTR (21, 21))
5671 {
5672 shift = INSTR (20, 16);
5673
5674 for (i = 0; i < (full ? 4 : 2); i++)
5675 {
5676 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5677 aarch64_set_vec_u32 (cpu, vd, i, val << shift);
5678 }
5679
5680 return;
5681 }
5682
5683 if (INSTR (20, 20))
5684 {
5685 shift = INSTR (19, 16);
5686
5687 for (i = 0; i < (full ? 8 : 4); i++)
5688 {
5689 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5690 aarch64_set_vec_u16 (cpu, vd, i, val << shift);
5691 }
5692
5693 return;
5694 }
5695
5696 if (INSTR (19, 19) == 0)
5697 HALT_UNALLOC;
5698
5699 shift = INSTR (18, 16);
5700
5701 for (i = 0; i < (full ? 16 : 8); i++)
5702 {
5703 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5704 aarch64_set_vec_u8 (cpu, vd, i, val << shift);
5705 }
5706 }
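/* Shift decode example for SHL: instr[22,16] holds esize + shift, so
   SHL Vd.4S, Vn.4S, #5 sets instr[21] (the word-size marker) and
   instr[20,16] = 5, which is exactly what the tests above extract.  */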
5707
5708 static void
5709 do_vec_SSHR_USHR (sim_cpu *cpu)
5710 {
5711 /* instr [31] = 0
5712 instr [30] = half(0)/full(1)
5713 instr [29] = signed(0)/unsigned(1)
5714 instr [28,23] = 0 1111 0
5715 instr [22,16] = size and shift amount
5716 instr [15,10] = 0000 01
5717 instr [9, 5] = Vs
5718 instr [4, 0] = Vd. */
5719
5720 int full = INSTR (30, 30);
5721 int sign = ! INSTR (29, 29);
5722 unsigned shift = INSTR (22, 16);
5723 unsigned vs = INSTR (9, 5);
5724 unsigned vd = INSTR (4, 0);
5725 unsigned i;
5726
5727 NYI_assert (28, 23, 0x1E);
5728 NYI_assert (15, 10, 0x01);
5729
5730 if (INSTR (22, 22))
5731 {
5732 shift = 128 - shift;
5733
5734 if (full == 0)
5735 HALT_UNALLOC;
5736
5737 if (sign)
5738 for (i = 0; i < 2; i++)
5739 {
5740 int64_t val = aarch64_get_vec_s64 (cpu, vs, i);
5741 aarch64_set_vec_s64 (cpu, vd, i, val >> shift);
5742 }
5743 else
5744 for (i = 0; i < 2; i++)
5745 {
5746 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5747 aarch64_set_vec_u64 (cpu, vd, i, val >> shift);
5748 }
5749
5750 return;
5751 }
5752
5753 if (INSTR (21, 21))
5754 {
5755 shift = 64 - shift;
5756
5757 if (sign)
5758 for (i = 0; i < (full ? 4 : 2); i++)
5759 {
5760 int32_t val = aarch64_get_vec_s32 (cpu, vs, i);
5761 aarch64_set_vec_s32 (cpu, vd, i, val >> shift);
5762 }
5763 else
5764 for (i = 0; i < (full ? 4 : 2); i++)
5765 {
5766 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5767 aarch64_set_vec_u32 (cpu, vd, i, val >> shift);
5768 }
5769
5770 return;
5771 }
5772
5773 if (INSTR (20, 20))
5774 {
5775 shift = 32 - shift;
5776
5777 if (sign)
5778 for (i = 0; i < (full ? 8 : 4); i++)
5779 {
5780 int16_t val = aarch64_get_vec_s16 (cpu, vs, i);
5781 aarch64_set_vec_s16 (cpu, vd, i, val >> shift);
5782 }
5783 else
5784 for (i = 0; i < (full ? 8 : 4); i++)
5785 {
5786 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5787 aarch64_set_vec_u16 (cpu, vd, i, val >> shift);
5788 }
5789
5790 return;
5791 }
5792
5793 if (INSTR (19, 19) == 0)
5794 HALT_UNALLOC;
5795
5796 shift = 16 - shift;
5797
5798 if (sign)
5799 for (i = 0; i < (full ? 16 : 8); i++)
5800 {
5801 int8_t val = aarch64_get_vec_s8 (cpu, vs, i);
5802 aarch64_set_vec_s8 (cpu, vd, i, val >> shift);
5803 }
5804 else
5805 for (i = 0; i < (full ? 16 : 8); i++)
5806 {
5807 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5808 aarch64_set_vec_u8 (cpu, vd, i, val >> shift);
5809 }
5810 }
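/* Shift decode example for the right shifts: instr[22,16] holds
   2 * esize - shift, so SSHR Vd.2D, Vn.2D, #3 encodes 128 - 3 = 125
   there and the "shift = 128 - shift" above recovers 3; the narrower
   sizes subtract from 64, 32 and 16 in the same way.  */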
5811
5812 static void
5813 do_vec_MUL_by_element (sim_cpu *cpu)
5814 {
5815 /* instr[31] = 0
5816 instr[30] = half/full
5817 instr[29,24] = 00 1111
5818 instr[23,22] = size
5819 instr[21] = L
5820 instr[20] = M
5821 instr[19,16] = m
5822 instr[15,12] = 1000
5823 instr[11] = H
5824 instr[10] = 0
5825 instr[9,5] = Vn
5826 instr[4,0] = Vd */
5827
5828 unsigned full = INSTR (30, 30);
5829 unsigned L = INSTR (21, 21);
5830 unsigned H = INSTR (11, 11);
5831 unsigned vn = INSTR (9, 5);
5832 unsigned vd = INSTR (4, 0);
5833 unsigned size = INSTR (23, 22);
5834 unsigned index;
5835 unsigned vm;
5836 unsigned e;
5837
5838 NYI_assert (29, 24, 0x0F);
5839 NYI_assert (15, 12, 0x8);
5840 NYI_assert (10, 10, 0);
5841
5842 switch (size)
5843 {
5844 case 1:
5845 {
5846 /* 16 bit products. */
5847 uint16_t product;
5848 uint16_t element1;
5849 uint16_t element2;
5850
5851 index = (H << 2) | (L << 1) | INSTR (20, 20);
5852 vm = INSTR (19, 16);
5853 element2 = aarch64_get_vec_u16 (cpu, vm, index);
5854
5855 for (e = 0; e < (full ? 8 : 4); e ++)
5856 {
5857 element1 = aarch64_get_vec_u16 (cpu, vn, e);
5858 product = element1 * element2;
5859 aarch64_set_vec_u16 (cpu, vd, e, product);
5860 }
5861 }
5862 break;
5863
5864 case 2:
5865 {
5866 /* 32 bit products. */
5867 uint32_t product;
5868 uint32_t element1;
5869 uint32_t element2;
5870
5871 index = (H << 1) | L;
5872 vm = INSTR (20, 16);
5873 element2 = aarch64_get_vec_u32 (cpu, vm, index);
5874
5875 for (e = 0; e < (full ? 4 : 2); e ++)
5876 {
5877 element1 = aarch64_get_vec_u32 (cpu, vn, e);
5878 product = element1 * element2;
5879 aarch64_set_vec_u32 (cpu, vd, e, product);
5880 }
5881 }
5882 break;
5883
5884 default:
5885 HALT_UNALLOC;
5886 }
5887 }
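/* Index decode example for the by-element multiply: a halfword element
   index is H:L:M (instr[11], instr[21], instr[20]) and the register field
   shrinks to instr[19,16], so MUL Vd.8H, Vn.8H, Vm.H[5] sets H:L:M = 101;
   a word element index is just H:L.  */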
5888
5889 static void
5890 do_vec_op2 (sim_cpu *cpu)
5891 {
5892 /* instr[31] = 0
5893 instr[30] = half/full
5894 instr[29,24] = 00 1111
5895 instr[23] = ?
5896 instr[22,16] = element size & index
5897 instr[15,10] = sub-opcode
5898 instr[9,5] = Vm
5899 instr[4,0] = Vd */
5900
5901 NYI_assert (29, 24, 0x0F);
5902
5903 if (INSTR (23, 23) != 0)
5904 {
5905 switch (INSTR (15, 10))
5906 {
5907 case 0x20:
5908 case 0x22: do_vec_MUL_by_element (cpu); return;
5909 default: HALT_NYI;
5910 }
5911 }
5912 else
5913 {
5914 switch (INSTR (15, 10))
5915 {
5916 case 0x01: do_vec_SSHR_USHR (cpu); return;
5917 case 0x15: do_vec_SHL (cpu); return;
5918 case 0x20:
5919 case 0x22: do_vec_MUL_by_element (cpu); return;
5920 case 0x29: do_vec_xtl (cpu); return;
5921 default: HALT_NYI;
5922 }
5923 }
5924 }
5925
5926 static void
5927 do_vec_neg (sim_cpu *cpu)
5928 {
5929 /* instr[31] = 0
5930 instr[30] = full(1)/half(0)
5931 instr[29,24] = 10 1110
5932 instr[23,22] = size: byte(00), half (01), word (10), long (11)
5933 instr[21,10] = 1000 0010 1110
5934 instr[9,5] = Vs
5935 instr[4,0] = Vd */
5936
5937 int full = INSTR (30, 30);
5938 unsigned vs = INSTR (9, 5);
5939 unsigned vd = INSTR (4, 0);
5940 unsigned i;
5941
5942 NYI_assert (29, 24, 0x2E);
5943 NYI_assert (21, 10, 0x82E);
5944
5945 switch (INSTR (23, 22))
5946 {
5947 case 0:
5948 for (i = 0; i < (full ? 16 : 8); i++)
5949 aarch64_set_vec_s8 (cpu, vd, i, - aarch64_get_vec_s8 (cpu, vs, i));
5950 return;
5951
5952 case 1:
5953 for (i = 0; i < (full ? 8 : 4); i++)
5954 aarch64_set_vec_s16 (cpu, vd, i, - aarch64_get_vec_s16 (cpu, vs, i));
5955 return;
5956
5957 case 2:
5958 for (i = 0; i < (full ? 4 : 2); i++)
5959 aarch64_set_vec_s32 (cpu, vd, i, - aarch64_get_vec_s32 (cpu, vs, i));
5960 return;
5961
5962 case 3:
5963 if (! full)
5964 HALT_NYI;
5965 for (i = 0; i < 2; i++)
5966 aarch64_set_vec_s64 (cpu, vd, i, - aarch64_get_vec_s64 (cpu, vs, i));
5967 return;
5968 }
5969 }
5970
5971 static void
5972 do_vec_sqrt (sim_cpu *cpu)
5973 {
5974 /* instr[31] = 0
5975 instr[30] = full(1)/half(0)
5976 instr[29,23] = 101 1011
5977 instr[22] = single(0)/double(1)
5978 instr[21,10] = 1000 0111 1110
5979 instr[9,5] = Vs
5980 instr[4,0] = Vd. */
5981
5982 int full = INSTR (30, 30);
5983 unsigned vs = INSTR (9, 5);
5984 unsigned vd = INSTR (4, 0);
5985 unsigned i;
5986
5987 NYI_assert (29, 23, 0x5B);
5988 NYI_assert (21, 10, 0x87E);
5989
5990 if (INSTR (22, 22) == 0)
5991 for (i = 0; i < (full ? 4 : 2); i++)
5992 aarch64_set_vec_float (cpu, vd, i,
5993 sqrtf (aarch64_get_vec_float (cpu, vs, i)));
5994 else
5995 for (i = 0; i < 2; i++)
5996 aarch64_set_vec_double (cpu, vd, i,
5997 sqrt (aarch64_get_vec_double (cpu, vs, i)));
5998 }
5999
6000 static void
6001 do_vec_mls_indexed (sim_cpu *cpu)
6002 {
6003 /* instr[31] = 0
6004 instr[30] = half(0)/full(1)
6005 instr[29,24] = 10 1111
6006 instr[23,22] = 16-bit(01)/32-bit(10)
6007 instr[21,20+11] = index (if 16-bit)
6008 instr[21+11] = index (if 32-bit)
6009 instr[20,16] = Vm
6010 instr[15,12] = 0100
6011 instr[11] = part of index
6012 instr[10] = 0
6013 instr[9,5] = Vs
6014 instr[4,0] = Vd. */
6015
6016 int full = INSTR (30, 30);
6017 unsigned vs = INSTR (9, 5);
6018 unsigned vd = INSTR (4, 0);
6019 unsigned vm = INSTR (20, 16);
6020 unsigned i;
6021
6022 NYI_assert (15, 12, 4);
6023 NYI_assert (10, 10, 0);
6024
6025 switch (INSTR (23, 22))
6026 {
6027 case 1:
6028 {
6029 unsigned elem;
6030 uint32_t val;
6031
6032 if (vm > 15)
6033 HALT_NYI;
6034
6035 elem = (INSTR (11, 11) << 2) | (INSTR (21, 21) << 1) | INSTR (20, 20);
6036 val = aarch64_get_vec_u16 (cpu, vm, elem);
6037
6038 for (i = 0; i < (full ? 8 : 4); i++)
6039 aarch64_set_vec_u16 (cpu, vd, i,
6040 aarch64_get_vec_u16 (cpu, vd, i) -
6041 (aarch64_get_vec_u16 (cpu, vs, i) * val));
6042 return;
6043 }
6044
6045 case 2:
6046 {
6047 unsigned elem = (INSTR (11, 11) << 1) | INSTR (21, 21);
6048 uint32_t val = aarch64_get_vec_u32 (cpu, vm, elem);
6049
6050 for (i = 0; i < (full ? 4 : 2); i++)
6051 aarch64_set_vec_u32 (cpu, vd, i,
6052 aarch64_get_vec_u32 (cpu, vd, i) -
6053 (aarch64_get_vec_u32 (cpu, vs, i) * val));
6054 return;
6055 }
6056
6057 case 0:
6058 case 3:
6059 default:
6060 HALT_NYI;
6061 }
6062 }
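/* As with do_vec_MUL_by_element above, the element index here is H:L:M
   for halfword lanes (hence the vm > 15 rejection, since M doubles as the
   top bit of the register field) and H:L for word lanes.  */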
6063
6064 static void
6065 do_vec_SUB (sim_cpu *cpu)
6066 {
6067 /* instr [31] = 0
6068 instr [30] = half(0)/full(1)
6069 instr [29,24] = 10 1110
6070 instr [23,22] = size: byte(00), half(01), word (10), long (11)
6071 instr [21] = 1
6072 instr [20,16] = Vm
6073 instr [15,10] = 10 0001
6074 instr [9, 5] = Vn
6075 instr [4, 0] = Vd. */
6076
6077 unsigned full = INSTR (30, 30);
6078 unsigned vm = INSTR (20, 16);
6079 unsigned vn = INSTR (9, 5);
6080 unsigned vd = INSTR (4, 0);
6081 unsigned i;
6082
6083 NYI_assert (29, 24, 0x2E);
6084 NYI_assert (21, 21, 1);
6085 NYI_assert (15, 10, 0x21);
6086
6087 switch (INSTR (23, 22))
6088 {
6089 case 0:
6090 for (i = 0; i < (full ? 16 : 8); i++)
6091 aarch64_set_vec_s8 (cpu, vd, i,
6092 aarch64_get_vec_s8 (cpu, vn, i)
6093 - aarch64_get_vec_s8 (cpu, vm, i));
6094 return;
6095
6096 case 1:
6097 for (i = 0; i < (full ? 8 : 4); i++)
6098 aarch64_set_vec_s16 (cpu, vd, i,
6099 aarch64_get_vec_s16 (cpu, vn, i)
6100 - aarch64_get_vec_s16 (cpu, vm, i));
6101 return;
6102
6103 case 2:
6104 for (i = 0; i < (full ? 4 : 2); i++)
6105 aarch64_set_vec_s32 (cpu, vd, i,
6106 aarch64_get_vec_s32 (cpu, vn, i)
6107 - aarch64_get_vec_s32 (cpu, vm, i));
6108 return;
6109
6110 case 3:
6111 if (full == 0)
6112 HALT_UNALLOC;
6113
6114 for (i = 0; i < 2; i++)
6115 aarch64_set_vec_s64 (cpu, vd, i,
6116 aarch64_get_vec_s64 (cpu, vn, i)
6117 - aarch64_get_vec_s64 (cpu, vm, i));
6118 return;
6119 }
6120 }
6121
6122 static void
6123 do_vec_MLS (sim_cpu *cpu)
6124 {
6125 /* instr [31] = 0
6126 instr [30] = half(0)/full(1)
6127 instr [29,24] = 10 1110
6128 instr [23,22] = size: byte(00), half(01), word (10)
6129 instr [21] = 1
6130 instr [20,16] = Vm
6131 instr [15,10] = 10 0101
6132 instr [9, 5] = Vn
6133 instr [4, 0] = Vd. */
6134
6135 unsigned full = INSTR (30, 30);
6136 unsigned vm = INSTR (20, 16);
6137 unsigned vn = INSTR (9, 5);
6138 unsigned vd = INSTR (4, 0);
6139 unsigned i;
6140
6141 NYI_assert (29, 24, 0x2E);
6142 NYI_assert (21, 21, 1);
6143 NYI_assert (15, 10, 0x25);
6144
6145 switch (INSTR (23, 22))
6146 {
6147 case 0:
6148 for (i = 0; i < (full ? 16 : 8); i++)
6149 aarch64_set_vec_u8 (cpu, vd, i,
6150 (aarch64_get_vec_u8 (cpu, vn, i)
6151 * aarch64_get_vec_u8 (cpu, vm, i))
6152 - aarch64_get_vec_u8 (cpu, vd, i));
6153 return;
6154
6155 case 1:
6156 for (i = 0; i < (full ? 8 : 4); i++)
6157 aarch64_set_vec_u16 (cpu, vd, i,
6158 (aarch64_get_vec_u16 (cpu, vn, i)
6159 * aarch64_get_vec_u16 (cpu, vm, i))
6160 - aarch64_get_vec_u16 (cpu, vd, i));
6161 return;
6162
6163 case 2:
6164 for (i = 0; i < (full ? 4 : 2); i++)
6165 aarch64_set_vec_u32 (cpu, vd, i,
6166 (aarch64_get_vec_u32 (cpu, vn, i)
6167 * aarch64_get_vec_u32 (cpu, vm, i))
6168 - aarch64_get_vec_u32 (cpu, vd, i));
6169 return;
6170
6171 default:
6172 HALT_UNALLOC;
6173 }
6174 }
6175
6176 static void
6177 do_vec_FDIV (sim_cpu *cpu)
6178 {
6179 /* instr [31] = 0
6180 instr [30] = half(0)/full(1)
6181 instr [29,23] = 10 1110 0
6182 instr [22] = float(0)/double(1)
6183 instr [21] = 1
6184 instr [20,16] = Vm
6185 instr [15,10] = 1111 11
6186 instr [9, 5] = Vn
6187 instr [4, 0] = Vd. */
6188
6189 unsigned full = INSTR (30, 30);
6190 unsigned vm = INSTR (20, 16);
6191 unsigned vn = INSTR (9, 5);
6192 unsigned vd = INSTR (4, 0);
6193 unsigned i;
6194
6195 NYI_assert (29, 23, 0x5C);
6196 NYI_assert (21, 21, 1);
6197 NYI_assert (15, 10, 0x3F);
6198
6199 if (INSTR (22, 22))
6200 {
6201 if (! full)
6202 HALT_UNALLOC;
6203
6204 for (i = 0; i < 2; i++)
6205 aarch64_set_vec_double (cpu, vd, i,
6206 aarch64_get_vec_double (cpu, vn, i)
6207 / aarch64_get_vec_double (cpu, vm, i));
6208 }
6209 else
6210 for (i = 0; i < (full ? 4 : 2); i++)
6211 aarch64_set_vec_float (cpu, vd, i,
6212 aarch64_get_vec_float (cpu, vn, i)
6213 / aarch64_get_vec_float (cpu, vm, i));
6214 }
6215
6216 static void
6217 do_vec_FMUL (sim_cpu *cpu)
6218 {
6219 /* instr [31] = 0
6220 instr [30] = half(0)/full(1)
6221 instr [29,23] = 10 1110 0
6222 instr [22] = float(0)/double(1)
6223 instr [21] = 1
6224 instr [20,16] = Vm
6225 instr [15,10] = 1101 11
6226 instr [9, 5] = Vn
6227 instr [4, 0] = Vd. */
6228
6229 unsigned full = INSTR (30, 30);
6230 unsigned vm = INSTR (20, 16);
6231 unsigned vn = INSTR (9, 5);
6232 unsigned vd = INSTR (4, 0);
6233 unsigned i;
6234
6235 NYI_assert (29, 23, 0x5C);
6236 NYI_assert (21, 21, 1);
6237 NYI_assert (15, 10, 0x37);
6238
6239 if (INSTR (22, 22))
6240 {
6241 if (! full)
6242 HALT_UNALLOC;
6243
6244 for (i = 0; i < 2; i++)
6245 aarch64_set_vec_double (cpu, vd, i,
6246 aarch64_get_vec_double (cpu, vn, i)
6247 * aarch64_get_vec_double (cpu, vm, i));
6248 }
6249 else
6250 for (i = 0; i < (full ? 4 : 2); i++)
6251 aarch64_set_vec_float (cpu, vd, i,
6252 aarch64_get_vec_float (cpu, vn, i)
6253 * aarch64_get_vec_float (cpu, vm, i));
6254 }
6255
6256 static void
6257 do_vec_FADDP (sim_cpu *cpu)
6258 {
6259 /* instr [31] = 0
6260 instr [30] = half(0)/full(1)
6261 instr [29,23] = 10 1110 0
6262 instr [22] = float(0)/double(1)
6263 instr [21] = 1
6264 instr [20,16] = Vm
6265 instr [15,10] = 1101 01
6266 instr [9, 5] = Vn
6267 instr [4, 0] = Vd. */
6268
6269 unsigned full = INSTR (30, 30);
6270 unsigned vm = INSTR (20, 16);
6271 unsigned vn = INSTR (9, 5);
6272 unsigned vd = INSTR (4, 0);
6273
6274 NYI_assert (29, 23, 0x5C);
6275 NYI_assert (21, 21, 1);
6276 NYI_assert (15, 10, 0x35);
6277
6278 if (INSTR (22, 22))
6279 {
6280 /* Extract values before adding them in case vd == vn/vm. */
6281 double tmp1 = aarch64_get_vec_double (cpu, vn, 0);
6282 double tmp2 = aarch64_get_vec_double (cpu, vn, 1);
6283 double tmp3 = aarch64_get_vec_double (cpu, vm, 0);
6284 double tmp4 = aarch64_get_vec_double (cpu, vm, 1);
6285
6286 if (! full)
6287 HALT_UNALLOC;
6288
6289 aarch64_set_vec_double (cpu, vd, 0, tmp1 + tmp2);
6290 aarch64_set_vec_double (cpu, vd, 1, tmp3 + tmp4);
6291 }
6292 else
6293 {
6294 /* Extract values before adding them in case vd == vn/vm. */
6295 float tmp1 = aarch64_get_vec_float (cpu, vn, 0);
6296 float tmp2 = aarch64_get_vec_float (cpu, vn, 1);
6297 float tmp5 = aarch64_get_vec_float (cpu, vm, 0);
6298 float tmp6 = aarch64_get_vec_float (cpu, vm, 1);
6299
6300 if (full)
6301 {
6302 float tmp3 = aarch64_get_vec_float (cpu, vn, 2);
6303 float tmp4 = aarch64_get_vec_float (cpu, vn, 3);
6304 float tmp7 = aarch64_get_vec_float (cpu, vm, 2);
6305 float tmp8 = aarch64_get_vec_float (cpu, vm, 3);
6306
6307 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6308 aarch64_set_vec_float (cpu, vd, 1, tmp3 + tmp4);
6309 aarch64_set_vec_float (cpu, vd, 2, tmp5 + tmp6);
6310 aarch64_set_vec_float (cpu, vd, 3, tmp7 + tmp8);
6311 }
6312 else
6313 {
6314 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6315 aarch64_set_vec_float (cpu, vd, 1, tmp5 + tmp6);
6316 }
6317 }
6318 }
6319
6320 static void
6321 do_vec_FSQRT (sim_cpu *cpu)
6322 {
6323 /* instr[31] = 0
6324 instr[30] = half(0)/full(1)
6325 instr[29,23] = 10 1110 1
6326 instr[22] = single(0)/double(1)
6327 instr[21,10] = 10 0001 1111 10
6328 instr[9,5] = Vsrc
6329 instr[4,0] = Vdest. */
6330
6331 unsigned vn = INSTR (9, 5);
6332 unsigned vd = INSTR (4, 0);
6333 unsigned full = INSTR (30, 30);
6334 int i;
6335
6336 NYI_assert (29, 23, 0x5D);
6337 NYI_assert (21, 10, 0x87E);
6338
6339 if (INSTR (22, 22))
6340 {
6341 if (! full)
6342 HALT_UNALLOC;
6343
6344 for (i = 0; i < 2; i++)
6345 aarch64_set_vec_double (cpu, vd, i,
6346 sqrt (aarch64_get_vec_double (cpu, vn, i)));
6347 }
6348 else
6349 {
6350 for (i = 0; i < (full ? 4 : 2); i++)
6351 aarch64_set_vec_float (cpu, vd, i,
6352 sqrtf (aarch64_get_vec_float (cpu, vn, i)));
6353 }
6354 }
6355
6356 static void
6357 do_vec_FNEG (sim_cpu *cpu)
6358 {
6359 /* instr[31] = 0
6360 instr[30] = half (0)/full (1)
6361 instr[29,23] = 10 1110 1
6362 instr[22] = single (0)/double (1)
6363 instr[21,10] = 10 0000 1111 10
6364 instr[9,5] = Vsrc
6365 instr[4,0] = Vdest. */
6366
6367 unsigned vn = INSTR (9, 5);
6368 unsigned vd = INSTR (4, 0);
6369 unsigned full = INSTR (30, 30);
6370 int i;
6371
6372 NYI_assert (29, 23, 0x5D);
6373 NYI_assert (21, 10, 0x83E);
6374
6375 if (INSTR (22, 22))
6376 {
6377 if (! full)
6378 HALT_UNALLOC;
6379
6380 for (i = 0; i < 2; i++)
6381 aarch64_set_vec_double (cpu, vd, i,
6382 - aarch64_get_vec_double (cpu, vn, i));
6383 }
6384 else
6385 {
6386 for (i = 0; i < (full ? 4 : 2); i++)
6387 aarch64_set_vec_float (cpu, vd, i,
6388 - aarch64_get_vec_float (cpu, vn, i));
6389 }
6390 }
6391
6392 static void
6393 do_vec_NOT (sim_cpu *cpu)
6394 {
6395 /* instr[31] = 0
6396 instr[30] = half (0)/full (1)
6397 instr[29,10] = 10 1110 0010 0000 0101 10
6398 instr[9,5] = Vn
6399 instr[4,0] = Vd. */
6400
6401 unsigned vn = INSTR (9, 5);
6402 unsigned vd = INSTR (4, 0);
6403 unsigned i;
6404 int full = INSTR (30, 30);
6405
6406 NYI_assert (29, 10, 0xB8816);
6407
6408 for (i = 0; i < (full ? 16 : 8); i++)
6409 aarch64_set_vec_u8 (cpu, vd, i, ~ aarch64_get_vec_u8 (cpu, vn, i));
6410 }
6411
6412 static unsigned int
6413 clz (uint64_t val, unsigned size)
6414 {
6415 uint64_t mask = 1;
6416 int count;
6417
6418 mask <<= (size - 1);
6419 count = 0;
6420 do
6421 {
6422 if (val & mask)
6423 break;
6424 mask >>= 1;
6425 count ++;
6426 }
6427 while (mask);
6428
6429 return count;
6430 }
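/* Note that for val == 0 the loop above runs the mask down to zero and
   clz returns "size", which matches the architected CLZ result for an
   all-zero lane.  */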
6431
6432 static void
6433 do_vec_CLZ (sim_cpu *cpu)
6434 {
6435 /* instr[31] = 0
6436 instr[30] = half (0)/full (1)
6437 instr[29,24] = 10 1110
6438 instr[23,22] = size
6439 instr[21,10] = 10 0000 0100 10
6440 instr[9,5] = Vn
6441 instr[4,0] = Vd. */
6442
6443 unsigned vn = INSTR (9, 5);
6444 unsigned vd = INSTR (4, 0);
6445 unsigned i;
6446 int full = INSTR (30,30);
6447
6448 NYI_assert (29, 24, 0x2E);
6449 NYI_assert (21, 10, 0x812);
6450
6451 switch (INSTR (23, 22))
6452 {
6453 case 0:
6454 for (i = 0; i < (full ? 16 : 8); i++)
6455 aarch64_set_vec_u8 (cpu, vd, i, clz (aarch64_get_vec_u8 (cpu, vn, i), 8));
6456 break;
6457 case 1:
6458 for (i = 0; i < (full ? 8 : 4); i++)
6459 aarch64_set_vec_u16 (cpu, vd, i, clz (aarch64_get_vec_u16 (cpu, vn, i), 16));
6460 break;
6461 case 2:
6462 for (i = 0; i < (full ? 4 : 2); i++)
6463 aarch64_set_vec_u32 (cpu, vd, i, clz (aarch64_get_vec_u32 (cpu, vn, i), 32));
6464 break;
6465 case 3:
6466 if (! full)
6467 HALT_UNALLOC;
6468 aarch64_set_vec_u64 (cpu, vd, 0, clz (aarch64_get_vec_u64 (cpu, vn, 0), 64));
6469 aarch64_set_vec_u64 (cpu, vd, 1, clz (aarch64_get_vec_u64 (cpu, vn, 1), 64));
6470 break;
6471 }
6472 }
6473
6474 static void
6475 do_vec_MOV_element (sim_cpu *cpu)
6476 {
6477 /* instr[31,21] = 0110 1110 000
6478 instr[20,16] = size & dest index
6479 instr[15] = 0
6480 instr[14,11] = source index
6481 instr[10] = 1
6482 instr[9,5] = Vs
6483 instr[4,0] = Vd. */
6484
6485 unsigned vs = INSTR (9, 5);
6486 unsigned vd = INSTR (4, 0);
6487 unsigned src_index;
6488 unsigned dst_index;
6489
6490 NYI_assert (31, 21, 0x370);
6491 NYI_assert (15, 15, 0);
6492 NYI_assert (10, 10, 1);
6493
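/* The position of the lowest set bit in instr[20,16] selects the
   element size: xxxx1 ==> byte, xxx10 ==> half word, xx100 ==> word,
   x1000 ==> double word. */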
6494 if (INSTR (16, 16))
6495 {
6496 /* Move a byte. */
6497 src_index = INSTR (14, 11);
6498 dst_index = INSTR (20, 17);
6499 aarch64_set_vec_u8 (cpu, vd, dst_index,
6500 aarch64_get_vec_u8 (cpu, vs, src_index));
6501 }
6502 else if (INSTR (17, 17))
6503 {
6504 /* Move 16-bits. */
6505 NYI_assert (11, 11, 0);
6506 src_index = INSTR (14, 12);
6507 dst_index = INSTR (20, 18);
6508 aarch64_set_vec_u16 (cpu, vd, dst_index,
6509 aarch64_get_vec_u16 (cpu, vs, src_index));
6510 }
6511 else if (INSTR (18, 18))
6512 {
6513 /* Move 32-bits. */
6514 NYI_assert (12, 11, 0);
6515 src_index = INSTR (14, 13);
6516 dst_index = INSTR (20, 19);
6517 aarch64_set_vec_u32 (cpu, vd, dst_index,
6518 aarch64_get_vec_u32 (cpu, vs, src_index));
6519 }
6520 else
6521 {
6522 NYI_assert (19, 19, 1);
6523 NYI_assert (13, 11, 0);
6524 src_index = INSTR (14, 14);
6525 dst_index = INSTR (20, 20);
6526 aarch64_set_vec_u64 (cpu, vd, dst_index,
6527 aarch64_get_vec_u64 (cpu, vs, src_index));
6528 }
6529 }
6530
6531 static void
6532 do_vec_REV32 (sim_cpu *cpu)
6533 {
6534 /* instr[31] = 0
6535 instr[30] = full/half
6536 instr[29,24] = 10 1110
6537 instr[23,22] = size
6538 instr[21,10] = 10 0000 0000 10
6539 instr[9,5] = Rn
6540 instr[4,0] = Rd. */
6541
6542 unsigned rn = INSTR (9, 5);
6543 unsigned rd = INSTR (4, 0);
6544 unsigned size = INSTR (23, 22);
6545 unsigned full = INSTR (30, 30);
6546 unsigned i;
6547 FRegister val;
6548
6549 NYI_assert (29, 24, 0x2E);
6550 NYI_assert (21, 10, 0x802);
6551
6552 switch (size)
6553 {
6554 case 0:
6555 for (i = 0; i < (full ? 16 : 8); i++)
6556 val.b[i ^ 0x3] = aarch64_get_vec_u8 (cpu, rn, i);
6557 break;
6558
6559 case 1:
6560 for (i = 0; i < (full ? 8 : 4); i++)
6561 val.h[i ^ 0x1] = aarch64_get_vec_u16 (cpu, rn, i);
6562 break;
6563
6564 default:
6565 HALT_UNALLOC;
6566 }
6567
6568 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
6569 if (full)
6570 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
6571 }
6572
6573 static void
6574 do_vec_EXT (sim_cpu *cpu)
6575 {
6576 /* instr[31] = 0
6577 instr[30] = full/half
6578 instr[29,21] = 10 1110 000
6579 instr[20,16] = Vm
6580 instr[15] = 0
6581 instr[14,11] = source index
6582 instr[10] = 0
6583 instr[9,5] = Vn
6584 instr[4,0] = Vd. */
6585
6586 unsigned vm = INSTR (20, 16);
6587 unsigned vn = INSTR (9, 5);
6588 unsigned vd = INSTR (4, 0);
6589 unsigned src_index = INSTR (14, 11);
6590 unsigned full = INSTR (30, 30);
6591 unsigned i;
6592 unsigned j;
6593 FRegister val;
6594
6595 NYI_assert (31, 21, 0x370);
6596 NYI_assert (15, 15, 0);
6597 NYI_assert (10, 10, 0);
6598
6599 if (!full && (src_index & 0x8))
6600 HALT_UNALLOC;
6601
6602 j = 0;
6603
6604 for (i = src_index; i < (full ? 16 : 8); i++)
6605 val.b[j ++] = aarch64_get_vec_u8 (cpu, vn, i);
6606 for (i = 0; i < src_index; i++)
6607 val.b[j ++] = aarch64_get_vec_u8 (cpu, vm, i);
6608
6609 aarch64_set_vec_u64 (cpu, vd, 0, val.v[0]);
6610 if (full)
6611 aarch64_set_vec_u64 (cpu, vd, 1, val.v[1]);
6612 }
6613
6614 static void
6615 dexAdvSIMD0 (sim_cpu *cpu)
6616 {
6617 /* instr [28,25] = 0 111. */
6618 if (INSTR (15, 10) == 0x07
6619 && (INSTR (9, 5) == INSTR (20, 16)))
6621 {
6622 if (INSTR (31, 21) == 0x075
6623 || INSTR (31, 21) == 0x275)
6624 {
6625 do_vec_MOV_whole_vector (cpu);
6626 return;
6627 }
6628 }
6629
6630 if (INSTR (29, 19) == 0x1E0)
6631 {
6632 do_vec_MOV_immediate (cpu);
6633 return;
6634 }
6635
6636 if (INSTR (29, 19) == 0x5E0)
6637 {
6638 do_vec_MVNI (cpu);
6639 return;
6640 }
6641
6642 if (INSTR (29, 19) == 0x1C0
6643 || INSTR (29, 19) == 0x1C1)
6644 {
6645 if (INSTR (15, 10) == 0x03)
6646 {
6647 do_vec_DUP_scalar_into_vector (cpu);
6648 return;
6649 }
6650 }
6651
6652 switch (INSTR (29, 24))
6653 {
6654 case 0x0E: do_vec_op1 (cpu); return;
6655 case 0x0F: do_vec_op2 (cpu); return;
6656
6657 case 0x2E:
6658 if (INSTR (21, 21) == 1)
6659 {
6660 switch (INSTR (15, 10))
6661 {
6662 case 0x02:
6663 do_vec_REV32 (cpu);
6664 return;
6665
6666 case 0x07:
6667 switch (INSTR (23, 22))
6668 {
6669 case 0: do_vec_EOR (cpu); return;
6670 case 1: do_vec_BSL (cpu); return;
6671 case 2:
6672 case 3: do_vec_bit (cpu); return;
6673 }
6674 break;
6675
6676 case 0x08: do_vec_sub_long (cpu); return;
6677 case 0x11: do_vec_USHL (cpu); return;
6678 case 0x12: do_vec_CLZ (cpu); return;
6679 case 0x16: do_vec_NOT (cpu); return;
6680 case 0x19: do_vec_max (cpu); return;
6681 case 0x1B: do_vec_min (cpu); return;
6682 case 0x21: do_vec_SUB (cpu); return;
6683 case 0x25: do_vec_MLS (cpu); return;
6684 case 0x31: do_vec_FminmaxNMP (cpu); return;
6685 case 0x35: do_vec_FADDP (cpu); return;
6686 case 0x37: do_vec_FMUL (cpu); return;
6687 case 0x3F: do_vec_FDIV (cpu); return;
6688
6689 case 0x3E:
6690 switch (INSTR (20, 16))
6691 {
6692 case 0x00: do_vec_FNEG (cpu); return;
6693 case 0x01: do_vec_FSQRT (cpu); return;
6694 default: HALT_NYI;
6695 }
6696
6697 case 0x0D:
6698 case 0x0F:
6699 case 0x22:
6700 case 0x23:
6701 case 0x26:
6702 case 0x2A:
6703 case 0x32:
6704 case 0x36:
6705 case 0x39:
6706 case 0x3A:
6707 do_vec_compare (cpu); return;
6708
6709 default:
6710 break;
6711 }
6712 }
6713
6714 if (INSTR (31, 21) == 0x370)
6715 {
6716 if (INSTR (10, 10))
6717 do_vec_MOV_element (cpu);
6718 else
6719 do_vec_EXT (cpu);
6720 return;
6721 }
6722
6723 switch (INSTR (21, 10))
6724 {
6725 case 0x82E: do_vec_neg (cpu); return;
6726 case 0x87E: do_vec_sqrt (cpu); return;
6727 default:
6728 if (INSTR (15, 10) == 0x30)
6729 {
6730 do_vec_mull (cpu);
6731 return;
6732 }
6733 break;
6734 }
6735 break;
6736
6737 case 0x2f:
6738 switch (INSTR (15, 10))
6739 {
6740 case 0x01: do_vec_SSHR_USHR (cpu); return;
6741 case 0x10:
6742 case 0x12: do_vec_mls_indexed (cpu); return;
6743 case 0x29: do_vec_xtl (cpu); return;
6744 default:
6745 HALT_NYI;
6746 }
6747
6748 default:
6749 break;
6750 }
6751
6752 HALT_NYI;
6753 }
6754
6755 /* 3 sources. */
6756
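/* Note: these helpers compute the multiply and the add/subtract as two
   separately rounded operations, whereas the architected FMADD family
   fuses them with a single rounding. */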
6757 /* Float multiply add. */
6758 static void
6759 fmadds (sim_cpu *cpu)
6760 {
6761 unsigned sa = INSTR (14, 10);
6762 unsigned sm = INSTR (20, 16);
6763 unsigned sn = INSTR ( 9, 5);
6764 unsigned sd = INSTR ( 4, 0);
6765
6766 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
6767 + aarch64_get_FP_float (cpu, sn)
6768 * aarch64_get_FP_float (cpu, sm));
6769 }
6770
6771 /* Double multiply add. */
6772 static void
6773 fmaddd (sim_cpu *cpu)
6774 {
6775 unsigned sa = INSTR (14, 10);
6776 unsigned sm = INSTR (20, 16);
6777 unsigned sn = INSTR ( 9, 5);
6778 unsigned sd = INSTR ( 4, 0);
6779
6780 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
6781 + aarch64_get_FP_double (cpu, sn)
6782 * aarch64_get_FP_double (cpu, sm));
6783 }
6784
6785 /* Float multiply subtract. */
6786 static void
6787 fmsubs (sim_cpu *cpu)
6788 {
6789 unsigned sa = INSTR (14, 10);
6790 unsigned sm = INSTR (20, 16);
6791 unsigned sn = INSTR ( 9, 5);
6792 unsigned sd = INSTR ( 4, 0);
6793
6794 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
6795 - aarch64_get_FP_float (cpu, sn)
6796 * aarch64_get_FP_float (cpu, sm));
6797 }
6798
6799 /* Double multiply subtract. */
6800 static void
6801 fmsubd (sim_cpu *cpu)
6802 {
6803 unsigned sa = INSTR (14, 10);
6804 unsigned sm = INSTR (20, 16);
6805 unsigned sn = INSTR ( 9, 5);
6806 unsigned sd = INSTR ( 4, 0);
6807
6808 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
6809 - aarch64_get_FP_double (cpu, sn)
6810 * aarch64_get_FP_double (cpu, sm));
6811 }
6812
6813 /* Float negative multiply add. */
6814 static void
6815 fnmadds (sim_cpu *cpu)
6816 {
6817 unsigned sa = INSTR (14, 10);
6818 unsigned sm = INSTR (20, 16);
6819 unsigned sn = INSTR ( 9, 5);
6820 unsigned sd = INSTR ( 4, 0);
6821
6822 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
6823 + (- aarch64_get_FP_float (cpu, sn))
6824 * aarch64_get_FP_float (cpu, sm));
6825 }
6826
6827 /* Double negative multiply add. */
6828 static void
6829 fnmaddd (sim_cpu *cpu)
6830 {
6831 unsigned sa = INSTR (14, 10);
6832 unsigned sm = INSTR (20, 16);
6833 unsigned sn = INSTR ( 9, 5);
6834 unsigned sd = INSTR ( 4, 0);
6835
6836 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
6837 + (- aarch64_get_FP_double (cpu, sn))
6838 * aarch64_get_FP_double (cpu, sm));
6839 }
6840
6841 /* Float negative multiply subtract. */
6842 static void
6843 fnmsubs (sim_cpu *cpu)
6844 {
6845 unsigned sa = INSTR (14, 10);
6846 unsigned sm = INSTR (20, 16);
6847 unsigned sn = INSTR ( 9, 5);
6848 unsigned sd = INSTR ( 4, 0);
6849
6850 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
6851 + aarch64_get_FP_float (cpu, sn)
6852 * aarch64_get_FP_float (cpu, sm));
6853 }
6854
6855 /* Double negative multiply subtract. */
6856 static void
6857 fnmsubd (sim_cpu *cpu)
6858 {
6859 unsigned sa = INSTR (14, 10);
6860 unsigned sm = INSTR (20, 16);
6861 unsigned sn = INSTR ( 9, 5);
6862 unsigned sd = INSTR ( 4, 0);
6863
6864 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
6865 + aarch64_get_FP_double (cpu, sn)
6866 * aarch64_get_FP_double (cpu, sm));
6867 }
6868
6869 static void
6870 dexSimpleFPDataProc3Source (sim_cpu *cpu)
6871 {
6872 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
6873 instr[30] = 0
6874 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
6875 instr[28,25] = 1111
6876 instr[24] = 1
6877 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
6878 instr[21] ==> o1 : 0 ==> unnegated, 1 ==> negated
6879 instr[15] ==> o2 : 0 ==> ADD, 1 ==> SUB */
6880
6881 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
6882 /* dispatch on combined type:o1:o2. */
6883 uint32_t dispatch = (INSTR (23, 21) << 1) | INSTR (15, 15);
6884
6885 if (M_S != 0)
6886 HALT_UNALLOC;
6887
6888 switch (dispatch)
6889 {
6890 case 0: fmadds (cpu); return;
6891 case 1: fmsubs (cpu); return;
6892 case 2: fnmadds (cpu); return;
6893 case 3: fnmsubs (cpu); return;
6894 case 4: fmaddd (cpu); return;
6895 case 5: fmsubd (cpu); return;
6896 case 6: fnmaddd (cpu); return;
6897 case 7: fnmsubd (cpu); return;
6898 default:
6899 /* type > 1 is currently unallocated. */
6900 HALT_UNALLOC;
6901 }
6902 }
6903
6904 static void
6905 dexSimpleFPFixedConvert (sim_cpu *cpu)
6906 {
6907 HALT_NYI;
6908 }
6909
6910 static void
6911 dexSimpleFPCondCompare (sim_cpu *cpu)
6912 {
6913 /* instr [31,23] = 0001 1110 0
6914 instr [22] = type
6915 instr [21] = 1
6916 instr [20,16] = Rm
6917 instr [15,12] = condition
6918 instr [11,10] = 01
6919 instr [9,5] = Rn
6920 instr [4] = 0
6921 instr [3,0] = nzcv */
6922
6923 unsigned rm = INSTR (20, 16);
6924 unsigned rn = INSTR (9, 5);
6925
6926 NYI_assert (31, 23, 0x3C);
6927 NYI_assert (11, 10, 0x1);
6928 NYI_assert (4, 4, 0);
6929
6930 if (! testConditionCode (cpu, INSTR (15, 12)))
6931 {
6932 aarch64_set_CPSR (cpu, INSTR (3, 0));
6933 return;
6934 }
6935
6936 if (INSTR (22, 22))
6937 {
6938 /* Double precision. */
6939 double val1 = aarch64_get_vec_double (cpu, rn, 0);
6940 double val2 = aarch64_get_vec_double (cpu, rm, 0);
6941
6942 /* FIXME: Check for NaNs. */
6943 if (val1 == val2)
6944 aarch64_set_CPSR (cpu, (Z | C));
6945 else if (val1 < val2)
6946 aarch64_set_CPSR (cpu, N);
6947 else /* val1 > val2 */
6948 aarch64_set_CPSR (cpu, C);
6949 }
6950 else
6951 {
6952 /* Single precision. */
6953 float val1 = aarch64_get_vec_float (cpu, rn, 0);
6954 float val2 = aarch64_get_vec_float (cpu, rm, 0);
6955
6956 /* FIXME: Check for NaNs. */
6957 if (val1 == val2)
6958 aarch64_set_CPSR (cpu, (Z | C));
6959 else if (val1 < val2)
6960 aarch64_set_CPSR (cpu, N);
6961 else /* val1 > val2 */
6962 aarch64_set_CPSR (cpu, C);
6963 }
6964 }
6965
6966 /* 2 sources. */
6967
6968 /* Float add. */
6969 static void
6970 fadds (sim_cpu *cpu)
6971 {
6972 unsigned sm = INSTR (20, 16);
6973 unsigned sn = INSTR ( 9, 5);
6974 unsigned sd = INSTR ( 4, 0);
6975
6976 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
6977 + aarch64_get_FP_float (cpu, sm));
6978 }
6979
6980 /* Double add. */
6981 static void
6982 faddd (sim_cpu *cpu)
6983 {
6984 unsigned sm = INSTR (20, 16);
6985 unsigned sn = INSTR ( 9, 5);
6986 unsigned sd = INSTR ( 4, 0);
6987
6988 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
6989 + aarch64_get_FP_double (cpu, sm));
6990 }
6991
6992 /* Float divide. */
6993 static void
6994 fdivs (sim_cpu *cpu)
6995 {
6996 unsigned sm = INSTR (20, 16);
6997 unsigned sn = INSTR ( 9, 5);
6998 unsigned sd = INSTR ( 4, 0);
6999
7000 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7001 / aarch64_get_FP_float (cpu, sm));
7002 }
7003
7004 /* Double divide. */
7005 static void
7006 fdivd (sim_cpu *cpu)
7007 {
7008 unsigned sm = INSTR (20, 16);
7009 unsigned sn = INSTR ( 9, 5);
7010 unsigned sd = INSTR ( 4, 0);
7011
7012 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7013 / aarch64_get_FP_double (cpu, sm));
7014 }
7015
7016 /* Float multiply. */
7017 static void
7018 fmuls (sim_cpu *cpu)
7019 {
7020 unsigned sm = INSTR (20, 16);
7021 unsigned sn = INSTR ( 9, 5);
7022 unsigned sd = INSTR ( 4, 0);
7023
7024 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7025 * aarch64_get_FP_float (cpu, sm));
7026 }
7027
7028 /* Double multiply. */
7029 static void
7030 fmuld (sim_cpu *cpu)
7031 {
7032 unsigned sm = INSTR (20, 16);
7033 unsigned sn = INSTR ( 9, 5);
7034 unsigned sd = INSTR ( 4, 0);
7035
7036 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7037 * aarch64_get_FP_double (cpu, sm));
7038 }
7039
7040 /* Float negate and multiply. */
7041 static void
7042 fnmuls (sim_cpu *cpu)
7043 {
7044 unsigned sm = INSTR (20, 16);
7045 unsigned sn = INSTR ( 9, 5);
7046 unsigned sd = INSTR ( 4, 0);
7047
7048 aarch64_set_FP_float (cpu, sd, - (aarch64_get_FP_float (cpu, sn)
7049 * aarch64_get_FP_float (cpu, sm)));
7050 }
7051
7052 /* Double negate and multiply. */
7053 static void
7054 fnmuld (sim_cpu *cpu)
7055 {
7056 unsigned sm = INSTR (20, 16);
7057 unsigned sn = INSTR ( 9, 5);
7058 unsigned sd = INSTR ( 4, 0);
7059
7060 aarch64_set_FP_double (cpu, sd, - (aarch64_get_FP_double (cpu, sn)
7061 * aarch64_get_FP_double (cpu, sm)));
7062 }
7063
7064 /* Float subtract. */
7065 static void
7066 fsubs (sim_cpu *cpu)
7067 {
7068 unsigned sm = INSTR (20, 16);
7069 unsigned sn = INSTR ( 9, 5);
7070 unsigned sd = INSTR ( 4, 0);
7071
7072 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7073 - aarch64_get_FP_float (cpu, sm));
7074 }
7075
7076 /* Double subtract. */
7077 static void
7078 fsubd (sim_cpu *cpu)
7079 {
7080 unsigned sm = INSTR (20, 16);
7081 unsigned sn = INSTR ( 9, 5);
7082 unsigned sd = INSTR ( 4, 0);
7083
7084 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7085 - aarch64_get_FP_double (cpu, sm));
7086 }
7087
7088 static void
7089 do_FMINNM (sim_cpu *cpu)
7090 {
7091 /* instr[31,23] = 0 0011 1100
7092 instr[22] = float(0)/double(1)
7093 instr[21] = 1
7094 instr[20,16] = Sm
7095 instr[15,10] = 01 1110
7096 instr[9,5] = Sn
7097 instr[4,0] = Sd */
7098
7099 unsigned sm = INSTR (20, 16);
7100 unsigned sn = INSTR ( 9, 5);
7101 unsigned sd = INSTR ( 4, 0);
7102
7103 NYI_assert (31, 23, 0x03C);
7104 NYI_assert (15, 10, 0x1E);
7105
7106 if (INSTR (22, 22))
7107 aarch64_set_FP_double (cpu, sd,
7108 dminnm (aarch64_get_FP_double (cpu, sn),
7109 aarch64_get_FP_double (cpu, sm)));
7110 else
7111 aarch64_set_FP_float (cpu, sd,
7112 fminnm (aarch64_get_FP_float (cpu, sn),
7113 aarch64_get_FP_float (cpu, sm)));
7114 }
7115
7116 static void
7117 do_FMAXNM (sim_cpu *cpu)
7118 {
7119 /* instr[31,23] = 0 0011 1100
7120 instr[22] = float(0)/double(1)
7121 instr[21] = 1
7122 instr[20,16] = Sm
7123 instr[15,10] = 01 1010
7124 instr[9,5] = Sn
7125 instr[4,0] = Sd */
7126
7127 unsigned sm = INSTR (20, 16);
7128 unsigned sn = INSTR ( 9, 5);
7129 unsigned sd = INSTR ( 4, 0);
7130
7131 NYI_assert (31, 23, 0x03C);
7132 NYI_assert (15, 10, 0x1A);
7133
7134 if (INSTR (22, 22))
7135 aarch64_set_FP_double (cpu, sd,
7136 dmaxnm (aarch64_get_FP_double (cpu, sn),
7137 aarch64_get_FP_double (cpu, sm)));
7138 else
7139 aarch64_set_FP_float (cpu, sd,
7140 fmaxnm (aarch64_get_FP_float (cpu, sn),
7141 aarch64_get_FP_float (cpu, sm)));
7142 }
7143
7144 static void
7145 dexSimpleFPDataProc2Source (sim_cpu *cpu)
7146 {
7147 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7148 instr[30] = 0
7149 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7150 instr[28,25] = 1111
7151 instr[24] = 0
7152 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
7153 instr[21] = 1
7154 instr[20,16] = Vm
7155 instr[15,12] ==> opcode : 0000 ==> FMUL, 0001 ==> FDIV
7156 0010 ==> FADD, 0011 ==> FSUB,
7157 0100 ==> FMAX, 0101 ==> FMIN
7158 0110 ==> FMAXNM, 0111 ==> FMINNM
7159 1000 ==> FNMUL, ow ==> UNALLOC
7160 instr[11,10] = 10
7161 instr[9,5] = Vn
7162 instr[4,0] = Vd */
7163
7164 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7165 uint32_t type = INSTR (23, 22);
7166 /* Dispatch on opcode. */
7167 uint32_t dispatch = INSTR (15, 12);
7168
7169 if (type > 1)
7170 HALT_UNALLOC;
7171
7172 if (M_S != 0)
7173 HALT_UNALLOC;
7174
7175 if (type)
7176 switch (dispatch)
7177 {
7178 case 0: fmuld (cpu); return;
7179 case 1: fdivd (cpu); return;
7180 case 2: faddd (cpu); return;
7181 case 3: fsubd (cpu); return;
7182 case 6: do_FMAXNM (cpu); return;
7183 case 7: do_FMINNM (cpu); return;
7184 case 8: fnmuld (cpu); return;
7185
7186 /* Have not yet implemented fmax and fmin. */
7187 case 4:
7188 case 5:
7189 HALT_NYI;
7190
7191 default:
7192 HALT_UNALLOC;
7193 }
7194 else /* type == 0 => floats. */
7195 switch (dispatch)
7196 {
7197 case 0: fmuls (cpu); return;
7198 case 1: fdivs (cpu); return;
7199 case 2: fadds (cpu); return;
7200 case 3: fsubs (cpu); return;
7201 case 6: do_FMAXNM (cpu); return;
7202 case 7: do_FMINNM (cpu); return;
7203 case 8: fnmuls (cpu); return;
7204
7205 case 4:
7206 case 5:
7207 HALT_NYI;
7208
7209 default:
7210 HALT_UNALLOC;
7211 }
7212 }
7213
7214 static void
7215 dexSimpleFPCondSelect (sim_cpu *cpu)
7216 {
7217 /* FCSEL
7218 instr[31,23] = 0 0011 1100
7219 instr[22] = 0=>single 1=>double
7220 instr[21] = 1
7221 instr[20,16] = Sm
7222 instr[15,12] = cond
7223 instr[11,10] = 11
7224 instr[9,5] = Sn
7225 instr[4,0] = Sd */
7226 unsigned sm = INSTR (20, 16);
7227 unsigned sn = INSTR ( 9, 5);
7228 unsigned sd = INSTR ( 4, 0);
7229 uint32_t set = testConditionCode (cpu, INSTR (15, 12));
7230
7231 NYI_assert (31, 23, 0x03C);
7232 NYI_assert (11, 10, 0x3);
7233
7234 if (INSTR (22, 22))
7235 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, set ? sn : sm));
7236 else
7237 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, set ? sn : sm));
7238 }
7239
7240 /* Store 32 bit unscaled signed 9 bit. */
7241 static void
7242 fsturs (sim_cpu *cpu, int32_t offset)
7243 {
7244 unsigned int rn = INSTR (9, 5);
7245 unsigned int st = INSTR (4, 0);
7246
7247 aarch64_set_mem_u32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7248 aarch64_get_vec_u32 (cpu, st, 0));
7249 }
7250
7251 /* Store 64 bit unscaled signed 9 bit. */
7252 static void
7253 fsturd (sim_cpu *cpu, int32_t offset)
7254 {
7255 unsigned int rn = INSTR (9, 5);
7256 unsigned int st = INSTR (4, 0);
7257
7258 aarch64_set_mem_u64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7259 aarch64_get_vec_u64 (cpu, st, 0));
7260 }
7261
7262 /* Store 128 bit unscaled signed 9 bit. */
7263 static void
7264 fsturq (sim_cpu *cpu, int32_t offset)
7265 {
7266 unsigned int rn = INSTR (9, 5);
7267 unsigned int st = INSTR (4, 0);
7268 FRegister a;
7269
7270 aarch64_get_FP_long_double (cpu, st, & a);
7271 aarch64_set_mem_long_double (cpu,
7272 aarch64_get_reg_u64 (cpu, rn, SP_OK)
7273 + offset, a);
7274 }
7275
7276 /* TODO FP move register. */
7277
7278 /* 32 bit fp to fp move register. */
7279 static void
7280 ffmovs (sim_cpu *cpu)
7281 {
7282 unsigned int rn = INSTR (9, 5);
7283 unsigned int st = INSTR (4, 0);
7284
7285 aarch64_set_FP_float (cpu, st, aarch64_get_FP_float (cpu, rn));
7286 }
7287
7288 /* 64 bit fp to fp move register. */
7289 static void
7290 ffmovd (sim_cpu *cpu)
7291 {
7292 unsigned int rn = INSTR (9, 5);
7293 unsigned int st = INSTR (4, 0);
7294
7295 aarch64_set_FP_double (cpu, st, aarch64_get_FP_double (cpu, rn));
7296 }
7297
7298 /* 32 bit GReg to Vec move register. */
7299 static void
7300 fgmovs (sim_cpu *cpu)
7301 {
7302 unsigned int rn = INSTR (9, 5);
7303 unsigned int st = INSTR (4, 0);
7304
7305 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_reg_u32 (cpu, rn, NO_SP));
7306 }
7307
7308 /* 64 bit g to fp move register. */
7309 static void
7310 fgmovd (sim_cpu *cpu)
7311 {
7312 unsigned int rn = INSTR (9, 5);
7313 unsigned int st = INSTR (4, 0);
7314
7315 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_reg_u64 (cpu, rn, NO_SP));
7316 }
7317
7318 /* 32 bit fp to g move register. */
7319 static void
7320 gfmovs (sim_cpu *cpu)
7321 {
7322 unsigned int rn = INSTR (9, 5);
7323 unsigned int st = INSTR (4, 0);
7324
7325 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u32 (cpu, rn, 0));
7326 }
7327
7328 /* 64 bit fp to g move register. */
7329 static void
7330 gfmovd (sim_cpu *cpu)
7331 {
7332 unsigned int rn = INSTR (9, 5);
7333 unsigned int st = INSTR (4, 0);
7334
7335 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u64 (cpu, rn, 0));
7336 }
7337
7338 /* FP move immediate
7339
7340 These install an immediate 8 bit value in the target register
7341 where the 8 bits comprise 1 sign bit, 4 bits of fraction and a 3
7342 bit exponent. */
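/* For example, imm8 == 0x70 (sign 0, exponent 0b111, fraction 0b0000)
   expands to 1.0 and imm8 == 0xF0 expands to -1.0; the encodable values
   have the form +/- (16 + fraction) / 16 * 2^N with N in [-3, 4]. */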
7343
7344 static void
7345 fmovs (sim_cpu *cpu)
7346 {
7347 unsigned int sd = INSTR (4, 0);
7348 uint32_t imm = INSTR (20, 13);
7349 float f = fp_immediate_for_encoding_32 (imm);
7350
7351 aarch64_set_FP_float (cpu, sd, f);
7352 }
7353
7354 static void
7355 fmovd (sim_cpu *cpu)
7356 {
7357 unsigned int sd = INSTR (4, 0);
7358 uint32_t imm = INSTR (20, 13);
7359 double d = fp_immediate_for_encoding_64 (imm);
7360
7361 aarch64_set_FP_double (cpu, sd, d);
7362 }
7363
7364 static void
7365 dexSimpleFPImmediate (sim_cpu *cpu)
7366 {
7367 /* instr[31,23] == 00111100
7368 instr[22] == type : single(0)/double(1)
7369 instr[21] == 1
7370 instr[20,13] == imm8
7371 instr[12,10] == 100
7372 instr[9,5] == imm5 : 00000 ==> OK, ow ==> UNALLOC
7373 instr[4,0] == Rd */
7374 uint32_t imm5 = INSTR (9, 5);
7375
7376 NYI_assert (31, 23, 0x3C);
7377
7378 if (imm5 != 0)
7379 HALT_UNALLOC;
7380
7381 if (INSTR (22, 22))
7382 fmovd (cpu);
7383 else
7384 fmovs (cpu);
7385 }
7386
7387 /* TODO specific decode and execute for group Load Store. */
7388
7389 /* TODO FP load/store single register (unscaled offset). */
7390
7391 /* TODO load 8 bit unscaled signed 9 bit. */
7392 /* TODO load 16 bit unscaled signed 9 bit. */
7393
7394 /* Load 32 bit unscaled signed 9 bit. */
7395 static void
7396 fldurs (sim_cpu *cpu, int32_t offset)
7397 {
7398 unsigned int rn = INSTR (9, 5);
7399 unsigned int st = INSTR (4, 0);
7400
7401 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
7402 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7403 }
7404
7405 /* Load 64 bit unscaled signed 9 bit. */
7406 static void
7407 fldurd (sim_cpu *cpu, int32_t offset)
7408 {
7409 unsigned int rn = INSTR (9, 5);
7410 unsigned int st = INSTR (4, 0);
7411
7412 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64
7413 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7414 }
7415
7416 /* Load 128 bit unscaled signed 9 bit. */
7417 static void
7418 fldurq (sim_cpu *cpu, int32_t offset)
7419 {
7420 unsigned int rn = INSTR (9, 5);
7421 unsigned int st = INSTR (4, 0);
7422 FRegister a;
7423 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
7424
7425 aarch64_get_mem_long_double (cpu, addr, & a);
7426 aarch64_set_FP_long_double (cpu, st, a);
7427 }
7428
7429 /* TODO store 8 bit unscaled signed 9 bit. */
7430 /* TODO store 16 bit unscaled signed 9 bit. */
7431
7432
7433 /* 1 source. */
7434
7435 /* Float absolute value. */
7436 static void
7437 fabss (sim_cpu *cpu)
7438 {
7439 unsigned sn = INSTR (9, 5);
7440 unsigned sd = INSTR (4, 0);
7441 float value = aarch64_get_FP_float (cpu, sn);
7442
7443 aarch64_set_FP_float (cpu, sd, fabsf (value));
7444 }
7445
7446 /* Double absolute value. */
7447 static void
7448 fabcpu (sim_cpu *cpu)
7449 {
7450 unsigned sn = INSTR (9, 5);
7451 unsigned sd = INSTR (4, 0);
7452 double value = aarch64_get_FP_double (cpu, sn);
7453
7454 aarch64_set_FP_double (cpu, sd, fabs (value));
7455 }
7456
7457 /* Float negative value. */
7458 static void
7459 fnegs (sim_cpu *cpu)
7460 {
7461 unsigned sn = INSTR (9, 5);
7462 unsigned sd = INSTR (4, 0);
7463
7464 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sn));
7465 }
7466
7467 /* Double negative value. */
7468 static void
7469 fnegd (sim_cpu *cpu)
7470 {
7471 unsigned sn = INSTR (9, 5);
7472 unsigned sd = INSTR (4, 0);
7473
7474 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sn));
7475 }
7476
7477 /* Float square root. */
7478 static void
7479 fsqrts (sim_cpu *cpu)
7480 {
7481 unsigned sn = INSTR (9, 5);
7482 unsigned sd = INSTR (4, 0);
7483
7484 aarch64_set_FP_float (cpu, sd, sqrtf (aarch64_get_FP_float (cpu, sn)));
7485 }
7486
7487 /* Double square root. */
7488 static void
7489 fsqrtd (sim_cpu *cpu)
7490 {
7491 unsigned sn = INSTR (9, 5);
7492 unsigned sd = INSTR (4, 0);
7493
7494 aarch64_set_FP_double (cpu, sd,
7495 sqrt (aarch64_get_FP_double (cpu, sn)));
7496 }
7497
7498 /* Convert double to float. */
7499 static void
7500 fcvtds (sim_cpu *cpu)
7501 {
7502 unsigned sn = INSTR (9, 5);
7503 unsigned sd = INSTR (4, 0);
7504
7505 aarch64_set_FP_float (cpu, sd, (float) aarch64_get_FP_double (cpu, sn));
7506 }
7507
7508 /* Convert float to double. */
7509 static void
7510 fcvtcpu (sim_cpu *cpu)
7511 {
7512 unsigned sn = INSTR (9, 5);
7513 unsigned sd = INSTR (4, 0);
7514
7515 aarch64_set_FP_double (cpu, sd, (double) aarch64_get_FP_float (cpu, sn));
7516 }
7517
7518 static void
7519 do_FRINT (sim_cpu *cpu)
7520 {
7521 /* instr[31,23] = 0001 1110 0
7522 instr[22] = single(0)/double(1)
7523 instr[21,18] = 1001
7524 instr[17,15] = rounding mode
7525 instr[14,10] = 10000
7526 instr[9,5] = source
7527 instr[4,0] = dest */
7528
7529 float val;
7530 unsigned rs = INSTR (9, 5);
7531 unsigned rd = INSTR (4, 0);
7532 unsigned int rmode = INSTR (17, 15);
7533
7534 NYI_assert (31, 23, 0x03C);
7535 NYI_assert (21, 18, 0x9);
7536 NYI_assert (14, 10, 0x10);
7537
7538 if (rmode == 6 || rmode == 7)
7539 /* FIXME: Add support for the rmode == 6 exactness check.  Bits [23,22] read here hold the rounding mode (architecturally FPCR.RMode). */
7540 rmode = uimm (aarch64_get_FPSR (cpu), 23, 22);
7541
7542 if (INSTR (22, 22))
7543 {
7544 double val = aarch64_get_FP_double (cpu, rs);
7545
7546 switch (rmode)
7547 {
7548 case 0: /* mode N: nearest or even. */
7549 {
7550 double rval = round (val);
7551
7552 /* round () breaks ties away from zero, but FRINTN must break
7553 them to even: move a halfway case that landed on an odd
7554 integer to its even neighbour. */
7555 if (fabs (val - rval) == 0.5 && fmod (rval, 2.0) != 0.0)
7556 rval = trunc (val);
7557
7558 aarch64_set_FP_double (cpu, rd, rval);
7559 return;
7560 }
7561
7562 case 1: /* mode P: towards +inf. */
7563 /* ceil () rounds towards +inf for both signs. */
7564 aarch64_set_FP_double (cpu, rd, ceil (val));
7565 return;
7566
7567 case 2: /* mode M: towards -inf. */
7568 /* floor () rounds towards -inf for both signs. */
7569 aarch64_set_FP_double (cpu, rd, floor (val));
7570 return;
7575
7576 case 3: /* mode Z: towards 0. */
7577 aarch64_set_FP_double (cpu, rd, trunc (val));
7578 return;
7579
7580 case 4: /* mode A: away from 0. */
7581 aarch64_set_FP_double (cpu, rd, round (val));
7582 return;
7583
7584 case 6: /* mode X: use FPCR with exactness check. */
7585 case 7: /* mode I: use FPCR mode. */
7586 HALT_NYI;
7587
7588 default:
7589 HALT_UNALLOC;
7590 }
7591 }
7592
7593 val = aarch64_get_FP_float (cpu, rs);
7594
7595 switch (rmode)
7596 {
7597 case 0: /* mode N: nearest or even. */
7598 {
7599 float rval = roundf (val);
7600
7601 /* roundf () breaks ties away from zero, but FRINTN must break
7602 them to even: move a halfway case that landed on an odd
7603 integer to its even neighbour. */
7604 if (fabsf (val - rval) == 0.5f && fmodf (rval, 2.0f) != 0.0f)
7605 rval = truncf (val);
7606
7607 aarch64_set_FP_float (cpu, rd, rval);
7608 return;
7609 }
7610
7611 case 1: /* mode P: towards +inf. */
7612 /* ceilf () rounds towards +inf for both signs. */
7613 aarch64_set_FP_float (cpu, rd, ceilf (val));
7614 return;
7615
7616 case 2: /* mode M: towards -inf. */
7617 /* floorf () rounds towards -inf for both signs. */
7618 aarch64_set_FP_float (cpu, rd, floorf (val));
7619 return;
7624
7625 case 3: /* mode Z: towards 0. */
7626 aarch64_set_FP_float (cpu, rd, truncf (val));
7627 return;
7628
7629 case 4: /* mode A: away from 0. */
7630 aarch64_set_FP_float (cpu, rd, roundf (val));
7631 return;
7632
7633 case 6: /* mode X: use FPCR with exactness check. */
7634 case 7: /* mode I: use FPCR mode. */
7635 HALT_NYI;
7636
7637 default:
7638 HALT_UNALLOC;
7639 }
7640 }
7641
7642 /* Convert half to float. */
7643 static void
7644 do_FCVT_half_to_single (sim_cpu *cpu)
7645 {
7646 unsigned rn = INSTR (9, 5);
7647 unsigned rd = INSTR (4, 0);
7648
7649 NYI_assert (31, 10, 0x7B890);
7650
7651 aarch64_set_FP_float (cpu, rd, (float) aarch64_get_FP_half (cpu, rn));
7652 }
7653
7654 /* Convert half to double. */
7655 static void
7656 do_FCVT_half_to_double (sim_cpu *cpu)
7657 {
7658 unsigned rn = INSTR (9, 5);
7659 unsigned rd = INSTR (4, 0);
7660
7661 NYI_assert (31, 10, 0x7B8B0);
7662
7663 aarch64_set_FP_double (cpu, rd, (double) aarch64_get_FP_half (cpu, rn));
7664 }
7665
7666 static void
7667 do_FCVT_single_to_half (sim_cpu *cpu)
7668 {
7669 unsigned rn = INSTR (9, 5);
7670 unsigned rd = INSTR (4, 0);
7671
7672 NYI_assert (31, 10, 0x788F0);
7673
7674 aarch64_set_FP_half (cpu, rd, aarch64_get_FP_float (cpu, rn));
7675 }
7676
7677 /* Convert double to half. */
7678 static void
7679 do_FCVT_double_to_half (sim_cpu *cpu)
7680 {
7681 unsigned rn = INSTR (9, 5);
7682 unsigned rd = INSTR (4, 0);
7683
7684 NYI_assert (31, 10, 0x798F0);
7685
7686 aarch64_set_FP_half (cpu, rd, (float) aarch64_get_FP_double (cpu, rn));
7687 }
7688
7689 static void
7690 dexSimpleFPDataProc1Source (sim_cpu *cpu)
7691 {
7692 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7693 instr[30] = 0
7694 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7695 instr[28,25] = 1111
7696 instr[24] = 0
7697 instr[23,22] ==> type : 00 ==> source is single,
7698 01 ==> source is double
7699 10 ==> UNALLOC
7700 11 ==> UNALLOC or source is half
7701 instr[21] = 1
7702 instr[20,15] ==> opcode : with type 00 or 01
7703 000000 ==> FMOV, 000001 ==> FABS,
7704 000010 ==> FNEG, 000011 ==> FSQRT,
7705 000100 ==> UNALLOC, 000101 ==> FCVT,(to single/double)
7706 000110 ==> UNALLOC, 000111 ==> FCVT (to half)
7707 001000 ==> FRINTN, 001001 ==> FRINTP,
7708 001010 ==> FRINTM, 001011 ==> FRINTZ,
7709 001100 ==> FRINTA, 001101 ==> UNALLOC
7710 001110 ==> FRINTX, 001111 ==> FRINTI
7711 with type 11
7712 000100 ==> FCVT (half-to-single)
7713 000101 ==> FCVT (half-to-double)
7714 instr[14,10] = 10000. */
7715
7716 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7717 uint32_t type = INSTR (23, 22);
7718 uint32_t opcode = INSTR (20, 15);
7719
7720 if (M_S != 0)
7721 HALT_UNALLOC;
7722
7723 if (type == 3)
7724 {
7725 if (opcode == 4)
7726 do_FCVT_half_to_single (cpu);
7727 else if (opcode == 5)
7728 do_FCVT_half_to_double (cpu);
7729 else
7730 HALT_UNALLOC;
7731 return;
7732 }
7733
7734 if (type == 2)
7735 HALT_UNALLOC;
7736
7737 switch (opcode)
7738 {
7739 case 0:
7740 if (type)
7741 ffmovd (cpu);
7742 else
7743 ffmovs (cpu);
7744 return;
7745
7746 case 1:
7747 if (type)
7748 fabcpu (cpu);
7749 else
7750 fabss (cpu);
7751 return;
7752
7753 case 2:
7754 if (type)
7755 fnegd (cpu);
7756 else
7757 fnegs (cpu);
7758 return;
7759
7760 case 3:
7761 if (type)
7762 fsqrtd (cpu);
7763 else
7764 fsqrts (cpu);
7765 return;
7766
7767 case 4:
7768 if (type)
7769 fcvtds (cpu);
7770 else
7771 HALT_UNALLOC;
7772 return;
7773
7774 case 5:
7775 if (type)
7776 HALT_UNALLOC;
7777 fcvtcpu (cpu);
7778 return;
7779
7780 case 8: /* FRINTN etc. */
7781 case 9:
7782 case 10:
7783 case 11:
7784 case 12:
7785 case 14:
7786 case 15:
7787 do_FRINT (cpu);
7788 return;
7789
7790 case 7:
7791 if (INSTR (22, 22))
7792 do_FCVT_double_to_half (cpu);
7793 else
7794 do_FCVT_single_to_half (cpu);
7795 return;
7796
7797 case 13:
7798 HALT_NYI;
7799
7800 default:
7801 HALT_UNALLOC;
7802 }
7803 }
7804
7805 /* 32 bit signed int to float. */
7806 static void
7807 scvtf32 (sim_cpu *cpu)
7808 {
7809 unsigned rn = INSTR (9, 5);
7810 unsigned sd = INSTR (4, 0);
7811
7812 aarch64_set_FP_float
7813 (cpu, sd, (float) aarch64_get_reg_s32 (cpu, rn, NO_SP));
7814 }
7815
7816 /* signed int to float. */
7817 static void
7818 scvtf (sim_cpu *cpu)
7819 {
7820 unsigned rn = INSTR (9, 5);
7821 unsigned sd = INSTR (4, 0);
7822
7823 aarch64_set_FP_float
7824 (cpu, sd, (float) aarch64_get_reg_s64 (cpu, rn, NO_SP));
7825 }
7826
7827 /* 32 bit signed int to double. */
7828 static void
7829 scvtd32 (sim_cpu *cpu)
7830 {
7831 unsigned rn = INSTR (9, 5);
7832 unsigned sd = INSTR (4, 0);
7833
7834 aarch64_set_FP_double
7835 (cpu, sd, (double) aarch64_get_reg_s32 (cpu, rn, NO_SP));
7836 }
7837
7838 /* signed int to double. */
7839 static void
7840 scvtd (sim_cpu *cpu)
7841 {
7842 unsigned rn = INSTR (9, 5);
7843 unsigned sd = INSTR (4, 0);
7844
7845 aarch64_set_FP_double
7846 (cpu, sd, (double) aarch64_get_reg_s64 (cpu, rn, NO_SP));
7847 }
7848
7849 static const float FLOAT_INT_MAX = (float) INT_MAX;
7850 static const float FLOAT_INT_MIN = (float) INT_MIN;
7851 static const double DOUBLE_INT_MAX = (double) INT_MAX;
7852 static const double DOUBLE_INT_MIN = (double) INT_MIN;
7853 static const float FLOAT_LONG_MAX = (float) LONG_MAX;
7854 static const float FLOAT_LONG_MIN = (float) LONG_MIN;
7855 static const double DOUBLE_LONG_MAX = (double) LONG_MAX;
7856 static const double DOUBLE_LONG_MIN = (double) LONG_MIN;
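/* Note: the 64-bit bounds above use LONG_MAX/LONG_MIN and so assume a
   host where long is 64 bits wide; with a 32-bit long the 64-bit
   conversions below would saturate at the wrong limits. */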
7857
7858 /* Check for FP exception conditions:
7859 NaN raises IO
7860 Infinity raises IO
7861 Out of Range raises IO and IX and saturates value
7862 Denormal raises ID and IX and sets to zero. */
7863 #define RAISE_EXCEPTIONS(F, VALUE, FTYPE, ITYPE) \
7864 do \
7865 { \
7866 switch (fpclassify (F)) \
7867 { \
7868 case FP_INFINITE: \
7869 case FP_NAN: \
7870 aarch64_set_FPSR (cpu, IO); \
7871 if (signbit (F)) \
7872 VALUE = ITYPE##_MIN; \
7873 else \
7874 VALUE = ITYPE##_MAX; \
7875 break; \
7876 \
7877 case FP_NORMAL: \
7878 if (F >= FTYPE##_##ITYPE##_MAX) \
7879 { \
7880 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
7881 VALUE = ITYPE##_MAX; \
7882 } \
7883 else if (F <= FTYPE##_##ITYPE##_MIN) \
7884 { \
7885 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
7886 VALUE = ITYPE##_MIN; \
7887 } \
7888 break; \
7889 \
7890 case FP_SUBNORMAL: \
7891 aarch64_set_FPSR_bits (cpu, IO | IX | ID, IX | ID); \
7892 VALUE = 0; \
7893 break; \
7894 \
7895 default: \
7896 case FP_ZERO: \
7897 VALUE = 0; \
7898 break; \
7899 } \
7900 } \
7901 while (0)
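/* For example, converting 2e10f to an int32_t takes the FP_NORMAL arm
   above: VALUE saturates to INT_MAX and the cumulative IO and IX flags
   are set in the FPSR. */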
7902
7903 /* 32 bit convert float to signed int truncate towards zero. */
7904 static void
7905 fcvtszs32 (sim_cpu *cpu)
7906 {
7907 unsigned sn = INSTR (9, 5);
7908 unsigned rd = INSTR (4, 0);
7909 /* A C cast from floating type to integer rounds toward zero (C99 6.3.1.4). */
7910 float f = aarch64_get_FP_float (cpu, sn);
7911 int32_t value = (int32_t) f;
7912
7913 RAISE_EXCEPTIONS (f, value, FLOAT, INT);
7914
7915 /* Avoid sign extension to 64 bit. */
7916 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
7917 }
7918
7919 /* 64 bit convert float to signed int truncate towards zero. */
7920 static void
7921 fcvtszs (sim_cpu *cpu)
7922 {
7923 unsigned sn = INSTR (9, 5);
7924 unsigned rd = INSTR (4, 0);
7925 float f = aarch64_get_FP_float (cpu, sn);
7926 int64_t value = (int64_t) f;
7927
7928 RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
7929
7930 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
7931 }
7932
7933 /* 32 bit convert double to signed int truncate towards zero. */
7934 static void
7935 fcvtszd32 (sim_cpu *cpu)
7936 {
7937 unsigned sn = INSTR (9, 5);
7938 unsigned rd = INSTR (4, 0);
7939 /* A C cast from floating type to integer rounds toward zero (C99 6.3.1.4). */
7940 double d = aarch64_get_FP_double (cpu, sn);
7941 int32_t value = (int32_t) d;
7942
7943 RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
7944
7945 /* Avoid sign extension to 64 bit. */
7946 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
7947 }
7948
7949 /* 64 bit convert double to signed int truncate towards zero. */
7950 static void
7951 fcvtszd (sim_cpu *cpu)
7952 {
7953 unsigned sn = INSTR (9, 5);
7954 unsigned rd = INSTR (4, 0);
7955 /* A C cast from floating type to integer rounds toward zero (C99 6.3.1.4). */
7956 double d = aarch64_get_FP_double (cpu, sn);
7957 int64_t value;
7958
7959 value = (int64_t) d;
7960
7961 RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
7962
7963 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
7964 }
7965
7966 static void
7967 do_fcvtzu (sim_cpu *cpu)
7968 {
7969 /* instr[31] = size: 32-bit (0), 64-bit (1)
7970 instr[30,23] = 00111100
7971 instr[22] = type: single (0)/ double (1)
7972 instr[21] = enable (0)/disable(1) precision
7973 instr[20,16] = 11001
7974 instr[15,10] = precision
7975 instr[9,5] = Rs
7976 instr[4,0] = Rd. */
7977
7978 unsigned rs = INSTR (9, 5);
7979 unsigned rd = INSTR (4, 0);
7980
7981 NYI_assert (30, 23, 0x3C);
7982 NYI_assert (20, 16, 0x19);
7983
7984 if (INSTR (21, 21) != 1)
7985 /* Convert to fixed point. */
7986 HALT_NYI;
7987
7988 if (INSTR (31, 31))
7989 {
7990 /* Convert to unsigned 64-bit integer. */
7991 if (INSTR (22, 22))
7992 {
7993 double d = aarch64_get_FP_double (cpu, rs);
7994 uint64_t value = (uint64_t) d;
7995
7996 /* 2^63 is a valid unsigned result; skip the signed-range check. */
7997 if (value != (1ULL << 63))
7998 RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
7999
8000 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8001 }
8002 else
8003 {
8004 float f = aarch64_get_FP_float (cpu, rs);
8005 uint64_t value = (uint64_t) f;
8006
8007 /* 2^63 is a valid unsigned result; skip the signed-range check. */
8008 if (value != (1ULL << 63))
8009 RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
8010
8011 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8012 }
8013 }
8014 else
8015 {
8016 uint32_t value;
8017
8018 /* Convert to unsigned 32-bit integer. */
8019 if (INSTR (22, 22))
8020 {
8021 double d = aarch64_get_FP_double (cpu, rs);
8022
8023 value = (uint32_t) d;
8024 /* Do not raise an exception if we have reached UINT_MAX. */
8025 if (value != (1UL << 31))
8026 RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
8027 }
8028 else
8029 {
8030 float f = aarch64_get_FP_float (cpu, rs);
8031
8032 value = (uint32_t) f;
8033 /* Do not raise an exception if we have reached UINT_MAX. */
8034 if (value != (1UL << 31))
8035 RAISE_EXCEPTIONS (f, value, FLOAT, INT);
8036 }
8037
8038 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8039 }
8040 }
8041
8042 static void
8043 do_UCVTF (sim_cpu *cpu)
8044 {
8045 /* instr[31] = size: 32-bit (0), 64-bit (1)
8046 instr[30,23] = 001 1110 0
8047 instr[22] = type: single (0)/ double (1)
8048 instr[21] = enable (0)/disable(1) precision
8049 instr[20,16] = 0 0011
8050 instr[15,10] = precision
8051 instr[9,5] = Rs
8052 instr[4,0] = Rd. */
8053
8054 unsigned rs = INSTR (9, 5);
8055 unsigned rd = INSTR (4, 0);
8056
8057 NYI_assert (30, 23, 0x3C);
8058 NYI_assert (20, 16, 0x03);
8059
8060 if (INSTR (21, 21) != 1)
8061 HALT_NYI;
8062
8063 /* FIXME: Add exception raising. */
8064 if (INSTR (31, 31))
8065 {
8066 uint64_t value = aarch64_get_reg_u64 (cpu, rs, NO_SP);
8067
8068 if (INSTR (22, 22))
8069 aarch64_set_FP_double (cpu, rd, (double) value);
8070 else
8071 aarch64_set_FP_float (cpu, rd, (float) value);
8072 }
8073 else
8074 {
8075 uint32_t value = aarch64_get_reg_u32 (cpu, rs, NO_SP);
8076
8077 if (INSTR (22, 22))
8078 aarch64_set_FP_double (cpu, rd, (double) value);
8079 else
8080 aarch64_set_FP_float (cpu, rd, (float) value);
8081 }
8082 }
8083
8084 static void
8085 float_vector_move (sim_cpu *cpu)
8086 {
8087 /* instr[31,17] == 100 1111 0101 0111
8088 instr[16] ==> direction 0=> to GR, 1=> from GR
8089 instr[15,10] ==> 00 0000, ow ==> UNALLOC
8090 instr[9,5] ==> source
8091 instr[4,0] ==> dest. */
8092
8093 unsigned rn = INSTR (9, 5);
8094 unsigned rd = INSTR (4, 0);
8095
8096 NYI_assert (31, 17, 0x4F57);
8097
8098 if (INSTR (15, 10) != 0)
8099 HALT_UNALLOC;
8100
8101 if (INSTR (16, 16))
8102 aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_reg_u64 (cpu, rn, NO_SP));
8103 else
8104 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, rn, 1));
8105 }
8106
8107 static void
8108 dexSimpleFPIntegerConvert (sim_cpu *cpu)
8109 {
8110 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
8111 instr[30] = 0
8112 instr[29] = S : 0 ==> OK, 1 ==> UNALLOC
8113 instr[28,25] = 1111
8114 instr[24] = 0
8115 instr[23,22] = type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8116 instr[21] = 1
8117 instr[20,19] = rmode
8118 instr[18,16] = opcode
8119 instr[15,10] = 10 0000 */
8120
8121 uint32_t rmode_opcode;
8122 uint32_t size_type;
8123 uint32_t type;
8124 uint32_t size;
8125 uint32_t S;
8126
8127 if (INSTR (31, 17) == 0x4F57)
8128 {
8129 float_vector_move (cpu);
8130 return;
8131 }
8132
8133 size = INSTR (31, 31);
8134 S = INSTR (29, 29);
8135 if (S != 0)
8136 HALT_UNALLOC;
8137
8138 type = INSTR (23, 22);
8139 if (type > 1)
8140 HALT_UNALLOC;
8141
8142 rmode_opcode = INSTR (20, 16);
8143 size_type = (size << 1) | type; /* 0==32f, 1==32d, 2==64f, 3==64d. */
8144
8145 switch (rmode_opcode)
8146 {
8147 case 2: /* SCVTF. */
8148 switch (size_type)
8149 {
8150 case 0: scvtf32 (cpu); return;
8151 case 1: scvtd32 (cpu); return;
8152 case 2: scvtf (cpu); return;
8153 case 3: scvtd (cpu); return;
8154 }
8155
8156 case 6: /* FMOV GR, Vec. */
8157 switch (size_type)
8158 {
8159 case 0: gfmovs (cpu); return;
8160 case 3: gfmovd (cpu); return;
8161 default: HALT_UNALLOC;
8162 }
8163
8164 case 7: /* FMOV vec, GR. */
8165 switch (size_type)
8166 {
8167 case 0: fgmovs (cpu); return;
8168 case 3: fgmovd (cpu); return;
8169 default: HALT_UNALLOC;
8170 }
8171
8172 case 24: /* FCVTZS. */
8173 switch (size_type)
8174 {
8175 case 0: fcvtszs32 (cpu); return;
8176 case 1: fcvtszd32 (cpu); return;
8177 case 2: fcvtszs (cpu); return;
8178 case 3: fcvtszd (cpu); return;
8179 }
8180
8181 case 25: do_fcvtzu (cpu); return;
8182 case 3: do_UCVTF (cpu); return;
8183
8184 case 0: /* FCVTNS. */
8185 case 1: /* FCVTNU. */
8186 case 4: /* FCVTAS. */
8187 case 5: /* FCVTAU. */
8188 case 8: /* FCVTPS. */
8189 case 9: /* FCVTPU. */
8190 case 16: /* FCVTMS. */
8191 case 17: /* FCVTMU. */
8192 default:
8193 HALT_NYI;
8194 }
8195 }
8196
8197 static void
8198 set_flags_for_float_compare (sim_cpu *cpu, float fvalue1, float fvalue2)
8199 {
8200 uint32_t flags;
8201
8202 if (isnan (fvalue1) || isnan (fvalue2))
8203 flags = C|V;
8204 else
8205 {
8206 float result = fvalue1 - fvalue2;
8207
8208 if (result == 0.0)
8209 flags = Z|C;
8210 else if (result < 0)
8211 flags = N;
8212 else /* (result > 0). */
8213 flags = C;
8214 }
8215
8216 aarch64_set_CPSR (cpu, flags);
8217 }
8218
8219 static void
8220 fcmps (sim_cpu *cpu)
8221 {
8222 unsigned sm = INSTR (20, 16);
8223 unsigned sn = INSTR ( 9, 5);
8224
8225 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8226 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8227
8228 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8229 }
8230
8231 /* Float compare to zero -- Invalid Operation exception
8232 only on signaling NaNs. */
8233 static void
8234 fcmpzs (sim_cpu *cpu)
8235 {
8236 unsigned sn = INSTR ( 9, 5);
8237 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8238
8239 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8240 }
8241
8242 /* Float compare -- Invalid Operation exception on all NaNs. */
8243 static void
8244 fcmpes (sim_cpu *cpu)
8245 {
8246 unsigned sm = INSTR (20, 16);
8247 unsigned sn = INSTR ( 9, 5);
8248
8249 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8250 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8251
8252 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8253 }
8254
8255 /* Float compare to zero -- Invalid Operation exception on all NaNs. */
8256 static void
8257 fcmpzes (sim_cpu *cpu)
8258 {
8259 unsigned sn = INSTR ( 9, 5);
8260 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8261
8262 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8263 }
8264
8265 static void
8266 set_flags_for_double_compare (sim_cpu *cpu, double dval1, double dval2)
8267 {
8268 uint32_t flags;
8269
8270 if (isnan (dval1) || isnan (dval2))
8271 flags = C|V;
8272 else
8273 {
8274 double result = dval1 - dval2;
8275
8276 if (result == 0.0)
8277 flags = Z|C;
8278 else if (result < 0)
8279 flags = N;
8280 else /* (result > 0). */
8281 flags = C;
8282 }
8283
8284 aarch64_set_CPSR (cpu, flags);
8285 }
8286
8287 /* Double compare -- Invalid Operation exception only on signaling NaNs. */
8288 static void
8289 fcmpd (sim_cpu *cpu)
8290 {
8291 unsigned sm = INSTR (20, 16);
8292 unsigned sn = INSTR ( 9, 5);
8293
8294 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8295 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8296
8297 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8298 }
8299
8300 /* Double compare to zero -- Invalid Operation exception
8301 only on signaling NaNs. */
8302 static void
8303 fcmpzd (sim_cpu *cpu)
8304 {
8305 unsigned sn = INSTR ( 9, 5);
8306 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8307
8308 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8309 }
8310
8311 /* Double compare -- Invalid Operation exception on all NaNs. */
8312 static void
8313 fcmped (sim_cpu *cpu)
8314 {
8315 unsigned sm = INSTR (20, 16);
8316 unsigned sn = INSTR ( 9, 5);
8317
8318 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8319 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8320
8321 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8322 }
8323
8324 /* Double compare to zero -- Invalid Operation exception on all NaNs. */
8325 static void
8326 fcmpzed (sim_cpu *cpu)
8327 {
8328 unsigned sn = INSTR ( 9, 5);
8329 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8330
8331 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8332 }
8333
8334 static void
8335 dexSimpleFPCompare (sim_cpu *cpu)
8336 {
8337 /* assert instr[28,25] == 1111
8338 instr[30,24,21] = 001, instr[13,10] = 1000
8339 instr[31] = M : 0 ==> OK, 1 ==> UNALLOC
8340 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
8341 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
8342 instr[15,14] ==> op : 00 ==> OK, ow ==> UNALLOC
8343 instr[4,0] ==> opcode2 : 00000 ==> FCMP, 10000 ==> FCMPE,
8344 01000 ==> FCMPZ, 11000 ==> FCMPEZ,
8345 ow ==> UNALLOC */
8346 uint32_t dispatch;
8347 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
8348 uint32_t type = INSTR (23, 22);
8349 uint32_t op = INSTR (15, 14);
8350 uint32_t op2_2_0 = INSTR (2, 0);
8351
8352 if (op2_2_0 != 0)
8353 HALT_UNALLOC;
8354
8355 if (M_S != 0)
8356 HALT_UNALLOC;
8357
8358 if (type > 1)
8359 HALT_UNALLOC;
8360
8361 if (op != 0)
8362 HALT_UNALLOC;
8363
8364 /* dispatch on type and top 2 bits of opcode. */
8365 dispatch = (type << 2) | INSTR (4, 3);
8366
8367 switch (dispatch)
8368 {
8369 case 0: fcmps (cpu); return;
8370 case 1: fcmpzs (cpu); return;
8371 case 2: fcmpes (cpu); return;
8372 case 3: fcmpzes (cpu); return;
8373 case 4: fcmpd (cpu); return;
8374 case 5: fcmpzd (cpu); return;
8375 case 6: fcmped (cpu); return;
8376 case 7: fcmpzed (cpu); return;
8377 }
8378 }
8379
8380 static void
8381 do_scalar_FADDP (sim_cpu *cpu)
8382 {
8383 /* instr [31,23] = 0111 1110 0
8384 instr [22] = single(0)/double(1)
8385 instr [21,10] = 11 0000 1101 10
8386 instr [9,5] = Fn
8387 instr [4,0] = Fd. */
8388
8389 unsigned Fn = INSTR (9, 5);
8390 unsigned Fd = INSTR (4, 0);
8391
8392 NYI_assert (31, 23, 0x0FC);
8393 NYI_assert (21, 10, 0xC36);
8394
8395 if (INSTR (22, 22))
8396 {
8397 double val1 = aarch64_get_vec_double (cpu, Fn, 0);
8398 double val2 = aarch64_get_vec_double (cpu, Fn, 1);
8399
8400 aarch64_set_FP_double (cpu, Fd, val1 + val2);
8401 }
8402 else
8403 {
8404 float val1 = aarch64_get_vec_float (cpu, Fn, 0);
8405 float val2 = aarch64_get_vec_float (cpu, Fn, 1);
8406
8407 aarch64_set_FP_float (cpu, Fd, val1 + val2);
8408 }
8409 }
8410
8411 /* Floating point absolute difference. */
8412
8413 static void
8414 do_scalar_FABD (sim_cpu *cpu)
8415 {
8416 /* instr [31,23] = 0111 1110 1
8417 instr [22] = float(0)/double(1)
8418 instr [21] = 1
8419 instr [20,16] = Rm
8420 instr [15,10] = 1101 01
8421 instr [9, 5] = Rn
8422 instr [4, 0] = Rd. */
8423
8424 unsigned rm = INSTR (20, 16);
8425 unsigned rn = INSTR (9, 5);
8426 unsigned rd = INSTR (4, 0);
8427
8428 NYI_assert (31, 23, 0x0FD);
8429 NYI_assert (21, 21, 1);
8430 NYI_assert (15, 10, 0x35);
8431
8432 if (INSTR (22, 22))
8433 aarch64_set_FP_double (cpu, rd,
8434 fabs (aarch64_get_FP_double (cpu, rn)
8435 - aarch64_get_FP_double (cpu, rm)));
8436 else
8437 aarch64_set_FP_float (cpu, rd,
8438 fabsf (aarch64_get_FP_float (cpu, rn)
8439 - aarch64_get_FP_float (cpu, rm)));
8440 }
8441
8442 static void
8443 do_scalar_CMGT (sim_cpu *cpu)
8444 {
8445 /* instr [31,21] = 0101 1110 111
8446 instr [20,16] = Rm
8447 instr [15,10] = 00 1101
8448 instr [9, 5] = Rn
8449 instr [4, 0] = Rd. */
8450
8451 unsigned rm = INSTR (20, 16);
8452 unsigned rn = INSTR (9, 5);
8453 unsigned rd = INSTR (4, 0);
8454
8455 NYI_assert (31, 21, 0x2F7);
8456 NYI_assert (15, 10, 0x0D);
8457
8458 aarch64_set_vec_u64 (cpu, rd, 0,
8459 aarch64_get_vec_u64 (cpu, rn, 0) >
8460 aarch64_get_vec_u64 (cpu, rm, 0) ? -1L : 0L);
8461 }
8462
8463 static void
8464 do_scalar_USHR (sim_cpu *cpu)
8465 {
8466 /* instr [31,23] = 0111 1111 0
8467 instr [22,16] = shift amount
8468 instr [15,10] = 0000 01
8469 instr [9, 5] = Rn
8470 instr [4, 0] = Rd. */
8471
8472 unsigned amount = 128 - INSTR (22, 16);
8473 unsigned rn = INSTR (9, 5);
8474 unsigned rd = INSTR (4, 0);
8475
8476 NYI_assert (31, 23, 0x0FE);
8477 NYI_assert (15, 10, 0x01);
8478
8479 aarch64_set_vec_u64 (cpu, rd, 0,
8480 aarch64_get_vec_u64 (cpu, rn, 0) >> amount);
8481 }
8482
8483 static void
8484 do_scalar_SSHL (sim_cpu *cpu)
8485 {
8486 /* instr [31,21] = 0101 1110 111
8487 instr [20,16] = Rm
8488 instr [15,10] = 0100 01
8489 instr [9, 5] = Rn
8490 instr [4, 0] = Rd. */
8491
8492 unsigned rm = INSTR (20, 16);
8493 unsigned rn = INSTR (9, 5);
8494 unsigned rd = INSTR (4, 0);
8495 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
8496
8497 NYI_assert (31, 21, 0x2F7);
8498 NYI_assert (15, 10, 0x11);
8499
8500 if (shift >= 0)
8501 aarch64_set_vec_s64 (cpu, rd, 0,
8502 aarch64_get_vec_s64 (cpu, rn, 0) << shift);
8503 else
8504 aarch64_set_vec_s64 (cpu, rd, 0,
8505 aarch64_get_vec_s64 (cpu, rn, 0) >> - shift);
8506 }
8507
8508 static void
8509 do_scalar_shift (sim_cpu *cpu)
8510 {
8511 /* instr [31,23] = 0101 1111 0
8512 instr [22,16] = shift amount
8513 instr [15,10] = 0101 01 [SHL]
8514 instr [15,10] = 0000 01 [SSHR]
8515 instr [9, 5] = Rn
8516 instr [4, 0] = Rd. */
8517
8518 unsigned rn = INSTR (9, 5);
8519 unsigned rd = INSTR (4, 0);
8520 unsigned amount;
8521
8522 NYI_assert (31, 23, 0x0BE);
8523
8524 if (INSTR (22, 22) == 0)
8525 HALT_UNALLOC;
8526
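/* For the 64-bit scalar forms, immh:immb (instr[22,16]) encodes
   128 - shift for right shifts and 64 + shift for left shifts. */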
8527 switch (INSTR (15, 10))
8528 {
8529 case 0x01: /* SSHR */
8530 amount = 128 - INSTR (22, 16);
8531 aarch64_set_vec_s64 (cpu, rd, 0,
8532 aarch64_get_vec_s64 (cpu, rn, 0) >> amount);
8533 return;
8534 case 0x15: /* SHL */
8535 amount = INSTR (22, 16) - 64;
8536 aarch64_set_vec_u64 (cpu, rd, 0,
8537 aarch64_get_vec_u64 (cpu, rn, 0) << amount);
8538 return;
8539 default:
8540 HALT_NYI;
8541 }
8542 }
8543
8544 /* FCMEQ FCMGT FCMGE. */
8545 static void
8546 do_scalar_FCM (sim_cpu *cpu)
8547 {
8548 /* instr [31,30] = 01
8549 instr [29] = U
8550 instr [28,24] = 1 1110
8551 instr [23] = E
8552 instr [22] = size
8553 instr [21] = 1
8554 instr [20,16] = Rm
8555 instr [15,12] = 1110
8556 instr [11] = AC
8557 instr [10] = 1
8558 instr [9, 5] = Rn
8559 instr [4, 0] = Rd. */
8560
8561 unsigned rm = INSTR (20, 16);
8562 unsigned rn = INSTR (9, 5);
8563 unsigned rd = INSTR (4, 0);
8564 unsigned EUac = (INSTR (23, 23) << 2) | (INSTR (29, 29) << 1) | INSTR (11, 11);
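/* E:U:ac selects the comparison: 000 ==> FCMEQ, 010 ==> FCMGE,
   011 ==> FACGE, 110 ==> FCMGT, 111 ==> FACGT. */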
8565 unsigned result;
8566 float val1;
8567 float val2;
8568
8569 NYI_assert (31, 30, 1);
8570 NYI_assert (28, 24, 0x1E);
8571 NYI_assert (21, 21, 1);
8572 NYI_assert (15, 12, 0xE);
8573 NYI_assert (10, 10, 1);
8574
8575 if (INSTR (22, 22))
8576 {
8577 double val1 = aarch64_get_FP_double (cpu, rn);
8578 double val2 = aarch64_get_FP_double (cpu, rm);
8579
8580 switch (EUac)
8581 {
8582 case 0: /* 000 */
8583 result = val1 == val2;
8584 break;
8585
8586 case 3: /* 011 */
8587 val1 = fabs (val1);
8588 val2 = fabs (val2);
8589 /* Fall through. */
8590 case 2: /* 010 */
8591 result = val1 >= val2;
8592 break;
8593
8594 case 7: /* 111 */
8595 val1 = fabs (val1);
8596 val2 = fabs (val2);
8597 /* Fall through. */
8598 case 6: /* 110 */
8599 result = val1 > val2;
8600 break;
8601
8602 default:
8603 HALT_UNALLOC;
8604 }
8605
8606 aarch64_set_vec_u64 (cpu, rd, 0, result ? (uint64_t) -1 : 0);
8607 return;
8608 }
8609
8610 val1 = aarch64_get_FP_float (cpu, rn);
8611 val2 = aarch64_get_FP_float (cpu, rm);
8612
8613 switch (EUac)
8614 {
8615 case 0: /* 000 */
8616 result = val1 == val2;
8617 break;
8618
8619 case 3: /* 011 */
8620 val1 = fabsf (val1);
8621 val2 = fabsf (val2);
8622 /* Fall through. */
8623 case 2: /* 010 */
8624 result = val1 >= val2;
8625 break;
8626
8627 case 7: /* 111 */
8628 val1 = fabsf (val1);
8629 val2 = fabsf (val2);
8630 /* Fall through. */
8631 case 6: /* 110 */
8632 result = val1 > val2;
8633 break;
8634
8635 default:
8636 HALT_UNALLOC;
8637 }
8638
8639 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
8640 }
8641
8642 /* An alias of DUP. */
8643 static void
8644 do_scalar_MOV (sim_cpu *cpu)
8645 {
8646 /* instr [31,21] = 0101 1110 000
8647 instr [20,16] = imm5
8648 instr [15,10] = 0000 01
8649 instr [9, 5] = Rn
8650 instr [4, 0] = Rd. */
8651
8652 unsigned rn = INSTR (9, 5);
8653 unsigned rd = INSTR (4, 0);
8654 unsigned index;
8655
8656 NYI_assert (31, 21, 0x2F0);
8657 NYI_assert (15, 10, 0x01);
8658
8659 if (INSTR (16, 16))
8660 {
8661 /* 8-bit. */
8662 index = INSTR (20, 17);
8663 aarch64_set_vec_u8
8664 (cpu, rd, 0, aarch64_get_vec_u8 (cpu, rn, index));
8665 }
8666 else if (INSTR (17, 17))
8667 {
8668 /* 16-bit. */
8669 index = INSTR (20, 18);
8670 aarch64_set_vec_u16
8671 (cpu, rd, 0, aarch64_get_vec_u16 (cpu, rn, index));
8672 }
8673 else if (INSTR (18, 18))
8674 {
8675 /* 32-bit. */
8676 index = INSTR (20, 19);
8677 aarch64_set_vec_u32
8678 (cpu, rd, 0, aarch64_get_vec_u32 (cpu, rn, index));
8679 }
8680 else if (INSTR (19, 19))
8681 {
8682 /* 64-bit. */
8683 index = INSTR (20, 20);
8684 aarch64_set_vec_u64
8685 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, index));
8686 }
8687 else
8688 HALT_UNALLOC;
8689 }
8690
8691 static void
8692 do_scalar_NEG (sim_cpu *cpu)
8693 {
8694 /* instr [31,10] = 0111 1110 1110 0000 1011 10
8695 instr [9, 5] = Rn
8696 instr [4, 0] = Rd. */
8697
8698 unsigned rn = INSTR (9, 5);
8699 unsigned rd = INSTR (4, 0);
8700
8701 NYI_assert (31, 10, 0x1FB82E);
8702
8703 aarch64_set_vec_u64 (cpu, rd, 0, - aarch64_get_vec_u64 (cpu, rn, 0));
8704 }
8705
8706 static void
8707 do_scalar_USHL (sim_cpu *cpu)
8708 {
8709 /* instr [31,21] = 0111 1110 111
8710 instr [20,16] = Rm
8711 instr [15,10] = 0100 01
8712 instr [9, 5] = Rn
8713 instr [4, 0] = Rd. */
8714
8715 unsigned rm = INSTR (20, 16);
8716 unsigned rn = INSTR (9, 5);
8717 unsigned rd = INSTR (4, 0);
8718 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
8719
8720 NYI_assert (31, 21, 0x3F7);
8721 NYI_assert (15, 10, 0x11);
8722
8723 if (shift >= 0)
8724 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) << shift);
8725 else
8726 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) >> - shift);
8727 }
8728
8729 static void
8730 do_double_add (sim_cpu *cpu)
8731 {
8732 /* instr [31,21] = 0101 1110 111
8733 instr [20,16] = Fn
8734 instr [15,10] = 1000 01
8735 instr [9,5] = Fm
8736 instr [4,0] = Fd. */
8737 unsigned Fd;
8738 unsigned Fm;
8739 unsigned Fn;
8740 double val1;
8741 double val2;
8742
8743 NYI_assert (31, 21, 0x2F7);
8744 NYI_assert (15, 10, 0x21);
8745
8746 Fd = INSTR (4, 0);
8747 Fm = INSTR (9, 5);
8748 Fn = INSTR (20, 16);
8749
8750 val1 = aarch64_get_FP_double (cpu, Fm);
8751 val2 = aarch64_get_FP_double (cpu, Fn);
8752
8753 aarch64_set_FP_double (cpu, Fd, val1 + val2);
8754 }
8755
8756 static void
8757 do_scalar_UCVTF (sim_cpu *cpu)
8758 {
8759 /* instr [31,23] = 0111 1110 0
8760 instr [22] = single(0)/double(1)
8761 instr [21,10] = 10 0001 1101 10
8762 instr [9,5] = rn
8763 instr [4,0] = rd. */
8764
8765 unsigned rn = INSTR (9, 5);
8766 unsigned rd = INSTR (4, 0);
8767
8768 NYI_assert (31, 23, 0x0FC);
8769 NYI_assert (21, 10, 0x876);
8770
8771 if (INSTR (22, 22))
8772 {
8773 uint64_t val = aarch64_get_vec_u64 (cpu, rn, 0);
8774
8775 aarch64_set_vec_double (cpu, rd, 0, (double) val);
8776 }
8777 else
8778 {
8779 uint32_t val = aarch64_get_vec_u32 (cpu, rn, 0);
8780
8781 aarch64_set_vec_float (cpu, rd, 0, (float) val);
8782 }
8783 }
8784
8785 static void
8786 do_scalar_vec (sim_cpu *cpu)
8787 {
8788 /* instr [30] = 1. */
8789 /* instr [28,25] = 1111. */
8790 switch (INSTR (31, 23))
8791 {
8792 case 0xBC:
8793 switch (INSTR (15, 10))
8794 {
8795 case 0x01: do_scalar_MOV (cpu); return;
8796 case 0x39: do_scalar_FCM (cpu); return;
8797 case 0x3B: do_scalar_FCM (cpu); return;
8798 }
8799 break;
8800
8801 case 0xBE: do_scalar_shift (cpu); return;
8802
8803 case 0xFC:
8804 switch (INSTR (15, 10))
8805 {
8806 case 0x36:
8807 switch (INSTR (21, 16))
8808 {
8809 case 0x30: do_scalar_FADDP (cpu); return;
8810 case 0x21: do_scalar_UCVTF (cpu); return;
8811 }
8812 HALT_NYI;
8813 case 0x39: do_scalar_FCM (cpu); return;
8814 case 0x3B: do_scalar_FCM (cpu); return;
8815 }
8816 break;
8817
8818 case 0xFD:
8819 switch (INSTR (15, 10))
8820 {
8821 case 0x0D: do_scalar_CMGT (cpu); return;
8822 case 0x11: do_scalar_USHL (cpu); return;
8823 case 0x2E: do_scalar_NEG (cpu); return;
8824 case 0x35: do_scalar_FABD (cpu); return;
8825 case 0x39: do_scalar_FCM (cpu); return;
8826 case 0x3B: do_scalar_FCM (cpu); return;
8827 default:
8828 HALT_NYI;
8829 }
8830
8831 case 0xFE: do_scalar_USHR (cpu); return;
8832
8833 case 0xBD:
8834 switch (INSTR (15, 10))
8835 {
8836 case 0x21: do_double_add (cpu); return;
8837 case 0x11: do_scalar_SSHL (cpu); return;
8838 default:
8839 HALT_NYI;
8840 }
8841
8842 default:
8843 HALT_NYI;
8844 }
8845 }
8846
8847 static void
8848 dexAdvSIMD1 (sim_cpu *cpu)
8849 {
8850 /* instr [28,25] = 1111. */
8851
8852 /* The basic scalar fp routines handled below all have bit 30 = 0;
8853 instructions with bit 30 set are dispatched to do_scalar_vec. */
8854 if (INSTR (30, 30))
8855 do_scalar_vec (cpu);
8856
8857 /* instr[24] is set for FP data processing 3-source and clear for
8858 all other basic scalar fp instruction groups. */
8859 else if (INSTR (24, 24))
8860 dexSimpleFPDataProc3Source (cpu);
8861
8862 /* instr[21] is clear for floating <-> fixed conversions and set for
8863 all other basic scalar fp instruction groups. */
8864 else if (!INSTR (21, 21))
8865 dexSimpleFPFixedConvert (cpu);
8866
8867 /* instr[11,10] : 01 ==> cond compare, 10 ==> Data Proc 2 Source
8868 11 ==> cond select, 00 ==> other. */
8869 else
8870 switch (INSTR (11, 10))
8871 {
8872 case 1: dexSimpleFPCondCompare (cpu); return;
8873 case 2: dexSimpleFPDataProc2Source (cpu); return;
8874 case 3: dexSimpleFPCondSelect (cpu); return;
8875
8876 default:
8877 /* Now an ordered cascade of tests.
8878 FP immediate has instr [12] == 1.
8879 FP compare has instr [13] == 1.
8880 FP Data Proc 1 Source has instr [14] == 1.
8881 FP floating <--> integer conversions has instr [15] == 0. */
8882 if (INSTR (12, 12))
8883 dexSimpleFPImmediate (cpu);
8884
8885 else if (INSTR (13, 13))
8886 dexSimpleFPCompare (cpu);
8887
8888 else if (INSTR (14, 14))
8889 dexSimpleFPDataProc1Source (cpu);
8890
8891 else if (!INSTR (15, 15))
8892 dexSimpleFPIntegerConvert (cpu);
8893
8894 else
8895 /* If we get here then instr[15] == 1 which means UNALLOC. */
8896 HALT_UNALLOC;
8897 }
8898 }
8899
8900 /* PC relative addressing. */
8901
8902 static void
8903 pcadr (sim_cpu *cpu)
8904 {
8905 /* instr[31] = op : 0 ==> ADR, 1 ==> ADRP
8906 instr[30,29] = immlo
8907 instr[23,5] = immhi. */
8908 uint64_t address;
8909 unsigned rd = INSTR (4, 0);
8910 uint32_t isPage = INSTR (31, 31);
8911 union { int64_t s64; uint64_t u64; } imm;
8912 uint64_t offset;
8913
8914 imm.s64 = simm64 (aarch64_get_instr (cpu), 23, 5);
8915 offset = imm.u64;
8916 offset = (offset << 2) | INSTR (30, 29);
8917
8918 address = aarch64_get_PC (cpu);
8919
8920 if (isPage)
8921 {
8922 offset <<= 12;
8923 address &= ~0xfff;
8924 }
8925
8926 aarch64_set_reg_u64 (cpu, rd, NO_SP, address + offset);
8927 }
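
/* Worked example (illustrative): with PC = 0x400123 and a combined
   immhi:immlo immediate of 1, ADR yields 0x400123 + 1 = 0x400124,
   while ADRP clears the low 12 bits of the PC and scales the
   immediate by 0x1000, yielding 0x400000 + 0x1000 = 0x401000.  */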
8928
8929 /* Specific decode and execute for group Data Processing Immediate. */
8930
8931 static void
8932 dexPCRelAddressing (sim_cpu *cpu)
8933 {
8934 /* assert instr[28,24] = 10000. */
8935 pcadr (cpu);
8936 }
8937
8938 /* Immediate logical.
8939 The bimm32/64 argument is constructed by replicating a 2, 4, 8,
8940 16, 32 or 64 bit sequence pulled out at decode and possibly
8941 inverting it.
8942
8943 N.B. the output register (dest) can normally be Xn or SP;
8944 the exception occurs for flag setting instructions which may
8945 only use Xn for the output (dest). The input register can
8946 never be SP. */
8947
8948 /* 32 bit and immediate. */
8949 static void
8950 and32 (sim_cpu *cpu, uint32_t bimm)
8951 {
8952 unsigned rn = INSTR (9, 5);
8953 unsigned rd = INSTR (4, 0);
8954
8955 aarch64_set_reg_u64 (cpu, rd, SP_OK,
8956 aarch64_get_reg_u32 (cpu, rn, NO_SP) & bimm);
8957 }
8958
8959 /* 64 bit and immediate. */
8960 static void
8961 and64 (sim_cpu *cpu, uint64_t bimm)
8962 {
8963 unsigned rn = INSTR (9, 5);
8964 unsigned rd = INSTR (4, 0);
8965
8966 aarch64_set_reg_u64 (cpu, rd, SP_OK,
8967 aarch64_get_reg_u64 (cpu, rn, NO_SP) & bimm);
8968 }
8969
8970 /* 32 bit and immediate set flags. */
8971 static void
8972 ands32 (sim_cpu *cpu, uint32_t bimm)
8973 {
8974 unsigned rn = INSTR (9, 5);
8975 unsigned rd = INSTR (4, 0);
8976
8977 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
8978 uint32_t value2 = bimm;
8979
8980 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
8981 set_flags_for_binop32 (cpu, value1 & value2);
8982 }
8983
8984 /* 64 bit and immediate set flags. */
8985 static void
8986 ands64 (sim_cpu *cpu, uint64_t bimm)
8987 {
8988 unsigned rn = INSTR (9, 5);
8989 unsigned rd = INSTR (4, 0);
8990
8991 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
8992 uint64_t value2 = bimm;
8993
8994 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
8995 set_flags_for_binop64 (cpu, value1 & value2);
8996 }
8997
8998 /* 32 bit exclusive or immediate. */
8999 static void
9000 eor32 (sim_cpu *cpu, uint32_t bimm)
9001 {
9002 unsigned rn = INSTR (9, 5);
9003 unsigned rd = INSTR (4, 0);
9004
9005 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9006 aarch64_get_reg_u32 (cpu, rn, NO_SP) ^ bimm);
9007 }
9008
9009 /* 64 bit exclusive or immediate. */
9010 static void
9011 eor64 (sim_cpu *cpu, uint64_t bimm)
9012 {
9013 unsigned rn = INSTR (9, 5);
9014 unsigned rd = INSTR (4, 0);
9015
9016 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9017 aarch64_get_reg_u64 (cpu, rn, NO_SP) ^ bimm);
9018 }
9019
9020 /* 32 bit or immediate. */
9021 static void
9022 orr32 (sim_cpu *cpu, uint32_t bimm)
9023 {
9024 unsigned rn = INSTR (9, 5);
9025 unsigned rd = INSTR (4, 0);
9026
9027 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9028 aarch64_get_reg_u32 (cpu, rn, NO_SP) | bimm);
9029 }
9030
9031 /* 64 bit or immediate. */
9032 static void
9033 orr64 (sim_cpu *cpu, uint64_t bimm)
9034 {
9035 unsigned rn = INSTR (9, 5);
9036 unsigned rd = INSTR (4, 0);
9037
9038 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9039 aarch64_get_reg_u64 (cpu, rn, NO_SP) | bimm);
9040 }
9041
9042 /* Logical shifted register.
9043 These allow an optional LSL, ASR, LSR or ROR to the second source
9044 register with a count up to the register bit count.
9045 N.B. register args may not be SP. */
9046
9047 /* 32 bit AND shifted register. */
9048 static void
9049 and32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9050 {
9051 unsigned rm = INSTR (20, 16);
9052 unsigned rn = INSTR (9, 5);
9053 unsigned rd = INSTR (4, 0);
9054
9055 aarch64_set_reg_u64
9056 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9057 & shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9058 }
9059
9060 /* 64 bit AND shifted register. */
9061 static void
9062 and64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9063 {
9064 unsigned rm = INSTR (20, 16);
9065 unsigned rn = INSTR (9, 5);
9066 unsigned rd = INSTR (4, 0);
9067
9068 aarch64_set_reg_u64
9069 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9070 & shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9071 }
9072
9073 /* 32 bit AND shifted register setting flags. */
9074 static void
9075 ands32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9076 {
9077 unsigned rm = INSTR (20, 16);
9078 unsigned rn = INSTR (9, 5);
9079 unsigned rd = INSTR (4, 0);
9080
9081 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9082 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9083 shift, count);
9084
9085 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9086 set_flags_for_binop32 (cpu, value1 & value2);
9087 }
9088
9089 /* 64 bit AND shifted register setting flags. */
9090 static void
9091 ands64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9092 {
9093 unsigned rm = INSTR (20, 16);
9094 unsigned rn = INSTR (9, 5);
9095 unsigned rd = INSTR (4, 0);
9096
9097 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9098 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9099 shift, count);
9100
9101 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9102 set_flags_for_binop64 (cpu, value1 & value2);
9103 }
9104
9105 /* 32 bit BIC shifted register. */
9106 static void
9107 bic32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9108 {
9109 unsigned rm = INSTR (20, 16);
9110 unsigned rn = INSTR (9, 5);
9111 unsigned rd = INSTR (4, 0);
9112
9113 aarch64_set_reg_u64
9114 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9115 & ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9116 }
9117
9118 /* 64 bit BIC shifted register. */
9119 static void
9120 bic64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9121 {
9122 unsigned rm = INSTR (20, 16);
9123 unsigned rn = INSTR (9, 5);
9124 unsigned rd = INSTR (4, 0);
9125
9126 aarch64_set_reg_u64
9127 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9128 & ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9129 }
9130
9131 /* 32 bit BIC shifted register setting flags. */
9132 static void
9133 bics32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9134 {
9135 unsigned rm = INSTR (20, 16);
9136 unsigned rn = INSTR (9, 5);
9137 unsigned rd = INSTR (4, 0);
9138
9139 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9140 uint32_t value2 = ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9141 shift, count);
9142
9143 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9144 set_flags_for_binop32 (cpu, value1 & value2);
9145 }
9146
9147 /* 64 bit BIC shifted register setting flags. */
9148 static void
9149 bics64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9150 {
9151 unsigned rm = INSTR (20, 16);
9152 unsigned rn = INSTR (9, 5);
9153 unsigned rd = INSTR (4, 0);
9154
9155 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9156 uint64_t value2 = ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9157 shift, count);
9158
9159 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9160 set_flags_for_binop64 (cpu, value1 & value2);
9161 }
9162
9163 /* 32 bit EON shifted register. */
9164 static void
9165 eon32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9166 {
9167 unsigned rm = INSTR (20, 16);
9168 unsigned rn = INSTR (9, 5);
9169 unsigned rd = INSTR (4, 0);
9170
9171 aarch64_set_reg_u64
9172 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9173 ^ ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9174 }
9175
9176 /* 64 bit EON shifted register. */
9177 static void
9178 eon64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9179 {
9180 unsigned rm = INSTR (20, 16);
9181 unsigned rn = INSTR (9, 5);
9182 unsigned rd = INSTR (4, 0);
9183
9184 aarch64_set_reg_u64
9185 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9186 ^ ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9187 }
9188
9189 /* 32 bit EOR shifted register. */
9190 static void
9191 eor32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9192 {
9193 unsigned rm = INSTR (20, 16);
9194 unsigned rn = INSTR (9, 5);
9195 unsigned rd = INSTR (4, 0);
9196
9197 aarch64_set_reg_u64
9198 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9199 ^ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9200 }
9201
9202 /* 64 bit EOR shifted register. */
9203 static void
9204 eor64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9205 {
9206 unsigned rm = INSTR (20, 16);
9207 unsigned rn = INSTR (9, 5);
9208 unsigned rd = INSTR (4, 0);
9209
9210 aarch64_set_reg_u64
9211 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9212 ^ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9213 }
9214
9215 /* 32 bit ORR shifted register. */
9216 static void
9217 orr32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9218 {
9219 unsigned rm = INSTR (20, 16);
9220 unsigned rn = INSTR (9, 5);
9221 unsigned rd = INSTR (4, 0);
9222
9223 aarch64_set_reg_u64
9224 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9225 | shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9226 }
9227
9228 /* 64 bit ORR shifted register. */
9229 static void
9230 orr64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9231 {
9232 unsigned rm = INSTR (20, 16);
9233 unsigned rn = INSTR (9, 5);
9234 unsigned rd = INSTR (4, 0);
9235
9236 aarch64_set_reg_u64
9237 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9238 | shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9239 }
9240
9241 /* 32 bit ORN shifted register. */
9242 static void
9243 orn32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9244 {
9245 unsigned rm = INSTR (20, 16);
9246 unsigned rn = INSTR (9, 5);
9247 unsigned rd = INSTR (4, 0);
9248
9249 aarch64_set_reg_u64
9250 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9251 | ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9252 }
9253
9254 /* 64 bit ORN shifted register. */
9255 static void
9256 orn64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9257 {
9258 unsigned rm = INSTR (20, 16);
9259 unsigned rn = INSTR (9, 5);
9260 unsigned rd = INSTR (4, 0);
9261
9262 aarch64_set_reg_u64
9263 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9264 | ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9265 }
9266
9267 static void
9268 dexLogicalImmediate (sim_cpu *cpu)
9269 {
9270 /* assert instr[28,23] = 100100
9271 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9272 instr[30,29] = op : 0 ==> AND, 1 ==> ORR, 2 ==> EOR, 3 ==> ANDS
9273 instr[22] = N : used to construct immediate mask
9274 instr[21,16] = immr
9275 instr[15,10] = imms
9276 instr[9,5] = Rn
9277 instr[4,0] = Rd */
9278
9279 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
9280 uint32_t size = INSTR (31, 31);
9281 uint32_t N = INSTR (22, 22);
9282 /* uint32_t immr = INSTR (21, 16); */
9283 /* uint32_t imms = INSTR (15, 10); */
9284 uint32_t index = INSTR (22, 10);
9285 uint64_t bimm64 = LITable [index];
9286 uint32_t dispatch = INSTR (30, 29);
9287
9288 if (~size & N)
9289 HALT_UNALLOC;
9290
9291 if (!bimm64)
9292 HALT_UNALLOC;
9293
9294 if (size == 0)
9295 {
9296 uint32_t bimm = (uint32_t) bimm64;
9297
9298 switch (dispatch)
9299 {
9300 case 0: and32 (cpu, bimm); return;
9301 case 1: orr32 (cpu, bimm); return;
9302 case 2: eor32 (cpu, bimm); return;
9303 case 3: ands32 (cpu, bimm); return;
9304 }
9305 }
9306 else
9307 {
9308 switch (dispatch)
9309 {
9310 case 0: and64 (cpu, bimm64); return;
9311 case 1: orr64 (cpu, bimm64); return;
9312 case 2: eor64 (cpu, bimm64); return;
9313 case 3: ands64 (cpu, bimm64); return;
9314 }
9315 }
9316 HALT_UNALLOC;
9317 }
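
/* Worked example (illustrative): the LITable index above is the 13 bit
   field N:immr:imms.  For N = 0, immr = 0, imms = 0b000111 the decoded
   bitmask is eight consecutive ones with no rotation, replicated across
   the register, i.e. 0x000000ff000000ff; and32 et al then use just the
   low 32 bits.  */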
9318
9319 /* Immediate move.
9320 The uimm argument is a 16 bit value to be inserted into the
9321 target register; the pos argument locates the 16 bit word in the
9322 dest register, i.e. it is in {0, 1} for 32 bit and {0, 1, 2,
9323 3} for 64 bit.
9324 N.B. the register arg may not be SP, so it should be
9325 accessed using the setGZRegisterXXX accessors. */
9326
9327 /* 32 bit move 16 bit immediate zero remaining shorts. */
9328 static void
9329 movz32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9330 {
9331 unsigned rd = INSTR (4, 0);
9332
9333 aarch64_set_reg_u64 (cpu, rd, NO_SP, val << (pos * 16));
9334 }
9335
9336 /* 64 bit move 16 bit immediate zero remaining shorts. */
9337 static void
9338 movz64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9339 {
9340 unsigned rd = INSTR (4, 0);
9341
9342 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((uint64_t) val) << (pos * 16));
9343 }
9344
9345 /* 32 bit move 16 bit immediate negated. */
9346 static void
9347 movn32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9348 {
9349 unsigned rd = INSTR (4, 0);
9350
9351 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((val << (pos * 16)) ^ 0xffffffffU));
9352 }
9353
9354 /* 64 bit move 16 bit immediate negated. */
9355 static void
9356 movn64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9357 {
9358 unsigned rd = INSTR (4, 0);
9359
9360 aarch64_set_reg_u64
9361 (cpu, rd, NO_SP, ((((uint64_t) val) << (pos * 16))
9362 ^ 0xffffffffffffffffULL));
9363 }
9364
9365 /* 32 bit move 16 bit immediate keep remaining shorts. */
9366 static void
9367 movk32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9368 {
9369 unsigned rd = INSTR (4, 0);
9370 uint32_t current = aarch64_get_reg_u32 (cpu, rd, NO_SP);
9371 uint32_t value = val << (pos * 16);
9372 uint32_t mask = ~(0xffffU << (pos * 16));
9373
9374 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
9375 }
9376
9377 /* 64 bit move 16 bit immediate keep remaining shorts. */
9378 static void
9379 movk64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9380 {
9381 unsigned rd = INSTR (4, 0);
9382 uint64_t current = aarch64_get_reg_u64 (cpu, rd, NO_SP);
9383 uint64_t value = (uint64_t) val << (pos * 16);
9384 uint64_t mask = ~(0xffffULL << (pos * 16));
9385
9386 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
9387 }
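
/* Worked example (illustrative): a constant such as 0x12345678 is
   typically materialised as MOVZ Xd, #0x5678 followed by
   MOVK Xd, #0x1234, LSL #16; movz64 writes 0x5678 and movk64 then
   merges 0x1234 into bits 31:16 while preserving the rest, giving
   0x12345678.  */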
9388
9389 static void
9390 dexMoveWideImmediate (sim_cpu *cpu)
9391 {
9392 /* assert instr[28:23] = 100101
9393 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9394 instr[30,29] = op : 0 ==> MOVN, 1 ==> UNALLOC, 2 ==> MOVZ, 3 ==> MOVK
9395 instr[22,21] = shift : 00 == LSL#0, 01 = LSL#16, 10 = LSL#32, 11 = LSL#48
9396 instr[20,5] = uimm16
9397 instr[4,0] = Rd */
9398
9399 /* N.B. the (multiple of 16) shift is applied by the called routine,
9400 we just pass the multiplier. */
9401
9402 uint32_t imm;
9403 uint32_t size = INSTR (31, 31);
9404 uint32_t op = INSTR (30, 29);
9405 uint32_t shift = INSTR (22, 21);
9406
9407 /* 32 bit can only shift by 0 or 1 lot of 16;
9408 anything else is an unallocated instruction. */
9409 if (size == 0 && (shift > 1))
9410 HALT_UNALLOC;
9411
9412 if (op == 1)
9413 HALT_UNALLOC;
9414
9415 imm = INSTR (20, 5);
9416
9417 if (size == 0)
9418 {
9419 if (op == 0)
9420 movn32 (cpu, imm, shift);
9421 else if (op == 2)
9422 movz32 (cpu, imm, shift);
9423 else
9424 movk32 (cpu, imm, shift);
9425 }
9426 else
9427 {
9428 if (op == 0)
9429 movn64 (cpu, imm, shift);
9430 else if (op == 2)
9431 movz64 (cpu, imm, shift);
9432 else
9433 movk64 (cpu, imm, shift);
9434 }
9435 }
9436
9437 /* Bitfield operations.
9438 These take a pair of bit positions r and s which are in {0..31}
9439 or {0..63} depending on the instruction word size.
9440 N.B. register args may not be SP. */
9441
9442 /* OK, we start with ubfm, which just needs to pick
9443 some bits out of the source, zero the rest and write
9444 the result to dest. This just needs two logical shifts. */
9445
9446 /* 32 bit bitfield move, left and right of affected zeroed
9447 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
9448 static void
9449 ubfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9450 {
9451 unsigned rd;
9452 unsigned rn = INSTR (9, 5);
9453 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9454
9455 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
9456 if (r <= s)
9457 {
9458 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
9459 We want only bits s:xxx:r at the bottom of the word
9460 so we LSL bit s up to bit 31 i.e. by 31 - s
9461 and then we LSR to bring bit 31 down to bit s - r
9462 i.e. by 31 + r - s. */
9463 value <<= 31 - s;
9464 value >>= 31 + r - s;
9465 }
9466 else
9467 {
9468 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0
9469 We want only bits s:xxx:0 starting at bit 31-(r-1)
9470 so we LSL bit s up to bit 31 i.e. by 31 - s
9471 and then we LSL to bring bit 31 down to 31-(r-1)+s
9472 i.e. by r - (s + 1). */
9473 value <<= 31 - s;
9474 value >>= r - (s + 1);
9475 }
9476
9477 rd = INSTR (4, 0);
9478 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
9479 }
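
/* Illustrative sketch (not used by the simulator): for the r <= s case
   the two shifts in ubfm32 above are equivalent to this
   extract-and-mask form, which is exactly the UBFX alias.  The helper
   name is hypothetical.  */
static inline uint32_t
ubfx32_sketch (uint32_t wn, uint32_t r, uint32_t s)
{
  uint32_t width = s + 1 - r;  /* Number of bits extracted.  */
  uint32_t mask = width == 32 ? 0xffffffffu : ((1u << width) - 1);
  return (wn >> r) & mask;
}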
9480
9481 /* 64 bit bitfield move, left and right of affected zeroed
9482 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
9483 static void
9484 ubfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9485 {
9486 unsigned rd;
9487 unsigned rn = INSTR (9, 5);
9488 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9489
9490 if (r <= s)
9491 {
9492 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
9493 We want only bits s:xxx:r at the bottom of the word.
9494 So we LSL bit s up to bit 63 i.e. by 63 - s
9495 and then we LSR to bring bit 63 down to bit s - r
9496 i.e. by 63 + r - s. */
9497 value <<= 63 - s;
9498 value >>= 63 + r - s;
9499 }
9500 else
9501 {
9502 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0.
9503 We want only bits s:xxx:0 starting at bit 63-(r-1).
9504 So we LSL bit s up to bit 63 i.e. by 63 - s
9505 and then we LSL to bring bit 63 down to 63-(r-1)+s
9506 i.e. by r - (s + 1). */
9507 value <<= 63 - s;
9508 value >>= r - (s + 1);
9509 }
9510
9511 rd = INSTR (4, 0);
9512 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
9513 }
9514
9515 /* The signed versions need to insert sign bits
9516 on the left of the inserted bit field, so we do
9517 much the same as the unsigned version except we
9518 use an arithmetic shift right -- this just means
9519 we need to operate on signed values. */
9520
9521 /* 32 bit bitfield move, left of affected sign-extended, right zeroed. */
9522 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
9523 static void
9524 sbfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9525 {
9526 unsigned rd;
9527 unsigned rn = INSTR (9, 5);
9528 /* As per ubfm32 but use an ASR instead of an LSR. */
9529 int32_t value = aarch64_get_reg_s32 (cpu, rn, NO_SP);
9530
9531 if (r <= s)
9532 {
9533 value <<= 31 - s;
9534 value >>= 31 + r - s;
9535 }
9536 else
9537 {
9538 value <<= 31 - s;
9539 value >>= r - (s + 1);
9540 }
9541
9542 rd = INSTR (4, 0);
9543 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
9544 }
9545
9546 /* 64 bit bitfield move, left of affected sign-extended, right zeroed. */
9547 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
9548 static void
9549 sbfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9550 {
9551 unsigned rd;
9552 unsigned rn = INSTR (9, 5);
9553 /* As per ubfm but use an ASR instead of an LSR. */
9554 int64_t value = aarch64_get_reg_s64 (cpu, rn, NO_SP);
9555
9556 if (r <= s)
9557 {
9558 value <<= 63 - s;
9559 value >>= 63 + r - s;
9560 }
9561 else
9562 {
9563 value <<= 63 - s;
9564 value >>= r - (s + 1);
9565 }
9566
9567 rd = INSTR (4, 0);
9568 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
9569 }
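
/* Worked example (illustrative): sbfm32 with r = 0, s = 7 is the SXTB
   alias: the value is shifted left by 24 and then arithmetically right
   by 24, replicating bit 7 into bits 31:8.  Similarly r = n, s = 31
   gives ASR #n.  */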
9570
9571 /* Finally, these versions leave non-affected bits
9572 as is, so we need to generate the bits as per
9573 ubfm and also generate a mask to pick the
9574 bits from the original and computed values. */
9575
9576 /* 32 bit bitfield move, non-affected bits left as is.
9577 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
9578 static void
9579 bfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9580 {
9581 unsigned rn = INSTR (9, 5);
9582 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9583 uint32_t mask = -1;
9584 unsigned rd;
9585 uint32_t value2;
9586
9587 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
9588 if (r <= s)
9589 {
9590 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
9591 We want only bits s:xxx:r at the bottom of the word
9592 so we LSL bit s up to bit 31 i.e. by 31 - s
9593 and then we LSR to bring bit 31 down to bit s - r
9594 i.e. by 31 + r - s. */
9595 value <<= 31 - s;
9596 value >>= 31 + r - s;
9597 /* the mask must include the same bits. */
9598 mask <<= 31 - s;
9599 mask >>= 31 + r - s;
9600 }
9601 else
9602 {
9603 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0.
9604 We want only bits s:xxx:0 starting at bit 31-(r-1)
9605 so we LSL bit s up to bit 31 i.e. by 31 - s
9606 and then we LSL to bring bit 31 down to 31-(r-1)+s
9607 i.e. by r - (s + 1). */
9608 value <<= 31 - s;
9609 value >>= r - (s + 1);
9610 /* The mask must include the same bits. */
9611 mask <<= 31 - s;
9612 mask >>= r - (s + 1);
9613 }
9614
9615 rd = INSTR (4, 0);
9616 value2 = aarch64_get_reg_u32 (cpu, rd, NO_SP);
9617
9618 value2 &= ~mask;
9619 value2 |= value;
9620
9621 aarch64_set_reg_u64
9622 (cpu, rd, NO_SP, value2);
9623 }
9624
9625 /* 64 bit bitfield move, non-affected bits left as is.
9626 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
9627 static void
9628 bfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9629 {
9630 unsigned rd;
9631 unsigned rn = INSTR (9, 5);
9632 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9633 uint64_t mask = 0xffffffffffffffffULL;
9634
9635 if (r <= s)
9636 {
9637 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
9638 We want only bits s:xxx:r at the bottom of the word
9639 so we LSL bit s up to bit 63 i.e. by 63 - s
9640 and then we LSR to bring bit 63 down to bit s - r
9641 i.e. by 63 + r - s. */
9642 value <<= 63 - s;
9643 value >>= 63 + r - s;
9644 /* The mask must include the same bits. */
9645 mask <<= 63 - s;
9646 mask >>= 63 + r - s;
9647 }
9648 else
9649 {
9650 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0
9651 We want only bits s:xxx:0 starting at bit 63-(r-1)
9652 so we LSL bit s up to bit 63 i.e. by 63 - s
9653 and then we LSL to bring bit 63 down to 63-(r-1)+s
9654 i.e. by r - (s + 1). */
9655 value <<= 63 - s;
9656 value >>= r - (s + 1);
9657 /* The mask must include the same bits. */
9658 mask <<= 63 - s;
9659 mask >>= r - (s + 1);
9660 }
9661
9662 rd = INSTR (4, 0);
9663 aarch64_set_reg_u64
9664 (cpu, rd, NO_SP, (aarch64_get_reg_u64 (cpu, rd, NO_SP) & ~mask) | value);
9665 }
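
/* Worked example (illustrative): BFI Wd, Wn, #4, #4 is encoded with
   r = 28 and s = 3, taking the r > s path of bfm32: value and mask are
   shifted left by 28 and right by 24, so Wn's bits 3:0 (under a mask of
   0xf0) land at bits 7:4 and are merged into Wd.  */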
9666
9667 static void
9668 dexBitfieldImmediate (sim_cpu *cpu)
9669 {
9670 /* assert instr[28:23] = 100110
9671 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9672 instr[30,29] = op : 0 ==> SBFM, 1 ==> BFM, 2 ==> UBFM, 3 ==> UNALLOC
9673 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit ow UNALLOC
9674 instr[21,16] = immr : 0xxxxx for 32 bit, xxxxxx for 64 bit
9675 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
9676 instr[9,5] = Rn
9677 instr[4,0] = Rd */
9678
9679 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
9680 uint32_t dispatch;
9681 uint32_t imms;
9682 uint32_t size = INSTR (31, 31);
9683 uint32_t N = INSTR (22, 22);
9684 /* 32 bit operations must have immr[5] = 0 and imms[5] = 0
9685 or else we have an UNALLOC. */
9686 uint32_t immr = INSTR (21, 16);
9687
9688 if (~size & N)
9689 HALT_UNALLOC;
9690
9691 if (!size && uimm (immr, 5, 5))
9692 HALT_UNALLOC;
9693
9694 imms = INSTR (15, 10);
9695 if (!size && uimm (imms, 5, 5))
9696 HALT_UNALLOC;
9697
9698 /* Switch on combined size and op. */
9699 dispatch = INSTR (31, 29);
9700 switch (dispatch)
9701 {
9702 case 0: sbfm32 (cpu, immr, imms); return;
9703 case 1: bfm32 (cpu, immr, imms); return;
9704 case 2: ubfm32 (cpu, immr, imms); return;
9705 case 4: sbfm (cpu, immr, imms); return;
9706 case 5: bfm (cpu, immr, imms); return;
9707 case 6: ubfm (cpu, immr, imms); return;
9708 default: HALT_UNALLOC;
9709 }
9710 }
9711
9712 static void
9713 do_EXTR_32 (sim_cpu *cpu)
9714 {
9715 /* instr[31:21] = 00010011100
9716 instr[20,16] = Rm
9717 instr[15,10] = imms : 0xxxxx for 32 bit
9718 instr[9,5] = Rn
9719 instr[4,0] = Rd */
9720 unsigned rm = INSTR (20, 16);
9721 unsigned imms = INSTR (15, 10) & 31;
9722 unsigned rn = INSTR ( 9, 5);
9723 unsigned rd = INSTR ( 4, 0);
9724 uint64_t val1;
9725 uint64_t val2;
9726
9727 val1 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
9728 val1 >>= imms;
9729 val2 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9730 val2 <<= (32 - imms);
9731 /* Mask to 32 bits so that Rn bits shifted above bit 31 are dropped. */
9732 aarch64_set_reg_u64 (cpu, rd, NO_SP, (val1 | val2) & 0xffffffffULL);
9733 }
9734
9735 static void
9736 do_EXTR_64 (sim_cpu *cpu)
9737 {
9738 /* instr[31:21] = 10010011100
9739 instr[20,16] = Rm
9740 instr[15,10] = imms
9741 instr[9,5] = Rn
9742 instr[4,0] = Rd */
9743 unsigned rm = INSTR (20, 16);
9744 unsigned imms = INSTR (15, 10) & 63;
9745 unsigned rn = INSTR ( 9, 5);
9746 unsigned rd = INSTR ( 4, 0);
9747 uint64_t val;
9748
9749 val = aarch64_get_reg_u64 (cpu, rm, NO_SP);
9750 val >>= imms;
9751 if (imms) /* A shift by 64 is undefined in C; EXTR with #0 just yields Rm. */
9752   val |= (aarch64_get_reg_u64 (cpu, rn, NO_SP) << (64 - imms));
9753 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
9754 }
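
/* Worked example (illustrative): when Rn == Rm, EXTR is the ROR alias;
   e.g. with X1 = 0x00000000deadbeef, EXTR X0, X1, X1, #16 gives
   0xbeef00000000dead.  */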
9755
9756 static void
9757 dexExtractImmediate (sim_cpu *cpu)
9758 {
9759 /* assert instr[28:23] = 100111
9760 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9761 instr[30,29] = op21 : 0 ==> EXTR, 1,2,3 ==> UNALLOC
9762 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit or UNALLOC
9763 instr[21] = op0 : must be 0 or UNALLOC
9764 instr[20,16] = Rm
9765 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
9766 instr[9,5] = Rn
9767 instr[4,0] = Rd */
9768
9769 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
9770 /* 64 bit operations must have N = 1 or else we have an UNALLOC. */
9771 uint32_t dispatch;
9772 uint32_t size = INSTR (31, 31);
9773 uint32_t N = INSTR (22, 22);
9774 /* 32 bit operations must have imms[5] = 0
9775 or else we have an UNALLOC. */
9776 uint32_t imms = INSTR (15, 10);
9777
9778 if (size ^ N)
9779 HALT_UNALLOC;
9780
9781 if (!size && uimm (imms, 5, 5))
9782 HALT_UNALLOC;
9783
9784 /* Switch on combined size and op. */
9785 dispatch = INSTR (31, 29);
9786
9787 if (dispatch == 0)
9788 do_EXTR_32 (cpu);
9789
9790 else if (dispatch == 4)
9791 do_EXTR_64 (cpu);
9792
9793 else if (dispatch == 1)
9794 HALT_NYI;
9795 else
9796 HALT_UNALLOC;
9797 }
9798
9799 static void
9800 dexDPImm (sim_cpu *cpu)
9801 {
9802 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
9803 assert group == GROUP_DPIMM_1000 || group == GROUP_DPIMM_1001
9804 bits [25,23] of a DPImm are the secondary dispatch vector. */
9805 uint32_t group2 = dispatchDPImm (aarch64_get_instr (cpu));
9806
9807 switch (group2)
9808 {
9809 case DPIMM_PCADR_000:
9810 case DPIMM_PCADR_001:
9811 dexPCRelAddressing (cpu);
9812 return;
9813
9814 case DPIMM_ADDSUB_010:
9815 case DPIMM_ADDSUB_011:
9816 dexAddSubtractImmediate (cpu);
9817 return;
9818
9819 case DPIMM_LOG_100:
9820 dexLogicalImmediate (cpu);
9821 return;
9822
9823 case DPIMM_MOV_101:
9824 dexMoveWideImmediate (cpu);
9825 return;
9826
9827 case DPIMM_BITF_110:
9828 dexBitfieldImmediate (cpu);
9829 return;
9830
9831 case DPIMM_EXTR_111:
9832 dexExtractImmediate (cpu);
9833 return;
9834
9835 default:
9836 /* Should never reach here. */
9837 HALT_NYI;
9838 }
9839 }
9840
9841 static void
9842 dexLoadUnscaledImmediate (sim_cpu *cpu)
9843 {
9844 /* instr[29,24] == 111_00
9845 instr[21] == 0
9846 instr[11,10] == 00
9847 instr[31,30] = size
9848 instr[26] = V
9849 instr[23,22] = opc
9850 instr[20,12] = simm9
9851 instr[9,5] = rn may be SP. */
9852 /* unsigned rt = INSTR (4, 0); */
9853 uint32_t V = INSTR (26, 26);
9854 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
9855 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
9856
9857 if (!V)
9858 {
9859 /* GReg operations. */
9860 switch (dispatch)
9861 {
9862 case 0: sturb (cpu, imm); return;
9863 case 1: ldurb32 (cpu, imm); return;
9864 case 2: ldursb64 (cpu, imm); return;
9865 case 3: ldursb32 (cpu, imm); return;
9866 case 4: sturh (cpu, imm); return;
9867 case 5: ldurh32 (cpu, imm); return;
9868 case 6: ldursh64 (cpu, imm); return;
9869 case 7: ldursh32 (cpu, imm); return;
9870 case 8: stur32 (cpu, imm); return;
9871 case 9: ldur32 (cpu, imm); return;
9872 case 10: ldursw (cpu, imm); return;
9873 case 12: stur64 (cpu, imm); return;
9874 case 13: ldur64 (cpu, imm); return;
9875
9876 case 14:
9877 /* PRFUM NYI. */
9878 HALT_NYI;
9879
9880 default:
9881 case 11:
9882 case 15:
9883 HALT_UNALLOC;
9884 }
9885 }
9886
9887 /* FReg operations. */
9888 switch (dispatch)
9889 {
9890 case 2: fsturq (cpu, imm); return;
9891 case 3: fldurq (cpu, imm); return;
9892 case 8: fsturs (cpu, imm); return;
9893 case 9: fldurs (cpu, imm); return;
9894 case 12: fsturd (cpu, imm); return;
9895 case 13: fldurd (cpu, imm); return;
9896
9897 case 0: /* STUR 8 bit FP. */
9898 case 1: /* LDUR 8 bit FP. */
9899 case 4: /* STUR 16 bit FP. */
9900 case 5: /* LDUR 16 bit FP. */
9901 HALT_NYI;
9902
9903 default:
9904 case 6:
9905 case 7:
9906 case 10:
9907 case 11:
9908 case 14:
9909 case 15:
9910 HALT_UNALLOC;
9911 }
9912 }
9913
9914 /* N.B. A preliminary note regarding all the ldrs<x>32
9915 instructions
9916
9917 The signed value loaded by these instructions is cast to unsigned
9918 before being assigned via aarch64_set_reg_u64 (cpu, N), i.e. to the
9919 64 bit element of the GReg union. This performs a 32 bit sign extension
9920 (as required) but avoids 64 bit sign extension, thus ensuring that the
9921 top half of the register word is zero. This is what the spec demands
9922 when a 32 bit load occurs. */
9923
9924 /* 32 bit load sign-extended byte scaled unsigned 12 bit. */
9925 static void
9926 ldrsb32_abs (sim_cpu *cpu, uint32_t offset)
9927 {
9928 unsigned int rn = INSTR (9, 5);
9929 unsigned int rt = INSTR (4, 0);
9930
9931 /* The target register may not be SP but the source may be;
9932 there is no scaling required for a byte load. */
9933 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
9934 aarch64_set_reg_u64 (cpu, rt, NO_SP,
9935 (int64_t) aarch64_get_mem_s8 (cpu, address));
9936 }
9937
9938 /* 32 bit load sign-extended byte scaled or unscaled zero-
9939 or sign-extended 32-bit register offset. */
9940 static void
9941 ldrsb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
9942 {
9943 unsigned int rm = INSTR (20, 16);
9944 unsigned int rn = INSTR (9, 5);
9945 unsigned int rt = INSTR (4, 0);
9946
9947 /* rn may reference SP, rm and rt must reference ZR. */
9948
9949 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
9950 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9951 extension);
9952
9953 /* There is no scaling required for a byte load. */
9954 aarch64_set_reg_u64
9955 (cpu, rt, NO_SP, (int64_t) aarch64_get_mem_s8 (cpu, address
9956 + displacement));
9957 }
9958
9959 /* 32 bit load sign-extended byte unscaled signed 9 bit with
9960 pre- or post-writeback. */
9961 static void
9962 ldrsb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
9963 {
9964 uint64_t address;
9965 unsigned int rn = INSTR (9, 5);
9966 unsigned int rt = INSTR (4, 0);
9967
9968 if (rn == rt && wb != NoWriteBack)
9969 HALT_UNALLOC;
9970
9971 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
9972
9973 if (wb == Pre)
9974 address += offset;
9975
9976 aarch64_set_reg_u64 (cpu, rt, NO_SP,
9977 (int64_t) aarch64_get_mem_s8 (cpu, address));
9978
9979 if (wb == Post)
9980 address += offset;
9981
9982 if (wb != NoWriteBack)
9983 aarch64_set_reg_u64 (cpu, rn, NO_SP, address);
9984 }
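
/* Worked example (illustrative): with Rn = 0x1000 and offset = 8, the
   Pre form above loads from 0x1008 and writes 0x1008 back to Rn, the
   Post form loads from 0x1000 and then writes back 0x1008, and
   NoWriteBack loads from 0x1000 leaving Rn untouched.  */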
9985
9986 /* 8 bit store scaled. */
9987 static void
9988 fstrb_abs (sim_cpu *cpu, uint32_t offset)
9989 {
9990 unsigned st = INSTR (4, 0);
9991 unsigned rn = INSTR (9, 5);
9992
9993 aarch64_set_mem_u8 (cpu,
9994 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
9995 aarch64_get_vec_u8 (cpu, st, 0));
9996 }
9997
9998 /* 8 bit store scaled or unscaled zero- or
9999 sign-extended 8-bit register offset. */
10000 static void
10001 fstrb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10002 {
10003 unsigned rm = INSTR (20, 16);
10004 unsigned rn = INSTR (9, 5);
10005 unsigned st = INSTR (4, 0);
10006
10007 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10008 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10009 extension);
10010 uint64_t displacement = extended; /* Byte accesses are never scaled, but the register offset still applies. */
10011
10012 aarch64_set_mem_u8
10013 (cpu, address + displacement, aarch64_get_vec_u8 (cpu, st, 0));
10014 }
10015
10016 /* 16 bit store scaled. */
10017 static void
10018 fstrh_abs (sim_cpu *cpu, uint32_t offset)
10019 {
10020 unsigned st = INSTR (4, 0);
10021 unsigned rn = INSTR (9, 5);
10022
10023 aarch64_set_mem_u16
10024 (cpu,
10025 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16),
10026 aarch64_get_vec_u16 (cpu, st, 0));
10027 }
10028
10029 /* 16 bit store scaled or unscaled zero-
10030 or sign-extended 16-bit register offset. */
10031 static void
10032 fstrh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10033 {
10034 unsigned rm = INSTR (20, 16);
10035 unsigned rn = INSTR (9, 5);
10036 unsigned st = INSTR (4, 0);
10037
10038 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10039 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10040 extension);
10041 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
10042
10043 aarch64_set_mem_u16
10044 (cpu, address + displacement, aarch64_get_vec_u16 (cpu, st, 0));
10045 }
10046
10047 /* 32 bit store scaled unsigned 12 bit. */
10048 static void
10049 fstrs_abs (sim_cpu *cpu, uint32_t offset)
10050 {
10051 unsigned st = INSTR (4, 0);
10052 unsigned rn = INSTR (9, 5);
10053
10054 aarch64_set_mem_u32
10055 (cpu,
10056 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32),
10057 aarch64_get_vec_u32 (cpu, st, 0));
10058 }
10059
10060 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
10061 static void
10062 fstrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10063 {
10064 unsigned rn = INSTR (9, 5);
10065 unsigned st = INSTR (4, 0);
10066
10067 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10068
10069 if (wb != Post)
10070 address += offset;
10071
10072 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, st, 0));
10073
10074 if (wb == Post)
10075 address += offset;
10076
10077 if (wb != NoWriteBack)
10078 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10079 }
10080
10081 /* 32 bit store scaled or unscaled zero-
10082 or sign-extended 32-bit register offset. */
10083 static void
10084 fstrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10085 {
10086 unsigned rm = INSTR (20, 16);
10087 unsigned rn = INSTR (9, 5);
10088 unsigned st = INSTR (4, 0);
10089
10090 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10091 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10092 extension);
10093 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
10094
10095 aarch64_set_mem_u32
10096 (cpu, address + displacement, aarch64_get_vec_u32 (cpu, st, 0));
10097 }
10098
10099 /* 64 bit store scaled unsigned 12 bit. */
10100 static void
10101 fstrd_abs (sim_cpu *cpu, uint32_t offset)
10102 {
10103 unsigned st = INSTR (4, 0);
10104 unsigned rn = INSTR (9, 5);
10105
10106 aarch64_set_mem_u64
10107 (cpu,
10108 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64),
10109 aarch64_get_vec_u64 (cpu, st, 0));
10110 }
10111
10112 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
10113 static void
10114 fstrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10115 {
10116 unsigned rn = INSTR (9, 5);
10117 unsigned st = INSTR (4, 0);
10118
10119 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10120
10121 if (wb != Post)
10122 address += offset;
10123
10124 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, st, 0));
10125
10126 if (wb == Post)
10127 address += offset;
10128
10129 if (wb != NoWriteBack)
10130 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10131 }
10132
10133 /* 64 bit store scaled or unscaled zero-
10134 or sign-extended 32-bit register offset. */
10135 static void
10136 fstrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10137 {
10138 unsigned rm = INSTR (20, 16);
10139 unsigned rn = INSTR (9, 5);
10140 unsigned st = INSTR (4, 0);
10141
10142 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10143 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10144 extension);
10145 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
10146
10147 aarch64_set_mem_u64
10148 (cpu, address + displacement, aarch64_get_vec_u64 (cpu, st, 0));
10149 }
10150
10151 /* 128 bit store scaled unsigned 12 bit. */
10152 static void
10153 fstrq_abs (sim_cpu *cpu, uint32_t offset)
10154 {
10155 FRegister a;
10156 unsigned st = INSTR (4, 0);
10157 unsigned rn = INSTR (9, 5);
10158 uint64_t addr;
10159
10160 aarch64_get_FP_long_double (cpu, st, & a);
10161
10162 addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
10163 aarch64_set_mem_long_double (cpu, addr, a);
10164 }
10165
10166 /* 128 bit store unscaled signed 9 bit with pre- or post-writeback. */
10167 static void
10168 fstrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10169 {
10170 FRegister a;
10171 unsigned rn = INSTR (9, 5);
10172 unsigned st = INSTR (4, 0);
10173 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10174
10175 if (wb != Post)
10176 address += offset;
10177
10178 aarch64_get_FP_long_double (cpu, st, & a);
10179 aarch64_set_mem_long_double (cpu, address, a);
10180
10181 if (wb == Post)
10182 address += offset;
10183
10184 if (wb != NoWriteBack)
10185 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10186 }
10187
10188 /* 128 bit store scaled or unscaled zero-
10189 or sign-extended 32-bit register offset. */
10190 static void
10191 fstrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10192 {
10193 unsigned rm = INSTR (20, 16);
10194 unsigned rn = INSTR (9, 5);
10195 unsigned st = INSTR (4, 0);
10196
10197 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10198 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10199 extension);
10200 uint64_t displacement = OPT_SCALE (extended, 128, scaling);
10201
10202 FRegister a;
10203
10204 aarch64_get_FP_long_double (cpu, st, & a);
10205 aarch64_set_mem_long_double (cpu, address + displacement, a);
10206 }
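
/* Illustrative sketch (hypothetical helper, mirroring what the
   OPT_SCALE macro defined earlier in this file is assumed to do): a
   Scaled register offset is multiplied by the access size in bytes, so
   a 128 bit access shifts the extended offset left by 4, while an
   Unscaled offset is used as-is.  */
static inline int64_t
opt_scale_sketch (int64_t extended, unsigned element_bits, int scaled)
{
  return scaled ? extended * (int64_t) (element_bits / 8) : extended;
}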
10207
10208 static void
10209 dexLoadImmediatePrePost (sim_cpu *cpu)
10210 {
10211 /* instr[31,30] = size
10212 instr[29,27] = 111
10213 instr[26] = V
10214 instr[25,24] = 00
10215 instr[23,22] = opc
10216 instr[21] = 0
10217 instr[20,12] = simm9
10218 instr[11] = wb : 0 ==> Post, 1 ==> Pre
10219 instr[10] = 0
10220 instr[9,5] = Rn may be SP.
10221 instr[4,0] = Rt */
10222
10223 uint32_t V = INSTR (26, 26);
10224 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10225 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10226 WriteBack wb = INSTR (11, 11);
10227
10228 if (!V)
10229 {
10230 /* GReg operations. */
10231 switch (dispatch)
10232 {
10233 case 0: strb_wb (cpu, imm, wb); return;
10234 case 1: ldrb32_wb (cpu, imm, wb); return;
10235 case 2: ldrsb_wb (cpu, imm, wb); return;
10236 case 3: ldrsb32_wb (cpu, imm, wb); return;
10237 case 4: strh_wb (cpu, imm, wb); return;
10238 case 5: ldrh32_wb (cpu, imm, wb); return;
10239 case 6: ldrsh64_wb (cpu, imm, wb); return;
10240 case 7: ldrsh32_wb (cpu, imm, wb); return;
10241 case 8: str32_wb (cpu, imm, wb); return;
10242 case 9: ldr32_wb (cpu, imm, wb); return;
10243 case 10: ldrsw_wb (cpu, imm, wb); return;
10244 case 12: str_wb (cpu, imm, wb); return;
10245 case 13: ldr_wb (cpu, imm, wb); return;
10246
10247 default:
10248 case 11:
10249 case 14:
10250 case 15:
10251 HALT_UNALLOC;
10252 }
10253 }
10254
10255 /* FReg operations. */
10256 switch (dispatch)
10257 {
10258 case 2: fstrq_wb (cpu, imm, wb); return;
10259 case 3: fldrq_wb (cpu, imm, wb); return;
10260 case 8: fstrs_wb (cpu, imm, wb); return;
10261 case 9: fldrs_wb (cpu, imm, wb); return;
10262 case 12: fstrd_wb (cpu, imm, wb); return;
10263 case 13: fldrd_wb (cpu, imm, wb); return;
10264
10265 case 0: /* STR 8 bit FP. */
10266 case 1: /* LDR 8 bit FP. */
10267 case 4: /* STR 16 bit FP. */
10268 case 5: /* LDR 16 bit FP. */
10269 HALT_NYI;
10270
10271 default:
10272 case 6:
10273 case 7:
10274 case 10:
10275 case 11:
10276 case 14:
10277 case 15:
10278 HALT_UNALLOC;
10279 }
10280 }
10281
10282 static void
10283 dexLoadRegisterOffset (sim_cpu *cpu)
10284 {
10285 /* instr[31,30] = size
10286 instr[29,27] = 111
10287 instr[26] = V
10288 instr[25,24] = 00
10289 instr[23,22] = opc
10290 instr[21] = 1
10291 instr[20,16] = rm
10292 instr[15,13] = option : 010 ==> UXTW, 011 ==> UXTX/LSL,
10293 110 ==> SXTW, 111 ==> SXTX,
10294 ow ==> RESERVED
10295 instr[12] = scaled
10296 instr[11,10] = 10
10297 instr[9,5] = rn
10298 instr[4,0] = rt. */
10299
10300 uint32_t V = INSTR (26, 26);
10301 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10302 Scaling scale = INSTR (12, 12);
10303 Extension extensionType = INSTR (15, 13);
10304
10305 /* Check for illegal extension types. */
10306 if (uimm (extensionType, 1, 1) == 0)
10307 HALT_UNALLOC;
10308
10309 if (extensionType == UXTX || extensionType == SXTX)
10310 extensionType = NoExtension;
10311
10312 if (!V)
10313 {
10314 /* GReg operations. */
10315 switch (dispatch)
10316 {
10317 case 0: strb_scale_ext (cpu, scale, extensionType); return;
10318 case 1: ldrb32_scale_ext (cpu, scale, extensionType); return;
10319 case 2: ldrsb_scale_ext (cpu, scale, extensionType); return;
10320 case 3: ldrsb32_scale_ext (cpu, scale, extensionType); return;
10321 case 4: strh_scale_ext (cpu, scale, extensionType); return;
10322 case 5: ldrh32_scale_ext (cpu, scale, extensionType); return;
10323 case 6: ldrsh_scale_ext (cpu, scale, extensionType); return;
10324 case 7: ldrsh32_scale_ext (cpu, scale, extensionType); return;
10325 case 8: str32_scale_ext (cpu, scale, extensionType); return;
10326 case 9: ldr32_scale_ext (cpu, scale, extensionType); return;
10327 case 10: ldrsw_scale_ext (cpu, scale, extensionType); return;
10328 case 12: str_scale_ext (cpu, scale, extensionType); return;
10329 case 13: ldr_scale_ext (cpu, scale, extensionType); return;
10330 case 14: prfm_scale_ext (cpu, scale, extensionType); return;
10331
10332 default:
10333 case 11:
10334 case 15:
10335 HALT_UNALLOC;
10336 }
10337 }
10338
10339 /* FReg operations. */
10340 switch (dispatch)
10341 {
10342 case 1: /* LDR 8 bit FP. */
10343 HALT_NYI;
10344 case 3: fldrq_scale_ext (cpu, scale, extensionType); return;
10345 case 5: /* LDR 16 bit FP. */
10346 HALT_NYI;
10347 case 9: fldrs_scale_ext (cpu, scale, extensionType); return;
10348 case 13: fldrd_scale_ext (cpu, scale, extensionType); return;
10349
10350 case 0: fstrb_scale_ext (cpu, scale, extensionType); return;
10351 case 2: fstrq_scale_ext (cpu, scale, extensionType); return;
10352 case 4: fstrh_scale_ext (cpu, scale, extensionType); return;
10353 case 8: fstrs_scale_ext (cpu, scale, extensionType); return;
10354 case 12: fstrd_scale_ext (cpu, scale, extensionType); return;
10355
10356 default:
10357 case 6:
10358 case 7:
10359 case 10:
10360 case 11:
10361 case 14:
10362 case 15:
10363 HALT_UNALLOC;
10364 }
10365 }
10366
10367 static void
10368 dexLoadUnsignedImmediate (sim_cpu *cpu)
10369 {
10370 /* instr[29,24] == 111_01
10371 instr[31,30] = size
10372 instr[26] = V
10373 instr[23,22] = opc
10374 instr[21,10] = uimm12 : unsigned immediate offset
10375 instr[9,5] = rn may be SP.
10376 instr[4,0] = rt. */
10377
10378 uint32_t V = INSTR (26,26);
10379 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10380 uint32_t imm = INSTR (21, 10);
10381
10382 if (!V)
10383 {
10384 /* GReg operations. */
10385 switch (dispatch)
10386 {
10387 case 0: strb_abs (cpu, imm); return;
10388 case 1: ldrb32_abs (cpu, imm); return;
10389 case 2: ldrsb_abs (cpu, imm); return;
10390 case 3: ldrsb32_abs (cpu, imm); return;
10391 case 4: strh_abs (cpu, imm); return;
10392 case 5: ldrh32_abs (cpu, imm); return;
10393 case 6: ldrsh_abs (cpu, imm); return;
10394 case 7: ldrsh32_abs (cpu, imm); return;
10395 case 8: str32_abs (cpu, imm); return;
10396 case 9: ldr32_abs (cpu, imm); return;
10397 case 10: ldrsw_abs (cpu, imm); return;
10398 case 12: str_abs (cpu, imm); return;
10399 case 13: ldr_abs (cpu, imm); return;
10400 case 14: prfm_abs (cpu, imm); return;
10401
10402 default:
10403 case 11:
10404 case 15:
10405 HALT_UNALLOC;
10406 }
10407 }
10408
10409 /* FReg operations. */
10410 switch (dispatch)
10411 {
10412 case 0: fstrb_abs (cpu, imm); return;
10413 case 4: fstrh_abs (cpu, imm); return;
10414 case 8: fstrs_abs (cpu, imm); return;
10415 case 12: fstrd_abs (cpu, imm); return;
10416 case 2: fstrq_abs (cpu, imm); return;
10417
10418 case 1: fldrb_abs (cpu, imm); return;
10419 case 5: fldrh_abs (cpu, imm); return;
10420 case 9: fldrs_abs (cpu, imm); return;
10421 case 13: fldrd_abs (cpu, imm); return;
10422 case 3: fldrq_abs (cpu, imm); return;
10423
10424 default:
10425 case 6:
10426 case 7:
10427 case 10:
10428 case 11:
10429 case 14:
10430 case 15:
10431 HALT_UNALLOC;
10432 }
10433 }
10434
10435 static void
10436 dexLoadExclusive (sim_cpu *cpu)
10437 {
10438 /* assert instr[29:24] = 001000;
10439 instr[31,30] = size
10440 instr[23] = 0 if exclusive
10441 instr[22] = L : 1 if load, 0 if store
10442 instr[21] = 1 if pair
10443 instr[20,16] = Rs
10444 instr[15] = o0 : 1 if ordered
10445 instr[14,10] = Rt2
10446 instr[9,5] = Rn
10447 instr[4,0] = Rt. */
10448
10449 switch (INSTR (22, 21))
10450 {
10451 case 2: ldxr (cpu); return;
10452 case 0: stxr (cpu); return;
10453 default: HALT_NYI;
10454 }
10455 }
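
/* Worked example (illustrative): LDXR X0, [X1] has instr[22] (L) = 1
   and instr[21] (pair) = 0, so INSTR (22, 21) is 2 and ldxr is called;
   a store exclusive gives 0 and calls stxr.  The pair and remaining
   forms fall through to HALT_NYI.  */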
10456
10457 static void
10458 dexLoadOther (sim_cpu *cpu)
10459 {
10460 uint32_t dispatch;
10461
10462 /* instr[29,25] = 111_0
10463 instr[24] == 0 ==> dispatch, 1 ==> ldst reg unsigned immediate
10464 instr[21] : instr[11,10] is the secondary dispatch. */
10465 if (INSTR (24, 24))
10466 {
10467 dexLoadUnsignedImmediate (cpu);
10468 return;
10469 }
10470
10471 dispatch = ((INSTR (21, 21) << 2) | INSTR (11, 10));
10472 switch (dispatch)
10473 {
10474 case 0: dexLoadUnscaledImmediate (cpu); return;
10475 case 1: dexLoadImmediatePrePost (cpu); return;
10476 case 3: dexLoadImmediatePrePost (cpu); return;
10477 case 6: dexLoadRegisterOffset (cpu); return;
10478
10479 default:
10480 case 2:
10481 case 4:
10482 case 5:
10483 case 7:
10484 HALT_NYI;
10485 }
10486 }
10487
10488 static void
10489 store_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10490 {
10491 unsigned rn = INSTR (14, 10);
10492 unsigned rd = INSTR (9, 5);
10493 unsigned rm = INSTR (4, 0);
10494 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10495
10496 if ((rn == rd || rm == rd) && wb != NoWriteBack)
10497 HALT_UNALLOC; /* ??? */
10498
10499 offset <<= 2;
10500
10501 if (wb != Post)
10502 address += offset;
10503
10504 aarch64_set_mem_u32 (cpu, address,
10505 aarch64_get_reg_u32 (cpu, rm, NO_SP));
10506 aarch64_set_mem_u32 (cpu, address + 4,
10507 aarch64_get_reg_u32 (cpu, rn, NO_SP));
10508
10509 if (wb == Post)
10510 address += offset;
10511
10512 if (wb != NoWriteBack)
10513 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10514 }
10515
10516 static void
10517 store_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10518 {
10519 unsigned rn = INSTR (14, 10);
10520 unsigned rd = INSTR (9, 5);
10521 unsigned rm = INSTR (4, 0);
10522 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10523
10524 if ((rn == rd || rm == rd) && wb != NoWriteBack)
10525 HALT_UNALLOC; /* ??? */
10526
10527 offset <<= 3;
10528
10529 if (wb != Post)
10530 address += offset;
10531
10532 aarch64_set_mem_u64 (cpu, address,
10533 aarch64_get_reg_u64 (cpu, rm, NO_SP));
10534 aarch64_set_mem_u64 (cpu, address + 8,
10535 aarch64_get_reg_u64 (cpu, rn, NO_SP));
10536
10537 if (wb == Post)
10538 address += offset;
10539
10540 if (wb != NoWriteBack)
10541 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10542 }
10543
10544 static void
10545 load_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10546 {
10547 unsigned rn = INSTR (14, 10);
10548 unsigned rd = INSTR (9, 5);
10549 unsigned rm = INSTR (4, 0);
10550 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10551
10552 /* Treat this as unalloc to make sure we don't do it. */
10553 if (rn == rm)
10554 HALT_UNALLOC;
10555
10556 offset <<= 2;
10557
10558 if (wb != Post)
10559 address += offset;
10560
10561 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u32 (cpu, address));
10562 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u32 (cpu, address + 4));
10563
10564 if (wb == Post)
10565 address += offset;
10566
10567 if (wb != NoWriteBack)
10568 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10569 }
10570
10571 static void
10572 load_pair_s32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10573 {
10574 unsigned rn = INSTR (14, 10);
10575 unsigned rd = INSTR (9, 5);
10576 unsigned rm = INSTR (4, 0);
10577 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10578
10579 /* Treat this as unalloc to make sure we don't do it. */
10580 if (rn == rm)
10581 HALT_UNALLOC;
10582
10583 offset <<= 2;
10584
10585 if (wb != Post)
10586 address += offset;
10587
10588 aarch64_set_reg_s64 (cpu, rm, SP_OK, aarch64_get_mem_s32 (cpu, address));
10589 aarch64_set_reg_s64 (cpu, rn, SP_OK, aarch64_get_mem_s32 (cpu, address + 4));
10590
10591 if (wb == Post)
10592 address += offset;
10593
10594 if (wb != NoWriteBack)
10595 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10596 }
10597
10598 static void
10599 load_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10600 {
10601 unsigned rn = INSTR (14, 10);
10602 unsigned rd = INSTR (9, 5);
10603 unsigned rm = INSTR (4, 0);
10604 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10605
10606 /* Treat this as unalloc to make sure we don't do it. */
10607 if (rn == rm)
10608 HALT_UNALLOC;
10609
10610 offset <<= 3;
10611
10612 if (wb != Post)
10613 address += offset;
10614
10615 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u64 (cpu, address));
10616 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u64 (cpu, address + 8));
10617
10618 if (wb == Post)
10619 address += offset;
10620
10621 if (wb != NoWriteBack)
10622 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10623 }
10624
10625 static void
10626 dex_load_store_pair_gr (sim_cpu *cpu)
10627 {
10628 /* instr[31,30] = size (10=> 64-bit, 01=> signed 32-bit, 00=> 32-bit)
10629 instr[29,25] = instruction encoding: 101_0
10630 instr[26] = V : 1 if fp 0 if gp
10631 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
10632 instr[22] = load/store (1=> load)
10633 instr[21,15] = signed, scaled, offset
10634 instr[14,10] = Rn
10635 instr[ 9, 5] = Rd
10636 instr[ 4, 0] = Rm. */
10637
10638 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
10639 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
10640
10641 switch (dispatch)
10642 {
10643 case 2: store_pair_u32 (cpu, offset, Post); return;
10644 case 3: load_pair_u32 (cpu, offset, Post); return;
10645 case 4: store_pair_u32 (cpu, offset, NoWriteBack); return;
10646 case 5: load_pair_u32 (cpu, offset, NoWriteBack); return;
10647 case 6: store_pair_u32 (cpu, offset, Pre); return;
10648 case 7: load_pair_u32 (cpu, offset, Pre); return;
10649
10650 case 11: load_pair_s32 (cpu, offset, Post); return;
10651 case 13: load_pair_s32 (cpu, offset, NoWriteBack); return;
10652 case 15: load_pair_s32 (cpu, offset, Pre); return;
10653
10654 case 18: store_pair_u64 (cpu, offset, Post); return;
10655 case 19: load_pair_u64 (cpu, offset, Post); return;
10656 case 20: store_pair_u64 (cpu, offset, NoWriteBack); return;
10657 case 21: load_pair_u64 (cpu, offset, NoWriteBack); return;
10658 case 22: store_pair_u64 (cpu, offset, Pre); return;
10659 case 23: load_pair_u64 (cpu, offset, Pre); return;
10660
10661 default:
10662 HALT_UNALLOC;
10663 }
10664 }
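
/* Worked example (illustrative): for STP X0, X1, [SP, #16] the signed
   7 bit offset field holds 2; store_pair_u64 shifts it left by 3 to
   recover the byte offset 16.  The 32 bit pair forms shift by 2
   instead.  */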
10665
10666 static void
10667 store_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
10668 {
10669 unsigned rn = INSTR (14, 10);
10670 unsigned rd = INSTR (9, 5);
10671 unsigned rm = INSTR (4, 0);
10672 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10673
10674 offset <<= 2;
10675
10676 if (wb != Post)
10677 address += offset;
10678
10679 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, rm, 0));
10680 aarch64_set_mem_u32 (cpu, address + 4, aarch64_get_vec_u32 (cpu, rn, 0));
10681
10682 if (wb == Post)
10683 address += offset;
10684
10685 if (wb != NoWriteBack)
10686 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10687 }
10688
10689 static void
10690 store_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
10691 {
10692 unsigned rn = INSTR (14, 10);
10693 unsigned rd = INSTR (9, 5);
10694 unsigned rm = INSTR (4, 0);
10695 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10696
10697 offset <<= 3;
10698
10699 if (wb != Post)
10700 address += offset;
10701
10702 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, rm, 0));
10703 aarch64_set_mem_u64 (cpu, address + 8, aarch64_get_vec_u64 (cpu, rn, 0));
10704
10705 if (wb == Post)
10706 address += offset;
10707
10708 if (wb != NoWriteBack)
10709 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10710 }
10711
10712 static void
10713 store_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
10714 {
10715 FRegister a;
10716 unsigned rn = INSTR (14, 10);
10717 unsigned rd = INSTR (9, 5);
10718 unsigned rm = INSTR (4, 0);
10719 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10720
10721 offset <<= 4;
10722
10723 if (wb != Post)
10724 address += offset;
10725
10726 aarch64_get_FP_long_double (cpu, rm, & a);
10727 aarch64_set_mem_long_double (cpu, address, a);
10728 aarch64_get_FP_long_double (cpu, rn, & a);
10729 aarch64_set_mem_long_double (cpu, address + 16, a);
10730
10731 if (wb == Post)
10732 address += offset;
10733
10734 if (wb != NoWriteBack)
10735 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10736 }
10737
10738 static void
10739 load_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
10740 {
10741 unsigned rn = INSTR (14, 10);
10742 unsigned rd = INSTR (9, 5);
10743 unsigned rm = INSTR (4, 0);
10744 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10745
10746 if (rm == rn)
10747 HALT_UNALLOC;
10748
10749 offset <<= 2;
10750
10751 if (wb != Post)
10752 address += offset;
10753
10754 aarch64_set_vec_u32 (cpu, rm, 0, aarch64_get_mem_u32 (cpu, address));
10755 aarch64_set_vec_u32 (cpu, rn, 0, aarch64_get_mem_u32 (cpu, address + 4));
10756
10757 if (wb == Post)
10758 address += offset;
10759
10760 if (wb != NoWriteBack)
10761 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10762 }
10763
10764 static void
10765 load_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
10766 {
10767 unsigned rn = INSTR (14, 10);
10768 unsigned rd = INSTR (9, 5);
10769 unsigned rm = INSTR (4, 0);
10770 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10771
10772 if (rm == rn)
10773 HALT_UNALLOC;
10774
10775 offset <<= 3;
10776
10777 if (wb != Post)
10778 address += offset;
10779
10780 aarch64_set_vec_u64 (cpu, rm, 0, aarch64_get_mem_u64 (cpu, address));
10781 aarch64_set_vec_u64 (cpu, rn, 0, aarch64_get_mem_u64 (cpu, address + 8));
10782
10783 if (wb == Post)
10784 address += offset;
10785
10786 if (wb != NoWriteBack)
10787 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10788 }
10789
10790 static void
10791 load_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
10792 {
10793 FRegister a;
10794 unsigned rn = INSTR (14, 10);
10795 unsigned rd = INSTR (9, 5);
10796 unsigned rm = INSTR (4, 0);
10797 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10798
10799 if (rm == rn)
10800 HALT_UNALLOC;
10801
10802 offset <<= 4;
10803
10804 if (wb != Post)
10805 address += offset;
10806
10807 aarch64_get_mem_long_double (cpu, address, & a);
10808 aarch64_set_FP_long_double (cpu, rm, a);
10809 aarch64_get_mem_long_double (cpu, address + 16, & a);
10810 aarch64_set_FP_long_double (cpu, rn, a);
10811
10812 if (wb == Post)
10813 address += offset;
10814
10815 if (wb != NoWriteBack)
10816 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10817 }
10818
10819 static void
10820 dex_load_store_pair_fp (sim_cpu *cpu)
10821 {
10822 /* instr[31,30] = size (10=> 128-bit, 01=> 64-bit, 00=> 32-bit)
10823 instr[29,25] = instruction encoding
10824 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
10825 instr[22] = load/store (1=> load)
10826 instr[21,15] = signed, scaled, offset
10827 instr[14,10] = Rn
10828 instr[ 9, 5] = Rd
10829 instr[ 4, 0] = Rm */
10830
10831 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
10832 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
10833
10834 switch (dispatch)
10835 {
10836 case 2: store_pair_float (cpu, offset, Post); return;
10837 case 3: load_pair_float (cpu, offset, Post); return;
10838 case 4: store_pair_float (cpu, offset, NoWriteBack); return;
10839 case 5: load_pair_float (cpu, offset, NoWriteBack); return;
10840 case 6: store_pair_float (cpu, offset, Pre); return;
10841 case 7: load_pair_float (cpu, offset, Pre); return;
10842
10843 case 10: store_pair_double (cpu, offset, Post); return;
10844 case 11: load_pair_double (cpu, offset, Post); return;
10845 case 12: store_pair_double (cpu, offset, NoWriteBack); return;
10846 case 13: load_pair_double (cpu, offset, NoWriteBack); return;
10847 case 14: store_pair_double (cpu, offset, Pre); return;
10848 case 15: load_pair_double (cpu, offset, Pre); return;
10849
10850 case 18: store_pair_long_double (cpu, offset, Post); return;
10851 case 19: load_pair_long_double (cpu, offset, Post); return;
10852 case 20: store_pair_long_double (cpu, offset, NoWriteBack); return;
10853 case 21: load_pair_long_double (cpu, offset, NoWriteBack); return;
10854 case 22: store_pair_long_double (cpu, offset, Pre); return;
10855 case 23: load_pair_long_double (cpu, offset, Pre); return;
10856
10857 default:
10858 HALT_UNALLOC;
10859 }
10860 }
10861
10862 static inline unsigned
10863 vec_reg (unsigned v, unsigned o)
10864 {
  return (v + o) & 0x1F;	/* Register numbers wrap modulo 32.  */
10866 }
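/* With the modulo-32 wrap above, a multi-register transfer whose first
   register is V30 addresses V30, V31, V0 and V1: for example
   vec_reg (30, 2) == 0 and vec_reg (30, 3) == 1.  */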
10867
10868 /* Load multiple N-element structures to N consecutive registers. */
10869 static void
10870 vec_load (sim_cpu *cpu, uint64_t address, unsigned N)
10871 {
10872 int all = INSTR (30, 30);
10873 unsigned size = INSTR (11, 10);
10874 unsigned vd = INSTR (4, 0);
10875 unsigned i;
10876
10877 switch (size)
10878 {
10879 case 0: /* 8-bit operations. */
10880 if (all)
10881 for (i = 0; i < (16 * N); i++)
10882 aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15,
10883 aarch64_get_mem_u8 (cpu, address + i));
10884 else
10885 for (i = 0; i < (8 * N); i++)
10886 aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7,
10887 aarch64_get_mem_u8 (cpu, address + i));
10888 return;
10889
10890 case 1: /* 16-bit operations. */
10891 if (all)
10892 for (i = 0; i < (8 * N); i++)
10893 aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7,
10894 aarch64_get_mem_u16 (cpu, address + i * 2));
10895 else
10896 for (i = 0; i < (4 * N); i++)
10897 aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3,
10898 aarch64_get_mem_u16 (cpu, address + i * 2));
10899 return;
10900
10901 case 2: /* 32-bit operations. */
10902 if (all)
10903 for (i = 0; i < (4 * N); i++)
10904 aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3,
10905 aarch64_get_mem_u32 (cpu, address + i * 4));
10906 else
10907 for (i = 0; i < (2 * N); i++)
10908 aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1,
10909 aarch64_get_mem_u32 (cpu, address + i * 4));
10910 return;
10911
10912 case 3: /* 64-bit operations. */
10913 if (all)
10914 for (i = 0; i < (2 * N); i++)
10915 aarch64_set_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1,
10916 aarch64_get_mem_u64 (cpu, address + i * 8));
10917 else
10918 for (i = 0; i < N; i++)
10919 aarch64_set_vec_u64 (cpu, vec_reg (vd, i), 0,
10920 aarch64_get_mem_u64 (cpu, address + i * 8));
10921 return;
10922 }
10923 }
10924
10925 /* LD4: load multiple 4-element to four consecutive registers. */
10926 static void
10927 LD4 (sim_cpu *cpu, uint64_t address)
10928 {
10929 vec_load (cpu, address, 4);
10930 }
10931
10932 /* LD3: load multiple 3-element structures to three consecutive registers. */
10933 static void
10934 LD3 (sim_cpu *cpu, uint64_t address)
10935 {
10936 vec_load (cpu, address, 3);
10937 }
10938
10939 /* LD2: load multiple 2-element structures to two consecutive registers. */
10940 static void
10941 LD2 (sim_cpu *cpu, uint64_t address)
10942 {
10943 vec_load (cpu, address, 2);
10944 }
10945
10946 /* Load multiple 1-element structures into one register. */
10947 static void
10948 LD1_1 (sim_cpu *cpu, uint64_t address)
10949 {
10950 int all = INSTR (30, 30);
10951 unsigned size = INSTR (11, 10);
10952 unsigned vd = INSTR (4, 0);
10953 unsigned i;
10954
10955 switch (size)
10956 {
10957 case 0:
10958 /* LD1 {Vd.16b}, addr, #16 */
10959 /* LD1 {Vd.8b}, addr, #8 */
10960 for (i = 0; i < (all ? 16 : 8); i++)
10961 aarch64_set_vec_u8 (cpu, vd, i,
10962 aarch64_get_mem_u8 (cpu, address + i));
10963 return;
10964
10965 case 1:
10966 /* LD1 {Vd.8h}, addr, #16 */
10967 /* LD1 {Vd.4h}, addr, #8 */
10968 for (i = 0; i < (all ? 8 : 4); i++)
10969 aarch64_set_vec_u16 (cpu, vd, i,
10970 aarch64_get_mem_u16 (cpu, address + i * 2));
10971 return;
10972
10973 case 2:
10974 /* LD1 {Vd.4s}, addr, #16 */
10975 /* LD1 {Vd.2s}, addr, #8 */
10976 for (i = 0; i < (all ? 4 : 2); i++)
10977 aarch64_set_vec_u32 (cpu, vd, i,
10978 aarch64_get_mem_u32 (cpu, address + i * 4));
10979 return;
10980
10981 case 3:
10982 /* LD1 {Vd.2d}, addr, #16 */
10983 /* LD1 {Vd.1d}, addr, #8 */
10984 for (i = 0; i < (all ? 2 : 1); i++)
10985 aarch64_set_vec_u64 (cpu, vd, i,
10986 aarch64_get_mem_u64 (cpu, address + i * 8));
10987 return;
10988 }
10989 }
10990
10991 /* Load multiple 1-element structures into two registers. */
10992 static void
10993 LD1_2 (sim_cpu *cpu, uint64_t address)
10994 {
  /* FIXME: This calls the same helper as the LD2 version, but LD2
     should de-interleave its elements across the two registers while
     LD1 should not; vec_load only models the LD1 (no interleave)
     layout.  */
10998 vec_load (cpu, address, 2);
10999 }
11000
11001 /* Load multiple 1-element structures into three registers. */
11002 static void
11003 LD1_3 (sim_cpu *cpu, uint64_t address)
11004 {
  /* FIXME: This calls the same helper as the LD3 version, but LD3
     should de-interleave its elements across the three registers while
     LD1 should not; vec_load only models the LD1 (no interleave)
     layout.  */
11008 vec_load (cpu, address, 3);
11009 }
11010
11011 /* Load multiple 1-element structures into four registers. */
11012 static void
11013 LD1_4 (sim_cpu *cpu, uint64_t address)
11014 {
  /* FIXME: This calls the same helper as the LD4 version, but LD4
     should de-interleave its elements across the four registers while
     LD1 should not; vec_load only models the LD1 (no interleave)
     layout.  */
11018 vec_load (cpu, address, 4);
11019 }
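/* Concretely, for the FIXMEs above: given memory bytes b0,b1,b2,...,
   LD2 {V0.8B, V1.8B} de-interleaves (V0 = b0,b2,b4,..., V1 = b1,b3,
   b5,...) while LD1 {V0.8B, V1.8B} loads V0 = b0..b7 and V1 = b8..b15;
   vec_load implements only the latter, consecutive layout.  */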
11020
11021 /* Store multiple N-element structures to N consecutive registers. */
11022 static void
11023 vec_store (sim_cpu *cpu, uint64_t address, unsigned N)
11024 {
11025 int all = INSTR (30, 30);
11026 unsigned size = INSTR (11, 10);
11027 unsigned vd = INSTR (4, 0);
11028 unsigned i;
11029
11030 switch (size)
11031 {
11032 case 0: /* 8-bit operations. */
11033 if (all)
11034 for (i = 0; i < (16 * N); i++)
11035 aarch64_set_mem_u8
11036 (cpu, address + i,
11037 aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15));
11038 else
11039 for (i = 0; i < (8 * N); i++)
11040 aarch64_set_mem_u8
11041 (cpu, address + i,
11042 aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7));
11043 return;
11044
11045 case 1: /* 16-bit operations. */
11046 if (all)
11047 for (i = 0; i < (8 * N); i++)
11048 aarch64_set_mem_u16
11049 (cpu, address + i * 2,
11050 aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7));
11051 else
11052 for (i = 0; i < (4 * N); i++)
11053 aarch64_set_mem_u16
11054 (cpu, address + i * 2,
11055 aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3));
11056 return;
11057
11058 case 2: /* 32-bit operations. */
11059 if (all)
11060 for (i = 0; i < (4 * N); i++)
11061 aarch64_set_mem_u32
11062 (cpu, address + i * 4,
11063 aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3));
11064 else
11065 for (i = 0; i < (2 * N); i++)
11066 aarch64_set_mem_u32
11067 (cpu, address + i * 4,
11068 aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1));
11069 return;
11070
11071 case 3: /* 64-bit operations. */
11072 if (all)
11073 for (i = 0; i < (2 * N); i++)
11074 aarch64_set_mem_u64
11075 (cpu, address + i * 8,
11076 aarch64_get_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1));
11077 else
11078 for (i = 0; i < N; i++)
11079 aarch64_set_mem_u64
11080 (cpu, address + i * 8,
11081 aarch64_get_vec_u64 (cpu, vec_reg (vd, i), 0));
11082 return;
11083 }
11084 }
11085
11086 /* Store multiple 4-element structure to four consecutive registers. */
11087 static void
11088 ST4 (sim_cpu *cpu, uint64_t address)
11089 {
11090 vec_store (cpu, address, 4);
11091 }
11092
11093 /* Store multiple 3-element structures to three consecutive registers. */
11094 static void
11095 ST3 (sim_cpu *cpu, uint64_t address)
11096 {
11097 vec_store (cpu, address, 3);
11098 }
11099
11100 /* Store multiple 2-element structures to two consecutive registers. */
11101 static void
11102 ST2 (sim_cpu *cpu, uint64_t address)
11103 {
11104 vec_store (cpu, address, 2);
11105 }
11106
11107 /* Store multiple 1-element structures into one register. */
11108 static void
11109 ST1_1 (sim_cpu *cpu, uint64_t address)
11110 {
11111 int all = INSTR (30, 30);
11112 unsigned size = INSTR (11, 10);
11113 unsigned vd = INSTR (4, 0);
11114 unsigned i;
11115
11116 switch (size)
11117 {
11118 case 0:
11119 for (i = 0; i < (all ? 16 : 8); i++)
11120 aarch64_set_mem_u8 (cpu, address + i,
11121 aarch64_get_vec_u8 (cpu, vd, i));
11122 return;
11123
11124 case 1:
11125 for (i = 0; i < (all ? 8 : 4); i++)
11126 aarch64_set_mem_u16 (cpu, address + i * 2,
11127 aarch64_get_vec_u16 (cpu, vd, i));
11128 return;
11129
11130 case 2:
11131 for (i = 0; i < (all ? 4 : 2); i++)
11132 aarch64_set_mem_u32 (cpu, address + i * 4,
11133 aarch64_get_vec_u32 (cpu, vd, i));
11134 return;
11135
11136 case 3:
11137 for (i = 0; i < (all ? 2 : 1); i++)
11138 aarch64_set_mem_u64 (cpu, address + i * 8,
11139 aarch64_get_vec_u64 (cpu, vd, i));
11140 return;
11141 }
11142 }
11143
11144 /* Store multiple 1-element structures into two registers. */
11145 static void
11146 ST1_2 (sim_cpu *cpu, uint64_t address)
11147 {
  /* FIXME: This calls the same helper as the ST2 version, but ST2
     should interleave the two registers element-wise into memory while
     ST1 should store them as consecutive blocks; vec_store only models
     the ST1 layout.  */
11151 vec_store (cpu, address, 2);
11152 }
11153
11154 /* Store multiple 1-element structures into three registers. */
11155 static void
11156 ST1_3 (sim_cpu *cpu, uint64_t address)
11157 {
  /* FIXME: This calls the same helper as the ST3 version, but ST3
     should interleave the three registers element-wise into memory
     while ST1 should store them as consecutive blocks; vec_store only
     models the ST1 layout.  */
11161 vec_store (cpu, address, 3);
11162 }
11163
11164 /* Store multiple 1-element structures into four registers. */
11165 static void
11166 ST1_4 (sim_cpu *cpu, uint64_t address)
11167 {
  /* FIXME: This calls the same helper as the ST4 version, but ST4
     should interleave the four registers element-wise into memory
     while ST1 should store them as consecutive blocks; vec_store only
     models the ST1 layout.  */
11171 vec_store (cpu, address, 4);
11172 }
11173
11174 static void
11175 do_vec_LDnR (sim_cpu *cpu, uint64_t address)
11176 {
11177 /* instr[31] = 0
11178 instr[30] = element selector 0=>half, 1=>all elements
11179 instr[29,24] = 00 1101
11180 instr[23] = 0=>simple, 1=>post
11181 instr[22] = 1
11182 instr[21] = width: LD1R-or-LD3R (0) / LD2R-or-LD4R (1)
11183 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11184 11111 (immediate post inc)
11185 instr[15,14] = 11
11186 instr[13] = width: LD1R-or-LD2R (0) / LD3R-or-LD4R (1)
11187 instr[12] = 0
11188 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11189 10=> word(s), 11=> double(d)
11190 instr[9,5] = address
11191 instr[4,0] = Vd */
11192
11193 unsigned full = INSTR (30, 30);
11194 unsigned vd = INSTR (4, 0);
11195 unsigned size = INSTR (11, 10);
11196 int i;
11197
11198 NYI_assert (29, 24, 0x0D);
11199 NYI_assert (22, 22, 1);
11200 NYI_assert (15, 14, 3);
11201 NYI_assert (12, 12, 0);
11202
11203 switch ((INSTR (13, 13) << 1) | INSTR (21, 21))
11204 {
11205 case 0: /* LD1R. */
11206 switch (size)
11207 {
11208 case 0:
11209 {
11210 uint8_t val = aarch64_get_mem_u8 (cpu, address);
11211 for (i = 0; i < (full ? 16 : 8); i++)
11212 aarch64_set_vec_u8 (cpu, vd, i, val);
11213 break;
11214 }
11215
11216 case 1:
11217 {
11218 uint16_t val = aarch64_get_mem_u16 (cpu, address);
11219 for (i = 0; i < (full ? 8 : 4); i++)
11220 aarch64_set_vec_u16 (cpu, vd, i, val);
11221 break;
11222 }
11223
11224 case 2:
11225 {
11226 uint32_t val = aarch64_get_mem_u32 (cpu, address);
11227 for (i = 0; i < (full ? 4 : 2); i++)
11228 aarch64_set_vec_u32 (cpu, vd, i, val);
11229 break;
11230 }
11231
11232 case 3:
11233 {
11234 uint64_t val = aarch64_get_mem_u64 (cpu, address);
11235 for (i = 0; i < (full ? 2 : 1); i++)
11236 aarch64_set_vec_u64 (cpu, vd, i, val);
11237 break;
11238 }
11239
11240 default:
11241 HALT_UNALLOC;
11242 }
11243 break;
11244
11245 case 1: /* LD2R. */
11246 switch (size)
11247 {
11248 case 0:
11249 {
11250 uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
11251 uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
11252
11253 for (i = 0; i < (full ? 16 : 8); i++)
11254 {
		  aarch64_set_vec_u8 (cpu, vd, i, val1);
		  aarch64_set_vec_u8 (cpu, vec_reg (vd, 1), i, val2);
11257 }
11258 break;
11259 }
11260
11261 case 1:
11262 {
11263 uint16_t val1 = aarch64_get_mem_u16 (cpu, address);
11264 uint16_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
11265
11266 for (i = 0; i < (full ? 8 : 4); i++)
11267 {
		  aarch64_set_vec_u16 (cpu, vd, i, val1);
		  aarch64_set_vec_u16 (cpu, vec_reg (vd, 1), i, val2);
11270 }
11271 break;
11272 }
11273
11274 case 2:
11275 {
11276 uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
11277 uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
11278
11279 for (i = 0; i < (full ? 4 : 2); i++)
11280 {
		  aarch64_set_vec_u32 (cpu, vd, i, val1);
		  aarch64_set_vec_u32 (cpu, vec_reg (vd, 1), i, val2);
11283 }
11284 break;
11285 }
11286
11287 case 3:
11288 {
11289 uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
11290 uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
11291
11292 for (i = 0; i < (full ? 2 : 1); i++)
11293 {
		  aarch64_set_vec_u64 (cpu, vd, i, val1);
		  aarch64_set_vec_u64 (cpu, vec_reg (vd, 1), i, val2);
11296 }
11297 break;
11298 }
11299
11300 default:
11301 HALT_UNALLOC;
11302 }
11303 break;
11304
11305 case 2: /* LD3R. */
11306 switch (size)
11307 {
11308 case 0:
11309 {
11310 uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
11311 uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
11312 uint8_t val3 = aarch64_get_mem_u8 (cpu, address + 2);
11313
11314 for (i = 0; i < (full ? 16 : 8); i++)
11315 {
		  aarch64_set_vec_u8 (cpu, vd, i, val1);
		  aarch64_set_vec_u8 (cpu, vec_reg (vd, 1), i, val2);
		  aarch64_set_vec_u8 (cpu, vec_reg (vd, 2), i, val3);
11319 }
11320 }
11321 break;
11322
11323 case 1:
11324 {
	    uint16_t val1 = aarch64_get_mem_u16 (cpu, address);
	    uint16_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
	    uint16_t val3 = aarch64_get_mem_u16 (cpu, address + 4);
11328
11329 for (i = 0; i < (full ? 8 : 4); i++)
11330 {
		  aarch64_set_vec_u16 (cpu, vd, i, val1);
		  aarch64_set_vec_u16 (cpu, vec_reg (vd, 1), i, val2);
		  aarch64_set_vec_u16 (cpu, vec_reg (vd, 2), i, val3);
11334 }
11335 }
11336 break;
11337
11338 case 2:
11339 {
11340 uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
11341 uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
11342 uint32_t val3 = aarch64_get_mem_u32 (cpu, address + 8);
11343
11344 for (i = 0; i < (full ? 4 : 2); i++)
11345 {
		  aarch64_set_vec_u32 (cpu, vd, i, val1);
		  aarch64_set_vec_u32 (cpu, vec_reg (vd, 1), i, val2);
		  aarch64_set_vec_u32 (cpu, vec_reg (vd, 2), i, val3);
11349 }
11350 }
11351 break;
11352
11353 case 3:
11354 {
11355 uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
11356 uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
11357 uint64_t val3 = aarch64_get_mem_u64 (cpu, address + 16);
11358
11359 for (i = 0; i < (full ? 2 : 1); i++)
11360 {
		  aarch64_set_vec_u64 (cpu, vd, i, val1);
		  aarch64_set_vec_u64 (cpu, vec_reg (vd, 1), i, val2);
		  aarch64_set_vec_u64 (cpu, vec_reg (vd, 2), i, val3);
11364 }
11365 }
11366 break;
11367
11368 default:
11369 HALT_UNALLOC;
11370 }
11371 break;
11372
11373 case 3: /* LD4R. */
11374 switch (size)
11375 {
11376 case 0:
11377 {
11378 uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
11379 uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
11380 uint8_t val3 = aarch64_get_mem_u8 (cpu, address + 2);
11381 uint8_t val4 = aarch64_get_mem_u8 (cpu, address + 3);
11382
11383 for (i = 0; i < (full ? 16 : 8); i++)
11384 {
		  aarch64_set_vec_u8 (cpu, vd, i, val1);
		  aarch64_set_vec_u8 (cpu, vec_reg (vd, 1), i, val2);
		  aarch64_set_vec_u8 (cpu, vec_reg (vd, 2), i, val3);
		  aarch64_set_vec_u8 (cpu, vec_reg (vd, 3), i, val4);
11389 }
11390 }
11391 break;
11392
11393 case 1:
11394 {
	    uint16_t val1 = aarch64_get_mem_u16 (cpu, address);
	    uint16_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
	    uint16_t val3 = aarch64_get_mem_u16 (cpu, address + 4);
	    uint16_t val4 = aarch64_get_mem_u16 (cpu, address + 6);
11399
11400 for (i = 0; i < (full ? 8 : 4); i++)
11401 {
		  aarch64_set_vec_u16 (cpu, vd, i, val1);
		  aarch64_set_vec_u16 (cpu, vec_reg (vd, 1), i, val2);
		  aarch64_set_vec_u16 (cpu, vec_reg (vd, 2), i, val3);
		  aarch64_set_vec_u16 (cpu, vec_reg (vd, 3), i, val4);
11406 }
11407 }
11408 break;
11409
11410 case 2:
11411 {
11412 uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
11413 uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
11414 uint32_t val3 = aarch64_get_mem_u32 (cpu, address + 8);
11415 uint32_t val4 = aarch64_get_mem_u32 (cpu, address + 12);
11416
11417 for (i = 0; i < (full ? 4 : 2); i++)
11418 {
		  aarch64_set_vec_u32 (cpu, vd, i, val1);
		  aarch64_set_vec_u32 (cpu, vec_reg (vd, 1), i, val2);
		  aarch64_set_vec_u32 (cpu, vec_reg (vd, 2), i, val3);
		  aarch64_set_vec_u32 (cpu, vec_reg (vd, 3), i, val4);
11423 }
11424 }
11425 break;
11426
11427 case 3:
11428 {
11429 uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
11430 uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
11431 uint64_t val3 = aarch64_get_mem_u64 (cpu, address + 16);
11432 uint64_t val4 = aarch64_get_mem_u64 (cpu, address + 24);
11433
11434 for (i = 0; i < (full ? 2 : 1); i++)
11435 {
		  aarch64_set_vec_u64 (cpu, vd, i, val1);
		  aarch64_set_vec_u64 (cpu, vec_reg (vd, 1), i, val2);
		  aarch64_set_vec_u64 (cpu, vec_reg (vd, 2), i, val3);
		  aarch64_set_vec_u64 (cpu, vec_reg (vd, 3), i, val4);
11440 }
11441 }
11442 break;
11443
11444 default:
11445 HALT_UNALLOC;
11446 }
11447 break;
11448
11449 default:
11450 HALT_UNALLOC;
11451 }
11452 }
11453
11454 static void
11455 do_vec_load_store (sim_cpu *cpu)
11456 {
11457 /* {LD|ST}<N> {Vd..Vd+N}, vaddr
11458
11459 instr[31] = 0
11460 instr[30] = element selector 0=>half, 1=>all elements
11461 instr[29,25] = 00110
11462 instr[24] = ?
11463 instr[23] = 0=>simple, 1=>post
11464 instr[22] = 0=>store, 1=>load
11465 instr[21] = 0 (LDn) / small(0)-large(1) selector (LDnR)
11466 instr[20,16] = 00000 (simple), Vinc (reg-post-inc, no SP),
11467 11111 (immediate post inc)
11468 instr[15,12] = elements and destinations. eg for load:
11469 0000=>LD4 => load multiple 4-element to
11470 four consecutive registers
11471 0100=>LD3 => load multiple 3-element to
11472 three consecutive registers
11473 1000=>LD2 => load multiple 2-element to
11474 two consecutive registers
11475 0010=>LD1 => load multiple 1-element to
11476 four consecutive registers
11477 0110=>LD1 => load multiple 1-element to
11478 three consecutive registers
11479 1010=>LD1 => load multiple 1-element to
11480 two consecutive registers
11481 0111=>LD1 => load multiple 1-element to
11482 one register
                    1100=>LD1R,LD2R
                    1110=>LD3R,LD4R
11485 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11486 10=> word(s), 11=> double(d)
11487 instr[9,5] = Vn, can be SP
11488 instr[4,0] = Vd */
11489
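  /* Example decode: LD4 {V0.16B-V3.16B}, [X2], #64 has instr[30] = 1
     (all elements), instr[23] = 1 (post), instr[22] = 1 (load),
     instr[15,12] = 0000 and instr[20,16] = 11111, so below the base in
     X2 is post-incremented by sizeof_operation = 32 * 2 = 64 and the
     transfer itself is handled by LD4.  */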
11490 int post;
11491 int load;
11492 unsigned vn;
11493 uint64_t address;
11494 int type;
11495
11496 if (INSTR (31, 31) != 0 || INSTR (29, 25) != 0x06)
11497 HALT_NYI;
11498
11499 type = INSTR (15, 12);
  if (type != 0xE && type != 0xC && INSTR (21, 21) != 0)
11501 HALT_NYI;
11502
11503 post = INSTR (23, 23);
11504 load = INSTR (22, 22);
11505 vn = INSTR (9, 5);
11506 address = aarch64_get_reg_u64 (cpu, vn, SP_OK);
11507
11508 if (post)
11509 {
11510 unsigned vm = INSTR (20, 16);
11511
11512 if (vm == R31)
11513 {
11514 unsigned sizeof_operation;
11515
11516 switch (type)
11517 {
11518 case 0: sizeof_operation = 32; break;
11519 case 4: sizeof_operation = 24; break;
11520 case 8: sizeof_operation = 16; break;
11521
11522 case 0xC:
11523 sizeof_operation = INSTR (21, 21) ? 2 : 1;
11524 sizeof_operation <<= INSTR (11, 10);
11525 break;
11526
11527 case 0xE:
	      sizeof_operation = INSTR (21, 21) ? 4 : 3;	/* LD4R / LD3R.  */
11529 sizeof_operation <<= INSTR (11, 10);
11530 break;
11531
11532 case 7:
11533 /* One register, immediate offset variant. */
11534 sizeof_operation = 8;
11535 break;
11536
11537 case 10:
11538 /* Two registers, immediate offset variant. */
11539 sizeof_operation = 16;
11540 break;
11541
11542 case 6:
11543 /* Three registers, immediate offset variant. */
11544 sizeof_operation = 24;
11545 break;
11546
11547 case 2:
11548 /* Four registers, immediate offset variant. */
11549 sizeof_operation = 32;
11550 break;
11551
11552 default:
11553 HALT_UNALLOC;
11554 }
11555
	  /* The LDnR replicate forms always advance by the number of
	     elements loaded times the element size, regardless of Q.  */
	  if (INSTR (30, 30) && type != 0xC && type != 0xE)
	    sizeof_operation *= 2;
11558
11559 aarch64_set_reg_u64 (cpu, vn, SP_OK, address + sizeof_operation);
11560 }
11561 else
11562 aarch64_set_reg_u64 (cpu, vn, SP_OK,
11563 address + aarch64_get_reg_u64 (cpu, vm, NO_SP));
11564 }
11565 else
11566 {
11567 NYI_assert (20, 16, 0);
11568 }
11569
11570 if (load)
11571 {
11572 switch (type)
11573 {
11574 case 0: LD4 (cpu, address); return;
11575 case 4: LD3 (cpu, address); return;
11576 case 8: LD2 (cpu, address); return;
11577 case 2: LD1_4 (cpu, address); return;
11578 case 6: LD1_3 (cpu, address); return;
11579 case 10: LD1_2 (cpu, address); return;
11580 case 7: LD1_1 (cpu, address); return;
11581
11582 case 0xE:
11583 case 0xC: do_vec_LDnR (cpu, address); return;
11584
11585 default:
11586 HALT_NYI;
11587 }
11588 }
11589
11590 /* Stores. */
11591 switch (type)
11592 {
11593 case 0: ST4 (cpu, address); return;
11594 case 4: ST3 (cpu, address); return;
11595 case 8: ST2 (cpu, address); return;
11596 case 2: ST1_4 (cpu, address); return;
11597 case 6: ST1_3 (cpu, address); return;
11598 case 10: ST1_2 (cpu, address); return;
11599 case 7: ST1_1 (cpu, address); return;
11600 default:
11601 HALT_NYI;
11602 }
11603 }
11604
11605 static void
11606 dexLdSt (sim_cpu *cpu)
11607 {
11608 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
11609 assert group == GROUP_LDST_0100 || group == GROUP_LDST_0110 ||
11610 group == GROUP_LDST_1100 || group == GROUP_LDST_1110
11611 bits [29,28:26] of a LS are the secondary dispatch vector. */
11612 uint32_t group2 = dispatchLS (aarch64_get_instr (cpu));
11613
11614 switch (group2)
11615 {
11616 case LS_EXCL_000:
11617 dexLoadExclusive (cpu); return;
11618
11619 case LS_LIT_010:
11620 case LS_LIT_011:
11621 dexLoadLiteral (cpu); return;
11622
11623 case LS_OTHER_110:
11624 case LS_OTHER_111:
11625 dexLoadOther (cpu); return;
11626
11627 case LS_ADVSIMD_001:
11628 do_vec_load_store (cpu); return;
11629
11630 case LS_PAIR_100:
11631 dex_load_store_pair_gr (cpu); return;
11632
11633 case LS_PAIR_101:
11634 dex_load_store_pair_fp (cpu); return;
11635
11636 default:
11637 /* Should never reach here. */
11638 HALT_NYI;
11639 }
11640 }
11641
11642 /* Specific decode and execute for group Data Processing Register. */
11643
11644 static void
11645 dexLogicalShiftedRegister (sim_cpu *cpu)
11646 {
11647 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
11648 instr[30,29] = op
     instr[28,24] = 01010
11650 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> ROR
11651 instr[21] = N
11652 instr[20,16] = Rm
11653 instr[15,10] = count : must be 0xxxxx for 32 bit
11654 instr[9,5] = Rn
11655 instr[4,0] = Rd */
11656
11657 uint32_t size = INSTR (31, 31);
11658 Shift shiftType = INSTR (23, 22);
11659 uint32_t count = INSTR (15, 10);
11660
  /* 32 bit operations must have count[5] = 0,
     otherwise the encoding is UNALLOC.  */
11663 if (size == 0 && uimm (count, 5, 5))
11664 HALT_UNALLOC;
11665
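  /* For example BIC X0, X1, X2 has size:op = 100 and N = 1, giving
     (4 << 1) | 1 = 9, i.e. bic64_shift below.  */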
11666 /* Dispatch on size:op:N. */
11667 switch ((INSTR (31, 29) << 1) | INSTR (21, 21))
11668 {
11669 case 0: and32_shift (cpu, shiftType, count); return;
11670 case 1: bic32_shift (cpu, shiftType, count); return;
11671 case 2: orr32_shift (cpu, shiftType, count); return;
11672 case 3: orn32_shift (cpu, shiftType, count); return;
11673 case 4: eor32_shift (cpu, shiftType, count); return;
11674 case 5: eon32_shift (cpu, shiftType, count); return;
11675 case 6: ands32_shift (cpu, shiftType, count); return;
11676 case 7: bics32_shift (cpu, shiftType, count); return;
11677 case 8: and64_shift (cpu, shiftType, count); return;
11678 case 9: bic64_shift (cpu, shiftType, count); return;
    case 10: orr64_shift (cpu, shiftType, count); return;
    case 11: orn64_shift (cpu, shiftType, count); return;
    case 12: eor64_shift (cpu, shiftType, count); return;
    case 13: eon64_shift (cpu, shiftType, count); return;
    case 14: ands64_shift (cpu, shiftType, count); return;
    case 15: bics64_shift (cpu, shiftType, count); return;
11685 }
11686 }
11687
11688 /* 32 bit conditional select. */
11689 static void
11690 csel32 (sim_cpu *cpu, CondCode cc)
11691 {
11692 unsigned rm = INSTR (20, 16);
11693 unsigned rn = INSTR (9, 5);
11694 unsigned rd = INSTR (4, 0);
11695
11696 aarch64_set_reg_u64 (cpu, rd, NO_SP,
11697 testConditionCode (cpu, cc)
11698 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
11699 : aarch64_get_reg_u32 (cpu, rm, NO_SP));
11700 }
11701
11702 /* 64 bit conditional select. */
11703 static void
11704 csel64 (sim_cpu *cpu, CondCode cc)
11705 {
11706 unsigned rm = INSTR (20, 16);
11707 unsigned rn = INSTR (9, 5);
11708 unsigned rd = INSTR (4, 0);
11709
11710 aarch64_set_reg_u64 (cpu, rd, NO_SP,
11711 testConditionCode (cpu, cc)
11712 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
11713 : aarch64_get_reg_u64 (cpu, rm, NO_SP));
11714 }
11715
11716 /* 32 bit conditional increment. */
11717 static void
11718 csinc32 (sim_cpu *cpu, CondCode cc)
11719 {
11720 unsigned rm = INSTR (20, 16);
11721 unsigned rn = INSTR (9, 5);
11722 unsigned rd = INSTR (4, 0);
11723
11724 aarch64_set_reg_u64 (cpu, rd, NO_SP,
11725 testConditionCode (cpu, cc)
11726 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
11727 : aarch64_get_reg_u32 (cpu, rm, NO_SP) + 1);
11728 }
11729
11730 /* 64 bit conditional increment. */
11731 static void
11732 csinc64 (sim_cpu *cpu, CondCode cc)
11733 {
11734 unsigned rm = INSTR (20, 16);
11735 unsigned rn = INSTR (9, 5);
11736 unsigned rd = INSTR (4, 0);
11737
11738 aarch64_set_reg_u64 (cpu, rd, NO_SP,
11739 testConditionCode (cpu, cc)
11740 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
11741 : aarch64_get_reg_u64 (cpu, rm, NO_SP) + 1);
11742 }
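/* N.B. the conditional increment forms also back the assembler aliases
   CINC Rd, Rn, cond (CSINC Rd, Rn, Rn with the condition inverted) and
   CSET Rd, cond (CSINC Rd, ZR, ZR with the condition inverted).  */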
11743
11744 /* 32 bit conditional invert. */
11745 static void
11746 csinv32 (sim_cpu *cpu, CondCode cc)
11747 {
11748 unsigned rm = INSTR (20, 16);
11749 unsigned rn = INSTR (9, 5);
11750 unsigned rd = INSTR (4, 0);
11751
11752 aarch64_set_reg_u64 (cpu, rd, NO_SP,
11753 testConditionCode (cpu, cc)
11754 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
11755 : ~ aarch64_get_reg_u32 (cpu, rm, NO_SP));
11756 }
11757
11758 /* 64 bit conditional invert. */
11759 static void
11760 csinv64 (sim_cpu *cpu, CondCode cc)
11761 {
11762 unsigned rm = INSTR (20, 16);
11763 unsigned rn = INSTR (9, 5);
11764 unsigned rd = INSTR (4, 0);
11765
11766 aarch64_set_reg_u64 (cpu, rd, NO_SP,
11767 testConditionCode (cpu, cc)
11768 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
11769 : ~ aarch64_get_reg_u64 (cpu, rm, NO_SP));
11770 }
11771
11772 /* 32 bit conditional negate. */
11773 static void
11774 csneg32 (sim_cpu *cpu, CondCode cc)
11775 {
11776 unsigned rm = INSTR (20, 16);
11777 unsigned rn = INSTR (9, 5);
11778 unsigned rd = INSTR (4, 0);
11779
11780 aarch64_set_reg_u64 (cpu, rd, NO_SP,
11781 testConditionCode (cpu, cc)
11782 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
11783 : - aarch64_get_reg_u32 (cpu, rm, NO_SP));
11784 }
11785
11786 /* 64 bit conditional negate. */
11787 static void
11788 csneg64 (sim_cpu *cpu, CondCode cc)
11789 {
11790 unsigned rm = INSTR (20, 16);
11791 unsigned rn = INSTR (9, 5);
11792 unsigned rd = INSTR (4, 0);
11793
11794 aarch64_set_reg_u64 (cpu, rd, NO_SP,
11795 testConditionCode (cpu, cc)
11796 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
11797 : - aarch64_get_reg_u64 (cpu, rm, NO_SP));
11798 }
11799
11800 static void
11801 dexCondSelect (sim_cpu *cpu)
11802 {
  /* instr[28,21] = 11011011
     instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
     instr[30:11,10] = op : 000 ==> CSEL, 001 ==> CSINC,
                            100 ==> CSINV, 101 ==> CSNEG,
                            _1_ ==> UNALLOC
     instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
     instr[15,12] = cond  */
11811
11812 CondCode cc = INSTR (15, 12);
11813 uint32_t S = INSTR (29, 29);
11814 uint32_t op2 = INSTR (11, 10);
11815
11816 if (S == 1)
11817 HALT_UNALLOC;
11818
11819 if (op2 & 0x2)
11820 HALT_UNALLOC;
11821
11822 switch ((INSTR (31, 30) << 1) | op2)
11823 {
11824 case 0: csel32 (cpu, cc); return;
11825 case 1: csinc32 (cpu, cc); return;
11826 case 2: csinv32 (cpu, cc); return;
11827 case 3: csneg32 (cpu, cc); return;
11828 case 4: csel64 (cpu, cc); return;
11829 case 5: csinc64 (cpu, cc); return;
11830 case 6: csinv64 (cpu, cc); return;
11831 case 7: csneg64 (cpu, cc); return;
11832 }
11833 }
11834
11835 /* Some helpers for counting leading 1 or 0 bits. */
11836
11837 /* Counts the number of leading bits which are the same
11838 in a 32 bit value in the range 1 to 32. */
11839 static uint32_t
11840 leading32 (uint32_t value)
11841 {
  int32_t mask = 0xffff0000;
  uint32_t count = 16;  /* Counts number of bits set in mask.  */
11844 uint32_t lo = 1; /* Lower bound for number of sign bits. */
11845 uint32_t hi = 32; /* Upper bound for number of sign bits. */
11846
11847 while (lo + 1 < hi)
11848 {
11849 int32_t test = (value & mask);
11850
11851 if (test == 0 || test == mask)
11852 {
11853 lo = count;
11854 count = (lo + hi) / 2;
11855 mask >>= (count - lo);
11856 }
11857 else
11858 {
11859 hi = count;
11860 count = (lo + hi) / 2;
11861 mask <<= hi - count;
11862 }
11863 }
11864
11865 if (lo != hi)
11866 {
11867 int32_t test;
11868
11869 mask >>= 1;
11870 test = (value & mask);
11871
11872 if (test == 0 || test == mask)
11873 count = hi;
11874 else
11875 count = lo;
11876 }
11877
11878 return count;
11879 }
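/* Example values: leading32 (0xffffffff) and leading32 (0) both return
   32, leading32 (1) returns 31, and leading32 (0x0000ffff) returns 16,
   since bits 31..16 match the (zero) sign bit; cls32 and clz32 below
   build directly on these counts.  */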
11880
11881 /* Counts the number of leading bits which are the same
11882 in a 64 bit value in the range 1 to 64. */
11883 static uint64_t
11884 leading64 (uint64_t value)
11885 {
  int64_t mask = 0xffffffff00000000LL;
11887 uint64_t count = 32; /* Counts number of bits set in mask. */
11888 uint64_t lo = 1; /* Lower bound for number of sign bits. */
11889 uint64_t hi = 64; /* Upper bound for number of sign bits. */
11890
11891 while (lo + 1 < hi)
11892 {
11893 int64_t test = (value & mask);
11894
11895 if (test == 0 || test == mask)
11896 {
11897 lo = count;
11898 count = (lo + hi) / 2;
11899 mask >>= (count - lo);
11900 }
11901 else
11902 {
11903 hi = count;
11904 count = (lo + hi) / 2;
11905 mask <<= hi - count;
11906 }
11907 }
11908
11909 if (lo != hi)
11910 {
11911 int64_t test;
11912
11913 mask >>= 1;
11914 test = (value & mask);
11915
11916 if (test == 0 || test == mask)
11917 count = hi;
11918 else
11919 count = lo;
11920 }
11921
11922 return count;
11923 }
11924
11925 /* Bit operations. */
/* N.B. register args may not be SP.  */
11927
11928 /* 32 bit count leading sign bits. */
11929 static void
11930 cls32 (sim_cpu *cpu)
11931 {
11932 unsigned rn = INSTR (9, 5);
11933 unsigned rd = INSTR (4, 0);
11934
11935 /* N.B. the result needs to exclude the leading bit. */
11936 aarch64_set_reg_u64
11937 (cpu, rd, NO_SP, leading32 (aarch64_get_reg_u32 (cpu, rn, NO_SP)) - 1);
11938 }
11939
11940 /* 64 bit count leading sign bits. */
11941 static void
11942 cls64 (sim_cpu *cpu)
11943 {
11944 unsigned rn = INSTR (9, 5);
11945 unsigned rd = INSTR (4, 0);
11946
11947 /* N.B. the result needs to exclude the leading bit. */
11948 aarch64_set_reg_u64
11949 (cpu, rd, NO_SP, leading64 (aarch64_get_reg_u64 (cpu, rn, NO_SP)) - 1);
11950 }
11951
11952 /* 32 bit count leading zero bits. */
11953 static void
11954 clz32 (sim_cpu *cpu)
11955 {
11956 unsigned rn = INSTR (9, 5);
11957 unsigned rd = INSTR (4, 0);
11958 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
11959
11960 /* if the sign (top) bit is set then the count is 0. */
11961 if (pick32 (value, 31, 31))
11962 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
11963 else
11964 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading32 (value));
11965 }
11966
11967 /* 64 bit count leading zero bits. */
11968 static void
11969 clz64 (sim_cpu *cpu)
11970 {
11971 unsigned rn = INSTR (9, 5);
11972 unsigned rd = INSTR (4, 0);
11973 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
11974
11975 /* if the sign (top) bit is set then the count is 0. */
11976 if (pick64 (value, 63, 63))
11977 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
11978 else
11979 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading64 (value));
11980 }
11981
11982 /* 32 bit reverse bits. */
11983 static void
11984 rbit32 (sim_cpu *cpu)
11985 {
11986 unsigned rn = INSTR (9, 5);
11987 unsigned rd = INSTR (4, 0);
11988 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
11989 uint32_t result = 0;
11990 int i;
11991
11992 for (i = 0; i < 32; i++)
11993 {
11994 result <<= 1;
11995 result |= (value & 1);
11996 value >>= 1;
11997 }
11998 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
11999 }
12000
12001 /* 64 bit reverse bits. */
12002 static void
12003 rbit64 (sim_cpu *cpu)
12004 {
12005 unsigned rn = INSTR (9, 5);
12006 unsigned rd = INSTR (4, 0);
12007 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12008 uint64_t result = 0;
12009 int i;
12010
12011 for (i = 0; i < 64; i++)
12012 {
12013 result <<= 1;
12014 result |= (value & 1UL);
12015 value >>= 1;
12016 }
12017 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12018 }
12019
12020 /* 32 bit reverse bytes. */
12021 static void
12022 rev32 (sim_cpu *cpu)
12023 {
12024 unsigned rn = INSTR (9, 5);
12025 unsigned rd = INSTR (4, 0);
12026 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12027 uint32_t result = 0;
12028 int i;
12029
12030 for (i = 0; i < 4; i++)
12031 {
12032 result <<= 8;
12033 result |= (value & 0xff);
12034 value >>= 8;
12035 }
12036 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12037 }
12038
12039 /* 64 bit reverse bytes. */
12040 static void
12041 rev64 (sim_cpu *cpu)
12042 {
12043 unsigned rn = INSTR (9, 5);
12044 unsigned rd = INSTR (4, 0);
12045 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12046 uint64_t result = 0;
12047 int i;
12048
12049 for (i = 0; i < 8; i++)
12050 {
12051 result <<= 8;
12052 result |= (value & 0xffULL);
12053 value >>= 8;
12054 }
12055 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12056 }
12057
12058 /* 32 bit reverse shorts. */
/* N.B. this reverses the order of the bytes in each half word.  */
12060 static void
12061 revh32 (sim_cpu *cpu)
12062 {
12063 unsigned rn = INSTR (9, 5);
12064 unsigned rd = INSTR (4, 0);
12065 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12066 uint32_t result = 0;
12067 int i;
12068
12069 for (i = 0; i < 2; i++)
12070 {
12071 result <<= 8;
12072 result |= (value & 0x00ff00ff);
12073 value >>= 8;
12074 }
12075 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12076 }
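/* E.g. a register holding 0x11223344 becomes 0x22114433: the bytes of
   each 16-bit half are swapped but the two halves stay in place.  */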
12077
12078 /* 64 bit reverse shorts. */
/* N.B. this reverses the order of the bytes in each half word.  */
12080 static void
12081 revh64 (sim_cpu *cpu)
12082 {
12083 unsigned rn = INSTR (9, 5);
12084 unsigned rd = INSTR (4, 0);
12085 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12086 uint64_t result = 0;
12087 int i;
12088
12089 for (i = 0; i < 2; i++)
12090 {
12091 result <<= 8;
12092 result |= (value & 0x00ff00ff00ff00ffULL);
12093 value >>= 8;
12094 }
12095 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12096 }
12097
12098 static void
12099 dexDataProc1Source (sim_cpu *cpu)
12100 {
12101 /* instr[30] = 1
     instr[28,21] = 11010110
12103 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12104 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12105 instr[20,16] = opcode2 : 00000 ==> ok, ow ==> UNALLOC
12106 instr[15,10] = opcode : 000000 ==> RBIT, 000001 ==> REV16,
                          000010 ==> REV, 000011 ==> REV (64 bit only)
12108 000100 ==> CLZ, 000101 ==> CLS
12109 ow ==> UNALLOC
12110 instr[9,5] = rn : may not be SP
12111 instr[4,0] = rd : may not be SP. */
12112
12113 uint32_t S = INSTR (29, 29);
12114 uint32_t opcode2 = INSTR (20, 16);
12115 uint32_t opcode = INSTR (15, 10);
12116 uint32_t dispatch = ((INSTR (31, 31) << 3) | opcode);
12117
12118 if (S == 1)
12119 HALT_UNALLOC;
12120
12121 if (opcode2 != 0)
12122 HALT_UNALLOC;
12123
12124 if (opcode & 0x38)
12125 HALT_UNALLOC;
12126
12127 switch (dispatch)
12128 {
12129 case 0: rbit32 (cpu); return;
12130 case 1: revh32 (cpu); return;
12131 case 2: rev32 (cpu); return;
12132 case 4: clz32 (cpu); return;
12133 case 5: cls32 (cpu); return;
12134 case 8: rbit64 (cpu); return;
12135 case 9: revh64 (cpu); return;
    case 10: rev32 (cpu); return;
    case 11: rev64 (cpu); return;
    case 12: clz64 (cpu); return;
    case 13: cls64 (cpu); return;
12140 default: HALT_UNALLOC;
12141 }
12142 }
12143
12144 /* Variable shift.
12145 Shifts by count supplied in register.
   N.B. register args may not be SP.
12147 These all use the shifted auxiliary function for
12148 simplicity and clarity. Writing the actual shift
12149 inline would avoid a branch and so be faster but
12150 would also necessitate getting signs right. */
12151
12152 /* 32 bit arithmetic shift right. */
12153 static void
12154 asrv32 (sim_cpu *cpu)
12155 {
12156 unsigned rm = INSTR (20, 16);
12157 unsigned rn = INSTR (9, 5);
12158 unsigned rd = INSTR (4, 0);
12159
12160 aarch64_set_reg_u64
12161 (cpu, rd, NO_SP,
12162 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ASR,
12163 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12164 }
12165
12166 /* 64 bit arithmetic shift right. */
12167 static void
12168 asrv64 (sim_cpu *cpu)
12169 {
12170 unsigned rm = INSTR (20, 16);
12171 unsigned rn = INSTR (9, 5);
12172 unsigned rd = INSTR (4, 0);
12173
12174 aarch64_set_reg_u64
12175 (cpu, rd, NO_SP,
12176 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ASR,
12177 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12178 }
12179
12180 /* 32 bit logical shift left. */
12181 static void
12182 lslv32 (sim_cpu *cpu)
12183 {
12184 unsigned rm = INSTR (20, 16);
12185 unsigned rn = INSTR (9, 5);
12186 unsigned rd = INSTR (4, 0);
12187
12188 aarch64_set_reg_u64
12189 (cpu, rd, NO_SP,
12190 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSL,
12191 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12192 }
12193
/* 64 bit logical shift left.  */
12195 static void
12196 lslv64 (sim_cpu *cpu)
12197 {
12198 unsigned rm = INSTR (20, 16);
12199 unsigned rn = INSTR (9, 5);
12200 unsigned rd = INSTR (4, 0);
12201
12202 aarch64_set_reg_u64
12203 (cpu, rd, NO_SP,
12204 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSL,
12205 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12206 }
12207
12208 /* 32 bit logical shift right. */
12209 static void
12210 lsrv32 (sim_cpu *cpu)
12211 {
12212 unsigned rm = INSTR (20, 16);
12213 unsigned rn = INSTR (9, 5);
12214 unsigned rd = INSTR (4, 0);
12215
12216 aarch64_set_reg_u64
12217 (cpu, rd, NO_SP,
12218 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSR,
12219 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12220 }
12221
12222 /* 64 bit logical shift right. */
12223 static void
12224 lsrv64 (sim_cpu *cpu)
12225 {
12226 unsigned rm = INSTR (20, 16);
12227 unsigned rn = INSTR (9, 5);
12228 unsigned rd = INSTR (4, 0);
12229
12230 aarch64_set_reg_u64
12231 (cpu, rd, NO_SP,
12232 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSR,
12233 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12234 }
12235
12236 /* 32 bit rotate right. */
12237 static void
12238 rorv32 (sim_cpu *cpu)
12239 {
12240 unsigned rm = INSTR (20, 16);
12241 unsigned rn = INSTR (9, 5);
12242 unsigned rd = INSTR (4, 0);
12243
12244 aarch64_set_reg_u64
12245 (cpu, rd, NO_SP,
12246 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ROR,
12247 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12248 }
12249
12250 /* 64 bit rotate right. */
12251 static void
12252 rorv64 (sim_cpu *cpu)
12253 {
12254 unsigned rm = INSTR (20, 16);
12255 unsigned rn = INSTR (9, 5);
12256 unsigned rd = INSTR (4, 0);
12257
12258 aarch64_set_reg_u64
12259 (cpu, rd, NO_SP,
12260 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ROR,
12261 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12262 }
12263
12264
12265 /* divide. */
12266
12267 /* 32 bit signed divide. */
12268 static void
sdiv32 (sim_cpu *cpu)
12270 {
12271 unsigned rm = INSTR (20, 16);
12272 unsigned rn = INSTR (9, 5);
12273 unsigned rd = INSTR (4, 0);
12274 /* N.B. the pseudo-code does the divide using 64 bit data. */
  /* N.B. C99 integer division truncates towards zero, as required.  */
12276 int64_t dividend = aarch64_get_reg_s32 (cpu, rn, NO_SP);
12277 int64_t divisor = aarch64_get_reg_s32 (cpu, rm, NO_SP);
12278
12279 aarch64_set_reg_s64 (cpu, rd, NO_SP,
12280 divisor ? ((int32_t) (dividend / divisor)) : 0);
12281 }
12282
12283 /* 64 bit signed divide. */
12284 static void
sdiv64 (sim_cpu *cpu)
12286 {
12287 unsigned rm = INSTR (20, 16);
12288 unsigned rn = INSTR (9, 5);
12289 unsigned rd = INSTR (4, 0);
12290
  /* N.B. C99 integer division truncates towards zero, as required.  */
12292 int64_t divisor = aarch64_get_reg_s64 (cpu, rm, NO_SP);
12293
12294 aarch64_set_reg_s64
12295 (cpu, rd, NO_SP,
12296 divisor ? (aarch64_get_reg_s64 (cpu, rn, NO_SP) / divisor) : 0);
12297 }
12298
12299 /* 32 bit unsigned divide. */
12300 static void
12301 udiv32 (sim_cpu *cpu)
12302 {
12303 unsigned rm = INSTR (20, 16);
12304 unsigned rn = INSTR (9, 5);
12305 unsigned rd = INSTR (4, 0);
12306
12307 /* N.B. the pseudo-code does the divide using 64 bit data. */
12308 uint64_t dividend = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12309 uint64_t divisor = aarch64_get_reg_u32 (cpu, rm, NO_SP);
12310
12311 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12312 divisor ? (uint32_t) (dividend / divisor) : 0);
12313 }
12314
12315 /* 64 bit unsigned divide. */
12316 static void
12317 udiv64 (sim_cpu *cpu)
12318 {
12319 unsigned rm = INSTR (20, 16);
12320 unsigned rn = INSTR (9, 5);
12321 unsigned rd = INSTR (4, 0);
12322
  /* N.B. C99 integer division truncates towards zero, as required.  */
12324 uint64_t divisor = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12325
12326 aarch64_set_reg_u64
12327 (cpu, rd, NO_SP,
12328 divisor ? (aarch64_get_reg_u64 (cpu, rn, NO_SP) / divisor) : 0);
12329 }
12330
12331 static void
12332 dexDataProc2Source (sim_cpu *cpu)
12333 {
12334 /* assert instr[30] == 0
12335 instr[28,21] == 11010110
12336 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12337 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
     instr[15,10] = opcode : 000010 ==> UDIV, 000011 ==> SDIV,
12339 001000 ==> LSLV, 001001 ==> LSRV
12340 001010 ==> ASRV, 001011 ==> RORV
12341 ow ==> UNALLOC. */
12342
12343 uint32_t dispatch;
12344 uint32_t S = INSTR (29, 29);
12345 uint32_t opcode = INSTR (15, 10);
12346
12347 if (S == 1)
12348 HALT_UNALLOC;
12349
12350 if (opcode & 0x34)
12351 HALT_UNALLOC;
12352
12353 dispatch = ( (INSTR (31, 31) << 3)
12354 | (uimm (opcode, 3, 3) << 2)
12355 | uimm (opcode, 1, 0));
12356 switch (dispatch)
12357 {
12358 case 2: udiv32 (cpu); return;
    case 3: sdiv32 (cpu); return;
12360 case 4: lslv32 (cpu); return;
12361 case 5: lsrv32 (cpu); return;
12362 case 6: asrv32 (cpu); return;
12363 case 7: rorv32 (cpu); return;
12364 case 10: udiv64 (cpu); return;
    case 11: sdiv64 (cpu); return;
12366 case 12: lslv64 (cpu); return;
12367 case 13: lsrv64 (cpu); return;
12368 case 14: asrv64 (cpu); return;
12369 case 15: rorv64 (cpu); return;
12370 default: HALT_UNALLOC;
12371 }
12372 }
12373
12374
12375 /* Multiply. */
12376
12377 /* 32 bit multiply and add. */
12378 static void
12379 madd32 (sim_cpu *cpu)
12380 {
12381 unsigned rm = INSTR (20, 16);
12382 unsigned ra = INSTR (14, 10);
12383 unsigned rn = INSTR (9, 5);
12384 unsigned rd = INSTR (4, 0);
12385
12386 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12387 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12388 + aarch64_get_reg_u32 (cpu, rn, NO_SP)
12389 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12390 }
12391
12392 /* 64 bit multiply and add. */
12393 static void
12394 madd64 (sim_cpu *cpu)
12395 {
12396 unsigned rm = INSTR (20, 16);
12397 unsigned ra = INSTR (14, 10);
12398 unsigned rn = INSTR (9, 5);
12399 unsigned rd = INSTR (4, 0);
12400
12401 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12402 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12403 + aarch64_get_reg_u64 (cpu, rn, NO_SP)
12404 * aarch64_get_reg_u64 (cpu, rm, NO_SP));
12405 }
12406
12407 /* 32 bit multiply and sub. */
12408 static void
12409 msub32 (sim_cpu *cpu)
12410 {
12411 unsigned rm = INSTR (20, 16);
12412 unsigned ra = INSTR (14, 10);
12413 unsigned rn = INSTR (9, 5);
12414 unsigned rd = INSTR (4, 0);
12415
12416 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12417 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12418 - aarch64_get_reg_u32 (cpu, rn, NO_SP)
12419 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12420 }
12421
12422 /* 64 bit multiply and sub. */
12423 static void
12424 msub64 (sim_cpu *cpu)
12425 {
12426 unsigned rm = INSTR (20, 16);
12427 unsigned ra = INSTR (14, 10);
12428 unsigned rn = INSTR (9, 5);
12429 unsigned rd = INSTR (4, 0);
12430
12431 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12432 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12433 - aarch64_get_reg_u64 (cpu, rn, NO_SP)
12434 * aarch64_get_reg_u64 (cpu, rm, NO_SP));
12435 }
12436
12437 /* Signed multiply add long -- source, source2 : 32 bit, source3 : 64 bit. */
12438 static void
12439 smaddl (sim_cpu *cpu)
12440 {
12441 unsigned rm = INSTR (20, 16);
12442 unsigned ra = INSTR (14, 10);
12443 unsigned rn = INSTR (9, 5);
12444 unsigned rd = INSTR (4, 0);
12445
12446 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12447 obtain a 64 bit product. */
12448 aarch64_set_reg_s64
12449 (cpu, rd, NO_SP,
12450 aarch64_get_reg_s64 (cpu, ra, NO_SP)
12451 + ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
12452 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
12453 }
12454
12455 /* Signed multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
12456 static void
12457 smsubl (sim_cpu *cpu)
12458 {
12459 unsigned rm = INSTR (20, 16);
12460 unsigned ra = INSTR (14, 10);
12461 unsigned rn = INSTR (9, 5);
12462 unsigned rd = INSTR (4, 0);
12463
12464 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12465 obtain a 64 bit product. */
12466 aarch64_set_reg_s64
12467 (cpu, rd, NO_SP,
12468 aarch64_get_reg_s64 (cpu, ra, NO_SP)
12469 - ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
12470 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
12471 }
12472
12473 /* Integer Multiply/Divide. */
12474
12475 /* First some macros and a helper function. */
12476 /* Macros to test or access elements of 64 bit words. */
12477
12478 /* Mask used to access lo 32 bits of 64 bit unsigned int. */
12479 #define LOW_WORD_MASK ((1ULL << 32) - 1)
12480 /* Return the lo 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
12481 #define lowWordToU64(_value_u64) ((_value_u64) & LOW_WORD_MASK)
12482 /* Return the hi 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
12483 #define highWordToU64(_value_u64) ((_value_u64) >> 32)
12484
/* Offset of sign bit in 64 bit signed integer.  */
12486 #define SIGN_SHIFT_U64 63
12487 /* The sign bit itself -- also identifies the minimum negative int value. */
#define SIGN_BIT_U64 (1ULL << SIGN_SHIFT_U64)
12489 /* Return true if a 64 bit signed int presented as an unsigned int is the
12490 most negative value. */
12491 #define isMinimumU64(_value_u64) ((_value_u64) == SIGN_BIT_U64)
12492 /* Return true (non-zero) if a 64 bit signed int presented as an unsigned
   int has its sign bit set.  */
12494 #define isSignSetU64(_value_u64) ((_value_u64) & SIGN_BIT_U64)
12495 /* Return 1L or -1L according to whether a 64 bit signed int presented as
12496 an unsigned int has its sign bit set or not. */
#define signOfU64(_value_u64) (1L + (((_value_u64) >> SIGN_SHIFT_U64) * -2L))
12498 /* Clear the sign bit of a 64 bit signed int presented as an unsigned int. */
12499 #define clearSignU64(_value_u64) ((_value_u64) &= ~SIGN_BIT_U64)
12500
/* Multiply two 64 bit ints and return
   the hi 64 bits of the 128 bit product.  */
12503
12504 static uint64_t
12505 mul64hi (uint64_t value1, uint64_t value2)
12506 {
12507 uint64_t resultmid1;
12508 uint64_t result;
12509 uint64_t value1_lo = lowWordToU64 (value1);
  uint64_t value1_hi = highWordToU64 (value1);
12511 uint64_t value2_lo = lowWordToU64 (value2);
12512 uint64_t value2_hi = highWordToU64 (value2);
12513
12514 /* Cross-multiply and collect results. */
12515
12516 uint64_t xproductlo = value1_lo * value2_lo;
12517 uint64_t xproductmid1 = value1_lo * value2_hi;
12518 uint64_t xproductmid2 = value1_hi * value2_lo;
12519 uint64_t xproducthi = value1_hi * value2_hi;
12520 uint64_t carry = 0;
12521 /* Start accumulating 64 bit results. */
12522 /* Drop bottom half of lowest cross-product. */
12523 uint64_t resultmid = xproductlo >> 32;
12524 /* Add in middle products. */
12525 resultmid = resultmid + xproductmid1;
12526
12527 /* Check for overflow. */
12528 if (resultmid < xproductmid1)
12529 /* Carry over 1 into top cross-product. */
12530 carry++;
12531
12532 resultmid1 = resultmid + xproductmid2;
12533
12534 /* Check for overflow. */
12535 if (resultmid1 < xproductmid2)
12536 /* Carry over 1 into top cross-product. */
12537 carry++;
12538
12539 /* Drop lowest 32 bits of middle cross-product. */
12540 result = resultmid1 >> 32;
12541
  /* Add the top cross-product and any carry.  */
12543 result += xproducthi + carry;
12544
12545 return result;
12546 }
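/* A quick sanity check of the decomposition above: for
   value1 = value2 = 2^32 (lo = 0, hi = 1) every partial product except
   value1_hi * value2_hi is zero, so mul64hi returns 1, matching
   (2^32 * 2^32) >> 64.  The carry variable covers overflow of the two
   middle additions, e.g. mul64hi (~0ULL, ~0ULL) == 0xfffffffffffffffe,
   the high half of (2^64 - 1) squared.  */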
12547
12548 /* Signed multiply high, source, source2 :
12549 64 bit, dest <-- high 64-bit of result. */
12550 static void
12551 smulh (sim_cpu *cpu)
12552 {
12553 uint64_t uresult;
12554 int64_t result;
12555 unsigned rm = INSTR (20, 16);
12556 unsigned rn = INSTR (9, 5);
12557 unsigned rd = INSTR (4, 0);
12558 GReg ra = INSTR (14, 10);
12559 int64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12560 int64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12561 uint64_t uvalue1;
12562 uint64_t uvalue2;
12563 int64_t signum = 1;
12564
12565 if (ra != R31)
12566 HALT_UNALLOC;
12567
  /* Convert to unsigned and use the unsigned mul64hi routine,
     then fix the sign up afterwards.  */
12570 if (value1 < 0)
12571 {
12572 signum *= -1L;
12573 uvalue1 = -value1;
12574 }
12575 else
12576 {
12577 uvalue1 = value1;
12578 }
12579
12580 if (value2 < 0)
12581 {
12582 signum *= -1L;
12583 uvalue2 = -value2;
12584 }
12585 else
12586 {
12587 uvalue2 = value2;
12588 }
12589
  uresult = mul64hi (uvalue1, uvalue2);
  if (signum < 0)
    /* Negate the 128-bit product: invert the high half and add in the
       carry out of negating the low half, which is 1 iff the low half
       is zero.  */
    uresult = ~uresult + ((uvalue1 * uvalue2) == 0);
  result = uresult;
12593
12594 aarch64_set_reg_s64 (cpu, rd, NO_SP, result);
12595 }
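/* Sanity check for the sign fix-up: SMULH of -1 and 1 gives
   uvalue1 = uvalue2 = 1, mul64hi = 0 and a non-zero low half, so
   uresult becomes ~0 + 0 = 0xffffffffffffffff, i.e. -1, the high 64
   bits of the 128-bit product -1 as required.  */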
12596
12597 /* Unsigned multiply add long -- source, source2 :
12598 32 bit, source3 : 64 bit. */
12599 static void
12600 umaddl (sim_cpu *cpu)
12601 {
12602 unsigned rm = INSTR (20, 16);
12603 unsigned ra = INSTR (14, 10);
12604 unsigned rn = INSTR (9, 5);
12605 unsigned rd = INSTR (4, 0);
12606
  /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
     obtain a 64 bit product.  */
12609 aarch64_set_reg_u64
12610 (cpu, rd, NO_SP,
12611 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12612 + ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
12613 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
12614 }
12615
12616 /* Unsigned multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
12617 static void
12618 umsubl (sim_cpu *cpu)
12619 {
12620 unsigned rm = INSTR (20, 16);
12621 unsigned ra = INSTR (14, 10);
12622 unsigned rn = INSTR (9, 5);
12623 unsigned rd = INSTR (4, 0);
12624
  /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
     obtain a 64 bit product.  */
12627 aarch64_set_reg_u64
12628 (cpu, rd, NO_SP,
12629 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12630 - ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
12631 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
12632 }
12633
12634 /* Unsigned multiply high, source, source2 :
12635 64 bit, dest <-- high 64-bit of result. */
12636 static void
12637 umulh (sim_cpu *cpu)
12638 {
12639 unsigned rm = INSTR (20, 16);
12640 unsigned rn = INSTR (9, 5);
12641 unsigned rd = INSTR (4, 0);
12642 GReg ra = INSTR (14, 10);
12643
12644 if (ra != R31)
12645 HALT_UNALLOC;
12646
12647 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12648 mul64hi (aarch64_get_reg_u64 (cpu, rn, NO_SP),
12649 aarch64_get_reg_u64 (cpu, rm, NO_SP)));
12650 }
12651
12652 static void
12653 dexDataProc3Source (sim_cpu *cpu)
12654 {
12655 /* assert instr[28,24] == 11011. */
12656 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit (for rd at least)
12657 instr[30,29] = op54 : 00 ==> ok, ow ==> UNALLOC
     instr[23,21] = op31 : 111 ==> UNALLOC, ow ==> ok
12659 instr[15] = o0 : 0/1 ==> ok
12660 instr[23,21:15] ==> op : 0000 ==> MADD, 0001 ==> MSUB, (32/64 bit)
12661 0010 ==> SMADDL, 0011 ==> SMSUBL, (64 bit only)
12662 0100 ==> SMULH, (64 bit only)
			     1010 ==> UMADDL, 1011 ==> UMSUBL, (64 bit only)
12664 1100 ==> UMULH (64 bit only)
12665 ow ==> UNALLOC. */
12666
12667 uint32_t dispatch;
12668 uint32_t size = INSTR (31, 31);
12669 uint32_t op54 = INSTR (30, 29);
12670 uint32_t op31 = INSTR (23, 21);
12671 uint32_t o0 = INSTR (15, 15);
12672
12673 if (op54 != 0)
12674 HALT_UNALLOC;
12675
12676 if (size == 0)
12677 {
12678 if (op31 != 0)
12679 HALT_UNALLOC;
12680
12681 if (o0 == 0)
12682 madd32 (cpu);
12683 else
12684 msub32 (cpu);
12685 return;
12686 }
12687
12688 dispatch = (op31 << 1) | o0;
12689
12690 switch (dispatch)
12691 {
12692 case 0: madd64 (cpu); return;
12693 case 1: msub64 (cpu); return;
12694 case 2: smaddl (cpu); return;
12695 case 3: smsubl (cpu); return;
12696 case 4: smulh (cpu); return;
12697 case 10: umaddl (cpu); return;
12698 case 11: umsubl (cpu); return;
12699 case 12: umulh (cpu); return;
12700 default: HALT_UNALLOC;
12701 }
12702 }
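
/* For reference, a table-driven equivalent of the switch above, shown
   only as a sketch (not built); NULL slots are the UNALLOC dispatch
   values and the table name is illustrative.  */
#if 0
static void (* const dp3_handlers[16]) (sim_cpu *) =
{
  madd64, msub64, smaddl, smsubl, smulh, NULL, NULL, NULL,
  NULL,   NULL,   umaddl, umsubl, umulh, NULL, NULL, NULL
};
#endif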
12703
12704 static void
12705 dexDPReg (sim_cpu *cpu)
12706 {
12707 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
12708 assert group == GROUP_DPREG_0101 || group == GROUP_DPREG_1101
12709 bits [28:24:21] of a DPReg are the secondary dispatch vector. */
12710 uint32_t group2 = dispatchDPReg (aarch64_get_instr (cpu));
12711
12712 switch (group2)
12713 {
12714 case DPREG_LOG_000:
12715 case DPREG_LOG_001:
12716 dexLogicalShiftedRegister (cpu); return;
12717
12718 case DPREG_ADDSHF_010:
12719 dexAddSubtractShiftedRegister (cpu); return;
12720
12721 case DPREG_ADDEXT_011:
12722 dexAddSubtractExtendedRegister (cpu); return;
12723
12724 case DPREG_ADDCOND_100:
12725 {
12726 /* This set bundles a variety of different operations. */
	/* Check for:  */
	/* 1) add/sub with carry.  */
12729 uint32_t mask1 = 0x1FE00000U;
12730 uint32_t val1 = 0x1A000000U;
12731 /* 2) cond compare register/immediate. */
12732 uint32_t mask2 = 0x1FE00000U;
12733 uint32_t val2 = 0x1A400000U;
12734 /* 3) cond select. */
12735 uint32_t mask3 = 0x1FE00000U;
12736 uint32_t val3 = 0x1A800000U;
12737 /* 4) data proc 1/2 source. */
12738 uint32_t mask4 = 0x1FE00000U;
12739 uint32_t val4 = 0x1AC00000U;
12740
12741 if ((aarch64_get_instr (cpu) & mask1) == val1)
12742 dexAddSubtractWithCarry (cpu);
12743
12744 else if ((aarch64_get_instr (cpu) & mask2) == val2)
12745 CondCompare (cpu);
12746
12747 else if ((aarch64_get_instr (cpu) & mask3) == val3)
12748 dexCondSelect (cpu);
12749
12750 else if ((aarch64_get_instr (cpu) & mask4) == val4)
12751 {
12752 /* Bit 30 is clear for data proc 2 source
12753 and set for data proc 1 source. */
12754 if (aarch64_get_instr (cpu) & (1U << 30))
12755 dexDataProc1Source (cpu);
12756 else
12757 dexDataProc2Source (cpu);
12758 }
12759
12760 else
12761 /* Should not reach here. */
12762 HALT_NYI;
12763
12764 return;
12765 }
12766
12767 case DPREG_3SRC_110:
12768 dexDataProc3Source (cpu); return;
12769
12770 case DPREG_UNALLOC_101:
12771 HALT_UNALLOC;
12772
12773 case DPREG_3SRC_111:
12774 dexDataProc3Source (cpu); return;
12775
12776 default:
12777 /* Should never reach here. */
12778 HALT_NYI;
12779 }
12780 }
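
/* Since all four masks in the DPREG_ADDCOND_100 case above are the
   same 0x1FE00000 (bits 28..21), the compare chain is equivalent to
   a switch on INSTR (28, 21).  A sketch of that alternative, kept
   out of the build; the function name is illustrative.  */
#if 0
static void
dexAddCondExample (sim_cpu *cpu)
{
  switch (INSTR (28, 21))
    {
    case 0xD0: dexAddSubtractWithCarry (cpu); return;  /* 1101 0000 */
    case 0xD2: CondCompare (cpu); return;              /* 1101 0010 */
    case 0xD4: dexCondSelect (cpu); return;            /* 1101 0100 */
    case 0xD6:                                         /* 1101 0110 */
      /* Data proc 1/2 source, split on bit 30.  */
      if (aarch64_get_instr (cpu) & (1U << 30))
	dexDataProc1Source (cpu);
      else
	dexDataProc2Source (cpu);
      return;
    default: HALT_NYI;
    }
}
#endif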
12781
12782 /* Unconditional Branch immediate.
12783 Offset is a PC-relative byte offset in the range +/- 128MiB.
   The offset passed in here is already a byte offset: the decode
   routine is expected to have scaled the raw word offset.  */
12786
12787 /* Unconditional branch. */
12788 static void
12789 buc (sim_cpu *cpu, int32_t offset)
12790 {
12791 aarch64_set_next_PC_by_offset (cpu, offset);
12792 }
12793
12794 static unsigned stack_depth = 0;
12795
12796 /* Unconditional branch and link -- writes return PC to LR. */
12797 static void
12798 bl (sim_cpu *cpu, int32_t offset)
12799 {
12800 aarch64_save_LR (cpu);
12801 aarch64_set_next_PC_by_offset (cpu, offset);
12802
12803 if (TRACE_BRANCH_P (cpu))
12804 {
12805 ++ stack_depth;
12806 TRACE_BRANCH (cpu,
12807 " %*scall %" PRIx64 " [%s]"
12808 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
12809 stack_depth, " ", aarch64_get_next_PC (cpu),
12810 aarch64_get_func (aarch64_get_next_PC (cpu)),
12811 aarch64_get_reg_u64 (cpu, 0, NO_SP),
12812 aarch64_get_reg_u64 (cpu, 1, NO_SP),
12813 aarch64_get_reg_u64 (cpu, 2, NO_SP)
12814 );
12815 }
12816 }
12817
12818 /* Unconditional Branch register.
12819 Branch/return address is in source register. */
12820
12821 /* Unconditional branch. */
12822 static void
12823 br (sim_cpu *cpu)
12824 {
12825 unsigned rn = INSTR (9, 5);
12826 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
12827 }
12828
12829 /* Unconditional branch and link -- writes return PC to LR. */
12830 static void
12831 blr (sim_cpu *cpu)
12832 {
12833 unsigned rn = INSTR (9, 5);
12834
  /* The pseudo code in the spec says we update LR before fetching
     the value from rn.  */
12837 aarch64_save_LR (cpu);
12838 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
12839
12840 if (TRACE_BRANCH_P (cpu))
12841 {
12842 ++ stack_depth;
12843 TRACE_BRANCH (cpu,
12844 " %*scall %" PRIx64 " [%s]"
12845 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
12846 stack_depth, " ", aarch64_get_next_PC (cpu),
12847 aarch64_get_func (aarch64_get_next_PC (cpu)),
12848 aarch64_get_reg_u64 (cpu, 0, NO_SP),
12849 aarch64_get_reg_u64 (cpu, 1, NO_SP),
12850 aarch64_get_reg_u64 (cpu, 2, NO_SP)
12851 );
12852 }
12853 }
12854
/* Return -- the assembler will default the source register to LR.
   This is functionally equivalent to br but, presumably, unlike br
   it side effects the branch predictor.  */
12858 static void
12859 ret (sim_cpu *cpu)
12860 {
12861 unsigned rn = INSTR (9, 5);
12862 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
12863
12864 if (TRACE_BRANCH_P (cpu))
12865 {
12866 TRACE_BRANCH (cpu,
12867 " %*sreturn [result: %" PRIx64 "]",
12868 stack_depth, " ", aarch64_get_reg_u64 (cpu, 0, NO_SP));
12869 -- stack_depth;
12870 }
12871 }
12872
12873 /* NOP -- we implement this and call it from the decode in case we
12874 want to intercept it later. */
12875
12876 static void
12877 nop (sim_cpu *cpu)
12878 {
12879 }
12880
12881 /* Data synchronization barrier. */
12882
12883 static void
12884 dsb (sim_cpu *cpu)
12885 {
12886 }
12887
12888 /* Data memory barrier. */
12889
12890 static void
12891 dmb (sim_cpu *cpu)
12892 {
12893 }
12894
12895 /* Instruction synchronization barrier. */
12896
12897 static void
12898 isb (sim_cpu *cpu)
12899 {
12900 }
12901
12902 static void
12903 dexBranchImmediate (sim_cpu *cpu)
12904 {
12905 /* assert instr[30,26] == 00101
12906 instr[31] ==> 0 == B, 1 == BL
12907 instr[25,0] == imm26 branch offset counted in words. */
12908
12909 uint32_t top = INSTR (31, 31);
  /* We have a 26 bit signed word offset which we need to pass to the
12911 execute routine as a signed byte offset. */
12912 int32_t offset = simm32 (aarch64_get_instr (cpu), 25, 0) << 2;
12913
12914 if (top)
12915 bl (cpu, offset);
12916 else
12917 buc (cpu, offset);
12918 }
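
/* For reference, a sketch (not built) of what simm32 (instr, 25, 0)
   computes for the imm26 field: shift the field to the top of a
   32-bit word and shift back arithmetically (assuming the usual
   arithmetic right shift of negative values), then scale words to
   bytes.  The helper name is illustrative.  */
#if 0
static int32_t
imm26_byte_offset (uint32_t instr)
{
  int32_t imm26 = (int32_t) (instr << 6) >> 6;  /* Sign-extend bits 25..0.  */
  return imm26 * 4;                             /* Words to bytes.  */
}
#endif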
12919
12920 /* Control Flow. */
12921
12922 /* Conditional branch
12923
   Offset is a PC-relative byte offset in the range +/- 1MiB.  pos is
   a bit position in the range 0 .. 63.
12926
12927 cc is a CondCode enum value as pulled out of the decode
12928
12929 N.B. any offset register (source) can only be Xn or Wn. */
12930
12931 static void
12932 bcc (sim_cpu *cpu, int32_t offset, CondCode cc)
12933 {
12934 /* the test returns TRUE if CC is met. */
12935 if (testConditionCode (cpu, cc))
12936 aarch64_set_next_PC_by_offset (cpu, offset);
12937 }
12938
12939 /* 32 bit branch on register non-zero. */
12940 static void
12941 cbnz32 (sim_cpu *cpu, int32_t offset)
12942 {
12943 unsigned rt = INSTR (4, 0);
12944
12945 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) != 0)
12946 aarch64_set_next_PC_by_offset (cpu, offset);
12947 }
12948
/* 64 bit branch on register non-zero.  */
12950 static void
12951 cbnz (sim_cpu *cpu, int32_t offset)
12952 {
12953 unsigned rt = INSTR (4, 0);
12954
12955 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) != 0)
12956 aarch64_set_next_PC_by_offset (cpu, offset);
12957 }
12958
/* 32 bit branch on register zero.  */
12960 static void
12961 cbz32 (sim_cpu *cpu, int32_t offset)
12962 {
12963 unsigned rt = INSTR (4, 0);
12964
12965 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) == 0)
12966 aarch64_set_next_PC_by_offset (cpu, offset);
12967 }
12968
12969 /* 64 bit branch on register zero. */
12970 static void
12971 cbz (sim_cpu *cpu, int32_t offset)
12972 {
12973 unsigned rt = INSTR (4, 0);
12974
12975 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) == 0)
12976 aarch64_set_next_PC_by_offset (cpu, offset);
12977 }
12978
12979 /* Branch on register bit test non-zero -- one size fits all. */
12980 static void
12981 tbnz (sim_cpu *cpu, uint32_t pos, int32_t offset)
12982 {
12983 unsigned rt = INSTR (4, 0);
12984
  /* Build the mask as a 64-bit value: a plain 1 << pos would be
     undefined for bit positions above 31.  */
  if (aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos))
12986 aarch64_set_next_PC_by_offset (cpu, offset);
12987 }
12988
/* Branch on register bit test zero -- one size fits all.  */
12990 static void
12991 tbz (sim_cpu *cpu, uint32_t pos, int32_t offset)
12992 {
12993 unsigned rt = INSTR (4, 0);
12994
  if (!(aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos)))
12996 aarch64_set_next_PC_by_offset (cpu, offset);
12997 }
12998
12999 static void
13000 dexCompareBranchImmediate (sim_cpu *cpu)
13001 {
13002 /* instr[30,25] = 01 1010
13003 instr[31] = size : 0 ==> 32, 1 ==> 64
13004 instr[24] = op : 0 ==> CBZ, 1 ==> CBNZ
13005 instr[23,5] = simm19 branch offset counted in words
13006 instr[4,0] = rt */
13007
13008 uint32_t size = INSTR (31, 31);
13009 uint32_t op = INSTR (24, 24);
13010 int32_t offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13011
13012 if (size == 0)
13013 {
13014 if (op == 0)
13015 cbz32 (cpu, offset);
13016 else
13017 cbnz32 (cpu, offset);
13018 }
13019 else
13020 {
13021 if (op == 0)
13022 cbz (cpu, offset);
13023 else
13024 cbnz (cpu, offset);
13025 }
13026 }
13027
13028 static void
13029 dexTestBranchImmediate (sim_cpu *cpu)
13030 {
13031 /* instr[31] = b5 : bit 5 of test bit idx
13032 instr[30,25] = 01 1011
     instr[24]    = op : 0 ==> TBZ, 1 ==> TBNZ
13034 instr[23,19] = b40 : bits 4 to 0 of test bit idx
13035 instr[18,5] = simm14 : signed offset counted in words
     instr[4,0]   = Rt  */
13037
  uint32_t pos = ((INSTR (31, 31) << 5) | INSTR (23, 19));
13039 int32_t offset = simm32 (aarch64_get_instr (cpu), 18, 5) << 2;
13040
13041 NYI_assert (30, 25, 0x1b);
13042
13043 if (INSTR (24, 24) == 0)
13044 tbz (cpu, pos, offset);
13045 else
13046 tbnz (cpu, pos, offset);
13047 }
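
/* A sketch (not built) of the test-bit index composition used above:
   b5 (instr[31]) supplies bit 5 of the index and b40 (instr[23,19])
   supplies bits 4..0, so the whole range 0..63 is reachable, e.g.
   b5 = 1, b40 = 00011 gives bit 35.  The helper name is
   illustrative.  */
#if 0
static uint32_t
tb_bit_pos (uint32_t b5, uint32_t b40)
{
  return (b5 << 5) | (b40 & 0x1F);  /* 0 .. 63.  */
}
#endif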
13048
13049 static void
13050 dexCondBranchImmediate (sim_cpu *cpu)
13051 {
13052 /* instr[31,25] = 010 1010
     instr[24]    = op1; op1:op0 == 00 ==> B.cond
13054 instr[23,5] = simm19 : signed offset counted in words
13055 instr[4] = op0
13056 instr[3,0] = cond */
13057
13058 int32_t offset;
13059 uint32_t op = ((INSTR (24, 24) << 1) | INSTR (4, 4));
13060
13061 NYI_assert (31, 25, 0x2a);
13062
13063 if (op != 0)
13064 HALT_UNALLOC;
13065
13066 offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13067
13068 bcc (cpu, offset, INSTR (3, 0));
13069 }
13070
13071 static void
13072 dexBranchRegister (sim_cpu *cpu)
13073 {
13074 /* instr[31,25] = 110 1011
     instr[24,21] = op : 0 ==> BR, 1 ==> BLR, 2 ==> RET,
			 4 ==> ERET, 5 ==> DRPS
13076 instr[20,16] = op2 : must be 11111
13077 instr[15,10] = op3 : must be 000000
     instr[4,0]   = op4 : must be 0 0000.  */
13079
13080 uint32_t op = INSTR (24, 21);
13081 uint32_t op2 = INSTR (20, 16);
13082 uint32_t op3 = INSTR (15, 10);
13083 uint32_t op4 = INSTR (4, 0);
13084
13085 NYI_assert (31, 25, 0x6b);
13086
13087 if (op2 != 0x1F || op3 != 0 || op4 != 0)
13088 HALT_UNALLOC;
13089
13090 if (op == 0)
13091 br (cpu);
13092
13093 else if (op == 1)
13094 blr (cpu);
13095
13096 else if (op == 2)
13097 ret (cpu);
13098
13099 else
13100 {
      /* ERET and DRPS accept 0b11111 for Rn = instr [9,5];
	 anything else is unallocated.  */
      uint32_t rn = INSTR (9, 5);
13104
13105 if (rn != 0x1f)
13106 HALT_UNALLOC;
13107
13108 if (op == 4 || op == 5)
13109 HALT_NYI;
13110
13111 HALT_UNALLOC;
13112 }
13113 }
13114
13115 /* FIXME: We should get the Angel SWI values from ../../libgloss/aarch64/svc.h
13116 but this may not be available. So instead we define the values we need
13117 here. */
13118 #define AngelSVC_Reason_Open 0x01
13119 #define AngelSVC_Reason_Close 0x02
13120 #define AngelSVC_Reason_Write 0x05
13121 #define AngelSVC_Reason_Read 0x06
13122 #define AngelSVC_Reason_IsTTY 0x09
13123 #define AngelSVC_Reason_Seek 0x0A
13124 #define AngelSVC_Reason_FLen 0x0C
13125 #define AngelSVC_Reason_Remove 0x0E
13126 #define AngelSVC_Reason_Rename 0x0F
13127 #define AngelSVC_Reason_Clock 0x10
13128 #define AngelSVC_Reason_Time 0x11
13129 #define AngelSVC_Reason_System 0x12
13130 #define AngelSVC_Reason_Errno 0x13
13131 #define AngelSVC_Reason_GetCmdLine 0x15
13132 #define AngelSVC_Reason_HeapInfo 0x16
13133 #define AngelSVC_Reason_ReportException 0x18
13134 #define AngelSVC_Reason_Elapsed 0x30
13135
13136
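/* For reference, a target-side sketch (not built; GCC-style inline
   asm, and the function name is illustrative) of the semihosting
   calling convention that handle_halt below services: w0 carries the
   reason code, x1 points at the argument block, and the result comes
   back in x0.  */
#if 0
static inline uint64_t
angel_svc (uint32_t reason, void *arg_block)
{
  register uint64_t x0 __asm__ ("x0") = reason;
  register void *x1 __asm__ ("x1") = arg_block;

  __asm__ volatile ("hlt #0xf000" : "+r" (x0) : "r" (x1) : "memory");
  return x0;
}
#endif
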
13137 static void
13138 handle_halt (sim_cpu *cpu, uint32_t val)
13139 {
13140 uint64_t result = 0;
13141
13142 if (val != 0xf000)
13143 {
13144 TRACE_SYSCALL (cpu, " HLT [0x%x]", val);
13145 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13146 sim_stopped, SIM_SIGTRAP);
13147 }
13148
13149 /* We have encountered an Angel SVC call. See if we can process it. */
13150 switch (aarch64_get_reg_u32 (cpu, 0, NO_SP))
13151 {
13152 case AngelSVC_Reason_HeapInfo:
13153 {
13154 /* Get the values. */
13155 uint64_t stack_top = aarch64_get_stack_start (cpu);
13156 uint64_t heap_base = aarch64_get_heap_start (cpu);
13157
13158 /* Get the pointer */
13159 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13160 ptr = aarch64_get_mem_u64 (cpu, ptr);
13161
13162 /* Fill in the memory block. */
13163 /* Start addr of heap. */
13164 aarch64_set_mem_u64 (cpu, ptr + 0, heap_base);
13165 /* End addr of heap. */
13166 aarch64_set_mem_u64 (cpu, ptr + 8, stack_top);
13167 /* Lowest stack addr. */
13168 aarch64_set_mem_u64 (cpu, ptr + 16, heap_base);
13169 /* Initial stack addr. */
13170 aarch64_set_mem_u64 (cpu, ptr + 24, stack_top);
13171
13172 TRACE_SYSCALL (cpu, " AngelSVC: Get Heap Info");
13173 }
13174 break;
13175
13176 case AngelSVC_Reason_Open:
13177 {
13178 /* Get the pointer */
	/* uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK); */
13180 /* FIXME: For now we just assume that we will only be asked
13181 to open the standard file descriptors. */
13182 static int fd = 0;
13183 result = fd ++;
13184
13185 TRACE_SYSCALL (cpu, " AngelSVC: Open file %d", fd - 1);
13186 }
13187 break;
13188
13189 case AngelSVC_Reason_Close:
13190 {
13191 uint64_t fh = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13192 TRACE_SYSCALL (cpu, " AngelSVC: Close file %d", (int) fh);
13193 result = 0;
13194 }
13195 break;
13196
13197 case AngelSVC_Reason_Errno:
13198 result = 0;
13199 TRACE_SYSCALL (cpu, " AngelSVC: Get Errno");
13200 break;
13201
13202 case AngelSVC_Reason_Clock:
13203 result =
13204 #ifdef CLOCKS_PER_SEC
13205 (CLOCKS_PER_SEC >= 100)
13206 ? (clock () / (CLOCKS_PER_SEC / 100))
13207 : ((clock () * 100) / CLOCKS_PER_SEC)
13208 #else
13209 /* Presume unix... clock() returns microseconds. */
13210 (clock () / 10000)
13211 #endif
13212 ;
13213 TRACE_SYSCALL (cpu, " AngelSVC: Get Clock");
13214 break;
13215
13216 case AngelSVC_Reason_GetCmdLine:
13217 {
13218 /* Get the pointer */
13219 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13220 ptr = aarch64_get_mem_u64 (cpu, ptr);
13221
13222 /* FIXME: No command line for now. */
13223 aarch64_set_mem_u64 (cpu, ptr, 0);
13224 TRACE_SYSCALL (cpu, " AngelSVC: Get Command Line");
13225 }
13226 break;
13227
13228 case AngelSVC_Reason_IsTTY:
13229 result = 1;
13230 TRACE_SYSCALL (cpu, " AngelSVC: IsTTY ?");
13231 break;
13232
13233 case AngelSVC_Reason_Write:
13234 {
13235 /* Get the pointer */
13236 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13237 /* Get the write control block. */
13238 uint64_t fd = aarch64_get_mem_u64 (cpu, ptr);
13239 uint64_t buf = aarch64_get_mem_u64 (cpu, ptr + 8);
13240 uint64_t len = aarch64_get_mem_u64 (cpu, ptr + 16);
13241
13242 TRACE_SYSCALL (cpu, "write of %" PRIx64 " bytes from %"
13243 PRIx64 " on descriptor %" PRIx64,
13244 len, buf, fd);
13245
13246 if (len > 1280)
13247 {
13248 TRACE_SYSCALL (cpu,
13249 " AngelSVC: Write: Suspiciously long write: %ld",
13250 (long) len);
13251 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13252 sim_stopped, SIM_SIGBUS);
13253 }
13254 else if (fd == 1)
13255 {
13256 printf ("%.*s", (int) len, aarch64_get_mem_ptr (cpu, buf));
13257 }
13258 else if (fd == 2)
13259 {
13260 TRACE (cpu, 0, "\n");
13261 sim_io_eprintf (CPU_STATE (cpu), "%.*s",
13262 (int) len, aarch64_get_mem_ptr (cpu, buf));
13263 TRACE (cpu, 0, "\n");
13264 }
13265 else
13266 {
13267 TRACE_SYSCALL (cpu,
13268 " AngelSVC: Write: Unexpected file handle: %d",
13269 (int) fd);
13270 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13271 sim_stopped, SIM_SIGABRT);
13272 }
13273 }
13274 break;
13275
13276 case AngelSVC_Reason_ReportException:
13277 {
13278 /* Get the pointer */
13279 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
	/* ptr = aarch64_get_mem_u64 (cpu, ptr); */
13281 uint64_t type = aarch64_get_mem_u64 (cpu, ptr);
13282 uint64_t state = aarch64_get_mem_u64 (cpu, ptr + 8);
13283
13284 TRACE_SYSCALL (cpu,
13285 "Angel Exception: type 0x%" PRIx64 " state %" PRIx64,
13286 type, state);
13287
13288 if (type == 0x20026)
13289 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13290 sim_exited, state);
13291 else
13292 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13293 sim_stopped, SIM_SIGINT);
13294 }
13295 break;
13296
13297 case AngelSVC_Reason_Read:
13298 case AngelSVC_Reason_FLen:
13299 case AngelSVC_Reason_Seek:
13300 case AngelSVC_Reason_Remove:
13301 case AngelSVC_Reason_Time:
13302 case AngelSVC_Reason_System:
13303 case AngelSVC_Reason_Rename:
13304 case AngelSVC_Reason_Elapsed:
13305 default:
13306 TRACE_SYSCALL (cpu, " HLT [Unknown angel %x]",
13307 aarch64_get_reg_u32 (cpu, 0, NO_SP));
13308 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13309 sim_stopped, SIM_SIGTRAP);
13310 }
13311
13312 aarch64_set_reg_u64 (cpu, 0, NO_SP, result);
13313 }
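
/* The AngelSVC_Reason_HeapInfo parameter block filled in above, shown
   as a struct for reference (a sketch, not built; the struct name is
   illustrative).  */
#if 0
struct angel_heapinfo
{
  uint64_t heap_start;     /* ptr + 0.  */
  uint64_t heap_end;       /* ptr + 8.  */
  uint64_t stack_lowest;   /* ptr + 16.  */
  uint64_t stack_initial;  /* ptr + 24.  */
};
#endif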
13314
13315 static void
13316 dexExcpnGen (sim_cpu *cpu)
13317 {
13318 /* instr[31:24] = 11010100
13319 instr[23,21] = opc : 000 ==> GEN EXCPN, 001 ==> BRK
13320 010 ==> HLT, 101 ==> DBG GEN EXCPN
13321 instr[20,5] = imm16
13322 instr[4,2] = opc2 000 ==> OK, ow ==> UNALLOC
13323 instr[1,0] = LL : discriminates opc */
13324
13325 uint32_t opc = INSTR (23, 21);
13326 uint32_t imm16 = INSTR (20, 5);
13327 uint32_t opc2 = INSTR (4, 2);
13328 uint32_t LL;
13329
13330 NYI_assert (31, 24, 0xd4);
13331
13332 if (opc2 != 0)
13333 HALT_UNALLOC;
13334
13335 LL = INSTR (1, 0);
13336
13337 /* We only implement HLT and BRK for now. */
13338 if (opc == 1 && LL == 0)
13339 {
13340 TRACE_EVENTS (cpu, " BRK [0x%x]", imm16);
13341 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13342 sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
13343 }
13344
13345 if (opc == 2 && LL == 0)
13346 handle_halt (cpu, imm16);
13347
13348 else if (opc == 0 || opc == 5)
13349 HALT_NYI;
13350
13351 else
13352 HALT_UNALLOC;
13353 }
13354
13355 /* Stub for accessing system registers. */
13356
13357 static uint64_t
13358 system_get (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13359 unsigned crm, unsigned op2)
13360 {
13361 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 7)
13362 /* DCZID_EL0 - the Data Cache Zero ID register.
13363 We do not support DC ZVA at the moment, so
13364 we return a value with the disable bit set.
13365 We implement support for the DCZID register since
13366 it is used by the C library's memset function. */
13367 return ((uint64_t) 1) << 4;
13368
13369 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 1)
13370 /* Cache Type Register. */
13371 return 0x80008000UL;
13372
13373 if (crn == 13 && op1 == 3 && crm == 0 && op2 == 2)
13374 /* TPIDR_EL0 - thread pointer id. */
13375 return aarch64_get_thread_id (cpu);
13376
13377 if (op1 == 3 && crm == 4 && op2 == 0)
13378 return aarch64_get_FPCR (cpu);
13379
13380 if (op1 == 3 && crm == 4 && op2 == 1)
13381 return aarch64_get_FPSR (cpu);
13382
13383 else if (op1 == 3 && crm == 2 && op2 == 0)
13384 return aarch64_get_CPSR (cpu);
13385
13386 HALT_NYI;
13387 }
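
/* For reference, the DCZID_EL0 layout assumed above (a sketch, not
   built; the helper name is illustrative): bit 4 is the DZP flag,
   which prohibits DC ZVA, and bits 3..0 give the zeroing block size
   as log2 of the number of words.  */
#if 0
static uint64_t
dczid_value (int dc_zva_prohibited, unsigned log2_block_words)
{
  return (((uint64_t) (dc_zva_prohibited != 0)) << 4)
	 | (log2_block_words & 0xF);
}
#endif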
13388
13389 static void
13390 system_set (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13391 unsigned crm, unsigned op2, uint64_t val)
13392 {
13393 if (op1 == 3 && crm == 4 && op2 == 0)
13394 aarch64_set_FPCR (cpu, val);
13395
13396 else if (op1 == 3 && crm == 4 && op2 == 1)
13397 aarch64_set_FPSR (cpu, val);
13398
13399 else if (op1 == 3 && crm == 2 && op2 == 0)
13400 aarch64_set_CPSR (cpu, val);
13401
13402 else
13403 HALT_NYI;
13404 }
13405
13406 static void
13407 do_mrs (sim_cpu *cpu)
13408 {
  /* instr[31:20] = 1101 0101 0011
13410 instr[19] = op0
13411 instr[18,16] = op1
13412 instr[15,12] = CRn
13413 instr[11,8] = CRm
13414 instr[7,5] = op2
13415 instr[4,0] = Rt */
13416 unsigned sys_op0 = INSTR (19, 19) + 2;
13417 unsigned sys_op1 = INSTR (18, 16);
13418 unsigned sys_crn = INSTR (15, 12);
13419 unsigned sys_crm = INSTR (11, 8);
13420 unsigned sys_op2 = INSTR (7, 5);
13421 unsigned rt = INSTR (4, 0);
13422
13423 aarch64_set_reg_u64 (cpu, rt, NO_SP,
13424 system_get (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2));
13425 }
13426
13427 static void
13428 do_MSR_immediate (sim_cpu *cpu)
13429 {
13430 /* instr[31:19] = 1101 0101 0000 0
13431 instr[18,16] = op1
13432 instr[15,12] = 0100
13433 instr[11,8] = CRm
13434 instr[7,5] = op2
13435 instr[4,0] = 1 1111 */
13436
13437 unsigned op1 = INSTR (18, 16);
13438 /*unsigned crm = INSTR (11, 8);*/
13439 unsigned op2 = INSTR (7, 5);
13440
13441 NYI_assert (31, 19, 0x1AA0);
13442 NYI_assert (15, 12, 0x4);
13443 NYI_assert (4, 0, 0x1F);
13444
13445 if (op1 == 0)
13446 {
13447 if (op2 == 5)
13448 HALT_NYI; /* set SPSel. */
13449 else
13450 HALT_UNALLOC;
13451 }
13452 else if (op1 == 3)
13453 {
13454 if (op2 == 6)
13455 HALT_NYI; /* set DAIFset. */
13456 else if (op2 == 7)
13457 HALT_NYI; /* set DAIFclr. */
13458 else
13459 HALT_UNALLOC;
13460 }
13461 else
13462 HALT_UNALLOC;
13463 }
13464
13465 static void
13466 do_MSR_reg (sim_cpu *cpu)
13467 {
13468 /* instr[31:20] = 1101 0101 0001
13469 instr[19] = op0
13470 instr[18,16] = op1
13471 instr[15,12] = CRn
13472 instr[11,8] = CRm
13473 instr[7,5] = op2
13474 instr[4,0] = Rt */
13475
13476 unsigned sys_op0 = INSTR (19, 19) + 2;
13477 unsigned sys_op1 = INSTR (18, 16);
13478 unsigned sys_crn = INSTR (15, 12);
13479 unsigned sys_crm = INSTR (11, 8);
13480 unsigned sys_op2 = INSTR (7, 5);
13481 unsigned rt = INSTR (4, 0);
13482
13483 NYI_assert (31, 20, 0xD51);
13484
13485 system_set (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2,
13486 aarch64_get_reg_u64 (cpu, rt, NO_SP));
13487 }
13488
13489 static void
13490 do_SYS (sim_cpu *cpu)
13491 {
13492 /* instr[31,19] = 1101 0101 0000 1
13493 instr[18,16] = op1
13494 instr[15,12] = CRn
13495 instr[11,8] = CRm
13496 instr[7,5] = op2
13497 instr[4,0] = Rt */
13498 NYI_assert (31, 19, 0x1AA1);
13499
13500 /* FIXME: For now we just silently accept system ops. */
13501 }
13502
13503 static void
13504 dexSystem (sim_cpu *cpu)
13505 {
13506 /* instr[31:22] = 1101 01010 0
13507 instr[21] = L
13508 instr[20,19] = op0
13509 instr[18,16] = op1
13510 instr[15,12] = CRn
13511 instr[11,8] = CRm
13512 instr[7,5] = op2
13513 instr[4,0] = uimm5 */
13514
13515 /* We are interested in HINT, DSB, DMB and ISB
13516
     Hint #0 encodes NOOP (this is the only hint we care about):
     L == 0, op0 == 0, op1 = 011, CRn = 0010, Rt = 11111; any hint
     with CRm != 0000, op2 == 000 or op2 > 101 is executed as a NOP.
13520
     DSB, DMB and ISB are the data synchronization, data memory and
     instruction synchronization barriers, respectively, where
13523
13524 L == 0, op0 == 0, op1 = 011, CRn = 0011, Rt = 11111,
13525 op2 : DSB ==> 100, DMB ==> 101, ISB ==> 110
13526 CRm<3:2> ==> domain, CRm<1:0> ==> types,
13527 domain : 00 ==> OuterShareable, 01 ==> Nonshareable,
	      10 ==> InnerShareable, 11 ==> FullSystem
13529 types : 01 ==> Reads, 10 ==> Writes,
13530 11 ==> All, 00 ==> All (domain == FullSystem). */
13531
13532 unsigned rt = INSTR (4, 0);
13533
13534 NYI_assert (31, 22, 0x354);
13535
13536 switch (INSTR (21, 12))
13537 {
13538 case 0x032:
13539 if (rt == 0x1F)
13540 {
	  /* NOP has CRm != 0000 OR
	     (CRm == 0000 AND (op2 == 000 OR op2 > 101)).  */
13543 uint32_t crm = INSTR (11, 8);
13544 uint32_t op2 = INSTR (7, 5);
13545
13546 if (crm != 0 || (op2 == 0 || op2 > 5))
13547 {
13548 /* Actually call nop method so we can reimplement it later. */
13549 nop (cpu);
13550 return;
13551 }
13552 }
13553 HALT_NYI;
13554
13555 case 0x033:
13556 {
13557 uint32_t op2 = INSTR (7, 5);
13558
13559 switch (op2)
13560 {
13561 case 2: HALT_NYI;
13562 case 4: dsb (cpu); return;
13563 case 5: dmb (cpu); return;
13564 case 6: isb (cpu); return;
13565 default: HALT_UNALLOC;
13566 }
13567 }
13568
13569 case 0x3B0:
13570 case 0x3B4:
13571 case 0x3BD:
13572 do_mrs (cpu);
13573 return;
13574
13575 case 0x0B7:
13576 do_SYS (cpu); /* DC is an alias of SYS. */
13577 return;
13578
13579 default:
13580 if (INSTR (21, 20) == 0x1)
13581 do_MSR_reg (cpu);
13582 else if (INSTR (21, 19) == 0 && INSTR (15, 12) == 0x4)
13583 do_MSR_immediate (cpu);
13584 else
13585 HALT_NYI;
13586 return;
13587 }
13588 }
13589
13590 static void
13591 dexBr (sim_cpu *cpu)
13592 {
13593 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
13594 assert group == GROUP_BREXSYS_1010 || group == GROUP_BREXSYS_1011
13595 bits [31,29] of a BrExSys are the secondary dispatch vector. */
13596 uint32_t group2 = dispatchBrExSys (aarch64_get_instr (cpu));
13597
13598 switch (group2)
13599 {
13600 case BR_IMM_000:
13601 return dexBranchImmediate (cpu);
13602
13603 case BR_IMMCMP_001:
13604 /* Compare has bit 25 clear while test has it set. */
13605 if (!INSTR (25, 25))
13606 dexCompareBranchImmediate (cpu);
13607 else
13608 dexTestBranchImmediate (cpu);
13609 return;
13610
13611 case BR_IMMCOND_010:
13612 /* This is a conditional branch if bit 25 is clear otherwise
13613 unallocated. */
13614 if (!INSTR (25, 25))
13615 dexCondBranchImmediate (cpu);
13616 else
13617 HALT_UNALLOC;
13618 return;
13619
13620 case BR_UNALLOC_011:
13621 HALT_UNALLOC;
13622
13623 case BR_IMM_100:
13624 dexBranchImmediate (cpu);
13625 return;
13626
13627 case BR_IMMCMP_101:
13628 /* Compare has bit 25 clear while test has it set. */
13629 if (!INSTR (25, 25))
13630 dexCompareBranchImmediate (cpu);
13631 else
13632 dexTestBranchImmediate (cpu);
13633 return;
13634
13635 case BR_REG_110:
13636 /* Unconditional branch reg has bit 25 set. */
13637 if (INSTR (25, 25))
13638 dexBranchRegister (cpu);
13639
    /* This includes Excpn Gen, System and unalloc operations.
13641 We need to decode the Excpn Gen operation BRK so we can plant
13642 debugger entry points.
13643 Excpn Gen operations have instr [24] = 0.
       We need to decode at least one of the System operations,
       NOP, which is an alias for HINT #0.
13646 System operations have instr [24,22] = 100. */
13647 else if (INSTR (24, 24) == 0)
13648 dexExcpnGen (cpu);
13649
13650 else if (INSTR (24, 22) == 4)
13651 dexSystem (cpu);
13652
13653 else
13654 HALT_UNALLOC;
13655
13656 return;
13657
13658 case BR_UNALLOC_111:
13659 HALT_UNALLOC;
13660
13661 default:
13662 /* Should never reach here. */
13663 HALT_NYI;
13664 }
13665 }
13666
13667 static void
13668 aarch64_decode_and_execute (sim_cpu *cpu, uint64_t pc)
13669 {
  /* We need to check if gdb wants to break in here.  */
13671 /* checkBreak (cpu);. */
13672
13673 uint64_t group = dispatchGroup (aarch64_get_instr (cpu));
13674
13675 switch (group)
13676 {
13677 case GROUP_PSEUDO_0000: dexPseudo (cpu); break;
13678 case GROUP_LDST_0100: dexLdSt (cpu); break;
13679 case GROUP_DPREG_0101: dexDPReg (cpu); break;
13680 case GROUP_LDST_0110: dexLdSt (cpu); break;
13681 case GROUP_ADVSIMD_0111: dexAdvSIMD0 (cpu); break;
13682 case GROUP_DPIMM_1000: dexDPImm (cpu); break;
13683 case GROUP_DPIMM_1001: dexDPImm (cpu); break;
13684 case GROUP_BREXSYS_1010: dexBr (cpu); break;
13685 case GROUP_BREXSYS_1011: dexBr (cpu); break;
13686 case GROUP_LDST_1100: dexLdSt (cpu); break;
13687 case GROUP_DPREG_1101: dexDPReg (cpu); break;
13688 case GROUP_LDST_1110: dexLdSt (cpu); break;
13689 case GROUP_ADVSIMD_1111: dexAdvSIMD1 (cpu); break;
13690
13691 case GROUP_UNALLOC_0001:
13692 case GROUP_UNALLOC_0010:
13693 case GROUP_UNALLOC_0011:
13694 HALT_UNALLOC;
13695
13696 default:
13697 /* Should never reach here. */
13698 HALT_NYI;
13699 }
13700 }
13701
13702 static bfd_boolean
13703 aarch64_step (sim_cpu *cpu)
13704 {
13705 uint64_t pc = aarch64_get_PC (cpu);
13706
13707 if (pc == TOP_LEVEL_RETURN_PC)
13708 return FALSE;
13709
13710 aarch64_set_next_PC (cpu, pc + 4);
13711 aarch64_get_instr (cpu) = aarch64_get_mem_u32 (cpu, pc);
13712
13713 TRACE_INSN (cpu, " pc = %" PRIx64 " instr = %08x", pc,
13714 aarch64_get_instr (cpu));
13715 TRACE_DISASM (cpu, pc);
13716
13717 aarch64_decode_and_execute (cpu, pc);
13718
13719 return TRUE;
13720 }
13721
13722 void
13723 aarch64_run (SIM_DESC sd)
13724 {
13725 sim_cpu *cpu = STATE_CPU (sd, 0);
13726
13727 while (aarch64_step (cpu))
13728 aarch64_update_PC (cpu);
13729
13730 sim_engine_halt (sd, NULL, NULL, aarch64_get_PC (cpu),
13731 sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
13732 }
13733
13734 void
13735 aarch64_init (sim_cpu *cpu, uint64_t pc)
13736 {
13737 uint64_t sp = aarch64_get_stack_start (cpu);
13738
13739 /* Install SP, FP and PC and set LR to -20
13740 so we can detect a top-level return. */
13741 aarch64_set_reg_u64 (cpu, SP, SP_OK, sp);
13742 aarch64_set_reg_u64 (cpu, FP, SP_OK, sp);
13743 aarch64_set_reg_u64 (cpu, LR, SP_OK, TOP_LEVEL_RETURN_PC);
13744 aarch64_set_next_PC (cpu, pc);
13745 aarch64_update_PC (cpu);
13746 aarch64_init_LIT_table ();
13747 }