/* simulator.c -- Interface for the AArch64 simulator.

   Copyright (C) 2015-2021 Free Software Foundation, Inc.

   Contributed by Red Hat.

   This file is part of GDB.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

/* This must come before any other includes.  */
#include "defs.h"

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <math.h>
#include <time.h>
#include <limits.h>

#include "simulator.h"
#include "cpustate.h"
#include "memory.h"

#include "sim-signal.h"

#define NO_SP 0
#define SP_OK 1

#define TST(_flag)   (aarch64_test_CPSR_bit (cpu, _flag))
#define IS_SET(_X)   (TST (( _X )) ? 1 : 0)
#define IS_CLEAR(_X) (TST (( _X )) ? 0 : 1)

/* Space saver macro.  */
#define INSTR(HIGH, LOW) uimm (aarch64_get_instr (cpu), (HIGH), (LOW))

#define HALT_UNALLOC                                                    \
  do                                                                    \
    {                                                                   \
      TRACE_DISASM (cpu, aarch64_get_PC (cpu));                         \
      TRACE_INSN (cpu,                                                  \
                  "Unallocated instruction detected at sim line %d,"    \
                  " exe addr %" PRIx64,                                 \
                  __LINE__, aarch64_get_PC (cpu));                      \
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
                       sim_stopped, SIM_SIGILL);                        \
    }                                                                   \
  while (0)

#define HALT_NYI                                                        \
  do                                                                    \
    {                                                                   \
      TRACE_DISASM (cpu, aarch64_get_PC (cpu));                         \
      TRACE_INSN (cpu,                                                  \
                  "Unimplemented instruction detected at sim line %d,"  \
                  " exe addr %" PRIx64,                                 \
                  __LINE__, aarch64_get_PC (cpu));                      \
      if (! TRACE_ANY_P (cpu))                                          \
        sim_io_eprintf (CPU_STATE (cpu), "SIM Error: Unimplemented instruction: %#08x\n", \
                        aarch64_get_instr (cpu));                       \
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
                       sim_stopped, SIM_SIGABRT);                       \
    }                                                                   \
  while (0)

#define NYI_assert(HI, LO, EXPECTED)            \
  do                                            \
    {                                           \
      if (INSTR ((HI), (LO)) != (EXPECTED))     \
        HALT_NYI;                               \
    }                                           \
  while (0)

/* Helper functions used by expand_logical_immediate.  */

/* For i = 1 .. N, set result<i-1> to 1; all other bits are zero.  */
static inline uint64_t
ones (int N)
{
  return (N == 64 ? (uint64_t) -1 : ((1ULL << N) - 1));
}
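
/* For example, ones (4) yields 0xf, and ones (64) yields all 64 bits
   set via the explicit N == 64 check above.  */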

/* Set result<0> to val<N>.  */
static inline uint64_t
pickbit (uint64_t val, int N)
{
  return pickbits64 (val, N, N);
}

static uint64_t
expand_logical_immediate (uint32_t S, uint32_t R, uint32_t N)
{
  uint64_t mask;
  uint64_t imm;
  unsigned simd_size;

  /* The immediate value is S+1 bits to 1, left rotated by SIMDsize - R
     (in other words, right rotated by R), then replicated.  */
  if (N != 0)
    {
      simd_size = 64;
      mask = 0xffffffffffffffffull;
    }
  else
    {
      switch (S)
        {
        case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32;           break;
        case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; S &= 0xf; break;
        case 0x30 ... 0x37: /* 110xxx */ simd_size =  8; S &= 0x7; break;
        case 0x38 ... 0x3b: /* 1110xx */ simd_size =  4; S &= 0x3; break;
        case 0x3c ... 0x3d: /* 11110x */ simd_size =  2; S &= 0x1; break;
        default: return 0;
        }
      mask = (1ull << simd_size) - 1;
      /* Top bits are IGNORED.  */
      R &= simd_size - 1;
    }

  /* NOTE: if S = simd_size - 1 we get 0xf..f which is rejected.  */
  if (S == simd_size - 1)
    return 0;

  /* S+1 consecutive bits to 1.  */
  /* NOTE: S can't be 63 due to detection above.  */
  imm = (1ull << (S + 1)) - 1;

  /* Rotate to the left by simd_size - R.  */
  if (R != 0)
    imm = ((imm << (simd_size - R)) & mask) | (imm >> R);

  /* Replicate the value according to SIMD size.  */
  switch (simd_size)
    {
    case  2: imm = (imm <<  2) | imm; /* Fall through.  */
    case  4: imm = (imm <<  4) | imm; /* Fall through.  */
    case  8: imm = (imm <<  8) | imm; /* Fall through.  */
    case 16: imm = (imm << 16) | imm; /* Fall through.  */
    case 32: imm = (imm << 32) | imm; /* Fall through.  */
    case 64: break;
    default: return 0;
    }

  return imm;
}
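
/* Two examples of the encoding above, worked by hand from the rules
   the function implements:

     N = 0, immr = 0, imms = 0x03: 0xxxxx so simd_size = 32; the
     element is (1 << 4) - 1 = 0xf, unrotated, replicated to give
     0x0000000f0000000f.

     N = 0, immr = 1, imms = 0x03: the 0xf element is rotated right
     by one to give 0x80000007, replicated to give
     0x8000000780000007.  */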

/* Instr[22,10] encodes N, immr and imms.  We want a lookup table
   for each possible combination, i.e. 13 bits worth of int entries.  */
#define LI_TABLE_SIZE (1 << 13)
static uint64_t LITable[LI_TABLE_SIZE];

void
aarch64_init_LIT_table (void)
{
  unsigned index;

  for (index = 0; index < LI_TABLE_SIZE; index++)
    {
      uint32_t N    = uimm (index, 12, 12);
      uint32_t immr = uimm (index, 11,  6);
      uint32_t imms = uimm (index,  5,  0);

      LITable [index] = expand_logical_immediate (imms, immr, N);
    }
}
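
/* Decode routines are then expected to look up instr[22,10] in this
   table; a zero entry marks an unallocated encoding, since zero is
   not a representable logical immediate.  */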

static void
dexNotify (sim_cpu *cpu)
{
  /* instr[14,0] == type : 0 ==> method entry, 1 ==> method reentry
                           2 ==> exit Java, 3 ==> start next bytecode.  */
  uint32_t type = INSTR (14, 0);

  TRACE_EVENTS (cpu, "Notify Insn encountered, type = 0x%x", type);

  switch (type)
    {
    case 0:
      /* aarch64_notifyMethodEntry (aarch64_get_reg_u64 (cpu, R23, 0),
         aarch64_get_reg_u64 (cpu, R22, 0));  */
      break;
    case 1:
      /* aarch64_notifyMethodReentry (aarch64_get_reg_u64 (cpu, R23, 0),
         aarch64_get_reg_u64 (cpu, R22, 0));  */
      break;
    case 2:
      /* aarch64_notifyMethodExit ();  */
      break;
    case 3:
      /* aarch64_notifyBCStart (aarch64_get_reg_u64 (cpu, R23, 0),
         aarch64_get_reg_u64 (cpu, R22, 0));  */
      break;
    }
}

/* Secondary decode within top level groups.  */

static void
dexPseudo (sim_cpu *cpu)
{
  /* assert instr[28,27] = 00

     We provide 2 pseudo instructions:

     HALT stops execution of the simulator causing an immediate
     return to the x86 code which entered it.

     CALLOUT initiates recursive entry into x86 code.  A register
     argument holds the address of the x86 routine.  Immediate
     values in the instruction identify the number of general
     purpose and floating point register arguments to be passed
     and the type of any value to be returned.  */

  uint32_t PSEUDO_HALT = 0xE0000000U;
  uint32_t PSEUDO_CALLOUT = 0x00018000U;
  uint32_t PSEUDO_CALLOUTR = 0x00018001U;
  uint32_t PSEUDO_NOTIFY = 0x00014000U;
  uint32_t dispatch;

  if (aarch64_get_instr (cpu) == PSEUDO_HALT)
    {
      TRACE_EVENTS (cpu, " Pseudo Halt Instruction");
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                       sim_stopped, SIM_SIGTRAP);
    }

  dispatch = INSTR (31, 15);

  /* We do not handle callouts at the moment.  */
  if (dispatch == PSEUDO_CALLOUT || dispatch == PSEUDO_CALLOUTR)
    {
      TRACE_EVENTS (cpu, " Callout");
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                       sim_stopped, SIM_SIGABRT);
    }

  else if (dispatch == PSEUDO_NOTIFY)
    dexNotify (cpu);

  else
    HALT_UNALLOC;
}

/* Load-store single register (unscaled offset)
   These instructions employ a base register plus an unscaled signed
   9 bit offset.

   N.B. the base register (source) can be Xn or SP.  All other
   registers may not be SP.  */

/* 32 bit load 32 bit unscaled signed 9 bit.  */
static void
ldur32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 64 bit load 64 bit unscaled signed 9 bit.  */
static void
ldur64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 32 bit load zero-extended byte unscaled signed 9 bit.  */
static void
ldurb32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 32 bit load sign-extended byte unscaled signed 9 bit.  */
static void
ldursb32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 64 bit load sign-extended byte unscaled signed 9 bit.  */
static void
ldursb64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s8
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 32 bit load zero-extended short unscaled signed 9 bit.  */
static void
ldurh32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_mem_u16
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 32 bit load sign-extended short unscaled signed 9 bit.  */
static void
ldursh32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s16
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 64 bit load sign-extended short unscaled signed 9 bit.  */
static void
ldursh64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s16
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 64 bit load sign-extended word unscaled signed 9 bit.  */
static void
ldursw (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rd, NO_SP, aarch64_get_mem_s32
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* N.B. with stores the value in source is written to the address
   identified by source2 modified by offset.  */

/* 32 bit store 32 bit unscaled signed 9 bit.  */
static void
stur32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                       aarch64_get_reg_u32 (cpu, rd, NO_SP));
}

/* 64 bit store 64 bit unscaled signed 9 bit.  */
static void
stur64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                       aarch64_get_reg_u64 (cpu, rd, NO_SP));
}

/* 32 bit store byte unscaled signed 9 bit.  */
static void
sturb (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u8 (cpu,
                      aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                      aarch64_get_reg_u8 (cpu, rd, NO_SP));
}

/* 32 bit store short unscaled signed 9 bit.  */
static void
sturh (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                       aarch64_get_reg_u16 (cpu, rd, NO_SP));
}

/* Load single register pc-relative label
   Offset is a signed 19 bit immediate count in words
   rt may not be SP.  */

/* 32 bit pc-relative load.  */
static void
ldr32_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_mem_u32
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* 64 bit pc-relative load.  */
static void
ldr_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_mem_u64
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* sign extended 32 bit pc-relative load.  */
static void
ldrsw_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_mem_s32
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* float pc-relative load.  */
static void
fldrs_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, rd, 0,
                       aarch64_get_mem_u32
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* double pc-relative load.  */
static void
fldrd_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int st = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, st, 0,
                       aarch64_get_mem_u64
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* long double pc-relative load.  */
static void
fldrq_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int st = INSTR (4, 0);
  uint64_t addr = aarch64_get_PC (cpu) + offset * 4;
  FRegister a;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_get_mem_long_double (cpu, addr, & a);
  aarch64_set_FP_long_double (cpu, st, a);
}

/* This can be used to scale an offset by applying
   the requisite shift.  The second argument is either
   16, 32, 64 or 128.  */

#define SCALE(_offset, _elementSize) \
    ((_offset) << ScaleShift ## _elementSize)

/* This can be used to optionally scale a register derived offset
   by applying the requisite shift as indicated by the Scaling
   argument.  The second argument is either 16, 32, 64 or 128.
   The third argument is either Scaled or Unscaled.
   N.B. when _Scaling is Scaled the offset is shifted by the
   element type's scale; when it is Unscaled the shift count
   is zero.  */

#define OPT_SCALE(_offset, _elementType, _Scaling) \
  ((_offset) << (_Scaling ? ScaleShift ## _elementType : 0))
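
/* For example, assuming ScaleShift32 is 2 (a 32 bit element is 4
   bytes), SCALE (offset, 32) yields offset << 2, while
   OPT_SCALE (offset, 32, scaling) yields offset << 2 when scaling is
   Scaled and offset unchanged when it is Unscaled.  */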

/* This can be used to zero or sign extend a 32 bit register derived
   value to a 64 bit value.  The first argument must be the value as
   a uint32_t and the second must be either UXTW or SXTW.  The result
   is returned as an int64_t.  */

static inline int64_t
extend (uint32_t value, Extension extension)
{
  union
  {
    uint32_t u;
    int32_t n;
  } x;

  /* A branchless variant of this ought to be possible.  */
  if (extension == UXTW || extension == NoExtension)
    return value;

  x.u = value;
  return x.n;
}
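
/* For example, extend (0xffffffff, SXTW) yields -1 while
   extend (0xffffffff, UXTW) yields 0xffffffff: the union above
   reinterprets the bits rather than converting the value.  */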

/* Scalar Floating Point

   FP load/store single register (4 addressing modes)

   N.B. the base register (source) can be the stack pointer.
   The secondary source register (source2) can only be an Xn register.  */

/* Load 32 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32 (cpu, address));
  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}
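
/* All of the _wb loads and stores below follow the same pattern: the
   offset is added before the access for Pre (and for NoWriteBack,
   which behaves like a plain base-plus-offset access), added after
   the access for Post, and the updated address is written back to
   the base register unless wb is NoWriteBack.  */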

/* Load 8 bit with unsigned 12 bit offset.  */
static void
fldrb_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u8 (cpu, rd, 0, aarch64_get_mem_u8 (cpu, addr));
}

/* Load 16 bit scaled unsigned 12 bit.  */
static void
fldrh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u16 (cpu, rd, 0, aarch64_get_mem_u16 (cpu, addr));
}

/* Load 32 bit scaled unsigned 12 bit.  */
static void
fldrs_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr));
}

/* Load 64 bit scaled unsigned 12 bit.  */
static void
fldrd_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
}

/* Load 128 bit scaled unsigned 12 bit.  */
static void
fldrq_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
  aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_mem_u64 (cpu, addr + 8));
}

/* Load 32 bit scaled or unscaled zero- or sign-extended
   32-bit register offset.  */
static void
fldrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
                       (cpu, address + displacement));
}

/* Load 64 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 64 bit scaled or unscaled zero- or sign-extended
   32-bit register offset.  */
static void
fldrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  fldrd_wb (cpu, displacement, NoWriteBack);
}

/* Load 128 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  FRegister a;
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_get_mem_long_double (cpu, address, & a);
  aarch64_set_FP_long_double (cpu, st, a);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 128 bit scaled or unscaled zero- or sign-extended
   32-bit register offset.  */
static void
fldrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 128, scaling);

  fldrq_wb (cpu, displacement, NoWriteBack);
}

/* Memory Access

   load-store single register
   There are four addressing modes available here which all employ a
   64 bit source (base) register.

   N.B. the base register (source) can be the stack pointer.
   The secondary source register (source2) can only be an Xn register.

   Scaled, 12-bit, unsigned immediate offset, without pre- and
   post-index options.
   Unscaled, 9-bit, signed immediate offset with pre- or post-index
   writeback.
   scaled or unscaled 64-bit register offset.
   scaled or unscaled 32-bit extended register offset.

   All offsets are assumed to be raw from the decode, i.e. the
   simulator is expected to adjust scaled offsets based on the
   accessed data size.  With register or extended register offset
   versions the same applies, except that in the latter case the
   operation may also require a sign extend.

   A separate method is provided for each possible addressing mode.  */

/* 32 bit load 32 bit scaled unsigned 12 bit.  */
static void
ldr32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + SCALE (offset, 32)));
}

/* 32 bit load 32 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
ldr32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load 32 bit scaled or unscaled
   zero- or sign-extended 32-bit register offset.  */
static void
ldr32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       aarch64_get_mem_u32 (cpu, address + displacement));
}

/* 64 bit load 64 bit scaled unsigned 12 bit.  */
static void
ldr_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + SCALE (offset, 64)));
}

/* 64 bit load 64 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
ldr_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load 64 bit scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldr_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       aarch64_get_mem_u64 (cpu, address + displacement));
}

/* 32 bit load zero-extended byte scaled unsigned 12 bit.  */
static void
ldrb32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be;
     there is no scaling required for a byte load.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       aarch64_get_mem_u8
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
}

/* 32 bit load zero-extended byte unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load zero-extended byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                 extension);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte load.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       aarch64_get_mem_u8 (cpu, address + displacement));
}

/* 64 bit load sign-extended byte unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;
  int64_t val;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s8 (cpu, address);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended byte scaled unsigned 12 bit.  */
static void
ldrsb_abs (sim_cpu *cpu, uint32_t offset)
{
  ldrsb_wb (cpu, offset, NoWriteBack);
}

/* 64 bit load sign-extended byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                 extension);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte load.  */
  aarch64_set_reg_s64 (cpu, rt, NO_SP,
                       aarch64_get_mem_s8 (cpu, address + displacement));
}

/* 32 bit load zero-extended short scaled unsigned 12 bit.  */
static void
ldrh32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint32_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 16));
  aarch64_set_reg_u32 (cpu, rt, NO_SP, val);
}

/* 32 bit load zero-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u32 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load zero-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u32 (cpu, rt, NO_SP,
                       aarch64_get_mem_u16 (cpu, address + displacement));
}

/* 32 bit load sign-extended short scaled unsigned 12 bit.  */
static void
ldrsh32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int32_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 16));
  aarch64_set_reg_s32 (cpu, rt, NO_SP, val);
}

/* 32 bit load sign-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s32 (cpu, rt, NO_SP,
                       (int32_t) aarch64_get_mem_s16 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load sign-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s32 (cpu, rt, NO_SP,
                       (int32_t) aarch64_get_mem_s16
                       (cpu, address + displacement));
}

/* 64 bit load sign-extended short scaled unsigned 12 bit.  */
static void
ldrsh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 16));
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsh64_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;
  int64_t val;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  val = aarch64_get_mem_s16 (cpu, address);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s16 (cpu, address + displacement);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended 32 bit scaled unsigned 12 bit.  */
static void
ldrsw_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 32));
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended 32 bit unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsw_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended 32 bit scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsw_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP,
                       aarch64_get_mem_s32 (cpu, address + displacement));
}

/* N.B. with stores the value in source is written to the
   address identified by source2 modified by source3/offset.  */

/* 32 bit store scaled unsigned 12 bit.  */
static void
str32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The data register (rt) may not be SP but the base (rn) may be.  */
  aarch64_set_mem_u32 (cpu, (aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 32)),
                       aarch64_get_reg_u32 (cpu, rt, NO_SP));
}

/* 32 bit store unscaled signed 9 bit with pre- or post-writeback.  */
static void
str32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu, address, aarch64_get_reg_u32 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store scaled or unscaled zero- or
   sign-extended 32-bit register offset.  */
static void
str32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu, address + displacement,
                       aarch64_get_reg_u32 (cpu, rt, NO_SP));
}

/* 64 bit store scaled unsigned 12 bit.  */
static void
str_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK)
                       + SCALE (offset, 64),
                       aarch64_get_reg_u64 (cpu, rt, NO_SP));
}

/* 64 bit store unscaled signed 9 bit with pre- or post-writeback.  */
static void
str_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, address, aarch64_get_reg_u64 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit store scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
str_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                             extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, address + displacement,
                       aarch64_get_reg_u64 (cpu, rt, NO_SP));
}

/* 32 bit store byte scaled unsigned 12 bit.  */
static void
strb_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The data register (rt) may not be SP but the base (rn) may be.
     There is no scaling required for a byte store.  */
  aarch64_set_mem_u8 (cpu,
                      aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                      aarch64_get_reg_u8 (cpu, rt, NO_SP));
}

/* 32 bit store byte unscaled signed 9 bit with pre- or post-writeback.  */
static void
strb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u8 (cpu, address, aarch64_get_reg_u8 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
strb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                 extension);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte store.  */
  aarch64_set_mem_u8 (cpu, address + displacement,
                      aarch64_get_reg_u8 (cpu, rt, NO_SP));
}

/* 32 bit store short scaled unsigned 12 bit.  */
static void
strh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The data register (rt) may not be SP but the base (rn) may be.  */
  aarch64_set_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                       + SCALE (offset, 16),
                       aarch64_get_reg_u16 (cpu, rt, NO_SP));
}

/* 32 bit store short unscaled signed 9 bit with pre- or post-writeback.  */
static void
strh_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu, address, aarch64_get_reg_u16 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
strh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu, address + displacement,
                       aarch64_get_reg_u16 (cpu, rt, NO_SP));
}

/* Prefetch unsigned 12 bit.  */
static void
prfm_abs (sim_cpu *cpu, uint32_t offset)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + SCALE (offset, 64).  */

  /* TODO : implement prefetch of address.  */
}

/* Prefetch scaled or unscaled zero- or sign-extended 32-bit register offset.  */
static void
prfm_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     rn may reference SP, rm may only reference ZR
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t base = aarch64_get_reg_u64 (cpu, rn, SP_OK);
     int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                extension);
     uint64_t displacement = OPT_SCALE (extended, 64, scaling);
     uint64_t address = base + displacement.  */

  /* TODO : implement prefetch of address.  */
}

/* 64 bit pc-relative prefetch.  */
static void
prfm_pcrel (sim_cpu *cpu, int32_t offset)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t address = aarch64_get_PC (cpu) + offset.  */

  /* TODO : implement this.  */
}

/* Load-store exclusive.  */

static void
ldxr (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int size = INSTR (31, 30);
  /* int ordered = INSTR (15, 15);  */
  /* int exclusive = ! INSTR (23, 23);  */

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (size)
    {
    case 0:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
      break;
    case 1:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
      break;
    case 2:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
      break;
    case 3:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
      break;
    }
}

static void
stxr (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  unsigned rs = INSTR (20, 16);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int size = INSTR (31, 30);
  uint64_t data = aarch64_get_reg_u64 (cpu, rt, NO_SP);

  switch (size)
    {
    case 0: aarch64_set_mem_u8 (cpu, address, data); break;
    case 1: aarch64_set_mem_u16 (cpu, address, data); break;
    case 2: aarch64_set_mem_u32 (cpu, address, data); break;
    case 3: aarch64_set_mem_u64 (cpu, address, data); break;
    }

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* Exclusive access is not modeled, so the store always succeeds.  */
  aarch64_set_reg_u64 (cpu, rs, NO_SP, 0);
}

static void
dexLoadLiteral (sim_cpu *cpu)
{
  /* instr[29,27] == 011
     instr[25,24] == 00
     instr[31,30:26] = opc: 000 ==> LDRW, 001 ==> FLDRS
                            010 ==> LDRX, 011 ==> FLDRD
                            100 ==> LDRSW, 101 ==> FLDRQ
                            110 ==> PRFM, 111 ==> UNALLOC
     instr[26] ==> V : 0 ==> GReg, 1 ==> FReg
     instr[23, 5] == simm19  */

  /* unsigned rt = INSTR (4, 0);  */
  uint32_t dispatch = (INSTR (31, 30) << 1) | INSTR (26, 26);
  int32_t imm = simm32 (aarch64_get_instr (cpu), 23, 5);

  switch (dispatch)
    {
    case 0: ldr32_pcrel (cpu, imm); break;
    case 1: fldrs_pcrel (cpu, imm); break;
    case 2: ldr_pcrel (cpu, imm); break;
    case 3: fldrd_pcrel (cpu, imm); break;
    case 4: ldrsw_pcrel (cpu, imm); break;
    case 5: fldrq_pcrel (cpu, imm); break;
    case 6: prfm_pcrel (cpu, imm); break;
    case 7:
    default:
      HALT_UNALLOC;
    }
}
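
/* For example, an LDR Xt, <label> has instr[31,30] = 01 and V = 0,
   giving dispatch = (1 << 1) | 0 = 2 and hence ldr_pcrel above.  */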

/* Immediate arithmetic
   The aimm argument is a 12 bit unsigned value or a 12 bit unsigned
   value left shifted by 12 bits (done at decode).

   N.B. the register args (dest, source) can normally be Xn or SP.
   The exception occurs for flag setting instructions which may
   only use Xn for the output (dest).  */

/* 32 bit add immediate.  */
static void
add32 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
                       aarch64_get_reg_u32 (cpu, rn, SP_OK) + aimm);
}

/* 64 bit add immediate.  */
static void
add64 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + aimm);
}

static void
set_flags_for_add32 (sim_cpu *cpu, int32_t value1, int32_t value2)
{
  int32_t result = value1 + value2;
  int64_t sresult = (int64_t) value1 + (int64_t) value2;
  uint64_t uresult = (uint64_t)(uint32_t) value1
    + (uint64_t)(uint32_t) value2;
  uint32_t flags = 0;

  if (result == 0)
    flags |= Z;

  if (result & (1 << 31))
    flags |= N;

  if (uresult != (uint32_t)uresult)
    flags |= C;

  if (sresult != (int32_t)sresult)
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}
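
/* For example, 0x7fffffff + 1 overflows as a signed addition but not
   as an unsigned one, so the code above sets N and V but leaves C and
   Z clear; 0xffffffff + 1 wraps to zero and sets Z and C instead.  */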

/* N.B. the NEG and POS predicates below rely on a local variable,
   signbit, holding the sign bit of the operand width.  */
#define NEG(a) (((a) & signbit) == signbit)
#define POS(a) (((a) & signbit) == 0)

static void
set_flags_for_add64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
{
  uint64_t result = value1 + value2;
  uint32_t flags = 0;
  uint64_t signbit = 1ULL << 63;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if ( (NEG (value1) && NEG (value2))
      || (NEG (value1) && POS (result))
      || (NEG (value2) && POS (result)))
    flags |= C;

  if ( (NEG (value1) && NEG (value2) && POS (result))
      || (POS (value1) && POS (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_sub32 (sim_cpu *cpu, uint32_t value1, uint32_t value2)
{
  uint32_t result = value1 - value2;
  uint32_t flags = 0;
  uint32_t signbit = 1U << 31;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if ( (NEG (value1) && POS (value2))
      || (NEG (value1) && POS (result))
      || (POS (value2) && POS (result)))
    flags |= C;

  if ( (NEG (value1) && POS (value2) && POS (result))
      || (POS (value1) && NEG (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}
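
/* The carry predicate above follows the AArch64 convention that C is
   set when a subtraction does not borrow, i.e. when value1 >= value2
   as unsigned integers; for example 3 - 5 leaves C clear while 5 - 3
   (and 0 - 0) set it.  */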

static void
set_flags_for_sub64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
{
  uint64_t result = value1 - value2;
  uint32_t flags = 0;
  uint64_t signbit = 1ULL << 63;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if ( (NEG (value1) && POS (value2))
      || (NEG (value1) && POS (result))
      || (POS (value2) && POS (result)))
    flags |= C;

  if ( (NEG (value1) && POS (value2) && POS (result))
      || (POS (value1) && NEG (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_binop32 (sim_cpu *cpu, uint32_t result)
{
  uint32_t flags = 0;

  if (result == 0)
    flags |= Z;
  else
    flags &= ~ Z;

  if (result & (1 << 31))
    flags |= N;
  else
    flags &= ~ N;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_binop64 (sim_cpu *cpu, uint64_t result)
{
  uint32_t flags = 0;

  if (result == 0)
    flags |= Z;
  else
    flags &= ~ Z;

  if (result & (1ULL << 63))
    flags |= N;
  else
    flags &= ~ N;

  aarch64_set_CPSR (cpu, flags);
}
1779
1780 /* 32 bit add immediate set flags. */
1781 static void
1782 adds32 (sim_cpu *cpu, uint32_t aimm)
1783 {
1784 unsigned rn = INSTR (9, 5);
1785 unsigned rd = INSTR (4, 0);
1786 /* TODO : do we need to worry about signs here? */
1787 int32_t value1 = aarch64_get_reg_s32 (cpu, rn, SP_OK);
1788
1789 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1790 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + aimm);
1791 set_flags_for_add32 (cpu, value1, aimm);
1792 }
1793
1794 /* 64 bit add immediate set flags. */
1795 static void
1796 adds64 (sim_cpu *cpu, uint32_t aimm)
1797 {
1798 unsigned rn = INSTR (9, 5);
1799 unsigned rd = INSTR (4, 0);
1800 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1801 uint64_t value2 = aimm;
1802
1803 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1804 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1805 set_flags_for_add64 (cpu, value1, value2);
1806 }
1807
1808 /* 32 bit sub immediate. */
1809 static void
1810 sub32 (sim_cpu *cpu, uint32_t aimm)
1811 {
1812 unsigned rn = INSTR (9, 5);
1813 unsigned rd = INSTR (4, 0);
1814
1815 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1816 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1817 aarch64_get_reg_u32 (cpu, rn, SP_OK) - aimm);
1818 }
1819
1820 /* 64 bit sub immediate. */
1821 static void
1822 sub64 (sim_cpu *cpu, uint32_t aimm)
1823 {
1824 unsigned rn = INSTR (9, 5);
1825 unsigned rd = INSTR (4, 0);
1826
1827 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1828 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1829 aarch64_get_reg_u64 (cpu, rn, SP_OK) - aimm);
1830 }
1831
1832 /* 32 bit sub immediate set flags. */
1833 static void
1834 subs32 (sim_cpu *cpu, uint32_t aimm)
1835 {
1836 unsigned rn = INSTR (9, 5);
1837 unsigned rd = INSTR (4, 0);
1838 uint32_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1839 uint32_t value2 = aimm;
1840
1841 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1842 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1843 set_flags_for_sub32 (cpu, value1, value2);
1844 }
1845
1846 /* 64 bit sub immediate set flags. */
1847 static void
1848 subs64 (sim_cpu *cpu, uint32_t aimm)
1849 {
1850 unsigned rn = INSTR (9, 5);
1851 unsigned rd = INSTR (4, 0);
1852 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1853 uint64_t value2 = aimm;
1854
1855 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1856 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1857 set_flags_for_sub64 (cpu, value1, value2);
1858 }
1859
1860 /* Data Processing Register. */
1861
1862 /* First two helpers to perform the shift operations. */
1863
1864 static inline uint32_t
1865 shifted32 (uint32_t value, Shift shift, uint32_t count)
1866 {
1867 switch (shift)
1868 {
1869 default:
1870 case LSL:
1871 return (value << count);
1872 case LSR:
1873 return (value >> count);
1874 case ASR:
1875 {
1876 int32_t svalue = value;
1877 return (svalue >> count);
1878 }
1879 case ROR:
1880 {
1881 uint32_t top = value >> count;
1882 uint32_t bottom = value << (32 - count);
1883 return (bottom | top);
1884 }
1885 }
1886 }
1887
1888 static inline uint64_t
1889 shifted64 (uint64_t value, Shift shift, uint32_t count)
1890 {
1891 switch (shift)
1892 {
1893 default:
1894 case LSL:
1895 return (value << count);
1896 case LSR:
1897 return (value >> count);
1898 case ASR:
1899 {
1900 int64_t svalue = value;
1901 return (svalue >> count);
1902 }
1903 case ROR:
1904 {
1905 uint64_t top = value >> count;
1906 uint64_t bottom = value << (64 - count);
1907 return (bottom | top);
1908 }
1909 }
1910 }
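
/* N.B. the ROR arms above build the rotate from two shifts:
   ror (x, n) == (x >> n) | (x << (width - n)).  For n == 0 the second
   shift would be by the full register width, which is undefined in C,
   so a zero rotate count needs care in the callers.  Illustrative
   checks, kept out of the build (assumes <assert.h>; values
   hand-computed):  */
#if 0
static void
check_shifted32 (void)
{
  /* Rotating 0x80000001 right by 1 moves bit 0 up to bit 31.  */
  assert (shifted32 (0x80000001, ROR, 1) == 0xC0000000);
  /* An arithmetic shift replicates the sign bit.  */
  assert (shifted32 (0x80000000, ASR, 31) == 0xFFFFFFFF);
}
#endif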
1911
1912 /* Arithmetic shifted register.
1913 These allow an optional LSL, ASR or LSR to the second source
1914 register with a count up to the register bit count.
1915
1916 N.B. register args may not be SP. */
1917
1918 /* 32 bit ADD shifted register. */
1919 static void
1920 add32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1921 {
1922 unsigned rm = INSTR (20, 16);
1923 unsigned rn = INSTR (9, 5);
1924 unsigned rd = INSTR (4, 0);
1925
1926 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1927 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1928 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1929 + shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1930 shift, count));
1931 }
1932
1933 /* 64 bit ADD shifted register. */
1934 static void
1935 add64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1936 {
1937 unsigned rm = INSTR (20, 16);
1938 unsigned rn = INSTR (9, 5);
1939 unsigned rd = INSTR (4, 0);
1940
1941 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1942 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1943 aarch64_get_reg_u64 (cpu, rn, NO_SP)
1944 + shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1945 shift, count));
1946 }
1947
1948 /* 32 bit ADD shifted register setting flags. */
1949 static void
1950 adds32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1951 {
1952 unsigned rm = INSTR (20, 16);
1953 unsigned rn = INSTR (9, 5);
1954 unsigned rd = INSTR (4, 0);
1955
1956 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
1957 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1958 shift, count);
1959
1960 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1961 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1962 set_flags_for_add32 (cpu, value1, value2);
1963 }
1964
1965 /* 64 bit ADD shifted register setting flags. */
1966 static void
1967 adds64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1968 {
1969 unsigned rm = INSTR (20, 16);
1970 unsigned rn = INSTR (9, 5);
1971 unsigned rd = INSTR (4, 0);
1972
1973 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
1974 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1975 shift, count);
1976
1977 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1978 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1979 set_flags_for_add64 (cpu, value1, value2);
1980 }
1981
1982 /* 32 bit SUB shifted register. */
1983 static void
1984 sub32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1985 {
1986 unsigned rm = INSTR (20, 16);
1987 unsigned rn = INSTR (9, 5);
1988 unsigned rd = INSTR (4, 0);
1989
1990 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1991 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1992 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1993 - shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1994 shift, count));
1995 }
1996
1997 /* 64 bit SUB shifted register. */
1998 static void
1999 sub64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2000 {
2001 unsigned rm = INSTR (20, 16);
2002 unsigned rn = INSTR (9, 5);
2003 unsigned rd = INSTR (4, 0);
2004
2005 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2006 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2007 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2008 - shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
2009 shift, count));
2010 }
2011
2012 /* 32 bit SUB shifted register setting flags. */
2013 static void
2014 subs32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2015 {
2016 unsigned rm = INSTR (20, 16);
2017 unsigned rn = INSTR (9, 5);
2018 unsigned rd = INSTR (4, 0);
2019
2020 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2021 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
2022 shift, count);
2023
2024 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2025 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2026 set_flags_for_sub32 (cpu, value1, value2);
2027 }
2028
2029 /* 64 bit SUB shifted register setting flags. */
2030 static void
2031 subs64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2032 {
2033 unsigned rm = INSTR (20, 16);
2034 unsigned rn = INSTR (9, 5);
2035 unsigned rd = INSTR (4, 0);
2036
2037 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2038 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
2039 shift, count);
2040
2041 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2042 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2043 set_flags_for_sub64 (cpu, value1, value2);
2044 }
2045
2046 /* First a couple more helpers to fetch the
2047 relevant source register element either
2048 sign or zero extended as required by the
2049 extension value. */
2050
2051 static uint32_t
2052 extreg32 (sim_cpu *cpu, unsigned int lo, Extension extension)
2053 {
2054 switch (extension)
2055 {
2056 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2057 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2058 case UXTW: /* Fall through. */
2059 case UXTX: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2060 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2061 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2062 case SXTW: /* Fall through. */
2063 case SXTX: /* Fall through. */
2064 default: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2065 }
2066 }
2067
2068 static uint64_t
2069 extreg64 (sim_cpu *cpu, unsigned int lo, Extension extension)
2070 {
2071 switch (extension)
2072 {
2073 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2074 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2075 case UXTW: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2076 case UXTX: return aarch64_get_reg_u64 (cpu, lo, NO_SP);
2077 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2078 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2079 case SXTW: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2080 case SXTX:
2081 default: return aarch64_get_reg_s64 (cpu, lo, NO_SP);
2082 }
2083 }
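
/* Combining extreg32/extreg64 with the decode-time shift yields the
   second operand of the extended-register forms below.  A hand-worked
   sketch, kept out of the build ("extended_operand2" is a hypothetical
   name):  */
#if 0
static uint64_t
extended_operand2 (sim_cpu *cpu)
{
  /* For an operand written as W1, UXTB #2: if W1 holds 0x1234, UXTB
     keeps 0x34 and the shift gives 0x34 << 2 == 0xd0.  */
  return extreg64 (cpu, 1, UXTB) << 2;
}
#endif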
2084
2085 /* Arithmetic extending register.
2086 These allow an optional sign or zero extension of some portion of
2087 the second source register followed by an optional left shift of
2088 between 0 and 4 bits.
2089
2090 N.B. output (dest) and first input arg (source) may normally be Xn
2091 or SP. However, for flag setting operations dest can only be
2092 Xn. Second input registers are always Xn. */
2093
2094 /* 32 bit ADD extending register. */
2095 static void
2096 add32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2097 {
2098 unsigned rm = INSTR (20, 16);
2099 unsigned rn = INSTR (9, 5);
2100 unsigned rd = INSTR (4, 0);
2101
2102 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2103 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2104 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2105 + (extreg32 (cpu, rm, extension) << shift));
2106 }
2107
2108 /* 64 bit ADD extending register.
2109 N.B. This subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2110 static void
2111 add64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2112 {
2113 unsigned rm = INSTR (20, 16);
2114 unsigned rn = INSTR (9, 5);
2115 unsigned rd = INSTR (4, 0);
2116
2117 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2118 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2119 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2120 + (extreg64 (cpu, rm, extension) << shift));
2121 }
2122
2123 /* 32 bit ADD extending register setting flags. */
2124 static void
2125 adds32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2126 {
2127 unsigned rm = INSTR (20, 16);
2128 unsigned rn = INSTR (9, 5);
2129 unsigned rd = INSTR (4, 0);
2130
2131 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2132 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2133
2134 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2135 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2136 set_flags_for_add32 (cpu, value1, value2);
2137 }
2138
2139 /* 64 bit ADD extending register setting flags */
2140 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2141 static void
2142 adds64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2143 {
2144 unsigned rm = INSTR (20, 16);
2145 unsigned rn = INSTR (9, 5);
2146 unsigned rd = INSTR (4, 0);
2147
2148 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2149 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2150
2151 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2152 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2153 set_flags_for_add64 (cpu, value1, value2);
2154 }
2155
2156 /* 32 bit SUB extending register. */
2157 static void
2158 sub32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2159 {
2160 unsigned rm = INSTR (20, 16);
2161 unsigned rn = INSTR (9, 5);
2162 unsigned rd = INSTR (4, 0);
2163
2164 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2165 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2166 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2167 - (extreg32 (cpu, rm, extension) << shift));
2168 }
2169
2170 /* 64 bit SUB extending register. */
2171 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2172 static void
2173 sub64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2174 {
2175 unsigned rm = INSTR (20, 16);
2176 unsigned rn = INSTR (9, 5);
2177 unsigned rd = INSTR (4, 0);
2178
2179 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2180 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2181 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2182 - (extreg64 (cpu, rm, extension) << shift));
2183 }
2184
2185 /* 32 bit SUB extending register setting flags. */
2186 static void
2187 subs32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2188 {
2189 unsigned rm = INSTR (20, 16);
2190 unsigned rn = INSTR (9, 5);
2191 unsigned rd = INSTR (4, 0);
2192
2193 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2194 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2195
2196 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2197 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2198 set_flags_for_sub32 (cpu, value1, value2);
2199 }
2200
2201 /* 64 bit SUB extending register setting flags */
2202 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2203 static void
2204 subs64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2205 {
2206 unsigned rm = INSTR (20, 16);
2207 unsigned rn = INSTR (9, 5);
2208 unsigned rd = INSTR (4, 0);
2209
2210 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2211 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2212
2213 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2214 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2215 set_flags_for_sub64 (cpu, value1, value2);
2216 }
2217
2218 static void
2219 dexAddSubtractImmediate (sim_cpu *cpu)
2220 {
2221 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2222 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2223 instr[29] = set : 0 ==> no flags, 1 ==> set flags
2224 instr[28,24] = 10001
2225 instr[23,22] = shift : 00 ==> LSL#0, 01 ==> LSL#12, 1x ==> UNALLOC
2226 instr[21,10] = uimm12
2227 instr[9,5] = Rn
2228 instr[4,0] = Rd */
2229
2230 /* N.B. the shift is applied at decode before calling the add/sub routine. */
2231 uint32_t shift = INSTR (23, 22);
2232 uint32_t imm = INSTR (21, 10);
2233 uint32_t dispatch = INSTR (31, 29);
2234
2235 NYI_assert (28, 24, 0x11);
2236
2237 if (shift > 1)
2238 HALT_UNALLOC;
2239
2240 if (shift)
2241 imm <<= 12;
2242
2243 switch (dispatch)
2244 {
2245 case 0: add32 (cpu, imm); break;
2246 case 1: adds32 (cpu, imm); break;
2247 case 2: sub32 (cpu, imm); break;
2248 case 3: subs32 (cpu, imm); break;
2249 case 4: add64 (cpu, imm); break;
2250 case 5: adds64 (cpu, imm); break;
2251 case 6: sub64 (cpu, imm); break;
2252 case 7: subs64 (cpu, imm); break;
2253 }
2254 }
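
/* A hand-worked decode of the layout above, kept out of the build
   ("show_decode" is a hypothetical name and the word was assembled by
   hand from the field description, so verify before relying on it):
   0x313ffc41 is ADDS W1, W2, #4095 -- size:op:set = 001, so the
   dispatch picks adds32.  Assumes <assert.h>.  */
#if 0
static void
show_decode (void)
{
  uint32_t insn = 0x313ffc41;

  assert (uimm (insn, 31, 29) == 1);      /* size:op:set ==> adds32.  */
  assert (uimm (insn, 23, 22) == 0);      /* LSL #0.  */
  assert (uimm (insn, 21, 10) == 0xfff);  /* uimm12.  */
  assert (uimm (insn, 9, 5) == 2);        /* Rn = W2.  */
  assert (uimm (insn, 4, 0) == 1);        /* Rd = W1.  */
}
#endif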
2255
2256 static void
2257 dexAddSubtractShiftedRegister (sim_cpu *cpu)
2258 {
2259 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2260 instr[30,29] = op : 00 ==> ADD, 01 ==> ADDS, 10 ==> SUB, 11 ==> SUBS
2261 instr[28,24] = 01011
2262 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> UNALLOC
2263 instr[21] = 0
2264 instr[20,16] = Rm
2265 instr[15,10] = count : must be 0xxxxx for 32 bit
2266 instr[9,5] = Rn
2267 instr[4,0] = Rd */
2268
2269 uint32_t size = INSTR (31, 31);
2270 uint32_t count = INSTR (15, 10);
2271 Shift shiftType = INSTR (23, 22);
2272
2273 NYI_assert (28, 24, 0x0B);
2274 NYI_assert (21, 21, 0);
2275
2276 /* Shift encoded as ROR is unallocated. */
2277 if (shiftType == ROR)
2278 HALT_UNALLOC;
2279
2280 /* 32 bit operations must have count[5] = 0
2281 or else we have an UNALLOC. */
2282 if (size == 0 && uimm (count, 5, 5))
2283 HALT_UNALLOC;
2284
2285 /* Dispatch on size:op i.e instr [31,29]. */
2286 switch (INSTR (31, 29))
2287 {
2288 case 0: add32_shift (cpu, shiftType, count); break;
2289 case 1: adds32_shift (cpu, shiftType, count); break;
2290 case 2: sub32_shift (cpu, shiftType, count); break;
2291 case 3: subs32_shift (cpu, shiftType, count); break;
2292 case 4: add64_shift (cpu, shiftType, count); break;
2293 case 5: adds64_shift (cpu, shiftType, count); break;
2294 case 6: sub64_shift (cpu, shiftType, count); break;
2295 case 7: subs64_shift (cpu, shiftType, count); break;
2296 }
2297 }
2298
2299 static void
2300 dexAddSubtractExtendedRegister (sim_cpu *cpu)
2301 {
2302 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2303 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2304 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2305 instr[28,24] = 01011
2306 instr[23,22] = opt : 0 ==> ok, 1,2,3 ==> UNALLOC
2307 instr[21] = 1
2308 instr[20,16] = Rm
2309 instr[15,13] = option : 000 ==> UXTB, 001 ==> UXTH,
2310 010 ==> UXTW/LSL, 011 ==> UXTX,
2311 100 ==> SXTB, 101 ==> SXTH,
2312 110 ==> SXTW, 111 ==> SXTX,
2313 instr[12,10] = shift : 0,1,2,3,4 ==> ok, 5,6,7 ==> UNALLOC
2314 instr[9,5] = Rn
2315 instr[4,0] = Rd */
2316
2317 Extension extensionType = INSTR (15, 13);
2318 uint32_t shift = INSTR (12, 10);
2319
2320 NYI_assert (28, 24, 0x0B);
2321 NYI_assert (21, 21, 1);
2322
2323 /* Shift may not exceed 4. */
2324 if (shift > 4)
2325 HALT_UNALLOC;
2326
2327 /* Dispatch on size:op:set?. */
2328 switch (INSTR (31, 29))
2329 {
2330 case 0: add32_ext (cpu, extensionType, shift); break;
2331 case 1: adds32_ext (cpu, extensionType, shift); break;
2332 case 2: sub32_ext (cpu, extensionType, shift); break;
2333 case 3: subs32_ext (cpu, extensionType, shift); break;
2334 case 4: add64_ext (cpu, extensionType, shift); break;
2335 case 5: adds64_ext (cpu, extensionType, shift); break;
2336 case 6: sub64_ext (cpu, extensionType, shift); break;
2337 case 7: subs64_ext (cpu, extensionType, shift); break;
2338 }
2339 }
2340
2341 /* Conditional data processing
2342 Condition register is implicit 3rd source. */
2343
2344 /* 32 bit add with carry. */
2345 /* N.B register args may not be SP. */
2346
2347 static void
2348 adc32 (sim_cpu *cpu)
2349 {
2350 unsigned rm = INSTR (20, 16);
2351 unsigned rn = INSTR (9, 5);
2352 unsigned rd = INSTR (4, 0);
2353
2354 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2355 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2356 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2357 + aarch64_get_reg_u32 (cpu, rm, NO_SP)
2358 + IS_SET (C));
2359 }
2360
2361 /* 64 bit add with carry */
2362 static void
2363 adc64 (sim_cpu *cpu)
2364 {
2365 unsigned rm = INSTR (20, 16);
2366 unsigned rn = INSTR (9, 5);
2367 unsigned rd = INSTR (4, 0);
2368
2369 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2370 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2371 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2372 + aarch64_get_reg_u64 (cpu, rm, NO_SP)
2373 + IS_SET (C));
2374 }
2375
2376 /* 32 bit add with carry setting flags. */
2377 static void
2378 adcs32 (sim_cpu *cpu)
2379 {
2380 unsigned rm = INSTR (20, 16);
2381 unsigned rn = INSTR (9, 5);
2382 unsigned rd = INSTR (4, 0);
2383
2384 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2385 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2386 uint32_t carry = IS_SET (C);
2387
2388 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2389 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2390 set_flags_for_add32 (cpu, value1, value2 + carry);
2391 }
2392
2393 /* 64 bit add with carry setting flags. */
2394 static void
2395 adcs64 (sim_cpu *cpu)
2396 {
2397 unsigned rm = INSTR (20, 16);
2398 unsigned rn = INSTR (9, 5);
2399 unsigned rd = INSTR (4, 0);
2400
2401 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2402 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2403 uint64_t carry = IS_SET (C);
2404
2405 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2406 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2407 set_flags_for_add64 (cpu, value1, value2 + carry);
2408 }
2409
2410 /* 32 bit sub with carry. */
2411 static void
2412 sbc32 (sim_cpu *cpu)
2413 {
2414 unsigned rm = INSTR (20, 16);
2415 unsigned rn = INSTR (9, 5); /* ngc iff rn == 31. */
2416 unsigned rd = INSTR (4, 0);
2417
2418 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2419 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2420 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2421 - aarch64_get_reg_u32 (cpu, rm, NO_SP)
2422 - 1 + IS_SET (C));
2423 }
2424
2425 /* 64 bit sub with carry */
2426 static void
2427 sbc64 (sim_cpu *cpu)
2428 {
2429 unsigned rm = INSTR (20, 16);
2430 unsigned rn = INSTR (9, 5);
2431 unsigned rd = INSTR (4, 0);
2432
2433 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2434 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2435 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2436 - aarch64_get_reg_u64 (cpu, rm, NO_SP)
2437 - 1 + IS_SET (C));
2438 }
2439
2440 /* 32 bit sub with carry setting flags */
2441 static void
2442 sbcs32 (sim_cpu *cpu)
2443 {
2444 unsigned rm = INSTR (20, 16);
2445 unsigned rn = INSTR (9, 5);
2446 unsigned rd = INSTR (4, 0);
2447
2448 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2449 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2450 uint32_t carry = IS_SET (C);
2451 uint32_t result = value1 - value2 - 1 + carry;
2452
2453 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2454 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2455 set_flags_for_sub32 (cpu, value1, value2 + 1 - carry);
2456 }
2457
2458 /* 64 bit sub with carry setting flags */
2459 static void
2460 sbcs64 (sim_cpu *cpu)
2461 {
2462 unsigned rm = INSTR (20, 16);
2463 unsigned rn = INSTR (9, 5);
2464 unsigned rd = INSTR (4, 0);
2465
2466 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2467 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2468 uint64_t carry = IS_SET (C);
2469 uint64_t result = value1 - value2 - 1 + carry;
2470
2471 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2472 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2473 set_flags_for_sub64 (cpu, value1, value2 + 1 - carry);
2474 }
2475
2476 static void
2477 dexAddSubtractWithCarry (sim_cpu *cpu)
2478 {
2479 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2480 instr[30] = op : 0 ==> ADC, 1 ==> SBC
2481 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2482 instr[28,21] = 1 1010 000
2483 instr[20,16] = Rm
2484 instr[15,10] = op2 : 000000 ==> ok, otherwise ==> UNALLOC
2485 instr[9,5] = Rn
2486 instr[4,0] = Rd */
2487
2488 uint32_t op2 = INSTR (15, 10);
2489
2490 NYI_assert (28, 21, 0xD0);
2491
2492 if (op2 != 0)
2493 HALT_UNALLOC;
2494
2495 /* Dispatch on size:op:set?. */
2496 switch (INSTR (31, 29))
2497 {
2498 case 0: adc32 (cpu); break;
2499 case 1: adcs32 (cpu); break;
2500 case 2: sbc32 (cpu); break;
2501 case 3: sbcs32 (cpu); break;
2502 case 4: adc64 (cpu); break;
2503 case 5: adcs64 (cpu); break;
2504 case 6: sbc64 (cpu); break;
2505 case 7: sbcs64 (cpu); break;
2506 }
2507 }
2508
2509 static uint32_t
2510 testConditionCode (sim_cpu *cpu, CondCode cc)
2511 {
2512 /* This should be reducible to branchless logic
2513 by some careful testing of bits in CC followed
2514 by the requisite masking and combining of bits
2515 from the flag register.
2516
2517 For now we do it with a switch. */
2518 int res;
2519
2520 switch (cc)
2521 {
2522 case EQ: res = IS_SET (Z); break;
2523 case NE: res = IS_CLEAR (Z); break;
2524 case CS: res = IS_SET (C); break;
2525 case CC: res = IS_CLEAR (C); break;
2526 case MI: res = IS_SET (N); break;
2527 case PL: res = IS_CLEAR (N); break;
2528 case VS: res = IS_SET (V); break;
2529 case VC: res = IS_CLEAR (V); break;
2530 case HI: res = IS_SET (C) && IS_CLEAR (Z); break;
2531 case LS: res = IS_CLEAR (C) || IS_SET (Z); break;
2532 case GE: res = IS_SET (N) == IS_SET (V); break;
2533 case LT: res = IS_SET (N) != IS_SET (V); break;
2534 case GT: res = IS_CLEAR (Z) && (IS_SET (N) == IS_SET (V)); break;
2535 case LE: res = IS_SET (Z) || (IS_SET (N) != IS_SET (V)); break;
2536 case AL:
2537 case NV:
2538 default:
2539 res = 1;
2540 break;
2541 }
2542 return res;
2543 }
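
/* The halving the comment above alludes to: cond<3:1> selects one of
   eight base tests and cond<0> inverts the result, except that 1111
   (NV) still means "always".  A sketch, kept out of the build
   ("test_cc_folded" is a hypothetical name):  */
#if 0
static uint32_t
test_cc_folded (sim_cpu *cpu, CondCode cc)
{
  int res;

  switch (cc >> 1)
    {
    case 0: res = IS_SET (Z); break;                      /* EQ/NE.  */
    case 1: res = IS_SET (C); break;                      /* CS/CC.  */
    case 2: res = IS_SET (N); break;                      /* MI/PL.  */
    case 3: res = IS_SET (V); break;                      /* VS/VC.  */
    case 4: res = IS_SET (C) && IS_CLEAR (Z); break;      /* HI/LS.  */
    case 5: res = IS_SET (N) == IS_SET (V); break;        /* GE/LT.  */
    case 6: res = IS_CLEAR (Z)
	      && (IS_SET (N) == IS_SET (V)); break;       /* GT/LE.  */
    default: res = 1; break;                              /* AL/NV.  */
    }

  if ((cc & 1) && cc != NV)
    res = ! res;
  return res;
}
#endif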
2544
2545 static void
2546 CondCompare (sim_cpu *cpu) /* aka: ccmp and ccmn */
2547 {
2548 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2549 instr[30] = op : 0 ==> CCMN (compare negated), 1 ==> CCMP (compare)
2550 instr[29,21] = 1 1101 0010
2551 instr[20,16] = Rm or const
2552 instr[15,12] = cond
2553 instr[11] = compare reg (0) or const (1)
2554 instr[10] = 0
2555 instr[9,5] = Rn
2556 instr[4] = 0
2557 instr[3,0] = value for CPSR bits if the comparison does not take place. */
2558 signed int negate;
2559 unsigned rm;
2560 unsigned rn;
2561
2562 NYI_assert (29, 21, 0x1d2);
2563 NYI_assert (10, 10, 0);
2564 NYI_assert (4, 4, 0);
2565
2566 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2567 if (! testConditionCode (cpu, INSTR (15, 12)))
2568 {
2569 aarch64_set_CPSR (cpu, INSTR (3, 0));
2570 return;
2571 }
2572
2573 negate = INSTR (30, 30) ? 1 : -1;
2574 rm = INSTR (20, 16);
2575 rn = INSTR ( 9, 5);
2576
2577 if (INSTR (31, 31))
2578 {
2579 if (INSTR (11, 11))
2580 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2581 negate * (uint64_t) rm);
2582 else
2583 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2584 negate * aarch64_get_reg_u64 (cpu, rm, SP_OK));
2585 }
2586 else
2587 {
2588 if (INSTR (11, 11))
2589 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2590 negate * rm);
2591 else
2592 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2593 negate * aarch64_get_reg_u32 (cpu, rm, SP_OK));
2594 }
2595 }
2596
2597 static void
2598 do_vec_MOV_whole_vector (sim_cpu *cpu)
2599 {
2600 /* MOV Vd.T, Vs.T (alias for ORR Vd.T, Vn.T, Vm.T where Vn == Vm)
2601
2602 instr[31] = 0
2603 instr[30] = half(0)/full(1)
2604 instr[29,21] = 001110101
2605 instr[20,16] = Vs
2606 instr[15,10] = 000111
2607 instr[9,5] = Vs
2608 instr[4,0] = Vd */
2609
2610 unsigned vs = INSTR (9, 5);
2611 unsigned vd = INSTR (4, 0);
2612
2613 NYI_assert (29, 21, 0x075);
2614 NYI_assert (15, 10, 0x07);
2615
2616 if (INSTR (20, 16) != vs)
2617 HALT_NYI;
2618
2619 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2620 if (INSTR (30, 30))
2621 aarch64_set_vec_u64 (cpu, vd, 1, aarch64_get_vec_u64 (cpu, vs, 1));
2622
2623 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vs, 0));
2624 }
2625
2626 static void
2627 do_vec_SMOV_into_scalar (sim_cpu *cpu)
2628 {
2629 /* instr[31] = 0
2630 instr[30] = word(0)/long(1)
2631 instr[29,21] = 00 1110 000
2632 instr[20,16] = element size and index
2633 instr[15,10] = 00 0010 11
2634 instr[9,5] = V source
2635 instr[4,0] = R dest */
2636
2637 unsigned vs = INSTR (9, 5);
2638 unsigned rd = INSTR (4, 0);
2639 unsigned imm5 = INSTR (20, 16);
2640 unsigned full = INSTR (30, 30);
2641 int size, index;
2642
2643 NYI_assert (29, 21, 0x070);
2644 NYI_assert (15, 10, 0x0B);
2645
2646 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2647
2648 if (imm5 & 0x1)
2649 {
2650 size = 0;
2651 index = (imm5 >> 1) & 0xF;
2652 }
2653 else if (imm5 & 0x2)
2654 {
2655 size = 1;
2656 index = (imm5 >> 2) & 0x7;
2657 }
2658 else if (full && (imm5 & 0x4))
2659 {
2660 size = 2;
2661 index = (imm5 >> 3) & 0x3;
2662 }
2663 else
2664 HALT_UNALLOC;
2665
2666 switch (size)
2667 {
2668 case 0:
2669 if (full)
2670 aarch64_set_reg_s64 (cpu, rd, NO_SP,
2671 aarch64_get_vec_s8 (cpu, vs, index));
2672 else
2673 aarch64_set_reg_s32 (cpu, rd, NO_SP,
2674 aarch64_get_vec_s8 (cpu, vs, index));
2675 break;
2676
2677 case 1:
2678 if (full)
2679 aarch64_set_reg_s64 (cpu, rd, NO_SP,
2680 aarch64_get_vec_s16 (cpu, vs, index));
2681 else
2682 aarch64_set_reg_s32 (cpu, rd, NO_SP,
2683 aarch64_get_vec_s16 (cpu, vs, index));
2684 break;
2685
2686 case 2:
2687 aarch64_set_reg_s64 (cpu, rd, NO_SP,
2688 aarch64_get_vec_s32 (cpu, vs, index));
2689 break;
2690
2691 default:
2692 HALT_UNALLOC;
2693 }
2694 }
2695
2696 static void
2697 do_vec_UMOV_into_scalar (sim_cpu *cpu)
2698 {
2699 /* instr[31] = 0
2700 instr[30] = word(0)/long(1)
2701 instr[29,21] = 00 1110 000
2702 instr[20,16] = element size and index
2703 instr[15,10] = 00 0011 11
2704 instr[9,5] = V source
2705 instr[4,0] = R dest */
2706
2707 unsigned vs = INSTR (9, 5);
2708 unsigned rd = INSTR (4, 0);
2709 unsigned imm5 = INSTR (20, 16);
2710 unsigned full = INSTR (30, 30);
2711 int size, index;
2712
2713 NYI_assert (29, 21, 0x070);
2714 NYI_assert (15, 10, 0x0F);
2715
2716 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2717
2718 if (!full)
2719 {
2720 if (imm5 & 0x1)
2721 {
2722 size = 0;
2723 index = (imm5 >> 1) & 0xF;
2724 }
2725 else if (imm5 & 0x2)
2726 {
2727 size = 1;
2728 index = (imm5 >> 2) & 0x7;
2729 }
2730 else if (imm5 & 0x4)
2731 {
2732 size = 2;
2733 index = (imm5 >> 3) & 0x3;
2734 }
2735 else
2736 HALT_UNALLOC;
2737 }
2738 else if (imm5 & 0x8)
2739 {
2740 size = 3;
2741 index = (imm5 >> 4) & 0x1;
2742 }
2743 else
2744 HALT_UNALLOC;
2745
2746 switch (size)
2747 {
2748 case 0:
2749 aarch64_set_reg_u32 (cpu, rd, NO_SP,
2750 aarch64_get_vec_u8 (cpu, vs, index));
2751 break;
2752
2753 case 1:
2754 aarch64_set_reg_u32 (cpu, rd, NO_SP,
2755 aarch64_get_vec_u16 (cpu, vs, index));
2756 break;
2757
2758 case 2:
2759 aarch64_set_reg_u32 (cpu, rd, NO_SP,
2760 aarch64_get_vec_u32 (cpu, vs, index));
2761 break;
2762
2763 case 3:
2764 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2765 aarch64_get_vec_u64 (cpu, vs, index));
2766 break;
2767
2768 default:
2769 HALT_UNALLOC;
2770 }
2771 }
2772
2773 static void
2774 do_vec_INS (sim_cpu *cpu)
2775 {
2776 /* instr[31,21] = 01001110000
2777 instr[20,16] = element size and index
2778 instr[15,10] = 000111
2779 instr[9,5] = W source
2780 instr[4,0] = V dest */
2781
2782 int index;
2783 unsigned rs = INSTR (9, 5);
2784 unsigned vd = INSTR (4, 0);
2785
2786 NYI_assert (31, 21, 0x270);
2787 NYI_assert (15, 10, 0x07);
2788
2789 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2790 if (INSTR (16, 16))
2791 {
2792 index = INSTR (20, 17);
2793 aarch64_set_vec_u8 (cpu, vd, index,
2794 aarch64_get_reg_u8 (cpu, rs, NO_SP));
2795 }
2796 else if (INSTR (17, 17))
2797 {
2798 index = INSTR (20, 18);
2799 aarch64_set_vec_u16 (cpu, vd, index,
2800 aarch64_get_reg_u16 (cpu, rs, NO_SP));
2801 }
2802 else if (INSTR (18, 18))
2803 {
2804 index = INSTR (20, 19);
2805 aarch64_set_vec_u32 (cpu, vd, index,
2806 aarch64_get_reg_u32 (cpu, rs, NO_SP));
2807 }
2808 else if (INSTR (19, 19))
2809 {
2810 index = INSTR (20, 20);
2811 aarch64_set_vec_u64 (cpu, vd, index,
2812 aarch64_get_reg_u64 (cpu, rs, NO_SP));
2813 }
2814 else
2815 HALT_NYI;
2816 }
2817
2818 static void
2819 do_vec_DUP_vector_into_vector (sim_cpu *cpu)
2820 {
2821 /* instr[31] = 0
2822 instr[30] = half(0)/full(1)
2823 instr[29,21] = 00 1110 000
2824 instr[20,16] = element size and index
2825 instr[15,10] = 0000 01
2826 instr[9,5] = V source
2827 instr[4,0] = V dest. */
2828
2829 unsigned full = INSTR (30, 30);
2830 unsigned vs = INSTR (9, 5);
2831 unsigned vd = INSTR (4, 0);
2832 int i, index;
2833
2834 NYI_assert (29, 21, 0x070);
2835 NYI_assert (15, 10, 0x01);
2836
2837 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2838 if (INSTR (16, 16))
2839 {
2840 index = INSTR (20, 17);
2841
2842 for (i = 0; i < (full ? 16 : 8); i++)
2843 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vs, index));
2844 }
2845 else if (INSTR (17, 17))
2846 {
2847 index = INSTR (20, 18);
2848
2849 for (i = 0; i < (full ? 8 : 4); i++)
2850 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, index));
2851 }
2852 else if (INSTR (18, 18))
2853 {
2854 index = INSTR (20, 19);
2855
2856 for (i = 0; i < (full ? 4 : 2); i++)
2857 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, index));
2858 }
2859 else
2860 {
2861 if (INSTR (19, 19) == 0)
2862 HALT_UNALLOC;
2863
2864 if (! full)
2865 HALT_UNALLOC;
2866
2867 index = INSTR (20, 20);
2868
2869 for (i = 0; i < 2; i++)
2870 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, index));
2871 }
2872 }
2873
2874 static void
2875 do_vec_TBL (sim_cpu *cpu)
2876 {
2877 /* instr[31] = 0
2878 instr[30] = half(0)/full(1)
2879 instr[29,21] = 00 1110 000
2880 instr[20,16] = Vm
2881 instr[15] = 0
2882 instr[14,13] = vec length
2883 instr[12,10] = 000
2884 instr[9,5] = V start
2885 instr[4,0] = V dest */
2886
2887 int full = INSTR (30, 30);
2888 int len = INSTR (14, 13) + 1;
2889 unsigned vm = INSTR (20, 16);
2890 unsigned vn = INSTR (9, 5);
2891 unsigned vd = INSTR (4, 0);
2892 unsigned i;
2893
2894 NYI_assert (29, 21, 0x070);
2895 NYI_assert (12, 10, 0);
2896
2897 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2898 for (i = 0; i < (full ? 16 : 8); i++)
2899 {
2900 unsigned int selector = aarch64_get_vec_u8 (cpu, vm, i);
2901 uint8_t val;
2902
2903 if (selector < 16)
2904 val = aarch64_get_vec_u8 (cpu, vn, selector);
2905 else if (selector < 32)
2906 val = len < 2 ? 0 : aarch64_get_vec_u8 (cpu, vn + 1, selector - 16);
2907 else if (selector < 48)
2908 val = len < 3 ? 0 : aarch64_get_vec_u8 (cpu, vn + 2, selector - 32);
2909 else if (selector < 64)
2910 val = len < 4 ? 0 : aarch64_get_vec_u8 (cpu, vn + 3, selector - 48);
2911 else
2912 val = 0;
2913
2914 aarch64_set_vec_u8 (cpu, vd, i, val);
2915 }
2916 }
2917
2918 static void
2919 do_vec_TRN (sim_cpu *cpu)
2920 {
2921 /* instr[31] = 0
2922 instr[30] = half(0)/full(1)
2923 instr[29,24] = 00 1110
2924 instr[23,22] = size
2925 instr[21] = 0
2926 instr[20,16] = Vm
2927 instr[15] = 0
2928 instr[14] = TRN1 (0) / TRN2 (1)
2929 instr[13,10] = 1010
2930 instr[9,5] = V source
2931 instr[4,0] = V dest. */
2932
2933 int full = INSTR (30, 30);
2934 int second = INSTR (14, 14);
2935 unsigned vm = INSTR (20, 16);
2936 unsigned vn = INSTR (9, 5);
2937 unsigned vd = INSTR (4, 0);
2938 unsigned i;
2939
2940 NYI_assert (29, 24, 0x0E);
2941 NYI_assert (13, 10, 0xA);
2942
2943 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2944 switch (INSTR (23, 22))
2945 {
2946 case 0:
2947 for (i = 0; i < (full ? 8 : 4); i++)
2948 {
2949 aarch64_set_vec_u8
2950 (cpu, vd, i * 2,
2951 aarch64_get_vec_u8 (cpu, second ? vm : vn, i * 2));
2952 aarch64_set_vec_u8
2953 (cpu, vd, i * 2 + 1,
2954 aarch64_get_vec_u8 (cpu, second ? vn : vm, i * 2 + 1));
2955 }
2956 break;
2957
2958 case 1:
2959 for (i = 0; i < (full ? 4 : 2); i++)
2960 {
2961 aarch64_set_vec_u16
2962 (cpu, vd, i * 2,
2963 aarch64_get_vec_u16 (cpu, second ? vm : vn, i * 2));
2964 aarch64_set_vec_u16
2965 (cpu, vd, i * 2 + 1,
2966 aarch64_get_vec_u16 (cpu, second ? vn : vm, i * 2 + 1));
2967 }
2968 break;
2969
2970 case 2:
2971 aarch64_set_vec_u32
2972 (cpu, vd, 0, aarch64_get_vec_u32 (cpu, second ? vm : vn, 0));
2973 aarch64_set_vec_u32
2974 (cpu, vd, 1, aarch64_get_vec_u32 (cpu, second ? vn : vm, 1));
2975 aarch64_set_vec_u32
2976 (cpu, vd, 2, aarch64_get_vec_u32 (cpu, second ? vm : vn, 2));
2977 aarch64_set_vec_u32
2978 (cpu, vd, 3, aarch64_get_vec_u32 (cpu, second ? vn : vm, 3));
2979 break;
2980
2981 case 3:
2982 if (! full)
2983 HALT_UNALLOC;
2984
2985 aarch64_set_vec_u64 (cpu, vd, 0,
2986 aarch64_get_vec_u64 (cpu, second ? vm : vn, 0));
2987 aarch64_set_vec_u64 (cpu, vd, 1,
2988 aarch64_get_vec_u64 (cpu, second ? vn : vm, 1));
2989 break;
2990 }
2991 }
2992
2993 static void
2994 do_vec_DUP_scalar_into_vector (sim_cpu *cpu)
2995 {
2996 /* instr[31] = 0
2997 instr[30] = 0=> zero top 64-bits, 1=> duplicate into top 64-bits
2998 [must be 1 for 64-bit xfer]
2999 instr[29,20] = 00 1110 0000
3000 instr[19,16] = element size: 0001=> 8-bits, 0010=> 16-bits,
3001 0100=> 32-bits. 1000=>64-bits
3002 instr[15,10] = 0000 11
3003 instr[9,5] = W source
3004 instr[4,0] = V dest. */
3005
3006 unsigned i;
3007 unsigned Vd = INSTR (4, 0);
3008 unsigned Rs = INSTR (9, 5);
3009 int both = INSTR (30, 30);
3010
3011 NYI_assert (29, 20, 0x0E0);
3012 NYI_assert (15, 10, 0x03);
3013
3014 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3015 switch (INSTR (19, 16))
3016 {
3017 case 1:
3018 for (i = 0; i < (both ? 16 : 8); i++)
3019 aarch64_set_vec_u8 (cpu, Vd, i, aarch64_get_reg_u8 (cpu, Rs, NO_SP));
3020 break;
3021
3022 case 2:
3023 for (i = 0; i < (both ? 8 : 4); i++)
3024 aarch64_set_vec_u16 (cpu, Vd, i, aarch64_get_reg_u16 (cpu, Rs, NO_SP));
3025 break;
3026
3027 case 4:
3028 for (i = 0; i < (both ? 4 : 2); i++)
3029 aarch64_set_vec_u32 (cpu, Vd, i, aarch64_get_reg_u32 (cpu, Rs, NO_SP));
3030 break;
3031
3032 case 8:
3033 if (!both)
3034 HALT_NYI;
3035 aarch64_set_vec_u64 (cpu, Vd, 0, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
3036 aarch64_set_vec_u64 (cpu, Vd, 1, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
3037 break;
3038
3039 default:
3040 HALT_NYI;
3041 }
3042 }
3043
3044 static void
3045 do_vec_UZP (sim_cpu *cpu)
3046 {
3047 /* instr[31] = 0
3048 instr[30] = half(0)/full(1)
3049 instr[29,24] = 00 1110
3050 instr[23,22] = size: byte(00), half(01), word (10), long (11)
3051 instr[21] = 0
3052 instr[20,16] = Vm
3053 instr[15] = 0
3054 instr[14] = lower (0) / upper (1)
3055 instr[13,10] = 0110
3056 instr[9,5] = Vn
3057 instr[4,0] = Vd. */
3058
3059 int full = INSTR (30, 30);
3060 int upper = INSTR (14, 14);
3061
3062 unsigned vm = INSTR (20, 16);
3063 unsigned vn = INSTR (9, 5);
3064 unsigned vd = INSTR (4, 0);
3065
3066 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
3067 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
3068 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
3069 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
3070
3071 uint64_t val1;
3072 uint64_t val2;
3073
3074 uint64_t input2 = full ? val_n2 : val_m1;
3075
3076 NYI_assert (29, 24, 0x0E);
3077 NYI_assert (21, 21, 0);
3078 NYI_assert (15, 15, 0);
3079 NYI_assert (13, 10, 6);
3080
3081 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3082 switch (INSTR (23, 22))
3083 {
3084 case 0:
3085 val1 = (val_n1 >> (upper * 8)) & 0xFFULL;
3086 val1 |= (val_n1 >> ((upper * 8) + 8)) & 0xFF00ULL;
3087 val1 |= (val_n1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
3088 val1 |= (val_n1 >> ((upper * 8) + 24)) & 0xFF000000ULL;
3089
3090 val1 |= (input2 << (32 - (upper * 8))) & 0xFF00000000ULL;
3091 val1 |= (input2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
3092 val1 |= (input2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
3093 val1 |= (input2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
3094
3095 if (full)
3096 {
3097 val2 = (val_m1 >> (upper * 8)) & 0xFFULL;
3098 val2 |= (val_m1 >> ((upper * 8) + 8)) & 0xFF00ULL;
3099 val2 |= (val_m1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
3100 val2 |= (val_m1 >> ((upper * 8) + 24)) & 0xFF000000ULL;
3101
3102 val2 |= (val_m2 << (32 - (upper * 8))) & 0xFF00000000ULL;
3103 val2 |= (val_m2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
3104 val2 |= (val_m2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
3105 val2 |= (val_m2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
3106 }
3107 break;
3108
3109 case 1:
3110 val1 = (val_n1 >> (upper * 16)) & 0xFFFFULL;
3111 val1 |= (val_n1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;
3112
3113 val1 |= (input2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;
3114 val1 |= (input2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
3115
3116 if (full)
3117 {
3118 val2 = (val_m1 >> (upper * 16)) & 0xFFFFULL;
3119 val2 |= (val_m1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;
3120
3121 val2 |= (val_m2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;
3122 val2 |= (val_m2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
3123 }
3124 break;
3125
3126 case 2:
3127 val1 = (val_n1 >> (upper * 32)) & 0xFFFFFFFF;
3128 val1 |= (input2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
3129
3130 if (full)
3131 {
3132 val2 = (val_m1 >> (upper * 32)) & 0xFFFFFFFF;
3133 val2 |= (val_m2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
3134 }
3135 break;
3136
3137 case 3:
3138 if (! full)
3139 HALT_UNALLOC;
3140
3141 val1 = upper ? val_n2 : val_n1;
3142 val2 = upper ? val_m2 : val_m1;
3143 break;
3144 }
3145
3146 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3147 if (full)
3148 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3149 }
3150
3151 static void
3152 do_vec_ZIP (sim_cpu *cpu)
3153 {
3154 /* instr[31] = 0
3155 instr[30] = half(0)/full(1)
3156 instr[29,24] = 00 1110
3157 instr[23,22] = size: byte(00), half(01), word (10), long (11)
3158 instr[21] = 0
3159 instr[20,16] = Vm
3160 instr[15] = 0
3161 instr[14] = lower (0) / upper (1)
3162 instr[13,10] = 1110
3163 instr[9,5] = Vn
3164 instr[4,0] = Vd. */
3165
3166 int full = INSTR (30, 30);
3167 int upper = INSTR (14, 14);
3168
3169 unsigned vm = INSTR (20, 16);
3170 unsigned vn = INSTR (9, 5);
3171 unsigned vd = INSTR (4, 0);
3172
3173 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
3174 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
3175 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
3176 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
3177
3178 uint64_t val1 = 0;
3179 uint64_t val2 = 0;
3180
3181 uint64_t input1 = upper ? val_n2 : val_n1;
3182 uint64_t input2 = upper ? val_m2 : val_m1;
3183
3184 NYI_assert (29, 24, 0x0E);
3185 NYI_assert (21, 21, 0);
3186 NYI_assert (15, 15, 0);
3187 NYI_assert (13, 10, 0xE);
3188
3189 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3190 switch (INSTR (23, 22))
3191 {
3192 case 0:
3193 val1 =
3194 ((input1 << 0) & (0xFF << 0))
3195 | ((input2 << 8) & (0xFF << 8))
3196 | ((input1 << 8) & (0xFF << 16))
3197 | ((input2 << 16) & (0xFF << 24))
3198 | ((input1 << 16) & (0xFFULL << 32))
3199 | ((input2 << 24) & (0xFFULL << 40))
3200 | ((input1 << 24) & (0xFFULL << 48))
3201 | ((input2 << 32) & (0xFFULL << 56));
3202
3203 val2 =
3204 ((input1 >> 32) & (0xFF << 0))
3205 | ((input2 >> 24) & (0xFF << 8))
3206 | ((input1 >> 24) & (0xFF << 16))
3207 | ((input2 >> 16) & (0xFF << 24))
3208 | ((input1 >> 16) & (0xFFULL << 32))
3209 | ((input2 >> 8) & (0xFFULL << 40))
3210 | ((input1 >> 8) & (0xFFULL << 48))
3211 | ((input2 >> 0) & (0xFFULL << 56));
3212 break;
3213
3214 case 1:
3215 val1 =
3216 ((input1 << 0) & (0xFFFF << 0))
3217 | ((input2 << 16) & (0xFFFF << 16))
3218 | ((input1 << 16) & (0xFFFFULL << 32))
3219 | ((input2 << 32) & (0xFFFFULL << 48));
3220
3221 val2 =
3222 ((input1 >> 32) & (0xFFFF << 0))
3223 | ((input2 >> 16) & (0xFFFF << 16))
3224 | ((input1 >> 16) & (0xFFFFULL << 32))
3225 | ((input2 >> 0) & (0xFFFFULL << 48));
3226 break;
3227
3228 case 2:
3229 val1 = (input1 & 0xFFFFFFFFULL) | (input2 << 32);
3230 val2 = (input2 & 0xFFFFFFFFULL) | (input1 << 32);
3231 break;
3232
3233 case 3:
3234 val1 = input1;
3235 val2 = input2;
3236 break;
3237 }
3238
3239 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3240 if (full)
3241 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3242 }
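
/* A worked ZIP1 example, kept out of the build ("show_zip1" is a
   hypothetical name): with 32-bit lanes, lane 0 of the result comes
   from Vn and lane 1 from Vm.  */
#if 0
static void
show_zip1 (sim_cpu *cpu)
{
  aarch64_set_vec_u64 (cpu, 1, 0, 0x1111111100000000ULL); /* Vn.S[1,0].  */
  aarch64_set_vec_u64 (cpu, 2, 0, 0x3333333322222222ULL); /* Vm.S[1,0].  */
  /* Executing ZIP1 V0.2S, V1.2S, V2.2S now yields
     V0.D[0] == 0x2222222200000000, i.e. { Vn.S[0], Vm.S[0] }.  */
}
#endif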
3243
3244 /* Floating point immediates are encoded in 8 bits.
3245 fpimm[7] = sign bit.
3246 fpimm[6:4] = signed exponent.
3247 fpimm[3:0] = fraction (assuming leading 1).
3248 i.e. F = s * 1.f * 2^(e - b). */
3249
3250 static float
3251 fp_immediate_for_encoding_32 (uint32_t imm8)
3252 {
3253 float u;
3254 uint32_t s, e, f, i;
3255
3256 s = (imm8 >> 7) & 0x1;
3257 e = (imm8 >> 4) & 0x7;
3258 f = imm8 & 0xf;
3259
3260 /* The fp value is (-1)^s * n/16 * 2^r where n is 16+f and r is the signed exponent (e = 0..3 ==> +1..+4, e = 4..7 ==> -3..0). */
3261 u = (16.0 + f) / 16.0;
3262
3263 /* N.B. exponent is signed. */
3264 if (e < 4)
3265 {
3266 int epos = e;
3267
3268 for (i = 0; i <= epos; i++)
3269 u *= 2.0;
3270 }
3271 else
3272 {
3273 int eneg = 7 - e;
3274
3275 for (i = 0; i < eneg; i++)
3276 u /= 2.0;
3277 }
3278
3279 if (s)
3280 u = - u;
3281
3282 return u;
3283 }
3284
3285 static double
3286 fp_immediate_for_encoding_64 (uint32_t imm8)
3287 {
3288 double u;
3289 uint32_t s, e, f, i;
3290
3291 s = (imm8 >> 7) & 0x1;
3292 e = (imm8 >> 4) & 0x7;
3293 f = imm8 & 0xf;
3294
3295 /* The fp value is (-1)^s * n/16 * 2^r where n is 16+f and r is the signed exponent (e = 0..3 ==> +1..+4, e = 4..7 ==> -3..0). */
3296 u = (16.0 + f) / 16.0;
3297
3298 /* N.B. exponent is signed. */
3299 if (e < 4)
3300 {
3301 int epos = e;
3302
3303 for (i = 0; i <= epos; i++)
3304 u *= 2.0;
3305 }
3306 else
3307 {
3308 int eneg = 7 - e;
3309
3310 for (i = 0; i < eneg; i++)
3311 u /= 2.0;
3312 }
3313
3314 if (s)
3315 u = - u;
3316
3317 return u;
3318 }
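
/* The same expansion can be done without FP arithmetic by building the
   IEEE 754 single-precision bit pattern directly: for imm8 = abcdefgh
   the result is a : NOT(b) : bbbbb : cdefgh : Zeros(19).  A sketch
   kept out of the build, meant only as a cross-check of the loop above
   ("vfp_expand_imm32" is a hypothetical name):  */
#if 0
static uint32_t
vfp_expand_imm32 (uint32_t imm8)
{
  uint32_t a = (imm8 >> 7) & 1;
  uint32_t b = (imm8 >> 6) & 1;
  uint32_t cdefgh = imm8 & 0x3f;

  return (a << 31)                /* Sign.  */
    | ((b ^ 1) << 30)             /* NOT (b).  */
    | ((b ? 0x1fu : 0u) << 25)    /* b replicated five times.  */
    | (cdefgh << 19);             /* c,d end the exponent; efgh top
				     the fraction.  */
}
#endif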
3319
3320 static void
3321 do_vec_MOV_immediate (sim_cpu *cpu)
3322 {
3323 /* instr[31] = 0
3324 instr[30] = full/half selector
3325 instr[29,19] = 00111100000
3326 instr[18,16] = high 3 bits of uimm8
3327 instr[15,12] = size & shift:
3328 0000 => 32-bit
3329 0010 => 32-bit + LSL#8
3330 0100 => 32-bit + LSL#16
3331 0110 => 32-bit + LSL#24
3332 1010 => 16-bit + LSL#8
3333 1000 => 16-bit
3334 1101 => 32-bit + MSL#16
3335 1100 => 32-bit + MSL#8
3336 1110 => 8-bit
3337 1111 => double
3338 instr[11,10] = 01
3339 instr[9,5] = low 5-bits of uimm8
3340 instr[4,0] = Vd. */
3341
3342 int full = INSTR (30, 30);
3343 unsigned vd = INSTR (4, 0);
3344 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3345 unsigned i;
3346
3347 NYI_assert (29, 19, 0x1E0);
3348 NYI_assert (11, 10, 1);
3349
3350 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3351 switch (INSTR (15, 12))
3352 {
3353 case 0x0: /* 32-bit, no shift. */
3354 case 0x2: /* 32-bit, shift by 8. */
3355 case 0x4: /* 32-bit, shift by 16. */
3356 case 0x6: /* 32-bit, shift by 24. */
3357 val <<= (8 * INSTR (14, 13));
3358 for (i = 0; i < (full ? 4 : 2); i++)
3359 aarch64_set_vec_u32 (cpu, vd, i, val);
3360 break;
3361
3362 case 0xa: /* 16-bit, shift by 8. */
3363 val <<= 8;
3364 /* Fall through. */
3365 case 0x8: /* 16-bit, no shift. */
3366 for (i = 0; i < (full ? 8 : 4); i++)
3367 aarch64_set_vec_u16 (cpu, vd, i, val);
3368 break;
3369
3370 case 0xd: /* 32-bit, mask shift by 16. */
3371 val <<= 8;
3372 val |= 0xFF;
3373 /* Fall through. */
3374 case 0xc: /* 32-bit, mask shift by 8. */
3375 val <<= 8;
3376 val |= 0xFF;
3377 for (i = 0; i < (full ? 4 : 2); i++)
3378 aarch64_set_vec_u32 (cpu, vd, i, val);
3379 break;
3380
3381 case 0xe: /* 8-bit, no shift. */
3382 for (i = 0; i < (full ? 16 : 8); i++)
3383 aarch64_set_vec_u8 (cpu, vd, i, val);
3384 break;
3385
3386 case 0xf: /* FMOV Vs.{2|4}S, #fpimm. */
3387 {
3388 float u = fp_immediate_for_encoding_32 (val);
3389 for (i = 0; i < (full ? 4 : 2); i++)
3390 aarch64_set_vec_float (cpu, vd, i, u);
3391 break;
3392 }
3393
3394 default:
3395 HALT_NYI;
3396 }
3397 }
3398
3399 static void
3400 do_vec_MVNI (sim_cpu *cpu)
3401 {
3402 /* instr[31] = 0
3403 instr[30] = full/half selector
3404 instr[29,19] = 10111100000
3405 instr[18,16] = high 3 bits of uimm8
3406 instr[15,12] = selector
3407 instr[11,10] = 01
3408 instr[9,5] = low 5-bits of uimm8
3409 instr[4,0] = Vd. */
3410
3411 int full = INSTR (30, 30);
3412 unsigned vd = INSTR (4, 0);
3413 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3414 unsigned i;
3415
3416 NYI_assert (29, 19, 0x5E0);
3417 NYI_assert (11, 10, 1);
3418
3419 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3420 switch (INSTR (15, 12))
3421 {
3422 case 0x0: /* 32-bit, no shift. */
3423 case 0x2: /* 32-bit, shift by 8. */
3424 case 0x4: /* 32-bit, shift by 16. */
3425 case 0x6: /* 32-bit, shift by 24. */
3426 val <<= (8 * INSTR (14, 13));
3427 val = ~ val;
3428 for (i = 0; i < (full ? 4 : 2); i++)
3429 aarch64_set_vec_u32 (cpu, vd, i, val);
3430 return;
3431
3432 case 0xa: /* 16-bit, 8 bit shift. */
3433 val <<= 8; /* Fall through. */
3434 case 0x8: /* 16-bit, no shift. */
3435 val = ~ val;
3436 for (i = 0; i < (full ? 8 : 4); i++)
3437 aarch64_set_vec_u16 (cpu, vd, i, val);
3438 return;
3439
3440 case 0xd: /* 32-bit, mask shift by 16. */
3441 val <<= 8;
3442 val |= 0xFF; /* Fall through. */
3443 case 0xc: /* 32-bit, mask shift by 8. */
3444 val <<= 8;
3445 val |= 0xFF;
3446 val = ~ val;
3447 for (i = 0; i < (full ? 4 : 2); i++)
3448 aarch64_set_vec_u32 (cpu, vd, i, val);
3449 return;
3450
3451 case 0xE: /* MOVI Dn, #mask64 */
3452 {
3453 uint64_t mask = 0;
3454
3455 for (i = 0; i < 8; i++)
3456 if (val & (1 << i))
3457 mask |= (0xFFUL << (i * 8));
3458 aarch64_set_vec_u64 (cpu, vd, 0, mask);
3459 aarch64_set_vec_u64 (cpu, vd, 1, full ? mask : 0);
3460 return;
3461 }
3462
3463 case 0xf: /* FMOV Vd.2D, #fpimm. */
3464 {
3465 double u = fp_immediate_for_encoding_64 (val);
3466
3467 if (! full)
3468 HALT_UNALLOC;
3469
3470 aarch64_set_vec_double (cpu, vd, 0, u);
3471 aarch64_set_vec_double (cpu, vd, 1, u);
3472 return;
3473 }
3474
3475 default:
3476 HALT_NYI;
3477 }
3478 }
3479
3480 #define ABS(A) ((A) < 0 ? - (A) : (A))
3481
3482 static void
3483 do_vec_ABS (sim_cpu *cpu)
3484 {
3485 /* instr[31] = 0
3486 instr[30] = half(0)/full(1)
3487 instr[29,24] = 00 1110
3488 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3489 instr[21,10] = 10 0000 1011 10
3490 instr[9,5] = Vn
3491 instr[4,0] = Vd. */
3492
3493 unsigned vn = INSTR (9, 5);
3494 unsigned vd = INSTR (4, 0);
3495 unsigned full = INSTR (30, 30);
3496 unsigned i;
3497
3498 NYI_assert (29, 24, 0x0E);
3499 NYI_assert (21, 10, 0x82E);
3500
3501 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3502 switch (INSTR (23, 22))
3503 {
3504 case 0:
3505 for (i = 0; i < (full ? 16 : 8); i++)
3506 aarch64_set_vec_s8 (cpu, vd, i,
3507 ABS (aarch64_get_vec_s8 (cpu, vn, i)));
3508 break;
3509
3510 case 1:
3511 for (i = 0; i < (full ? 8 : 4); i++)
3512 aarch64_set_vec_s16 (cpu, vd, i,
3513 ABS (aarch64_get_vec_s16 (cpu, vn, i)));
3514 break;
3515
3516 case 2:
3517 for (i = 0; i < (full ? 4 : 2); i++)
3518 aarch64_set_vec_s32 (cpu, vd, i,
3519 ABS (aarch64_get_vec_s32 (cpu, vn, i)));
3520 break;
3521
3522 case 3:
3523 if (! full)
3524 HALT_NYI;
3525 for (i = 0; i < 2; i++)
3526 aarch64_set_vec_s64 (cpu, vd, i,
3527 ABS (aarch64_get_vec_s64 (cpu, vn, i)));
3528 break;
3529 }
3530 }
3531
3532 static void
3533 do_vec_ADDV (sim_cpu *cpu)
3534 {
3535 /* instr[31] = 0
3536 instr[30] = full/half selector
3537 instr[29,24] = 00 1110
3538 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3539 instr[21,10] = 11 0001 1011 10
3540 instr[9,5] = Vm
3541 instr[4,0] = Rd. */
3542
3543 unsigned vm = INSTR (9, 5);
3544 unsigned rd = INSTR (4, 0);
3545 unsigned i;
3546 int full = INSTR (30, 30);
3547
3548 NYI_assert (29, 24, 0x0E);
3549 NYI_assert (21, 10, 0xC6E);
3550
3551 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3552 switch (INSTR (23, 22))
3553 {
3554 case 0:
3555 {
3556 uint8_t val = 0;
3557 for (i = 0; i < (full ? 16 : 8); i++)
3558 val += aarch64_get_vec_u8 (cpu, vm, i);
3559 aarch64_set_vec_u64 (cpu, rd, 0, val);
3560 return;
3561 }
3562
3563 case 1:
3564 {
3565 uint16_t val = 0;
3566 for (i = 0; i < (full ? 8 : 4); i++)
3567 val += aarch64_get_vec_u16 (cpu, vm, i);
3568 aarch64_set_vec_u64 (cpu, rd, 0, val);
3569 return;
3570 }
3571
3572 case 2:
3573 {
3574 uint32_t val = 0;
3575 if (! full)
3576 HALT_UNALLOC;
3577 for (i = 0; i < 4; i++)
3578 val += aarch64_get_vec_u32 (cpu, vm, i);
3579 aarch64_set_vec_u64 (cpu, rd, 0, val);
3580 return;
3581 }
3582
3583 case 3:
3584 HALT_UNALLOC;
3585 }
3586 }
3587
3588 static void
3589 do_vec_ins_2 (sim_cpu *cpu)
3590 {
3591 /* instr[31,21] = 01001110000
3592 instr[20,18] = size & element selector
3593 instr[17,14] = 0000
3594 instr[13] = direction: to vec(0), from vec (1)
3595 instr[12,10] = 111
3596 instr[9,5] = Vm
3597 instr[4,0] = Vd. */
3598
3599 unsigned elem;
3600 unsigned vm = INSTR (9, 5);
3601 unsigned vd = INSTR (4, 0);
3602
3603 NYI_assert (31, 21, 0x270);
3604 NYI_assert (17, 14, 0);
3605 NYI_assert (12, 10, 7);
3606
3607 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3608 if (INSTR (13, 13) == 1)
3609 {
3610 if (INSTR (18, 18) == 1)
3611 {
3612 /* 32-bit moves. */
3613 elem = INSTR (20, 19);
3614 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3615 aarch64_get_vec_u32 (cpu, vm, elem));
3616 }
3617 else
3618 {
3619 /* 64-bit moves. */
3620 if (INSTR (19, 19) != 1)
3621 HALT_NYI;
3622
3623 elem = INSTR (20, 20);
3624 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3625 aarch64_get_vec_u64 (cpu, vm, elem));
3626 }
3627 }
3628 else
3629 {
3630 if (INSTR (18, 18) == 1)
3631 {
3632 /* 32-bit moves. */
3633 elem = INSTR (20, 19);
3634 aarch64_set_vec_u32 (cpu, vd, elem,
3635 aarch64_get_reg_u32 (cpu, vm, NO_SP));
3636 }
3637 else
3638 {
3639 /* 64-bit moves. */
3640 if (INSTR (19, 19) != 1)
3641 HALT_NYI;
3642
3643 elem = INSTR (20, 20);
3644 aarch64_set_vec_u64 (cpu, vd, elem,
3645 aarch64_get_reg_u64 (cpu, vm, NO_SP));
3646 }
3647 }
3648 }
3649
3650 #define DO_VEC_WIDENING_MUL(N, DST_TYPE, READ_TYPE, WRITE_TYPE) \
3651 do \
3652 { \
3653 DST_TYPE a[N], b[N]; \
3654 \
3655 for (i = 0; i < (N); i++) \
3656 { \
3657 a[i] = aarch64_get_vec_##READ_TYPE (cpu, vn, i + bias); \
3658 b[i] = aarch64_get_vec_##READ_TYPE (cpu, vm, i + bias); \
3659 } \
3660 for (i = 0; i < (N); i++) \
3661 aarch64_set_vec_##WRITE_TYPE (cpu, vd, i, a[i] * b[i]); \
3662 } \
3663 while (0)
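
/* The temporaries make the macro safe when Vd aliases Vn or Vm, and
   "bias" selects the upper half of the sources for the second-part
   (xMULL2) forms.  Its expansion for the 16-bit to 32-bit case used by
   do_vec_mull below looks like this sketch, kept out of the build:  */
#if 0
  {
    uint32_t a[4], b[4];

    for (i = 0; i < 4; i++)
      {
	a[i] = aarch64_get_vec_u16 (cpu, vn, i + bias);
	b[i] = aarch64_get_vec_u16 (cpu, vm, i + bias);
      }
    for (i = 0; i < 4; i++)
      aarch64_set_vec_u32 (cpu, vd, i, a[i] * b[i]);
  }
#endif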
3664
3665 static void
3666 do_vec_mull (sim_cpu *cpu)
3667 {
3668 /* instr[31] = 0
3669 instr[30] = lower(0)/upper(1) selector
3670 instr[29] = signed(0)/unsigned(1)
3671 instr[28,24] = 0 1110
3672 instr[23,22] = size: 8-bit (00), 16-bit (01), 32-bit (10)
3673 instr[21] = 1
3674 instr[20,16] = Vm
3675 instr[15,10] = 11 0000
3676 instr[9,5] = Vn
3677 instr[4,0] = Vd. */
3678
3679 int unsign = INSTR (29, 29);
3680 int bias = INSTR (30, 30);
3681 unsigned vm = INSTR (20, 16);
3682 unsigned vn = INSTR ( 9, 5);
3683 unsigned vd = INSTR ( 4, 0);
3684 unsigned i;
3685
3686 NYI_assert (28, 24, 0x0E);
3687 NYI_assert (15, 10, 0x30);
3688
3689 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3690 /* NB: Read source values before writing results, in case
3691 the source and destination vectors are the same. */
3692 switch (INSTR (23, 22))
3693 {
3694 case 0:
3695 if (bias)
3696 bias = 8;
3697 if (unsign)
3698 DO_VEC_WIDENING_MUL (8, uint16_t, u8, u16);
3699 else
3700 DO_VEC_WIDENING_MUL (8, int16_t, s8, s16);
3701 return;
3702
3703 case 1:
3704 if (bias)
3705 bias = 4;
3706 if (unsign)
3707 DO_VEC_WIDENING_MUL (4, uint32_t, u16, u32);
3708 else
3709 DO_VEC_WIDENING_MUL (4, int32_t, s16, s32);
3710 return;
3711
3712 case 2:
3713 if (bias)
3714 bias = 2;
3715 if (unsign)
3716 DO_VEC_WIDENING_MUL (2, uint64_t, u32, u64);
3717 else
3718 DO_VEC_WIDENING_MUL (2, int64_t, s32, s64);
3719 return;
3720
3721 case 3:
3722 HALT_NYI;
3723 }
3724 }
3725
3726 static void
3727 do_vec_fadd (sim_cpu *cpu)
3728 {
3729 /* instr[31] = 0
3730 instr[30] = half(0)/full(1)
3731 instr[29,24] = 001110
3732 instr[23] = FADD(0)/FSUB(1)
3733 instr[22] = float (0)/double(1)
3734 instr[21] = 1
3735 instr[20,16] = Vm
3736 instr[15,10] = 110101
3737 instr[9,5] = Vn
3738 instr[4,0] = Vd. */
3739
3740 unsigned vm = INSTR (20, 16);
3741 unsigned vn = INSTR (9, 5);
3742 unsigned vd = INSTR (4, 0);
3743 unsigned i;
3744 int full = INSTR (30, 30);
3745
3746 NYI_assert (29, 24, 0x0E);
3747 NYI_assert (21, 21, 1);
3748 NYI_assert (15, 10, 0x35);
3749
3750 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3751 if (INSTR (23, 23))
3752 {
3753 if (INSTR (22, 22))
3754 {
3755 if (! full)
3756 HALT_NYI;
3757
3758 for (i = 0; i < 2; i++)
3759 aarch64_set_vec_double (cpu, vd, i,
3760 aarch64_get_vec_double (cpu, vn, i)
3761 - aarch64_get_vec_double (cpu, vm, i));
3762 }
3763 else
3764 {
3765 for (i = 0; i < (full ? 4 : 2); i++)
3766 aarch64_set_vec_float (cpu, vd, i,
3767 aarch64_get_vec_float (cpu, vn, i)
3768 - aarch64_get_vec_float (cpu, vm, i));
3769 }
3770 }
3771 else
3772 {
3773 if (INSTR (22, 22))
3774 {
3775 if (! full)
3776 HALT_NYI;
3777
3778 for (i = 0; i < 2; i++)
3779 aarch64_set_vec_double (cpu, vd, i,
3780 aarch64_get_vec_double (cpu, vm, i)
3781 + aarch64_get_vec_double (cpu, vn, i));
3782 }
3783 else
3784 {
3785 for (i = 0; i < (full ? 4 : 2); i++)
3786 aarch64_set_vec_float (cpu, vd, i,
3787 aarch64_get_vec_float (cpu, vm, i)
3788 + aarch64_get_vec_float (cpu, vn, i));
3789 }
3790 }
3791 }
3792
3793 static void
3794 do_vec_add (sim_cpu *cpu)
3795 {
3796 /* instr[31] = 0
3797 instr[30] = full/half selector
3798 instr[29,24] = 001110
3799 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3800 instr[21] = 1
3801 instr[20,16] = Vm
3802 instr[15,10] = 100001
3803 instr[9,5] = Vn
3804 instr[4,0] = Vd. */
3805
3806 unsigned vm = INSTR (20, 16);
3807 unsigned vn = INSTR (9, 5);
3808 unsigned vd = INSTR (4, 0);
3809 unsigned i;
3810 int full = INSTR (30, 30);
3811
3812 NYI_assert (29, 24, 0x0E);
3813 NYI_assert (21, 21, 1);
3814 NYI_assert (15, 10, 0x21);
3815
3816 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3817 switch (INSTR (23, 22))
3818 {
3819 case 0:
3820 for (i = 0; i < (full ? 16 : 8); i++)
3821 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
3822 + aarch64_get_vec_u8 (cpu, vm, i));
3823 return;
3824
3825 case 1:
3826 for (i = 0; i < (full ? 8 : 4); i++)
3827 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
3828 + aarch64_get_vec_u16 (cpu, vm, i));
3829 return;
3830
3831 case 2:
3832 for (i = 0; i < (full ? 4 : 2); i++)
3833 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
3834 + aarch64_get_vec_u32 (cpu, vm, i));
3835 return;
3836
3837 case 3:
3838 if (! full)
3839 HALT_UNALLOC;
3840 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vn, 0)
3841 + aarch64_get_vec_u64 (cpu, vm, 0));
3842 aarch64_set_vec_u64 (cpu, vd, 1,
3843 aarch64_get_vec_u64 (cpu, vn, 1)
3844 + aarch64_get_vec_u64 (cpu, vm, 1));
3845 return;
3846 }
3847 }
3848
3849 static void
3850 do_vec_mul (sim_cpu *cpu)
3851 {
3852 /* instr[31] = 0
3853 instr[30] = full/half selector
3854 instr[29,24] = 00 1110
3855 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3856 instr[21] = 1
3857 instr[20,16] = Vm
3858 instr[15,10] = 10 0111
3859 instr[9,5] = Vn
3860 instr[4,0] = Vd. */
3861
3862 unsigned vm = INSTR (20, 16);
3863 unsigned vn = INSTR (9, 5);
3864 unsigned vd = INSTR (4, 0);
3865 unsigned i;
3866 int full = INSTR (30, 30);
3867 int bias = 0;
3868
3869 NYI_assert (29, 24, 0x0E);
3870 NYI_assert (21, 21, 1);
3871 NYI_assert (15, 10, 0x27);
3872
3873 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3874 switch (INSTR (23, 22))
3875 {
3876 case 0:
3877 DO_VEC_WIDENING_MUL (full ? 16 : 8, uint8_t, u8, u8);
3878 return;
3879
3880 case 1:
3881 DO_VEC_WIDENING_MUL (full ? 8 : 4, uint16_t, u16, u16);
3882 return;
3883
3884 case 2:
3885 DO_VEC_WIDENING_MUL (full ? 4 : 2, uint32_t, u32, u32);
3886 return;
3887
3888 case 3:
3889 HALT_UNALLOC;
3890 }
3891 }
3892
3893 static void
3894 do_vec_MLA (sim_cpu *cpu)
3895 {
3896 /* instr[31] = 0
3897 instr[30] = full/half selector
3898 instr[29,24] = 00 1110
3899 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3900 instr[21] = 1
3901 instr[20,16] = Vm
3902 instr[15,10] = 1001 01
3903 instr[9,5] = Vn
3904 instr[4,0] = Vd. */
3905
3906 unsigned vm = INSTR (20, 16);
3907 unsigned vn = INSTR (9, 5);
3908 unsigned vd = INSTR (4, 0);
3909 unsigned i;
3910 int full = INSTR (30, 30);
3911
3912 NYI_assert (29, 24, 0x0E);
3913 NYI_assert (21, 21, 1);
3914 NYI_assert (15, 10, 0x25);
3915
3916 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3917 switch (INSTR (23, 22))
3918 {
3919 case 0:
3920 for (i = 0; i < (full ? 16 : 8); i++)
3921 aarch64_set_vec_u8 (cpu, vd, i,
3922 aarch64_get_vec_u8 (cpu, vd, i)
3923 + (aarch64_get_vec_u8 (cpu, vn, i)
3924 * aarch64_get_vec_u8 (cpu, vm, i)));
3925 return;
3926
3927 case 1:
3928 for (i = 0; i < (full ? 8 : 4); i++)
3929 aarch64_set_vec_u16 (cpu, vd, i,
3930 aarch64_get_vec_u16 (cpu, vd, i)
3931 + (aarch64_get_vec_u16 (cpu, vn, i)
3932 * aarch64_get_vec_u16 (cpu, vm, i)));
3933 return;
3934
3935 case 2:
3936 for (i = 0; i < (full ? 4 : 2); i++)
3937 aarch64_set_vec_u32 (cpu, vd, i,
3938 aarch64_get_vec_u32 (cpu, vd, i)
3939 + (aarch64_get_vec_u32 (cpu, vn, i)
3940 * aarch64_get_vec_u32 (cpu, vm, i)));
3941 return;
3942
3943 default:
3944 HALT_UNALLOC;
3945 }
3946 }
3947
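/* The four helpers below implement the IEEE-754 maxNum/minNum
   semantics used by the FMAXNM/FMINNM family: when exactly one
   operand is a NaN the other, numeric, operand is returned, and a
   NaN results only when both operands are NaNs.  The quiet versus
   signalling NaN distinction is not modelled here.  */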
3948 static float
3949 fmaxnm (float a, float b)
3950 {
3951 if (! isnan (a))
3952 {
3953 if (! isnan (b))
3954 return a > b ? a : b;
3955 return a;
3956 }
3957 else if (! isnan (b))
3958 return b;
3959 return a;
3960 }
3961
3962 static float
3963 fminnm (float a, float b)
3964 {
3965 if (! isnan (a))
3966 {
3967 if (! isnan (b))
3968 return a < b ? a : b;
3969 return a;
3970 }
3971 else if (! isnan (b))
3972 return b;
3973 return a;
3974 }
3975
3976 static double
3977 dmaxnm (double a, double b)
3978 {
3979 if (! isnan (a))
3980 {
3981 if (! isnan (b))
3982 return a > b ? a : b;
3983 return a;
3984 }
3985 else if (! isnan (b))
3986 return b;
3987 return a;
3988 }
3989
3990 static double
3991 dminnm (double a, double b)
3992 {
3993 if (! isnan (a))
3994 {
3995 if (! isnan (b))
3996 return a < b ? a : b;
3997 return a;
3998 }
3999 else if (! isnan (b))
4000 return b;
4001 return a;
4002 }
4003
4004 static void
4005 do_vec_FminmaxNMP (sim_cpu *cpu)
4006 {
4007 /* instr [31] = 0
4008 instr [30] = half (0)/full (1)
4009 instr [29,24] = 10 1110
4010 instr [23] = max(0)/min(1)
4011 instr [22] = float (0)/double (1)
4012 instr [21] = 1
4013 instr [20,16] = Vm
4014 instr [15,10] = 1100 01
4015 instr [9,5] = Vn
4016 instr [4,0] = Vd. */
4017
4018 unsigned vm = INSTR (20, 16);
4019 unsigned vn = INSTR (9, 5);
4020 unsigned vd = INSTR (4, 0);
4021 int full = INSTR (30, 30);
4022
4023 NYI_assert (29, 24, 0x2E);
4024 NYI_assert (21, 21, 1);
4025 NYI_assert (15, 10, 0x31);
4026
4027 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4028 if (INSTR (22, 22))
4029 {
4030 double (* fn)(double, double) = INSTR (23, 23)
4031 ? dminnm : dmaxnm;
4032
4033 if (! full)
4034 HALT_NYI;
4035 aarch64_set_vec_double (cpu, vd, 0,
4036 fn (aarch64_get_vec_double (cpu, vn, 0),
4037 aarch64_get_vec_double (cpu, vn, 1)));
4038 aarch64_set_vec_double (cpu, vd, 1,
4039 fn (aarch64_get_vec_double (cpu, vm, 0),
4040 aarch64_get_vec_double (cpu, vm, 1)));
4041 }
4042 else
4043 {
4044 float (* fn)(float, float) = INSTR (23, 23)
4045 ? fminnm : fmaxnm;
4046
4047 aarch64_set_vec_float (cpu, vd, 0,
4048 fn (aarch64_get_vec_float (cpu, vn, 0),
4049 aarch64_get_vec_float (cpu, vn, 1)));
4050 if (full)
4051 aarch64_set_vec_float (cpu, vd, 1,
4052 fn (aarch64_get_vec_float (cpu, vn, 2),
4053 aarch64_get_vec_float (cpu, vn, 3)));
4054
4055 aarch64_set_vec_float (cpu, vd, (full ? 2 : 1),
4056 fn (aarch64_get_vec_float (cpu, vm, 0),
4057 aarch64_get_vec_float (cpu, vm, 1)));
4058 if (full)
4059 aarch64_set_vec_float (cpu, vd, 3,
4060 fn (aarch64_get_vec_float (cpu, vm, 2),
4061 aarch64_get_vec_float (cpu, vm, 3)));
4062 }
4063 }
4064
4065 static void
4066 do_vec_AND (sim_cpu *cpu)
4067 {
4068 /* instr[31] = 0
4069 instr[30] = half (0)/full (1)
4070 instr[29,21] = 001110001
4071 instr[20,16] = Vm
4072 instr[15,10] = 000111
4073 instr[9,5] = Vn
4074 instr[4,0] = Vd. */
4075
4076 unsigned vm = INSTR (20, 16);
4077 unsigned vn = INSTR (9, 5);
4078 unsigned vd = INSTR (4, 0);
4079 unsigned i;
4080 int full = INSTR (30, 30);
4081
4082 NYI_assert (29, 21, 0x071);
4083 NYI_assert (15, 10, 0x07);
4084
4085 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4086 for (i = 0; i < (full ? 4 : 2); i++)
4087 aarch64_set_vec_u32 (cpu, vd, i,
4088 aarch64_get_vec_u32 (cpu, vn, i)
4089 & aarch64_get_vec_u32 (cpu, vm, i));
4090 }
4091
4092 static void
4093 do_vec_BSL (sim_cpu *cpu)
4094 {
4095 /* instr[31] = 0
4096 instr[30] = half (0)/full (1)
4097 instr[29,21] = 101110011
4098 instr[20,16] = Vm
4099 instr[15,10] = 000111
4100 instr[9,5] = Vn
4101 instr[4,0] = Vd. */
4102
4103 unsigned vm = INSTR (20, 16);
4104 unsigned vn = INSTR (9, 5);
4105 unsigned vd = INSTR (4, 0);
4106 unsigned i;
4107 int full = INSTR (30, 30);
4108
4109 NYI_assert (29, 21, 0x173);
4110 NYI_assert (15, 10, 0x07);
4111
4112 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4113 for (i = 0; i < (full ? 16 : 8); i++)
4114 aarch64_set_vec_u8 (cpu, vd, i,
4115 ( aarch64_get_vec_u8 (cpu, vd, i)
4116 & aarch64_get_vec_u8 (cpu, vn, i))
4117 | ((~ aarch64_get_vec_u8 (cpu, vd, i))
4118 & aarch64_get_vec_u8 (cpu, vm, i)));
4119 }
4120
4121 static void
4122 do_vec_EOR (sim_cpu *cpu)
4123 {
4124 /* instr[31] = 0
4125 instr[30] = half (0)/full (1)
4126 instr[29,21] = 10 1110 001
4127 instr[20,16] = Vm
4128 instr[15,10] = 000111
4129 instr[9,5] = Vn
4130 instr[4,0] = Vd. */
4131
4132 unsigned vm = INSTR (20, 16);
4133 unsigned vn = INSTR (9, 5);
4134 unsigned vd = INSTR (4, 0);
4135 unsigned i;
4136 int full = INSTR (30, 30);
4137
4138 NYI_assert (29, 21, 0x171);
4139 NYI_assert (15, 10, 0x07);
4140
4141 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4142 for (i = 0; i < (full ? 4 : 2); i++)
4143 aarch64_set_vec_u32 (cpu, vd, i,
4144 aarch64_get_vec_u32 (cpu, vn, i)
4145 ^ aarch64_get_vec_u32 (cpu, vm, i));
4146 }
4147
4148 static void
4149 do_vec_bit (sim_cpu *cpu)
4150 {
4151 /* instr[31] = 0
4152 instr[30] = half (0)/full (1)
4153 instr[29,23] = 10 1110 1
4154 instr[22] = BIT (0) / BIF (1)
4155 instr[21] = 1
4156 instr[20,16] = Vm
4157 instr[15,10] = 0001 11
4158 instr[9,5] = Vn
4159 instr[4,0] = Vd. */
4160
4161 unsigned vm = INSTR (20, 16);
4162 unsigned vn = INSTR (9, 5);
4163 unsigned vd = INSTR (4, 0);
4164 unsigned full = INSTR (30, 30);
4165 unsigned test_false = INSTR (22, 22);
4166 unsigned i;
4167
4168 NYI_assert (29, 23, 0x5D);
4169 NYI_assert (21, 21, 1);
4170 NYI_assert (15, 10, 0x07);
4171
4172 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
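  /* Vm acts as a per-bit select mask: BIT copies each Vn bit into Vd
     where the Vm bit is set, while BIF (test_false) copies it where
     the Vm bit is clear.  Working on 32-bit lanes is safe because the
     operation is purely bitwise.  */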
4173 for (i = 0; i < (full ? 4 : 2); i++)
4174 {
4175 uint32_t vd_val = aarch64_get_vec_u32 (cpu, vd, i);
4176 uint32_t vn_val = aarch64_get_vec_u32 (cpu, vn, i);
4177 uint32_t vm_val = aarch64_get_vec_u32 (cpu, vm, i);
4178 if (test_false)
4179 aarch64_set_vec_u32 (cpu, vd, i,
4180 (vd_val & vm_val) | (vn_val & ~vm_val));
4181 else
4182 aarch64_set_vec_u32 (cpu, vd, i,
4183 (vd_val & ~vm_val) | (vn_val & vm_val));
4184 }
4185 }
4186
4187 static void
4188 do_vec_ORN (sim_cpu *cpu)
4189 {
4190 /* instr[31] = 0
4191 instr[30] = half (0)/full (1)
4192 instr[29,21] = 00 1110 111
4193 instr[20,16] = Vm
4194 instr[15,10] = 00 0111
4195 instr[9,5] = Vn
4196 instr[4,0] = Vd. */
4197
4198 unsigned vm = INSTR (20, 16);
4199 unsigned vn = INSTR (9, 5);
4200 unsigned vd = INSTR (4, 0);
4201 unsigned i;
4202 int full = INSTR (30, 30);
4203
4204 NYI_assert (29, 21, 0x077);
4205 NYI_assert (15, 10, 0x07);
4206
4207 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4208 for (i = 0; i < (full ? 16 : 8); i++)
4209 aarch64_set_vec_u8 (cpu, vd, i,
4210 aarch64_get_vec_u8 (cpu, vn, i)
4211 | ~ aarch64_get_vec_u8 (cpu, vm, i));
4212 }
4213
4214 static void
4215 do_vec_ORR (sim_cpu *cpu)
4216 {
4217 /* instr[31] = 0
4218 instr[30] = half (0)/full (1)
4219 instr[29,21] = 00 1110 101
4220 instr[20,16] = Vm
4221 instr[15,10] = 0001 11
4222 instr[9,5] = Vn
4223 instr[4,0] = Vd. */
4224
4225 unsigned vm = INSTR (20, 16);
4226 unsigned vn = INSTR (9, 5);
4227 unsigned vd = INSTR (4, 0);
4228 unsigned i;
4229 int full = INSTR (30, 30);
4230
4231 NYI_assert (29, 21, 0x075);
4232 NYI_assert (15, 10, 0x07);
4233
4234 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4235 for (i = 0; i < (full ? 16 : 8); i++)
4236 aarch64_set_vec_u8 (cpu, vd, i,
4237 aarch64_get_vec_u8 (cpu, vn, i)
4238 | aarch64_get_vec_u8 (cpu, vm, i));
4239 }
4240
4241 static void
4242 do_vec_BIC (sim_cpu *cpu)
4243 {
4244 /* instr[31] = 0
4245 instr[30] = half (0)/full (1)
4246 instr[29,21] = 00 1110 011
4247 instr[20,16] = Vm
4248 instr[15,10] = 00 0111
4249 instr[9,5] = Vn
4250 instr[4,0] = Vd. */
4251
4252 unsigned vm = INSTR (20, 16);
4253 unsigned vn = INSTR (9, 5);
4254 unsigned vd = INSTR (4, 0);
4255 unsigned i;
4256 int full = INSTR (30, 30);
4257
4258 NYI_assert (29, 21, 0x073);
4259 NYI_assert (15, 10, 0x07);
4260
4261 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4262 for (i = 0; i < (full ? 16 : 8); i++)
4263 aarch64_set_vec_u8 (cpu, vd, i,
4264 aarch64_get_vec_u8 (cpu, vn, i)
4265 & ~ aarch64_get_vec_u8 (cpu, vm, i));
4266 }
4267
4268 static void
4269 do_vec_XTN (sim_cpu *cpu)
4270 {
4271 /* instr[31] = 0
4272 instr[30] = first part (0)/ second part (1)
4273 instr[29,24] = 00 1110
4274 instr[23,22] = size: byte(00), half(01), word (10)
4275 instr[21,10] = 1000 0100 1010
4276 instr[9,5] = Vs
4277 instr[4,0] = Vd. */
4278
4279 unsigned vs = INSTR (9, 5);
4280 unsigned vd = INSTR (4, 0);
4281 unsigned bias = INSTR (30, 30);
4282 unsigned i;
4283
4284 NYI_assert (29, 24, 0x0E);
4285 NYI_assert (21, 10, 0x84A);
4286
4287 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4288 switch (INSTR (23, 22))
4289 {
4290 case 0:
4291 for (i = 0; i < 8; i++)
4292 aarch64_set_vec_u8 (cpu, vd, i + (bias * 8),
4293 aarch64_get_vec_u16 (cpu, vs, i));
4294 return;
4295
4296 case 1:
4297 for (i = 0; i < 4; i++)
4298 aarch64_set_vec_u16 (cpu, vd, i + (bias * 4),
4299 aarch64_get_vec_u32 (cpu, vs, i));
4300 return;
4301
4302 case 2:
4303 for (i = 0; i < 2; i++)
4304 aarch64_set_vec_u32 (cpu, vd, i + (bias * 2),
4305 aarch64_get_vec_u64 (cpu, vs, i));
4306 return;
4307 }
4308 }
4309
4310 /* Return the number of bits set in the input value. */
4311 #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
4312 # define popcount __builtin_popcount
4313 #else
4314 static int
4315 popcount (unsigned char x)
4316 {
4317 static const unsigned char popcnt[16] =
4318 {
4319 0, 1, 1, 2,
4320 1, 2, 2, 3,
4321 1, 2, 2, 3,
4322 2, 3, 3, 4
4323 };
4324
4325 /* Only counts the low 8 bits of the input as that is all we need. */
4326 return popcnt[x % 16] + popcnt[x / 16];
4327 }
4328 #endif
4329
4330 static void
4331 do_vec_CNT (sim_cpu *cpu)
4332 {
4333 /* instr[31] = 0
4334 instr[30] = half (0)/ full (1)
4335 instr[29,24] = 00 1110
4336 instr[23,22] = size: byte(00)
4337 instr[21,10] = 1000 0001 0110
4338 instr[9,5] = Vs
4339 instr[4,0] = Vd. */
4340
4341 unsigned vs = INSTR (9, 5);
4342 unsigned vd = INSTR (4, 0);
4343 int full = INSTR (30, 30);
4344 int size = INSTR (23, 22);
4345 int i;
4346
4347 NYI_assert (29, 24, 0x0E);
4348 NYI_assert (21, 10, 0x816);
4349
4350 if (size != 0)
4351 HALT_UNALLOC;
4352
4353 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4354
4355 for (i = 0; i < (full ? 16 : 8); i++)
4356 aarch64_set_vec_u8 (cpu, vd, i,
4357 popcount (aarch64_get_vec_u8 (cpu, vs, i)));
4358 }
4359
4360 static void
4361 do_vec_maxv (sim_cpu *cpu)
4362 {
4363 /* instr[31] = 0
4364 instr[30] = half(0)/full(1)
4365 instr[29] = signed (0)/unsigned(1)
4366 instr[28,24] = 0 1110
4367 instr[23,22] = size: byte(00), half(01), word (10)
4368 instr[21] = 1
4369 instr[20,17] = 1 000
4370 instr[16] = max(0)/min(1)
4371 instr[15,10] = 1010 10
4372 instr[9,5] = V source
4373 instr[4,0] = R dest. */
4374
4375 unsigned vs = INSTR (9, 5);
4376 unsigned rd = INSTR (4, 0);
4377 unsigned full = INSTR (30, 30);
4378 unsigned i;
4379
4380 NYI_assert (28, 24, 0x0E);
4381 NYI_assert (21, 21, 1);
4382 NYI_assert (20, 17, 8);
4383 NYI_assert (15, 10, 0x2A);
4384
4385 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
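  /* Combine the unsigned bit (29) with the min bit (16) to select
     between SMAXV, SMINV, UMAXV and UMINV.  */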
4386 switch ((INSTR (29, 29) << 1) | INSTR (16, 16))
4387 {
4388 case 0: /* SMAXV. */
4389 {
4390 int64_t smax;
4391 switch (INSTR (23, 22))
4392 {
4393 case 0:
4394 smax = aarch64_get_vec_s8 (cpu, vs, 0);
4395 for (i = 1; i < (full ? 16 : 8); i++)
4396 smax = max (smax, aarch64_get_vec_s8 (cpu, vs, i));
4397 break;
4398 case 1:
4399 smax = aarch64_get_vec_s16 (cpu, vs, 0);
4400 for (i = 1; i < (full ? 8 : 4); i++)
4401 smax = max (smax, aarch64_get_vec_s16 (cpu, vs, i));
4402 break;
4403 case 2:
4404 smax = aarch64_get_vec_s32 (cpu, vs, 0);
4405 for (i = 1; i < (full ? 4 : 2); i++)
4406 smax = max (smax, aarch64_get_vec_s32 (cpu, vs, i));
4407 break;
4408 case 3:
4409 HALT_UNALLOC;
4410 }
4411 aarch64_set_reg_s64 (cpu, rd, NO_SP, smax);
4412 return;
4413 }
4414
4415 case 1: /* SMINV. */
4416 {
4417 int64_t smin;
4418 switch (INSTR (23, 22))
4419 {
4420 case 0:
4421 smin = aarch64_get_vec_s8 (cpu, vs, 0);
4422 for (i = 1; i < (full ? 16 : 8); i++)
4423 smin = min (smin, aarch64_get_vec_s8 (cpu, vs, i));
4424 break;
4425 case 1:
4426 smin = aarch64_get_vec_s16 (cpu, vs, 0);
4427 for (i = 1; i < (full ? 8 : 4); i++)
4428 smin = min (smin, aarch64_get_vec_s16 (cpu, vs, i));
4429 break;
4430 case 2:
4431 smin = aarch64_get_vec_s32 (cpu, vs, 0);
4432 for (i = 1; i < (full ? 4 : 2); i++)
4433 smin = min (smin, aarch64_get_vec_s32 (cpu, vs, i));
4434 break;
4435
4436 case 3:
4437 HALT_UNALLOC;
4438 }
4439 aarch64_set_reg_s64 (cpu, rd, NO_SP, smin);
4440 return;
4441 }
4442
4443 case 2: /* UMAXV. */
4444 {
4445 uint64_t umax;
4446 switch (INSTR (23, 22))
4447 {
4448 case 0:
4449 umax = aarch64_get_vec_u8 (cpu, vs, 0);
4450 for (i = 1; i < (full ? 16 : 8); i++)
4451 umax = max (umax, aarch64_get_vec_u8 (cpu, vs, i));
4452 break;
4453 case 1:
4454 umax = aarch64_get_vec_u16 (cpu, vs, 0);
4455 for (i = 1; i < (full ? 8 : 4); i++)
4456 umax = max (umax, aarch64_get_vec_u16 (cpu, vs, i));
4457 break;
4458 case 2:
4459 umax = aarch64_get_vec_u32 (cpu, vs, 0);
4460 for (i = 1; i < (full ? 4 : 2); i++)
4461 umax = max (umax, aarch64_get_vec_u32 (cpu, vs, i));
4462 break;
4463
4464 case 3:
4465 HALT_UNALLOC;
4466 }
4467 aarch64_set_reg_u64 (cpu, rd, NO_SP, umax);
4468 return;
4469 }
4470
4471 case 3: /* UMINV. */
4472 {
4473 uint64_t umin;
4474 switch (INSTR (23, 22))
4475 {
4476 case 0:
4477 umin = aarch64_get_vec_u8 (cpu, vs, 0);
4478 for (i = 1; i < (full ? 16 : 8); i++)
4479 umin = min (umin, aarch64_get_vec_u8 (cpu, vs, i));
4480 break;
4481 case 1:
4482 umin = aarch64_get_vec_u16 (cpu, vs, 0);
4483 for (i = 1; i < (full ? 8 : 4); i++)
4484 umin = min (umin, aarch64_get_vec_u16 (cpu, vs, i));
4485 break;
4486 case 2:
4487 umin = aarch64_get_vec_u32 (cpu, vs, 0);
4488 for (i = 1; i < (full ? 4 : 2); i++)
4489 umin = min (umin, aarch64_get_vec_u32 (cpu, vs, i));
4490 break;
4491
4492 case 3:
4493 HALT_UNALLOC;
4494 }
4495 aarch64_set_reg_u64 (cpu, rd, NO_SP, umin);
4496 return;
4497 }
4498 }
4499 }
4500
4501 static void
4502 do_vec_fminmaxV (sim_cpu *cpu)
4503 {
4504 /* instr[31,24] = 0110 1110
4505 instr[23] = max(0)/min(1)
4506 instr[22,14] = 011 0000 11
4507 instr[13,12] = nm(00)/normal(11)
4508 instr[11,10] = 10
4509 instr[9,5] = V source
4510 instr[4,0] = R dest. */
4511
4512 unsigned vs = INSTR (9, 5);
4513 unsigned rd = INSTR (4, 0);
4514 unsigned i;
4515 float res = aarch64_get_vec_float (cpu, vs, 0);
4516
4517 NYI_assert (31, 24, 0x6E);
4518 NYI_assert (22, 14, 0x0C3);
4519 NYI_assert (11, 10, 2);
4520
4521 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4522 if (INSTR (23, 23))
4523 {
4524 switch (INSTR (13, 12))
4525 {
4526 case 0: /* FMINNMV. */
4527 for (i = 1; i < 4; i++)
4528 res = fminnm (res, aarch64_get_vec_float (cpu, vs, i));
4529 break;
4530
4531 case 3: /* FMINV. */
4532 for (i = 1; i < 4; i++)
4533 res = min (res, aarch64_get_vec_float (cpu, vs, i));
4534 break;
4535
4536 default:
4537 HALT_NYI;
4538 }
4539 }
4540 else
4541 {
4542 switch (INSTR (13, 12))
4543 {
4544 case 0: /* FMAXNMV. */
4545 for (i = 1; i < 4; i++)
4546 res = fmaxnm (res, aarch64_get_vec_float (cpu, vs, i));
4547 break;
4548
4549 case 3: /* FMAXV. */
4550 for (i = 1; i < 4; i++)
4551 res = max (res, aarch64_get_vec_float (cpu, vs, i));
4552 break;
4553
4554 default:
4555 HALT_NYI;
4556 }
4557 }
4558
4559 aarch64_set_FP_float (cpu, rd, res);
4560 }
4561
4562 static void
4563 do_vec_Fminmax (sim_cpu *cpu)
4564 {
4565 /* instr[31] = 0
4566 instr[30] = half(0)/full(1)
4567 instr[29,24] = 00 1110
4568 instr[23] = max(0)/min(1)
4569 instr[22] = float(0)/double(1)
4570 instr[21] = 1
4571 instr[20,16] = Vm
4572 instr[15,14] = 11
4573 instr[13,12] = nm(00)/normal(11)
4574 instr[11,10] = 01
4575 instr[9,5] = Vn
4576 instr[4,0] = Vd. */
4577
4578 unsigned vm = INSTR (20, 16);
4579 unsigned vn = INSTR (9, 5);
4580 unsigned vd = INSTR (4, 0);
4581 unsigned full = INSTR (30, 30);
4582 unsigned min = INSTR (23, 23);
4583 unsigned i;
4584
4585 NYI_assert (29, 24, 0x0E);
4586 NYI_assert (21, 21, 1);
4587 NYI_assert (15, 14, 3);
4588 NYI_assert (11, 10, 1);
4589
4590 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4591 if (INSTR (22, 22))
4592 {
4593 double (* func)(double, double);
4594
4595 if (! full)
4596 HALT_NYI;
4597
4598 if (INSTR (13, 12) == 0)
4599 func = min ? dminnm : dmaxnm;
4600 else if (INSTR (13, 12) == 3)
4601 func = min ? fmin : fmax;
4602 else
4603 HALT_NYI;
4604
4605 for (i = 0; i < 2; i++)
4606 aarch64_set_vec_double (cpu, vd, i,
4607 func (aarch64_get_vec_double (cpu, vn, i),
4608 aarch64_get_vec_double (cpu, vm, i)));
4609 }
4610 else
4611 {
4612 float (* func)(float, float);
4613
4614 if (INSTR (13, 12) == 0)
4615 func = min ? fminnm : fmaxnm;
4616 else if (INSTR (13, 12) == 3)
4617 func = min ? fminf : fmaxf;
4618 else
4619 HALT_NYI;
4620
4621 for (i = 0; i < (full ? 4 : 2); i++)
4622 aarch64_set_vec_float (cpu, vd, i,
4623 func (aarch64_get_vec_float (cpu, vn, i),
4624 aarch64_get_vec_float (cpu, vm, i)));
4625 }
4626 }
4627
4628 static void
4629 do_vec_SCVTF (sim_cpu *cpu)
4630 {
4631 /* instr[31] = 0
4632 instr[30] = Q
4633 instr[29,23] = 00 1110 0
4634 instr[22] = float(0)/double(1)
4635 instr[21,10] = 10 0001 1101 10
4636 instr[9,5] = Vn
4637 instr[4,0] = Vd. */
4638
4639 unsigned vn = INSTR (9, 5);
4640 unsigned vd = INSTR (4, 0);
4641 unsigned full = INSTR (30, 30);
4642 unsigned size = INSTR (22, 22);
4643 unsigned i;
4644
4645 NYI_assert (29, 23, 0x1C);
4646 NYI_assert (21, 10, 0x876);
4647
4648 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
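  /* SCVTF converts signed integer elements, hence the signed element
     accessors below.  */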
4649 if (size)
4650 {
4651 if (! full)
4652 HALT_UNALLOC;
4653
4654 for (i = 0; i < 2; i++)
4655 {
4656 double val = (double) aarch64_get_vec_s64 (cpu, vn, i);
4657 aarch64_set_vec_double (cpu, vd, i, val);
4658 }
4659 }
4660 else
4661 {
4662 for (i = 0; i < (full ? 4 : 2); i++)
4663 {
4664 float val = (float) aarch64_get_vec_s32 (cpu, vn, i);
4665 aarch64_set_vec_float (cpu, vd, i, val);
4666 }
4667 }
4668 }
4669
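/* The VEC_CMP* and VEC_FCMP* macros below expand into the element
   loops for the vector compare instructions.  Each destination lane
   is set to all ones when the comparison holds and to all zeros
   otherwise; SOURCE selects the signed (s) or unsigned (u) element
   accessors and CMP is the C comparison operator to apply.  */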
4670 #define VEC_CMP(SOURCE, CMP) \
4671 do \
4672 { \
4673 switch (size) \
4674 { \
4675 case 0: \
4676 for (i = 0; i < (full ? 16 : 8); i++) \
4677 aarch64_set_vec_u8 (cpu, vd, i, \
4678 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4679 CMP \
4680 aarch64_get_vec_##SOURCE##8 (cpu, vm, i) \
4681 ? -1 : 0); \
4682 return; \
4683 case 1: \
4684 for (i = 0; i < (full ? 8 : 4); i++) \
4685 aarch64_set_vec_u16 (cpu, vd, i, \
4686 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4687 CMP \
4688 aarch64_get_vec_##SOURCE##16 (cpu, vm, i) \
4689 ? -1 : 0); \
4690 return; \
4691 case 2: \
4692 for (i = 0; i < (full ? 4 : 2); i++) \
4693 aarch64_set_vec_u32 (cpu, vd, i, \
4694 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4695 CMP \
4696 aarch64_get_vec_##SOURCE##32 (cpu, vm, i) \
4697 ? -1 : 0); \
4698 return; \
4699 case 3: \
4700 if (! full) \
4701 HALT_UNALLOC; \
4702 for (i = 0; i < 2; i++) \
4703 aarch64_set_vec_u64 (cpu, vd, i, \
4704 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4705 CMP \
4706 aarch64_get_vec_##SOURCE##64 (cpu, vm, i) \
4707 ? -1ULL : 0); \
4708 return; \
4709 } \
4710 } \
4711 while (0)
4712
4713 #define VEC_CMP0(SOURCE, CMP) \
4714 do \
4715 { \
4716 switch (size) \
4717 { \
4718 case 0: \
4719 for (i = 0; i < (full ? 16 : 8); i++) \
4720 aarch64_set_vec_u8 (cpu, vd, i, \
4721 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4722 CMP 0 ? -1 : 0); \
4723 return; \
4724 case 1: \
4725 for (i = 0; i < (full ? 8 : 4); i++) \
4726 aarch64_set_vec_u16 (cpu, vd, i, \
4727 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4728 CMP 0 ? -1 : 0); \
4729 return; \
4730 case 2: \
4731 for (i = 0; i < (full ? 4 : 2); i++) \
4732 aarch64_set_vec_u32 (cpu, vd, i, \
4733 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4734 CMP 0 ? -1 : 0); \
4735 return; \
4736 case 3: \
4737 if (! full) \
4738 HALT_UNALLOC; \
4739 for (i = 0; i < 2; i++) \
4740 aarch64_set_vec_u64 (cpu, vd, i, \
4741 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4742 CMP 0 ? -1ULL : 0); \
4743 return; \
4744 } \
4745 } \
4746 while (0)
4747
4748 #define VEC_FCMP0(CMP) \
4749 do \
4750 { \
4751 if (vm != 0) \
4752 HALT_NYI; \
4753 if (INSTR (22, 22)) \
4754 { \
4755 if (! full) \
4756 HALT_NYI; \
4757 for (i = 0; i < 2; i++) \
4758 aarch64_set_vec_u64 (cpu, vd, i, \
4759 aarch64_get_vec_double (cpu, vn, i) \
4760 CMP 0.0 ? -1 : 0); \
4761 } \
4762 else \
4763 { \
4764 for (i = 0; i < (full ? 4 : 2); i++) \
4765 aarch64_set_vec_u32 (cpu, vd, i, \
4766 aarch64_get_vec_float (cpu, vn, i) \
4767 CMP 0.0 ? -1 : 0); \
4768 } \
4769 return; \
4770 } \
4771 while (0)
4772
4773 #define VEC_FCMP(CMP) \
4774 do \
4775 { \
4776 if (INSTR (22, 22)) \
4777 { \
4778 if (! full) \
4779 HALT_NYI; \
4780 for (i = 0; i < 2; i++) \
4781 aarch64_set_vec_u64 (cpu, vd, i, \
4782 aarch64_get_vec_double (cpu, vn, i) \
4783 CMP \
4784 aarch64_get_vec_double (cpu, vm, i) \
4785 ? -1 : 0); \
4786 } \
4787 else \
4788 { \
4789 for (i = 0; i < (full ? 4 : 2); i++) \
4790 aarch64_set_vec_u32 (cpu, vd, i, \
4791 aarch64_get_vec_float (cpu, vn, i) \
4792 CMP \
4793 aarch64_get_vec_float (cpu, vm, i) \
4794 ? -1 : 0); \
4795 } \
4796 return; \
4797 } \
4798 while (0)
4799
4800 static void
4801 do_vec_compare (sim_cpu *cpu)
4802 {
4803 /* instr[31] = 0
4804 instr[30] = half(0)/full(1)
4805 instr[29] = part-of-comparison-type
4806 instr[28,24] = 0 1110
4807 instr[23,22] = size of integer compares: byte(00), half(01), word (10), long (11)
4808 type of float compares: single (-0) / double (-1)
4809 instr[21] = 1
4810 instr[20,16] = Vm or 00000 (compare vs 0)
4811 instr[15,10] = part-of-comparison-type
4812 instr[9,5] = Vn
4813 instr[4,0] = Vd. */
4814
4815 int full = INSTR (30, 30);
4816 int size = INSTR (23, 22);
4817 unsigned vm = INSTR (20, 16);
4818 unsigned vn = INSTR (9, 5);
4819 unsigned vd = INSTR (4, 0);
4820 unsigned i;
4821
4822 NYI_assert (28, 24, 0x0E);
4823 NYI_assert (21, 21, 1);
4824
4825 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
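  /* Several non-compare encodings share this decode space; they are
     recognised below (by a non-zero Vm field or by their opcode bits)
     and forwarded to the appropriate handler before the real compares
     are decoded.  */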
4826 if ((INSTR (11, 11)
4827 && INSTR (14, 14))
4828 || ((INSTR (11, 11) == 0
4829 && INSTR (10, 10) == 0)))
4830 {
4831 /* A compare vs 0. */
4832 if (vm != 0)
4833 {
4834 if (INSTR (15, 10) == 0x2A)
4835 do_vec_maxv (cpu);
4836 else if (INSTR (15, 10) == 0x32
4837 || INSTR (15, 10) == 0x3E)
4838 do_vec_fminmaxV (cpu);
4839 else if (INSTR (29, 23) == 0x1C
4840 && INSTR (21, 10) == 0x876)
4841 do_vec_SCVTF (cpu);
4842 else
4843 HALT_NYI;
4844 return;
4845 }
4846 }
4847
4848 if (INSTR (14, 14))
4849 {
4850 /* A floating point compare. */
4851 unsigned decode = (INSTR (29, 29) << 5) | (INSTR (23, 23) << 4)
4852 | INSTR (13, 10);
4853
4854 NYI_assert (15, 15, 1);
4855
4856 switch (decode)
4857 {
4858 case /* 0b010010: GT#0 */ 0x12: VEC_FCMP0 (>);
4859 case /* 0b110010: GE#0 */ 0x32: VEC_FCMP0 (>=);
4860 case /* 0b010110: EQ#0 */ 0x16: VEC_FCMP0 (==);
4861 case /* 0b110110: LE#0 */ 0x36: VEC_FCMP0 (<=);
4862 case /* 0b011010: LT#0 */ 0x1A: VEC_FCMP0 (<);
4863 case /* 0b111001: GT */ 0x39: VEC_FCMP (>);
4864 case /* 0b101001: GE */ 0x29: VEC_FCMP (>=);
4865 case /* 0b001001: EQ */ 0x09: VEC_FCMP (==);
4866
4867 default:
4868 HALT_NYI;
4869 }
4870 }
4871 else
4872 {
4873 unsigned decode = (INSTR (29, 29) << 6) | INSTR (15, 10);
4874
4875 switch (decode)
4876 {
4877 case 0x0D: /* 0001101 GT */ VEC_CMP (s, > );
4878 case 0x0F: /* 0001111 GE */ VEC_CMP (s, >= );
4879 case 0x22: /* 0100010 GT #0 */ VEC_CMP0 (s, > );
4880 case 0x23: /* 0100011 TST */ VEC_CMP (u, & );
4881 case 0x26: /* 0100110 EQ #0 */ VEC_CMP0 (s, == );
4882 case 0x2A: /* 0101010 LT #0 */ VEC_CMP0 (s, < );
4883 case 0x4D: /* 1001101 HI */ VEC_CMP (u, > );
4884 case 0x4F: /* 1001111 HS */ VEC_CMP (u, >= );
4885 case 0x62: /* 1100010 GE #0 */ VEC_CMP0 (s, >= );
4886 case 0x63: /* 1100011 EQ */ VEC_CMP (u, == );
4887 case 0x66: /* 1100110 LE #0 */ VEC_CMP0 (s, <= );
4888 default:
4889 if (vm == 0)
4890 HALT_NYI;
4891 do_vec_maxv (cpu);
4892 }
4893 }
4894 }
4895
4896 static void
4897 do_vec_SSHL (sim_cpu *cpu)
4898 {
4899 /* instr[31] = 0
4900 instr[30] = first part (0)/ second part (1)
4901 instr[29,24] = 00 1110
4902 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4903 instr[21] = 1
4904 instr[20,16] = Vm
4905 instr[15,10] = 0100 01
4906 instr[9,5] = Vn
4907 instr[4,0] = Vd. */
4908
4909 unsigned full = INSTR (30, 30);
4910 unsigned vm = INSTR (20, 16);
4911 unsigned vn = INSTR (9, 5);
4912 unsigned vd = INSTR (4, 0);
4913 unsigned i;
4914 signed int shift;
4915
4916 NYI_assert (29, 24, 0x0E);
4917 NYI_assert (21, 21, 1);
4918 NYI_assert (15, 10, 0x11);
4919
4920 /* The shift count taken from Vm is signed: a non-negative count shifts the Vn element left, a negative count shifts it arithmetically right. */
4921
4922 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4923 switch (INSTR (23, 22))
4924 {
4925 case 0:
4926 for (i = 0; i < (full ? 16 : 8); i++)
4927 {
4928 shift = aarch64_get_vec_s8 (cpu, vm, i);
4929 if (shift >= 0)
4930 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4931 << shift);
4932 else
4933 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4934 >> - shift);
4935 }
4936 return;
4937
4938 case 1:
4939 for (i = 0; i < (full ? 8 : 4); i++)
4940 {
4941 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
4942 if (shift >= 0)
4943 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4944 << shift);
4945 else
4946 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4947 >> - shift);
4948 }
4949 return;
4950
4951 case 2:
4952 for (i = 0; i < (full ? 4 : 2); i++)
4953 {
4954 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
4955 if (shift >= 0)
4956 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4957 << shift);
4958 else
4959 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4960 >> - shift);
4961 }
4962 return;
4963
4964 case 3:
4965 if (! full)
4966 HALT_UNALLOC;
4967 for (i = 0; i < 2; i++)
4968 {
4969 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
4970 if (shift >= 0)
4971 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4972 << shift);
4973 else
4974 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4975 >> - shift);
4976 }
4977 return;
4978 }
4979 }
4980
4981 static void
4982 do_vec_USHL (sim_cpu *cpu)
4983 {
4984 /* instr[31] = 0
4985 instr[30] = first part (0)/ second part (1)
4986 instr[29,24] = 10 1110
4987 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4988 instr[21] = 1
4989 instr[20,16] = Vm
4990 instr[15,10] = 0100 01
4991 instr[9,5] = Vn
4992 instr[4,0] = Vd */
4993
4994 unsigned full = INSTR (30, 30);
4995 unsigned vm = INSTR (20, 16);
4996 unsigned vn = INSTR (9, 5);
4997 unsigned vd = INSTR (4, 0);
4998 unsigned i;
4999 signed int shift;
5000
5001 NYI_assert (29, 24, 0x2E);
5002 NYI_assert (15, 10, 0x11);
5003
5004 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5005 switch (INSTR (23, 22))
5006 {
5007 case 0:
5008 for (i = 0; i < (full ? 16 : 8); i++)
5009 {
5010 shift = aarch64_get_vec_s8 (cpu, vm, i);
5011 if (shift >= 0)
5012 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
5013 << shift);
5014 else
5015 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
5016 >> - shift);
5017 }
5018 return;
5019
5020 case 1:
5021 for (i = 0; i < (full ? 8 : 4); i++)
5022 {
5023 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
5024 if (shift >= 0)
5025 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
5026 << shift);
5027 else
5028 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
5029 >> - shift);
5030 }
5031 return;
5032
5033 case 2:
5034 for (i = 0; i < (full ? 4 : 2); i++)
5035 {
5036 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
5037 if (shift >= 0)
5038 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
5039 << shift);
5040 else
5041 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
5042 >> - shift);
5043 }
5044 return;
5045
5046 case 3:
5047 if (! full)
5048 HALT_UNALLOC;
5049 for (i = 0; i < 2; i++)
5050 {
5051 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
5052 if (shift >= 0)
5053 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
5054 << shift);
5055 else
5056 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
5057 >> - shift);
5058 }
5059 return;
5060 }
5061 }
5062
5063 static void
5064 do_vec_FMLA (sim_cpu *cpu)
5065 {
5066 /* instr[31] = 0
5067 instr[30] = full/half selector
5068 instr[29,23] = 0011100
5069 instr[22] = size: 0=>float, 1=>double
5070 instr[21] = 1
5071 instr[20,16] = Vm
5072 instr[15,10] = 1100 11
5073 instr[9,5] = Vn
5074 instr[4,0] = Vd. */
5075
5076 unsigned vm = INSTR (20, 16);
5077 unsigned vn = INSTR (9, 5);
5078 unsigned vd = INSTR (4, 0);
5079 unsigned i;
5080 int full = INSTR (30, 30);
5081
5082 NYI_assert (29, 23, 0x1C);
5083 NYI_assert (21, 21, 1);
5084 NYI_assert (15, 10, 0x33);
5085
5086 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
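  /* Note: the multiply and add below are separate C operations, so
     the result may be rounded twice, unlike a true fused
     multiply-add.  */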
5087 if (INSTR (22, 22))
5088 {
5089 if (! full)
5090 HALT_UNALLOC;
5091 for (i = 0; i < 2; i++)
5092 aarch64_set_vec_double (cpu, vd, i,
5093 aarch64_get_vec_double (cpu, vn, i) *
5094 aarch64_get_vec_double (cpu, vm, i) +
5095 aarch64_get_vec_double (cpu, vd, i));
5096 }
5097 else
5098 {
5099 for (i = 0; i < (full ? 4 : 2); i++)
5100 aarch64_set_vec_float (cpu, vd, i,
5101 aarch64_get_vec_float (cpu, vn, i) *
5102 aarch64_get_vec_float (cpu, vm, i) +
5103 aarch64_get_vec_float (cpu, vd, i));
5104 }
5105 }
5106
5107 static void
5108 do_vec_max (sim_cpu *cpu)
5109 {
5110 /* instr[31] = 0
5111 instr[30] = full/half selector
5112 instr[29] = SMAX (0) / UMAX (1)
5113 instr[28,24] = 0 1110
5114 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
5115 instr[21] = 1
5116 instr[20,16] = Vm
5117 instr[15,10] = 0110 01
5118 instr[9,5] = Vn
5119 instr[4,0] = Vd. */
5120
5121 unsigned vm = INSTR (20, 16);
5122 unsigned vn = INSTR (9, 5);
5123 unsigned vd = INSTR (4, 0);
5124 unsigned i;
5125 int full = INSTR (30, 30);
5126
5127 NYI_assert (28, 24, 0x0E);
5128 NYI_assert (21, 21, 1);
5129 NYI_assert (15, 10, 0x19);
5130
5131 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5132 if (INSTR (29, 29))
5133 {
5134 switch (INSTR (23, 22))
5135 {
5136 case 0:
5137 for (i = 0; i < (full ? 16 : 8); i++)
5138 aarch64_set_vec_u8 (cpu, vd, i,
5139 aarch64_get_vec_u8 (cpu, vn, i)
5140 > aarch64_get_vec_u8 (cpu, vm, i)
5141 ? aarch64_get_vec_u8 (cpu, vn, i)
5142 : aarch64_get_vec_u8 (cpu, vm, i));
5143 return;
5144
5145 case 1:
5146 for (i = 0; i < (full ? 8 : 4); i++)
5147 aarch64_set_vec_u16 (cpu, vd, i,
5148 aarch64_get_vec_u16 (cpu, vn, i)
5149 > aarch64_get_vec_u16 (cpu, vm, i)
5150 ? aarch64_get_vec_u16 (cpu, vn, i)
5151 : aarch64_get_vec_u16 (cpu, vm, i));
5152 return;
5153
5154 case 2:
5155 for (i = 0; i < (full ? 4 : 2); i++)
5156 aarch64_set_vec_u32 (cpu, vd, i,
5157 aarch64_get_vec_u32 (cpu, vn, i)
5158 > aarch64_get_vec_u32 (cpu, vm, i)
5159 ? aarch64_get_vec_u32 (cpu, vn, i)
5160 : aarch64_get_vec_u32 (cpu, vm, i));
5161 return;
5162
5163 case 3:
5164 HALT_UNALLOC;
5165 }
5166 }
5167 else
5168 {
5169 switch (INSTR (23, 22))
5170 {
5171 case 0:
5172 for (i = 0; i < (full ? 16 : 8); i++)
5173 aarch64_set_vec_s8 (cpu, vd, i,
5174 aarch64_get_vec_s8 (cpu, vn, i)
5175 > aarch64_get_vec_s8 (cpu, vm, i)
5176 ? aarch64_get_vec_s8 (cpu, vn, i)
5177 : aarch64_get_vec_s8 (cpu, vm, i));
5178 return;
5179
5180 case 1:
5181 for (i = 0; i < (full ? 8 : 4); i++)
5182 aarch64_set_vec_s16 (cpu, vd, i,
5183 aarch64_get_vec_s16 (cpu, vn, i)
5184 > aarch64_get_vec_s16 (cpu, vm, i)
5185 ? aarch64_get_vec_s16 (cpu, vn, i)
5186 : aarch64_get_vec_s16 (cpu, vm, i));
5187 return;
5188
5189 case 2:
5190 for (i = 0; i < (full ? 4 : 2); i++)
5191 aarch64_set_vec_s32 (cpu, vd, i,
5192 aarch64_get_vec_s32 (cpu, vn, i)
5193 > aarch64_get_vec_s32 (cpu, vm, i)
5194 ? aarch64_get_vec_s32 (cpu, vn, i)
5195 : aarch64_get_vec_s32 (cpu, vm, i));
5196 return;
5197
5198 case 3:
5199 HALT_UNALLOC;
5200 }
5201 }
5202 }
5203
5204 static void
5205 do_vec_min (sim_cpu *cpu)
5206 {
5207 /* instr[31] = 0
5208 instr[30] = full/half selector
5209 instr[29] = SMIN (0) / UMIN (1)
5210 instr[28,24] = 0 1110
5211 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
5212 instr[21] = 1
5213 instr[20,16] = Vm
5214 instr[15,10] = 0110 11
5215 instr[9,5] = Vn
5216 instr[4,0] = Vd. */
5217
5218 unsigned vm = INSTR (20, 16);
5219 unsigned vn = INSTR (9, 5);
5220 unsigned vd = INSTR (4, 0);
5221 unsigned i;
5222 int full = INSTR (30, 30);
5223
5224 NYI_assert (28, 24, 0x0E);
5225 NYI_assert (21, 21, 1);
5226 NYI_assert (15, 10, 0x1B);
5227
5228 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5229 if (INSTR (29, 29))
5230 {
5231 switch (INSTR (23, 22))
5232 {
5233 case 0:
5234 for (i = 0; i < (full ? 16 : 8); i++)
5235 aarch64_set_vec_u8 (cpu, vd, i,
5236 aarch64_get_vec_u8 (cpu, vn, i)
5237 < aarch64_get_vec_u8 (cpu, vm, i)
5238 ? aarch64_get_vec_u8 (cpu, vn, i)
5239 : aarch64_get_vec_u8 (cpu, vm, i));
5240 return;
5241
5242 case 1:
5243 for (i = 0; i < (full ? 8 : 4); i++)
5244 aarch64_set_vec_u16 (cpu, vd, i,
5245 aarch64_get_vec_u16 (cpu, vn, i)
5246 < aarch64_get_vec_u16 (cpu, vm, i)
5247 ? aarch64_get_vec_u16 (cpu, vn, i)
5248 : aarch64_get_vec_u16 (cpu, vm, i));
5249 return;
5250
5251 case 2:
5252 for (i = 0; i < (full ? 4 : 2); i++)
5253 aarch64_set_vec_u32 (cpu, vd, i,
5254 aarch64_get_vec_u32 (cpu, vn, i)
5255 < aarch64_get_vec_u32 (cpu, vm, i)
5256 ? aarch64_get_vec_u32 (cpu, vn, i)
5257 : aarch64_get_vec_u32 (cpu, vm, i));
5258 return;
5259
5260 case 3:
5261 HALT_UNALLOC;
5262 }
5263 }
5264 else
5265 {
5266 switch (INSTR (23, 22))
5267 {
5268 case 0:
5269 for (i = 0; i < (full ? 16 : 8); i++)
5270 aarch64_set_vec_s8 (cpu, vd, i,
5271 aarch64_get_vec_s8 (cpu, vn, i)
5272 < aarch64_get_vec_s8 (cpu, vm, i)
5273 ? aarch64_get_vec_s8 (cpu, vn, i)
5274 : aarch64_get_vec_s8 (cpu, vm, i));
5275 return;
5276
5277 case 1:
5278 for (i = 0; i < (full ? 8 : 4); i++)
5279 aarch64_set_vec_s16 (cpu, vd, i,
5280 aarch64_get_vec_s16 (cpu, vn, i)
5281 < aarch64_get_vec_s16 (cpu, vm, i)
5282 ? aarch64_get_vec_s16 (cpu, vn, i)
5283 : aarch64_get_vec_s16 (cpu, vm, i));
5284 return;
5285
5286 case 2:
5287 for (i = 0; i < (full ? 4 : 2); i++)
5288 aarch64_set_vec_s32 (cpu, vd, i,
5289 aarch64_get_vec_s32 (cpu, vn, i)
5290 < aarch64_get_vec_s32 (cpu, vm, i)
5291 ? aarch64_get_vec_s32 (cpu, vn, i)
5292 : aarch64_get_vec_s32 (cpu, vm, i));
5293 return;
5294
5295 case 3:
5296 HALT_UNALLOC;
5297 }
5298 }
5299 }
5300
5301 static void
5302 do_vec_sub_long (sim_cpu *cpu)
5303 {
5304 /* instr[31] = 0
5305 instr[30] = lower (0) / upper (1)
5306 instr[29] = signed (0) / unsigned (1)
5307 instr[28,24] = 0 1110
5308 instr[23,22] = size: bytes (00), half (01), word (10)
5309 instr[21] = 1
5310 instr[20,16] = Vm
5311 instr[15,10] = 0010 00
5312 instr[9,5] = Vn
5313 instr[4,0] = V dest. */
5314
5315 unsigned size = INSTR (23, 22);
5316 unsigned vm = INSTR (20, 16);
5317 unsigned vn = INSTR (9, 5);
5318 unsigned vd = INSTR (4, 0);
5319 unsigned bias = 0;
5320 unsigned i;
5321
5322 NYI_assert (28, 24, 0x0E);
5323 NYI_assert (21, 21, 1);
5324 NYI_assert (15, 10, 0x08);
5325
5326 if (size == 3)
5327 HALT_UNALLOC;
5328
5329 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5330 switch (INSTR (30, 29))
5331 {
5332 case 2: /* SSUBL2. */
5333 bias = 2; /* Fall through. */
5334 case 0: /* SSUBL. */
5335 switch (size)
5336 {
5337 case 0:
5338 bias *= 4; /* Byte elements: the upper half starts at element 8. */
5339 for (i = 0; i < 8; i++)
5340 aarch64_set_vec_s16 (cpu, vd, i,
5341 aarch64_get_vec_s8 (cpu, vn, i + bias)
5342 - aarch64_get_vec_s8 (cpu, vm, i + bias));
5343 break;
5344
5345 case 1:
5346 bias *= 2;
5347 for (i = 0; i < 4; i++)
5348 aarch64_set_vec_s32 (cpu, vd, i,
5349 aarch64_get_vec_s16 (cpu, vn, i + bias)
5350 - aarch64_get_vec_s16 (cpu, vm, i + bias));
5351 break;
5352
5353 case 2:
5354 for (i = 0; i < 2; i++)
5355 aarch64_set_vec_s64 (cpu, vd, i,
5356 aarch64_get_vec_s32 (cpu, vn, i + bias)
5357 - aarch64_get_vec_s32 (cpu, vm, i + bias));
5358 break;
5359
5360 default:
5361 HALT_UNALLOC;
5362 }
5363 break;
5364
5365 case 3: /* USUBL2. */
5366 bias = 2; /* Fall through. */
5367 case 1: /* USUBL. */
5368 switch (size)
5369 {
5370 case 0:
5371 bias *= 4; /* Byte elements: the upper half starts at element 8. */
5372 for (i = 0; i < 8; i++)
5373 aarch64_set_vec_u16 (cpu, vd, i,
5374 aarch64_get_vec_u8 (cpu, vn, i + bias)
5375 - aarch64_get_vec_u8 (cpu, vm, i + bias));
5376 break;
5377
5378 case 1:
5379 bias *= 2;
5380 for (i = 0; i < 4; i++)
5381 aarch64_set_vec_u32 (cpu, vd, i,
5382 aarch64_get_vec_u16 (cpu, vn, i + bias)
5383 - aarch64_get_vec_u16 (cpu, vm, i + bias));
5384 break;
5385
5386 case 2:
5387 for (i = 0; i < 2; i++)
5388 aarch64_set_vec_u64 (cpu, vd, i,
5389 aarch64_get_vec_u32 (cpu, vn, i + bias)
5390 - aarch64_get_vec_u32 (cpu, vm, i + bias));
5391 break;
5392
5393 default:
5394 HALT_UNALLOC;
5395 }
5396 break;
5397 }
5398 }
5399
5400 static void
5401 do_vec_ADDP (sim_cpu *cpu)
5402 {
5403 /* instr[31] = 0
5404 instr[30] = half(0)/full(1)
5405 instr[29,24] = 00 1110
5406 instr[23,22] = size: bytes (00), half (01), word (10), long (11)
5407 instr[21] = 1
5408 instr[20,16] = Vm
5409 instr[15,10] = 1011 11
5410 instr[9,5] = Vn
5411 instr[4,0] = V dest. */
5412
5413 FRegister copy_vn;
5414 FRegister copy_vm;
5415 unsigned full = INSTR (30, 30);
5416 unsigned size = INSTR (23, 22);
5417 unsigned vm = INSTR (20, 16);
5418 unsigned vn = INSTR (9, 5);
5419 unsigned vd = INSTR (4, 0);
5420 unsigned i, range;
5421
5422 NYI_assert (29, 24, 0x0E);
5423 NYI_assert (21, 21, 1);
5424 NYI_assert (15, 10, 0x2F);
5425
5426 /* Make copies of the source registers in case vd == vn/vm. */
5427 copy_vn = cpu->fr[vn];
5428 copy_vm = cpu->fr[vm];
5429
5430 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5431 switch (size)
5432 {
5433 case 0:
5434 range = full ? 8 : 4;
5435 for (i = 0; i < range; i++)
5436 {
5437 aarch64_set_vec_u8 (cpu, vd, i,
5438 copy_vn.b[i * 2] + copy_vn.b[i * 2 + 1]);
5439 aarch64_set_vec_u8 (cpu, vd, i + range,
5440 copy_vm.b[i * 2] + copy_vm.b[i * 2 + 1]);
5441 }
5442 return;
5443
5444 case 1:
5445 range = full ? 4 : 2;
5446 for (i = 0; i < range; i++)
5447 {
5448 aarch64_set_vec_u16 (cpu, vd, i,
5449 copy_vn.h[i * 2] + copy_vn.h[i * 2 + 1]);
5450 aarch64_set_vec_u16 (cpu, vd, i + range,
5451 copy_vm.h[i * 2] + copy_vm.h[i * 2 + 1]);
5452 }
5453 return;
5454
5455 case 2:
5456 range = full ? 2 : 1;
5457 for (i = 0; i < range; i++)
5458 {
5459 aarch64_set_vec_u32 (cpu, vd, i,
5460 copy_vn.w[i * 2] + copy_vn.w[i * 2 + 1]);
5461 aarch64_set_vec_u32 (cpu, vd, i + range,
5462 copy_vm.w[i * 2] + copy_vm.w[i * 2 + 1]);
5463 }
5464 return;
5465
5466 case 3:
5467 if (! full)
5468 HALT_UNALLOC;
5469 aarch64_set_vec_u64 (cpu, vd, 0, copy_vn.v[0] + copy_vn.v[1]);
5470 aarch64_set_vec_u64 (cpu, vd, 1, copy_vm.v[0] + copy_vm.v[1]);
5471 return;
5472 }
5473 }
5474
5475 /* Floating point vector convert to longer precision. */
5476 static void
5477 do_vec_FCVTL (sim_cpu *cpu)
5478 {
5479 /* instr[31] = 0
5480 instr[30] = half (0) / all (1)
5481 instr[29,23] = 00 1110 0
5482 instr[22] = single (0) / double (1)
5483 instr[21,10] = 10 0001 0111 10
5484 instr[9,5] = Rn
5485 instr[4,0] = Rd. */
5486
5487 unsigned rn = INSTR (9, 5);
5488 unsigned rd = INSTR (4, 0);
5489 unsigned full = INSTR (30, 30);
5490 unsigned i;
5491
5492 NYI_assert (31, 31, 0);
5493 NYI_assert (29, 23, 0x1C);
5494 NYI_assert (21, 10, 0x85E);
5495
5496 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5497 if (INSTR (22, 22))
5498 {
5499 for (i = 0; i < 2; i++)
5500 aarch64_set_vec_double (cpu, rd, i,
5501 aarch64_get_vec_float (cpu, rn, i + 2*full));
5502 }
5503 else
5504 {
5505 HALT_NYI;
5506
5507 #if 0
5508 /* TODO: Implement missing half-float support. */
5509 for (i = 0; i < 4; i++)
5510 aarch64_set_vec_float (cpu, rd, i,
5511 aarch64_get_vec_halffloat (cpu, rn, i + 4*full));
5512 #endif
5513 }
5514 }
5515
5516 static void
5517 do_vec_FABS (sim_cpu *cpu)
5518 {
5519 /* instr[31] = 0
5520 instr[30] = half(0)/full(1)
5521 instr[29,23] = 00 1110 1
5522 instr[22] = float(0)/double(1)
5523 instr[21,16] = 10 0000
5524 instr[15,10] = 1111 10
5525 instr[9,5] = Vn
5526 instr[4,0] = Vd. */
5527
5528 unsigned vn = INSTR (9, 5);
5529 unsigned vd = INSTR (4, 0);
5530 unsigned full = INSTR (30, 30);
5531 unsigned i;
5532
5533 NYI_assert (29, 23, 0x1D);
5534 NYI_assert (21, 10, 0x83E);
5535
5536 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5537 if (INSTR (22, 22))
5538 {
5539 if (! full)
5540 HALT_NYI;
5541
5542 for (i = 0; i < 2; i++)
5543 aarch64_set_vec_double (cpu, vd, i,
5544 fabs (aarch64_get_vec_double (cpu, vn, i)));
5545 }
5546 else
5547 {
5548 for (i = 0; i < (full ? 4 : 2); i++)
5549 aarch64_set_vec_float (cpu, vd, i,
5550 fabsf (aarch64_get_vec_float (cpu, vn, i)));
5551 }
5552 }
5553
5554 static void
5555 do_vec_FCVTZS (sim_cpu *cpu)
5556 {
5557 /* instr[31] = 0
5558 instr[30] = half (0) / all (1)
5559 instr[29,23] = 00 1110 1
5560 instr[22] = single (0) / double (1)
5561 instr[21,10] = 10 0001 1011 10
5562 instr[9,5] = Rn
5563 instr[4,0] = Rd. */
5564
5565 unsigned rn = INSTR (9, 5);
5566 unsigned rd = INSTR (4, 0);
5567 unsigned full = INSTR (30, 30);
5568 unsigned i;
5569
5570 NYI_assert (31, 31, 0);
5571 NYI_assert (29, 23, 0x1D);
5572 NYI_assert (21, 10, 0x86E);
5573
5574 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
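  /* The C casts below truncate towards zero, matching FCVTZS's
     round-towards-zero behaviour; out-of-range saturation and NaN
     handling are not modelled here.  */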
5575 if (INSTR (22, 22))
5576 {
5577 if (! full)
5578 HALT_UNALLOC;
5579
5580 for (i = 0; i < 2; i++)
5581 aarch64_set_vec_s64 (cpu, rd, i,
5582 (int64_t) aarch64_get_vec_double (cpu, rn, i));
5583 }
5584 else
5585 for (i = 0; i < (full ? 4 : 2); i++)
5586 aarch64_set_vec_s32 (cpu, rd, i,
5587 (int32_t) aarch64_get_vec_float (cpu, rn, i));
5588 }
5589
5590 static void
5591 do_vec_REV64 (sim_cpu *cpu)
5592 {
5593 /* instr[31] = 0
5594 instr[30] = full/half
5595 instr[29,24] = 00 1110
5596 instr[23,22] = size
5597 instr[21,10] = 10 0000 0000 10
5598 instr[9,5] = Rn
5599 instr[4,0] = Rd. */
5600
5601 unsigned rn = INSTR (9, 5);
5602 unsigned rd = INSTR (4, 0);
5603 unsigned size = INSTR (23, 22);
5604 unsigned full = INSTR (30, 30);
5605 unsigned i;
5606 FRegister val;
5607
5608 NYI_assert (29, 24, 0x0E);
5609 NYI_assert (21, 10, 0x802);
5610
5611 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
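  /* Reverse the element order within each 64-bit container: XOR-ing
     the element index with (elements-per-doubleword - 1) swaps each
     element with its mirror position.  */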
5612 switch (size)
5613 {
5614 case 0:
5615 for (i = 0; i < (full ? 16 : 8); i++)
5616 val.b[i ^ 0x7] = aarch64_get_vec_u8 (cpu, rn, i);
5617 break;
5618
5619 case 1:
5620 for (i = 0; i < (full ? 8 : 4); i++)
5621 val.h[i ^ 0x3] = aarch64_get_vec_u16 (cpu, rn, i);
5622 break;
5623
5624 case 2:
5625 for (i = 0; i < (full ? 4 : 2); i++)
5626 val.w[i ^ 0x1] = aarch64_get_vec_u32 (cpu, rn, i);
5627 break;
5628
5629 case 3:
5630 HALT_UNALLOC;
5631 }
5632
5633 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5634 if (full)
5635 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5636 }
5637
5638 static void
5639 do_vec_REV16 (sim_cpu *cpu)
5640 {
5641 /* instr[31] = 0
5642 instr[30] = full/half
5643 instr[29,24] = 00 1110
5644 instr[23,22] = size
5645 instr[21,10] = 10 0000 0001 10
5646 instr[9,5] = Rn
5647 instr[4,0] = Rd. */
5648
5649 unsigned rn = INSTR (9, 5);
5650 unsigned rd = INSTR (4, 0);
5651 unsigned size = INSTR (23, 22);
5652 unsigned full = INSTR (30, 30);
5653 unsigned i;
5654 FRegister val;
5655
5656 NYI_assert (29, 24, 0x0E);
5657 NYI_assert (21, 10, 0x806);
5658
5659 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5660 switch (size)
5661 {
5662 case 0:
5663 for (i = 0; i < (full ? 16 : 8); i++)
5664 val.b[i ^ 0x1] = aarch64_get_vec_u8 (cpu, rn, i);
5665 break;
5666
5667 default:
5668 HALT_UNALLOC;
5669 }
5670
5671 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5672 if (full)
5673 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5674 }
5675
5676 static void
5677 do_vec_op1 (sim_cpu *cpu)
5678 {
5679 /* instr[31] = 0
5680 instr[30] = half/full
5681 instr[29,24] = 00 1110
5682 instr[23,21] = ???
5683 instr[20,16] = Vm
5684 instr[15,10] = sub-opcode
5685 instr[9,5] = Vn
5686 instr[4,0] = Vd */
5687 NYI_assert (29, 24, 0x0E);
5688
5689 if (INSTR (21, 21) == 0)
5690 {
5691 if (INSTR (23, 22) == 0)
5692 {
5693 if (INSTR (30, 30) == 1
5694 && INSTR (17, 14) == 0
5695 && INSTR (12, 10) == 7)
5696 return do_vec_ins_2 (cpu);
5697
5698 switch (INSTR (15, 10))
5699 {
5700 case 0x01: do_vec_DUP_vector_into_vector (cpu); return;
5701 case 0x03: do_vec_DUP_scalar_into_vector (cpu); return;
5702 case 0x07: do_vec_INS (cpu); return;
5703 case 0x0B: do_vec_SMOV_into_scalar (cpu); return;
5704 case 0x0F: do_vec_UMOV_into_scalar (cpu); return;
5705
5706 case 0x00:
5707 case 0x08:
5708 case 0x10:
5709 case 0x18:
5710 do_vec_TBL (cpu); return;
5711
5712 case 0x06:
5713 case 0x16:
5714 do_vec_UZP (cpu); return;
5715
5716 case 0x0A: do_vec_TRN (cpu); return;
5717
5718 case 0x0E:
5719 case 0x1E:
5720 do_vec_ZIP (cpu); return;
5721
5722 default:
5723 HALT_NYI;
5724 }
5725 }
5726
5727 switch (INSTR (13, 10))
5728 {
5729 case 0x6: do_vec_UZP (cpu); return;
5730 case 0xE: do_vec_ZIP (cpu); return;
5731 case 0xA: do_vec_TRN (cpu); return;
5732 default: HALT_NYI;
5733 }
5734 }
5735
5736 switch (INSTR (15, 10))
5737 {
5738 case 0x02: do_vec_REV64 (cpu); return;
5739 case 0x06: do_vec_REV16 (cpu); return;
5740
5741 case 0x07:
5742 switch (INSTR (23, 21))
5743 {
5744 case 1: do_vec_AND (cpu); return;
5745 case 3: do_vec_BIC (cpu); return;
5746 case 5: do_vec_ORR (cpu); return;
5747 case 7: do_vec_ORN (cpu); return;
5748 default: HALT_NYI;
5749 }
5750
5751 case 0x08: do_vec_sub_long (cpu); return;
5752 case 0x0a: do_vec_XTN (cpu); return;
5753 case 0x11: do_vec_SSHL (cpu); return;
5754 case 0x16: do_vec_CNT (cpu); return;
5755 case 0x19: do_vec_max (cpu); return;
5756 case 0x1B: do_vec_min (cpu); return;
5757 case 0x21: do_vec_add (cpu); return;
5758 case 0x25: do_vec_MLA (cpu); return;
5759 case 0x27: do_vec_mul (cpu); return;
5760 case 0x2F: do_vec_ADDP (cpu); return;
5761 case 0x30: do_vec_mull (cpu); return;
5762 case 0x33: do_vec_FMLA (cpu); return;
5763 case 0x35: do_vec_fadd (cpu); return;
5764
5765 case 0x1E:
5766 switch (INSTR (20, 16))
5767 {
5768 case 0x01: do_vec_FCVTL (cpu); return;
5769 default: HALT_NYI;
5770 }
5771
5772 case 0x2E:
5773 switch (INSTR (20, 16))
5774 {
5775 case 0x00: do_vec_ABS (cpu); return;
5776 case 0x01: do_vec_FCVTZS (cpu); return;
5777 case 0x11: do_vec_ADDV (cpu); return;
5778 default: HALT_NYI;
5779 }
5780
5781 case 0x31:
5782 case 0x3B:
5783 do_vec_Fminmax (cpu); return;
5784
5785 case 0x0D:
5786 case 0x0F:
5787 case 0x22:
5788 case 0x23:
5789 case 0x26:
5790 case 0x2A:
5791 case 0x32:
5792 case 0x36:
5793 case 0x39:
5794 case 0x3A:
5795 do_vec_compare (cpu); return;
5796
5797 case 0x3E:
5798 do_vec_FABS (cpu); return;
5799
5800 default:
5801 HALT_NYI;
5802 }
5803 }
5804
5805 static void
5806 do_vec_xtl (sim_cpu *cpu)
5807 {
5808 /* instr[31] = 0
5809 instr[30,29] = SXTL (00), UXTL (01), SXTL2 (10), UXTL2 (11)
5810 instr[28,22] = 0 1111 00
5811 instr[21,16] = size & shift (USHLL, SSHLL, USHLL2, SSHLL2)
5812 instr[15,10] = 1010 01
5813 instr[9,5] = V source
5814 instr[4,0] = V dest. */
5815
5816 unsigned vs = INSTR (9, 5);
5817 unsigned vd = INSTR (4, 0);
5818 unsigned i, shift, bias = 0;
5819
5820 NYI_assert (28, 22, 0x3C);
5821 NYI_assert (15, 10, 0x29);
5822
5823 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
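  /* The "2" variants set bias so that the source elements are taken
     from the upper half of Vs.  Within each arm, the position of the
     leading set bit in instr[21,16] gives the source element size and
     the bits below that marker give the left shift amount.  */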
5824 switch (INSTR (30, 29))
5825 {
5826 case 2: /* SXTL2, SSHLL2. */
5827 bias = 2; /* Fall through. */
5828 case 0: /* SXTL, SSHLL. */
5829 if (INSTR (21, 21))
5830 {
5831 int64_t val1, val2;
5832
5833 shift = INSTR (20, 16);
5834 /* Get the source values before setting the destination values
5835 in case the source and destination are the same. */
5836 val1 = aarch64_get_vec_s32 (cpu, vs, bias) << shift;
5837 val2 = aarch64_get_vec_s32 (cpu, vs, bias + 1) << shift;
5838 aarch64_set_vec_s64 (cpu, vd, 0, val1);
5839 aarch64_set_vec_s64 (cpu, vd, 1, val2);
5840 }
5841 else if (INSTR (20, 20))
5842 {
5843 int32_t v[4];
5844 int32_t v1,v2,v3,v4;
5845
5846 shift = INSTR (19, 16);
5847 bias *= 2;
5848 for (i = 0; i < 4; i++)
5849 v[i] = aarch64_get_vec_s16 (cpu, vs, bias + i) << shift;
5850 for (i = 0; i < 4; i++)
5851 aarch64_set_vec_s32 (cpu, vd, i, v[i]);
5852 }
5853 else
5854 {
5855 int16_t v[8];
5856 NYI_assert (19, 19, 1);
5857
5858 shift = INSTR (18, 16);
5859 bias *= 4;
5860 for (i = 0; i < 8; i++)
5861 v[i] = aarch64_get_vec_s8 (cpu, vs, i + bias) << shift;
5862 for (i = 0; i < 8; i++)
5863 aarch64_set_vec_s16 (cpu, vd, i, v[i]);
5864 }
5865 return;
5866
5867 case 3: /* UXTL2, USHLL2. */
5868 bias = 2; /* Fall through. */
5869 case 1: /* UXTL, USHLL. */
5870 if (INSTR (21, 21))
5871 {
5872 uint64_t v1, v2;
5873 shift = INSTR (20, 16);
5874 v1 = aarch64_get_vec_u32 (cpu, vs, bias) << shift;
5875 v2 = aarch64_get_vec_u32 (cpu, vs, bias + 1) << shift;
5876 aarch64_set_vec_u64 (cpu, vd, 0, v1);
5877 aarch64_set_vec_u64 (cpu, vd, 1, v2);
5878 }
5879 else if (INSTR (20, 20))
5880 {
5881 uint32_t v[4];
5882 shift = INSTR (19, 16);
5883 bias *= 2;
5884 for (i = 0; i < 4; i++)
5885 v[i] = aarch64_get_vec_u16 (cpu, vs, i + bias) << shift;
5886 for (i = 0; i < 4; i++)
5887 aarch64_set_vec_u32 (cpu, vd, i, v[i]);
5888 }
5889 else
5890 {
5891 uint16_t v[8];
5892 NYI_assert (19, 19, 1);
5893
5894 shift = INSTR (18, 16);
5895 bias *= 4;
5896 for (i = 0; i < 8; i++)
5897 v[i] = aarch64_get_vec_u8 (cpu, vs, i + bias) << shift;
5898 for (i = 0; i < 8; i++)
5899 aarch64_set_vec_u16 (cpu, vd, i, v[i]);
5900 }
5901 return;
5902 }
5903 }
5904
5905 static void
5906 do_vec_SHL (sim_cpu *cpu)
5907 {
5908 /* instr [31] = 0
5909 instr [30] = half(0)/full(1)
5910 instr [29,23] = 001 1110
5911 instr [22,16] = size and shift amount
5912 instr [15,10] = 01 0101
5913 instr [9, 5] = Vs
5914 instr [4, 0] = Vd. */
5915
5916 int shift;
5917 int full = INSTR (30, 30);
5918 unsigned vs = INSTR (9, 5);
5919 unsigned vd = INSTR (4, 0);
5920 unsigned i;
5921
5922 NYI_assert (29, 23, 0x1E);
5923 NYI_assert (15, 10, 0x15);
5924
5925 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
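  /* The immh:immb field in instr[22,16] encodes both element size and
     shift: the highest set bit selects the element size and, because
     the field holds esize + shift, the bits below that marker are the
     shift amount itself.  */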
5926 if (INSTR (22, 22))
5927 {
5928 shift = INSTR (21, 16);
5929
5930 if (full == 0)
5931 HALT_UNALLOC;
5932
5933 for (i = 0; i < 2; i++)
5934 {
5935 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5936 aarch64_set_vec_u64 (cpu, vd, i, val << shift);
5937 }
5938
5939 return;
5940 }
5941
5942 if (INSTR (21, 21))
5943 {
5944 shift = INSTR (20, 16);
5945
5946 for (i = 0; i < (full ? 4 : 2); i++)
5947 {
5948 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5949 aarch64_set_vec_u32 (cpu, vd, i, val << shift);
5950 }
5951
5952 return;
5953 }
5954
5955 if (INSTR (20, 20))
5956 {
5957 shift = INSTR (19, 16);
5958
5959 for (i = 0; i < (full ? 8 : 4); i++)
5960 {
5961 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5962 aarch64_set_vec_u16 (cpu, vd, i, val << shift);
5963 }
5964
5965 return;
5966 }
5967
5968 if (INSTR (19, 19) == 0)
5969 HALT_UNALLOC;
5970
5971 shift = INSTR (18, 16);
5972
5973 for (i = 0; i < (full ? 16 : 8); i++)
5974 {
5975 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5976 aarch64_set_vec_u8 (cpu, vd, i, val << shift);
5977 }
5978 }
5979
5980 static void
5981 do_vec_SSHR_USHR (sim_cpu *cpu)
5982 {
5983 /* instr [31] = 0
5984 instr [30] = half(0)/full(1)
5985 instr [29] = signed(0)/unsigned(1)
5986 instr [28,23] = 0 1111 0
5987 instr [22,16] = size and shift amount
5988 instr [15,10] = 0000 01
5989 instr [9, 5] = Vs
5990 instr [4, 0] = Vd. */
5991
5992 int full = INSTR (30, 30);
5993 int sign = ! INSTR (29, 29);
5994 unsigned shift = INSTR (22, 16);
5995 unsigned vs = INSTR (9, 5);
5996 unsigned vd = INSTR (4, 0);
5997 unsigned i;
5998
5999 NYI_assert (28, 23, 0x1E);
6000 NYI_assert (15, 10, 0x01);
6001
6002 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
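  /* For the right shifts, immh:immb holds (2 * esize) - shift, so once
     the element size has been identified from the highest set bit the
     shift is recovered by subtracting the field from 2 * esize (128,
     64, 32 or 16 below).  */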
6003 if (INSTR (22, 22))
6004 {
6005 shift = 128 - shift;
6006
6007 if (full == 0)
6008 HALT_UNALLOC;
6009
6010 if (sign)
6011 for (i = 0; i < 2; i++)
6012 {
6013 int64_t val = aarch64_get_vec_s64 (cpu, vs, i);
6014 aarch64_set_vec_s64 (cpu, vd, i, val >> shift);
6015 }
6016 else
6017 for (i = 0; i < 2; i++)
6018 {
6019 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
6020 aarch64_set_vec_u64 (cpu, vd, i, val >> shift);
6021 }
6022
6023 return;
6024 }
6025
6026 if (INSTR (21, 21))
6027 {
6028 shift = 64 - shift;
6029
6030 if (sign)
6031 for (i = 0; i < (full ? 4 : 2); i++)
6032 {
6033 int32_t val = aarch64_get_vec_s32 (cpu, vs, i);
6034 aarch64_set_vec_s32 (cpu, vd, i, val >> shift);
6035 }
6036 else
6037 for (i = 0; i < (full ? 4 : 2); i++)
6038 {
6039 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
6040 aarch64_set_vec_u32 (cpu, vd, i, val >> shift);
6041 }
6042
6043 return;
6044 }
6045
6046 if (INSTR (20, 20))
6047 {
6048 shift = 32 - shift;
6049
6050 if (sign)
6051 for (i = 0; i < (full ? 8 : 4); i++)
6052 {
6053 int16_t val = aarch64_get_vec_s16 (cpu, vs, i);
6054 aarch64_set_vec_s16 (cpu, vd, i, val >> shift);
6055 }
6056 else
6057 for (i = 0; i < (full ? 8 : 4); i++)
6058 {
6059 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
6060 aarch64_set_vec_u16 (cpu, vd, i, val >> shift);
6061 }
6062
6063 return;
6064 }
6065
6066 if (INSTR (19, 19) == 0)
6067 HALT_UNALLOC;
6068
6069 shift = 16 - shift;
6070
6071 if (sign)
6072 for (i = 0; i < (full ? 16 : 8); i++)
6073 {
6074 int8_t val = aarch64_get_vec_s8 (cpu, vs, i);
6075 aarch64_set_vec_s8 (cpu, vd, i, val >> shift);
6076 }
6077 else
6078 for (i = 0; i < (full ? 16 : 8); i++)
6079 {
6080 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
6081 aarch64_set_vec_u8 (cpu, vd, i, val >> shift);
6082 }
6083 }
6084
6085 static void
6086 do_vec_MUL_by_element (sim_cpu *cpu)
6087 {
6088 /* instr[31] = 0
6089 instr[30] = half/full
6090 instr[29,24] = 00 1111
6091 instr[23,22] = size
6092 instr[21] = L
6093 instr[20] = M
6094 instr[19,16] = m
6095 instr[15,12] = 1000
6096 instr[11] = H
6097 instr[10] = 0
6098 instr[9,5] = Vn
6099 instr[4,0] = Vd */
6100
6101 unsigned full = INSTR (30, 30);
6102 unsigned L = INSTR (21, 21);
6103 unsigned H = INSTR (11, 11);
6104 unsigned vn = INSTR (9, 5);
6105 unsigned vd = INSTR (4, 0);
6106 unsigned size = INSTR (23, 22);
6107 unsigned index;
6108 unsigned vm;
6109 unsigned e;
6110
6111 NYI_assert (29, 24, 0x0F);
6112 NYI_assert (15, 12, 0x8);
6113 NYI_assert (10, 10, 0);
6114
6115 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
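  /* The element index is assembled from the H, L and (for 16-bit
     elements) M bits; in the 16-bit case the M bit is consumed by the
     index, leaving only a 4-bit register number, so Vm is restricted
     to V0-V15.  */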
6116 switch (size)
6117 {
6118 case 1:
6119 {
6120 /* 16 bit products. */
6121 uint16_t product;
6122 uint16_t element1;
6123 uint16_t element2;
6124
6125 index = (H << 2) | (L << 1) | INSTR (20, 20);
6126 vm = INSTR (19, 16);
6127 element2 = aarch64_get_vec_u16 (cpu, vm, index);
6128
6129 for (e = 0; e < (full ? 8 : 4); e ++)
6130 {
6131 element1 = aarch64_get_vec_u16 (cpu, vn, e);
6132 product = element1 * element2;
6133 aarch64_set_vec_u16 (cpu, vd, e, product);
6134 }
6135 }
6136 break;
6137
6138 case 2:
6139 {
6140 /* 32 bit products. */
6141 uint32_t product;
6142 uint32_t element1;
6143 uint32_t element2;
6144
6145 index = (H << 1) | L;
6146 vm = INSTR (20, 16);
6147 element2 = aarch64_get_vec_u32 (cpu, vm, index);
6148
6149 for (e = 0; e < (full ? 4 : 2); e ++)
6150 {
6151 element1 = aarch64_get_vec_u32 (cpu, vn, e);
6152 product = element1 * element2;
6153 aarch64_set_vec_u32 (cpu, vd, e, product);
6154 }
6155 }
6156 break;
6157
6158 default:
6159 HALT_UNALLOC;
6160 }
6161 }
6162
6163 static void
6164 do_FMLA_by_element (sim_cpu *cpu)
6165 {
6166 /* instr[31] = 0
6167 instr[30] = half/full
6168 instr[29,23] = 00 1111 1
6169 instr[22] = size
6170 instr[21] = L
6171 instr[20,16] = m
6172 instr[15,12] = 0001
6173 instr[11] = H
6174 instr[10] = 0
6175 instr[9,5] = Vn
6176 instr[4,0] = Vd */
6177
6178 unsigned full = INSTR (30, 30);
6179 unsigned size = INSTR (22, 22);
6180 unsigned L = INSTR (21, 21);
6181 unsigned vm = INSTR (20, 16);
6182 unsigned H = INSTR (11, 11);
6183 unsigned vn = INSTR (9, 5);
6184 unsigned vd = INSTR (4, 0);
6185 unsigned e;
6186
6187 NYI_assert (29, 23, 0x1F);
6188 NYI_assert (15, 12, 0x1);
6189 NYI_assert (10, 10, 0);
6190
6191 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
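  /* For double precision elements the index is just H (L must be
     zero); for single precision it is H:L.  */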
6192 if (size)
6193 {
6194 double element1, element2;
6195
6196 if (! full || L)
6197 HALT_UNALLOC;
6198
6199 element2 = aarch64_get_vec_double (cpu, vm, H);
6200
6201 for (e = 0; e < 2; e++)
6202 {
6203 element1 = aarch64_get_vec_double (cpu, vn, e);
6204 element1 *= element2;
6205 element1 += aarch64_get_vec_double (cpu, vd, e);
6206 aarch64_set_vec_double (cpu, vd, e, element1);
6207 }
6208 }
6209 else
6210 {
6211 float element1;
6212 float element2 = aarch64_get_vec_float (cpu, vm, (H << 1) | L);
6213
6214 for (e = 0; e < (full ? 4 : 2); e++)
6215 {
6216 element1 = aarch64_get_vec_float (cpu, vn, e);
6217 element1 *= element2;
6218 element1 += aarch64_get_vec_float (cpu, vd, e);
6219 aarch64_set_vec_float (cpu, vd, e, element1);
6220 }
6221 }
6222 }
6223
6224 static void
6225 do_vec_op2 (sim_cpu *cpu)
6226 {
6227 /* instr[31] = 0
6228 instr[30] = half/full
6229 instr[29,24] = 00 1111
6230 instr[23] = ?
6231 instr[22,16] = element size & index
6232 instr[15,10] = sub-opcode
6233 instr[9,5] = Vn
6234 instr[4,0] = Vd */
6235
6236 NYI_assert (29, 24, 0x0F);
6237
6238 if (INSTR (23, 23) != 0)
6239 {
6240 switch (INSTR (15, 10))
6241 {
6242 case 0x04:
6243 case 0x06:
6244 do_FMLA_by_element (cpu);
6245 return;
6246
6247 case 0x20:
6248 case 0x22:
6249 do_vec_MUL_by_element (cpu);
6250 return;
6251
6252 default:
6253 HALT_NYI;
6254 }
6255 }
6256 else
6257 {
6258 switch (INSTR (15, 10))
6259 {
6260 case 0x01: do_vec_SSHR_USHR (cpu); return;
6261 case 0x15: do_vec_SHL (cpu); return;
6262 case 0x20:
6263 case 0x22: do_vec_MUL_by_element (cpu); return;
6264 case 0x29: do_vec_xtl (cpu); return;
6265 default: HALT_NYI;
6266 }
6267 }
6268 }
6269
6270 static void
6271 do_vec_neg (sim_cpu *cpu)
6272 {
6273 /* instr[31] = 0
6274 instr[30] = full(1)/half(0)
6275 instr[29,24] = 10 1110
6276 instr[23,22] = size: byte(00), half (01), word (10), long (11)
6277 instr[21,10] = 1000 0010 1110
6278 instr[9,5] = Vs
6279 instr[4,0] = Vd */
6280
6281 int full = INSTR (30, 30);
6282 unsigned vs = INSTR (9, 5);
6283 unsigned vd = INSTR (4, 0);
6284 unsigned i;
6285
6286 NYI_assert (29, 24, 0x2E);
6287 NYI_assert (21, 10, 0x82E);
6288
6289 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6290 switch (INSTR (23, 22))
6291 {
6292 case 0:
6293 for (i = 0; i < (full ? 16 : 8); i++)
6294 aarch64_set_vec_s8 (cpu, vd, i, - aarch64_get_vec_s8 (cpu, vs, i));
6295 return;
6296
6297 case 1:
6298 for (i = 0; i < (full ? 8 : 4); i++)
6299 aarch64_set_vec_s16 (cpu, vd, i, - aarch64_get_vec_s16 (cpu, vs, i));
6300 return;
6301
6302 case 2:
6303 for (i = 0; i < (full ? 4 : 2); i++)
6304 aarch64_set_vec_s32 (cpu, vd, i, - aarch64_get_vec_s32 (cpu, vs, i));
6305 return;
6306
6307 case 3:
6308 if (! full)
6309 HALT_NYI;
6310 for (i = 0; i < 2; i++)
6311 aarch64_set_vec_s64 (cpu, vd, i, - aarch64_get_vec_s64 (cpu, vs, i));
6312 return;
6313 }
6314 }
6315
6316 static void
6317 do_vec_sqrt (sim_cpu *cpu)
6318 {
6319 /* instr[31] = 0
6320 instr[30] = full(1)/half(0)
6321 instr[29,23] = 101 1101
6322 instr[22] = single(0)/double(1)
6323 instr[21,10] = 1000 0111 1110
6324 instr[9,5] = Vs
6325 instr[4,0] = Vd. */
6326
6327 int full = INSTR (30, 30);
6328 unsigned vs = INSTR (9, 5);
6329 unsigned vd = INSTR (4, 0);
6330 unsigned i;
6331
6332 NYI_assert (29, 23, 0x5B);
6333 NYI_assert (21, 10, 0x87E);
6334
6335 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6336 if (INSTR (22, 22) == 0)
6337 for (i = 0; i < (full ? 4 : 2); i++)
6338 aarch64_set_vec_float (cpu, vd, i,
6339 sqrtf (aarch64_get_vec_float (cpu, vs, i)));
6340 else
6341 for (i = 0; i < 2; i++)
6342 aarch64_set_vec_double (cpu, vd, i,
6343 sqrt (aarch64_get_vec_double (cpu, vs, i)));
6344 }
6345
6346 static void
6347 do_vec_mls_indexed (sim_cpu *cpu)
6348 {
6349 /* instr[31] = 0
6350 instr[30] = half(0)/full(1)
6351 instr[29,24] = 10 1111
6352 instr[23,22] = 16-bit(01)/32-bit(10)
6353 instr[11,21,20] = element index H:L:M (if 16-bit)
6354 instr[11,21] = element index H:L (if 32-bit)
6355 instr[20,16] = Vm
6356 instr[15,12] = 0100
6357 instr[11] = part of index
6358 instr[10] = 0
6359 instr[9,5] = Vs
6360 instr[4,0] = Vd. */
6361
6362 int full = INSTR (30, 30);
6363 unsigned vs = INSTR (9, 5);
6364 unsigned vd = INSTR (4, 0);
6365 unsigned vm = INSTR (20, 16);
6366 unsigned i;
6367
6368 NYI_assert (15, 12, 4);
6369 NYI_assert (10, 10, 0);
6370
6371 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6372 switch (INSTR (23, 22))
6373 {
6374 case 1:
6375 {
6376 unsigned elem;
6377 uint32_t val;
6378
6379 /* With 16-bit elements the index is H:L:M, so bit 20 belongs to
6380 the index and only v0-v15 can be named by Vm. */
6381 elem = (INSTR (11, 11) << 2) | (INSTR (21, 21) << 1) | INSTR (20, 20);
6382 vm &= 0xF;
6383 val = aarch64_get_vec_u16 (cpu, vm, elem);
6384
6385 for (i = 0; i < (full ? 8 : 4); i++)
6386 aarch64_set_vec_u16 (cpu, vd, i,
6387 aarch64_get_vec_u16 (cpu, vd, i) -
6388 (aarch64_get_vec_u16 (cpu, vs, i) * val));
6389 return;
6390 }
6391
6392 case 2:
6393 {
6394 unsigned elem = (INSTR (11, 11) << 1) | INSTR (21, 21);
6395 uint32_t val = aarch64_get_vec_u32 (cpu, vm, elem);
6396
6397 for (i = 0; i < (full ? 4 : 2); i++)
6398 aarch64_set_vec_u32 (cpu, vd, i,
6399 aarch64_get_vec_u32 (cpu, vd, i) -
6400 (aarch64_get_vec_u32 (cpu, vs, i) * val));
6401 return;
6402 }
6403
6404 case 0:
6405 case 3:
6406 default:
6407 HALT_NYI;
6408 }
6409 }
6410
6411 static void
6412 do_vec_SUB (sim_cpu *cpu)
6413 {
6414 /* instr [31] = 0
6415 instr [30] = half(0)/full(1)
6416 instr [29,24] = 10 1110
6417 instr [23,22] = size: byte(00), half(01), word (10), long (11)
6418 instr [21] = 1
6419 instr [20,16] = Vm
6420 instr [15,10] = 10 0001
6421 instr [9, 5] = Vn
6422 instr [4, 0] = Vd. */
6423
6424 unsigned full = INSTR (30, 30);
6425 unsigned vm = INSTR (20, 16);
6426 unsigned vn = INSTR (9, 5);
6427 unsigned vd = INSTR (4, 0);
6428 unsigned i;
6429
6430 NYI_assert (29, 24, 0x2E);
6431 NYI_assert (21, 21, 1);
6432 NYI_assert (15, 10, 0x21);
6433
6434 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6435 switch (INSTR (23, 22))
6436 {
6437 case 0:
6438 for (i = 0; i < (full ? 16 : 8); i++)
6439 aarch64_set_vec_s8 (cpu, vd, i,
6440 aarch64_get_vec_s8 (cpu, vn, i)
6441 - aarch64_get_vec_s8 (cpu, vm, i));
6442 return;
6443
6444 case 1:
6445 for (i = 0; i < (full ? 8 : 4); i++)
6446 aarch64_set_vec_s16 (cpu, vd, i,
6447 aarch64_get_vec_s16 (cpu, vn, i)
6448 - aarch64_get_vec_s16 (cpu, vm, i));
6449 return;
6450
6451 case 2:
6452 for (i = 0; i < (full ? 4 : 2); i++)
6453 aarch64_set_vec_s32 (cpu, vd, i,
6454 aarch64_get_vec_s32 (cpu, vn, i)
6455 - aarch64_get_vec_s32 (cpu, vm, i));
6456 return;
6457
6458 case 3:
6459 if (full == 0)
6460 HALT_UNALLOC;
6461
6462 for (i = 0; i < 2; i++)
6463 aarch64_set_vec_s64 (cpu, vd, i,
6464 aarch64_get_vec_s64 (cpu, vn, i)
6465 - aarch64_get_vec_s64 (cpu, vm, i));
6466 return;
6467 }
6468 }
6469
6470 static void
6471 do_vec_MLS (sim_cpu *cpu)
6472 {
6473 /* instr [31] = 0
6474 instr [30] = half(0)/full(1)
6475 instr [29,24] = 10 1110
6476 instr [23,22] = size: byte(00), half(01), word (10)
6477 instr [21] = 1
6478 instr [20,16] = Vm
6479 instr [15,10] = 10 0101
6480 instr [9, 5] = Vn
6481 instr [4, 0] = Vd. */
6482
6483 unsigned full = INSTR (30, 30);
6484 unsigned vm = INSTR (20, 16);
6485 unsigned vn = INSTR (9, 5);
6486 unsigned vd = INSTR (4, 0);
6487 unsigned i;
6488
6489 NYI_assert (29, 24, 0x2E);
6490 NYI_assert (21, 21, 1);
6491 NYI_assert (15, 10, 0x25);
6492
6493 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6494 switch (INSTR (23, 22))
6495 {
6496 case 0:
6497 for (i = 0; i < (full ? 16 : 8); i++)
6498 aarch64_set_vec_u8 (cpu, vd, i,
6499 aarch64_get_vec_u8 (cpu, vd, i)
6500 - (aarch64_get_vec_u8 (cpu, vn, i)
6501 * aarch64_get_vec_u8 (cpu, vm, i)));
6502 return;
6503
6504 case 1:
6505 for (i = 0; i < (full ? 8 : 4); i++)
6506 aarch64_set_vec_u16 (cpu, vd, i,
6507 aarch64_get_vec_u16 (cpu, vd, i)
6508 - (aarch64_get_vec_u16 (cpu, vn, i)
6509 * aarch64_get_vec_u16 (cpu, vm, i)));
6510 return;
6511
6512 case 2:
6513 for (i = 0; i < (full ? 4 : 2); i++)
6514 aarch64_set_vec_u32 (cpu, vd, i,
6515 aarch64_get_vec_u32 (cpu, vd, i)
6516 - (aarch64_get_vec_u32 (cpu, vn, i)
6517 * aarch64_get_vec_u32 (cpu, vm, i)));
6518 return;
6519
6520 default:
6521 HALT_UNALLOC;
6522 }
6523 }
6524
6525 static void
6526 do_vec_FDIV (sim_cpu *cpu)
6527 {
6528 /* instr [31] = 0
6529 instr [30] = half(0)/full(1)
6530 instr [29,23] = 10 1110 0
6531 instr [22] = float(0)/double(1)
6532 instr [21] = 1
6533 instr [20,16] = Vm
6534 instr [15,10] = 1111 11
6535 instr [9, 5] = Vn
6536 instr [4, 0] = Vd. */
6537
6538 unsigned full = INSTR (30, 30);
6539 unsigned vm = INSTR (20, 16);
6540 unsigned vn = INSTR (9, 5);
6541 unsigned vd = INSTR (4, 0);
6542 unsigned i;
6543
6544 NYI_assert (29, 23, 0x5C);
6545 NYI_assert (21, 21, 1);
6546 NYI_assert (15, 10, 0x3F);
6547
6548 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6549 if (INSTR (22, 22))
6550 {
6551 if (! full)
6552 HALT_UNALLOC;
6553
6554 for (i = 0; i < 2; i++)
6555 aarch64_set_vec_double (cpu, vd, i,
6556 aarch64_get_vec_double (cpu, vn, i)
6557 / aarch64_get_vec_double (cpu, vm, i));
6558 }
6559 else
6560 for (i = 0; i < (full ? 4 : 2); i++)
6561 aarch64_set_vec_float (cpu, vd, i,
6562 aarch64_get_vec_float (cpu, vn, i)
6563 / aarch64_get_vec_float (cpu, vm, i));
6564 }
6565
6566 static void
6567 do_vec_FMUL (sim_cpu *cpu)
6568 {
6569 /* instr [31] = 0
6570 instr [30] = half(0)/full(1)
6571 instr [29,23] = 10 1110 0
6572 instr [22] = float(0)/double(1)
6573 instr [21] = 1
6574 instr [20,16] = Vm
6575 instr [15,10] = 1101 11
6576 instr [9, 5] = Vn
6577 instr [4, 0] = Vd. */
6578
6579 unsigned full = INSTR (30, 30);
6580 unsigned vm = INSTR (20, 16);
6581 unsigned vn = INSTR (9, 5);
6582 unsigned vd = INSTR (4, 0);
6583 unsigned i;
6584
6585 NYI_assert (29, 23, 0x5C);
6586 NYI_assert (21, 21, 1);
6587 NYI_assert (15, 10, 0x37);
6588
6589 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6590 if (INSTR (22, 22))
6591 {
6592 if (! full)
6593 HALT_UNALLOC;
6594
6595 for (i = 0; i < 2; i++)
6596 aarch64_set_vec_double (cpu, vd, i,
6597 aarch64_get_vec_double (cpu, vn, i)
6598 * aarch64_get_vec_double (cpu, vm, i));
6599 }
6600 else
6601 for (i = 0; i < (full ? 4 : 2); i++)
6602 aarch64_set_vec_float (cpu, vd, i,
6603 aarch64_get_vec_float (cpu, vn, i)
6604 * aarch64_get_vec_float (cpu, vm, i));
6605 }
6606
6607 static void
6608 do_vec_FADDP (sim_cpu *cpu)
6609 {
6610 /* instr [31] = 0
6611 instr [30] = half(0)/full(1)
6612 instr [29,23] = 10 1110 0
6613 instr [22] = float(0)/double(1)
6614 instr [21] = 1
6615 instr [20,16] = Vm
6616 instr [15,10] = 1101 01
6617 instr [9, 5] = Vn
6618 instr [4, 0] = Vd. */
6619
6620 unsigned full = INSTR (30, 30);
6621 unsigned vm = INSTR (20, 16);
6622 unsigned vn = INSTR (9, 5);
6623 unsigned vd = INSTR (4, 0);
6624
6625 NYI_assert (29, 23, 0x5C);
6626 NYI_assert (21, 21, 1);
6627 NYI_assert (15, 10, 0x35);
6628
6629 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6630 if (INSTR (22, 22))
6631 {
6632 /* Extract values before adding them, in case vd == vn/vm. */
6633 double tmp1 = aarch64_get_vec_double (cpu, vn, 0);
6634 double tmp2 = aarch64_get_vec_double (cpu, vn, 1);
6635 double tmp3 = aarch64_get_vec_double (cpu, vm, 0);
6636 double tmp4 = aarch64_get_vec_double (cpu, vm, 1);
6637
6638 if (! full)
6639 HALT_UNALLOC;
6640
6641 aarch64_set_vec_double (cpu, vd, 0, tmp1 + tmp2);
6642 aarch64_set_vec_double (cpu, vd, 1, tmp3 + tmp4);
6643 }
6644 else
6645 {
6646 /* Extract values before adding them, in case vd == vn/vm. */
6647 float tmp1 = aarch64_get_vec_float (cpu, vn, 0);
6648 float tmp2 = aarch64_get_vec_float (cpu, vn, 1);
6649 float tmp5 = aarch64_get_vec_float (cpu, vm, 0);
6650 float tmp6 = aarch64_get_vec_float (cpu, vm, 1);
6651
6652 if (full)
6653 {
6654 float tmp3 = aarch64_get_vec_float (cpu, vn, 2);
6655 float tmp4 = aarch64_get_vec_float (cpu, vn, 3);
6656 float tmp7 = aarch64_get_vec_float (cpu, vm, 2);
6657 float tmp8 = aarch64_get_vec_float (cpu, vm, 3);
6658
6659 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6660 aarch64_set_vec_float (cpu, vd, 1, tmp3 + tmp4);
6661 aarch64_set_vec_float (cpu, vd, 2, tmp5 + tmp6);
6662 aarch64_set_vec_float (cpu, vd, 3, tmp7 + tmp8);
6663 }
6664 else
6665 {
6666 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6667 aarch64_set_vec_float (cpu, vd, 1, tmp5 + tmp6);
6668 }
6669 }
6670 }
6671
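/* For example, FADDP Vd.4S, Vn.4S, Vm.4S above computes
   d[0] = n[0] + n[1], d[1] = n[2] + n[3],
   d[2] = m[0] + m[1], d[3] = m[2] + m[3].  */
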
6672 static void
6673 do_vec_FSQRT (sim_cpu *cpu)
6674 {
6675 /* instr[31] = 0
6676 instr[30] = half(0)/full(1)
6677 instr[29,23] = 10 1110 1
6678 instr[22] = single(0)/double(1)
6679 instr[21,10] = 10 0001 1111 10
6680 instr[9,5] = Vsrc
6681 instr[4,0] = Vdest. */
6682
6683 unsigned vn = INSTR (9, 5);
6684 unsigned vd = INSTR (4, 0);
6685 unsigned full = INSTR (30, 30);
6686 int i;
6687
6688 NYI_assert (29, 23, 0x5D);
6689 NYI_assert (21, 10, 0x87E);
6690
6691 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6692 if (INSTR (22, 22))
6693 {
6694 if (! full)
6695 HALT_UNALLOC;
6696
6697 for (i = 0; i < 2; i++)
6698 aarch64_set_vec_double (cpu, vd, i,
6699 sqrt (aarch64_get_vec_double (cpu, vn, i)));
6700 }
6701 else
6702 {
6703 for (i = 0; i < (full ? 4 : 2); i++)
6704 aarch64_set_vec_float (cpu, vd, i,
6705 sqrtf (aarch64_get_vec_float (cpu, vn, i)));
6706 }
6707 }
6708
6709 static void
6710 do_vec_FNEG (sim_cpu *cpu)
6711 {
6712 /* instr[31] = 0
6713 instr[30] = half (0)/full (1)
6714 instr[29,23] = 10 1110 1
6715 instr[22] = single (0)/double (1)
6716 instr[21,10] = 10 0000 1111 10
6717 instr[9,5] = Vsrc
6718 instr[4,0] = Vdest. */
6719
6720 unsigned vn = INSTR (9, 5);
6721 unsigned vd = INSTR (4, 0);
6722 unsigned full = INSTR (30, 30);
6723 int i;
6724
6725 NYI_assert (29, 23, 0x5D);
6726 NYI_assert (21, 10, 0x83E);
6727
6728 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6729 if (INSTR (22, 22))
6730 {
6731 if (! full)
6732 HALT_UNALLOC;
6733
6734 for (i = 0; i < 2; i++)
6735 aarch64_set_vec_double (cpu, vd, i,
6736 - aarch64_get_vec_double (cpu, vn, i));
6737 }
6738 else
6739 {
6740 for (i = 0; i < (full ? 4 : 2); i++)
6741 aarch64_set_vec_float (cpu, vd, i,
6742 - aarch64_get_vec_float (cpu, vn, i));
6743 }
6744 }
6745
6746 static void
6747 do_vec_NOT (sim_cpu *cpu)
6748 {
6749 /* instr[31] = 0
6750 instr[30] = half (0)/full (1)
6751 instr[29,10] = 10 1110 0010 0000 0101 10
6752 instr[9,5] = Vn
6753 instr[4,0] = Vd. */
6754
6755 unsigned vn = INSTR (9, 5);
6756 unsigned vd = INSTR (4, 0);
6757 unsigned i;
6758 int full = INSTR (30, 30);
6759
6760 NYI_assert (29, 10, 0xB8816);
6761
6762 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6763 for (i = 0; i < (full ? 16 : 8); i++)
6764 aarch64_set_vec_u8 (cpu, vd, i, ~ aarch64_get_vec_u8 (cpu, vn, i));
6765 }
6766
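/* Count the leading zero bits in VAL, treating it as a SIZE-bit wide
   value; returns SIZE when VAL is zero.  */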
6767 static unsigned int
6768 clz (uint64_t val, unsigned size)
6769 {
6770 uint64_t mask = 1;
6771 int count;
6772
6773 mask <<= (size - 1);
6774 count = 0;
6775 do
6776 {
6777 if (val & mask)
6778 break;
6779 mask >>= 1;
6780 count ++;
6781 }
6782 while (mask);
6783
6784 return count;
6785 }
6786
6787 static void
6788 do_vec_CLZ (sim_cpu *cpu)
6789 {
6790 /* instr[31] = 0
6791 instr[30] = half (0)/full (1)
6792 instr[29,24] = 10 1110
6793 instr[23,22] = size
6794 instr[21,10] = 10 0000 0100 10
6795 instr[9,5] = Vn
6796 instr[4,0] = Vd. */
6797
6798 unsigned vn = INSTR (9, 5);
6799 unsigned vd = INSTR (4, 0);
6800 unsigned i;
6801 int full = INSTR (30,30);
6802
6803 NYI_assert (29, 24, 0x2E);
6804 NYI_assert (21, 10, 0x812);
6805
6806 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6807 switch (INSTR (23, 22))
6808 {
6809 case 0:
6810 for (i = 0; i < (full ? 16 : 8); i++)
6811 aarch64_set_vec_u8 (cpu, vd, i, clz (aarch64_get_vec_u8 (cpu, vn, i), 8));
6812 break;
6813 case 1:
6814 for (i = 0; i < (full ? 8 : 4); i++)
6815 aarch64_set_vec_u16 (cpu, vd, i, clz (aarch64_get_vec_u16 (cpu, vn, i), 16));
6816 break;
6817 case 2:
6818 for (i = 0; i < (full ? 4 : 2); i++)
6819 aarch64_set_vec_u32 (cpu, vd, i, clz (aarch64_get_vec_u32 (cpu, vn, i), 32));
6820 break;
6821 case 3:
6822 if (! full)
6823 HALT_UNALLOC;
6824 aarch64_set_vec_u64 (cpu, vd, 0, clz (aarch64_get_vec_u64 (cpu, vn, 0), 64));
6825 aarch64_set_vec_u64 (cpu, vd, 1, clz (aarch64_get_vec_u64 (cpu, vn, 1), 64));
6826 break;
6827 }
6828 }
6829
6830 static void
6831 do_vec_MOV_element (sim_cpu *cpu)
6832 {
6833 /* instr[31,21] = 0110 1110 000
6834 instr[20,16] = size & dest index
6835 instr[15] = 0
6836 instr[14,11] = source index
6837 instr[10] = 1
6838 instr[9,5] = Vs
6839 instr[4,0] = Vd. */
6840
6841 unsigned vs = INSTR (9, 5);
6842 unsigned vd = INSTR (4, 0);
6843 unsigned src_index;
6844 unsigned dst_index;
6845
6846 NYI_assert (31, 21, 0x370);
6847 NYI_assert (15, 15, 0);
6848 NYI_assert (10, 10, 1);
6849
6850 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6851 if (INSTR (16, 16))
6852 {
6853 /* Move a byte. */
6854 src_index = INSTR (14, 11);
6855 dst_index = INSTR (20, 17);
6856 aarch64_set_vec_u8 (cpu, vd, dst_index,
6857 aarch64_get_vec_u8 (cpu, vs, src_index));
6858 }
6859 else if (INSTR (17, 17))
6860 {
6861 /* Move 16-bits. */
6862 NYI_assert (11, 11, 0);
6863 src_index = INSTR (14, 12);
6864 dst_index = INSTR (20, 18);
6865 aarch64_set_vec_u16 (cpu, vd, dst_index,
6866 aarch64_get_vec_u16 (cpu, vs, src_index));
6867 }
6868 else if (INSTR (18, 18))
6869 {
6870 /* Move 32-bits. */
6871 NYI_assert (12, 11, 0);
6872 src_index = INSTR (14, 13);
6873 dst_index = INSTR (20, 19);
6874 aarch64_set_vec_u32 (cpu, vd, dst_index,
6875 aarch64_get_vec_u32 (cpu, vs, src_index));
6876 }
6877 else
6878 {
6879 NYI_assert (19, 19, 1);
6880 NYI_assert (13, 11, 0);
6881 src_index = INSTR (14, 14);
6882 dst_index = INSTR (20, 20);
6883 aarch64_set_vec_u64 (cpu, vd, dst_index,
6884 aarch64_get_vec_u64 (cpu, vs, src_index));
6885 }
6886 }
6887
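/* Illustrative sketch (not part of the simulator): in do_vec_MOV_element
   above, the lowest set bit of imm5 (instr[20,16]) selects the element
   size and the bits above it form the destination index.  */
#if 0
  unsigned imm5 = 0x0C;            /* 0b01100: bit 18 is the low set bit,  */
  unsigned dst_index = imm5 >> 3;  /* so 32-bit move, INSTR (20, 19) == 1. */
#endif
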
6888 static void
6889 do_vec_REV32 (sim_cpu *cpu)
6890 {
6891 /* instr[31] = 0
6892 instr[30] = full/half
6893 instr[29,24] = 10 1110
6894 instr[23,22] = size
6895 instr[21,10] = 10 0000 0000 10
6896 instr[9,5] = Rn
6897 instr[4,0] = Rd. */
6898
6899 unsigned rn = INSTR (9, 5);
6900 unsigned rd = INSTR (4, 0);
6901 unsigned size = INSTR (23, 22);
6902 unsigned full = INSTR (30, 30);
6903 unsigned i;
6904 FRegister val;
6905
6906 NYI_assert (29, 24, 0x2E);
6907 NYI_assert (21, 10, 0x802);
6908
6909 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6910 switch (size)
6911 {
6912 case 0:
6913 for (i = 0; i < (full ? 16 : 8); i++)
6914 val.b[i ^ 0x3] = aarch64_get_vec_u8 (cpu, rn, i);
6915 break;
6916
6917 case 1:
6918 for (i = 0; i < (full ? 8 : 4); i++)
6919 val.h[i ^ 0x1] = aarch64_get_vec_u16 (cpu, rn, i);
6920 break;
6921
6922 default:
6923 HALT_UNALLOC;
6924 }
6925
6926 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
6927 if (full)
6928 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
6929 }
6930
6931 static void
6932 do_vec_EXT (sim_cpu *cpu)
6933 {
6934 /* instr[31] = 0
6935 instr[30] = full/half
6936 instr[29,21] = 10 1110 000
6937 instr[20,16] = Vm
6938 instr[15] = 0
6939 instr[14,11] = source index
6940 instr[10] = 0
6941 instr[9,5] = Vn
6942 instr[4,0] = Vd. */
6943
6944 unsigned vm = INSTR (20, 16);
6945 unsigned vn = INSTR (9, 5);
6946 unsigned vd = INSTR (4, 0);
6947 unsigned src_index = INSTR (14, 11);
6948 unsigned full = INSTR (30, 30);
6949 unsigned i;
6950 unsigned j;
6951 FRegister val;
6952
6953 NYI_assert (31, 21, 0x370);
6954 NYI_assert (15, 15, 0);
6955 NYI_assert (10, 10, 0);
6956
6957 if (!full && (src_index & 0x8))
6958 HALT_UNALLOC;
6959
6960 j = 0;
6961
6962 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6963 for (i = src_index; i < (full ? 16 : 8); i++)
6964 val.b[j ++] = aarch64_get_vec_u8 (cpu, vn, i);
6965 for (i = 0; i < src_index; i++)
6966 val.b[j ++] = aarch64_get_vec_u8 (cpu, vm, i);
6967
6968 aarch64_set_vec_u64 (cpu, vd, 0, val.v[0]);
6969 if (full)
6970 aarch64_set_vec_u64 (cpu, vd, 1, val.v[1]);
6971 }
6972
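/* For example, EXT Vd.16B, Vn.16B, Vm.16B, #3 (src_index == 3) yields
   Vd = { Vn[3], ..., Vn[15], Vm[0], Vm[1], Vm[2] }.  */
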
6973 static void
6974 dexAdvSIMD0 (sim_cpu *cpu)
6975 {
6976 /* instr [28,25] = 0 111. */
6977 if ( INSTR (15, 10) == 0x07
6978 && (INSTR (9, 5) ==
6979 INSTR (20, 16)))
6980 {
6981 if (INSTR (31, 21) == 0x075
6982 || INSTR (31, 21) == 0x275)
6983 {
6984 do_vec_MOV_whole_vector (cpu);
6985 return;
6986 }
6987 }
6988
6989 if (INSTR (29, 19) == 0x1E0)
6990 {
6991 do_vec_MOV_immediate (cpu);
6992 return;
6993 }
6994
6995 if (INSTR (29, 19) == 0x5E0)
6996 {
6997 do_vec_MVNI (cpu);
6998 return;
6999 }
7000
7001 if (INSTR (29, 19) == 0x1C0
7002 || INSTR (29, 19) == 0x1C1)
7003 {
7004 if (INSTR (15, 10) == 0x03)
7005 {
7006 do_vec_DUP_scalar_into_vector (cpu);
7007 return;
7008 }
7009 }
7010
7011 switch (INSTR (29, 24))
7012 {
7013 case 0x0E: do_vec_op1 (cpu); return;
7014 case 0x0F: do_vec_op2 (cpu); return;
7015
7016 case 0x2E:
7017 if (INSTR (21, 21) == 1)
7018 {
7019 switch (INSTR (15, 10))
7020 {
7021 case 0x02:
7022 do_vec_REV32 (cpu);
7023 return;
7024
7025 case 0x07:
7026 switch (INSTR (23, 22))
7027 {
7028 case 0: do_vec_EOR (cpu); return;
7029 case 1: do_vec_BSL (cpu); return;
7030 case 2:
7031 case 3: do_vec_bit (cpu); return;
7032 }
7033 break;
7034
7035 case 0x08: do_vec_sub_long (cpu); return;
7036 case 0x11: do_vec_USHL (cpu); return;
7037 case 0x12: do_vec_CLZ (cpu); return;
7038 case 0x16: do_vec_NOT (cpu); return;
7039 case 0x19: do_vec_max (cpu); return;
7040 case 0x1B: do_vec_min (cpu); return;
7041 case 0x21: do_vec_SUB (cpu); return;
7042 case 0x25: do_vec_MLS (cpu); return;
7043 case 0x31: do_vec_FminmaxNMP (cpu); return;
7044 case 0x35: do_vec_FADDP (cpu); return;
7045 case 0x37: do_vec_FMUL (cpu); return;
7046 case 0x3F: do_vec_FDIV (cpu); return;
7047
7048 case 0x3E:
7049 switch (INSTR (20, 16))
7050 {
7051 case 0x00: do_vec_FNEG (cpu); return;
7052 case 0x01: do_vec_FSQRT (cpu); return;
7053 default: HALT_NYI;
7054 }
7055
7056 case 0x0D:
7057 case 0x0F:
7058 case 0x22:
7059 case 0x23:
7060 case 0x26:
7061 case 0x2A:
7062 case 0x32:
7063 case 0x36:
7064 case 0x39:
7065 case 0x3A:
7066 do_vec_compare (cpu); return;
7067
7068 default:
7069 break;
7070 }
7071 }
7072
7073 if (INSTR (31, 21) == 0x370)
7074 {
7075 if (INSTR (10, 10))
7076 do_vec_MOV_element (cpu);
7077 else
7078 do_vec_EXT (cpu);
7079 return;
7080 }
7081
7082 switch (INSTR (21, 10))
7083 {
7084 case 0x82E: do_vec_neg (cpu); return;
7085 case 0x87E: do_vec_sqrt (cpu); return;
7086 default:
7087 if (INSTR (15, 10) == 0x30)
7088 {
7089 do_vec_mull (cpu);
7090 return;
7091 }
7092 break;
7093 }
7094 break;
7095
7096 case 0x2f:
7097 switch (INSTR (15, 10))
7098 {
7099 case 0x01: do_vec_SSHR_USHR (cpu); return;
7100 case 0x10:
7101 case 0x12: do_vec_mls_indexed (cpu); return;
7102 case 0x29: do_vec_xtl (cpu); return;
7103 default:
7104 HALT_NYI;
7105 }
7106
7107 default:
7108 break;
7109 }
7110
7111 HALT_NYI;
7112 }
7113
7114 /* 3 sources. */
7115
7116 /* Float multiply add. */
7117 static void
7118 fmadds (sim_cpu *cpu)
7119 {
7120 unsigned sa = INSTR (14, 10);
7121 unsigned sm = INSTR (20, 16);
7122 unsigned sn = INSTR ( 9, 5);
7123 unsigned sd = INSTR ( 4, 0);
7124
7125 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7126 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
7127 + aarch64_get_FP_float (cpu, sn)
7128 * aarch64_get_FP_float (cpu, sm));
7129 }
7130
7131 /* Double multiply add. */
7132 static void
7133 fmaddd (sim_cpu *cpu)
7134 {
7135 unsigned sa = INSTR (14, 10);
7136 unsigned sm = INSTR (20, 16);
7137 unsigned sn = INSTR ( 9, 5);
7138 unsigned sd = INSTR ( 4, 0);
7139
7140 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7141 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
7142 + aarch64_get_FP_double (cpu, sn)
7143 * aarch64_get_FP_double (cpu, sm));
7144 }
7145
7146 /* Float multiply subtract. */
7147 static void
7148 fmsubs (sim_cpu *cpu)
7149 {
7150 unsigned sa = INSTR (14, 10);
7151 unsigned sm = INSTR (20, 16);
7152 unsigned sn = INSTR ( 9, 5);
7153 unsigned sd = INSTR ( 4, 0);
7154
7155 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7156 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
7157 - aarch64_get_FP_float (cpu, sn)
7158 * aarch64_get_FP_float (cpu, sm));
7159 }
7160
7161 /* Double multiply subtract. */
7162 static void
7163 fmsubd (sim_cpu *cpu)
7164 {
7165 unsigned sa = INSTR (14, 10);
7166 unsigned sm = INSTR (20, 16);
7167 unsigned sn = INSTR ( 9, 5);
7168 unsigned sd = INSTR ( 4, 0);
7169
7170 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7171 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
7172 - aarch64_get_FP_double (cpu, sn)
7173 * aarch64_get_FP_double (cpu, sm));
7174 }
7175
7176 /* Float negative multiply add. */
7177 static void
7178 fnmadds (sim_cpu *cpu)
7179 {
7180 unsigned sa = INSTR (14, 10);
7181 unsigned sm = INSTR (20, 16);
7182 unsigned sn = INSTR ( 9, 5);
7183 unsigned sd = INSTR ( 4, 0);
7184
7185 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7186 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7187 + (- aarch64_get_FP_float (cpu, sn))
7188 * aarch64_get_FP_float (cpu, sm));
7189 }
7190
7191 /* Double negative multiply add. */
7192 static void
7193 fnmaddd (sim_cpu *cpu)
7194 {
7195 unsigned sa = INSTR (14, 10);
7196 unsigned sm = INSTR (20, 16);
7197 unsigned sn = INSTR ( 9, 5);
7198 unsigned sd = INSTR ( 4, 0);
7199
7200 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7201 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7202 + (- aarch64_get_FP_double (cpu, sn))
7203 * aarch64_get_FP_double (cpu, sm));
7204 }
7205
7206 /* Float negative multiply subtract. */
7207 static void
7208 fnmsubs (sim_cpu *cpu)
7209 {
7210 unsigned sa = INSTR (14, 10);
7211 unsigned sm = INSTR (20, 16);
7212 unsigned sn = INSTR ( 9, 5);
7213 unsigned sd = INSTR ( 4, 0);
7214
7215 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7216 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7217 + aarch64_get_FP_float (cpu, sn)
7218 * aarch64_get_FP_float (cpu, sm));
7219 }
7220
7221 /* Double negative multiply subtract. */
7222 static void
7223 fnmsubd (sim_cpu *cpu)
7224 {
7225 unsigned sa = INSTR (14, 10);
7226 unsigned sm = INSTR (20, 16);
7227 unsigned sn = INSTR ( 9, 5);
7228 unsigned sd = INSTR ( 4, 0);
7229
7230 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7231 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7232 + aarch64_get_FP_double (cpu, sn)
7233 * aarch64_get_FP_double (cpu, sm));
7234 }
7235
7236 static void
7237 dexSimpleFPDataProc3Source (sim_cpu *cpu)
7238 {
7239 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7240 instr[30] = 0
7241 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7242 instr[28,25] = 1111
7243 instr[24] = 1
7244 instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
7245 instr[21] ==> o1 : 0 ==> unnegated, 1 ==> negated
7246 instr[15] ==> o2 : 0 ==> ADD, 1 ==> SUB */
7247
7248 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7249 /* dispatch on combined type:o1:o2. */
7250 uint32_t dispatch = (INSTR (23, 21) << 1) | INSTR (15, 15);
7251
7252 if (M_S != 0)
7253 HALT_UNALLOC;
7254
7255 switch (dispatch)
7256 {
7257 case 0: fmadds (cpu); return;
7258 case 1: fmsubs (cpu); return;
7259 case 2: fnmadds (cpu); return;
7260 case 3: fnmsubs (cpu); return;
7261 case 4: fmaddd (cpu); return;
7262 case 5: fmsubd (cpu); return;
7263 case 6: fnmaddd (cpu); return;
7264 case 7: fnmsubd (cpu); return;
7265 default:
7266 /* type > 1 is currently unallocated. */
7267 HALT_UNALLOC;
7268 }
7269 }
7270
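/* For example, FMSUB Dd, Dn, Dm, Da has type == 01, o1 == 0 and o2 == 1,
   so dispatch == (0x2 << 1) | 1 == 5 above and fmsubd is selected.  */
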
7271 static void
7272 dexSimpleFPFixedConvert (sim_cpu *cpu)
7273 {
7274 HALT_NYI;
7275 }
7276
7277 static void
7278 dexSimpleFPCondCompare (sim_cpu *cpu)
7279 {
7280 /* instr [31,23] = 0001 1110 0
7281 instr [22] = type
7282 instr [21] = 1
7283 instr [20,16] = Rm
7284 instr [15,12] = condition
7285 instr [11,10] = 01
7286 instr [9,5] = Rn
7287 instr [4] = 0
7288 instr [3,0] = nzcv */
7289
7290 unsigned rm = INSTR (20, 16);
7291 unsigned rn = INSTR (9, 5);
7292
7293 NYI_assert (31, 23, 0x3C);
7294 NYI_assert (11, 10, 0x1);
7295 NYI_assert (4, 4, 0);
7296
7297 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7298 if (! testConditionCode (cpu, INSTR (15, 12)))
7299 {
7300 aarch64_set_CPSR (cpu, INSTR (3, 0));
7301 return;
7302 }
7303
7304 if (INSTR (22, 22))
7305 {
7306 /* Double precision. */
7307 double val1 = aarch64_get_vec_double (cpu, rn, 0);
7308 double val2 = aarch64_get_vec_double (cpu, rm, 0);
7309
7310 /* FIXME: Check for NaNs. */
7311 if (val1 == val2)
7312 aarch64_set_CPSR (cpu, (Z | C));
7313 else if (val1 < val2)
7314 aarch64_set_CPSR (cpu, N);
7315 else /* val1 > val2 */
7316 aarch64_set_CPSR (cpu, C);
7317 }
7318 else
7319 {
7320 /* Single precision. */
7321 float val1 = aarch64_get_vec_float (cpu, rn, 0);
7322 float val2 = aarch64_get_vec_float (cpu, rm, 0);
7323
7324 /* FIXME: Check for NaNs. */
7325 if (val1 == val2)
7326 aarch64_set_CPSR (cpu, (Z | C));
7327 else if (val1 < val2)
7328 aarch64_set_CPSR (cpu, N);
7329 else /* val1 > val2 */
7330 aarch64_set_CPSR (cpu, C);
7331 }
7332 }
7333
7334 /* 2 sources. */
7335
7336 /* Float add. */
7337 static void
7338 fadds (sim_cpu *cpu)
7339 {
7340 unsigned sm = INSTR (20, 16);
7341 unsigned sn = INSTR ( 9, 5);
7342 unsigned sd = INSTR ( 4, 0);
7343
7344 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7345 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7346 + aarch64_get_FP_float (cpu, sm));
7347 }
7348
7349 /* Double add. */
7350 static void
7351 faddd (sim_cpu *cpu)
7352 {
7353 unsigned sm = INSTR (20, 16);
7354 unsigned sn = INSTR ( 9, 5);
7355 unsigned sd = INSTR ( 4, 0);
7356
7357 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7358 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7359 + aarch64_get_FP_double (cpu, sm));
7360 }
7361
7362 /* Float divide. */
7363 static void
7364 fdivs (sim_cpu *cpu)
7365 {
7366 unsigned sm = INSTR (20, 16);
7367 unsigned sn = INSTR ( 9, 5);
7368 unsigned sd = INSTR ( 4, 0);
7369
7370 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7371 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7372 / aarch64_get_FP_float (cpu, sm));
7373 }
7374
7375 /* Double divide. */
7376 static void
7377 fdivd (sim_cpu *cpu)
7378 {
7379 unsigned sm = INSTR (20, 16);
7380 unsigned sn = INSTR ( 9, 5);
7381 unsigned sd = INSTR ( 4, 0);
7382
7383 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7384 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7385 / aarch64_get_FP_double (cpu, sm));
7386 }
7387
7388 /* Float multiply. */
7389 static void
7390 fmuls (sim_cpu *cpu)
7391 {
7392 unsigned sm = INSTR (20, 16);
7393 unsigned sn = INSTR ( 9, 5);
7394 unsigned sd = INSTR ( 4, 0);
7395
7396 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7397 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7398 * aarch64_get_FP_float (cpu, sm));
7399 }
7400
7401 /* Double multiply. */
7402 static void
7403 fmuld (sim_cpu *cpu)
7404 {
7405 unsigned sm = INSTR (20, 16);
7406 unsigned sn = INSTR ( 9, 5);
7407 unsigned sd = INSTR ( 4, 0);
7408
7409 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7410 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7411 * aarch64_get_FP_double (cpu, sm));
7412 }
7413
7414 /* Float negate and multiply. */
7415 static void
7416 fnmuls (sim_cpu *cpu)
7417 {
7418 unsigned sm = INSTR (20, 16);
7419 unsigned sn = INSTR ( 9, 5);
7420 unsigned sd = INSTR ( 4, 0);
7421
7422 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7423 aarch64_set_FP_float (cpu, sd, - (aarch64_get_FP_float (cpu, sn)
7424 * aarch64_get_FP_float (cpu, sm)));
7425 }
7426
7427 /* Double negate and multiply. */
7428 static void
7429 fnmuld (sim_cpu *cpu)
7430 {
7431 unsigned sm = INSTR (20, 16);
7432 unsigned sn = INSTR ( 9, 5);
7433 unsigned sd = INSTR ( 4, 0);
7434
7435 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7436 aarch64_set_FP_double (cpu, sd, - (aarch64_get_FP_double (cpu, sn)
7437 * aarch64_get_FP_double (cpu, sm)));
7438 }
7439
7440 /* Float subtract. */
7441 static void
7442 fsubs (sim_cpu *cpu)
7443 {
7444 unsigned sm = INSTR (20, 16);
7445 unsigned sn = INSTR ( 9, 5);
7446 unsigned sd = INSTR ( 4, 0);
7447
7448 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7449 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7450 - aarch64_get_FP_float (cpu, sm));
7451 }
7452
7453 /* Double subtract. */
7454 static void
7455 fsubd (sim_cpu *cpu)
7456 {
7457 unsigned sm = INSTR (20, 16);
7458 unsigned sn = INSTR ( 9, 5);
7459 unsigned sd = INSTR ( 4, 0);
7460
7461 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7462 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7463 - aarch64_get_FP_double (cpu, sm));
7464 }
7465
7466 static void
7467 do_FMINNM (sim_cpu *cpu)
7468 {
7469 /* instr[31,23] = 0 0011 1100
7470 instr[22] = float(0)/double(1)
7471 instr[21] = 1
7472 instr[20,16] = Sm
7473 instr[15,10] = 01 1110
7474 instr[9,5] = Sn
7475 instr[4,0] = Sd */
7476
7477 unsigned sm = INSTR (20, 16);
7478 unsigned sn = INSTR ( 9, 5);
7479 unsigned sd = INSTR ( 4, 0);
7480
7481 NYI_assert (31, 23, 0x03C);
7482 NYI_assert (15, 10, 0x1E);
7483
7484 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7485 if (INSTR (22, 22))
7486 aarch64_set_FP_double (cpu, sd,
7487 dminnm (aarch64_get_FP_double (cpu, sn),
7488 aarch64_get_FP_double (cpu, sm)));
7489 else
7490 aarch64_set_FP_float (cpu, sd,
7491 fminnm (aarch64_get_FP_float (cpu, sn),
7492 aarch64_get_FP_float (cpu, sm)));
7493 }
7494
7495 static void
7496 do_FMAXNM (sim_cpu *cpu)
7497 {
7498 /* instr[31,23] = 0 0011 1100
7499 instr[22] = float(0)/double(1)
7500 instr[21] = 1
7501 instr[20,16] = Sm
7502 instr[15,10] = 01 1010
7503 instr[9,5] = Sn
7504 instr[4,0] = Cpu */
7505
7506 unsigned sm = INSTR (20, 16);
7507 unsigned sn = INSTR ( 9, 5);
7508 unsigned sd = INSTR ( 4, 0);
7509
7510 NYI_assert (31, 23, 0x03C);
7511 NYI_assert (15, 10, 0x1A);
7512
7513 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7514 if (INSTR (22, 22))
7515 aarch64_set_FP_double (cpu, sd,
7516 dmaxnm (aarch64_get_FP_double (cpu, sn),
7517 aarch64_get_FP_double (cpu, sm)));
7518 else
7519 aarch64_set_FP_float (cpu, sd,
7520 fmaxnm (aarch64_get_FP_float (cpu, sn),
7521 aarch64_get_FP_float (cpu, sm)));
7522 }
7523
7524 static void
7525 dexSimpleFPDataProc2Source (sim_cpu *cpu)
7526 {
7527 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7528 instr[30] = 0
7529 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7530 instr[28,25] = 1111
7531 instr[24] = 0
7532 instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
7533 instr[21] = 1
7534 instr[20,16] = Vm
7535 instr[15,12] ==> opcode : 0000 ==> FMUL, 0001 ==> FDIV
7536 0010 ==> FADD, 0011 ==> FSUB,
7537 0100 ==> FMAX, 0101 ==> FMIN
7538 0110 ==> FMAXNM, 0111 ==> FMINNM
7539 1000 ==> FNMUL, ow ==> UNALLOC
7540 instr[11,10] = 10
7541 instr[9,5] = Vn
7542 instr[4,0] = Vd */
7543
7544 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7545 uint32_t type = INSTR (23, 22);
7546 /* Dispatch on opcode. */
7547 uint32_t dispatch = INSTR (15, 12);
7548
7549 if (type > 1)
7550 HALT_UNALLOC;
7551
7552 if (M_S != 0)
7553 HALT_UNALLOC;
7554
7555 if (type)
7556 switch (dispatch)
7557 {
7558 case 0: fmuld (cpu); return;
7559 case 1: fdivd (cpu); return;
7560 case 2: faddd (cpu); return;
7561 case 3: fsubd (cpu); return;
7562 case 6: do_FMAXNM (cpu); return;
7563 case 7: do_FMINNM (cpu); return;
7564 case 8: fnmuld (cpu); return;
7565
7566 /* Have not yet implemented fmax and fmin. */
7567 case 4:
7568 case 5:
7569 HALT_NYI;
7570
7571 default:
7572 HALT_UNALLOC;
7573 }
7574 else /* type == 0 => floats. */
7575 switch (dispatch)
7576 {
7577 case 0: fmuls (cpu); return;
7578 case 1: fdivs (cpu); return;
7579 case 2: fadds (cpu); return;
7580 case 3: fsubs (cpu); return;
7581 case 6: do_FMAXNM (cpu); return;
7582 case 7: do_FMINNM (cpu); return;
7583 case 8: fnmuls (cpu); return;
7584
7585 case 4:
7586 case 5:
7587 HALT_NYI;
7588
7589 default:
7590 HALT_UNALLOC;
7591 }
7592 }
7593
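/* For example, FDIV Sd, Sn, Sm has type == 00 and opcode == 0001, so the
   float half of the dispatch above selects fdivs.  */
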
7594 static void
7595 dexSimpleFPCondSelect (sim_cpu *cpu)
7596 {
7597 /* FCSEL
7598 instr[31,23] = 0 0011 1100
7599 instr[22] = 0=>single 1=>double
7600 instr[21] = 1
7601 instr[20,16] = Sm
7602 instr[15,12] = cond
7603 instr[11,10] = 11
7604 instr[9,5] = Sn
7605 instr[4,0] = Sd */
7606 unsigned sm = INSTR (20, 16);
7607 unsigned sn = INSTR ( 9, 5);
7608 unsigned sd = INSTR ( 4, 0);
7609 uint32_t set = testConditionCode (cpu, INSTR (15, 12));
7610
7611 NYI_assert (31, 23, 0x03C);
7612 NYI_assert (11, 10, 0x3);
7613
7614 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7615 if (INSTR (22, 22))
7616 aarch64_set_FP_double (cpu, sd, (set ? aarch64_get_FP_double (cpu, sn)
7617 : aarch64_get_FP_double (cpu, sm)));
7618 else
7619 aarch64_set_FP_float (cpu, sd, (set ? aarch64_get_FP_float (cpu, sn)
7620 : aarch64_get_FP_float (cpu, sm)));
7621 }
7622
7623 /* Store 32 bit unscaled signed 9 bit. */
7624 static void
7625 fsturs (sim_cpu *cpu, int32_t offset)
7626 {
7627 unsigned int rn = INSTR (9, 5);
7628 unsigned int st = INSTR (4, 0);
7629
7630 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7631 aarch64_set_mem_u32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7632 aarch64_get_vec_u32 (cpu, st, 0));
7633 }
7634
7635 /* Store 64 bit unscaled signed 9 bit. */
7636 static void
7637 fsturd (sim_cpu *cpu, int32_t offset)
7638 {
7639 unsigned int rn = INSTR (9, 5);
7640 unsigned int st = INSTR (4, 0);
7641
7642 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7643 aarch64_set_mem_u64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7644 aarch64_get_vec_u64 (cpu, st, 0));
7645 }
7646
7647 /* Store 128 bit unscaled signed 9 bit. */
7648 static void
7649 fsturq (sim_cpu *cpu, int32_t offset)
7650 {
7651 unsigned int rn = INSTR (9, 5);
7652 unsigned int st = INSTR (4, 0);
7653 FRegister a;
7654
7655 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7656 aarch64_get_FP_long_double (cpu, st, & a);
7657 aarch64_set_mem_long_double (cpu,
7658 aarch64_get_reg_u64 (cpu, rn, SP_OK)
7659 + offset, a);
7660 }
7661
7662 /* TODO FP move register. */
7663
7664 /* 32 bit fp to fp move register. */
7665 static void
7666 ffmovs (sim_cpu *cpu)
7667 {
7668 unsigned int rn = INSTR (9, 5);
7669 unsigned int st = INSTR (4, 0);
7670
7671 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7672 aarch64_set_FP_float (cpu, st, aarch64_get_FP_float (cpu, rn));
7673 }
7674
7675 /* 64 bit fp to fp move register. */
7676 static void
7677 ffmovd (sim_cpu *cpu)
7678 {
7679 unsigned int rn = INSTR (9, 5);
7680 unsigned int st = INSTR (4, 0);
7681
7682 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7683 aarch64_set_FP_double (cpu, st, aarch64_get_FP_double (cpu, rn));
7684 }
7685
7686 /* 32 bit GReg to Vec move register. */
7687 static void
7688 fgmovs (sim_cpu *cpu)
7689 {
7690 unsigned int rn = INSTR (9, 5);
7691 unsigned int st = INSTR (4, 0);
7692
7693 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7694 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_reg_u32 (cpu, rn, NO_SP));
7695 }
7696
7697 /* 64 bit g to fp move register. */
7698 static void
7699 fgmovd (sim_cpu *cpu)
7700 {
7701 unsigned int rn = INSTR (9, 5);
7702 unsigned int st = INSTR (4, 0);
7703
7704 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7705 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_reg_u64 (cpu, rn, NO_SP));
7706 }
7707
7708 /* 32 bit fp to g move register. */
7709 static void
7710 gfmovs (sim_cpu *cpu)
7711 {
7712 unsigned int rn = INSTR (9, 5);
7713 unsigned int st = INSTR (4, 0);
7714
7715 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7716 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u32 (cpu, rn, 0));
7717 }
7718
7719 /* 64 bit fp to g move register. */
7720 static void
7721 gfmovd (sim_cpu *cpu)
7722 {
7723 unsigned int rn = INSTR (9, 5);
7724 unsigned int st = INSTR (4, 0);
7725
7726 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7727 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u64 (cpu, rn, 0));
7728 }
7729
7730 /* FP move immediate
7731
7732 These install an immediate 8 bit value in the target register
7733 where the 8 bits comprise 1 sign bit, 4 bits of fraction and a 3
7734 bit exponent. */
7735
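/* For example, assuming fp_immediate_for_encoding_32 implements the usual
   AArch64 FMOV (immediate) expansion, the encodable values are
   +/- n/16 * 2^r with 16 <= n <= 31 and -3 <= r <= 4, and imm8 == 0x00
   decodes to 2.0.  */
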
7736 static void
7737 fmovs (sim_cpu *cpu)
7738 {
7739 unsigned int sd = INSTR (4, 0);
7740 uint32_t imm = INSTR (20, 13);
7741 float f = fp_immediate_for_encoding_32 (imm);
7742
7743 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7744 aarch64_set_FP_float (cpu, sd, f);
7745 }
7746
7747 static void
7748 fmovd (sim_cpu *cpu)
7749 {
7750 unsigned int sd = INSTR (4, 0);
7751 uint32_t imm = INSTR (20, 13);
7752 double d = fp_immediate_for_encoding_64 (imm);
7753
7754 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7755 aarch64_set_FP_double (cpu, sd, d);
7756 }
7757
7758 static void
7759 dexSimpleFPImmediate (sim_cpu *cpu)
7760 {
7761 /* instr[31,23] == 0001 1110 0
7762 instr[22] == type : single(0)/double(1)
7763 instr[21] == 1
7764 instr[20,13] == imm8
7765 instr[12,10] == 100
7766 instr[9,5] == imm5 : 00000 ==> OK, ow ==> UNALLOC
7767 instr[4,0] == Rd */
7768 uint32_t imm5 = INSTR (9, 5);
7769
7770 NYI_assert (31, 23, 0x3C);
7771
7772 if (imm5 != 0)
7773 HALT_UNALLOC;
7774
7775 if (INSTR (22, 22))
7776 fmovd (cpu);
7777 else
7778 fmovs (cpu);
7779 }
7780
7781 /* TODO specific decode and execute for group Load Store. */
7782
7783 /* TODO FP load/store single register (unscaled offset). */
7784
7785 /* TODO load 8 bit unscaled signed 9 bit. */
7786 /* TODO load 16 bit unscaled signed 9 bit. */
7787
7788 /* Load 32 bit unscaled signed 9 bit. */
7789 static void
7790 fldurs (sim_cpu *cpu, int32_t offset)
7791 {
7792 unsigned int rn = INSTR (9, 5);
7793 unsigned int st = INSTR (4, 0);
7794
7795 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7796 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
7797 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7798 }
7799
7800 /* Load 64 bit unscaled signed 9 bit. */
7801 static void
7802 fldurd (sim_cpu *cpu, int32_t offset)
7803 {
7804 unsigned int rn = INSTR (9, 5);
7805 unsigned int st = INSTR (4, 0);
7806
7807 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7808 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64
7809 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7810 }
7811
7812 /* Load 128 bit unscaled signed 9 bit. */
7813 static void
7814 fldurq (sim_cpu *cpu, int32_t offset)
7815 {
7816 unsigned int rn = INSTR (9, 5);
7817 unsigned int st = INSTR (4, 0);
7818 FRegister a;
7819 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
7820
7821 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7822 aarch64_get_mem_long_double (cpu, addr, & a);
7823 aarch64_set_FP_long_double (cpu, st, a);
7824 }
7825
7826 /* TODO store 8 bit unscaled signed 9 bit. */
7827 /* TODO store 16 bit unscaled signed 9 bit. */
7828
7829
7830 /* 1 source. */
7831
7832 /* Float absolute value. */
7833 static void
7834 fabss (sim_cpu *cpu)
7835 {
7836 unsigned sn = INSTR (9, 5);
7837 unsigned sd = INSTR (4, 0);
7838 float value = aarch64_get_FP_float (cpu, sn);
7839
7840 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7841 aarch64_set_FP_float (cpu, sd, fabsf (value));
7842 }
7843
7844 /* Double absolute value. */
7845 static void
7846 fabcpu (sim_cpu *cpu)
7847 {
7848 unsigned sn = INSTR (9, 5);
7849 unsigned sd = INSTR (4, 0);
7850 double value = aarch64_get_FP_double (cpu, sn);
7851
7852 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7853 aarch64_set_FP_double (cpu, sd, fabs (value));
7854 }
7855
7856 /* Float negative value. */
7857 static void
7858 fnegs (sim_cpu *cpu)
7859 {
7860 unsigned sn = INSTR (9, 5);
7861 unsigned sd = INSTR (4, 0);
7862
7863 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7864 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sn));
7865 }
7866
7867 /* Double negative value. */
7868 static void
7869 fnegd (sim_cpu *cpu)
7870 {
7871 unsigned sn = INSTR (9, 5);
7872 unsigned sd = INSTR (4, 0);
7873
7874 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7875 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sn));
7876 }
7877
7878 /* Float square root. */
7879 static void
7880 fsqrts (sim_cpu *cpu)
7881 {
7882 unsigned sn = INSTR (9, 5);
7883 unsigned sd = INSTR (4, 0);
7884
7885 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7886 aarch64_set_FP_float (cpu, sd, sqrtf (aarch64_get_FP_float (cpu, sn)));
7887 }
7888
7889 /* Double square root. */
7890 static void
7891 fsqrtd (sim_cpu *cpu)
7892 {
7893 unsigned sn = INSTR (9, 5);
7894 unsigned sd = INSTR (4, 0);
7895
7896 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7897 aarch64_set_FP_double (cpu, sd,
7898 sqrt (aarch64_get_FP_double (cpu, sn)));
7899 }
7900
7901 /* Convert double to float. */
7902 static void
7903 fcvtds (sim_cpu *cpu)
7904 {
7905 unsigned sn = INSTR (9, 5);
7906 unsigned sd = INSTR (4, 0);
7907
7908 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7909 aarch64_set_FP_float (cpu, sd, (float) aarch64_get_FP_double (cpu, sn));
7910 }
7911
7912 /* Convert float to double. */
7913 static void
7914 fcvtcpu (sim_cpu *cpu)
7915 {
7916 unsigned sn = INSTR (9, 5);
7917 unsigned sd = INSTR (4, 0);
7918
7919 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7920 aarch64_set_FP_double (cpu, sd, (double) aarch64_get_FP_float (cpu, sn));
7921 }
7922
7923 static void
7924 do_FRINT (sim_cpu *cpu)
7925 {
7926 /* instr[31,23] = 0001 1110 0
7927 instr[22] = single(0)/double(1)
7928 instr[21,18] = 1001
7929 instr[17,15] = rounding mode
7930 instr[14,10] = 10000
7931 instr[9,5] = source
7932 instr[4,0] = dest */
7933
7934 float val;
7935 unsigned rs = INSTR (9, 5);
7936 unsigned rd = INSTR (4, 0);
7937 unsigned int rmode = INSTR (17, 15);
7938
7939 NYI_assert (31, 23, 0x03C);
7940 NYI_assert (21, 18, 0x9);
7941 NYI_assert (14, 10, 0x10);
7942
7943 if (rmode == 6 || rmode == 7)
7944 /* FIXME: Add support for rmode == 6 exactness check. */
7945 rmode = uimm (aarch64_get_FPSR (cpu), 23, 22);
7946
7947 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7948 if (INSTR (22, 22))
7949 {
7950 double val = aarch64_get_FP_double (cpu, rs);
7951
7952 switch (rmode)
7953 {
7954 case 0: /* mode N: nearest or even. */
7955 {
7956 double rval = round (val);
7957
7958 /* round () breaks ties away from zero; pull an odd result back
7959 towards zero to give ties-to-even. */
7960 if (fabs (val - rval) == 0.5 && fmod (rval, 2.0) != 0.0)
7961 rval -= copysign (1.0, rval);
7962
7963 aarch64_set_FP_double (cpu, rd, rval);
7965 return;
7966 }
7967
7968 case 1: /* mode P: towards +inf. */
7969 aarch64_set_FP_double (cpu, rd, ceil (val));
7970 return;
7971
7972 case 2: /* mode M: towards -inf. */
7973 aarch64_set_FP_double (cpu, rd, floor (val));
7974 return;
7981
7982 case 3: /* mode Z: towards 0. */
7983 aarch64_set_FP_double (cpu, rd, trunc (val));
7984 return;
7985
7986 case 4: /* mode A: away from 0. */
7987 aarch64_set_FP_double (cpu, rd, round (val));
7988 return;
7989
7990 case 6: /* mode X: use FPCR with exactness check. */
7991 case 7: /* mode I: use FPCR mode. */
7992 HALT_NYI;
7993
7994 default:
7995 HALT_UNALLOC;
7996 }
7997 }
7998
7999 val = aarch64_get_FP_float (cpu, rs);
8000
8001 switch (rmode)
8002 {
8003 case 0: /* mode N: nearest or even. */
8004 {
8005 float rval = roundf (val);
8006
8007 /* roundf () breaks ties away from zero; as in the double case,
8008 pull an odd result back towards zero to give ties-to-even. */
8009 if (fabsf (val - rval) == 0.5f && fmodf (rval, 2.0f) != 0.0f)
8010 rval -= copysignf (1.0f, rval);
8012
8013 aarch64_set_FP_float (cpu, rd, rval);
8014 return;
8015 }
8016
8017 case 1: /* mode P: towards +inf. */
8018 aarch64_set_FP_float (cpu, rd, ceilf (val));
8019 return;
8020
8021 case 2: /* mode M: towards -inf. */
8022 aarch64_set_FP_float (cpu, rd, floorf (val));
8023 return;
8030
8031 case 3: /* mode Z: towards 0. */
8032 aarch64_set_FP_float (cpu, rd, truncf (val));
8033 return;
8034
8035 case 4: /* mode A: away from 0. */
8036 aarch64_set_FP_float (cpu, rd, roundf (val));
8037 return;
8038
8039 case 6: /* mode X: use FPCR with exactness check. */
8040 case 7: /* mode I: use FPCR mode. */
8041 HALT_NYI;
8042
8043 default:
8044 HALT_UNALLOC;
8045 }
8046 }
8047
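/* A worked example of the mode N ties-to-even adjustment above:
   round (2.5) == 3.0 (odd), so it is pulled back to 2.0;
   round (-2.5) == -3.0 (odd), pulled back to -2.0;
   round (3.5) == 4.0 (even), kept as is.  */
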
8048 /* Convert half to float. */
8049 static void
8050 do_FCVT_half_to_single (sim_cpu *cpu)
8051 {
8052 unsigned rn = INSTR (9, 5);
8053 unsigned rd = INSTR (4, 0);
8054
8055 NYI_assert (31, 10, 0x7B890);
8056
8057 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8058 aarch64_set_FP_float (cpu, rd, (float) aarch64_get_FP_half (cpu, rn));
8059 }
8060
8061 /* Convert half to double. */
8062 static void
8063 do_FCVT_half_to_double (sim_cpu *cpu)
8064 {
8065 unsigned rn = INSTR (9, 5);
8066 unsigned rd = INSTR (4, 0);
8067
8068 NYI_assert (31, 10, 0x7B8B0);
8069
8070 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8071 aarch64_set_FP_double (cpu, rd, (double) aarch64_get_FP_half (cpu, rn));
8072 }
8073
8074 static void
8075 do_FCVT_single_to_half (sim_cpu *cpu)
8076 {
8077 unsigned rn = INSTR (9, 5);
8078 unsigned rd = INSTR (4, 0);
8079
8080 NYI_assert (31, 10, 0x788F0);
8081
8082 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8083 aarch64_set_FP_half (cpu, rd, aarch64_get_FP_float (cpu, rn));
8084 }
8085
8086 /* Convert double to half. */
8087 static void
8088 do_FCVT_double_to_half (sim_cpu *cpu)
8089 {
8090 unsigned rn = INSTR (9, 5);
8091 unsigned rd = INSTR (4, 0);
8092
8093 NYI_assert (31, 10, 0x798F0);
8094
8095 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8096 aarch64_set_FP_half (cpu, rd, (float) aarch64_get_FP_double (cpu, rn));
8097 }
8098
8099 static void
8100 dexSimpleFPDataProc1Source (sim_cpu *cpu)
8101 {
8102 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
8103 instr[30] = 0
8104 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
8105 instr[28,25] = 1111
8106 instr[24] = 0
8107 instr[23,22] ==> type : 00 ==> source is single,
8108 01 ==> source is double
8109 10 ==> UNALLOC
8110 11 ==> UNALLOC or source is half
8111 instr[21] = 1
8112 instr[20,15] ==> opcode : with type 00 or 01
8113 000000 ==> FMOV, 000001 ==> FABS,
8114 000010 ==> FNEG, 000011 ==> FSQRT,
8115 000100 ==> FCVT (double-to-single), 000101 ==> FCVT (single-to-double)
8116 000110 ==> UNALLOC, 000111 ==> FCVT (to half)
8117 001000 ==> FRINTN, 001001 ==> FRINTP,
8118 001010 ==> FRINTM, 001011 ==> FRINTZ,
8119 001100 ==> FRINTA, 001101 ==> UNALLOC
8120 001110 ==> FRINTX, 001111 ==> FRINTI
8121 with type 11
8122 000100 ==> FCVT (half-to-single)
8123 000101 ==> FCVT (half-to-double)
8124 instr[14,10] = 10000. */
8125
8126 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
8127 uint32_t type = INSTR (23, 22);
8128 uint32_t opcode = INSTR (20, 15);
8129
8130 if (M_S != 0)
8131 HALT_UNALLOC;
8132
8133 if (type == 3)
8134 {
8135 if (opcode == 4)
8136 do_FCVT_half_to_single (cpu);
8137 else if (opcode == 5)
8138 do_FCVT_half_to_double (cpu);
8139 else
8140 HALT_UNALLOC;
8141 return;
8142 }
8143
8144 if (type == 2)
8145 HALT_UNALLOC;
8146
8147 switch (opcode)
8148 {
8149 case 0:
8150 if (type)
8151 ffmovd (cpu);
8152 else
8153 ffmovs (cpu);
8154 return;
8155
8156 case 1:
8157 if (type)
8158 fabcpu (cpu);
8159 else
8160 fabss (cpu);
8161 return;
8162
8163 case 2:
8164 if (type)
8165 fnegd (cpu);
8166 else
8167 fnegs (cpu);
8168 return;
8169
8170 case 3:
8171 if (type)
8172 fsqrtd (cpu);
8173 else
8174 fsqrts (cpu);
8175 return;
8176
8177 case 4:
8178 if (type)
8179 fcvtds (cpu);
8180 else
8181 HALT_UNALLOC;
8182 return;
8183
8184 case 5:
8185 if (type)
8186 HALT_UNALLOC;
8187 fcvtcpu (cpu);
8188 return;
8189
8190 case 8: /* FRINTN etc. */
8191 case 9:
8192 case 10:
8193 case 11:
8194 case 12:
8195 case 14:
8196 case 15:
8197 do_FRINT (cpu);
8198 return;
8199
8200 case 7:
8201 if (INSTR (22, 22))
8202 do_FCVT_double_to_half (cpu);
8203 else
8204 do_FCVT_single_to_half (cpu);
8205 return;
8206
8207 case 13:
8208 HALT_NYI;
8209
8210 default:
8211 HALT_UNALLOC;
8212 }
8213 }
8214
8215 /* 32 bit signed int to float. */
8216 static void
8217 scvtf32 (sim_cpu *cpu)
8218 {
8219 unsigned rn = INSTR (9, 5);
8220 unsigned sd = INSTR (4, 0);
8221
8222 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8223 aarch64_set_FP_float
8224 (cpu, sd, (float) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8225 }
8226
8227 /* 64 bit signed int to float. */
8228 static void
8229 scvtf (sim_cpu *cpu)
8230 {
8231 unsigned rn = INSTR (9, 5);
8232 unsigned sd = INSTR (4, 0);
8233
8234 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8235 aarch64_set_FP_float
8236 (cpu, sd, (float) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8237 }
8238
8239 /* 32 bit signed int to double. */
8240 static void
8241 scvtd32 (sim_cpu *cpu)
8242 {
8243 unsigned rn = INSTR (9, 5);
8244 unsigned sd = INSTR (4, 0);
8245
8246 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8247 aarch64_set_FP_double
8248 (cpu, sd, (double) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8249 }
8250
8251 /* 64 bit signed int to double. */
8252 static void
8253 scvtd (sim_cpu *cpu)
8254 {
8255 unsigned rn = INSTR (9, 5);
8256 unsigned sd = INSTR (4, 0);
8257
8258 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8259 aarch64_set_FP_double
8260 (cpu, sd, (double) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8261 }
8262
8263 static const float FLOAT_INT_MAX = (float) INT_MAX;
8264 static const float FLOAT_INT_MIN = (float) INT_MIN;
8265 static const double DOUBLE_INT_MAX = (double) INT_MAX;
8266 static const double DOUBLE_INT_MIN = (double) INT_MIN;
8267 static const float FLOAT_LONG_MAX = (float) LONG_MAX;
8268 static const float FLOAT_LONG_MIN = (float) LONG_MIN;
8269 static const double DOUBLE_LONG_MAX = (double) LONG_MAX;
8270 static const double DOUBLE_LONG_MIN = (double) LONG_MIN;
8271
8272 #define UINT_MIN 0
8273 #define ULONG_MIN 0
8274 static const float FLOAT_UINT_MAX = (float) UINT_MAX;
8275 static const float FLOAT_UINT_MIN = (float) UINT_MIN;
8276 static const double DOUBLE_UINT_MAX = (double) UINT_MAX;
8277 static const double DOUBLE_UINT_MIN = (double) UINT_MIN;
8278 static const float FLOAT_ULONG_MAX = (float) ULONG_MAX;
8279 static const float FLOAT_ULONG_MIN = (float) ULONG_MIN;
8280 static const double DOUBLE_ULONG_MAX = (double) ULONG_MAX;
8281 static const double DOUBLE_ULONG_MIN = (double) ULONG_MIN;
8282
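/* Note that some of these bounds are not exactly representable:
   (float) INT_MAX, for instance, rounds up to 2147483648.0f (2^31).
   RAISE_EXCEPTIONS below therefore compares with >= and <= so that a
   value equal to such a rounded bound still saturates.  */
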
8283 /* Check for FP exception conditions:
8284 NaN raises IO
8285 Infinity raises IO
8286 Out of Range raises IO and IX and saturates value
8287 Denormal raises ID and IX and sets to zero. */
8288 #define RAISE_EXCEPTIONS(F, VALUE, FTYPE, ITYPE) \
8289 do \
8290 { \
8291 switch (fpclassify (F)) \
8292 { \
8293 case FP_INFINITE: \
8294 case FP_NAN: \
8295 aarch64_set_FPSR (cpu, IO); \
8296 if (signbit (F)) \
8297 VALUE = ITYPE##_MIN; \
8298 else \
8299 VALUE = ITYPE##_MAX; \
8300 break; \
8301 \
8302 case FP_NORMAL: \
8303 if (F >= FTYPE##_##ITYPE##_MAX) \
8304 { \
8305 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8306 VALUE = ITYPE##_MAX; \
8307 } \
8308 else if (F <= FTYPE##_##ITYPE##_MIN) \
8309 { \
8310 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8311 VALUE = ITYPE##_MIN; \
8312 } \
8313 break; \
8314 \
8315 case FP_SUBNORMAL: \
8316 aarch64_set_FPSR_bits (cpu, IO | IX | ID, IX | ID); \
8317 VALUE = 0; \
8318 break; \
8319 \
8320 default: \
8321 case FP_ZERO: \
8322 VALUE = 0; \
8323 break; \
8324 } \
8325 } \
8326 while (0)
8327
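/* Illustrative sketch (not part of the simulator): how the conversion
   routines below use RAISE_EXCEPTIONS.  The raw C cast is undefined for
   out-of-range inputs, which is why the macro overwrites VALUE with the
   saturated result afterwards.  */
#if 0
  float f = 3.0e9f;             /* Above (float) INT_MAX.  */
  int32_t value = (int32_t) f;
  RAISE_EXCEPTIONS (f, value, FLOAT, INT);
  /* Now value == INT_MAX and the IO and IX flags are set in the FPSR.  */
#endif
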
8328 /* 32 bit convert float to signed int truncate towards zero. */
8329 static void
8330 fcvtszs32 (sim_cpu *cpu)
8331 {
8332 unsigned sn = INSTR (9, 5);
8333 unsigned rd = INSTR (4, 0);
8334 /* A C float-to-int cast truncates towards zero, as FCVTZS requires. */
8335 float f = aarch64_get_FP_float (cpu, sn);
8336 int32_t value = (int32_t) f;
8337
8338 RAISE_EXCEPTIONS (f, value, FLOAT, INT);
8339
8340 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8341 /* Avoid sign extension to 64 bit. */
8342 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8343 }
8344
8345 /* 64 bit convert float to signed int truncate towards zero. */
8346 static void
8347 fcvtszs (sim_cpu *cpu)
8348 {
8349 unsigned sn = INSTR (9, 5);
8350 unsigned rd = INSTR (4, 0);
8351 float f = aarch64_get_FP_float (cpu, sn);
8352 int64_t value = (int64_t) f;
8353
8354 RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
8355
8356 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8357 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8358 }
8359
8360 /* 32 bit convert double to signed int truncate towards zero. */
8361 static void
8362 fcvtszd32 (sim_cpu *cpu)
8363 {
8364 unsigned sn = INSTR (9, 5);
8365 unsigned rd = INSTR (4, 0);
8366 /* A C double-to-int cast truncates towards zero, as FCVTZS requires. */
8367 double d = aarch64_get_FP_double (cpu, sn);
8368 int32_t value = (int32_t) d;
8369
8370 RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
8371
8372 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8373 /* Avoid sign extension to 64 bit. */
8374 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8375 }
8376
8377 /* 64 bit convert double to signed int truncate towards zero. */
8378 static void
8379 fcvtszd (sim_cpu *cpu)
8380 {
8381 unsigned sn = INSTR (9, 5);
8382 unsigned rd = INSTR (4, 0);
8383 /* A C double-to-int cast truncates towards zero, as FCVTZS requires. */
8384 double d = aarch64_get_FP_double (cpu, sn);
8385 int64_t value;
8386
8387 value = (int64_t) d;
8388
8389 RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
8390
8391 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8392 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8393 }
8394
8395 static void
8396 do_fcvtzu (sim_cpu *cpu)
8397 {
8398 /* instr[31] = size: 32-bit (0), 64-bit (1)
8399 instr[30,23] = 00111100
8400 instr[22] = type: single (0)/ double (1)
8401 instr[21] = 0 ==> fixed-point (uses precision field), 1 ==> integer
8402 instr[20,16] = 11001
8403 instr[15,10] = precision
8404 instr[9,5] = Rs
8405 instr[4,0] = Rd. */
8406
8407 unsigned rs = INSTR (9, 5);
8408 unsigned rd = INSTR (4, 0);
8409
8410 NYI_assert (30, 23, 0x3C);
8411 NYI_assert (20, 16, 0x19);
8412
8413 if (INSTR (21, 21) != 1)
8414 /* Convert to fixed point. */
8415 HALT_NYI;
8416
8417 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8418 if (INSTR (31, 31))
8419 {
8420 /* Convert to unsigned 64-bit integer. */
8421 if (INSTR (22, 22))
8422 {
8423 double d = aarch64_get_FP_double (cpu, rs);
8424 uint64_t value = (uint64_t) d;
8425
8426 /* Do not raise an exception if we have reached ULONG_MAX. */
8427 if (value != (1ULL << 63))
8428 RAISE_EXCEPTIONS (d, value, DOUBLE, ULONG);
8429
8430 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8431 }
8432 else
8433 {
8434 float f = aarch64_get_FP_float (cpu, rs);
8435 uint64_t value = (uint64_t) f;
8436
8437 /* Do not raise an exception if we have reached ULONG_MAX. */
8438 if (value != (1ULL << 63))
8439 RAISE_EXCEPTIONS (f, value, FLOAT, ULONG);
8440
8441 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8442 }
8443 }
8444 else
8445 {
8446 uint32_t value;
8447
8448 /* Convert to unsigned 32-bit integer. */
8449 if (INSTR (22, 22))
8450 {
8451 double d = aarch64_get_FP_double (cpu, rs);
8452
8453 value = (uint32_t) d;
8454 /* Do not raise an exception if we have reached UINT_MAX. */
8455 if (value != (1UL << 31))
8456 RAISE_EXCEPTIONS (d, value, DOUBLE, UINT);
8457 }
8458 else
8459 {
8460 float f = aarch64_get_FP_float (cpu, rs);
8461
8462 value = (uint32_t) f;
8463 /* Do not raise an exception if we have reached UINT_MAX. */
8464 if (value != (1UL << 31))
8465 RAISE_EXCEPTIONS (f, value, FLOAT, UINT);
8466 }
8467
8468 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8469 }
8470 }
8471
8472 static void
8473 do_UCVTF (sim_cpu *cpu)
8474 {
8475 /* instr[31] = size: 32-bit (0), 64-bit (1)
8476 instr[30,23] = 001 1110 0
8477 instr[22] = type: single (0)/ double (1)
     instr[21]    = 1 ==> integer conversion, 0 ==> fixed-point conversion
8479 instr[20,16] = 0 0011
8480 instr[15,10] = precision
8481 instr[9,5] = Rs
8482 instr[4,0] = Rd. */
8483
8484 unsigned rs = INSTR (9, 5);
8485 unsigned rd = INSTR (4, 0);
8486
8487 NYI_assert (30, 23, 0x3C);
8488 NYI_assert (20, 16, 0x03);
8489
8490 if (INSTR (21, 21) != 1)
8491 HALT_NYI;
8492
8493 /* FIXME: Add exception raising. */
8494 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8495 if (INSTR (31, 31))
8496 {
8497 uint64_t value = aarch64_get_reg_u64 (cpu, rs, NO_SP);
8498
8499 if (INSTR (22, 22))
8500 aarch64_set_FP_double (cpu, rd, (double) value);
8501 else
8502 aarch64_set_FP_float (cpu, rd, (float) value);
8503 }
8504 else
8505 {
8506 uint32_t value = aarch64_get_reg_u32 (cpu, rs, NO_SP);
8507
8508 if (INSTR (22, 22))
8509 aarch64_set_FP_double (cpu, rd, (double) value);
8510 else
8511 aarch64_set_FP_float (cpu, rd, (float) value);
8512 }
8513 }
8514
8515 static void
8516 float_vector_move (sim_cpu *cpu)
8517 {
8518 /* instr[31,17] == 100 1111 0101 0111
8519 instr[16] ==> direction 0=> to GR, 1=> from GR
     instr[15,10] ==> must be 0, ow UNALLOC
8521 instr[9,5] ==> source
8522 instr[4,0] ==> dest. */
8523
8524 unsigned rn = INSTR (9, 5);
8525 unsigned rd = INSTR (4, 0);
8526
8527 NYI_assert (31, 17, 0x4F57);
8528
8529 if (INSTR (15, 10) != 0)
8530 HALT_UNALLOC;
8531
8532 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8533 if (INSTR (16, 16))
8534 aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_reg_u64 (cpu, rn, NO_SP));
8535 else
8536 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, rn, 1));
8537 }
8538
8539 static void
8540 dexSimpleFPIntegerConvert (sim_cpu *cpu)
8541 {
8542 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
     instr[30]    = 0
8544 instr[29] = S : 0 ==> OK, 1 ==> UNALLOC
8545 instr[28,25] = 1111
8546 instr[24] = 0
8547 instr[23,22] = type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8548 instr[21] = 1
8549 instr[20,19] = rmode
8550 instr[18,16] = opcode
     instr[15,10] = 00 0000  */
8552
8553 uint32_t rmode_opcode;
8554 uint32_t size_type;
8555 uint32_t type;
8556 uint32_t size;
8557 uint32_t S;
8558
8559 if (INSTR (31, 17) == 0x4F57)
8560 {
8561 float_vector_move (cpu);
8562 return;
8563 }
8564
8565 size = INSTR (31, 31);
8566 S = INSTR (29, 29);
8567 if (S != 0)
8568 HALT_UNALLOC;
8569
8570 type = INSTR (23, 22);
8571 if (type > 1)
8572 HALT_UNALLOC;
8573
8574 rmode_opcode = INSTR (20, 16);
8575 size_type = (size << 1) | type; /* 0==32f, 1==32d, 2==64f, 3==64d. */
8576
8577 switch (rmode_opcode)
8578 {
8579 case 2: /* SCVTF. */
8580 switch (size_type)
8581 {
8582 case 0: scvtf32 (cpu); return;
8583 case 1: scvtd32 (cpu); return;
8584 case 2: scvtf (cpu); return;
8585 case 3: scvtd (cpu); return;
8586 }
8587
8588 case 6: /* FMOV GR, Vec. */
8589 switch (size_type)
8590 {
8591 case 0: gfmovs (cpu); return;
8592 case 3: gfmovd (cpu); return;
8593 default: HALT_UNALLOC;
8594 }
8595
8596 case 7: /* FMOV vec, GR. */
8597 switch (size_type)
8598 {
8599 case 0: fgmovs (cpu); return;
8600 case 3: fgmovd (cpu); return;
8601 default: HALT_UNALLOC;
8602 }
8603
8604 case 24: /* FCVTZS. */
8605 switch (size_type)
8606 {
8607 case 0: fcvtszs32 (cpu); return;
8608 case 1: fcvtszd32 (cpu); return;
8609 case 2: fcvtszs (cpu); return;
8610 case 3: fcvtszd (cpu); return;
8611 }
8612
8613 case 25: do_fcvtzu (cpu); return;
8614 case 3: do_UCVTF (cpu); return;
8615
8616 case 0: /* FCVTNS. */
8617 case 1: /* FCVTNU. */
8618 case 4: /* FCVTAS. */
8619 case 5: /* FCVTAU. */
8620 case 8: /* FCVPTS. */
8621 case 9: /* FCVTPU. */
8622 case 16: /* FCVTMS. */
8623 case 17: /* FCVTMU. */
8624 default:
8625 HALT_NYI;
8626 }
8627 }
8628
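/* Set NZCV exactly as an FCMP would: equal ==> Z|C (0110),
   less than ==> N (1000), greater than ==> C (0010),
   unordered ==> C|V (0011).  */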
8629 static void
8630 set_flags_for_float_compare (sim_cpu *cpu, float fvalue1, float fvalue2)
8631 {
8632 uint32_t flags;
8633
8634 /* FIXME: Add exception raising. */
8635 if (isnan (fvalue1) || isnan (fvalue2))
8636 flags = C|V;
8637 else if (isinf (fvalue1) && isinf (fvalue2))
8638 {
      /* Subtracting two infinities may give a NaN.  We only need to
	 compare the signs, which we can get from isinf -- note this
	 relies on glibc returning +1/-1 according to sign, where C99
	 only guarantees a nonzero result.  */
8641 int result = isinf (fvalue1) - isinf (fvalue2);
8642
8643 if (result == 0)
8644 flags = Z|C;
8645 else if (result < 0)
8646 flags = N;
8647 else /* (result > 0). */
8648 flags = C;
8649 }
8650 else
8651 {
8652 float result = fvalue1 - fvalue2;
8653
8654 if (result == 0.0)
8655 flags = Z|C;
8656 else if (result < 0)
8657 flags = N;
8658 else /* (result > 0). */
8659 flags = C;
8660 }
8661
8662 aarch64_set_CPSR (cpu, flags);
8663 }
8664
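/* Float compare -- Invalid Operation exception only on signaling NaNs.  */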
8665 static void
8666 fcmps (sim_cpu *cpu)
8667 {
8668 unsigned sm = INSTR (20, 16);
8669 unsigned sn = INSTR ( 9, 5);
8670
8671 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8672 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8673
8674 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8675 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8676 }
8677
8678 /* Float compare to zero -- Invalid Operation exception
8679 only on signaling NaNs. */
8680 static void
8681 fcmpzs (sim_cpu *cpu)
8682 {
8683 unsigned sn = INSTR ( 9, 5);
8684 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8685
8686 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8687 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8688 }
8689
8690 /* Float compare -- Invalid Operation exception on all NaNs. */
8691 static void
8692 fcmpes (sim_cpu *cpu)
8693 {
8694 unsigned sm = INSTR (20, 16);
8695 unsigned sn = INSTR ( 9, 5);
8696
8697 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8698 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8699
8700 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8701 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8702 }
8703
8704 /* Float compare to zero -- Invalid Operation exception on all NaNs. */
8705 static void
8706 fcmpzes (sim_cpu *cpu)
8707 {
8708 unsigned sn = INSTR ( 9, 5);
8709 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8710
8711 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8712 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8713 }
8714
8715 static void
8716 set_flags_for_double_compare (sim_cpu *cpu, double dval1, double dval2)
8717 {
8718 uint32_t flags;
8719
8720 /* FIXME: Add exception raising. */
8721 if (isnan (dval1) || isnan (dval2))
8722 flags = C|V;
8723 else if (isinf (dval1) && isinf (dval2))
8724 {
      /* Subtracting two infinities may give a NaN.  We only need to
	 compare the signs, which we can get from isinf -- note this
	 relies on glibc returning +1/-1 according to sign, where C99
	 only guarantees a nonzero result.  */
8727 int result = isinf (dval1) - isinf (dval2);
8728
8729 if (result == 0)
8730 flags = Z|C;
8731 else if (result < 0)
8732 flags = N;
8733 else /* (result > 0). */
8734 flags = C;
8735 }
8736 else
8737 {
8738 double result = dval1 - dval2;
8739
8740 if (result == 0.0)
8741 flags = Z|C;
8742 else if (result < 0)
8743 flags = N;
8744 else /* (result > 0). */
8745 flags = C;
8746 }
8747
8748 aarch64_set_CPSR (cpu, flags);
8749 }
8750
8751 /* Double compare -- Invalid Operation exception only on signaling NaNs. */
8752 static void
8753 fcmpd (sim_cpu *cpu)
8754 {
8755 unsigned sm = INSTR (20, 16);
8756 unsigned sn = INSTR ( 9, 5);
8757
8758 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8759 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8760
8761 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8762 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8763 }
8764
8765 /* Double compare to zero -- Invalid Operation exception
8766 only on signaling NaNs. */
8767 static void
8768 fcmpzd (sim_cpu *cpu)
8769 {
8770 unsigned sn = INSTR ( 9, 5);
8771 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8772
8773 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8774 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8775 }
8776
8777 /* Double compare -- Invalid Operation exception on all NaNs. */
8778 static void
8779 fcmped (sim_cpu *cpu)
8780 {
8781 unsigned sm = INSTR (20, 16);
8782 unsigned sn = INSTR ( 9, 5);
8783
8784 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8785 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8786
8787 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8788 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8789 }
8790
8791 /* Double compare to zero -- Invalid Operation exception on all NaNs. */
8792 static void
8793 fcmpzed (sim_cpu *cpu)
8794 {
8795 unsigned sn = INSTR ( 9, 5);
8796 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8797
8798 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8799 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8800 }
8801
8802 static void
8803 dexSimpleFPCompare (sim_cpu *cpu)
8804 {
8805 /* assert instr[28,25] == 1111
     instr[30] = 0, instr[24] = 0, instr[21] = 1, instr[13,10] = 1000
     instr[31] = M : 0 ==> OK, 1 ==> UNALLOC
     instr[29] ==> S :  0 ==> OK, 1 ==> UNALLOC
     instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8810 instr[15,14] ==> op : 00 ==> OK, ow ==> UNALLOC
8811 instr[4,0] ==> opcode2 : 00000 ==> FCMP, 10000 ==> FCMPE,
8812 01000 ==> FCMPZ, 11000 ==> FCMPEZ,
8813 ow ==> UNALLOC */
8814 uint32_t dispatch;
8815 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
8816 uint32_t type = INSTR (23, 22);
8817 uint32_t op = INSTR (15, 14);
8818 uint32_t op2_2_0 = INSTR (2, 0);
8819
8820 if (op2_2_0 != 0)
8821 HALT_UNALLOC;
8822
8823 if (M_S != 0)
8824 HALT_UNALLOC;
8825
8826 if (type > 1)
8827 HALT_UNALLOC;
8828
8829 if (op != 0)
8830 HALT_UNALLOC;
8831
8832 /* dispatch on type and top 2 bits of opcode. */
8833 dispatch = (type << 2) | INSTR (4, 3);
8834
8835 switch (dispatch)
8836 {
8837 case 0: fcmps (cpu); return;
8838 case 1: fcmpzs (cpu); return;
8839 case 2: fcmpes (cpu); return;
8840 case 3: fcmpzes (cpu); return;
8841 case 4: fcmpd (cpu); return;
8842 case 5: fcmpzd (cpu); return;
8843 case 6: fcmped (cpu); return;
8844 case 7: fcmpzed (cpu); return;
8845 }
8846 }
8847
8848 static void
8849 do_scalar_FADDP (sim_cpu *cpu)
8850 {
8851 /* instr [31,23] = 0111 1110 0
8852 instr [22] = single(0)/double(1)
8853 instr [21,10] = 11 0000 1101 10
8854 instr [9,5] = Fn
8855 instr [4,0] = Fd. */
8856
8857 unsigned Fn = INSTR (9, 5);
8858 unsigned Fd = INSTR (4, 0);
8859
8860 NYI_assert (31, 23, 0x0FC);
8861 NYI_assert (21, 10, 0xC36);
8862
8863 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8864 if (INSTR (22, 22))
8865 {
8866 double val1 = aarch64_get_vec_double (cpu, Fn, 0);
8867 double val2 = aarch64_get_vec_double (cpu, Fn, 1);
8868
8869 aarch64_set_FP_double (cpu, Fd, val1 + val2);
8870 }
8871 else
8872 {
8873 float val1 = aarch64_get_vec_float (cpu, Fn, 0);
8874 float val2 = aarch64_get_vec_float (cpu, Fn, 1);
8875
8876 aarch64_set_FP_float (cpu, Fd, val1 + val2);
8877 }
8878 }
8879
8880 /* Floating point absolute difference. */
8881
8882 static void
8883 do_scalar_FABD (sim_cpu *cpu)
8884 {
8885 /* instr [31,23] = 0111 1110 1
8886 instr [22] = float(0)/double(1)
8887 instr [21] = 1
8888 instr [20,16] = Rm
8889 instr [15,10] = 1101 01
8890 instr [9, 5] = Rn
8891 instr [4, 0] = Rd. */
8892
8893 unsigned rm = INSTR (20, 16);
8894 unsigned rn = INSTR (9, 5);
8895 unsigned rd = INSTR (4, 0);
8896
8897 NYI_assert (31, 23, 0x0FD);
8898 NYI_assert (21, 21, 1);
8899 NYI_assert (15, 10, 0x35);
8900
8901 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8902 if (INSTR (22, 22))
8903 aarch64_set_FP_double (cpu, rd,
8904 fabs (aarch64_get_FP_double (cpu, rn)
8905 - aarch64_get_FP_double (cpu, rm)));
8906 else
8907 aarch64_set_FP_float (cpu, rd,
8908 fabsf (aarch64_get_FP_float (cpu, rn)
8909 - aarch64_get_FP_float (cpu, rm)));
8910 }
8911
8912 static void
8913 do_scalar_CMGT (sim_cpu *cpu)
8914 {
8915 /* instr [31,21] = 0101 1110 111
8916 instr [20,16] = Rm
8917 instr [15,10] = 00 1101
8918 instr [9, 5] = Rn
8919 instr [4, 0] = Rd. */
8920
8921 unsigned rm = INSTR (20, 16);
8922 unsigned rn = INSTR (9, 5);
8923 unsigned rd = INSTR (4, 0);
8924
8925 NYI_assert (31, 21, 0x2F7);
8926 NYI_assert (15, 10, 0x0D);
8927
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* CMGT is a signed comparison (the unsigned form is CMHI).  */
  aarch64_set_vec_u64 (cpu, rd, 0,
		       aarch64_get_vec_s64 (cpu, rn, 0) >
		       aarch64_get_vec_s64 (cpu, rm, 0) ? -1L : 0L);
8932 }
8933
8934 static void
8935 do_scalar_USHR (sim_cpu *cpu)
8936 {
8937 /* instr [31,23] = 0111 1111 0
8938 instr [22,16] = shift amount
8939 instr [15,10] = 0000 01
8940 instr [9, 5] = Rn
8941 instr [4, 0] = Rd. */
8942
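  /* For the 64-bit scalar form immh<3> is set, so the shift amount
     is encoded as 128 - immh:immb, giving a right shift of 1..64.  */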
8943 unsigned amount = 128 - INSTR (22, 16);
8944 unsigned rn = INSTR (9, 5);
8945 unsigned rd = INSTR (4, 0);
8946
8947 NYI_assert (31, 23, 0x0FE);
8948 NYI_assert (15, 10, 0x01);
8949
8950 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8951 aarch64_set_vec_u64 (cpu, rd, 0,
8952 aarch64_get_vec_u64 (cpu, rn, 0) >> amount);
8953 }
8954
8955 static void
8956 do_scalar_SSHL (sim_cpu *cpu)
8957 {
8958 /* instr [31,21] = 0101 1110 111
8959 instr [20,16] = Rm
8960 instr [15,10] = 0100 01
8961 instr [9, 5] = Rn
8962 instr [4, 0] = Rd. */
8963
8964 unsigned rm = INSTR (20, 16);
8965 unsigned rn = INSTR (9, 5);
8966 unsigned rd = INSTR (4, 0);
8967 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
8968
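  /* SSHL takes its shift count from the signed low byte of Rm;
     a negative count shifts right arithmetically instead.  */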
8969 NYI_assert (31, 21, 0x2F7);
8970 NYI_assert (15, 10, 0x11);
8971
8972 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8973 if (shift >= 0)
8974 aarch64_set_vec_s64 (cpu, rd, 0,
8975 aarch64_get_vec_s64 (cpu, rn, 0) << shift);
8976 else
8977 aarch64_set_vec_s64 (cpu, rd, 0,
8978 aarch64_get_vec_s64 (cpu, rn, 0) >> - shift);
8979 }
8980
8981 /* Floating point scalar compare greater than or equal to 0. */
8982 static void
8983 do_scalar_FCMGE_zero (sim_cpu *cpu)
8984 {
8985 /* instr [31,23] = 0111 1110 1
8986 instr [22,22] = size
8987 instr [21,16] = 1000 00
8988 instr [15,10] = 1100 10
8989 instr [9, 5] = Rn
8990 instr [4, 0] = Rd. */
8991
8992 unsigned size = INSTR (22, 22);
8993 unsigned rn = INSTR (9, 5);
8994 unsigned rd = INSTR (4, 0);
8995
8996 NYI_assert (31, 23, 0x0FD);
8997 NYI_assert (21, 16, 0x20);
8998 NYI_assert (15, 10, 0x32);
8999
9000 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9001 if (size)
9002 aarch64_set_vec_u64 (cpu, rd, 0,
9003 aarch64_get_vec_double (cpu, rn, 0) >= 0.0 ? -1 : 0);
9004 else
9005 aarch64_set_vec_u32 (cpu, rd, 0,
9006 aarch64_get_vec_float (cpu, rn, 0) >= 0.0 ? -1 : 0);
9007 }
9008
9009 /* Floating point scalar compare less than or equal to 0. */
9010 static void
9011 do_scalar_FCMLE_zero (sim_cpu *cpu)
9012 {
9013 /* instr [31,23] = 0111 1110 1
9014 instr [22,22] = size
9015 instr [21,16] = 1000 00
9016 instr [15,10] = 1101 10
9017 instr [9, 5] = Rn
9018 instr [4, 0] = Rd. */
9019
9020 unsigned size = INSTR (22, 22);
9021 unsigned rn = INSTR (9, 5);
9022 unsigned rd = INSTR (4, 0);
9023
9024 NYI_assert (31, 23, 0x0FD);
9025 NYI_assert (21, 16, 0x20);
9026 NYI_assert (15, 10, 0x36);
9027
9028 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9029 if (size)
9030 aarch64_set_vec_u64 (cpu, rd, 0,
9031 aarch64_get_vec_double (cpu, rn, 0) <= 0.0 ? -1 : 0);
9032 else
9033 aarch64_set_vec_u32 (cpu, rd, 0,
9034 aarch64_get_vec_float (cpu, rn, 0) <= 0.0 ? -1 : 0);
9035 }
9036
9037 /* Floating point scalar compare greater than 0. */
9038 static void
9039 do_scalar_FCMGT_zero (sim_cpu *cpu)
9040 {
9041 /* instr [31,23] = 0101 1110 1
9042 instr [22,22] = size
9043 instr [21,16] = 1000 00
9044 instr [15,10] = 1100 10
9045 instr [9, 5] = Rn
9046 instr [4, 0] = Rd. */
9047
9048 unsigned size = INSTR (22, 22);
9049 unsigned rn = INSTR (9, 5);
9050 unsigned rd = INSTR (4, 0);
9051
9052 NYI_assert (31, 23, 0x0BD);
9053 NYI_assert (21, 16, 0x20);
9054 NYI_assert (15, 10, 0x32);
9055
9056 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9057 if (size)
9058 aarch64_set_vec_u64 (cpu, rd, 0,
9059 aarch64_get_vec_double (cpu, rn, 0) > 0.0 ? -1 : 0);
9060 else
9061 aarch64_set_vec_u32 (cpu, rd, 0,
9062 aarch64_get_vec_float (cpu, rn, 0) > 0.0 ? -1 : 0);
9063 }
9064
9065 /* Floating point scalar compare equal to 0. */
9066 static void
9067 do_scalar_FCMEQ_zero (sim_cpu *cpu)
9068 {
9069 /* instr [31,23] = 0101 1110 1
9070 instr [22,22] = size
9071 instr [21,16] = 1000 00
9072 instr [15,10] = 1101 10
9073 instr [9, 5] = Rn
9074 instr [4, 0] = Rd. */
9075
9076 unsigned size = INSTR (22, 22);
9077 unsigned rn = INSTR (9, 5);
9078 unsigned rd = INSTR (4, 0);
9079
9080 NYI_assert (31, 23, 0x0BD);
9081 NYI_assert (21, 16, 0x20);
9082 NYI_assert (15, 10, 0x36);
9083
9084 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9085 if (size)
9086 aarch64_set_vec_u64 (cpu, rd, 0,
9087 aarch64_get_vec_double (cpu, rn, 0) == 0.0 ? -1 : 0);
9088 else
9089 aarch64_set_vec_u32 (cpu, rd, 0,
9090 aarch64_get_vec_float (cpu, rn, 0) == 0.0 ? -1 : 0);
9091 }
9092
9093 /* Floating point scalar compare less than 0. */
9094 static void
9095 do_scalar_FCMLT_zero (sim_cpu *cpu)
9096 {
9097 /* instr [31,23] = 0101 1110 1
9098 instr [22,22] = size
9099 instr [21,16] = 1000 00
9100 instr [15,10] = 1110 10
9101 instr [9, 5] = Rn
9102 instr [4, 0] = Rd. */
9103
9104 unsigned size = INSTR (22, 22);
9105 unsigned rn = INSTR (9, 5);
9106 unsigned rd = INSTR (4, 0);
9107
9108 NYI_assert (31, 23, 0x0BD);
9109 NYI_assert (21, 16, 0x20);
9110 NYI_assert (15, 10, 0x3A);
9111
9112 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9113 if (size)
9114 aarch64_set_vec_u64 (cpu, rd, 0,
9115 aarch64_get_vec_double (cpu, rn, 0) < 0.0 ? -1 : 0);
9116 else
9117 aarch64_set_vec_u32 (cpu, rd, 0,
9118 aarch64_get_vec_float (cpu, rn, 0) < 0.0 ? -1 : 0);
9119 }
9120
9121 static void
9122 do_scalar_shift (sim_cpu *cpu)
9123 {
9124 /* instr [31,23] = 0101 1111 0
9125 instr [22,16] = shift amount
9126 instr [15,10] = 0101 01 [SHL]
9127 instr [15,10] = 0000 01 [SSHR]
9128 instr [9, 5] = Rn
9129 instr [4, 0] = Rd. */
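  /* With immh<3> (bit 22) set the element size is 64 bits, so SSHR
     encodes its shift as 128 - immh:immb (1..64) and SHL encodes it
     as immh:immb - 64 (0..63).  E.g. immh:immb == 0x78 means either
     SSHR #8 or SHL #56, depending on the opcode bits.  */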
9130
9131 unsigned rn = INSTR (9, 5);
9132 unsigned rd = INSTR (4, 0);
9133 unsigned amount;
9134
9135 NYI_assert (31, 23, 0x0BE);
9136
9137 if (INSTR (22, 22) == 0)
9138 HALT_UNALLOC;
9139
9140 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9141 switch (INSTR (15, 10))
9142 {
9143 case 0x01: /* SSHR */
9144 amount = 128 - INSTR (22, 16);
9145 aarch64_set_vec_s64 (cpu, rd, 0,
9146 aarch64_get_vec_s64 (cpu, rn, 0) >> amount);
9147 return;
9148 case 0x15: /* SHL */
9149 amount = INSTR (22, 16) - 64;
9150 aarch64_set_vec_u64 (cpu, rd, 0,
9151 aarch64_get_vec_u64 (cpu, rn, 0) << amount);
9152 return;
9153 default:
9154 HALT_NYI;
9155 }
9156 }
9157
9158 /* FCMEQ FCMGT FCMGE. */
9159 static void
9160 do_scalar_FCM (sim_cpu *cpu)
9161 {
9162 /* instr [31,30] = 01
9163 instr [29] = U
9164 instr [28,24] = 1 1110
9165 instr [23] = E
9166 instr [22] = size
9167 instr [21] = 1
9168 instr [20,16] = Rm
9169 instr [15,12] = 1110
9170 instr [11] = AC
9171 instr [10] = 1
9172 instr [9, 5] = Rn
9173 instr [4, 0] = Rd. */
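  /* The E, U and ac bits jointly select the comparison:
     000 ==> FCMEQ, 010 ==> FCMGE, 011 ==> FACGE (on absolute values),
     110 ==> FCMGT, 111 ==> FACGT.  */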
9174
9175 unsigned rm = INSTR (20, 16);
9176 unsigned rn = INSTR (9, 5);
9177 unsigned rd = INSTR (4, 0);
9178 unsigned EUac = (INSTR (23, 23) << 2) | (INSTR (29, 29) << 1) | INSTR (11, 11);
9179 unsigned result;
9180 float val1;
9181 float val2;
9182
9183 NYI_assert (31, 30, 1);
9184 NYI_assert (28, 24, 0x1E);
9185 NYI_assert (21, 21, 1);
9186 NYI_assert (15, 12, 0xE);
9187 NYI_assert (10, 10, 1);
9188
9189 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9190 if (INSTR (22, 22))
9191 {
9192 double val1 = aarch64_get_FP_double (cpu, rn);
9193 double val2 = aarch64_get_FP_double (cpu, rm);
9194
9195 switch (EUac)
9196 {
9197 case 0: /* 000 */
9198 result = val1 == val2;
9199 break;
9200
9201 case 3: /* 011 */
9202 val1 = fabs (val1);
9203 val2 = fabs (val2);
9204 /* Fall through. */
9205 case 2: /* 010 */
9206 result = val1 >= val2;
9207 break;
9208
9209 case 7: /* 111 */
9210 val1 = fabs (val1);
9211 val2 = fabs (val2);
9212 /* Fall through. */
9213 case 6: /* 110 */
9214 result = val1 > val2;
9215 break;
9216
9217 default:
9218 HALT_UNALLOC;
9219 }
9220
      /* The double-precision compare yields a 64-bit mask.  */
      aarch64_set_vec_u64 (cpu, rd, 0, result ? -1ULL : 0);
9222 return;
9223 }
9224
9225 val1 = aarch64_get_FP_float (cpu, rn);
9226 val2 = aarch64_get_FP_float (cpu, rm);
9227
9228 switch (EUac)
9229 {
9230 case 0: /* 000 */
9231 result = val1 == val2;
9232 break;
9233
9234 case 3: /* 011 */
9235 val1 = fabsf (val1);
9236 val2 = fabsf (val2);
9237 /* Fall through. */
9238 case 2: /* 010 */
9239 result = val1 >= val2;
9240 break;
9241
9242 case 7: /* 111 */
9243 val1 = fabsf (val1);
9244 val2 = fabsf (val2);
9245 /* Fall through. */
9246 case 6: /* 110 */
9247 result = val1 > val2;
9248 break;
9249
9250 default:
9251 HALT_UNALLOC;
9252 }
9253
9254 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
9255 }
9256
9257 /* An alias of DUP. */
9258 static void
9259 do_scalar_MOV (sim_cpu *cpu)
9260 {
9261 /* instr [31,21] = 0101 1110 000
9262 instr [20,16] = imm5
9263 instr [15,10] = 0000 01
9264 instr [9, 5] = Rn
9265 instr [4, 0] = Rd. */
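  /* imm5 encodes both element size and lane: the lowest set bit
     selects the size (bit 16 ==> byte, ... bit 19 ==> doubleword)
     and the bits above it give the source index.  */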
9266
9267 unsigned rn = INSTR (9, 5);
9268 unsigned rd = INSTR (4, 0);
9269 unsigned index;
9270
9271 NYI_assert (31, 21, 0x2F0);
9272 NYI_assert (15, 10, 0x01);
9273
9274 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9275 if (INSTR (16, 16))
9276 {
9277 /* 8-bit. */
9278 index = INSTR (20, 17);
9279 aarch64_set_vec_u8
9280 (cpu, rd, 0, aarch64_get_vec_u8 (cpu, rn, index));
9281 }
9282 else if (INSTR (17, 17))
9283 {
9284 /* 16-bit. */
9285 index = INSTR (20, 18);
9286 aarch64_set_vec_u16
9287 (cpu, rd, 0, aarch64_get_vec_u16 (cpu, rn, index));
9288 }
9289 else if (INSTR (18, 18))
9290 {
9291 /* 32-bit. */
9292 index = INSTR (20, 19);
9293 aarch64_set_vec_u32
9294 (cpu, rd, 0, aarch64_get_vec_u32 (cpu, rn, index));
9295 }
9296 else if (INSTR (19, 19))
9297 {
9298 /* 64-bit. */
9299 index = INSTR (20, 20);
9300 aarch64_set_vec_u64
9301 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, index));
9302 }
9303 else
9304 HALT_UNALLOC;
9305 }
9306
9307 static void
9308 do_scalar_NEG (sim_cpu *cpu)
9309 {
9310 /* instr [31,10] = 0111 1110 1110 0000 1011 10
9311 instr [9, 5] = Rn
9312 instr [4, 0] = Rd. */
9313
9314 unsigned rn = INSTR (9, 5);
9315 unsigned rd = INSTR (4, 0);
9316
9317 NYI_assert (31, 10, 0x1FB82E);
9318
9319 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9320 aarch64_set_vec_u64 (cpu, rd, 0, - aarch64_get_vec_u64 (cpu, rn, 0));
9321 }
9322
9323 static void
9324 do_scalar_USHL (sim_cpu *cpu)
9325 {
9326 /* instr [31,21] = 0111 1110 111
9327 instr [20,16] = Rm
9328 instr [15,10] = 0100 01
9329 instr [9, 5] = Rn
9330 instr [4, 0] = Rd. */
9331
9332 unsigned rm = INSTR (20, 16);
9333 unsigned rn = INSTR (9, 5);
9334 unsigned rd = INSTR (4, 0);
9335 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
9336
9337 NYI_assert (31, 21, 0x3F7);
9338 NYI_assert (15, 10, 0x11);
9339
9340 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9341 if (shift >= 0)
9342 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) << shift);
9343 else
9344 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) >> - shift);
9345 }
9346
9347 static void
9348 do_double_add (sim_cpu *cpu)
9349 {
9350 /* instr [31,21] = 0101 1110 111
9351 instr [20,16] = Fn
9352 instr [15,10] = 1000 01
9353 instr [9,5] = Fm
9354 instr [4,0] = Fd. */
9355 unsigned Fd;
9356 unsigned Fm;
9357 unsigned Fn;
9358 double val1;
9359 double val2;
9360
9361 NYI_assert (31, 21, 0x2F7);
9362 NYI_assert (15, 10, 0x21);
9363
9364 Fd = INSTR (4, 0);
9365 Fm = INSTR (9, 5);
9366 Fn = INSTR (20, 16);
9367
9368 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9369 val1 = aarch64_get_FP_double (cpu, Fm);
9370 val2 = aarch64_get_FP_double (cpu, Fn);
9371
9372 aarch64_set_FP_double (cpu, Fd, val1 + val2);
9373 }
9374
9375 static void
9376 do_scalar_UCVTF (sim_cpu *cpu)
9377 {
9378 /* instr [31,23] = 0111 1110 0
9379 instr [22] = single(0)/double(1)
9380 instr [21,10] = 10 0001 1101 10
9381 instr [9,5] = rn
9382 instr [4,0] = rd. */
9383
9384 unsigned rn = INSTR (9, 5);
9385 unsigned rd = INSTR (4, 0);
9386
9387 NYI_assert (31, 23, 0x0FC);
9388 NYI_assert (21, 10, 0x876);
9389
9390 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9391 if (INSTR (22, 22))
9392 {
9393 uint64_t val = aarch64_get_vec_u64 (cpu, rn, 0);
9394
9395 aarch64_set_vec_double (cpu, rd, 0, (double) val);
9396 }
9397 else
9398 {
9399 uint32_t val = aarch64_get_vec_u32 (cpu, rn, 0);
9400
9401 aarch64_set_vec_float (cpu, rd, 0, (float) val);
9402 }
9403 }
9404
9405 static void
9406 do_scalar_vec (sim_cpu *cpu)
9407 {
9408 /* instr [30] = 1. */
9409 /* instr [28,25] = 1111. */
9410 switch (INSTR (31, 23))
9411 {
9412 case 0xBC:
9413 switch (INSTR (15, 10))
9414 {
9415 case 0x01: do_scalar_MOV (cpu); return;
9416 case 0x39: do_scalar_FCM (cpu); return;
9417 case 0x3B: do_scalar_FCM (cpu); return;
9418 }
9419 break;
9420
9421 case 0xBE: do_scalar_shift (cpu); return;
9422
9423 case 0xFC:
9424 switch (INSTR (15, 10))
9425 {
9426 case 0x36:
9427 switch (INSTR (21, 16))
9428 {
9429 case 0x30: do_scalar_FADDP (cpu); return;
9430 case 0x21: do_scalar_UCVTF (cpu); return;
9431 }
9432 HALT_NYI;
9433 case 0x39: do_scalar_FCM (cpu); return;
9434 case 0x3B: do_scalar_FCM (cpu); return;
9435 }
9436 break;
9437
9438 case 0xFD:
9439 switch (INSTR (15, 10))
9440 {
9441 case 0x0D: do_scalar_CMGT (cpu); return;
9442 case 0x11: do_scalar_USHL (cpu); return;
9443 case 0x2E: do_scalar_NEG (cpu); return;
9444 case 0x32: do_scalar_FCMGE_zero (cpu); return;
9445 case 0x35: do_scalar_FABD (cpu); return;
9446 case 0x36: do_scalar_FCMLE_zero (cpu); return;
9447 case 0x39: do_scalar_FCM (cpu); return;
9448 case 0x3B: do_scalar_FCM (cpu); return;
9449 default:
9450 HALT_NYI;
9451 }
9452
9453 case 0xFE: do_scalar_USHR (cpu); return;
9454
9455 case 0xBD:
9456 switch (INSTR (15, 10))
9457 {
9458 case 0x21: do_double_add (cpu); return;
9459 case 0x11: do_scalar_SSHL (cpu); return;
9460 case 0x32: do_scalar_FCMGT_zero (cpu); return;
9461 case 0x36: do_scalar_FCMEQ_zero (cpu); return;
9462 case 0x3A: do_scalar_FCMLT_zero (cpu); return;
9463 default:
9464 HALT_NYI;
9465 }
9466
9467 default:
9468 HALT_NYI;
9469 }
9470 }
9471
9472 static void
9473 dexAdvSIMD1 (sim_cpu *cpu)
9474 {
9475 /* instr [28,25] = 1 111. */
9476
9477 /* We are currently only interested in the basic
9478 scalar fp routines which all have bit 30 = 0. */
9479 if (INSTR (30, 30))
9480 do_scalar_vec (cpu);
9481
9482 /* instr[24] is set for FP data processing 3-source and clear for
9483 all other basic scalar fp instruction groups. */
9484 else if (INSTR (24, 24))
9485 dexSimpleFPDataProc3Source (cpu);
9486
9487 /* instr[21] is clear for floating <-> fixed conversions and set for
9488 all other basic scalar fp instruction groups. */
9489 else if (!INSTR (21, 21))
9490 dexSimpleFPFixedConvert (cpu);
9491
9492 /* instr[11,10] : 01 ==> cond compare, 10 ==> Data Proc 2 Source
9493 11 ==> cond select, 00 ==> other. */
9494 else
9495 switch (INSTR (11, 10))
9496 {
9497 case 1: dexSimpleFPCondCompare (cpu); return;
9498 case 2: dexSimpleFPDataProc2Source (cpu); return;
9499 case 3: dexSimpleFPCondSelect (cpu); return;
9500
9501 default:
9502 /* Now an ordered cascade of tests.
9503 FP immediate has instr [12] == 1.
9504 FP compare has instr [13] == 1.
9505 FP Data Proc 1 Source has instr [14] == 1.
9506 FP floating <--> integer conversions has instr [15] == 0. */
9507 if (INSTR (12, 12))
9508 dexSimpleFPImmediate (cpu);
9509
9510 else if (INSTR (13, 13))
9511 dexSimpleFPCompare (cpu);
9512
9513 else if (INSTR (14, 14))
9514 dexSimpleFPDataProc1Source (cpu);
9515
9516 else if (!INSTR (15, 15))
9517 dexSimpleFPIntegerConvert (cpu);
9518
9519 else
9520 /* If we get here then instr[15] == 1 which means UNALLOC. */
9521 HALT_UNALLOC;
9522 }
9523 }
9524
9525 /* PC relative addressing. */
9526
9527 static void
9528 pcadr (sim_cpu *cpu)
9529 {
9530 /* instr[31] = op : 0 ==> ADR, 1 ==> ADRP
9531 instr[30,29] = immlo
9532 instr[23,5] = immhi. */
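  /* ADR computes PC + imm; ADRP computes (PC & ~0xfff) + (imm << 12).
     E.g. ADRP at PC 0x400123 with an immediate of 1 yields 0x401000.  */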
9533 uint64_t address;
9534 unsigned rd = INSTR (4, 0);
9535 uint32_t isPage = INSTR (31, 31);
  union { int64_t s64; uint64_t u64; } imm;
9537 uint64_t offset;
9538
9539 imm.s64 = simm64 (aarch64_get_instr (cpu), 23, 5);
9540 offset = imm.u64;
9541 offset = (offset << 2) | INSTR (30, 29);
9542
9543 address = aarch64_get_PC (cpu);
9544
9545 if (isPage)
9546 {
9547 offset <<= 12;
9548 address &= ~0xfff;
9549 }
9550
9551 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9552 aarch64_set_reg_u64 (cpu, rd, NO_SP, address + offset);
9553 }
9554
9555 /* Specific decode and execute for group Data Processing Immediate. */
9556
9557 static void
9558 dexPCRelAddressing (sim_cpu *cpu)
9559 {
9560 /* assert instr[28,24] = 10000. */
9561 pcadr (cpu);
9562 }
9563
/* Immediate logical.
   The bimm32/64 argument is constructed by replicating a 2, 4, 8,
   16, 32 or 64 bit sequence pulled out at decode and possibly
   inverting it.

   N.B. the output register (dest) can normally be Xn or SP;
   the exception occurs for flag setting instructions which may
   only use Xn for the output (dest).  The input register can
   never be SP.  */
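/* For example N=0, immr=0, imms=0x3c (0b111100) selects a 2-bit
   element containing a single 1 and no rotation, which replicates
   to 0x55555555 (32 bit) or 0x5555555555555555 (64 bit) -- assuming
   the usual v8 bitmask-immediate decode sketched above for
   expand_logical_immediate.  */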
9573
9574 /* 32 bit and immediate. */
9575 static void
9576 and32 (sim_cpu *cpu, uint32_t bimm)
9577 {
9578 unsigned rn = INSTR (9, 5);
9579 unsigned rd = INSTR (4, 0);
9580
9581 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9582 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9583 aarch64_get_reg_u32 (cpu, rn, NO_SP) & bimm);
9584 }
9585
9586 /* 64 bit and immediate. */
9587 static void
9588 and64 (sim_cpu *cpu, uint64_t bimm)
9589 {
9590 unsigned rn = INSTR (9, 5);
9591 unsigned rd = INSTR (4, 0);
9592
9593 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9594 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9595 aarch64_get_reg_u64 (cpu, rn, NO_SP) & bimm);
9596 }
9597
9598 /* 32 bit and immediate set flags. */
9599 static void
9600 ands32 (sim_cpu *cpu, uint32_t bimm)
9601 {
9602 unsigned rn = INSTR (9, 5);
9603 unsigned rd = INSTR (4, 0);
9604
9605 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9606 uint32_t value2 = bimm;
9607
9608 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9609 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9610 set_flags_for_binop32 (cpu, value1 & value2);
9611 }
9612
9613 /* 64 bit and immediate set flags. */
9614 static void
9615 ands64 (sim_cpu *cpu, uint64_t bimm)
9616 {
9617 unsigned rn = INSTR (9, 5);
9618 unsigned rd = INSTR (4, 0);
9619
9620 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9621 uint64_t value2 = bimm;
9622
9623 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9624 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9625 set_flags_for_binop64 (cpu, value1 & value2);
9626 }
9627
9628 /* 32 bit exclusive or immediate. */
9629 static void
9630 eor32 (sim_cpu *cpu, uint32_t bimm)
9631 {
9632 unsigned rn = INSTR (9, 5);
9633 unsigned rd = INSTR (4, 0);
9634
9635 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9636 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9637 aarch64_get_reg_u32 (cpu, rn, NO_SP) ^ bimm);
9638 }
9639
9640 /* 64 bit exclusive or immediate. */
9641 static void
9642 eor64 (sim_cpu *cpu, uint64_t bimm)
9643 {
9644 unsigned rn = INSTR (9, 5);
9645 unsigned rd = INSTR (4, 0);
9646
9647 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9648 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9649 aarch64_get_reg_u64 (cpu, rn, NO_SP) ^ bimm);
9650 }
9651
9652 /* 32 bit or immediate. */
9653 static void
9654 orr32 (sim_cpu *cpu, uint32_t bimm)
9655 {
9656 unsigned rn = INSTR (9, 5);
9657 unsigned rd = INSTR (4, 0);
9658
9659 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9660 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9661 aarch64_get_reg_u32 (cpu, rn, NO_SP) | bimm);
9662 }
9663
9664 /* 64 bit or immediate. */
9665 static void
9666 orr64 (sim_cpu *cpu, uint64_t bimm)
9667 {
9668 unsigned rn = INSTR (9, 5);
9669 unsigned rd = INSTR (4, 0);
9670
9671 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9672 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9673 aarch64_get_reg_u64 (cpu, rn, NO_SP) | bimm);
9674 }
9675
9676 /* Logical shifted register.
9677 These allow an optional LSL, ASR, LSR or ROR to the second source
9678 register with a count up to the register bit count.
   N.B. register args may not be SP.  */
9680
9681 /* 32 bit AND shifted register. */
9682 static void
9683 and32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9684 {
9685 unsigned rm = INSTR (20, 16);
9686 unsigned rn = INSTR (9, 5);
9687 unsigned rd = INSTR (4, 0);
9688
9689 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9690 aarch64_set_reg_u64
9691 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9692 & shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9693 }
9694
9695 /* 64 bit AND shifted register. */
9696 static void
9697 and64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9698 {
9699 unsigned rm = INSTR (20, 16);
9700 unsigned rn = INSTR (9, 5);
9701 unsigned rd = INSTR (4, 0);
9702
9703 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9704 aarch64_set_reg_u64
9705 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9706 & shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9707 }
9708
9709 /* 32 bit AND shifted register setting flags. */
9710 static void
9711 ands32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9712 {
9713 unsigned rm = INSTR (20, 16);
9714 unsigned rn = INSTR (9, 5);
9715 unsigned rd = INSTR (4, 0);
9716
9717 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9718 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9719 shift, count);
9720
9721 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9722 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9723 set_flags_for_binop32 (cpu, value1 & value2);
9724 }
9725
9726 /* 64 bit AND shifted register setting flags. */
9727 static void
9728 ands64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9729 {
9730 unsigned rm = INSTR (20, 16);
9731 unsigned rn = INSTR (9, 5);
9732 unsigned rd = INSTR (4, 0);
9733
9734 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9735 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9736 shift, count);
9737
9738 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9739 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9740 set_flags_for_binop64 (cpu, value1 & value2);
9741 }
9742
9743 /* 32 bit BIC shifted register. */
9744 static void
9745 bic32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9746 {
9747 unsigned rm = INSTR (20, 16);
9748 unsigned rn = INSTR (9, 5);
9749 unsigned rd = INSTR (4, 0);
9750
9751 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9752 aarch64_set_reg_u64
9753 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9754 & ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9755 }
9756
9757 /* 64 bit BIC shifted register. */
9758 static void
9759 bic64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9760 {
9761 unsigned rm = INSTR (20, 16);
9762 unsigned rn = INSTR (9, 5);
9763 unsigned rd = INSTR (4, 0);
9764
9765 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9766 aarch64_set_reg_u64
9767 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9768 & ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9769 }
9770
9771 /* 32 bit BIC shifted register setting flags. */
9772 static void
9773 bics32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9774 {
9775 unsigned rm = INSTR (20, 16);
9776 unsigned rn = INSTR (9, 5);
9777 unsigned rd = INSTR (4, 0);
9778
9779 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9780 uint32_t value2 = ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9781 shift, count);
9782
9783 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9784 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9785 set_flags_for_binop32 (cpu, value1 & value2);
9786 }
9787
9788 /* 64 bit BIC shifted register setting flags. */
9789 static void
9790 bics64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9791 {
9792 unsigned rm = INSTR (20, 16);
9793 unsigned rn = INSTR (9, 5);
9794 unsigned rd = INSTR (4, 0);
9795
9796 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9797 uint64_t value2 = ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9798 shift, count);
9799
9800 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9801 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9802 set_flags_for_binop64 (cpu, value1 & value2);
9803 }
9804
9805 /* 32 bit EON shifted register. */
9806 static void
9807 eon32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9808 {
9809 unsigned rm = INSTR (20, 16);
9810 unsigned rn = INSTR (9, 5);
9811 unsigned rd = INSTR (4, 0);
9812
9813 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9814 aarch64_set_reg_u64
9815 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9816 ^ ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9817 }
9818
9819 /* 64 bit EON shifted register. */
9820 static void
9821 eon64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9822 {
9823 unsigned rm = INSTR (20, 16);
9824 unsigned rn = INSTR (9, 5);
9825 unsigned rd = INSTR (4, 0);
9826
9827 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9828 aarch64_set_reg_u64
9829 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9830 ^ ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9831 }
9832
9833 /* 32 bit EOR shifted register. */
9834 static void
9835 eor32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9836 {
9837 unsigned rm = INSTR (20, 16);
9838 unsigned rn = INSTR (9, 5);
9839 unsigned rd = INSTR (4, 0);
9840
9841 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9842 aarch64_set_reg_u64
9843 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9844 ^ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9845 }
9846
9847 /* 64 bit EOR shifted register. */
9848 static void
9849 eor64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9850 {
9851 unsigned rm = INSTR (20, 16);
9852 unsigned rn = INSTR (9, 5);
9853 unsigned rd = INSTR (4, 0);
9854
9855 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9856 aarch64_set_reg_u64
9857 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9858 ^ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9859 }
9860
9861 /* 32 bit ORR shifted register. */
9862 static void
9863 orr32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9864 {
9865 unsigned rm = INSTR (20, 16);
9866 unsigned rn = INSTR (9, 5);
9867 unsigned rd = INSTR (4, 0);
9868
9869 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9870 aarch64_set_reg_u64
9871 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9872 | shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9873 }
9874
9875 /* 64 bit ORR shifted register. */
9876 static void
9877 orr64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9878 {
9879 unsigned rm = INSTR (20, 16);
9880 unsigned rn = INSTR (9, 5);
9881 unsigned rd = INSTR (4, 0);
9882
9883 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9884 aarch64_set_reg_u64
9885 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9886 | shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9887 }
9888
9889 /* 32 bit ORN shifted register. */
9890 static void
9891 orn32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9892 {
9893 unsigned rm = INSTR (20, 16);
9894 unsigned rn = INSTR (9, 5);
9895 unsigned rd = INSTR (4, 0);
9896
9897 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9898 aarch64_set_reg_u64
9899 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9900 | ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9901 }
9902
9903 /* 64 bit ORN shifted register. */
9904 static void
9905 orn64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9906 {
9907 unsigned rm = INSTR (20, 16);
9908 unsigned rn = INSTR (9, 5);
9909 unsigned rd = INSTR (4, 0);
9910
9911 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9912 aarch64_set_reg_u64
9913 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9914 | ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9915 }
9916
9917 static void
9918 dexLogicalImmediate (sim_cpu *cpu)
9919 {
  /* assert instr[28,23] = 100100
9921 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9922 instr[30,29] = op : 0 ==> AND, 1 ==> ORR, 2 ==> EOR, 3 ==> ANDS
9923 instr[22] = N : used to construct immediate mask
9924 instr[21,16] = immr
9925 instr[15,10] = imms
9926 instr[9,5] = Rn
9927 instr[4,0] = Rd */
9928
9929 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
9930 uint32_t size = INSTR (31, 31);
9931 uint32_t N = INSTR (22, 22);
9932 /* uint32_t immr = INSTR (21, 16);. */
9933 /* uint32_t imms = INSTR (15, 10);. */
9934 uint32_t index = INSTR (22, 10);
9935 uint64_t bimm64 = LITable [index];
9936 uint32_t dispatch = INSTR (30, 29);
9937
9938 if (~size & N)
9939 HALT_UNALLOC;
9940
9941 if (!bimm64)
9942 HALT_UNALLOC;
9943
9944 if (size == 0)
9945 {
9946 uint32_t bimm = (uint32_t) bimm64;
9947
9948 switch (dispatch)
9949 {
9950 case 0: and32 (cpu, bimm); return;
9951 case 1: orr32 (cpu, bimm); return;
9952 case 2: eor32 (cpu, bimm); return;
9953 case 3: ands32 (cpu, bimm); return;
9954 }
9955 }
9956 else
9957 {
9958 switch (dispatch)
9959 {
9960 case 0: and64 (cpu, bimm64); return;
9961 case 1: orr64 (cpu, bimm64); return;
9962 case 2: eor64 (cpu, bimm64); return;
9963 case 3: ands64 (cpu, bimm64); return;
9964 }
9965 }
9966 HALT_UNALLOC;
9967 }
9968
/* Immediate move.
   The uimm argument is a 16 bit value to be inserted into the
   target register.  The pos argument locates the 16 bit word in
   the dest register, i.e. it is in {0, 1} for 32 bit and
   {0, 1, 2, 3} for 64 bit.
   N.B. the register arg may not be SP, so it should be
   accessed using the setGZRegisterXXX accessors.  */
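/* A 64-bit constant is typically materialised with a MOVZ/MOVK
   sequence, e.g. for 0x1234000056780000:
     MOVZ X0, #0x1234, LSL #48   ==> movz64 (cpu, 0x1234, 3)
     MOVK X0, #0x5678, LSL #16   ==> movk64 (cpu, 0x5678, 1)  */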
9976
9977 /* 32 bit move 16 bit immediate zero remaining shorts. */
9978 static void
9979 movz32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9980 {
9981 unsigned rd = INSTR (4, 0);
9982
9983 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9984 aarch64_set_reg_u64 (cpu, rd, NO_SP, val << (pos * 16));
9985 }
9986
9987 /* 64 bit move 16 bit immediate zero remaining shorts. */
9988 static void
9989 movz64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9990 {
9991 unsigned rd = INSTR (4, 0);
9992
9993 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9994 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((uint64_t) val) << (pos * 16));
9995 }
9996
/* 32 bit move 16 bit immediate, bitwise inverted (MOVN).  */
9998 static void
9999 movn32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
10000 {
10001 unsigned rd = INSTR (4, 0);
10002
10003 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10004 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((val << (pos * 16)) ^ 0xffffffffU));
10005 }
10006
/* 64 bit move 16 bit immediate, bitwise inverted (MOVN).  */
10008 static void
10009 movn64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
10010 {
10011 unsigned rd = INSTR (4, 0);
10012
10013 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10014 aarch64_set_reg_u64
10015 (cpu, rd, NO_SP, ((((uint64_t) val) << (pos * 16))
10016 ^ 0xffffffffffffffffULL));
10017 }
10018
10019 /* 32 bit move 16 bit immediate keep remaining shorts. */
10020 static void
10021 movk32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
10022 {
10023 unsigned rd = INSTR (4, 0);
10024 uint32_t current = aarch64_get_reg_u32 (cpu, rd, NO_SP);
10025 uint32_t value = val << (pos * 16);
10026 uint32_t mask = ~(0xffffU << (pos * 16));
10027
10028 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10029 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
10030 }
10031
/* 64 bit move 16 bit immediate keep remaining shorts.  */
10033 static void
10034 movk64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
10035 {
10036 unsigned rd = INSTR (4, 0);
10037 uint64_t current = aarch64_get_reg_u64 (cpu, rd, NO_SP);
10038 uint64_t value = (uint64_t) val << (pos * 16);
10039 uint64_t mask = ~(0xffffULL << (pos * 16));
10040
10041 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10042 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
10043 }
10044
10045 static void
10046 dexMoveWideImmediate (sim_cpu *cpu)
10047 {
10048 /* assert instr[28:23] = 100101
10049 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10050 instr[30,29] = op : 0 ==> MOVN, 1 ==> UNALLOC, 2 ==> MOVZ, 3 ==> MOVK
10051 instr[22,21] = shift : 00 == LSL#0, 01 = LSL#16, 10 = LSL#32, 11 = LSL#48
10052 instr[20,5] = uimm16
10053 instr[4,0] = Rd */
10054
  /* N.B. the (multiple of 16) shift is applied by the called routine;
     we just pass the multiplier.  */
10057
10058 uint32_t imm;
10059 uint32_t size = INSTR (31, 31);
10060 uint32_t op = INSTR (30, 29);
10061 uint32_t shift = INSTR (22, 21);
10062
  /* 32 bit can only shift 0 or 1 lot of 16.
     Anything else is an unallocated instruction.  */
10065 if (size == 0 && (shift > 1))
10066 HALT_UNALLOC;
10067
10068 if (op == 1)
10069 HALT_UNALLOC;
10070
10071 imm = INSTR (20, 5);
10072
10073 if (size == 0)
10074 {
10075 if (op == 0)
10076 movn32 (cpu, imm, shift);
10077 else if (op == 2)
10078 movz32 (cpu, imm, shift);
10079 else
10080 movk32 (cpu, imm, shift);
10081 }
10082 else
10083 {
10084 if (op == 0)
10085 movn64 (cpu, imm, shift);
10086 else if (op == 2)
10087 movz64 (cpu, imm, shift);
10088 else
10089 movk64 (cpu, imm, shift);
10090 }
10091 }
10092
10093 /* Bitfield operations.
10094 These take a pair of bit positions r and s which are in {0..31}
10095 or {0..63} depending on the instruction word size.
   N.B. register args may not be SP.  */
10097
/* OK, we start with ubfm which just needs to pick some bits out of
   the source, zero the rest and write the result to dest.  All it
   takes is two logical shifts; a worked example follows.  */
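/* E.g. ubfm32 with r = 8, s = 15 implements UBFX W0, W1, #8, #8:
   value <<= 16 moves bit 15 up to bit 31, then value >>= 24 brings
   it back down to bit 7, leaving Wn<15:8> in Wd<7:0>.  */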
10101
10102 /* 32 bit bitfield move, left and right of affected zeroed
10103 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
10104 static void
10105 ubfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
10106 {
10107 unsigned rd;
10108 unsigned rn = INSTR (9, 5);
10109 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
10110
10111 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
10112 if (r <= s)
10113 {
10114 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
10115 We want only bits s:xxx:r at the bottom of the word
10116 so we LSL bit s up to bit 31 i.e. by 31 - s
10117 and then we LSR to bring bit 31 down to bit s - r
10118 i.e. by 31 + r - s. */
10119 value <<= 31 - s;
10120 value >>= 31 + r - s;
10121 }
10122 else
10123 {
10124 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0
	 We want only bits s:xxx:0 starting at bit 31-(r-1)
10126 so we LSL bit s up to bit 31 i.e. by 31 - s
10127 and then we LSL to bring bit 31 down to 31-(r-1)+s
10128 i.e. by r - (s + 1). */
10129 value <<= 31 - s;
10130 value >>= r - (s + 1);
10131 }
10132
10133 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10134 rd = INSTR (4, 0);
10135 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
10136 }
10137
10138 /* 64 bit bitfield move, left and right of affected zeroed
10139 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
10140 static void
10141 ubfm (sim_cpu *cpu, uint32_t r, uint32_t s)
10142 {
10143 unsigned rd;
10144 unsigned rn = INSTR (9, 5);
10145 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
10146
10147 if (r <= s)
10148 {
10149 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
10150 We want only bits s:xxx:r at the bottom of the word.
10151 So we LSL bit s up to bit 63 i.e. by 63 - s
10152 and then we LSR to bring bit 63 down to bit s - r
10153 i.e. by 63 + r - s. */
10154 value <<= 63 - s;
10155 value >>= 63 + r - s;
10156 }
10157 else
10158 {
10159 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0.
	 We want only bits s:xxx:0 starting at bit 63-(r-1).
10161 So we LSL bit s up to bit 63 i.e. by 63 - s
10162 and then we LSL to bring bit 63 down to 63-(r-1)+s
10163 i.e. by r - (s + 1). */
10164 value <<= 63 - s;
10165 value >>= r - (s + 1);
10166 }
10167
10168 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10169 rd = INSTR (4, 0);
10170 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
10171 }
10172
/* The signed versions need to insert sign bits
   on the left of the inserted bit field, so we do
   much the same as the unsigned version except we
   use an arithmetic shift right -- this just means
   we need to operate on signed values.  */
10178
10179 /* 32 bit bitfield move, left of affected sign-extended, right zeroed. */
10180 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
10181 static void
10182 sbfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
10183 {
10184 unsigned rd;
10185 unsigned rn = INSTR (9, 5);
  /* As per ubfm32 but use an ASR instead of an LSR.  */
10187 int32_t value = aarch64_get_reg_s32 (cpu, rn, NO_SP);
10188
10189 if (r <= s)
10190 {
10191 value <<= 31 - s;
10192 value >>= 31 + r - s;
10193 }
10194 else
10195 {
10196 value <<= 31 - s;
10197 value >>= r - (s + 1);
10198 }
10199
10200 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10201 rd = INSTR (4, 0);
10202 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
10203 }
10204
10205 /* 64 bit bitfield move, left of affected sign-extended, right zeroed. */
10206 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
10207 static void
10208 sbfm (sim_cpu *cpu, uint32_t r, uint32_t s)
10209 {
10210 unsigned rd;
10211 unsigned rn = INSTR (9, 5);
  /* As per ubfm but use an ASR instead of an LSR.  */
10213 int64_t value = aarch64_get_reg_s64 (cpu, rn, NO_SP);
10214
10215 if (r <= s)
10216 {
10217 value <<= 63 - s;
10218 value >>= 63 + r - s;
10219 }
10220 else
10221 {
10222 value <<= 63 - s;
10223 value >>= r - (s + 1);
10224 }
10225
10226 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10227 rd = INSTR (4, 0);
10228 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
10229 }
10230
/* Finally, these versions leave non-affected bits
   as is, so we need to generate the bits as per
   ubfm and also generate a mask to pick the
   bits from the original and computed values;
   a worked example follows.  */
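/* E.g. bfm32 with r = 24, s = 3 implements BFI W0, W1, #8, #4:
   Wn<3:0> is shifted up to bits 11:8, the matching 4-bit mask is
   built the same way, and only those destination bits change.  */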
10235
10236 /* 32 bit bitfield move, non-affected bits left as is.
10237 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
10238 static void
10239 bfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
10240 {
10241 unsigned rn = INSTR (9, 5);
10242 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
10243 uint32_t mask = -1;
10244 unsigned rd;
10245 uint32_t value2;
10246
10247 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
10248 if (r <= s)
10249 {
10250 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
10251 We want only bits s:xxx:r at the bottom of the word
10252 so we LSL bit s up to bit 31 i.e. by 31 - s
10253 and then we LSR to bring bit 31 down to bit s - r
10254 i.e. by 31 + r - s. */
10255 value <<= 31 - s;
10256 value >>= 31 + r - s;
10257 /* the mask must include the same bits. */
10258 mask <<= 31 - s;
10259 mask >>= 31 + r - s;
10260 }
10261 else
10262 {
10263 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0.
	 We want only bits s:xxx:0 starting at bit 31-(r-1)
10265 so we LSL bit s up to bit 31 i.e. by 31 - s
10266 and then we LSL to bring bit 31 down to 31-(r-1)+s
10267 i.e. by r - (s + 1). */
10268 value <<= 31 - s;
10269 value >>= r - (s + 1);
10270 /* The mask must include the same bits. */
10271 mask <<= 31 - s;
10272 mask >>= r - (s + 1);
10273 }
10274
  rd = INSTR (4, 0);
  value2 = aarch64_get_reg_u32 (cpu, rd, NO_SP);

  value2 &= ~mask;
  value2 |= value;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value2);
10284 }
10285
10286 /* 64 bit bitfield move, non-affected bits left as is.
10287 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
10288 static void
10289 bfm (sim_cpu *cpu, uint32_t r, uint32_t s)
10290 {
10291 unsigned rd;
10292 unsigned rn = INSTR (9, 5);
10293 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
10294 uint64_t mask = 0xffffffffffffffffULL;
10295
10296 if (r <= s)
10297 {
10298 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
10299 We want only bits s:xxx:r at the bottom of the word
10300 so we LSL bit s up to bit 63 i.e. by 63 - s
10301 and then we LSR to bring bit 63 down to bit s - r
10302 i.e. by 63 + r - s. */
10303 value <<= 63 - s;
10304 value >>= 63 + r - s;
10305 /* The mask must include the same bits. */
10306 mask <<= 63 - s;
10307 mask >>= 63 + r - s;
10308 }
10309 else
10310 {
10311 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0
	 We want only bits s:xxx:0 starting at bit 63-(r-1)
10313 so we LSL bit s up to bit 63 i.e. by 63 - s
10314 and then we LSL to bring bit 63 down to 63-(r-1)+s
10315 i.e. by r - (s + 1). */
10316 value <<= 63 - s;
10317 value >>= r - (s + 1);
10318 /* The mask must include the same bits. */
10319 mask <<= 63 - s;
10320 mask >>= r - (s + 1);
10321 }
10322
10323 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10324 rd = INSTR (4, 0);
10325 aarch64_set_reg_u64
10326 (cpu, rd, NO_SP, (aarch64_get_reg_u64 (cpu, rd, NO_SP) & ~mask) | value);
10327 }
10328
10329 static void
10330 dexBitfieldImmediate (sim_cpu *cpu)
10331 {
10332 /* assert instr[28:23] = 100110
10333 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10334 instr[30,29] = op : 0 ==> SBFM, 1 ==> BFM, 2 ==> UBFM, 3 ==> UNALLOC
10335 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit ow UNALLOC
10336 instr[21,16] = immr : 0xxxxx for 32 bit, xxxxxx for 64 bit
10337 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
10338 instr[9,5] = Rn
10339 instr[4,0] = Rd */
10340
10341 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
10342 uint32_t dispatch;
10343 uint32_t imms;
10344 uint32_t size = INSTR (31, 31);
10345 uint32_t N = INSTR (22, 22);
  /* 32 bit operations must have immr[5] = 0 and imms[5] = 0,
     or else we have an UNALLOC.  */
10348 uint32_t immr = INSTR (21, 16);
10349
10350 if (~size & N)
10351 HALT_UNALLOC;
10352
10353 if (!size && uimm (immr, 5, 5))
10354 HALT_UNALLOC;
10355
10356 imms = INSTR (15, 10);
10357 if (!size && uimm (imms, 5, 5))
10358 HALT_UNALLOC;
10359
10360 /* Switch on combined size and op. */
10361 dispatch = INSTR (31, 29);
10362 switch (dispatch)
10363 {
10364 case 0: sbfm32 (cpu, immr, imms); return;
10365 case 1: bfm32 (cpu, immr, imms); return;
10366 case 2: ubfm32 (cpu, immr, imms); return;
10367 case 4: sbfm (cpu, immr, imms); return;
10368 case 5: bfm (cpu, immr, imms); return;
10369 case 6: ubfm (cpu, immr, imms); return;
10370 default: HALT_UNALLOC;
10371 }
10372 }
10373
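/* EXTR Rd, Rn, Rm, #imms extracts a register-sized field starting
   at bit imms of the concatenation Rn:Rm; when Rn == Rm this is
   the idiom for ROR Rd, Rn, #imms.  */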
10374 static void
10375 do_EXTR_32 (sim_cpu *cpu)
10376 {
10377 /* instr[31:21] = 00010011100
10378 instr[20,16] = Rm
10379 instr[15,10] = imms : 0xxxxx for 32 bit
10380 instr[9,5] = Rn
10381 instr[4,0] = Rd */
10382 unsigned rm = INSTR (20, 16);
10383 unsigned imms = INSTR (15, 10) & 31;
10384 unsigned rn = INSTR ( 9, 5);
10385 unsigned rd = INSTR ( 4, 0);
10386 uint64_t val1;
10387 uint64_t val2;
10388
  val1 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
  val1 >>= imms;
  val2 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
  /* Guard the imms == 0 case, and mask the shifted value back to 32
     bits so the upper half of Xd stays zero for a W-form result.  */
  val2 = imms ? ((val2 << (32 - imms)) & 0xffffffffULL) : 0;
10393
10394 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10395 aarch64_set_reg_u64 (cpu, rd, NO_SP, val1 | val2);
10396 }
10397
10398 static void
10399 do_EXTR_64 (sim_cpu *cpu)
10400 {
10401 /* instr[31:21] = 10010011100
10402 instr[20,16] = Rm
10403 instr[15,10] = imms
10404 instr[9,5] = Rn
10405 instr[4,0] = Rd */
10406 unsigned rm = INSTR (20, 16);
10407 unsigned imms = INSTR (15, 10) & 63;
10408 unsigned rn = INSTR ( 9, 5);
10409 unsigned rd = INSTR ( 4, 0);
10410 uint64_t val;
10411
  val = aarch64_get_reg_u64 (cpu, rm, NO_SP);
  val >>= imms;
  /* Guard the imms == 0 case: a shift by 64 is undefined behaviour,
     and the result is then simply Xm.  */
  if (imms > 0)
    val |= (aarch64_get_reg_u64 (cpu, rn, NO_SP) << (64 - imms));
10415
10416 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
10417 }
10418
10419 static void
10420 dexExtractImmediate (sim_cpu *cpu)
10421 {
10422 /* assert instr[28:23] = 100111
10423 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10424 instr[30,29] = op21 : 0 ==> EXTR, 1,2,3 ==> UNALLOC
10425 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit or UNALLOC
10426 instr[21] = op0 : must be 0 or UNALLOC
10427 instr[20,16] = Rm
10428 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
10429 instr[9,5] = Rn
10430 instr[4,0] = Rd */
10431
10432 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
10433 /* 64 bit operations must have N = 1 or else we have an UNALLOC. */
10434 uint32_t dispatch;
10435 uint32_t size = INSTR (31, 31);
10436 uint32_t N = INSTR (22, 22);
10437 /* 32 bit operations must have imms[5] = 0
10438 or else we have an UNALLOC. */
10439 uint32_t imms = INSTR (15, 10);
10440
10441 if (size ^ N)
10442 HALT_UNALLOC;
10443
10444 if (!size && uimm (imms, 5, 5))
10445 HALT_UNALLOC;
10446
10447 /* Switch on combined size and op. */
10448 dispatch = INSTR (31, 29);
10449
10450 if (dispatch == 0)
10451 do_EXTR_32 (cpu);
10452
10453 else if (dispatch == 4)
10454 do_EXTR_64 (cpu);
10455
10456 else if (dispatch == 1)
10457 HALT_NYI;
10458 else
10459 HALT_UNALLOC;
10460 }
10461
10462 static void
10463 dexDPImm (sim_cpu *cpu)
10464 {
10465 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
10466	     assert  group == GROUP_DPIMM_1000 || group == GROUP_DPIMM_1001
10467 bits [25,23] of a DPImm are the secondary dispatch vector. */
10468 uint32_t group2 = dispatchDPImm (aarch64_get_instr (cpu));
10469
10470 switch (group2)
10471 {
10472 case DPIMM_PCADR_000:
10473 case DPIMM_PCADR_001:
10474 dexPCRelAddressing (cpu);
10475 return;
10476
10477 case DPIMM_ADDSUB_010:
10478 case DPIMM_ADDSUB_011:
10479 dexAddSubtractImmediate (cpu);
10480 return;
10481
10482 case DPIMM_LOG_100:
10483 dexLogicalImmediate (cpu);
10484 return;
10485
10486 case DPIMM_MOV_101:
10487 dexMoveWideImmediate (cpu);
10488 return;
10489
10490 case DPIMM_BITF_110:
10491 dexBitfieldImmediate (cpu);
10492 return;
10493
10494 case DPIMM_EXTR_111:
10495 dexExtractImmediate (cpu);
10496 return;
10497
10498 default:
10499 /* Should never reach here. */
10500 HALT_NYI;
10501 }
10502 }
10503
10504 static void
10505 dexLoadUnscaledImmediate (sim_cpu *cpu)
10506 {
10507 /* instr[29,24] == 111_00
10508 instr[21] == 0
10509 instr[11,10] == 00
10510 instr[31,30] = size
10511 instr[26] = V
10512 instr[23,22] = opc
10513 instr[20,12] = simm9
10514 instr[9,5] = rn may be SP. */
10515 /* unsigned rt = INSTR (4, 0); */
10516 uint32_t V = INSTR (26, 26);
10517 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10518 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10519
10520 if (!V)
10521 {
10522 /* GReg operations. */
10523 switch (dispatch)
10524 {
10525 case 0: sturb (cpu, imm); return;
10526 case 1: ldurb32 (cpu, imm); return;
10527 case 2: ldursb64 (cpu, imm); return;
10528 case 3: ldursb32 (cpu, imm); return;
10529 case 4: sturh (cpu, imm); return;
10530 case 5: ldurh32 (cpu, imm); return;
10531 case 6: ldursh64 (cpu, imm); return;
10532 case 7: ldursh32 (cpu, imm); return;
10533 case 8: stur32 (cpu, imm); return;
10534 case 9: ldur32 (cpu, imm); return;
10535 case 10: ldursw (cpu, imm); return;
10536 case 12: stur64 (cpu, imm); return;
10537 case 13: ldur64 (cpu, imm); return;
10538
10539 case 14:
10540 /* PRFUM NYI. */
10541 HALT_NYI;
10542
10543 default:
10544 case 11:
10545 case 15:
10546 HALT_UNALLOC;
10547 }
10548 }
10549
10550 /* FReg operations. */
10551 switch (dispatch)
10552 {
10553 case 2: fsturq (cpu, imm); return;
10554 case 3: fldurq (cpu, imm); return;
10555 case 8: fsturs (cpu, imm); return;
10556 case 9: fldurs (cpu, imm); return;
10557 case 12: fsturd (cpu, imm); return;
10558 case 13: fldurd (cpu, imm); return;
10559
10560 case 0: /* STUR 8 bit FP. */
10561 case 1: /* LDUR 8 bit FP. */
10562 case 4: /* STUR 16 bit FP. */
10563	    case 5: /* LDUR 16 bit FP.  */
10564 HALT_NYI;
10565
10566 default:
10567 case 6:
10568 case 7:
10569 case 10:
10570 case 11:
10571 case 14:
10572 case 15:
10573 HALT_UNALLOC;
10574 }
10575 }
10576
10577 /* N.B. A preliminary note regarding all the ldrs<x>32
10578 instructions
10579
10580 The signed value loaded by these instructions is cast to unsigned
10581 before being assigned to aarch64_get_reg_u64 (cpu, N) i.e. to the
10582	   64 bit element of the GReg union.  This performs a 32 bit sign extension
10583	   (as required) but avoids 64 bit sign extension, thus ensuring that the
10584	   top half of the register word is zero.  This is what the spec demands
10585 when a 32 bit load occurs. */
10586
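/* A standalone illustration (not simulator code) of the note above:
   widening an int8_t through uint32_t sign-extends within 32 bits and
   leaves the top half of the 64 bit destination zero, while widening
   through int64_t would smear the sign bit across all 64 bits.  */

#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>

int
main (void)
{
  int8_t loaded = -1;                             /* Byte from memory.  */
  uint64_t w_form = (uint32_t) (int32_t) loaded;  /* 0x00000000ffffffff.  */
  uint64_t x_form = (uint64_t) (int64_t) loaded;  /* 0xffffffffffffffff.  */
  printf ("%#018" PRIx64 " %#018" PRIx64 "\n", w_form, x_form);
  return 0;
}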
10587 /* 32 bit load sign-extended byte scaled unsigned 12 bit. */
10588 static void
10589 ldrsb32_abs (sim_cpu *cpu, uint32_t offset)
10590 {
10591 unsigned int rn = INSTR (9, 5);
10592 unsigned int rt = INSTR (4, 0);
10593
10594	  /* The target register may not be SP but the source register may be;
10595	     there is no scaling required for a byte load.  */
10596 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
10597 aarch64_set_reg_u64 (cpu, rt, NO_SP,
10598 (int64_t) aarch64_get_mem_s8 (cpu, address));
10599 }
10600
10601 /* 32 bit load sign-extended byte scaled or unscaled zero-
10602 or sign-extended 32-bit register offset. */
10603 static void
10604 ldrsb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10605 {
10606 unsigned int rm = INSTR (20, 16);
10607 unsigned int rn = INSTR (9, 5);
10608 unsigned int rt = INSTR (4, 0);
10609
10610 /* rn may reference SP, rm and rt must reference ZR. */
10611
10612 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10613 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10614 extension);
10615
10616 /* There is no scaling required for a byte load. */
10617 aarch64_set_reg_u64
10618 (cpu, rt, NO_SP, (int64_t) aarch64_get_mem_s8 (cpu, address
10619 + displacement));
10620 }
10621
10622 /* 32 bit load sign-extended byte unscaled signed 9 bit with
10623 pre- or post-writeback. */
10624 static void
10625 ldrsb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10626 {
10627 uint64_t address;
10628 unsigned int rn = INSTR (9, 5);
10629 unsigned int rt = INSTR (4, 0);
10630
10631 if (rn == rt && wb != NoWriteBack)
10632 HALT_UNALLOC;
10633
10634 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10635
10636 if (wb == Pre)
10637 address += offset;
10638
10639 aarch64_set_reg_u64 (cpu, rt, NO_SP,
10640 (int64_t) aarch64_get_mem_s8 (cpu, address));
10641
10642 if (wb == Post)
10643 address += offset;
10644
10645 if (wb != NoWriteBack)
10646 aarch64_set_reg_u64 (cpu, rn, NO_SP, address);
10647 }
10648
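/* A standalone sketch (not simulator code) of the writeback convention
   used by the *_wb helpers in this file: Pre adds the offset before the
   access, Post after it, and in either case the final address is written
   back to the base register.  The names wb_kind and access_addr are
   invented for this illustration.  */

#include <stdint.h>
#include <assert.h>

typedef enum { NoWB, PreWB, PostWB } wb_kind;

static uint64_t
access_addr (uint64_t *base, int32_t offset, wb_kind wb)
{
  uint64_t address = *base;
  uint64_t accessed;

  if (wb == PreWB)
    address += offset;

  accessed = address;            /* The address the load/store uses.  */

  if (wb == PostWB)
    address += offset;

  if (wb != NoWB)
    *base = address;             /* Base register update.  */

  return accessed;
}

int
main (void)
{
  uint64_t x0 = 0x1000;
  assert (access_addr (&x0, 4, PreWB) == 0x1004 && x0 == 0x1004);
  x0 = 0x1000;
  assert (access_addr (&x0, 4, PostWB) == 0x1000 && x0 == 0x1004);
  return 0;
}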
10649 /* 8 bit store scaled. */
10650 static void
10651 fstrb_abs (sim_cpu *cpu, uint32_t offset)
10652 {
10653 unsigned st = INSTR (4, 0);
10654 unsigned rn = INSTR (9, 5);
10655
10656 aarch64_set_mem_u8 (cpu,
10657 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
10658 aarch64_get_vec_u8 (cpu, st, 0));
10659 }
10660
10661 /* 8 bit store scaled or unscaled zero- or
10662 sign-extended 8-bit register offset. */
10663 static void
10664 fstrb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10665 {
10666 unsigned rm = INSTR (20, 16);
10667 unsigned rn = INSTR (9, 5);
10668 unsigned st = INSTR (4, 0);
10669
10670 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10671 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10672 extension);
10673 uint64_t displacement = scaling == Scaled ? extended : 0;
10674
10675 aarch64_set_mem_u8
10676 (cpu, address + displacement, aarch64_get_vec_u8 (cpu, st, 0));
10677 }
10678
10679 /* 16 bit store scaled. */
10680 static void
10681 fstrh_abs (sim_cpu *cpu, uint32_t offset)
10682 {
10683 unsigned st = INSTR (4, 0);
10684 unsigned rn = INSTR (9, 5);
10685
10686 aarch64_set_mem_u16
10687 (cpu,
10688 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16),
10689 aarch64_get_vec_u16 (cpu, st, 0));
10690 }
10691
10692 /* 16 bit store scaled or unscaled zero-
10693 or sign-extended 16-bit register offset. */
10694 static void
10695 fstrh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10696 {
10697 unsigned rm = INSTR (20, 16);
10698 unsigned rn = INSTR (9, 5);
10699 unsigned st = INSTR (4, 0);
10700
10701 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10702 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10703 extension);
10704 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
10705
10706 aarch64_set_mem_u16
10707 (cpu, address + displacement, aarch64_get_vec_u16 (cpu, st, 0));
10708 }
10709
10710 /* 32 bit store scaled unsigned 12 bit. */
10711 static void
10712 fstrs_abs (sim_cpu *cpu, uint32_t offset)
10713 {
10714 unsigned st = INSTR (4, 0);
10715 unsigned rn = INSTR (9, 5);
10716
10717 aarch64_set_mem_u32
10718 (cpu,
10719 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32),
10720 aarch64_get_vec_u32 (cpu, st, 0));
10721 }
10722
10723 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
10724 static void
10725 fstrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10726 {
10727 unsigned rn = INSTR (9, 5);
10728 unsigned st = INSTR (4, 0);
10729
10730 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10731
10732 if (wb != Post)
10733 address += offset;
10734
10735 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, st, 0));
10736
10737 if (wb == Post)
10738 address += offset;
10739
10740 if (wb != NoWriteBack)
10741 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10742 }
10743
10744 /* 32 bit store scaled or unscaled zero-
10745 or sign-extended 32-bit register offset. */
10746 static void
10747 fstrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10748 {
10749 unsigned rm = INSTR (20, 16);
10750 unsigned rn = INSTR (9, 5);
10751 unsigned st = INSTR (4, 0);
10752
10753 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10754 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10755 extension);
10756 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
10757
10758 aarch64_set_mem_u32
10759 (cpu, address + displacement, aarch64_get_vec_u32 (cpu, st, 0));
10760 }
10761
10762 /* 64 bit store scaled unsigned 12 bit. */
10763 static void
10764 fstrd_abs (sim_cpu *cpu, uint32_t offset)
10765 {
10766 unsigned st = INSTR (4, 0);
10767 unsigned rn = INSTR (9, 5);
10768
10769 aarch64_set_mem_u64
10770 (cpu,
10771 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64),
10772 aarch64_get_vec_u64 (cpu, st, 0));
10773 }
10774
10775 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
10776 static void
10777 fstrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10778 {
10779 unsigned rn = INSTR (9, 5);
10780 unsigned st = INSTR (4, 0);
10781
10782 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10783
10784 if (wb != Post)
10785 address += offset;
10786
10787 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, st, 0));
10788
10789 if (wb == Post)
10790 address += offset;
10791
10792 if (wb != NoWriteBack)
10793 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10794 }
10795
10796 /* 64 bit store scaled or unscaled zero-
10797 or sign-extended 32-bit register offset. */
10798 static void
10799 fstrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10800 {
10801 unsigned rm = INSTR (20, 16);
10802 unsigned rn = INSTR (9, 5);
10803 unsigned st = INSTR (4, 0);
10804
10805 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10806 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10807 extension);
10808 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
10809
10810 aarch64_set_mem_u64
10811 (cpu, address + displacement, aarch64_get_vec_u64 (cpu, st, 0));
10812 }
10813
10814 /* 128 bit store scaled unsigned 12 bit. */
10815 static void
10816 fstrq_abs (sim_cpu *cpu, uint32_t offset)
10817 {
10818 FRegister a;
10819 unsigned st = INSTR (4, 0);
10820 unsigned rn = INSTR (9, 5);
10821 uint64_t addr;
10822
10823 aarch64_get_FP_long_double (cpu, st, & a);
10824
10825 addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
10826 aarch64_set_mem_long_double (cpu, addr, a);
10827 }
10828
10829 /* 128 bit store unscaled signed 9 bit with pre- or post-writeback. */
10830 static void
10831 fstrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10832 {
10833 FRegister a;
10834 unsigned rn = INSTR (9, 5);
10835 unsigned st = INSTR (4, 0);
10836 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10837
10838 if (wb != Post)
10839 address += offset;
10840
10841 aarch64_get_FP_long_double (cpu, st, & a);
10842 aarch64_set_mem_long_double (cpu, address, a);
10843
10844 if (wb == Post)
10845 address += offset;
10846
10847 if (wb != NoWriteBack)
10848 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10849 }
10850
10851 /* 128 bit store scaled or unscaled zero-
10852 or sign-extended 32-bit register offset. */
10853 static void
10854 fstrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10855 {
10856 unsigned rm = INSTR (20, 16);
10857 unsigned rn = INSTR (9, 5);
10858 unsigned st = INSTR (4, 0);
10859
10860 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10861 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10862 extension);
10863 uint64_t displacement = OPT_SCALE (extended, 128, scaling);
10864
10865 FRegister a;
10866
10867 aarch64_get_FP_long_double (cpu, st, & a);
10868 aarch64_set_mem_long_double (cpu, address + displacement, a);
10869 }
10870
10871 static void
10872 dexLoadImmediatePrePost (sim_cpu *cpu)
10873 {
10874 /* instr[31,30] = size
10875 instr[29,27] = 111
10876 instr[26] = V
10877 instr[25,24] = 00
10878 instr[23,22] = opc
10879 instr[21] = 0
10880 instr[20,12] = simm9
10881 instr[11] = wb : 0 ==> Post, 1 ==> Pre
10882 instr[10] = 0
10883 instr[9,5] = Rn may be SP.
10884 instr[4,0] = Rt */
10885
10886 uint32_t V = INSTR (26, 26);
10887 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10888 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10889 WriteBack wb = INSTR (11, 11);
10890
10891 if (!V)
10892 {
10893 /* GReg operations. */
10894 switch (dispatch)
10895 {
10896 case 0: strb_wb (cpu, imm, wb); return;
10897 case 1: ldrb32_wb (cpu, imm, wb); return;
10898 case 2: ldrsb_wb (cpu, imm, wb); return;
10899 case 3: ldrsb32_wb (cpu, imm, wb); return;
10900 case 4: strh_wb (cpu, imm, wb); return;
10901 case 5: ldrh32_wb (cpu, imm, wb); return;
10902 case 6: ldrsh64_wb (cpu, imm, wb); return;
10903 case 7: ldrsh32_wb (cpu, imm, wb); return;
10904 case 8: str32_wb (cpu, imm, wb); return;
10905 case 9: ldr32_wb (cpu, imm, wb); return;
10906 case 10: ldrsw_wb (cpu, imm, wb); return;
10907 case 12: str_wb (cpu, imm, wb); return;
10908 case 13: ldr_wb (cpu, imm, wb); return;
10909
10910 default:
10911 case 11:
10912 case 14:
10913 case 15:
10914 HALT_UNALLOC;
10915 }
10916 }
10917
10918 /* FReg operations. */
10919 switch (dispatch)
10920 {
10921 case 2: fstrq_wb (cpu, imm, wb); return;
10922 case 3: fldrq_wb (cpu, imm, wb); return;
10923 case 8: fstrs_wb (cpu, imm, wb); return;
10924 case 9: fldrs_wb (cpu, imm, wb); return;
10925 case 12: fstrd_wb (cpu, imm, wb); return;
10926 case 13: fldrd_wb (cpu, imm, wb); return;
10927
10928 case 0: /* STUR 8 bit FP. */
10929 case 1: /* LDUR 8 bit FP. */
10930 case 4: /* STUR 16 bit FP. */
10931	    case 5: /* LDUR 16 bit FP.  */
10932 HALT_NYI;
10933
10934 default:
10935 case 6:
10936 case 7:
10937 case 10:
10938 case 11:
10939 case 14:
10940 case 15:
10941 HALT_UNALLOC;
10942 }
10943 }
10944
10945 static void
10946 dexLoadRegisterOffset (sim_cpu *cpu)
10947 {
10948 /* instr[31,30] = size
10949 instr[29,27] = 111
10950 instr[26] = V
10951 instr[25,24] = 00
10952 instr[23,22] = opc
10953 instr[21] = 1
10954 instr[20,16] = rm
10955 instr[15,13] = option : 010 ==> UXTW, 011 ==> UXTX/LSL,
10956 110 ==> SXTW, 111 ==> SXTX,
10957	                           otherwise ==> RESERVED
10958 instr[12] = scaled
10959 instr[11,10] = 10
10960 instr[9,5] = rn
10961 instr[4,0] = rt. */
10962
10963 uint32_t V = INSTR (26, 26);
10964 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10965 Scaling scale = INSTR (12, 12);
10966 Extension extensionType = INSTR (15, 13);
10967
10968 /* Check for illegal extension types. */
10969 if (uimm (extensionType, 1, 1) == 0)
10970 HALT_UNALLOC;
10971
10972 if (extensionType == UXTX || extensionType == SXTX)
10973 extensionType = NoExtension;
10974
10975 if (!V)
10976 {
10977 /* GReg operations. */
10978 switch (dispatch)
10979 {
10980 case 0: strb_scale_ext (cpu, scale, extensionType); return;
10981 case 1: ldrb32_scale_ext (cpu, scale, extensionType); return;
10982 case 2: ldrsb_scale_ext (cpu, scale, extensionType); return;
10983 case 3: ldrsb32_scale_ext (cpu, scale, extensionType); return;
10984 case 4: strh_scale_ext (cpu, scale, extensionType); return;
10985 case 5: ldrh32_scale_ext (cpu, scale, extensionType); return;
10986 case 6: ldrsh_scale_ext (cpu, scale, extensionType); return;
10987 case 7: ldrsh32_scale_ext (cpu, scale, extensionType); return;
10988 case 8: str32_scale_ext (cpu, scale, extensionType); return;
10989 case 9: ldr32_scale_ext (cpu, scale, extensionType); return;
10990 case 10: ldrsw_scale_ext (cpu, scale, extensionType); return;
10991 case 12: str_scale_ext (cpu, scale, extensionType); return;
10992 case 13: ldr_scale_ext (cpu, scale, extensionType); return;
10993 case 14: prfm_scale_ext (cpu, scale, extensionType); return;
10994
10995 default:
10996 case 11:
10997 case 15:
10998 HALT_UNALLOC;
10999 }
11000 }
11001
11002 /* FReg operations. */
11003 switch (dispatch)
11004 {
11005 case 1: /* LDUR 8 bit FP. */
11006 HALT_NYI;
11007 case 3: fldrq_scale_ext (cpu, scale, extensionType); return;
11008	    case 5: /* LDUR 16 bit FP.  */
11009 HALT_NYI;
11010 case 9: fldrs_scale_ext (cpu, scale, extensionType); return;
11011 case 13: fldrd_scale_ext (cpu, scale, extensionType); return;
11012
11013 case 0: fstrb_scale_ext (cpu, scale, extensionType); return;
11014 case 2: fstrq_scale_ext (cpu, scale, extensionType); return;
11015 case 4: fstrh_scale_ext (cpu, scale, extensionType); return;
11016 case 8: fstrs_scale_ext (cpu, scale, extensionType); return;
11017 case 12: fstrd_scale_ext (cpu, scale, extensionType); return;
11018
11019 default:
11020 case 6:
11021 case 7:
11022 case 10:
11023 case 11:
11024 case 14:
11025 case 15:
11026 HALT_UNALLOC;
11027 }
11028 }
11029
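/* A standalone check (not simulator code) of the option-field test above:
   of the eight 3 bit extension encodings only 010 (UXTW), 011 (UXTX/LSL),
   110 (SXTW) and 111 (SXTX) are legal, i.e. exactly those with bit 1 set,
   which is what uimm (extensionType, 1, 1) verifies.  */

#include <assert.h>

int
main (void)
{
  unsigned option;

  for (option = 0; option < 8; option++)
    {
      int bit1_set = (option >> 1) & 1;
      int legal = (option == 2 || option == 3
                   || option == 6 || option == 7);
      assert (bit1_set == legal);
    }
  return 0;
}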
11030 static void
11031 dexLoadUnsignedImmediate (sim_cpu *cpu)
11032 {
11033 /* instr[29,24] == 111_01
11034 instr[31,30] = size
11035 instr[26] = V
11036 instr[23,22] = opc
11037 instr[21,10] = uimm12 : unsigned immediate offset
11038 instr[9,5] = rn may be SP.
11039 instr[4,0] = rt. */
11040
11041 uint32_t V = INSTR (26,26);
11042 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
11043 uint32_t imm = INSTR (21, 10);
11044
11045 if (!V)
11046 {
11047 /* GReg operations. */
11048 switch (dispatch)
11049 {
11050 case 0: strb_abs (cpu, imm); return;
11051 case 1: ldrb32_abs (cpu, imm); return;
11052 case 2: ldrsb_abs (cpu, imm); return;
11053 case 3: ldrsb32_abs (cpu, imm); return;
11054 case 4: strh_abs (cpu, imm); return;
11055 case 5: ldrh32_abs (cpu, imm); return;
11056 case 6: ldrsh_abs (cpu, imm); return;
11057 case 7: ldrsh32_abs (cpu, imm); return;
11058 case 8: str32_abs (cpu, imm); return;
11059 case 9: ldr32_abs (cpu, imm); return;
11060 case 10: ldrsw_abs (cpu, imm); return;
11061 case 12: str_abs (cpu, imm); return;
11062 case 13: ldr_abs (cpu, imm); return;
11063 case 14: prfm_abs (cpu, imm); return;
11064
11065 default:
11066 case 11:
11067 case 15:
11068 HALT_UNALLOC;
11069 }
11070 }
11071
11072 /* FReg operations. */
11073 switch (dispatch)
11074 {
11075 case 0: fstrb_abs (cpu, imm); return;
11076 case 4: fstrh_abs (cpu, imm); return;
11077 case 8: fstrs_abs (cpu, imm); return;
11078 case 12: fstrd_abs (cpu, imm); return;
11079 case 2: fstrq_abs (cpu, imm); return;
11080
11081 case 1: fldrb_abs (cpu, imm); return;
11082 case 5: fldrh_abs (cpu, imm); return;
11083 case 9: fldrs_abs (cpu, imm); return;
11084 case 13: fldrd_abs (cpu, imm); return;
11085 case 3: fldrq_abs (cpu, imm); return;
11086
11087 default:
11088 case 6:
11089 case 7:
11090 case 10:
11091 case 11:
11092 case 14:
11093 case 15:
11094 HALT_UNALLOC;
11095 }
11096 }
11097
11098 static void
11099 dexLoadExclusive (sim_cpu *cpu)
11100 {
11101 /* assert instr[29:24] = 001000;
11102 instr[31,30] = size
11103 instr[23] = 0 if exclusive
11104 instr[22] = L : 1 if load, 0 if store
11105 instr[21] = 1 if pair
11106 instr[20,16] = Rs
11107 instr[15] = o0 : 1 if ordered
11108 instr[14,10] = Rt2
11109 instr[9,5] = Rn
11110	     instr[4,0] = Rt.  */
11111
11112 switch (INSTR (22, 21))
11113 {
11114 case 2: ldxr (cpu); return;
11115 case 0: stxr (cpu); return;
11116 default: HALT_NYI;
11117 }
11118 }
11119
11120 static void
11121 dexLoadOther (sim_cpu *cpu)
11122 {
11123 uint32_t dispatch;
11124
11125 /* instr[29,25] = 111_0
11126 instr[24] == 0 ==> dispatch, 1 ==> ldst reg unsigned immediate
11127	     instr[21] and instr[11,10] form the secondary dispatch vector.  */
11128 if (INSTR (24, 24))
11129 {
11130 dexLoadUnsignedImmediate (cpu);
11131 return;
11132 }
11133
11134 dispatch = ((INSTR (21, 21) << 2) | INSTR (11, 10));
11135 switch (dispatch)
11136 {
11137 case 0: dexLoadUnscaledImmediate (cpu); return;
11138 case 1: dexLoadImmediatePrePost (cpu); return;
11139 case 3: dexLoadImmediatePrePost (cpu); return;
11140 case 6: dexLoadRegisterOffset (cpu); return;
11141
11142 default:
11143 case 2:
11144 case 4:
11145 case 5:
11146 case 7:
11147 HALT_NYI;
11148 }
11149 }
11150
11151 static void
11152 store_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11153 {
11154 unsigned rn = INSTR (14, 10);
11155 unsigned rd = INSTR (9, 5);
11156 unsigned rm = INSTR (4, 0);
11157 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11158
11159 if ((rn == rd || rm == rd) && wb != NoWriteBack)
11160 HALT_UNALLOC; /* ??? */
11161
11162 offset <<= 2;
11163
11164 if (wb != Post)
11165 address += offset;
11166
11167 aarch64_set_mem_u32 (cpu, address,
11168 aarch64_get_reg_u32 (cpu, rm, NO_SP));
11169 aarch64_set_mem_u32 (cpu, address + 4,
11170 aarch64_get_reg_u32 (cpu, rn, NO_SP));
11171
11172 if (wb == Post)
11173 address += offset;
11174
11175 if (wb != NoWriteBack)
11176 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11177 }
11178
11179 static void
11180 store_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11181 {
11182 unsigned rn = INSTR (14, 10);
11183 unsigned rd = INSTR (9, 5);
11184 unsigned rm = INSTR (4, 0);
11185 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11186
11187 if ((rn == rd || rm == rd) && wb != NoWriteBack)
11188 HALT_UNALLOC; /* ??? */
11189
11190 offset <<= 3;
11191
11192 if (wb != Post)
11193 address += offset;
11194
11195 aarch64_set_mem_u64 (cpu, address,
11196 aarch64_get_reg_u64 (cpu, rm, NO_SP));
11197 aarch64_set_mem_u64 (cpu, address + 8,
11198 aarch64_get_reg_u64 (cpu, rn, NO_SP));
11199
11200 if (wb == Post)
11201 address += offset;
11202
11203 if (wb != NoWriteBack)
11204 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11205 }
11206
11207 static void
11208 load_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11209 {
11210 unsigned rn = INSTR (14, 10);
11211 unsigned rd = INSTR (9, 5);
11212 unsigned rm = INSTR (4, 0);
11213 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11214
11215 /* Treat this as unalloc to make sure we don't do it. */
11216 if (rn == rm)
11217 HALT_UNALLOC;
11218
11219 offset <<= 2;
11220
11221 if (wb != Post)
11222 address += offset;
11223
11224 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u32 (cpu, address));
11225 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u32 (cpu, address + 4));
11226
11227 if (wb == Post)
11228 address += offset;
11229
11230 if (wb != NoWriteBack)
11231 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11232 }
11233
11234 static void
11235 load_pair_s32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11236 {
11237 unsigned rn = INSTR (14, 10);
11238 unsigned rd = INSTR (9, 5);
11239 unsigned rm = INSTR (4, 0);
11240 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11241
11242 /* Treat this as unalloc to make sure we don't do it. */
11243 if (rn == rm)
11244 HALT_UNALLOC;
11245
11246 offset <<= 2;
11247
11248 if (wb != Post)
11249 address += offset;
11250
11251 aarch64_set_reg_s64 (cpu, rm, SP_OK, aarch64_get_mem_s32 (cpu, address));
11252 aarch64_set_reg_s64 (cpu, rn, SP_OK, aarch64_get_mem_s32 (cpu, address + 4));
11253
11254 if (wb == Post)
11255 address += offset;
11256
11257 if (wb != NoWriteBack)
11258 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11259 }
11260
11261 static void
11262 load_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11263 {
11264 unsigned rn = INSTR (14, 10);
11265 unsigned rd = INSTR (9, 5);
11266 unsigned rm = INSTR (4, 0);
11267 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11268
11269 /* Treat this as unalloc to make sure we don't do it. */
11270 if (rn == rm)
11271 HALT_UNALLOC;
11272
11273 offset <<= 3;
11274
11275 if (wb != Post)
11276 address += offset;
11277
11278 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u64 (cpu, address));
11279 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u64 (cpu, address + 8));
11280
11281 if (wb == Post)
11282 address += offset;
11283
11284 if (wb != NoWriteBack)
11285 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11286 }
11287
11288 static void
11289 dex_load_store_pair_gr (sim_cpu *cpu)
11290 {
11291 /* instr[31,30] = size (10=> 64-bit, 01=> signed 32-bit, 00=> 32-bit)
11292 instr[29,25] = instruction encoding: 101_0
11293 instr[26] = V : 1 if fp 0 if gp
11294 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11295 instr[22] = load/store (1=> load)
11296 instr[21,15] = signed, scaled, offset
11297 instr[14,10] = Rn
11298 instr[ 9, 5] = Rd
11299 instr[ 4, 0] = Rm. */
11300
11301 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11302 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11303
11304 switch (dispatch)
11305 {
11306 case 2: store_pair_u32 (cpu, offset, Post); return;
11307 case 3: load_pair_u32 (cpu, offset, Post); return;
11308 case 4: store_pair_u32 (cpu, offset, NoWriteBack); return;
11309 case 5: load_pair_u32 (cpu, offset, NoWriteBack); return;
11310 case 6: store_pair_u32 (cpu, offset, Pre); return;
11311 case 7: load_pair_u32 (cpu, offset, Pre); return;
11312
11313 case 11: load_pair_s32 (cpu, offset, Post); return;
11314 case 13: load_pair_s32 (cpu, offset, NoWriteBack); return;
11315 case 15: load_pair_s32 (cpu, offset, Pre); return;
11316
11317 case 18: store_pair_u64 (cpu, offset, Post); return;
11318 case 19: load_pair_u64 (cpu, offset, Post); return;
11319 case 20: store_pair_u64 (cpu, offset, NoWriteBack); return;
11320 case 21: load_pair_u64 (cpu, offset, NoWriteBack); return;
11321 case 22: store_pair_u64 (cpu, offset, Pre); return;
11322 case 23: load_pair_u64 (cpu, offset, Pre); return;
11323
11324 default:
11325 HALT_UNALLOC;
11326 }
11327 }
11328
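/* A standalone sketch (not simulator code) of how the dispatch value
   above is formed: a 64 bit STP with post-index writeback has size = 2
   (instr[31,30]) and instr[24,22] = 0b010, so (2 << 3) | 2 == 18 selects
   the store_pair_u64 (..., Post) case.  */

#include <stdint.h>
#include <assert.h>

int
main (void)
{
  uint32_t size = 2;      /* 64 bit pair.  */
  uint32_t mode_l = 2;    /* Post-index (01), store (L = 0).  */
  assert (((size << 3) | mode_l) == 18);
  return 0;
}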
11329 static void
11330 store_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
11331 {
11332 unsigned rn = INSTR (14, 10);
11333 unsigned rd = INSTR (9, 5);
11334 unsigned rm = INSTR (4, 0);
11335 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11336
11337 offset <<= 2;
11338
11339 if (wb != Post)
11340 address += offset;
11341
11342 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, rm, 0));
11343 aarch64_set_mem_u32 (cpu, address + 4, aarch64_get_vec_u32 (cpu, rn, 0));
11344
11345 if (wb == Post)
11346 address += offset;
11347
11348 if (wb != NoWriteBack)
11349 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11350 }
11351
11352 static void
11353 store_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11354 {
11355 unsigned rn = INSTR (14, 10);
11356 unsigned rd = INSTR (9, 5);
11357 unsigned rm = INSTR (4, 0);
11358 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11359
11360 offset <<= 3;
11361
11362 if (wb != Post)
11363 address += offset;
11364
11365 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, rm, 0));
11366 aarch64_set_mem_u64 (cpu, address + 8, aarch64_get_vec_u64 (cpu, rn, 0));
11367
11368 if (wb == Post)
11369 address += offset;
11370
11371 if (wb != NoWriteBack)
11372 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11373 }
11374
11375 static void
11376 store_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11377 {
11378 FRegister a;
11379 unsigned rn = INSTR (14, 10);
11380 unsigned rd = INSTR (9, 5);
11381 unsigned rm = INSTR (4, 0);
11382 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11383
11384 offset <<= 4;
11385
11386 if (wb != Post)
11387 address += offset;
11388
11389 aarch64_get_FP_long_double (cpu, rm, & a);
11390 aarch64_set_mem_long_double (cpu, address, a);
11391 aarch64_get_FP_long_double (cpu, rn, & a);
11392 aarch64_set_mem_long_double (cpu, address + 16, a);
11393
11394 if (wb == Post)
11395 address += offset;
11396
11397 if (wb != NoWriteBack)
11398 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11399 }
11400
11401 static void
11402 load_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
11403 {
11404 unsigned rn = INSTR (14, 10);
11405 unsigned rd = INSTR (9, 5);
11406 unsigned rm = INSTR (4, 0);
11407 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11408
11409 if (rm == rn)
11410 HALT_UNALLOC;
11411
11412 offset <<= 2;
11413
11414 if (wb != Post)
11415 address += offset;
11416
11417 aarch64_set_vec_u32 (cpu, rm, 0, aarch64_get_mem_u32 (cpu, address));
11418 aarch64_set_vec_u32 (cpu, rn, 0, aarch64_get_mem_u32 (cpu, address + 4));
11419
11420 if (wb == Post)
11421 address += offset;
11422
11423 if (wb != NoWriteBack)
11424 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11425 }
11426
11427 static void
11428 load_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11429 {
11430 unsigned rn = INSTR (14, 10);
11431 unsigned rd = INSTR (9, 5);
11432 unsigned rm = INSTR (4, 0);
11433 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11434
11435 if (rm == rn)
11436 HALT_UNALLOC;
11437
11438 offset <<= 3;
11439
11440 if (wb != Post)
11441 address += offset;
11442
11443 aarch64_set_vec_u64 (cpu, rm, 0, aarch64_get_mem_u64 (cpu, address));
11444 aarch64_set_vec_u64 (cpu, rn, 0, aarch64_get_mem_u64 (cpu, address + 8));
11445
11446 if (wb == Post)
11447 address += offset;
11448
11449 if (wb != NoWriteBack)
11450 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11451 }
11452
11453 static void
11454 load_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11455 {
11456 FRegister a;
11457 unsigned rn = INSTR (14, 10);
11458 unsigned rd = INSTR (9, 5);
11459 unsigned rm = INSTR (4, 0);
11460 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11461
11462 if (rm == rn)
11463 HALT_UNALLOC;
11464
11465 offset <<= 4;
11466
11467 if (wb != Post)
11468 address += offset;
11469
11470 aarch64_get_mem_long_double (cpu, address, & a);
11471 aarch64_set_FP_long_double (cpu, rm, a);
11472 aarch64_get_mem_long_double (cpu, address + 16, & a);
11473 aarch64_set_FP_long_double (cpu, rn, a);
11474
11475 if (wb == Post)
11476 address += offset;
11477
11478 if (wb != NoWriteBack)
11479 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11480 }
11481
11482 static void
11483 dex_load_store_pair_fp (sim_cpu *cpu)
11484 {
11485 /* instr[31,30] = size (10=> 128-bit, 01=> 64-bit, 00=> 32-bit)
11486 instr[29,25] = instruction encoding
11487 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11488 instr[22] = load/store (1=> load)
11489 instr[21,15] = signed, scaled, offset
11490 instr[14,10] = Rn
11491 instr[ 9, 5] = Rd
11492 instr[ 4, 0] = Rm */
11493
11494 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11495 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11496
11497 switch (dispatch)
11498 {
11499 case 2: store_pair_float (cpu, offset, Post); return;
11500 case 3: load_pair_float (cpu, offset, Post); return;
11501 case 4: store_pair_float (cpu, offset, NoWriteBack); return;
11502 case 5: load_pair_float (cpu, offset, NoWriteBack); return;
11503 case 6: store_pair_float (cpu, offset, Pre); return;
11504 case 7: load_pair_float (cpu, offset, Pre); return;
11505
11506 case 10: store_pair_double (cpu, offset, Post); return;
11507 case 11: load_pair_double (cpu, offset, Post); return;
11508 case 12: store_pair_double (cpu, offset, NoWriteBack); return;
11509 case 13: load_pair_double (cpu, offset, NoWriteBack); return;
11510 case 14: store_pair_double (cpu, offset, Pre); return;
11511 case 15: load_pair_double (cpu, offset, Pre); return;
11512
11513 case 18: store_pair_long_double (cpu, offset, Post); return;
11514 case 19: load_pair_long_double (cpu, offset, Post); return;
11515 case 20: store_pair_long_double (cpu, offset, NoWriteBack); return;
11516 case 21: load_pair_long_double (cpu, offset, NoWriteBack); return;
11517 case 22: store_pair_long_double (cpu, offset, Pre); return;
11518 case 23: load_pair_long_double (cpu, offset, Pre); return;
11519
11520 default:
11521 HALT_UNALLOC;
11522 }
11523 }
11524
11525 static inline unsigned
11526 vec_reg (unsigned v, unsigned o)
11527 {
11528	  /* Register numbers wrap modulo 32 (there are 32 vector registers).  */
11529	  return (v + o) & 0x1F;
11529 }
11530
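/* A standalone note (not simulator code): consecutive structure registers
   wrap around the end of the vector file, so an LD4 whose first
   destination is v30 targets v30, v31, v0, v1.  wrap_vreg below simply
   restates the architected modulo 32 wrap for illustration.  */

#include <assert.h>

static unsigned
wrap_vreg (unsigned v, unsigned o)
{
  return (v + o) & 0x1F;   /* 32 vector registers.  */
}

int
main (void)
{
  assert (wrap_vreg (30, 2) == 0);
  assert (wrap_vreg (30, 3) == 1);
  return 0;
}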
11531 /* Load multiple N-element structures to M consecutive registers. */
11532 static void
11533 vec_load (sim_cpu *cpu, uint64_t address, unsigned N, unsigned M)
11534 {
11535 int all = INSTR (30, 30);
11536 unsigned size = INSTR (11, 10);
11537 unsigned vd = INSTR (4, 0);
11538 unsigned rpt = (N == M) ? 1 : M;
11539 unsigned selem = N;
11540 unsigned i, j, k;
11541
11542 switch (size)
11543 {
11544 case 0: /* 8-bit operations. */
11545 for (i = 0; i < rpt; i++)
11546 for (j = 0; j < (8 + (8 * all)); j++)
11547 for (k = 0; k < selem; k++)
11548 {
11549 aarch64_set_vec_u8 (cpu, vec_reg (vd, i + k), j,
11550 aarch64_get_mem_u8 (cpu, address));
11551 address += 1;
11552 }
11553 return;
11554
11555 case 1: /* 16-bit operations. */
11556 for (i = 0; i < rpt; i++)
11557 for (j = 0; j < (4 + (4 * all)); j++)
11558 for (k = 0; k < selem; k++)
11559 {
11560 aarch64_set_vec_u16 (cpu, vec_reg (vd, i + k), j,
11561 aarch64_get_mem_u16 (cpu, address));
11562 address += 2;
11563 }
11564 return;
11565
11566 case 2: /* 32-bit operations. */
11567 for (i = 0; i < rpt; i++)
11568 for (j = 0; j < (2 + (2 * all)); j++)
11569 for (k = 0; k < selem; k++)
11570 {
11571 aarch64_set_vec_u32 (cpu, vec_reg (vd, i + k), j,
11572 aarch64_get_mem_u32 (cpu, address));
11573 address += 4;
11574 }
11575 return;
11576
11577 case 3: /* 64-bit operations. */
11578 for (i = 0; i < rpt; i++)
11579 for (j = 0; j < (1 + all); j++)
11580 for (k = 0; k < selem; k++)
11581 {
11582 aarch64_set_vec_u64 (cpu, vec_reg (vd, i + k), j,
11583 aarch64_get_mem_u64 (cpu, address));
11584 address += 8;
11585 }
11586 return;
11587 }
11588 }
11589
11590 /* Load multiple 4-element structures into four consecutive registers. */
11591 static void
11592 LD4 (sim_cpu *cpu, uint64_t address)
11593 {
11594 vec_load (cpu, address, 4, 4);
11595 }
11596
11597 /* Load multiple 3-element structures into three consecutive registers. */
11598 static void
11599 LD3 (sim_cpu *cpu, uint64_t address)
11600 {
11601 vec_load (cpu, address, 3, 3);
11602 }
11603
11604 /* Load multiple 2-element structures into two consecutive registers. */
11605 static void
11606 LD2 (sim_cpu *cpu, uint64_t address)
11607 {
11608 vec_load (cpu, address, 2, 2);
11609 }
11610
11611 /* Load multiple 1-element structures into one register. */
11612 static void
11613 LD1_1 (sim_cpu *cpu, uint64_t address)
11614 {
11615 vec_load (cpu, address, 1, 1);
11616 }
11617
11618 /* Load multiple 1-element structures into two registers. */
11619 static void
11620 LD1_2 (sim_cpu *cpu, uint64_t address)
11621 {
11622 vec_load (cpu, address, 1, 2);
11623 }
11624
11625 /* Load multiple 1-element structures into three registers. */
11626 static void
11627 LD1_3 (sim_cpu *cpu, uint64_t address)
11628 {
11629 vec_load (cpu, address, 1, 3);
11630 }
11631
11632 /* Load multiple 1-element structures into four registers. */
11633 static void
11634 LD1_4 (sim_cpu *cpu, uint64_t address)
11635 {
11636 vec_load (cpu, address, 1, 4);
11637 }
11638
11639 /* Store multiple N-element structures from M consecutive registers. */
11640 static void
11641 vec_store (sim_cpu *cpu, uint64_t address, unsigned N, unsigned M)
11642 {
11643 int all = INSTR (30, 30);
11644 unsigned size = INSTR (11, 10);
11645 unsigned vd = INSTR (4, 0);
11646 unsigned rpt = (N == M) ? 1 : M;
11647 unsigned selem = N;
11648 unsigned i, j, k;
11649
11650 switch (size)
11651 {
11652 case 0: /* 8-bit operations. */
11653 for (i = 0; i < rpt; i++)
11654 for (j = 0; j < (8 + (8 * all)); j++)
11655 for (k = 0; k < selem; k++)
11656 {
11657 aarch64_set_mem_u8
11658 (cpu, address,
11659 aarch64_get_vec_u8 (cpu, vec_reg (vd, i + k), j));
11660 address += 1;
11661 }
11662 return;
11663
11664 case 1: /* 16-bit operations. */
11665 for (i = 0; i < rpt; i++)
11666 for (j = 0; j < (4 + (4 * all)); j++)
11667 for (k = 0; k < selem; k++)
11668 {
11669 aarch64_set_mem_u16
11670 (cpu, address,
11671 aarch64_get_vec_u16 (cpu, vec_reg (vd, i + k), j));
11672 address += 2;
11673 }
11674 return;
11675
11676 case 2: /* 32-bit operations. */
11677 for (i = 0; i < rpt; i++)
11678 for (j = 0; j < (2 + (2 * all)); j++)
11679 for (k = 0; k < selem; k++)
11680 {
11681 aarch64_set_mem_u32
11682 (cpu, address,
11683 aarch64_get_vec_u32 (cpu, vec_reg (vd, i + k), j));
11684 address += 4;
11685 }
11686 return;
11687
11688 case 3: /* 64-bit operations. */
11689 for (i = 0; i < rpt; i++)
11690 for (j = 0; j < (1 + all); j++)
11691 for (k = 0; k < selem; k++)
11692 {
11693 aarch64_set_mem_u64
11694 (cpu, address,
11695 aarch64_get_vec_u64 (cpu, vec_reg (vd, i + k), j));
11696 address += 8;
11697 }
11698 return;
11699 }
11700 }
11701
11702 /* Store multiple 4-element structure from four consecutive registers. */
11703 static void
11704 ST4 (sim_cpu *cpu, uint64_t address)
11705 {
11706 vec_store (cpu, address, 4, 4);
11707 }
11708
11709 /* Store multiple 3-element structures from three consecutive registers. */
11710 static void
11711 ST3 (sim_cpu *cpu, uint64_t address)
11712 {
11713 vec_store (cpu, address, 3, 3);
11714 }
11715
11716 /* Store multiple 2-element structures from two consecutive registers. */
11717 static void
11718 ST2 (sim_cpu *cpu, uint64_t address)
11719 {
11720 vec_store (cpu, address, 2, 2);
11721 }
11722
11723 /* Store multiple 1-element structures from one register. */
11724 static void
11725 ST1_1 (sim_cpu *cpu, uint64_t address)
11726 {
11727 vec_store (cpu, address, 1, 1);
11728 }
11729
11730 /* Store multiple 1-element structures from two registers. */
11731 static void
11732 ST1_2 (sim_cpu *cpu, uint64_t address)
11733 {
11734 vec_store (cpu, address, 1, 2);
11735 }
11736
11737 /* Store multiple 1-element structures from three registers. */
11738 static void
11739 ST1_3 (sim_cpu *cpu, uint64_t address)
11740 {
11741 vec_store (cpu, address, 1, 3);
11742 }
11743
11744 /* Store multiple 1-element structures from four registers. */
11745 static void
11746 ST1_4 (sim_cpu *cpu, uint64_t address)
11747 {
11748 vec_store (cpu, address, 1, 4);
11749 }
11750
11751 #define LDn_STn_SINGLE_LANE_AND_SIZE() \
11752 do \
11753 { \
11754 switch (INSTR (15, 14)) \
11755 { \
11756 case 0: \
11757 lane = (full << 3) | (s << 2) | size; \
11758 size = 0; \
11759 break; \
11760 \
11761 case 1: \
11762 if ((size & 1) == 1) \
11763 HALT_UNALLOC; \
11764 lane = (full << 2) | (s << 1) | (size >> 1); \
11765 size = 1; \
11766 break; \
11767 \
11768 case 2: \
11769 if ((size & 2) == 2) \
11770 HALT_UNALLOC; \
11771 \
11772 if ((size & 1) == 0) \
11773 { \
11774 lane = (full << 1) | s; \
11775 size = 2; \
11776 } \
11777 else \
11778 { \
11779 if (s) \
11780 HALT_UNALLOC; \
11781 lane = full; \
11782 size = 3; \
11783 } \
11784 break; \
11785 \
11786 default: \
11787 HALT_UNALLOC; \
11788 } \
11789 } \
11790 while (0)
11791
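/* A standalone sketch (not simulator code) of the byte-element arm of the
   macro above: for INSTR (15, 14) == 0 the lane number is the
   concatenation Q:S:size, so a 128 bit register (Q = 1) addresses byte
   lanes 8-15 and a 64 bit register lanes 0-7.  byte_lane is invented for
   this illustration.  */

#include <assert.h>

static unsigned
byte_lane (unsigned q, unsigned s, unsigned size)
{
  return (q << 3) | (s << 2) | size;
}

int
main (void)
{
  assert (byte_lane (0, 0, 0) == 0);    /* Lowest 64 bit lane.  */
  assert (byte_lane (1, 1, 3) == 15);   /* Highest 128 bit lane.  */
  return 0;
}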
11792 /* Load single structure into one lane of N registers. */
11793 static void
11794 do_vec_LDn_single (sim_cpu *cpu, uint64_t address)
11795 {
11796 /* instr[31] = 0
11797 instr[30] = element selector 0=>half, 1=>all elements
11798 instr[29,24] = 00 1101
11799 instr[23] = 0=>simple, 1=>post
11800 instr[22] = 1
11801 instr[21] = width: LD1-or-LD3 (0) / LD2-or-LD4 (1)
11802 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11803 11111 (immediate post inc)
11804 instr[15,13] = opcode
11805 instr[12] = S, used for lane number
11806 instr[11,10] = size, also used for lane number
11807 instr[9,5] = address
11808 instr[4,0] = Vd */
11809
11810 unsigned full = INSTR (30, 30);
11811 unsigned vd = INSTR (4, 0);
11812 unsigned size = INSTR (11, 10);
11813 unsigned s = INSTR (12, 12);
11814 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11815 int lane = 0;
11816 int i;
11817
11818 NYI_assert (29, 24, 0x0D);
11819 NYI_assert (22, 22, 1);
11820
11821 /* Compute the lane number first (using size), and then compute size. */
11822 LDn_STn_SINGLE_LANE_AND_SIZE ();
11823
11824 for (i = 0; i < nregs; i++)
11825 switch (size)
11826 {
11827 case 0:
11828 {
11829 uint8_t val = aarch64_get_mem_u8 (cpu, address + i);
11830 aarch64_set_vec_u8 (cpu, vd + i, lane, val);
11831 break;
11832 }
11833
11834 case 1:
11835 {
11836 uint16_t val = aarch64_get_mem_u16 (cpu, address + (i * 2));
11837 aarch64_set_vec_u16 (cpu, vd + i, lane, val);
11838 break;
11839 }
11840
11841 case 2:
11842 {
11843 uint32_t val = aarch64_get_mem_u32 (cpu, address + (i * 4));
11844 aarch64_set_vec_u32 (cpu, vd + i, lane, val);
11845 break;
11846 }
11847
11848 case 3:
11849 {
11850 uint64_t val = aarch64_get_mem_u64 (cpu, address + (i * 8));
11851 aarch64_set_vec_u64 (cpu, vd + i, lane, val);
11852 break;
11853 }
11854 }
11855 }
11856
11857 /* Store single structure from one lane from N registers. */
11858 static void
11859 do_vec_STn_single (sim_cpu *cpu, uint64_t address)
11860 {
11861 /* instr[31] = 0
11862 instr[30] = element selector 0=>half, 1=>all elements
11863 instr[29,24] = 00 1101
11864 instr[23] = 0=>simple, 1=>post
11865 instr[22] = 0
11866	     instr[21] = width: ST1-or-ST3 (0) / ST2-or-ST4 (1)
11867 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11868 11111 (immediate post inc)
11869 instr[15,13] = opcode
11870 instr[12] = S, used for lane number
11871 instr[11,10] = size, also used for lane number
11872 instr[9,5] = address
11873 instr[4,0] = Vd */
11874
11875 unsigned full = INSTR (30, 30);
11876 unsigned vd = INSTR (4, 0);
11877 unsigned size = INSTR (11, 10);
11878 unsigned s = INSTR (12, 12);
11879 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11880 int lane = 0;
11881 int i;
11882
11883 NYI_assert (29, 24, 0x0D);
11884 NYI_assert (22, 22, 0);
11885
11886 /* Compute the lane number first (using size), and then compute size. */
11887 LDn_STn_SINGLE_LANE_AND_SIZE ();
11888
11889 for (i = 0; i < nregs; i++)
11890 switch (size)
11891 {
11892 case 0:
11893 {
11894 uint8_t val = aarch64_get_vec_u8 (cpu, vd + i, lane);
11895 aarch64_set_mem_u8 (cpu, address + i, val);
11896 break;
11897 }
11898
11899 case 1:
11900 {
11901 uint16_t val = aarch64_get_vec_u16 (cpu, vd + i, lane);
11902 aarch64_set_mem_u16 (cpu, address + (i * 2), val);
11903 break;
11904 }
11905
11906 case 2:
11907 {
11908 uint32_t val = aarch64_get_vec_u32 (cpu, vd + i, lane);
11909 aarch64_set_mem_u32 (cpu, address + (i * 4), val);
11910 break;
11911 }
11912
11913 case 3:
11914 {
11915 uint64_t val = aarch64_get_vec_u64 (cpu, vd + i, lane);
11916 aarch64_set_mem_u64 (cpu, address + (i * 8), val);
11917 break;
11918 }
11919 }
11920 }
11921
11922 /* Load single structure into all lanes of N registers. */
11923 static void
11924 do_vec_LDnR (sim_cpu *cpu, uint64_t address)
11925 {
11926 /* instr[31] = 0
11927 instr[30] = element selector 0=>half, 1=>all elements
11928 instr[29,24] = 00 1101
11929 instr[23] = 0=>simple, 1=>post
11930 instr[22] = 1
11931 instr[21] = width: LD1R-or-LD3R (0) / LD2R-or-LD4R (1)
11932 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11933 11111 (immediate post inc)
11934 instr[15,14] = 11
11935 instr[13] = width: LD1R-or-LD2R (0) / LD3R-or-LD4R (1)
11936 instr[12] = 0
11937 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11938 10=> word(s), 11=> double(d)
11939 instr[9,5] = address
11940 instr[4,0] = Vd */
11941
11942 unsigned full = INSTR (30, 30);
11943 unsigned vd = INSTR (4, 0);
11944 unsigned size = INSTR (11, 10);
11945 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11946 int i, n;
11947
11948 NYI_assert (29, 24, 0x0D);
11949 NYI_assert (22, 22, 1);
11950 NYI_assert (15, 14, 3);
11951 NYI_assert (12, 12, 0);
11952
11953 for (n = 0; n < nregs; n++)
11954 switch (size)
11955 {
11956 case 0:
11957 {
11958 uint8_t val = aarch64_get_mem_u8 (cpu, address + n);
11959 for (i = 0; i < (full ? 16 : 8); i++)
11960 aarch64_set_vec_u8 (cpu, vd + n, i, val);
11961 break;
11962 }
11963
11964 case 1:
11965 {
11966 uint16_t val = aarch64_get_mem_u16 (cpu, address + (n * 2));
11967 for (i = 0; i < (full ? 8 : 4); i++)
11968 aarch64_set_vec_u16 (cpu, vd + n, i, val);
11969 break;
11970 }
11971
11972 case 2:
11973 {
11974 uint32_t val = aarch64_get_mem_u32 (cpu, address + (n * 4));
11975 for (i = 0; i < (full ? 4 : 2); i++)
11976 aarch64_set_vec_u32 (cpu, vd + n, i, val);
11977 break;
11978 }
11979
11980 case 3:
11981 {
11982 uint64_t val = aarch64_get_mem_u64 (cpu, address + (n * 8));
11983 for (i = 0; i < (full ? 2 : 1); i++)
11984 aarch64_set_vec_u64 (cpu, vd + n, i, val);
11985 break;
11986 }
11987
11988 default:
11989 HALT_UNALLOC;
11990 }
11991 }
11992
11993 static void
11994 do_vec_load_store (sim_cpu *cpu)
11995 {
11996 /* {LD|ST}<N> {Vd..Vd+N}, vaddr
11997
11998 instr[31] = 0
11999 instr[30] = element selector 0=>half, 1=>all elements
12000 instr[29,25] = 00110
12001 instr[24] = 0=>multiple struct, 1=>single struct
12002 instr[23] = 0=>simple, 1=>post
12003 instr[22] = 0=>store, 1=>load
12004 instr[21] = 0 (LDn) / small(0)-large(1) selector (LDnR)
12005 instr[20,16] = 00000 (simple), Vinc (reg-post-inc, no SP),
12006 11111 (immediate post inc)
12007 instr[15,12] = elements and destinations. eg for load:
12008 0000=>LD4 => load multiple 4-element to
12009 four consecutive registers
12010 0100=>LD3 => load multiple 3-element to
12011 three consecutive registers
12012 1000=>LD2 => load multiple 2-element to
12013 two consecutive registers
12014 0010=>LD1 => load multiple 1-element to
12015 four consecutive registers
12016 0110=>LD1 => load multiple 1-element to
12017 three consecutive registers
12018 1010=>LD1 => load multiple 1-element to
12019 two consecutive registers
12020 0111=>LD1 => load multiple 1-element to
12021 one register
12022	                      1100=>LD1R,LD2R
12023	                      1110=>LD3R,LD4R
12024 instr[11,10] = element size 00=> byte(b), 01=> half(h),
12025 10=> word(s), 11=> double(d)
12026 instr[9,5] = Vn, can be SP
12027 instr[4,0] = Vd */
12028
12029 int single;
12030 int post;
12031 int load;
12032 unsigned vn;
12033 uint64_t address;
12034 int type;
12035
12036 if (INSTR (31, 31) != 0 || INSTR (29, 25) != 0x06)
12037 HALT_NYI;
12038
12039 single = INSTR (24, 24);
12040 post = INSTR (23, 23);
12041 load = INSTR (22, 22);
12042 type = INSTR (15, 12);
12043 vn = INSTR (9, 5);
12044 address = aarch64_get_reg_u64 (cpu, vn, SP_OK);
12045
12046 if (! single && INSTR (21, 21) != 0)
12047 HALT_UNALLOC;
12048
12049 if (post)
12050 {
12051 unsigned vm = INSTR (20, 16);
12052
12053 if (vm == R31)
12054 {
12055 unsigned sizeof_operation;
12056
12057 if (single)
12058 {
12059 if ((type >= 0) && (type <= 11))
12060 {
12061 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
12062 switch (INSTR (15, 14))
12063 {
12064 case 0:
12065 sizeof_operation = nregs * 1;
12066 break;
12067 case 1:
12068 sizeof_operation = nregs * 2;
12069 break;
12070 case 2:
12071 if (INSTR (10, 10) == 0)
12072 sizeof_operation = nregs * 4;
12073 else
12074 sizeof_operation = nregs * 8;
12075 break;
12076 default:
12077 HALT_UNALLOC;
12078 }
12079 }
12080 else if (type == 0xC)
12081 {
12082 sizeof_operation = INSTR (21, 21) ? 2 : 1;
12083 sizeof_operation <<= INSTR (11, 10);
12084 }
12085 else if (type == 0xE)
12086 {
12087 sizeof_operation = INSTR (21, 21) ? 4 : 3;
12088 sizeof_operation <<= INSTR (11, 10);
12089 }
12090 else
12091 HALT_UNALLOC;
12092 }
12093 else
12094 {
12095 switch (type)
12096 {
12097 case 0: sizeof_operation = 32; break;
12098 case 4: sizeof_operation = 24; break;
12099 case 8: sizeof_operation = 16; break;
12100
12101 case 7:
12102 /* One register, immediate offset variant. */
12103 sizeof_operation = 8;
12104 break;
12105
12106 case 10:
12107 /* Two registers, immediate offset variant. */
12108 sizeof_operation = 16;
12109 break;
12110
12111 case 6:
12112 /* Three registers, immediate offset variant. */
12113 sizeof_operation = 24;
12114 break;
12115
12116 case 2:
12117 /* Four registers, immediate offset variant. */
12118 sizeof_operation = 32;
12119 break;
12120
12121 default:
12122 HALT_UNALLOC;
12123 }
12124
12125 if (INSTR (30, 30))
12126 sizeof_operation *= 2;
12127 }
12128
12129 aarch64_set_reg_u64 (cpu, vn, SP_OK, address + sizeof_operation);
12130 }
12131 else
12132 aarch64_set_reg_u64 (cpu, vn, SP_OK,
12133 address + aarch64_get_reg_u64 (cpu, vm, NO_SP));
12134 }
12135 else
12136 {
12137 NYI_assert (20, 16, 0);
12138 }
12139
12140 if (single)
12141 {
12142 if (load)
12143 {
12144 if ((type >= 0) && (type <= 11))
12145 do_vec_LDn_single (cpu, address);
12146 else if ((type == 0xC) || (type == 0xE))
12147 do_vec_LDnR (cpu, address);
12148 else
12149 HALT_UNALLOC;
12150 return;
12151 }
12152
12153 /* Stores. */
12154 if ((type >= 0) && (type <= 11))
12155 {
12156 do_vec_STn_single (cpu, address);
12157 return;
12158 }
12159
12160 HALT_UNALLOC;
12161 }
12162
12163 if (load)
12164 {
12165 switch (type)
12166 {
12167 case 0: LD4 (cpu, address); return;
12168 case 4: LD3 (cpu, address); return;
12169 case 8: LD2 (cpu, address); return;
12170 case 2: LD1_4 (cpu, address); return;
12171 case 6: LD1_3 (cpu, address); return;
12172 case 10: LD1_2 (cpu, address); return;
12173 case 7: LD1_1 (cpu, address); return;
12174
12175 default:
12176 HALT_UNALLOC;
12177 }
12178 }
12179
12180 /* Stores. */
12181 switch (type)
12182 {
12183 case 0: ST4 (cpu, address); return;
12184 case 4: ST3 (cpu, address); return;
12185 case 8: ST2 (cpu, address); return;
12186 case 2: ST1_4 (cpu, address); return;
12187 case 6: ST1_3 (cpu, address); return;
12188 case 10: ST1_2 (cpu, address); return;
12189 case 7: ST1_1 (cpu, address); return;
12190 default:
12191 HALT_UNALLOC;
12192 }
12193 }
12194
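/* A standalone sketch (not simulator code) of the immediate post
   increment computed above for the multiple-structure forms: the type
   field selects the register count (in 8 byte units for a 64 bit
   arrangement) and a 128 bit arrangement (instr[30] set) doubles it,
   e.g. LD1 {v0.16b-v3.16b}, [x0], #64 has type 2 and Q = 1.
   post_inc_bytes is invented for this illustration.  */

#include <assert.h>

static unsigned
post_inc_bytes (int type, int q)
{
  unsigned n;

  switch (type)
    {
    case 0: case 2:  n = 32; break;  /* Four registers.  */
    case 4: case 6:  n = 24; break;  /* Three registers.  */
    case 8: case 10: n = 16; break;  /* Two registers.  */
    case 7:          n = 8;  break;  /* One register.  */
    default: return 0;
    }
  return q ? n * 2 : n;
}

int
main (void)
{
  assert (post_inc_bytes (2, 1) == 64);
  assert (post_inc_bytes (4, 0) == 24);
  return 0;
}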
12195 static void
12196 dexLdSt (sim_cpu *cpu)
12197 {
12198 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
12199 assert group == GROUP_LDST_0100 || group == GROUP_LDST_0110 ||
12200 group == GROUP_LDST_1100 || group == GROUP_LDST_1110
12201 bits [29,28:26] of a LS are the secondary dispatch vector. */
12202 uint32_t group2 = dispatchLS (aarch64_get_instr (cpu));
12203
12204 switch (group2)
12205 {
12206 case LS_EXCL_000:
12207 dexLoadExclusive (cpu); return;
12208
12209 case LS_LIT_010:
12210 case LS_LIT_011:
12211 dexLoadLiteral (cpu); return;
12212
12213 case LS_OTHER_110:
12214 case LS_OTHER_111:
12215 dexLoadOther (cpu); return;
12216
12217 case LS_ADVSIMD_001:
12218 do_vec_load_store (cpu); return;
12219
12220 case LS_PAIR_100:
12221 dex_load_store_pair_gr (cpu); return;
12222
12223 case LS_PAIR_101:
12224 dex_load_store_pair_fp (cpu); return;
12225
12226 default:
12227 /* Should never reach here. */
12228 HALT_NYI;
12229 }
12230 }
12231
12232 /* Specific decode and execute for group Data Processing Register. */
12233
12234 static void
12235 dexLogicalShiftedRegister (sim_cpu *cpu)
12236 {
12237 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12238 instr[30,29] = op
12239 instr[28:24] = 01010
12240 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> ROR
12241 instr[21] = N
12242 instr[20,16] = Rm
12243 instr[15,10] = count : must be 0xxxxx for 32 bit
12244 instr[9,5] = Rn
12245 instr[4,0] = Rd */
12246
12247 uint32_t size = INSTR (31, 31);
12248 Shift shiftType = INSTR (23, 22);
12249 uint32_t count = INSTR (15, 10);
12250
12251	  /* 32 bit operations must have count[5] = 0,
12252	     or else we have an UNALLOC.  */
12253 if (size == 0 && uimm (count, 5, 5))
12254 HALT_UNALLOC;
12255
12256 /* Dispatch on size:op:N. */
12257 switch ((INSTR (31, 29) << 1) | INSTR (21, 21))
12258 {
12259 case 0: and32_shift (cpu, shiftType, count); return;
12260 case 1: bic32_shift (cpu, shiftType, count); return;
12261 case 2: orr32_shift (cpu, shiftType, count); return;
12262 case 3: orn32_shift (cpu, shiftType, count); return;
12263 case 4: eor32_shift (cpu, shiftType, count); return;
12264 case 5: eon32_shift (cpu, shiftType, count); return;
12265 case 6: ands32_shift (cpu, shiftType, count); return;
12266 case 7: bics32_shift (cpu, shiftType, count); return;
12267 case 8: and64_shift (cpu, shiftType, count); return;
12268 case 9: bic64_shift (cpu, shiftType, count); return;
12269	    case 10: orr64_shift  (cpu, shiftType, count); return;
12270	    case 11: orn64_shift  (cpu, shiftType, count); return;
12271	    case 12: eor64_shift  (cpu, shiftType, count); return;
12272	    case 13: eon64_shift  (cpu, shiftType, count); return;
12273	    case 14: ands64_shift (cpu, shiftType, count); return;
12274	    case 15: bics64_shift (cpu, shiftType, count); return;
12275 }
12276 }
12277
12278 /* 32 bit conditional select. */
12279 static void
12280 csel32 (sim_cpu *cpu, CondCode cc)
12281 {
12282 unsigned rm = INSTR (20, 16);
12283 unsigned rn = INSTR (9, 5);
12284 unsigned rd = INSTR (4, 0);
12285
12286 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12287 testConditionCode (cpu, cc)
12288 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12289 : aarch64_get_reg_u32 (cpu, rm, NO_SP));
12290 }
12291
12292 /* 64 bit conditional select. */
12293 static void
12294 csel64 (sim_cpu *cpu, CondCode cc)
12295 {
12296 unsigned rm = INSTR (20, 16);
12297 unsigned rn = INSTR (9, 5);
12298 unsigned rd = INSTR (4, 0);
12299
12300 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12301 testConditionCode (cpu, cc)
12302 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12303 : aarch64_get_reg_u64 (cpu, rm, NO_SP));
12304 }
12305
12306 /* 32 bit conditional increment. */
12307 static void
12308 csinc32 (sim_cpu *cpu, CondCode cc)
12309 {
12310 unsigned rm = INSTR (20, 16);
12311 unsigned rn = INSTR (9, 5);
12312 unsigned rd = INSTR (4, 0);
12313
12314 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12315 testConditionCode (cpu, cc)
12316 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12317 : aarch64_get_reg_u32 (cpu, rm, NO_SP) + 1);
12318 }
12319
12320 /* 64 bit conditional increment. */
12321 static void
12322 csinc64 (sim_cpu *cpu, CondCode cc)
12323 {
12324 unsigned rm = INSTR (20, 16);
12325 unsigned rn = INSTR (9, 5);
12326 unsigned rd = INSTR (4, 0);
12327
12328 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12329 testConditionCode (cpu, cc)
12330 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12331 : aarch64_get_reg_u64 (cpu, rm, NO_SP) + 1);
12332 }
12333
12334 /* 32 bit conditional invert. */
12335 static void
12336 csinv32 (sim_cpu *cpu, CondCode cc)
12337 {
12338 unsigned rm = INSTR (20, 16);
12339 unsigned rn = INSTR (9, 5);
12340 unsigned rd = INSTR (4, 0);
12341
12342 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12343 testConditionCode (cpu, cc)
12344 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12345 : ~ aarch64_get_reg_u32 (cpu, rm, NO_SP));
12346 }
12347
12348 /* 64 bit conditional invert. */
12349 static void
12350 csinv64 (sim_cpu *cpu, CondCode cc)
12351 {
12352 unsigned rm = INSTR (20, 16);
12353 unsigned rn = INSTR (9, 5);
12354 unsigned rd = INSTR (4, 0);
12355
12356 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12357 testConditionCode (cpu, cc)
12358 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12359 : ~ aarch64_get_reg_u64 (cpu, rm, NO_SP));
12360 }
12361
12362 /* 32 bit conditional negate. */
12363 static void
12364 csneg32 (sim_cpu *cpu, CondCode cc)
12365 {
12366 unsigned rm = INSTR (20, 16);
12367 unsigned rn = INSTR (9, 5);
12368 unsigned rd = INSTR (4, 0);
12369
12370 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12371 testConditionCode (cpu, cc)
12372 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12373 : - aarch64_get_reg_u32 (cpu, rm, NO_SP));
12374 }
12375
12376 /* 64 bit conditional negate. */
12377 static void
12378 csneg64 (sim_cpu *cpu, CondCode cc)
12379 {
12380 unsigned rm = INSTR (20, 16);
12381 unsigned rn = INSTR (9, 5);
12382 unsigned rd = INSTR (4, 0);
12383
12384 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12385 testConditionCode (cpu, cc)
12386 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12387 : - aarch64_get_reg_u64 (cpu, rm, NO_SP));
12388 }
12389
12390 static void
12391 dexCondSelect (sim_cpu *cpu)
12392 {
12393 /* instr[28,21] = 11011011
12394 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12395 instr[30:11,10] = op : 000 ==> CSEL, 001 ==> CSINC,
12396 100 ==> CSINV, 101 ==> CSNEG,
12397 _1_ ==> UNALLOC
12398 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12399 instr[15,12] = cond
12400      instr[9,5] = rn, instr[4,0] = rd  */
12401
12402 CondCode cc = INSTR (15, 12);
12403 uint32_t S = INSTR (29, 29);
12404 uint32_t op2 = INSTR (11, 10);
12405
12406 if (S == 1)
12407 HALT_UNALLOC;
12408
12409 if (op2 & 0x2)
12410 HALT_UNALLOC;
12411
12412 switch ((INSTR (31, 30) << 1) | op2)
12413 {
12414 case 0: csel32 (cpu, cc); return;
12415 case 1: csinc32 (cpu, cc); return;
12416 case 2: csinv32 (cpu, cc); return;
12417 case 3: csneg32 (cpu, cc); return;
12418 case 4: csel64 (cpu, cc); return;
12419 case 5: csinc64 (cpu, cc); return;
12420 case 6: csinv64 (cpu, cc); return;
12421 case 7: csneg64 (cpu, cc); return;
12422 }
12423 }
12424
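/* N.B. the conditional-select group above also covers the usual aliases
   (our own illustration, not from the original source): e.g. CSET Wd, cond
   assembles as CSINC Wd, WZR, WZR, invert (cond), so csinc32 then yields
   cond ? 1 : 0.  */
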
12425 /* Some helpers for counting leading 1 or 0 bits. */
12426
12427 /* Counts the number of leading bits which are the same
12428 in a 32 bit value, returning a count in the range 1 to 32. */
12429 static uint32_t
12430 leading32 (uint32_t value)
12431 {
12432 int32_t mask = 0xffff0000;
12433 uint32_t count = 16; /* Counts number of bits set in mask. */
12434 uint32_t lo = 1; /* Lower bound for number of sign bits. */
12435 uint32_t hi = 32; /* Upper bound for number of sign bits. */
12436
12437 while (lo + 1 < hi)
12438 {
12439 int32_t test = (value & mask);
12440
12441 if (test == 0 || test == mask)
12442 {
12443 lo = count;
12444 count = (lo + hi) / 2;
12445 mask >>= (count - lo);
12446 }
12447 else
12448 {
12449 hi = count;
12450 count = (lo + hi) / 2;
12451 mask <<= hi - count;
12452 }
12453 }
12454
12455 if (lo != hi)
12456 {
12457 int32_t test;
12458
12459 mask >>= 1;
12460 test = (value & mask);
12461
12462 if (test == 0 || test == mask)
12463 count = hi;
12464 else
12465 count = lo;
12466 }
12467
12468 return count;
12469 }
12470
12471 /* Counts the number of leading bits which are the same
12472 in a 64 bit value, returning a count in the range 1 to 64. */
12473 static uint64_t
12474 leading64 (uint64_t value)
12475 {
12476 int64_t mask = 0xffffffff00000000LL;
12477 uint64_t count = 32; /* Counts number of bits set in mask. */
12478 uint64_t lo = 1; /* Lower bound for number of sign bits. */
12479 uint64_t hi = 64; /* Upper bound for number of sign bits. */
12480
12481 while (lo + 1 < hi)
12482 {
12483 int64_t test = (value & mask);
12484
12485 if (test == 0 || test == mask)
12486 {
12487 lo = count;
12488 count = (lo + hi) / 2;
12489 mask >>= (count - lo);
12490 }
12491 else
12492 {
12493 hi = count;
12494 count = (lo + hi) / 2;
12495 mask <<= hi - count;
12496 }
12497 }
12498
12499 if (lo != hi)
12500 {
12501 int64_t test;
12502
12503 mask >>= 1;
12504 test = (value & mask);
12505
12506 if (test == 0 || test == mask)
12507 count = hi;
12508 else
12509 count = lo;
12510 }
12511
12512 return count;
12513 }
12514
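/* A minimal reference sketch (ours, not used by the simulator) of what the
   binary searches above compute: walk down from bit 62 while each bit still
   matches the sign bit.  Handy when cross-checking leading64; the function
   name is ours, not part of the original code.  */
#if 0
static uint64_t
leading64_reference (uint64_t value)
{
  uint64_t sign = value >> 63;
  uint64_t count = 1;
  int i;

  /* Stop at the first bit that differs from the sign bit.  */
  for (i = 62; i >= 0; i--)
    if (((value >> i) & 1) == sign)
      count++;
    else
      break;

  return count;
}
#endif
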
12515 /* Bit operations. */
12516 /* N.B. register args may not be SP. */
12517
12518 /* 32 bit count leading sign bits. */
12519 static void
12520 cls32 (sim_cpu *cpu)
12521 {
12522 unsigned rn = INSTR (9, 5);
12523 unsigned rd = INSTR (4, 0);
12524
12525 /* N.B. the result needs to exclude the leading bit. */
12526 aarch64_set_reg_u64
12527 (cpu, rd, NO_SP, leading32 (aarch64_get_reg_u32 (cpu, rn, NO_SP)) - 1);
12528 }
12529
12530 /* 64 bit count leading sign bits. */
12531 static void
12532 cls64 (sim_cpu *cpu)
12533 {
12534 unsigned rn = INSTR (9, 5);
12535 unsigned rd = INSTR (4, 0);
12536
12537 /* N.B. the result needs to exclude the leading bit. */
12538 aarch64_set_reg_u64
12539 (cpu, rd, NO_SP, leading64 (aarch64_get_reg_u64 (cpu, rn, NO_SP)) - 1);
12540 }
12541
12542 /* 32 bit count leading zero bits. */
12543 static void
12544 clz32 (sim_cpu *cpu)
12545 {
12546 unsigned rn = INSTR (9, 5);
12547 unsigned rd = INSTR (4, 0);
12548 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12549
12550 /* if the sign (top) bit is set then the count is 0. */
12551 if (pick32 (value, 31, 31))
12552 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12553 else
12554 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading32 (value));
12555 }
12556
12557 /* 64 bit count leading zero bits. */
12558 static void
12559 clz64 (sim_cpu *cpu)
12560 {
12561 unsigned rn = INSTR (9, 5);
12562 unsigned rd = INSTR (4, 0);
12563 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12564
12565 /* if the sign (top) bit is set then the count is 0. */
12566 if (pick64 (value, 63, 63))
12567 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12568 else
12569 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading64 (value));
12570 }
12571
12572 /* 32 bit reverse bits. */
12573 static void
12574 rbit32 (sim_cpu *cpu)
12575 {
12576 unsigned rn = INSTR (9, 5);
12577 unsigned rd = INSTR (4, 0);
12578 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12579 uint32_t result = 0;
12580 int i;
12581
12582 for (i = 0; i < 32; i++)
12583 {
12584 result <<= 1;
12585 result |= (value & 1);
12586 value >>= 1;
12587 }
12588 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12589 }
12590
12591 /* 64 bit reverse bits. */
12592 static void
12593 rbit64 (sim_cpu *cpu)
12594 {
12595 unsigned rn = INSTR (9, 5);
12596 unsigned rd = INSTR (4, 0);
12597 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12598 uint64_t result = 0;
12599 int i;
12600
12601 for (i = 0; i < 64; i++)
12602 {
12603 result <<= 1;
12604 result |= (value & 1UL);
12605 value >>= 1;
12606 }
12607 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12608 }
12609
12610 /* 32 bit reverse bytes. */
12611 static void
12612 rev32 (sim_cpu *cpu)
12613 {
12614 unsigned rn = INSTR (9, 5);
12615 unsigned rd = INSTR (4, 0);
12616 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12617 uint32_t result = 0;
12618 int i;
12619
12620 for (i = 0; i < 4; i++)
12621 {
12622 result <<= 8;
12623 result |= (value & 0xff);
12624 value >>= 8;
12625 }
12626 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12627 }
12628
12629 /* 64 bit reverse bytes. */
12630 static void
12631 rev64 (sim_cpu *cpu)
12632 {
12633 unsigned rn = INSTR (9, 5);
12634 unsigned rd = INSTR (4, 0);
12635 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12636 uint64_t result = 0;
12637 int i;
12638
12639 for (i = 0; i < 8; i++)
12640 {
12641 result <<= 8;
12642 result |= (value & 0xffULL);
12643 value >>= 8;
12644 }
12645 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12646 }
12647
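/* 64 bit reverse bytes in each 32 bit word.
   This is the behaviour the 64 bit form of REV32 needs (see the dispatch
   in dexDataProc1Source below); the helper follows the shift-and-mask
   style of revh64, and the name revw64 is ours.  */
static void
revw64 (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
  uint64_t result = 0;
  int i;

  for (i = 0; i < 4; i++)
    {
      result <<= 8;
      result |= (value & 0x000000ff000000ffULL);
      value >>= 8;
    }
  aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
}
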
12648 /* 32 bit reverse shorts. */
12649 /* N.B. this reverses the order of the bytes in each half word. */
12650 static void
12651 revh32 (sim_cpu *cpu)
12652 {
12653 unsigned rn = INSTR (9, 5);
12654 unsigned rd = INSTR (4, 0);
12655 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12656 uint32_t result = 0;
12657 int i;
12658
12659 for (i = 0; i < 2; i++)
12660 {
12661 result <<= 8;
12662 result |= (value & 0x00ff00ff);
12663 value >>= 8;
12664 }
12665 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12666 }
12667
12668 /* 64 bit reverse shorts. */
12669 /* N.B. this reverses the order of the bytes in each half word. */
12670 static void
12671 revh64 (sim_cpu *cpu)
12672 {
12673 unsigned rn = INSTR (9, 5);
12674 unsigned rd = INSTR (4, 0);
12675 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12676 uint64_t result = 0;
12677 int i;
12678
12679 for (i = 0; i < 2; i++)
12680 {
12681 result <<= 8;
12682 result |= (value & 0x00ff00ff00ff00ffULL);
12683 value >>= 8;
12684 }
12685 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12686 }
12687
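/* Worked examples for the reversal helpers above (our own illustration):
     rbit64 (0x0000000000000001) == 0x8000000000000000
     rev64  (0x0102030405060708) == 0x0807060504030201
     revw64 (0x0102030405060708) == 0x0403020108070605
     revh64 (0x0102030405060708) == 0x0201040306050807  */
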
12688 static void
12689 dexDataProc1Source (sim_cpu *cpu)
12690 {
12691 /* instr[30] = 1
12692 instr[28,21] = 11010110
12693 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12694 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12695 instr[20,16] = opcode2 : 00000 ==> ok, ow ==> UNALLOC
12696 instr[15,10] = opcode : 000000 ==> RBIT, 000001 ==> REV16,
12697 000010 ==> REV (REV32 when 64 bit), 000011 ==> REV (64 bit only)
12698 000100 ==> CLZ, 000101 ==> CLS
12699 ow ==> UNALLOC
12700 instr[9,5] = rn : may not be SP
12701 instr[4,0] = rd : may not be SP. */
12702
12703 uint32_t S = INSTR (29, 29);
12704 uint32_t opcode2 = INSTR (20, 16);
12705 uint32_t opcode = INSTR (15, 10);
12706 uint32_t dispatch = ((INSTR (31, 31) << 3) | opcode);
12707
12708 if (S == 1)
12709 HALT_UNALLOC;
12710
12711 if (opcode2 != 0)
12712 HALT_UNALLOC;
12713
12714 if (opcode & 0x38)
12715 HALT_UNALLOC;
12716
12717 switch (dispatch)
12718 {
12719 case 0: rbit32 (cpu); return;
12720 case 1: revh32 (cpu); return;
12721 case 2: rev32 (cpu); return;
12722 case 4: clz32 (cpu); return;
12723 case 5: cls32 (cpu); return;
12724 case 8: rbit64 (cpu); return;
12725 case 9: revh64 (cpu); return;
12726 case 10:revw64 (cpu); return;
12727 case 11:rev64 (cpu); return;
12728 case 12:clz64 (cpu); return;
12729 case 13:cls64 (cpu); return;
12730 default: HALT_UNALLOC;
12731 }
12732 }
12733
12734 /* Variable shift.
12735 Shifts by count supplied in register.
12736 N.B. register args may not be SP.
12737 These all use the shifted auxiliary function for
12738 simplicity and clarity. Writing the actual shift
12739 inline would avoid a branch and so be faster but
12740 would also necessitate getting signs right. */
12741
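/* N.B. only the low 5 bits (32 bit) or low 6 bits (64 bit) of the count
   register are consulted, as the masks below show: a count of 35 in a W
   register therefore shifts by 3.  */
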
12742 /* 32 bit arithmetic shift right. */
12743 static void
12744 asrv32 (sim_cpu *cpu)
12745 {
12746 unsigned rm = INSTR (20, 16);
12747 unsigned rn = INSTR (9, 5);
12748 unsigned rd = INSTR (4, 0);
12749
12750 aarch64_set_reg_u64
12751 (cpu, rd, NO_SP,
12752 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ASR,
12753 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12754 }
12755
12756 /* 64 bit arithmetic shift right. */
12757 static void
12758 asrv64 (sim_cpu *cpu)
12759 {
12760 unsigned rm = INSTR (20, 16);
12761 unsigned rn = INSTR (9, 5);
12762 unsigned rd = INSTR (4, 0);
12763
12764 aarch64_set_reg_u64
12765 (cpu, rd, NO_SP,
12766 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ASR,
12767 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12768 }
12769
12770 /* 32 bit logical shift left. */
12771 static void
12772 lslv32 (sim_cpu *cpu)
12773 {
12774 unsigned rm = INSTR (20, 16);
12775 unsigned rn = INSTR (9, 5);
12776 unsigned rd = INSTR (4, 0);
12777
12778 aarch64_set_reg_u64
12779 (cpu, rd, NO_SP,
12780 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSL,
12781 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12782 }
12783
12784 /* 64 bit logical shift left. */
12785 static void
12786 lslv64 (sim_cpu *cpu)
12787 {
12788 unsigned rm = INSTR (20, 16);
12789 unsigned rn = INSTR (9, 5);
12790 unsigned rd = INSTR (4, 0);
12791
12792 aarch64_set_reg_u64
12793 (cpu, rd, NO_SP,
12794 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSL,
12795 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12796 }
12797
12798 /* 32 bit logical shift right. */
12799 static void
12800 lsrv32 (sim_cpu *cpu)
12801 {
12802 unsigned rm = INSTR (20, 16);
12803 unsigned rn = INSTR (9, 5);
12804 unsigned rd = INSTR (4, 0);
12805
12806 aarch64_set_reg_u64
12807 (cpu, rd, NO_SP,
12808 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSR,
12809 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12810 }
12811
12812 /* 64 bit logical shift right. */
12813 static void
12814 lsrv64 (sim_cpu *cpu)
12815 {
12816 unsigned rm = INSTR (20, 16);
12817 unsigned rn = INSTR (9, 5);
12818 unsigned rd = INSTR (4, 0);
12819
12820 aarch64_set_reg_u64
12821 (cpu, rd, NO_SP,
12822 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSR,
12823 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12824 }
12825
12826 /* 32 bit rotate right. */
12827 static void
12828 rorv32 (sim_cpu *cpu)
12829 {
12830 unsigned rm = INSTR (20, 16);
12831 unsigned rn = INSTR (9, 5);
12832 unsigned rd = INSTR (4, 0);
12833
12834 aarch64_set_reg_u64
12835 (cpu, rd, NO_SP,
12836 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ROR,
12837 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12838 }
12839
12840 /* 64 bit rotate right. */
12841 static void
12842 rorv64 (sim_cpu *cpu)
12843 {
12844 unsigned rm = INSTR (20, 16);
12845 unsigned rn = INSTR (9, 5);
12846 unsigned rd = INSTR (4, 0);
12847
12848 aarch64_set_reg_u64
12849 (cpu, rd, NO_SP,
12850 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ROR,
12851 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12852 }
12853
12854
12855 /* divide. */
12856
12857 /* 32 bit signed divide. */
12858 static void
12859 sdiv32 (sim_cpu *cpu)
12860 {
12861 unsigned rm = INSTR (20, 16);
12862 unsigned rn = INSTR (9, 5);
12863 unsigned rd = INSTR (4, 0);
12864 /* N.B. the pseudo-code does the divide using 64 bit data. */
12865 /* C division truncates towards zero, as SDIV requires. */
12866 int64_t dividend = aarch64_get_reg_s32 (cpu, rn, NO_SP);
12867 int64_t divisor = aarch64_get_reg_s32 (cpu, rm, NO_SP);
12868
12869 aarch64_set_reg_s64 (cpu, rd, NO_SP,
12870 divisor ? ((int32_t) (dividend / divisor)) : 0);
12871 }
12872
12873 /* 64 bit signed divide. */
12874 static void
12875 sdiv64 (sim_cpu *cpu)
12876 {
12877 unsigned rm = INSTR (20, 16);
12878 unsigned rn = INSTR (9, 5);
12879 unsigned rd = INSTR (4, 0);
12880
12881 /* C division truncates towards zero, as SDIV requires; SDIV defines
12882 the INT64_MIN / -1 overflow case, undefined in C, to yield INT64_MIN. */
12883 int64_t dividend = aarch64_get_reg_s64 (cpu, rn, NO_SP);
12884 int64_t divisor = aarch64_get_reg_s64 (cpu, rm, NO_SP);
12885 aarch64_set_reg_s64 (cpu, rd, NO_SP, divisor == 0 ? 0
12886 : (dividend == INT64_MIN && divisor == -1) ? INT64_MIN : dividend / divisor);
12887 }
12888
12889 /* 32 bit unsigned divide. */
12890 static void
12891 udiv32 (sim_cpu *cpu)
12892 {
12893 unsigned rm = INSTR (20, 16);
12894 unsigned rn = INSTR (9, 5);
12895 unsigned rd = INSTR (4, 0);
12896
12897 /* N.B. the pseudo-code does the divide using 64 bit data. */
12898 uint64_t dividend = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12899 uint64_t divisor = aarch64_get_reg_u32 (cpu, rm, NO_SP);
12900
12901 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12902 divisor ? (uint32_t) (dividend / divisor) : 0);
12903 }
12904
12905 /* 64 bit unsigned divide. */
12906 static void
12907 udiv64 (sim_cpu *cpu)
12908 {
12909 unsigned rm = INSTR (20, 16);
12910 unsigned rn = INSTR (9, 5);
12911 unsigned rd = INSTR (4, 0);
12912
12913 /* Unsigned C division truncates, i.e. rounds towards zero as UDIV requires. */
12914 uint64_t divisor = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12915
12916 aarch64_set_reg_u64
12917 (cpu, rd, NO_SP,
12918 divisor ? (aarch64_get_reg_u64 (cpu, rn, NO_SP) / divisor) : 0);
12919 }
12920
12921 static void
12922 dexDataProc2Source (sim_cpu *cpu)
12923 {
12924 /* assert instr[30] == 0
12925 instr[28,21] == 11010110
12926 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12927 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12928 instr[15,10] = opcode : 000010 ==> UDIV, 000011 ==> SDIV,
12929 001000 ==> LSLV, 001001 ==> LSRV
12930 001010 ==> ASRV, 001011 ==> RORV
12931 ow ==> UNALLOC. */
12932
12933 uint32_t dispatch;
12934 uint32_t S = INSTR (29, 29);
12935 uint32_t opcode = INSTR (15, 10);
12936
12937 if (S == 1)
12938 HALT_UNALLOC;
12939
12940 if (opcode & 0x34)
12941 HALT_UNALLOC;
12942
12943 dispatch = ( (INSTR (31, 31) << 3)
12944 | (uimm (opcode, 3, 3) << 2)
12945 | uimm (opcode, 1, 0));
12946 switch (dispatch)
12947 {
12948 case 2: udiv32 (cpu); return;
12949 case 3: sdiv32 (cpu); return;
12950 case 4: lslv32 (cpu); return;
12951 case 5: lsrv32 (cpu); return;
12952 case 6: asrv32 (cpu); return;
12953 case 7: rorv32 (cpu); return;
12954 case 10: udiv64 (cpu); return;
12955 case 11: sdiv64 (cpu); return;
12956 case 12: lslv64 (cpu); return;
12957 case 13: lsrv64 (cpu); return;
12958 case 14: asrv64 (cpu); return;
12959 case 15: rorv64 (cpu); return;
12960 default: HALT_UNALLOC;
12961 }
12962 }
12963
12964
12965 /* Multiply. */
12966
12967 /* 32 bit multiply and add. */
12968 static void
12969 madd32 (sim_cpu *cpu)
12970 {
12971 unsigned rm = INSTR (20, 16);
12972 unsigned ra = INSTR (14, 10);
12973 unsigned rn = INSTR (9, 5);
12974 unsigned rd = INSTR (4, 0);
12975
12976 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12977 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12978 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12979 + aarch64_get_reg_u32 (cpu, rn, NO_SP)
12980 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12981 }
12982
12983 /* 64 bit multiply and add. */
12984 static void
12985 madd64 (sim_cpu *cpu)
12986 {
12987 unsigned rm = INSTR (20, 16);
12988 unsigned ra = INSTR (14, 10);
12989 unsigned rn = INSTR (9, 5);
12990 unsigned rd = INSTR (4, 0);
12991
12992 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12993 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12994 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12995 + (aarch64_get_reg_u64 (cpu, rn, NO_SP)
12996 * aarch64_get_reg_u64 (cpu, rm, NO_SP)));
12997 }
12998
12999 /* 32 bit multiply and sub. */
13000 static void
13001 msub32 (sim_cpu *cpu)
13002 {
13003 unsigned rm = INSTR (20, 16);
13004 unsigned ra = INSTR (14, 10);
13005 unsigned rn = INSTR (9, 5);
13006 unsigned rd = INSTR (4, 0);
13007
13008 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13009 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13010 aarch64_get_reg_u32 (cpu, ra, NO_SP)
13011 - aarch64_get_reg_u32 (cpu, rn, NO_SP)
13012 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
13013 }
13014
13015 /* 64 bit multiply and sub. */
13016 static void
13017 msub64 (sim_cpu *cpu)
13018 {
13019 unsigned rm = INSTR (20, 16);
13020 unsigned ra = INSTR (14, 10);
13021 unsigned rn = INSTR (9, 5);
13022 unsigned rd = INSTR (4, 0);
13023
13024 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13025 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13026 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13027 - aarch64_get_reg_u64 (cpu, rn, NO_SP)
13028 * aarch64_get_reg_u64 (cpu, rm, NO_SP));
13029 }
13030
13031 /* Signed multiply add long -- source, source2 : 32 bit, source3 : 64 bit. */
13032 static void
13033 smaddl (sim_cpu *cpu)
13034 {
13035 unsigned rm = INSTR (20, 16);
13036 unsigned ra = INSTR (14, 10);
13037 unsigned rn = INSTR (9, 5);
13038 unsigned rd = INSTR (4, 0);
13039
13040 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
13041 obtain a 64 bit product. */
13042 aarch64_set_reg_s64
13043 (cpu, rd, NO_SP,
13044 aarch64_get_reg_s64 (cpu, ra, NO_SP)
13045 + ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
13046 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
13047 }
13048
13049 /* Signed multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
13050 static void
13051 smsubl (sim_cpu *cpu)
13052 {
13053 unsigned rm = INSTR (20, 16);
13054 unsigned ra = INSTR (14, 10);
13055 unsigned rn = INSTR (9, 5);
13056 unsigned rd = INSTR (4, 0);
13057
13058 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
13059 obtain a 64 bit product. */
13060 aarch64_set_reg_s64
13061 (cpu, rd, NO_SP,
13062 aarch64_get_reg_s64 (cpu, ra, NO_SP)
13063 - ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
13064 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
13065 }
13066
13067 /* Integer Multiply/Divide. */
13068
13069 /* First some macros and a helper function. */
13070 /* Macros to test or access elements of 64 bit words. */
13071
13072 /* Mask used to access lo 32 bits of 64 bit unsigned int. */
13073 #define LOW_WORD_MASK ((1ULL << 32) - 1)
13074 /* Return the lo 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
13075 #define lowWordToU64(_value_u64) ((_value_u64) & LOW_WORD_MASK)
13076 /* Return the hi 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
13077 #define highWordToU64(_value_u64) ((_value_u64) >> 32)
13078
13079 /* Offset of sign bit in 64 bit signed integer. */
13080 #define SIGN_SHIFT_U64 63
13081 /* The sign bit itself -- also identifies the minimum negative int value. */
13082 #define SIGN_BIT_U64 (1ULL << SIGN_SHIFT_U64)
13083 /* Return true if a 64 bit signed int presented as an unsigned int is the
13084 most negative value. */
13085 #define isMinimumU64(_value_u64) ((_value_u64) == SIGN_BIT_U64)
13086 /* Return true (non-zero) if a 64 bit signed int presented as an unsigned
13087 int has its sign bit set. */
13088 #define isSignSetU64(_value_u64) ((_value_u64) & SIGN_BIT_U64)
13089 /* Return 1L or -1L according to whether a 64 bit signed int presented as
13090 an unsigned int has its sign bit set or not. */
13091 #define signOfU64(_value_u64) (1L + (((_value_u64) >> SIGN_SHIFT_U64) * -2L))
13092 /* Clear the sign bit of a 64 bit signed int presented as an unsigned int. */
13093 #define clearSignU64(_value_u64) ((_value_u64) &= ~SIGN_BIT_U64)
13094
13095 /* Multiply two 64 bit ints and return
13096 the hi 64 bits of the 128 bit product. */
13097
13098 static uint64_t
13099 mul64hi (uint64_t value1, uint64_t value2)
13100 {
13101 uint64_t resultmid1;
13102 uint64_t result;
13103 uint64_t value1_lo = lowWordToU64 (value1);
13104 uint64_t value1_hi = highWordToU64 (value1);
13105 uint64_t value2_lo = lowWordToU64 (value2);
13106 uint64_t value2_hi = highWordToU64 (value2);
13107
13108 /* Cross-multiply and collect results. */
13109 uint64_t xproductlo = value1_lo * value2_lo;
13110 uint64_t xproductmid1 = value1_lo * value2_hi;
13111 uint64_t xproductmid2 = value1_hi * value2_lo;
13112 uint64_t xproducthi = value1_hi * value2_hi;
13113 uint64_t carry = 0;
13114 /* Start accumulating 64 bit results. */
13115 /* Drop bottom half of lowest cross-product. */
13116 uint64_t resultmid = xproductlo >> 32;
13117 /* Add in middle products. */
13118 resultmid = resultmid + xproductmid1;
13119
13120 /* Check for overflow. */
13121 if (resultmid < xproductmid1)
13122 /* Carry over 1 into top cross-product. */
13123 carry++;
13124
13125 resultmid1 = resultmid + xproductmid2;
13126
13127 /* Check for overflow. */
13128 if (resultmid1 < xproductmid2)
13129 /* Carry over 1 into top cross-product. */
13130 carry++;
13131
13132 /* Drop lowest 32 bits of middle cross-product. */
13133 result = resultmid1 >> 32;
13134 /* Move carry bit to just above middle cross-product highest bit. */
13135 carry = carry << 32;
13136
13137 /* Add the top cross-product and any carry. */
13138 result += xproducthi + carry;
13139
13140 return result;
13141 }
13142
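/* A throwaway cross-check for mul64hi (our own sketch, not part of the
   simulator proper): on hosts where GCC's unsigned __int128 extension is
   available the high half can be computed directly and compared.  */
#if 0
#include <assert.h>

static void
mul64hi_selfcheck (uint64_t a, uint64_t b)
{
  unsigned __int128 wide = (unsigned __int128) a * b;

  assert (mul64hi (a, b) == (uint64_t) (wide >> 64));
}
#endif
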
13143 /* Signed multiply high, source, source2 :
13144 64 bit, dest <-- high 64-bit of result. */
13145 static void
13146 smulh (sim_cpu *cpu)
13147 {
13148 uint64_t uresult;
13149 int64_t result;
13150 unsigned rm = INSTR (20, 16);
13151 unsigned rn = INSTR (9, 5);
13152 unsigned rd = INSTR (4, 0);
13153 GReg ra = INSTR (14, 10);
13154 int64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
13155 int64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
13156 uint64_t uvalue1;
13157 uint64_t uvalue2;
13158 int negate = 0;
13159
13160 if (ra != R31)
13161 HALT_UNALLOC;
13162
13163 /* Convert to unsigned and use the unsigned mul64hi routine,
13164 then fix the sign up afterwards. */
13165 if (value1 < 0)
13166 {
13167 negate = !negate;
13168 uvalue1 = -value1;
13169 }
13170 else
13171 {
13172 uvalue1 = value1;
13173 }
13174
13175 if (value2 < 0)
13176 {
13177 negate = !negate;
13178 uvalue2 = -value2;
13179 }
13180 else
13181 {
13182 uvalue2 = value2;
13183 }
13184
13185 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13186
13187 uresult = mul64hi (uvalue1, uvalue2);
13188 result = uresult;
13189
13190 if (negate)
13191 {
13192 /* Multiply 128-bit result by -1, which means highpart gets inverted,
13193 and has carry in added only if low part is 0. */
13194 result = ~result;
13195 if ((uvalue1 * uvalue2) == 0)
13196 result += 1;
13197 }
13198
13199 aarch64_set_reg_s64 (cpu, rd, NO_SP, result);
13200 }
13201
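/* Worked examples for smulh (our own illustration):
     smulh (-1, 1)                ==> 0xffffffffffffffff (full product is -1)
     smulh (INT64_MIN, INT64_MIN) ==> 0x4000000000000000 (product is 2^126) */
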
13202 /* Unsigned multiply add long -- source, source2 :
13203 32 bit, source3 : 64 bit. */
13204 static void
13205 umaddl (sim_cpu *cpu)
13206 {
13207 unsigned rm = INSTR (20, 16);
13208 unsigned ra = INSTR (14, 10);
13209 unsigned rn = INSTR (9, 5);
13210 unsigned rd = INSTR (4, 0);
13211
13212 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13213 /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
13214 obtain a 64 bit product. */
13215 aarch64_set_reg_u64
13216 (cpu, rd, NO_SP,
13217 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13218 + ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
13219 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
13220 }
13221
13222 /* Unsigned multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
13223 static void
13224 umsubl (sim_cpu *cpu)
13225 {
13226 unsigned rm = INSTR (20, 16);
13227 unsigned ra = INSTR (14, 10);
13228 unsigned rn = INSTR (9, 5);
13229 unsigned rd = INSTR (4, 0);
13230
13231 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13232 /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
13233 obtain a 64 bit product. */
13234 aarch64_set_reg_u64
13235 (cpu, rd, NO_SP,
13236 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13237 - ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
13238 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
13239 }
13240
13241 /* Unsigned multiply high, source, source2 :
13242 64 bit, dest <-- high 64-bit of result. */
13243 static void
13244 umulh (sim_cpu *cpu)
13245 {
13246 unsigned rm = INSTR (20, 16);
13247 unsigned rn = INSTR (9, 5);
13248 unsigned rd = INSTR (4, 0);
13249 GReg ra = INSTR (14, 10);
13250
13251 if (ra != R31)
13252 HALT_UNALLOC;
13253
13254 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13255 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13256 mul64hi (aarch64_get_reg_u64 (cpu, rn, NO_SP),
13257 aarch64_get_reg_u64 (cpu, rm, NO_SP)));
13258 }
13259
13260 static void
13261 dexDataProc3Source (sim_cpu *cpu)
13262 {
13263 /* assert instr[28,24] == 11011. */
13264 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit (for rd at least)
13265 instr[30,29] = op54 : 00 ==> ok, ow ==> UNALLOC
13266 instr[23,21] = op31 : 111 ==> UNALLOC, ow ==> ok
13267 instr[15] = o0 : 0/1 ==> ok
13268 instr[23,21:15] ==> op : 0000 ==> MADD, 0001 ==> MSUB, (32/64 bit)
13269 0010 ==> SMADDL, 0011 ==> SMSUBL, (64 bit only)
13270 0100 ==> SMULH, (64 bit only)
13271 1010 ==> UMADDL, 1011 ==> UMSUBL, (64 bit only)
13272 1100 ==> UMULH (64 bit only)
13273 ow ==> UNALLOC. */
13274
13275 uint32_t dispatch;
13276 uint32_t size = INSTR (31, 31);
13277 uint32_t op54 = INSTR (30, 29);
13278 uint32_t op31 = INSTR (23, 21);
13279 uint32_t o0 = INSTR (15, 15);
13280
13281 if (op54 != 0)
13282 HALT_UNALLOC;
13283
13284 if (size == 0)
13285 {
13286 if (op31 != 0)
13287 HALT_UNALLOC;
13288
13289 if (o0 == 0)
13290 madd32 (cpu);
13291 else
13292 msub32 (cpu);
13293 return;
13294 }
13295
13296 dispatch = (op31 << 1) | o0;
13297
13298 switch (dispatch)
13299 {
13300 case 0: madd64 (cpu); return;
13301 case 1: msub64 (cpu); return;
13302 case 2: smaddl (cpu); return;
13303 case 3: smsubl (cpu); return;
13304 case 4: smulh (cpu); return;
13305 case 10: umaddl (cpu); return;
13306 case 11: umsubl (cpu); return;
13307 case 12: umulh (cpu); return;
13308 default: HALT_UNALLOC;
13309 }
13310 }
13311
13312 static void
13313 dexDPReg (sim_cpu *cpu)
13314 {
13315 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
13316 assert group == GROUP_DPREG_0101 || group == GROUP_DPREG_1101
13317 bits [28:24:21] of a DPReg are the secondary dispatch vector. */
13318 uint32_t group2 = dispatchDPReg (aarch64_get_instr (cpu));
13319
13320 switch (group2)
13321 {
13322 case DPREG_LOG_000:
13323 case DPREG_LOG_001:
13324 dexLogicalShiftedRegister (cpu); return;
13325
13326 case DPREG_ADDSHF_010:
13327 dexAddSubtractShiftedRegister (cpu); return;
13328
13329 case DPREG_ADDEXT_011:
13330 dexAddSubtractExtendedRegister (cpu); return;
13331
13332 case DPREG_ADDCOND_100:
13333 {
13334 /* This set bundles a variety of different operations. */
13335 /* Check for: */
13336 /* 1) add/sub w carry. */
13337 uint32_t mask1 = 0x1FE00000U;
13338 uint32_t val1 = 0x1A000000U;
13339 /* 2) cond compare register/immediate. */
13340 uint32_t mask2 = 0x1FE00000U;
13341 uint32_t val2 = 0x1A400000U;
13342 /* 3) cond select. */
13343 uint32_t mask3 = 0x1FE00000U;
13344 uint32_t val3 = 0x1A800000U;
13345 /* 4) data proc 1/2 source. */
13346 uint32_t mask4 = 0x1FE00000U;
13347 uint32_t val4 = 0x1AC00000U;
13348
13349 if ((aarch64_get_instr (cpu) & mask1) == val1)
13350 dexAddSubtractWithCarry (cpu);
13351
13352 else if ((aarch64_get_instr (cpu) & mask2) == val2)
13353 CondCompare (cpu);
13354
13355 else if ((aarch64_get_instr (cpu) & mask3) == val3)
13356 dexCondSelect (cpu);
13357
13358 else if ((aarch64_get_instr (cpu) & mask4) == val4)
13359 {
13360 /* Bit 30 is clear for data proc 2 source
13361 and set for data proc 1 source. */
13362 if (aarch64_get_instr (cpu) & (1U << 30))
13363 dexDataProc1Source (cpu);
13364 else
13365 dexDataProc2Source (cpu);
13366 }
13367
13368 else
13369 /* Should not reach here. */
13370 HALT_NYI;
13371
13372 return;
13373 }
13374
13375 case DPREG_3SRC_110:
13376 dexDataProc3Source (cpu); return;
13377
13378 case DPREG_UNALLOC_101:
13379 HALT_UNALLOC;
13380
13381 case DPREG_3SRC_111:
13382 dexDataProc3Source (cpu); return;
13383
13384 default:
13385 /* Should never reach here. */
13386 HALT_NYI;
13387 }
13388 }
13389
13390 /* Unconditional Branch immediate.
13391 Offset is a PC-relative byte offset in the range +/- 128MiB.
13392 The decode routine is expected to have already scaled the raw
13393 word offset from the instruction into a byte offset. */
13394
13395 /* Unconditional branch. */
13396 static void
13397 buc (sim_cpu *cpu, int32_t offset)
13398 {
13399 aarch64_set_next_PC_by_offset (cpu, offset);
13400 }
13401
13402 static unsigned stack_depth = 0;
13403
13404 /* Unconditional branch and link -- writes return PC to LR. */
13405 static void
13406 bl (sim_cpu *cpu, int32_t offset)
13407 {
13408 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13409 aarch64_save_LR (cpu);
13410 aarch64_set_next_PC_by_offset (cpu, offset);
13411
13412 if (TRACE_BRANCH_P (cpu))
13413 {
13414 ++ stack_depth;
13415 TRACE_BRANCH (cpu,
13416 " %*scall %" PRIx64 " [%s]"
13417 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13418 stack_depth, " ", aarch64_get_next_PC (cpu),
13419 aarch64_get_func (CPU_STATE (cpu),
13420 aarch64_get_next_PC (cpu)),
13421 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13422 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13423 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13424 );
13425 }
13426 }
13427
13428 /* Unconditional Branch register.
13429 Branch/return address is in source register. */
13430
13431 /* Unconditional branch. */
13432 static void
13433 br (sim_cpu *cpu)
13434 {
13435 unsigned rn = INSTR (9, 5);
13436 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13437 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13438 }
13439
13440 /* Unconditional branch and link -- writes return PC to LR. */
13441 static void
13442 blr (sim_cpu *cpu)
13443 {
13444 /* Ensure we read the destination before we write LR. */
13445 uint64_t target = aarch64_get_reg_u64 (cpu, INSTR (9, 5), NO_SP);
13446
13447 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13448 aarch64_save_LR (cpu);
13449 aarch64_set_next_PC (cpu, target);
13450
13451 if (TRACE_BRANCH_P (cpu))
13452 {
13453 ++ stack_depth;
13454 TRACE_BRANCH (cpu,
13455 " %*scall %" PRIx64 " [%s]"
13456 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13457 stack_depth, " ", aarch64_get_next_PC (cpu),
13458 aarch64_get_func (CPU_STATE (cpu),
13459 aarch64_get_next_PC (cpu)),
13460 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13461 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13462 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13463 );
13464 }
13465 }
13466
13467 /* Return -- the assembler defaults the source register to LR. This is
13468 functionally equivalent to br but, presumably, unlike br it side
13469 effects the branch predictor. */
13470 static void
13471 ret (sim_cpu *cpu)
13472 {
13473 unsigned rn = INSTR (9, 5);
13474 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13475
13476 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13477 if (TRACE_BRANCH_P (cpu))
13478 {
13479 TRACE_BRANCH (cpu,
13480 " %*sreturn [result: %" PRIx64 "]",
13481 stack_depth, " ", aarch64_get_reg_u64 (cpu, 0, NO_SP));
13482 -- stack_depth;
13483 }
13484 }
13485
13486 /* NOP -- we implement this and call it from the decode in case we
13487 want to intercept it later. */
13488
13489 static void
13490 nop (sim_cpu *cpu)
13491 {
13492 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13493 }
13494
13495 /* Data synchronization barrier. */
13496
13497 static void
13498 dsb (sim_cpu *cpu)
13499 {
13500 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13501 }
13502
13503 /* Data memory barrier. */
13504
13505 static void
13506 dmb (sim_cpu *cpu)
13507 {
13508 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13509 }
13510
13511 /* Instruction synchronization barrier. */
13512
13513 static void
13514 isb (sim_cpu *cpu)
13515 {
13516 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13517 }
13518
13519 static void
13520 dexBranchImmediate (sim_cpu *cpu)
13521 {
13522 /* assert instr[30,26] == 00101
13523 instr[31] ==> 0 == B, 1 == BL
13524 instr[25,0] == imm26 branch offset counted in words. */
13525
13526 uint32_t top = INSTR (31, 31);
13527 /* We have a 26 bit signed word offset which we need to pass to the
13528 execute routine as a signed byte offset. */
13529 int32_t offset = simm32 (aarch64_get_instr (cpu), 25, 0) << 2;
13530
13531 if (top)
13532 bl (cpu, offset);
13533 else
13534 buc (cpu, offset);
13535 }
13536
13537 /* Control Flow. */
13538
13539 /* Conditional branch
13540
13541 Offset is a PC-relative byte offset in the range +/- 1MiB. pos is
13542 a bit position in the range 0 .. 63.
13543
13544 cc is a CondCode enum value as pulled out of the decode
13545
13546 N.B. any offset register (source) can only be Xn or Wn. */
13547
13548 static void
13549 bcc (sim_cpu *cpu, int32_t offset, CondCode cc)
13550 {
13551 /* The test returns TRUE if CC is met. */
13552 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13553 if (testConditionCode (cpu, cc))
13554 aarch64_set_next_PC_by_offset (cpu, offset);
13555 }
13556
13557 /* 32 bit branch on register non-zero. */
13558 static void
13559 cbnz32 (sim_cpu *cpu, int32_t offset)
13560 {
13561 unsigned rt = INSTR (4, 0);
13562
13563 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13564 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) != 0)
13565 aarch64_set_next_PC_by_offset (cpu, offset);
13566 }
13567
13568 /* 64 bit branch on register non-zero. */
13569 static void
13570 cbnz (sim_cpu *cpu, int32_t offset)
13571 {
13572 unsigned rt = INSTR (4, 0);
13573
13574 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13575 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) != 0)
13576 aarch64_set_next_PC_by_offset (cpu, offset);
13577 }
13578
13579 /* 32 bit branch on register zero. */
13580 static void
13581 cbz32 (sim_cpu *cpu, int32_t offset)
13582 {
13583 unsigned rt = INSTR (4, 0);
13584
13585 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13586 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) == 0)
13587 aarch64_set_next_PC_by_offset (cpu, offset);
13588 }
13589
13590 /* 64 bit branch on register zero. */
13591 static void
13592 cbz (sim_cpu *cpu, int32_t offset)
13593 {
13594 unsigned rt = INSTR (4, 0);
13595
13596 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13597 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) == 0)
13598 aarch64_set_next_PC_by_offset (cpu, offset);
13599 }
13600
13601 /* Branch on register bit test non-zero -- one size fits all. */
13602 static void
13603 tbnz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13604 {
13605 unsigned rt = INSTR (4, 0);
13606
13607 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13608 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos))
13609 aarch64_set_next_PC_by_offset (cpu, offset);
13610 }
13611
13612 /* Branch on register bit test zero -- one size fits all. */
13613 static void
13614 tbz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13615 {
13616 unsigned rt = INSTR (4, 0);
13617
13618 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13619 if (!(aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos)))
13620 aarch64_set_next_PC_by_offset (cpu, offset);
13621 }
13622
13623 static void
13624 dexCompareBranchImmediate (sim_cpu *cpu)
13625 {
13626 /* instr[30,25] = 01 1010
13627 instr[31] = size : 0 ==> 32, 1 ==> 64
13628 instr[24] = op : 0 ==> CBZ, 1 ==> CBNZ
13629 instr[23,5] = simm19 branch offset counted in words
13630 instr[4,0] = rt */
13631
13632 uint32_t size = INSTR (31, 31);
13633 uint32_t op = INSTR (24, 24);
13634 int32_t offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13635
13636 if (size == 0)
13637 {
13638 if (op == 0)
13639 cbz32 (cpu, offset);
13640 else
13641 cbnz32 (cpu, offset);
13642 }
13643 else
13644 {
13645 if (op == 0)
13646 cbz (cpu, offset);
13647 else
13648 cbnz (cpu, offset);
13649 }
13650 }
13651
13652 static void
13653 dexTestBranchImmediate (sim_cpu *cpu)
13654 {
13655 /* instr[31] = b5 : bit 5 of test bit idx
13656 instr[30,25] = 01 1011
13657 instr[24] = op : 0 ==> TBZ, 1 == TBNZ
13658 instr[23,19] = b40 : bits 4 to 0 of test bit idx
13659 instr[18,5] = simm14 : signed offset counted in words
13660 instr[4,0] = uimm5 */
13661
13662 uint32_t pos = ((INSTR (31, 31) << 5) | INSTR (23, 19));
13663 int32_t offset = simm32 (aarch64_get_instr (cpu), 18, 5) << 2;
13664
13665 NYI_assert (30, 25, 0x1b);
13666
13667 if (INSTR (24, 24) == 0)
13668 tbz (cpu, pos, offset);
13669 else
13670 tbnz (cpu, pos, offset);
13671 }
13672
13673 static void
13674 dexCondBranchImmediate (sim_cpu *cpu)
13675 {
13676 /* instr[31,25] = 010 1010
13677 instr[24] = op1 : op1:op0 == 00 ==> B.cond, ow ==> UNALLOC
13678 instr[23,5] = simm19 : signed offset counted in words
13679 instr[4] = op0
13680 instr[3,0] = cond */
13681
13682 int32_t offset;
13683 uint32_t op = ((INSTR (24, 24) << 1) | INSTR (4, 4));
13684
13685 NYI_assert (31, 25, 0x2a);
13686
13687 if (op != 0)
13688 HALT_UNALLOC;
13689
13690 offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13691
13692 bcc (cpu, offset, INSTR (3, 0));
13693 }
13694
13695 static void
13696 dexBranchRegister (sim_cpu *cpu)
13697 {
13698 /* instr[31,25] = 110 1011
13699 instr[24,21] = op : 0 ==> BR, 1 ==> BLR, 2 ==> RET, 4 ==> ERET, 5 ==> DRPS
13700 instr[20,16] = op2 : must be 11111
13701 instr[15,10] = op3 : must be 000000
13702 instr[4,0] = op4 : must be 00000. */
13703
13704 uint32_t op = INSTR (24, 21);
13705 uint32_t op2 = INSTR (20, 16);
13706 uint32_t op3 = INSTR (15, 10);
13707 uint32_t op4 = INSTR (4, 0);
13708
13709 NYI_assert (31, 25, 0x6b);
13710
13711 if (op2 != 0x1F || op3 != 0 || op4 != 0)
13712 HALT_UNALLOC;
13713
13714 if (op == 0)
13715 br (cpu);
13716
13717 else if (op == 1)
13718 blr (cpu);
13719
13720 else if (op == 2)
13721 ret (cpu);
13722
13723 else
13724 {
13725 /* ERET and DRPS accept 0b11111 for rn = instr [9,5];
13726 anything else is unallocated. */
13727 uint32_t rn = INSTR (9, 5);
13728
13729 if (rn != 0x1f)
13730 HALT_UNALLOC;
13731
13732 if (op == 4 || op == 5)
13733 HALT_NYI;
13734
13735 HALT_UNALLOC;
13736 }
13737 }
13738
13739 /* FIXME: We should get the Angel SWI values from ../../libgloss/aarch64/svc.h
13740 but this may not be available. So instead we define the values we need
13741 here. */
13742 #define AngelSVC_Reason_Open 0x01
13743 #define AngelSVC_Reason_Close 0x02
13744 #define AngelSVC_Reason_Write 0x05
13745 #define AngelSVC_Reason_Read 0x06
13746 #define AngelSVC_Reason_IsTTY 0x09
13747 #define AngelSVC_Reason_Seek 0x0A
13748 #define AngelSVC_Reason_FLen 0x0C
13749 #define AngelSVC_Reason_Remove 0x0E
13750 #define AngelSVC_Reason_Rename 0x0F
13751 #define AngelSVC_Reason_Clock 0x10
13752 #define AngelSVC_Reason_Time 0x11
13753 #define AngelSVC_Reason_System 0x12
13754 #define AngelSVC_Reason_Errno 0x13
13755 #define AngelSVC_Reason_GetCmdLine 0x15
13756 #define AngelSVC_Reason_HeapInfo 0x16
13757 #define AngelSVC_Reason_ReportException 0x18
13758 #define AngelSVC_Reason_Elapsed 0x30
13759
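/* For reference: a guest reaches handle_halt below by executing
   HLT #0xf000 with an Angel reason code in w0 and, for most calls, a
   parameter block pointer in x1; the result comes back in x0.  A
   guest-side sketch (our illustration, not part of the simulator),
   assuming GCC extended asm on an AArch64 target.  */
#if 0
static uint64_t
angel_svc (uint32_t reason, void *block)
{
  register uint64_t x0 __asm__ ("x0") = reason;
  register uint64_t x1 __asm__ ("x1") = (uint64_t) block;

  __asm__ volatile ("hlt #0xf000"
                    : "+r" (x0)
                    : "r" (x1)
                    : "memory");
  return x0;
}
#endif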
13760
13761 static void
13762 handle_halt (sim_cpu *cpu, uint32_t val)
13763 {
13764 uint64_t result = 0;
13765
13766 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13767 if (val != 0xf000)
13768 {
13769 TRACE_SYSCALL (cpu, " HLT [0x%x]", val);
13770 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13771 sim_stopped, SIM_SIGTRAP);
13772 }
13773
13774 /* We have encountered an Angel SVC call. See if we can process it. */
13775 switch (aarch64_get_reg_u32 (cpu, 0, NO_SP))
13776 {
13777 case AngelSVC_Reason_HeapInfo:
13778 {
13779 /* Get the values. */
13780 uint64_t stack_top = aarch64_get_stack_start (cpu);
13781 uint64_t heap_base = aarch64_get_heap_start (cpu);
13782
13783 /* Get the pointer */
13784 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13785 ptr = aarch64_get_mem_u64 (cpu, ptr);
13786
13787 /* Fill in the memory block. */
13788 /* Start addr of heap. */
13789 aarch64_set_mem_u64 (cpu, ptr + 0, heap_base);
13790 /* End addr of heap. */
13791 aarch64_set_mem_u64 (cpu, ptr + 8, stack_top);
13792 /* Lowest stack addr. */
13793 aarch64_set_mem_u64 (cpu, ptr + 16, heap_base);
13794 /* Initial stack addr. */
13795 aarch64_set_mem_u64 (cpu, ptr + 24, stack_top);
13796
13797 TRACE_SYSCALL (cpu, " AngelSVC: Get Heap Info");
13798 }
13799 break;
13800
13801 case AngelSVC_Reason_Open:
13802 {
13803 /* Get the pointer */
13804 /* uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK); */
13805 /* FIXME: For now we just assume that we will only be asked
13806 to open the standard file descriptors. */
13807 static int fd = 0;
13808 result = fd ++;
13809
13810 TRACE_SYSCALL (cpu, " AngelSVC: Open file %d", fd - 1);
13811 }
13812 break;
13813
13814 case AngelSVC_Reason_Close:
13815 {
13816 uint64_t fh = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13817 TRACE_SYSCALL (cpu, " AngelSVC: Close file %d", (int) fh);
13818 result = 0;
13819 }
13820 break;
13821
13822 case AngelSVC_Reason_Errno:
13823 result = 0;
13824 TRACE_SYSCALL (cpu, " AngelSVC: Get Errno");
13825 break;
13826
13827 case AngelSVC_Reason_Clock:
13828 result =
13829 #ifdef CLOCKS_PER_SEC
13830 (CLOCKS_PER_SEC >= 100)
13831 ? (clock () / (CLOCKS_PER_SEC / 100))
13832 : ((clock () * 100) / CLOCKS_PER_SEC)
13833 #else
13834 /* Presume unix... clock() returns microseconds. */
13835 (clock () / 10000)
13836 #endif
13837 ;
13838 TRACE_SYSCALL (cpu, " AngelSVC: Get Clock");
13839 break;
13840
13841 case AngelSVC_Reason_GetCmdLine:
13842 {
13843 /* Get the pointer */
13844 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13845 ptr = aarch64_get_mem_u64 (cpu, ptr);
13846
13847 /* FIXME: No command line for now. */
13848 aarch64_set_mem_u64 (cpu, ptr, 0);
13849 TRACE_SYSCALL (cpu, " AngelSVC: Get Command Line");
13850 }
13851 break;
13852
13853 case AngelSVC_Reason_IsTTY:
13854 result = 1;
13855 TRACE_SYSCALL (cpu, " AngelSVC: IsTTY ?");
13856 break;
13857
13858 case AngelSVC_Reason_Write:
13859 {
13860 /* Get the pointer */
13861 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13862 /* Get the write control block. */
13863 uint64_t fd = aarch64_get_mem_u64 (cpu, ptr);
13864 uint64_t buf = aarch64_get_mem_u64 (cpu, ptr + 8);
13865 uint64_t len = aarch64_get_mem_u64 (cpu, ptr + 16);
13866
13867 TRACE_SYSCALL (cpu, "write of %" PRIx64 " bytes from %"
13868 PRIx64 " on descriptor %" PRIx64,
13869 len, buf, fd);
13870
13871 if (len > 1280)
13872 {
13873 TRACE_SYSCALL (cpu,
13874 " AngelSVC: Write: Suspiciously long write: %ld",
13875 (long) len);
13876 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13877 sim_stopped, SIM_SIGBUS);
13878 }
13879 else if (fd == 1)
13880 {
13881 printf ("%.*s", (int) len, aarch64_get_mem_ptr (cpu, buf));
13882 }
13883 else if (fd == 2)
13884 {
13885 TRACE (cpu, 0, "\n");
13886 sim_io_eprintf (CPU_STATE (cpu), "%.*s",
13887 (int) len, aarch64_get_mem_ptr (cpu, buf));
13888 TRACE (cpu, 0, "\n");
13889 }
13890 else
13891 {
13892 TRACE_SYSCALL (cpu,
13893 " AngelSVC: Write: Unexpected file handle: %d",
13894 (int) fd);
13895 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13896 sim_stopped, SIM_SIGABRT);
13897 }
13898 }
13899 break;
13900
13901 case AngelSVC_Reason_ReportException:
13902 {
13903 /* Get the pointer */
13904 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13905 /* ptr = aarch64_get_mem_u64 (cpu, ptr); */
13906 uint64_t type = aarch64_get_mem_u64 (cpu, ptr);
13907 uint64_t state = aarch64_get_mem_u64 (cpu, ptr + 8);
13908
13909 TRACE_SYSCALL (cpu,
13910 "Angel Exception: type 0x%" PRIx64 " state %" PRIx64,
13911 type, state);
13912
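/* 0x20026 is ADP_Stopped_ApplicationExit in the Angel/semihosting
   conventions, i.e. a normal exit with the status in the block's
   second word. */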
13913 if (type == 0x20026)
13914 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13915 sim_exited, state);
13916 else
13917 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13918 sim_stopped, SIM_SIGINT);
13919 }
13920 break;
13921
13922 case AngelSVC_Reason_Read:
13923 case AngelSVC_Reason_FLen:
13924 case AngelSVC_Reason_Seek:
13925 case AngelSVC_Reason_Remove:
13926 case AngelSVC_Reason_Time:
13927 case AngelSVC_Reason_System:
13928 case AngelSVC_Reason_Rename:
13929 case AngelSVC_Reason_Elapsed:
13930 default:
13931 TRACE_SYSCALL (cpu, " HLT [Unknown angel %x]",
13932 aarch64_get_reg_u32 (cpu, 0, NO_SP));
13933 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13934 sim_stopped, SIM_SIGTRAP);
13935 }
13936
13937 aarch64_set_reg_u64 (cpu, 0, NO_SP, result);
13938 }
13939
13940 static void
13941 dexExcpnGen (sim_cpu *cpu)
13942 {
13943 /* instr[31:24] = 11010100
13944 instr[23,21] = opc : 000 ==> GEN EXCPN, 001 ==> BRK
13945 010 ==> HLT, 101 ==> DBG GEN EXCPN
13946 instr[20,5] = imm16
13947 instr[4,2] = opc2 000 ==> OK, ow ==> UNALLOC
13948 instr[1,0] = LL : discriminates opc */
13949
13950 uint32_t opc = INSTR (23, 21);
13951 uint32_t imm16 = INSTR (20, 5);
13952 uint32_t opc2 = INSTR (4, 2);
13953 uint32_t LL;
13954
13955 NYI_assert (31, 24, 0xd4);
13956
13957 if (opc2 != 0)
13958 HALT_UNALLOC;
13959
13960 LL = INSTR (1, 0);
13961
13962 /* We only implement HLT and BRK for now. */
13963 if (opc == 1 && LL == 0)
13964 {
13965 TRACE_EVENTS (cpu, " BRK [0x%x]", imm16);
13966 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13967 sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
13968 }
13969
13970 if (opc == 2 && LL == 0)
13971 handle_halt (cpu, imm16);
13972
13973 else if (opc == 0 || opc == 5)
13974 HALT_NYI;
13975
13976 else
13977 HALT_UNALLOC;
13978 }
13979
13980 /* Stub for accessing system registers. */
13981
13982 static uint64_t
13983 system_get (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13984 unsigned crm, unsigned op2)
13985 {
13986 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 7)
13987 /* DCZID_EL0 - the Data Cache Zero ID register.
13988 We do not support DC ZVA at the moment, so
13989 we return a value with the disable bit set.
13990 We implement support for the DCZID register since
13991 it is used by the C library's memset function. */
13992 return ((uint64_t) 1) << 4;
13993
13994 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 1)
13995 /* Cache Type Register. */
13996 return 0x80008000UL;
13997
13998 if (crn == 13 && op1 == 3 && crm == 0 && op2 == 2)
13999 /* TPIDR_EL0 - thread pointer id. */
14000 return aarch64_get_thread_id (cpu);
14001
14002 if (op1 == 3 && crm == 4 && op2 == 0)
14003 return aarch64_get_FPCR (cpu);
14004
14005 if (op1 == 3 && crm == 4 && op2 == 1)
14006 return aarch64_get_FPSR (cpu);
14007
14008 else if (op1 == 3 && crm == 2 && op2 == 0)
14009 return aarch64_get_CPSR (cpu);
14010
14011 HALT_NYI;
14012 }
14013
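/* Example of the mapping system_get implements (our own illustration):
   "mrs x0, dczid_el0" arrives with op1 = 3, CRn = 0, CRm = 0, op2 = 7
   and so yields 0x10, i.e. DC ZVA reported as disabled.  */
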
14014 static void
14015 system_set (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
14016 unsigned crm, unsigned op2, uint64_t val)
14017 {
14018 if (op1 == 3 && crm == 4 && op2 == 0)
14019 aarch64_set_FPCR (cpu, val);
14020
14021 else if (op1 == 3 && crm == 4 && op2 == 1)
14022 aarch64_set_FPSR (cpu, val);
14023
14024 else if (op1 == 3 && crm == 2 && op2 == 0)
14025 aarch64_set_CPSR (cpu, val);
14026
14027 else
14028 HALT_NYI;
14029 }
14030
14031 static void
14032 do_mrs (sim_cpu *cpu)
14033 {
14034 /* instr[31:20] = 1101 0101 0001 1
14035 instr[19] = op0
14036 instr[18,16] = op1
14037 instr[15,12] = CRn
14038 instr[11,8] = CRm
14039 instr[7,5] = op2
14040 instr[4,0] = Rt */
14041 unsigned sys_op0 = INSTR (19, 19) + 2;
14042 unsigned sys_op1 = INSTR (18, 16);
14043 unsigned sys_crn = INSTR (15, 12);
14044 unsigned sys_crm = INSTR (11, 8);
14045 unsigned sys_op2 = INSTR (7, 5);
14046 unsigned rt = INSTR (4, 0);
14047
14048 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
14049 aarch64_set_reg_u64 (cpu, rt, NO_SP,
14050 system_get (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2));
14051 }
14052
14053 static void
14054 do_MSR_immediate (sim_cpu *cpu)
14055 {
14056 /* instr[31:19] = 1101 0101 0000 0
14057 instr[18,16] = op1
14058 instr[15,12] = 0100
14059 instr[11,8] = CRm
14060 instr[7,5] = op2
14061 instr[4,0] = 1 1111 */
14062
14063 unsigned op1 = INSTR (18, 16);
14064 /*unsigned crm = INSTR (11, 8);*/
14065 unsigned op2 = INSTR (7, 5);
14066
14067 NYI_assert (31, 19, 0x1AA0);
14068 NYI_assert (15, 12, 0x4);
14069 NYI_assert (4, 0, 0x1F);
14070
14071 if (op1 == 0)
14072 {
14073 if (op2 == 5)
14074 HALT_NYI; /* set SPSel. */
14075 else
14076 HALT_UNALLOC;
14077 }
14078 else if (op1 == 3)
14079 {
14080 if (op2 == 6)
14081 HALT_NYI; /* set DAIFset. */
14082 else if (op2 == 7)
14083 HALT_NYI; /* set DAIFclr. */
14084 else
14085 HALT_UNALLOC;
14086 }
14087 else
14088 HALT_UNALLOC;
14089 }
14090
14091 static void
14092 do_MSR_reg (sim_cpu *cpu)
14093 {
14094 /* instr[31:20] = 1101 0101 0001
14095 instr[19] = op0
14096 instr[18,16] = op1
14097 instr[15,12] = CRn
14098 instr[11,8] = CRm
14099 instr[7,5] = op2
14100 instr[4,0] = Rt */
14101
14102 unsigned sys_op0 = INSTR (19, 19) + 2;
14103 unsigned sys_op1 = INSTR (18, 16);
14104 unsigned sys_crn = INSTR (15, 12);
14105 unsigned sys_crm = INSTR (11, 8);
14106 unsigned sys_op2 = INSTR (7, 5);
14107 unsigned rt = INSTR (4, 0);
14108
14109 NYI_assert (31, 20, 0xD51);
14110
14111 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
14112 system_set (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2,
14113 aarch64_get_reg_u64 (cpu, rt, NO_SP));
14114 }
14115
14116 static void
14117 do_SYS (sim_cpu *cpu)
14118 {
14119 /* instr[31,19] = 1101 0101 0000 1
14120 instr[18,16] = op1
14121 instr[15,12] = CRn
14122 instr[11,8] = CRm
14123 instr[7,5] = op2
14124 instr[4,0] = Rt */
14125 NYI_assert (31, 19, 0x1AA1);
14126
14127 /* FIXME: For now we just silently accept system ops. */
14128 }
14129
14130 static void
14131 dexSystem (sim_cpu *cpu)
14132 {
14133 /* instr[31:22] = 1101 01010 0
14134 instr[21] = L
14135 instr[20,19] = op0
14136 instr[18,16] = op1
14137 instr[15,12] = CRn
14138 instr[11,8] = CRm
14139 instr[7,5] = op2
14140 instr[4,0] = uimm5 */
14141
14142 /* We are interested in HINT, DSB, DMB and ISB
14143
14144 Hint #0 encodes NOOP (this is the only hint we care about)
14145 L == 0, op0 == 0, op1 = 011, CRn = 0010, Rt = 11111,
14146 CRm != 0000 OR (CRm == 0000 AND (op2 == 000 OR op2 > 101))
14147
14148 DSB, DMB, ISB are data synchronization barrier, data memory barrier
14149 and instruction synchronization barrier, respectively, where
14150
14151 L == 0, op0 == 0, op1 = 011, CRn = 0011, Rt = 11111,
14152 op2 : DSB ==> 100, DMB ==> 101, ISB ==> 110
14153 CRm<3:2> ==> domain, CRm<1:0> ==> types,
14154 domain : 00 ==> OuterShareable, 01 ==> Nonshareable,
14155 10 ==> InnerShareable, 11 ==> FullSystem
14156 types : 01 ==> Reads, 10 ==> Writes,
14157 11 ==> All, 00 ==> All (domain == FullSystem). */
14158
14159 unsigned rt = INSTR (4, 0);
14160
14161 NYI_assert (31, 22, 0x354);
14162
14163 switch (INSTR (21, 12))
14164 {
14165 case 0x032:
14166 if (rt == 0x1F)
14167 {
14168 /* NOP has CRm != 0000 OR
14169 (CRm == 0000 AND (op2 == 000 OR op2 > 101)). */
14170 uint32_t crm = INSTR (11, 8);
14171 uint32_t op2 = INSTR (7, 5);
14172
14173 if (crm != 0 || (op2 == 0 || op2 > 5))
14174 {
14175 /* Actually call nop method so we can reimplement it later. */
14176 nop (cpu);
14177 return;
14178 }
14179 }
14180 HALT_NYI;
14181
14182 case 0x033:
14183 {
14184 uint32_t op2 = INSTR (7, 5);
14185
14186 switch (op2)
14187 {
14188 case 2: HALT_NYI;
14189 case 4: dsb (cpu); return;
14190 case 5: dmb (cpu); return;
14191 case 6: isb (cpu); return;
14192 default: HALT_UNALLOC;
14193 }
14194 }
14195
14196 case 0x3B0:
14197 case 0x3B4:
14198 case 0x3BD:
14199 do_mrs (cpu);
14200 return;
14201
14202 case 0x0B7:
14203 do_SYS (cpu); /* DC is an alias of SYS. */
14204 return;
14205
14206 default:
14207 if (INSTR (21, 20) == 0x1)
14208 do_MSR_reg (cpu);
14209 else if (INSTR (21, 19) == 0 && INSTR (15, 12) == 0x4)
14210 do_MSR_immediate (cpu);
14211 else
14212 HALT_NYI;
14213 return;
14214 }
14215 }
14216
14217 static void
14218 dexBr (sim_cpu *cpu)
14219 {
14220 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
14221 assert group == GROUP_BREXSYS_1010 || group == GROUP_BREXSYS_1011
14222 bits [31,29] of a BrExSys are the secondary dispatch vector. */
14223 uint32_t group2 = dispatchBrExSys (aarch64_get_instr (cpu));
14224
14225 switch (group2)
14226 {
14227 case BR_IMM_000:
14228 return dexBranchImmediate (cpu);
14229
14230 case BR_IMMCMP_001:
14231 /* Compare has bit 25 clear while test has it set. */
14232 if (!INSTR (25, 25))
14233 dexCompareBranchImmediate (cpu);
14234 else
14235 dexTestBranchImmediate (cpu);
14236 return;
14237
14238 case BR_IMMCOND_010:
14239 /* This is a conditional branch if bit 25 is clear otherwise
14240 unallocated. */
14241 if (!INSTR (25, 25))
14242 dexCondBranchImmediate (cpu);
14243 else
14244 HALT_UNALLOC;
14245 return;
14246
14247 case BR_UNALLOC_011:
14248 HALT_UNALLOC;
14249
14250 case BR_IMM_100:
14251 dexBranchImmediate (cpu);
14252 return;
14253
14254 case BR_IMMCMP_101:
14255 /* Compare has bit 25 clear while test has it set. */
14256 if (!INSTR (25, 25))
14257 dexCompareBranchImmediate (cpu);
14258 else
14259 dexTestBranchImmediate (cpu);
14260 return;
14261
14262 case BR_REG_110:
14263 /* Unconditional branch reg has bit 25 set. */
14264 if (INSTR (25, 25))
14265 dexBranchRegister (cpu);
14266
14267 /* This includes both Excpn Gen, System and unalloc operations.
14268 We need to decode the Excpn Gen operation BRK so we can plant
14269 debugger entry points.
14270 Excpn Gen operations have instr [24] = 0.
14271 we need to decode at least one of the System operations NOP
14272 which is an alias for HINT #0.
14273 System operations have instr [24,22] = 100. */
14274 else if (INSTR (24, 24) == 0)
14275 dexExcpnGen (cpu);
14276
14277 else if (INSTR (24, 22) == 4)
14278 dexSystem (cpu);
14279
14280 else
14281 HALT_UNALLOC;
14282
14283 return;
14284
14285 case BR_UNALLOC_111:
14286 HALT_UNALLOC;
14287
14288 default:
14289 /* Should never reach here. */
14290 HALT_NYI;
14291 }
14292 }
14293
14294 static void
14295 aarch64_decode_and_execute (sim_cpu *cpu, uint64_t pc)
14296 {
14297 /* We need to check if gdb wants in here. */
14298 /* checkBreak (cpu); */
14299
14300 uint64_t group = dispatchGroup (aarch64_get_instr (cpu));
14301
14302 switch (group)
14303 {
14304 case GROUP_PSEUDO_0000: dexPseudo (cpu); break;
14305 case GROUP_LDST_0100: dexLdSt (cpu); break;
14306 case GROUP_DPREG_0101: dexDPReg (cpu); break;
14307 case GROUP_LDST_0110: dexLdSt (cpu); break;
14308 case GROUP_ADVSIMD_0111: dexAdvSIMD0 (cpu); break;
14309 case GROUP_DPIMM_1000: dexDPImm (cpu); break;
14310 case GROUP_DPIMM_1001: dexDPImm (cpu); break;
14311 case GROUP_BREXSYS_1010: dexBr (cpu); break;
14312 case GROUP_BREXSYS_1011: dexBr (cpu); break;
14313 case GROUP_LDST_1100: dexLdSt (cpu); break;
14314 case GROUP_DPREG_1101: dexDPReg (cpu); break;
14315 case GROUP_LDST_1110: dexLdSt (cpu); break;
14316 case GROUP_ADVSIMD_1111: dexAdvSIMD1 (cpu); break;
14317
14318 case GROUP_UNALLOC_0001:
14319 case GROUP_UNALLOC_0010:
14320 case GROUP_UNALLOC_0011:
14321 HALT_UNALLOC;
14322
14323 default:
14324 /* Should never reach here. */
14325 HALT_NYI;
14326 }
14327 }
14328
14329 static bfd_boolean
14330 aarch64_step (sim_cpu *cpu)
14331 {
14332 uint64_t pc = aarch64_get_PC (cpu);
14333
14334 if (pc == TOP_LEVEL_RETURN_PC)
14335 return FALSE;
14336
14337 aarch64_set_next_PC (cpu, pc + 4);
14338
14339 /* Code is always little-endian. */
14340 sim_core_read_buffer (CPU_STATE (cpu), cpu, read_map,
14341 & aarch64_get_instr (cpu), pc, 4);
14342 aarch64_get_instr (cpu) = endian_le2h_4 (aarch64_get_instr (cpu));
14343
14344 TRACE_INSN (cpu, " pc = %" PRIx64 " instr = %08x", pc,
14345 aarch64_get_instr (cpu));
14346 TRACE_DISASM (cpu, pc);
14347
14348 aarch64_decode_and_execute (cpu, pc);
14349
14350 return TRUE;
14351 }
14352
14353 void
14354 aarch64_run (SIM_DESC sd)
14355 {
14356 sim_cpu *cpu = STATE_CPU (sd, 0);
14357
14358 while (aarch64_step (cpu))
14359 {
14360 aarch64_update_PC (cpu);
14361
14362 if (sim_events_tick (sd))
14363 sim_events_process (sd);
14364 }
14365
14366 sim_engine_halt (sd, cpu, NULL, aarch64_get_PC (cpu),
14367 sim_exited, aarch64_get_reg_s32 (cpu, R0, NO_SP));
14368 }
14369
14370 void
14371 aarch64_init (sim_cpu *cpu, uint64_t pc)
14372 {
14373 uint64_t sp = aarch64_get_stack_start (cpu);
14374
14375 /* Install SP, FP and PC and set LR to -20
14376 so we can detect a top-level return. */
14377 aarch64_set_reg_u64 (cpu, SP, SP_OK, sp);
14378 aarch64_set_reg_u64 (cpu, FP, SP_OK, sp);
14379 aarch64_set_reg_u64 (cpu, LR, SP_OK, TOP_LEVEL_RETURN_PC);
14380 aarch64_set_next_PC (cpu, pc);
14381 aarch64_update_PC (cpu);
14382 aarch64_init_LIT_table ();
14383 }