/* simulator.c -- Interface for the AArch64 simulator.

   Copyright (C) 2015-2017 Free Software Foundation, Inc.

   Contributed by Red Hat.

   This file is part of GDB.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <math.h>
#include <time.h>
#include <limits.h>

#include "simulator.h"
#include "cpustate.h"
#include "memory.h"

#define NO_SP 0
#define SP_OK 1

#define TST(_flag)   (aarch64_test_CPSR_bit (cpu, _flag))
#define IS_SET(_X)   (TST (( _X )) ? 1 : 0)
#define IS_CLEAR(_X) (TST (( _X )) ? 0 : 1)

/* Space saver macro.  */
#define INSTR(HIGH, LOW) uimm (aarch64_get_instr (cpu), (HIGH), (LOW))

#define HALT_UNALLOC \
  do \
    { \
      TRACE_DISASM (cpu, aarch64_get_PC (cpu)); \
      TRACE_INSN (cpu, \
                  "Unallocated instruction detected at sim line %d," \
                  " exe addr %" PRIx64, \
                  __LINE__, aarch64_get_PC (cpu)); \
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu), \
                       sim_stopped, SIM_SIGILL); \
    } \
  while (0)

#define HALT_NYI \
  do \
    { \
      TRACE_DISASM (cpu, aarch64_get_PC (cpu)); \
      TRACE_INSN (cpu, \
                  "Unimplemented instruction detected at sim line %d," \
                  " exe addr %" PRIx64, \
                  __LINE__, aarch64_get_PC (cpu)); \
      if (! TRACE_ANY_P (cpu)) \
        sim_io_eprintf (CPU_STATE (cpu), \
                        "SIM Error: Unimplemented instruction: %#08x\n", \
                        aarch64_get_instr (cpu)); \
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu), \
                       sim_stopped, SIM_SIGABRT); \
    } \
  while (0)

#define NYI_assert(HI, LO, EXPECTED) \
  do \
    { \
      if (INSTR ((HI), (LO)) != (EXPECTED)) \
        HALT_NYI; \
    } \
  while (0)

/* Helper functions used by expand_logical_immediate.  */

/* For i = 1, .., N result<i-1> = 1; all other bits are zero.  */
static inline uint64_t
ones (int N)
{
  /* Use uint64_t arithmetic throughout so the result is correct even
     on hosts where unsigned long is only 32 bits wide.  */
  return (N == 64 ? (uint64_t) -1 : (((uint64_t) 1 << N) - 1));
}

/* Return val<N> in result<0>; all other bits are zero.  */
static inline uint64_t
pickbit (uint64_t val, int N)
{
  return pickbits64 (val, N, N);
}

static uint64_t
expand_logical_immediate (uint32_t S, uint32_t R, uint32_t N)
{
  uint64_t mask;
  uint64_t imm;
  unsigned simd_size;

  /* The immediate value is S+1 bits to 1, left rotated by SIMDsize - R
     (in other words, right rotated by R), then replicated.  */
  if (N != 0)
    {
      simd_size = 64;
      mask = 0xffffffffffffffffull;
    }
  else
    {
      switch (S)
        {
        case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32;           break;
        case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; S &= 0xf; break;
        case 0x30 ... 0x37: /* 110xxx */ simd_size =  8; S &= 0x7; break;
        case 0x38 ... 0x3b: /* 1110xx */ simd_size =  4; S &= 0x3; break;
        case 0x3c ... 0x3d: /* 11110x */ simd_size =  2; S &= 0x1; break;
        default: return 0;
        }
      mask = (1ull << simd_size) - 1;
      /* Top bits are IGNORED.  */
      R &= simd_size - 1;
    }

  /* NOTE: if S = simd_size - 1 we get 0xf..f which is rejected.  */
  if (S == simd_size - 1)
    return 0;

  /* S+1 consecutive bits to 1.  */
  /* NOTE: S can't be 63 due to detection above.  */
  imm = (1ull << (S + 1)) - 1;

  /* Rotate to the left by simd_size - R.  */
  if (R != 0)
    imm = ((imm << (simd_size - R)) & mask) | (imm >> R);

  /* Replicate the value according to SIMD size.  */
  switch (simd_size)
    {
    case  2: imm = (imm <<  2) | imm;
    case  4: imm = (imm <<  4) | imm;
    case  8: imm = (imm <<  8) | imm;
    case 16: imm = (imm << 16) | imm;
    case 32: imm = (imm << 32) | imm;
    case 64: break;
    default: return 0;
    }

  return imm;
}
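
/* As a hand-worked example of the expansion above (illustrative, not
   an exhaustive check): N=0, imms=0x3c, immr=0 selects simd_size == 2
   with S masked down to 0, so the element is the single set bit 0b01;
   replicating it across all 32 two-bit elements yields
   0x5555555555555555.  */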

/* Instr[22,10] encodes N, immr and imms.  We want a lookup table
   for each possible combination, i.e. 13 bits worth of int entries.  */
#define LI_TABLE_SIZE (1 << 13)
static uint64_t LITable[LI_TABLE_SIZE];

void
aarch64_init_LIT_table (void)
{
  unsigned index;

  for (index = 0; index < LI_TABLE_SIZE; index++)
    {
      uint32_t N    = uimm (index, 12, 12);
      uint32_t immr = uimm (index, 11, 6);
      uint32_t imms = uimm (index, 5, 0);

      LITable [index] = expand_logical_immediate (imms, immr, N);
    }
}
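
/* The intent (an assumption about the decode paths, which live
   elsewhere in this file) is that a decoder can then map instr[22,10]
   straight to the expanded immediate with something like
   LITable [INSTR (22, 10)], treating a zero entry as an unallocated
   encoding, since expand_logical_immediate returns 0 for invalid
   combinations.  */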

static void
dexNotify (sim_cpu *cpu)
{
  /* instr[14,0] == type : 0 ==> method entry, 1 ==> method reentry
                           2 ==> exit Java, 3 ==> start next bytecode.  */
  uint32_t type = INSTR (14, 0);

  TRACE_EVENTS (cpu, "Notify Insn encountered, type = 0x%x", type);

  switch (type)
    {
    case 0:
      /* aarch64_notifyMethodEntry (aarch64_get_reg_u64 (cpu, R23, 0),
         aarch64_get_reg_u64 (cpu, R22, 0));  */
      break;
    case 1:
      /* aarch64_notifyMethodReentry (aarch64_get_reg_u64 (cpu, R23, 0),
         aarch64_get_reg_u64 (cpu, R22, 0));  */
      break;
    case 2:
      /* aarch64_notifyMethodExit ();  */
      break;
    case 3:
      /* aarch64_notifyBCStart (aarch64_get_reg_u64 (cpu, R23, 0),
         aarch64_get_reg_u64 (cpu, R22, 0));  */
      break;
    }
}

/* Secondary decode within top level groups.  */

static void
dexPseudo (sim_cpu *cpu)
{
  /* assert instr[28,27] = 00

     We provide 2 pseudo instructions:

     HALT stops execution of the simulator causing an immediate
     return to the x86 code which entered it.

     CALLOUT initiates recursive entry into x86 code.  A register
     argument holds the address of the x86 routine.  Immediate
     values in the instruction identify the number of general
     purpose and floating point register arguments to be passed
     and the type of any value to be returned.  */

  uint32_t PSEUDO_HALT     = 0xE0000000U;
  uint32_t PSEUDO_CALLOUT  = 0x00018000U;
  uint32_t PSEUDO_CALLOUTR = 0x00018001U;
  uint32_t PSEUDO_NOTIFY   = 0x00014000U;
  uint32_t dispatch;

  if (aarch64_get_instr (cpu) == PSEUDO_HALT)
    {
      TRACE_EVENTS (cpu, " Pseudo Halt Instruction");
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                       sim_stopped, SIM_SIGTRAP);
    }

  dispatch = INSTR (31, 15);

  /* We do not handle callouts at the moment.  */
  if (dispatch == PSEUDO_CALLOUT || dispatch == PSEUDO_CALLOUTR)
    {
      TRACE_EVENTS (cpu, " Callout");
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                       sim_stopped, SIM_SIGABRT);
    }

  else if (dispatch == PSEUDO_NOTIFY)
    dexNotify (cpu);

  else
    HALT_UNALLOC;
}

/* Load-store single register (unscaled offset)
   These instructions employ a base register plus an unscaled signed
   9 bit offset.

   N.B. the base register (source) can be Xn or SP.  All other
   registers may not be SP.  */

/* 32 bit load 32 bit unscaled signed 9 bit.  */
static void
ldur32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 64 bit load 64 bit unscaled signed 9 bit.  */
static void
ldur64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 32 bit load zero-extended byte unscaled signed 9 bit.  */
static void
ldurb32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 32 bit load sign-extended byte unscaled signed 9 bit.  */
static void
ldursb32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 64 bit load sign-extended byte unscaled signed 9 bit.  */
static void
ldursb64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s8
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 32 bit load zero-extended short unscaled signed 9 bit.  */
static void
ldurh32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_mem_u16
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 32 bit load sign-extended short unscaled signed 9 bit.  */
static void
ldursh32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s16
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 64 bit load sign-extended short unscaled signed 9 bit.  */
static void
ldursh64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s16
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 64 bit load sign-extended word unscaled signed 9 bit.  */
static void
ldursw (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* LDURSW sign-extends the loaded word into the 64 bit destination,
     so do not truncate the value back to 32 bits here.  */
  aarch64_set_reg_s64 (cpu, rd, NO_SP, aarch64_get_mem_s32
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* N.B. with stores the value in source is written to the address
   identified by source2 modified by offset.  */

/* 32 bit store 32 bit unscaled signed 9 bit.  */
static void
stur32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                       aarch64_get_reg_u32 (cpu, rd, NO_SP));
}

/* 64 bit store 64 bit unscaled signed 9 bit.  */
static void
stur64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                       aarch64_get_reg_u64 (cpu, rd, NO_SP));
}

/* 32 bit store byte unscaled signed 9 bit.  */
static void
sturb (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u8 (cpu,
                      aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                      aarch64_get_reg_u8 (cpu, rd, NO_SP));
}

/* 32 bit store short unscaled signed 9 bit.  */
static void
sturh (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                       aarch64_get_reg_u16 (cpu, rd, NO_SP));
}

/* Load single register pc-relative label
   Offset is a signed 19 bit immediate count in words
   rt may not be SP.  */

/* 32 bit pc-relative load.  */
static void
ldr32_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_mem_u32
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* 64 bit pc-relative load.  */
static void
ldr_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_mem_u64
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* Sign extended 32 bit pc-relative load.  */
static void
ldrsw_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_mem_s32
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* Float pc-relative load.  */
static void
fldrs_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, rd, 0,
                       aarch64_get_mem_u32
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* Double pc-relative load.  */
static void
fldrd_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int st = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, st, 0,
                       aarch64_get_mem_u64
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* Long double pc-relative load.  */
static void
fldrq_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int st = INSTR (4, 0);
  uint64_t addr = aarch64_get_PC (cpu) + offset * 4;
  FRegister a;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_get_mem_long_double (cpu, addr, & a);
  aarch64_set_FP_long_double (cpu, st, a);
}

/* This can be used to scale an offset by applying
   the requisite shift.  The second argument is either
   16, 32, 64 or 128.  */

#define SCALE(_offset, _elementSize) \
  ((_offset) << ScaleShift ## _elementSize)

/* This can be used to optionally scale a register derived offset
   by applying the requisite shift as indicated by the Scaling
   argument.  The second argument is the element size as a bit count,
   as for SCALE above.  The third argument is either Scaled or
   Unscaled.  N.B. when _Scaling is Scaled the shift is applied in
   full, while when it is Unscaled no shift is applied at all.  */

#define OPT_SCALE(_offset, _elementType, _Scaling) \
  ((_offset) << (_Scaling ? ScaleShift ## _elementType : 0))
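
/* For example (informal, relying on ScaleShift64 being 3 so that a
   64 bit element scales by 8 bytes, and assuming the Scaled
   enumerator is non-zero): SCALE (2, 64) gives a byte offset of 16,
   OPT_SCALE (2, 64, Scaled) likewise gives 16, while
   OPT_SCALE (2, 64, Unscaled) leaves the offset at 2.  */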

/* This can be used to zero or sign extend a 32 bit register derived
   value to a 64 bit value.  The first argument must be the value as
   a uint32_t and the second must be either UXTW or SXTW.  The result
   is returned as an int64_t.  */

static inline int64_t
extend (uint32_t value, Extension extension)
{
  union
  {
    uint32_t u;
    int32_t  n;
  } x;

  /* A branchless variant of this ought to be possible.  */
  if (extension == UXTW || extension == NoExtension)
    return value;

  x.u = value;
  return x.n;
}
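
/* Informally: extend (0xffffff80, SXTW) reinterprets the bits as the
   signed 32 bit value -128 and returns -128 as an int64_t, while
   extend (0xffffff80, UXTW) returns 0xffffff80 unchanged.  */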

/* Scalar Floating Point

   FP load/store single register (4 addressing modes)

   N.B. the base register (source) can be the stack pointer.
   The secondary source register (source2) can only be an Xn register.  */

/* Load 32 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32 (cpu, address));
  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}
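
/* A quick sanity check of the writeback convention used by this and
   all the _wb routines below: with rn holding 0x1000 and offset 16,
   Pre loads from 0x1010 and leaves rn at 0x1010, Post loads from
   0x1000 and leaves rn at 0x1010, and NoWriteBack loads from 0x1010
   leaving rn untouched.  */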

/* Load 8 bit with unsigned 12 bit offset.  */
static void
fldrb_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* An 8 bit load must read only one byte of memory.  */
  aarch64_set_vec_u8 (cpu, rd, 0, aarch64_get_mem_u8 (cpu, addr));
}

/* Load 16 bit scaled unsigned 12 bit.  */
static void
fldrh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u16 (cpu, rd, 0, aarch64_get_mem_u16 (cpu, addr));
}

/* Load 32 bit scaled unsigned 12 bit.  */
static void
fldrs_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr));
}

/* Load 64 bit scaled unsigned 12 bit.  */
static void
fldrd_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
}

/* Load 128 bit scaled unsigned 12 bit.  */
static void
fldrq_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
  aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_mem_u64 (cpu, addr + 8));
}

/* Load 32 bit scaled or unscaled zero- or sign-extended
   32-bit register offset.  */
static void
fldrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
                       (cpu, address + displacement));
}

/* Load 64 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 64 bit scaled or unscaled zero- or sign-extended
   32-bit register offset.  */
static void
fldrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  fldrd_wb (cpu, displacement, NoWriteBack);
}

/* Load 128 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  FRegister a;
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_get_mem_long_double (cpu, address, & a);
  aarch64_set_FP_long_double (cpu, st, a);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 128 bit scaled or unscaled zero- or sign-extended
   32-bit register offset.  */
static void
fldrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 128, scaling);

  fldrq_wb (cpu, displacement, NoWriteBack);
}

/* Memory Access

   load-store single register
   There are four addressing modes available here which all employ a
   64 bit source (base) register.

   N.B. the base register (source) can be the stack pointer.
   The secondary source register (source2) can only be an Xn register.

   Scaled, 12-bit, unsigned immediate offset, without pre- and
   post-index options.
   Unscaled, 9-bit, signed immediate offset with pre- or post-index
   writeback.
   Scaled or unscaled 64-bit register offset.
   Scaled or unscaled 32-bit extended register offset.

   All offsets are assumed to arrive raw from the decode, i.e. the
   simulator is expected to scale them according to the accessed data
   size.  The same applies to the register and extended register
   offset versions, except that in the latter case the operation may
   also require a sign extend.

   A separate method is provided for each possible addressing mode.  */

/* 32 bit load 32 bit scaled unsigned 12 bit.  */
static void
ldr32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + SCALE (offset, 32)));
}

/* 32 bit load 32 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
ldr32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load 32 bit scaled or unscaled
   zero- or sign-extended 32-bit register offset.  */
static void
ldr32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       aarch64_get_mem_u32 (cpu, address + displacement));
}

/* 64 bit load 64 bit scaled unsigned 12 bit.  */
static void
ldr_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + SCALE (offset, 64)));
}

/* 64 bit load 64 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
ldr_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load 64 bit scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldr_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       aarch64_get_mem_u64 (cpu, address + displacement));
}

/* 32 bit load zero-extended byte scaled unsigned 12 bit.  */
static void
ldrb32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.
     There is no scaling required for a byte load.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       aarch64_get_mem_u8
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
}

/* 32 bit load zero-extended byte unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load zero-extended byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                 extension);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte load.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       aarch64_get_mem_u8 (cpu, address + displacement));
}

/* 64 bit load sign-extended byte unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;
  int64_t val;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s8 (cpu, address);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended byte scaled unsigned 12 bit.  */
static void
ldrsb_abs (sim_cpu *cpu, uint32_t offset)
{
  ldrsb_wb (cpu, offset, NoWriteBack);
}

/* 64 bit load sign-extended byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                 extension);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte load.  */
  aarch64_set_reg_s64 (cpu, rt, NO_SP,
                       aarch64_get_mem_s8 (cpu, address + displacement));
}

/* 32 bit load zero-extended short scaled unsigned 12 bit.  */
static void
ldrh32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint32_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 16));
  aarch64_set_reg_u32 (cpu, rt, NO_SP, val);
}

/* 32 bit load zero-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u32 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load zero-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u32 (cpu, rt, NO_SP,
                       aarch64_get_mem_u16 (cpu, address + displacement));
}

/* 32 bit load sign-extended short scaled unsigned 12 bit.  */
static void
ldrsh32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int32_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 16));
  aarch64_set_reg_s32 (cpu, rt, NO_SP, val);
}

/* 32 bit load sign-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s32 (cpu, rt, NO_SP,
                       (int32_t) aarch64_get_mem_s16 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load sign-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s32 (cpu, rt, NO_SP,
                       (int32_t) aarch64_get_mem_s16
                       (cpu, address + displacement));
}

/* 64 bit load sign-extended short scaled unsigned 12 bit.  */
static void
ldrsh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 16));
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsh64_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;
  int64_t val;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  val = aarch64_get_mem_s16 (cpu, address);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s16 (cpu, address + displacement);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended 32 bit scaled unsigned 12 bit.  */
static void
ldrsw_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 32));
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended 32 bit unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsw_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended 32 bit scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsw_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP,
                       aarch64_get_mem_s32 (cpu, address + displacement));
}

/* N.B. with stores the value in source is written to the
   address identified by source2 modified by source3/offset.  */

/* 32 bit store scaled unsigned 12 bit.  */
static void
str32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_mem_u32 (cpu, (aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 32)),
                       aarch64_get_reg_u32 (cpu, rt, NO_SP));
}

/* 32 bit store unscaled signed 9 bit with pre- or post-writeback.  */
static void
str32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu, address, aarch64_get_reg_u32 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store scaled or unscaled zero- or
   sign-extended 32-bit register offset.  */
static void
str32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu, address + displacement,
                       aarch64_get_reg_u32 (cpu, rt, NO_SP));
}

/* 64 bit store scaled unsigned 12 bit.  */
static void
str_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK)
                       + SCALE (offset, 64),
                       aarch64_get_reg_u64 (cpu, rt, NO_SP));
}

/* 64 bit store unscaled signed 9 bit with pre- or post-writeback.  */
static void
str_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, address, aarch64_get_reg_u64 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit store scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
str_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                             extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, address + displacement,
                       aarch64_get_reg_u64 (cpu, rt, NO_SP));
}

/* 32 bit store byte scaled unsigned 12 bit.  */
static void
strb_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.
     There is no scaling required for a byte load.  */
  aarch64_set_mem_u8 (cpu,
                      aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                      aarch64_get_reg_u8 (cpu, rt, NO_SP));
}

/* 32 bit store byte unscaled signed 9 bit with pre- or post-writeback.  */
static void
strb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u8 (cpu, address, aarch64_get_reg_u8 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
strb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                 extension);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte load.  */
  aarch64_set_mem_u8 (cpu, address + displacement,
                      aarch64_get_reg_u8 (cpu, rt, NO_SP));
}

/* 32 bit store short scaled unsigned 12 bit.  */
static void
strh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                       + SCALE (offset, 16),
                       aarch64_get_reg_u16 (cpu, rt, NO_SP));
}

/* 32 bit store short unscaled signed 9 bit with pre- or post-writeback.  */
static void
strh_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu, address, aarch64_get_reg_u16 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
strh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu, address + displacement,
                       aarch64_get_reg_u16 (cpu, rt, NO_SP));
}

/* Prefetch unsigned 12 bit.  */
static void
prfm_abs (sim_cpu *cpu, uint32_t offset)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK)
     + SCALE (offset, 64).  */

  /* TODO : implement prefetch of address.  */
}

/* Prefetch scaled or unscaled zero- or sign-extended 32-bit register offset.  */
static void
prfm_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     rn may reference SP, rm may only reference ZR
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t base = aarch64_get_reg_u64 (cpu, rn, SP_OK);
     int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                extension);
     uint64_t displacement = OPT_SCALE (extended, 64, scaling);
     uint64_t address = base + displacement.  */

  /* TODO : implement prefetch of address.  */
}

/* 64 bit pc-relative prefetch.  */
static void
prfm_pcrel (sim_cpu *cpu, int32_t offset)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t address = aarch64_get_PC (cpu) + offset * 4.  */

  /* TODO : implement this.  */
}

/* Load-store exclusive.  */

static void
ldxr (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int size = INSTR (31, 30);
  /* int ordered = INSTR (15, 15);  */
  /* int exclusive = ! INSTR (23, 23);  */

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (size)
    {
    case 0:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
      break;
    case 1:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
      break;
    case 2:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
      break;
    case 3:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
      break;
    }
}

static void
stxr (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  unsigned rs = INSTR (20, 16);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int size = INSTR (31, 30);
  uint64_t data = aarch64_get_reg_u64 (cpu, rt, NO_SP);

  switch (size)
    {
    case 0: aarch64_set_mem_u8 (cpu, address, data); break;
    case 1: aarch64_set_mem_u16 (cpu, address, data); break;
    case 2: aarch64_set_mem_u32 (cpu, address, data); break;
    case 3: aarch64_set_mem_u64 (cpu, address, data); break;
    }

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* Exclusive stores always succeed in this simulator.  */
  aarch64_set_reg_u64 (cpu, rs, NO_SP, 0);
}

static void
dexLoadLiteral (sim_cpu *cpu)
{
  /* instr[29,27] == 011
     instr[25,24] == 00
     instr[31,30:26] = opc: 000 ==> LDRW,  001 ==> FLDRS
                            010 ==> LDRX,  011 ==> FLDRD
                            100 ==> LDRSW, 101 ==> FLDRQ
                            110 ==> PRFM,  111 ==> UNALLOC
     instr[26] ==> V : 0 ==> GReg, 1 ==> FReg
     instr[23, 5] == simm19  */

  /* unsigned rt = INSTR (4, 0);  */
  uint32_t dispatch = (INSTR (31, 30) << 1) | INSTR (26, 26);
  int32_t imm = simm32 (aarch64_get_instr (cpu), 23, 5);

  switch (dispatch)
    {
    case 0: ldr32_pcrel (cpu, imm); break;
    case 1: fldrs_pcrel (cpu, imm); break;
    case 2: ldr_pcrel   (cpu, imm); break;
    case 3: fldrd_pcrel (cpu, imm); break;
    case 4: ldrsw_pcrel (cpu, imm); break;
    case 5: fldrq_pcrel (cpu, imm); break;
    case 6: prfm_pcrel  (cpu, imm); break;
    case 7:
    default:
      HALT_UNALLOC;
    }
}
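
/* An illustrative decode (following the opc table in the comment
   above): LDR Xt, <label> carries opc 01 in instr[31,30] and V == 0
   in instr[26], so dispatch == 2 and we land in ldr_pcrel with imm
   holding the signed word offset from the PC.  */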

/* Immediate arithmetic
   The aimm argument is a 12 bit unsigned value or a 12 bit unsigned
   value left shifted by 12 bits (done at decode).

   N.B. the register args (dest, source) can normally be Xn or SP.
   The exception occurs for flag setting instructions which may
   only use Xn for the output (dest).  */

/* 32 bit add immediate.  */
static void
add32 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
                       aarch64_get_reg_u32 (cpu, rn, SP_OK) + aimm);
}

/* 64 bit add immediate.  */
static void
add64 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + aimm);
}

static void
set_flags_for_add32 (sim_cpu *cpu, int32_t value1, int32_t value2)
{
  int32_t  result = value1 + value2;
  int64_t  sresult = (int64_t) value1 + (int64_t) value2;
  uint64_t uresult = (uint64_t)(uint32_t) value1
    + (uint64_t)(uint32_t) value2;
  uint32_t flags = 0;

  if (result == 0)
    flags |= Z;

  if (result & (1 << 31))
    flags |= N;

  /* Compare against the truncated unsigned result: comparing uresult
     directly against the signed result would sign-extend it and
     spuriously set C whenever the result is negative.  */
  if (uresult != (uint32_t) result)
    flags |= C;

  if (sresult != result)
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}
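
/* Hand-checked examples of the rules above (not an exhaustive proof):
   0x7fffffff + 1 gives result 0x80000000, so N and V are set but C is
   not, since uresult == 0x80000000 matches the truncated result;
   0xffffffff + 1 wraps to 0, so Z and C are set (uresult is
   0x100000000) while V is not.  */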
1661
1662 #define NEG(a) (((a) & signbit) == signbit)
1663 #define POS(a) (((a) & signbit) == 0)
1664
1665 static void
1666 set_flags_for_add64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
1667 {
1668 uint64_t result = value1 + value2;
1669 uint32_t flags = 0;
1670 uint64_t signbit = 1ULL << 63;
1671
1672 if (result == 0)
1673 flags |= Z;
1674
1675 if (NEG (result))
1676 flags |= N;
1677
1678 if ( (NEG (value1) && NEG (value2))
1679 || (NEG (value1) && POS (result))
1680 || (NEG (value2) && POS (result)))
1681 flags |= C;
1682
1683 if ( (NEG (value1) && NEG (value2) && POS (result))
1684 || (POS (value1) && POS (value2) && NEG (result)))
1685 flags |= V;
1686
1687 aarch64_set_CPSR (cpu, flags);
1688 }
1689
1690 static void
1691 set_flags_for_sub32 (sim_cpu *cpu, uint32_t value1, uint32_t value2)
1692 {
1693 uint32_t result = value1 - value2;
1694 uint32_t flags = 0;
1695 uint32_t signbit = 1U << 31;
1696
1697 if (result == 0)
1698 flags |= Z;
1699
1700 if (NEG (result))
1701 flags |= N;
1702
1703 if ( (NEG (value1) && POS (value2))
1704 || (NEG (value1) && POS (result))
1705 || (POS (value2) && POS (result)))
1706 flags |= C;
1707
1708 if ( (NEG (value1) && POS (value2) && POS (result))
1709 || (POS (value1) && NEG (value2) && NEG (result)))
1710 flags |= V;
1711
1712 aarch64_set_CPSR (cpu, flags);
1713 }
1714
1715 static void
1716 set_flags_for_sub64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
1717 {
1718 uint64_t result = value1 - value2;
1719 uint32_t flags = 0;
1720 uint64_t signbit = 1ULL << 63;
1721
1722 if (result == 0)
1723 flags |= Z;
1724
1725 if (NEG (result))
1726 flags |= N;
1727
1728 if ( (NEG (value1) && POS (value2))
1729 || (NEG (value1) && POS (result))
1730 || (POS (value2) && POS (result)))
1731 flags |= C;
1732
1733 if ( (NEG (value1) && POS (value2) && POS (result))
1734 || (POS (value1) && NEG (value2) && NEG (result)))
1735 flags |= V;
1736
1737 aarch64_set_CPSR (cpu, flags);
1738 }
1739
1740 static void
1741 set_flags_for_binop32 (sim_cpu *cpu, uint32_t result)
1742 {
1743 uint32_t flags = 0;
1744
1745 if (result == 0)
1746 flags |= Z;
1747 else
1748 flags &= ~ Z;
1749
1750 if (result & (1 << 31))
1751 flags |= N;
1752 else
1753 flags &= ~ N;
1754
1755 aarch64_set_CPSR (cpu, flags);
1756 }
1757
1758 static void
1759 set_flags_for_binop64 (sim_cpu *cpu, uint64_t result)
1760 {
1761 uint32_t flags = 0;
1762
1763 if (result == 0)
1764 flags |= Z;
1765 else
1766 flags &= ~ Z;
1767
1768 if (result & (1ULL << 63))
1769 flags |= N;
1770 else
1771 flags &= ~ N;
1772
1773 aarch64_set_CPSR (cpu, flags);
1774 }
1775
1776 /* 32 bit add immediate set flags. */
1777 static void
1778 adds32 (sim_cpu *cpu, uint32_t aimm)
1779 {
1780 unsigned rn = INSTR (9, 5);
1781 unsigned rd = INSTR (4, 0);
1782 /* TODO : do we need to worry about signs here? */
1783 int32_t value1 = aarch64_get_reg_s32 (cpu, rn, SP_OK);
1784
1785 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1786 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + aimm);
1787 set_flags_for_add32 (cpu, value1, aimm);
1788 }
1789
1790 /* 64 bit add immediate set flags. */
1791 static void
1792 adds64 (sim_cpu *cpu, uint32_t aimm)
1793 {
1794 unsigned rn = INSTR (9, 5);
1795 unsigned rd = INSTR (4, 0);
1796 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1797 uint64_t value2 = aimm;
1798
1799 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1800 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1801 set_flags_for_add64 (cpu, value1, value2);
1802 }
1803
1804 /* 32 bit sub immediate. */
1805 static void
1806 sub32 (sim_cpu *cpu, uint32_t aimm)
1807 {
1808 unsigned rn = INSTR (9, 5);
1809 unsigned rd = INSTR (4, 0);
1810
1811 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1812 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1813 aarch64_get_reg_u32 (cpu, rn, SP_OK) - aimm);
1814 }
1815
1816 /* 64 bit sub immediate. */
1817 static void
1818 sub64 (sim_cpu *cpu, uint32_t aimm)
1819 {
1820 unsigned rn = INSTR (9, 5);
1821 unsigned rd = INSTR (4, 0);
1822
1823 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1824 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1825 aarch64_get_reg_u64 (cpu, rn, SP_OK) - aimm);
1826 }
1827
1828 /* 32 bit sub immediate set flags. */
1829 static void
1830 subs32 (sim_cpu *cpu, uint32_t aimm)
1831 {
1832 unsigned rn = INSTR (9, 5);
1833 unsigned rd = INSTR (4, 0);
1834 uint32_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1835 uint32_t value2 = aimm;
1836
1837 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1838 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1839 set_flags_for_sub32 (cpu, value1, value2);
1840 }
1841
1842 /* 64 bit sub immediate set flags. */
1843 static void
1844 subs64 (sim_cpu *cpu, uint32_t aimm)
1845 {
1846 unsigned rn = INSTR (9, 5);
1847 unsigned rd = INSTR (4, 0);
1848 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1849 uint32_t value2 = aimm;
1850
1851 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1852 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1853 set_flags_for_sub64 (cpu, value1, value2);
1854 }
1855
1856 /* Data Processing Register. */
1857
1858 /* First two helpers to perform the shift operations. */
1859
1860 static inline uint32_t
1861 shifted32 (uint32_t value, Shift shift, uint32_t count)
1862 {
1863 switch (shift)
1864 {
1865 default:
1866 case LSL:
1867 return (value << count);
1868 case LSR:
1869 return (value >> count);
1870 case ASR:
1871 {
1872 int32_t svalue = value;
1873 return (svalue >> count);
1874 }
1875 case ROR:
1876 /* N.B. a rotate by zero must be handled specially, since the
1877 "value << (32 - count)" shift below would then be undefined
1878 behaviour in C.  */
1879 return (count == 0 ? value
1880 : ((value >> count) | (value << (32 - count))));
1881 }
1882 }
1883
1884 static inline uint64_t
1885 shifted64 (uint64_t value, Shift shift, uint32_t count)
1886 {
1887 switch (shift)
1888 {
1889 default:
1890 case LSL:
1891 return (value << count);
1892 case LSR:
1893 return (value >> count);
1894 case ASR:
1895 {
1896 int64_t svalue = value;
1897 return (svalue >> count);
1898 }
1899 case ROR:
1900 /* N.B. a rotate by zero must be handled specially, since the
1901 "value << (64 - count)" shift below would then be undefined
1902 behaviour in C.  */
1903 return (count == 0 ? value
1904 : ((value >> count) | (value << (64 - count))));
1905 }
1906 }
1907
1908 /* Arithmetic shifted register.
1909 These allow an optional LSL, ASR or LSR to the second source
1910 register with a count up to the register bit count.
1911
1912 N.B. register args may not be SP. */
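/* For example, "add w0, w1, w2, lsr #2" reaches add32_shift below
   with shift == LSR and count == 2, so the second operand becomes
   shifted32 (w2, LSR, 2), i.e. w2 / 4.  */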
1913
1914 /* 32 bit ADD shifted register. */
1915 static void
1916 add32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1917 {
1918 unsigned rm = INSTR (20, 16);
1919 unsigned rn = INSTR (9, 5);
1920 unsigned rd = INSTR (4, 0);
1921
1922 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1923 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1924 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1925 + shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1926 shift, count));
1927 }
1928
1929 /* 64 bit ADD shifted register. */
1930 static void
1931 add64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1932 {
1933 unsigned rm = INSTR (20, 16);
1934 unsigned rn = INSTR (9, 5);
1935 unsigned rd = INSTR (4, 0);
1936
1937 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1938 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1939 aarch64_get_reg_u64 (cpu, rn, NO_SP)
1940 + shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1941 shift, count));
1942 }
1943
1944 /* 32 bit ADD shifted register setting flags. */
1945 static void
1946 adds32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1947 {
1948 unsigned rm = INSTR (20, 16);
1949 unsigned rn = INSTR (9, 5);
1950 unsigned rd = INSTR (4, 0);
1951
1952 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
1953 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1954 shift, count);
1955
1956 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1957 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1958 set_flags_for_add32 (cpu, value1, value2);
1959 }
1960
1961 /* 64 bit ADD shifted register setting flags. */
1962 static void
1963 adds64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1964 {
1965 unsigned rm = INSTR (20, 16);
1966 unsigned rn = INSTR (9, 5);
1967 unsigned rd = INSTR (4, 0);
1968
1969 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
1970 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1971 shift, count);
1972
1973 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1974 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1975 set_flags_for_add64 (cpu, value1, value2);
1976 }
1977
1978 /* 32 bit SUB shifted register. */
1979 static void
1980 sub32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1981 {
1982 unsigned rm = INSTR (20, 16);
1983 unsigned rn = INSTR (9, 5);
1984 unsigned rd = INSTR (4, 0);
1985
1986 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1987 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1988 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1989 - shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1990 shift, count));
1991 }
1992
1993 /* 64 bit SUB shifted register. */
1994 static void
1995 sub64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1996 {
1997 unsigned rm = INSTR (20, 16);
1998 unsigned rn = INSTR (9, 5);
1999 unsigned rd = INSTR (4, 0);
2000
2001 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2002 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2003 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2004 - shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
2005 shift, count));
2006 }
2007
2008 /* 32 bit SUB shifted register setting flags. */
2009 static void
2010 subs32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2011 {
2012 unsigned rm = INSTR (20, 16);
2013 unsigned rn = INSTR (9, 5);
2014 unsigned rd = INSTR (4, 0);
2015
2016 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2017 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
2018 shift, count);
2019
2020 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2021 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2022 set_flags_for_sub32 (cpu, value1, value2);
2023 }
2024
2025 /* 64 bit SUB shifted register setting flags. */
2026 static void
2027 subs64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2028 {
2029 unsigned rm = INSTR (20, 16);
2030 unsigned rn = INSTR (9, 5);
2031 unsigned rd = INSTR (4, 0);
2032
2033 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2034 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
2035 shift, count);
2036
2037 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2038 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2039 set_flags_for_sub64 (cpu, value1, value2);
2040 }
2041
2042 /* First a couple more helpers to fetch the
2043 relevant source register element either
2044 sign or zero extended as required by the
2045 extension value. */
2046
2047 static uint32_t
2048 extreg32 (sim_cpu *cpu, unsigned int lo, Extension extension)
2049 {
2050 switch (extension)
2051 {
2052 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2053 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2054 case UXTW: /* Fall through. */
2055 case UXTX: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2056 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2057 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2058 case SXTW: /* Fall through. */
2059 case SXTX: /* Fall through. */
2060 default: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2061 }
2062 }
2063
2064 static uint64_t
2065 extreg64 (sim_cpu *cpu, unsigned int lo, Extension extension)
2066 {
2067 switch (extension)
2068 {
2069 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2070 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2071 case UXTW: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2072 case UXTX: return aarch64_get_reg_u64 (cpu, lo, NO_SP);
2073 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2074 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2075 case SXTW: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2076 case SXTX:
2077 default: return aarch64_get_reg_s64 (cpu, lo, NO_SP);
2078 }
2079 }
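/* For example, with extension == SXTB and the low byte of Xm holding
   0x80, extreg64 returns 0xffffffffffffff80, which the ADD/SUB
   routines below then treat as -128.  */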
2080
2081 /* Arithmetic extending register
2082 These allow an optional sign extension of some portion of the
2083 second source register followed by an optional left shift of
2084 between 0 and 4 bits.
2085
2086 N.B. output (dest) and first input arg (source) may normally be Xn
2087 or SP. However, for flag setting operations dest can only be
2088 Xn. Second input registers are always Xn. */
2089
2090 /* 32 bit ADD extending register. */
2091 static void
2092 add32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2093 {
2094 unsigned rm = INSTR (20, 16);
2095 unsigned rn = INSTR (9, 5);
2096 unsigned rd = INSTR (4, 0);
2097
2098 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2099 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2100 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2101 + (extreg32 (cpu, rm, extension) << shift));
2102 }
2103
2104 /* 64 bit ADD extending register.
2105 N.B. This subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2106 static void
2107 add64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2108 {
2109 unsigned rm = INSTR (20, 16);
2110 unsigned rn = INSTR (9, 5);
2111 unsigned rd = INSTR (4, 0);
2112
2113 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2114 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2115 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2116 + (extreg64 (cpu, rm, extension) << shift));
2117 }
2118
2119 /* 32 bit ADD extending register setting flags. */
2120 static void
2121 adds32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2122 {
2123 unsigned rm = INSTR (20, 16);
2124 unsigned rn = INSTR (9, 5);
2125 unsigned rd = INSTR (4, 0);
2126
2127 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2128 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2129
2130 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2131 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2132 set_flags_for_add32 (cpu, value1, value2);
2133 }
2134
2135 /* 64 bit ADD extending register setting flags */
2136 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2137 static void
2138 adds64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2139 {
2140 unsigned rm = INSTR (20, 16);
2141 unsigned rn = INSTR (9, 5);
2142 unsigned rd = INSTR (4, 0);
2143
2144 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2145 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2146
2147 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2148 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2149 set_flags_for_add64 (cpu, value1, value2);
2150 }
2151
2152 /* 32 bit SUB extending register. */
2153 static void
2154 sub32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2155 {
2156 unsigned rm = INSTR (20, 16);
2157 unsigned rn = INSTR (9, 5);
2158 unsigned rd = INSTR (4, 0);
2159
2160 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2161 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2162 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2163 - (extreg32 (cpu, rm, extension) << shift));
2164 }
2165
2166 /* 64 bit SUB extending register. */
2167 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2168 static void
2169 sub64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2170 {
2171 unsigned rm = INSTR (20, 16);
2172 unsigned rn = INSTR (9, 5);
2173 unsigned rd = INSTR (4, 0);
2174
2175 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2176 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2177 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2178 - (extreg64 (cpu, rm, extension) << shift));
2179 }
2180
2181 /* 32 bit SUB extending register setting flags. */
2182 static void
2183 subs32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2184 {
2185 unsigned rm = INSTR (20, 16);
2186 unsigned rn = INSTR (9, 5);
2187 unsigned rd = INSTR (4, 0);
2188
2189 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2190 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2191
2192 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2193 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2194 set_flags_for_sub32 (cpu, value1, value2);
2195 }
2196
2197 /* 64 bit SUB extending register setting flags */
2198 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2199 static void
2200 subs64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2201 {
2202 unsigned rm = INSTR (20, 16);
2203 unsigned rn = INSTR (9, 5);
2204 unsigned rd = INSTR (4, 0);
2205
2206 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2207 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2208
2209 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2210 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2211 set_flags_for_sub64 (cpu, value1, value2);
2212 }
2213
2214 static void
2215 dexAddSubtractImmediate (sim_cpu *cpu)
2216 {
2217 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2218 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2219 instr[29] = set : 0 ==> no flags, 1 ==> set flags
2220 instr[28,24] = 10001
2221 instr[23,22] = shift : 00 ==> LSL#0, 01 ==> LSL#12, 1x ==> UNALLOC
2222 instr[21,10] = uimm12
2223 instr[9,5] = Rn
2224 instr[4,0] = Rd */
2225
2226 /* N.B. the shift is applied at decode before calling the add/sub routine. */
2227 uint32_t shift = INSTR (23, 22);
2228 uint32_t imm = INSTR (21, 10);
2229 uint32_t dispatch = INSTR (31, 29);
2230
2231 NYI_assert (28, 24, 0x11);
2232
2233 if (shift > 1)
2234 HALT_UNALLOC;
2235
2236 if (shift)
2237 imm <<= 12;
2238
2239 switch (dispatch)
2240 {
2241 case 0: add32 (cpu, imm); break;
2242 case 1: adds32 (cpu, imm); break;
2243 case 2: sub32 (cpu, imm); break;
2244 case 3: subs32 (cpu, imm); break;
2245 case 4: add64 (cpu, imm); break;
2246 case 5: adds64 (cpu, imm); break;
2247 case 6: sub64 (cpu, imm); break;
2248 case 7: subs64 (cpu, imm); break;
2249 }
2250 }
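/* For example, "adds x0, x1, #1, lsl #12" has dispatch == 5 and
   shift == 1, so adds64 is called with aimm == 0x1000.  */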
2251
2252 static void
2253 dexAddSubtractShiftedRegister (sim_cpu *cpu)
2254 {
2255 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2256 instr[30,29] = op : 00 ==> ADD, 01 ==> ADDS, 10 ==> SUB, 11 ==> SUBS
2257 instr[28,24] = 01011
2258 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> UNALLOC
2259 instr[21] = 0
2260 instr[20,16] = Rm
2261 instr[15,10] = count : must be 0xxxxx for 32 bit
2262 instr[9,5] = Rn
2263 instr[4,0] = Rd */
2264
2265 uint32_t size = INSTR (31, 31);
2266 uint32_t count = INSTR (15, 10);
2267 Shift shiftType = INSTR (23, 22);
2268
2269 NYI_assert (28, 24, 0x0B);
2270 NYI_assert (21, 21, 0);
2271
2272 /* Shift encoded as ROR is unallocated. */
2273 if (shiftType == ROR)
2274 HALT_UNALLOC;
2275
2276 /* 32 bit operations must have count[5] = 0
2277 or else we have an UNALLOC. */
2278 if (size == 0 && uimm (count, 5, 5))
2279 HALT_UNALLOC;
2280
2281 /* Dispatch on size:op, i.e. instr[31,29]. */
2282 switch (INSTR (31, 29))
2283 {
2284 case 0: add32_shift (cpu, shiftType, count); break;
2285 case 1: adds32_shift (cpu, shiftType, count); break;
2286 case 2: sub32_shift (cpu, shiftType, count); break;
2287 case 3: subs32_shift (cpu, shiftType, count); break;
2288 case 4: add64_shift (cpu, shiftType, count); break;
2289 case 5: adds64_shift (cpu, shiftType, count); break;
2290 case 6: sub64_shift (cpu, shiftType, count); break;
2291 case 7: subs64_shift (cpu, shiftType, count); break;
2292 }
2293 }
2294
2295 static void
2296 dexAddSubtractExtendedRegister (sim_cpu *cpu)
2297 {
2298 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2299 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2300 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2301 instr[28,24] = 01011
2302 instr[23,22] = opt : 0 ==> ok, 1,2,3 ==> UNALLOC
2303 instr[21] = 1
2304 instr[20,16] = Rm
2305 instr[15,13] = option : 000 ==> UXTB, 001 ==> UXTH,
2306 010 ==> UXTW|LSL, 011 ==> UXTX,
2307 100 ==> SXTB, 101 ==> SXTH,
2308 110 ==> SXTW, 111 ==> SXTX,
2309 instr[12,10] = shift : 0,1,2,3,4 ==> ok, 5,6,7 ==> UNALLOC
2310 instr[9,5] = Rn
2311 instr[4,0] = Rd */
2312
2313 Extension extensionType = INSTR (15, 13);
2314 uint32_t shift = INSTR (12, 10);
2315
2316 NYI_assert (28, 24, 0x0B);
2317 NYI_assert (21, 21, 1);
2318
2319 /* Shift may not exceed 4. */
2320 if (shift > 4)
2321 HALT_UNALLOC;
2322
2323 /* Dispatch on size:op:set?. */
2324 switch (INSTR (31, 29))
2325 {
2326 case 0: add32_ext (cpu, extensionType, shift); break;
2327 case 1: adds32_ext (cpu, extensionType, shift); break;
2328 case 2: sub32_ext (cpu, extensionType, shift); break;
2329 case 3: subs32_ext (cpu, extensionType, shift); break;
2330 case 4: add64_ext (cpu, extensionType, shift); break;
2331 case 5: adds64_ext (cpu, extensionType, shift); break;
2332 case 6: sub64_ext (cpu, extensionType, shift); break;
2333 case 7: subs64_ext (cpu, extensionType, shift); break;
2334 }
2335 }
2336
2337 /* Conditional data processing
2338 Condition register is implicit 3rd source. */
2339
2340 /* 32 bit add with carry. */
2341 /* N.B. register args may not be SP. */
2342
2343 static void
2344 adc32 (sim_cpu *cpu)
2345 {
2346 unsigned rm = INSTR (20, 16);
2347 unsigned rn = INSTR (9, 5);
2348 unsigned rd = INSTR (4, 0);
2349
2350 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2351 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2352 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2353 + aarch64_get_reg_u32 (cpu, rm, NO_SP)
2354 + IS_SET (C));
2355 }
2356
2357 /* 64 bit add with carry */
2358 static void
2359 adc64 (sim_cpu *cpu)
2360 {
2361 unsigned rm = INSTR (20, 16);
2362 unsigned rn = INSTR (9, 5);
2363 unsigned rd = INSTR (4, 0);
2364
2365 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2366 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2367 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2368 + aarch64_get_reg_u64 (cpu, rm, NO_SP)
2369 + IS_SET (C));
2370 }
2371
2372 /* 32 bit add with carry setting flags. */
2373 static void
2374 adcs32 (sim_cpu *cpu)
2375 {
2376 unsigned rm = INSTR (20, 16);
2377 unsigned rn = INSTR (9, 5);
2378 unsigned rd = INSTR (4, 0);
2379
2380 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2381 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2382 uint32_t carry = IS_SET (C);
2383
2384 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2385 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2386 set_flags_for_add32 (cpu, value1, value2 + carry);
2387 }
2388
2389 /* 64 bit add with carry setting flags. */
2390 static void
2391 adcs64 (sim_cpu *cpu)
2392 {
2393 unsigned rm = INSTR (20, 16);
2394 unsigned rn = INSTR (9, 5);
2395 unsigned rd = INSTR (4, 0);
2396
2397 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2398 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2399 uint64_t carry = IS_SET (C);
2400
2401 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2402 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2403 set_flags_for_add64 (cpu, value1, value2 + carry);
2404 }
2405
2406 /* 32 bit sub with carry. */
2407 static void
2408 sbc32 (sim_cpu *cpu)
2409 {
2410 unsigned rm = INSTR (20, 16);
2411 unsigned rn = INSTR (9, 5); /* ngc iff rn == 31. */
2412 unsigned rd = INSTR (4, 0);
2413
2414 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2415 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2416 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2417 - aarch64_get_reg_u32 (cpu, rm, NO_SP)
2418 - 1 + IS_SET (C));
2419 }
2420
2421 /* 64 bit sub with carry */
2422 static void
2423 sbc64 (sim_cpu *cpu)
2424 {
2425 unsigned rm = INSTR (20, 16);
2426 unsigned rn = INSTR (9, 5);
2427 unsigned rd = INSTR (4, 0);
2428
2429 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2430 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2431 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2432 - aarch64_get_reg_u64 (cpu, rm, NO_SP)
2433 - 1 + IS_SET (C));
2434 }
2435
2436 /* 32 bit sub with carry setting flags */
2437 static void
2438 sbcs32 (sim_cpu *cpu)
2439 {
2440 unsigned rm = INSTR (20, 16);
2441 unsigned rn = INSTR (9, 5);
2442 unsigned rd = INSTR (4, 0);
2443
2444 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2445 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2446 uint32_t carry = IS_SET (C);
2447 uint32_t result = value1 - value2 - 1 + carry;
2448
2449 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2450 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2451 set_flags_for_sub32 (cpu, value1, value2 + 1 - carry);
2452 }
2453
2454 /* 64 bit sub with carry setting flags */
2455 static void
2456 sbcs64 (sim_cpu *cpu)
2457 {
2458 unsigned rm = INSTR (20, 16);
2459 unsigned rn = INSTR (9, 5);
2460 unsigned rd = INSTR (4, 0);
2461
2462 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2463 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2464 uint64_t carry = IS_SET (C);
2465 uint64_t result = value1 - value2 - 1 + carry;
2466
2467 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2468 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2469 set_flags_for_sub64 (cpu, value1, value2 + 1 - carry);
2470 }
2471
2472 static void
2473 dexAddSubtractWithCarry (sim_cpu *cpu)
2474 {
2475 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2476 instr[30] = op : 0 ==> ADC, 1 ==> SBC
2477 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2478 instr[28,21] = 1 1010 000
2479 instr[20,16] = Rm
2480 instr[15,10] = op2 : 000000 ==> ok, otherwise ==> UNALLOC
2481 instr[9,5] = Rn
2482 instr[4,0] = Rd */
2483
2484 uint32_t op2 = INSTR (15, 10);
2485
2486 NYI_assert (28, 21, 0xD0);
2487
2488 if (op2 != 0)
2489 HALT_UNALLOC;
2490
2491 /* Dispatch on size:op:set?. */
2492 switch (INSTR (31, 29))
2493 {
2494 case 0: adc32 (cpu); break;
2495 case 1: adcs32 (cpu); break;
2496 case 2: sbc32 (cpu); break;
2497 case 3: sbcs32 (cpu); break;
2498 case 4: adc64 (cpu); break;
2499 case 5: adcs64 (cpu); break;
2500 case 6: sbc64 (cpu); break;
2501 case 7: sbcs64 (cpu); break;
2502 }
2503 }
2504
2505 static uint32_t
2506 testConditionCode (sim_cpu *cpu, CondCode cc)
2507 {
2508 /* This should be reducible to branchless logic
2509 by some careful testing of bits in CC followed
2510 by the requisite masking and combining of bits
2511 from the flag register.
2512
2513 For now we do it with a switch. */
2514 int res;
2515
2516 switch (cc)
2517 {
2518 case EQ: res = IS_SET (Z); break;
2519 case NE: res = IS_CLEAR (Z); break;
2520 case CS: res = IS_SET (C); break;
2521 case CC: res = IS_CLEAR (C); break;
2522 case MI: res = IS_SET (N); break;
2523 case PL: res = IS_CLEAR (N); break;
2524 case VS: res = IS_SET (V); break;
2525 case VC: res = IS_CLEAR (V); break;
2526 case HI: res = IS_SET (C) && IS_CLEAR (Z); break;
2527 case LS: res = IS_CLEAR (C) || IS_SET (Z); break;
2528 case GE: res = IS_SET (N) == IS_SET (V); break;
2529 case LT: res = IS_SET (N) != IS_SET (V); break;
2530 case GT: res = IS_CLEAR (Z) && (IS_SET (N) == IS_SET (V)); break;
2531 case LE: res = IS_SET (Z) || (IS_SET (N) != IS_SET (V)); break;
2532 case AL:
2533 case NV:
2534 default:
2535 res = 1;
2536 break;
2537 }
2538 return res;
2539 }
2540
2541 static void
2542 CondCompare (sim_cpu *cpu) /* aka: ccmp and ccmn */
2543 {
2544 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2545 instr[30] = compare with positive (1) or negative value (0)
2546 instr[29,21] = 1 1101 0010
2547 instr[20,16] = Rm or const
2548 instr[15,12] = cond
2549 instr[11] = compare reg (0) or const (1)
2550 instr[10] = 0
2551 instr[9,5] = Rn
2552 instr[4] = 0
2553 instr[3,0] = value for CPSR bits if the comparison does not take place. */
2554 signed int negate;
2555 unsigned rm;
2556 unsigned rn;
2557
2558 NYI_assert (29, 21, 0x1d2);
2559 NYI_assert (10, 10, 0);
2560 NYI_assert (4, 4, 0);
2561
2562 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2563 if (! testConditionCode (cpu, INSTR (15, 12)))
2564 {
2565 aarch64_set_CPSR (cpu, INSTR (3, 0));
2566 return;
2567 }
2568
2569 negate = INSTR (30, 30) ? 1 : -1;
2570 rm = INSTR (20, 16);
2571 rn = INSTR ( 9, 5);
2572
2573 if (INSTR (31, 31))
2574 {
2575 if (INSTR (11, 11))
2576 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2577 negate * (uint64_t) rm);
2578 else
2579 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2580 negate * aarch64_get_reg_u64 (cpu, rm, SP_OK));
2581 }
2582 else
2583 {
2584 if (INSTR (11, 11))
2585 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2586 negate * rm);
2587 else
2588 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2589 negate * aarch64_get_reg_u32 (cpu, rm, SP_OK));
2590 }
2591 }
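/* For example, "ccmp x1, #4, #0, ne": if NE holds, the flags are set
   as for "cmp x1, #4" (here rm holds the constant 4); otherwise NZCV
   is simply loaded with the immediate 0.  */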
2592
2593 static void
2594 do_vec_MOV_whole_vector (sim_cpu *cpu)
2595 {
2596 /* MOV Vd.T, Vs.T (alias for ORR Vd.T, Vn.T, Vm.T where Vn == Vm)
2597
2598 instr[31] = 0
2599 instr[30] = half(0)/full(1)
2600 instr[29,21] = 001110101
2601 instr[20,16] = Vs
2602 instr[15,10] = 000111
2603 instr[9,5] = Vs
2604 instr[4,0] = Vd */
2605
2606 unsigned vs = INSTR (9, 5);
2607 unsigned vd = INSTR (4, 0);
2608
2609 NYI_assert (29, 21, 0x075);
2610 NYI_assert (15, 10, 0x07);
2611
2612 if (INSTR (20, 16) != vs)
2613 HALT_NYI;
2614
2615 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2616 if (INSTR (30, 30))
2617 aarch64_set_vec_u64 (cpu, vd, 1, aarch64_get_vec_u64 (cpu, vs, 1));
2618
2619 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vs, 0));
2620 }
2621
2622 static void
2623 do_vec_MOV_into_scalar (sim_cpu *cpu)
2624 {
2625 /* instr[31] = 0
2626 instr[30] = word(0)/long(1)
2627 instr[29,21] = 00 1110 000
2628 instr[20,18] = element size and index
2629 instr[17,10] = 00 0011 11
2630 instr[9,5] = V source
2631 instr[4,0] = R dest */
2632
2633 unsigned vs = INSTR (9, 5);
2634 unsigned rd = INSTR (4, 0);
2635
2636 NYI_assert (29, 21, 0x070);
2637 NYI_assert (17, 10, 0x0F);
2638
2639 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2640 switch (INSTR (20, 18))
2641 {
2642 case 0x2:
2643 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, vs, 0));
2644 break;
2645
2646 case 0x6:
2647 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, vs, 1));
2648 break;
2649
2650 case 0x1:
2651 case 0x3:
2652 case 0x5:
2653 case 0x7:
2654 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u32
2655 (cpu, vs, INSTR (20, 19)));
2656 break;
2657
2658 default:
2659 HALT_NYI;
2660 }
2661 }
2662
2663 static void
2664 do_vec_INS (sim_cpu *cpu)
2665 {
2666 /* instr[31,21] = 01001110000
2667 instr[20,16] = element size and index
2668 instr[15,10] = 000111
2669 instr[9,5] = W source
2670 instr[4,0] = V dest */
2671
2672 int index;
2673 unsigned rs = INSTR (9, 5);
2674 unsigned vd = INSTR (4, 0);
2675
2676 NYI_assert (31, 21, 0x270);
2677 NYI_assert (15, 10, 0x07);
2678
2679 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2680 if (INSTR (16, 16))
2681 {
2682 index = INSTR (20, 17);
2683 aarch64_set_vec_u8 (cpu, vd, index,
2684 aarch64_get_reg_u8 (cpu, rs, NO_SP));
2685 }
2686 else if (INSTR (17, 17))
2687 {
2688 index = INSTR (20, 18);
2689 aarch64_set_vec_u16 (cpu, vd, index,
2690 aarch64_get_reg_u16 (cpu, rs, NO_SP));
2691 }
2692 else if (INSTR (18, 18))
2693 {
2694 index = INSTR (20, 19);
2695 aarch64_set_vec_u32 (cpu, vd, index,
2696 aarch64_get_reg_u32 (cpu, rs, NO_SP));
2697 }
2698 else if (INSTR (19, 19))
2699 {
2700 index = INSTR (20, 20);
2701 aarch64_set_vec_u64 (cpu, vd, index,
2702 aarch64_get_reg_u64 (cpu, rs, NO_SP));
2703 }
2704 else
2705 HALT_NYI;
2706 }
2707
2708 static void
2709 do_vec_DUP_vector_into_vector (sim_cpu *cpu)
2710 {
2711 /* instr[31] = 0
2712 instr[30] = half(0)/full(1)
2713 instr[29,21] = 00 1110 000
2714 instr[20,16] = element size and index
2715 instr[15,10] = 0000 01
2716 instr[9,5] = V source
2717 instr[4,0] = V dest. */
2718
2719 unsigned full = INSTR (30, 30);
2720 unsigned vs = INSTR (9, 5);
2721 unsigned vd = INSTR (4, 0);
2722 int i, index;
2723
2724 NYI_assert (29, 21, 0x070);
2725 NYI_assert (15, 10, 0x01);
2726
2727 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2728 if (INSTR (16, 16))
2729 {
2730 index = INSTR (20, 17);
2731
2732 for (i = 0; i < (full ? 16 : 8); i++)
2733 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vs, index));
2734 }
2735 else if (INSTR (17, 17))
2736 {
2737 index = INSTR (20, 18);
2738
2739 for (i = 0; i < (full ? 8 : 4); i++)
2740 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, index));
2741 }
2742 else if (INSTR (18, 18))
2743 {
2744 index = INSTR (20, 19);
2745
2746 for (i = 0; i < (full ? 4 : 2); i++)
2747 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, index));
2748 }
2749 else
2750 {
2751 if (INSTR (19, 19) == 0)
2752 HALT_UNALLOC;
2753
2754 if (! full)
2755 HALT_UNALLOC;
2756
2757 index = INSTR (20, 20);
2758
2759 for (i = 0; i < 2; i++)
2760 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, index));
2761 }
2762 }
2763
2764 static void
2765 do_vec_TBL (sim_cpu *cpu)
2766 {
2767 /* instr[31] = 0
2768 instr[30] = half(0)/full(1)
2769 instr[29,21] = 00 1110 000
2770 instr[20,16] = Vm
2771 instr[15] = 0
2772 instr[14,13] = vec length
2773 instr[12,10] = 000
2774 instr[9,5] = V start
2775 instr[4,0] = V dest */
2776
2777 int full = INSTR (30, 30);
2778 int len = INSTR (14, 13) + 1;
2779 unsigned vm = INSTR (20, 16);
2780 unsigned vn = INSTR (9, 5);
2781 unsigned vd = INSTR (4, 0);
2782 unsigned i;
2783
2784 NYI_assert (29, 21, 0x070);
2785 NYI_assert (12, 10, 0);
2786
2787 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2788 for (i = 0; i < (full ? 16 : 8); i++)
2789 {
2790 unsigned int selector = aarch64_get_vec_u8 (cpu, vm, i);
2791 uint8_t val;
2792
2793 if (selector < 16)
2794 val = aarch64_get_vec_u8 (cpu, vn, selector);
2795 else if (selector < 32)
2796 val = len < 2 ? 0 : aarch64_get_vec_u8 (cpu, vn + 1, selector - 16);
2797 else if (selector < 48)
2798 val = len < 3 ? 0 : aarch64_get_vec_u8 (cpu, vn + 2, selector - 32);
2799 else if (selector < 64)
2800 val = len < 4 ? 0 : aarch64_get_vec_u8 (cpu, vn + 3, selector - 48);
2801 else
2802 val = 0;
2803
2804 aarch64_set_vec_u8 (cpu, vd, i, val);
2805 }
2806 }
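/* For example, with len == 2 a selector byte of 19 (0x13) fetches
   byte 3 of register vn + 1, while any selector byte >= 32 yields
   zero in the corresponding destination element.  */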
2807
2808 static void
2809 do_vec_TRN (sim_cpu *cpu)
2810 {
2811 /* instr[31] = 0
2812 instr[30] = half(0)/full(1)
2813 instr[29,24] = 00 1110
2814 instr[23,22] = size
2815 instr[21] = 0
2816 instr[20,16] = Vm
2817 instr[15] = 0
2818 instr[14] = TRN1 (0) / TRN2 (1)
2819 instr[13,10] = 1010
2820 instr[9,5] = V source
2821 instr[4,0] = V dest. */
2822
2823 int full = INSTR (30, 30);
2824 int second = INSTR (14, 14);
2825 unsigned vm = INSTR (20, 16);
2826 unsigned vn = INSTR (9, 5);
2827 unsigned vd = INSTR (4, 0);
2828 unsigned i;
2829
2830 NYI_assert (29, 24, 0x0E);
2831 NYI_assert (13, 10, 0xA);
2832
2833 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2834 switch (INSTR (23, 22))
2835 {
2836 case 0:
2837 for (i = 0; i < (full ? 8 : 4); i++)
2838 {
2839 aarch64_set_vec_u8
2840 (cpu, vd, i * 2,
2841 aarch64_get_vec_u8 (cpu, second ? vm : vn, i * 2));
2842 aarch64_set_vec_u8
2843 (cpu, vd, i * 2 + 1,
2844 aarch64_get_vec_u8 (cpu, second ? vn : vm, i * 2 + 1));
2845 }
2846 break;
2847
2848 case 1:
2849 for (i = 0; i < (full ? 4 : 2); i++)
2850 {
2851 aarch64_set_vec_u16
2852 (cpu, vd, i * 2,
2853 aarch64_get_vec_u16 (cpu, second ? vm : vn, i * 2));
2854 aarch64_set_vec_u16
2855 (cpu, vd, i * 2 + 1,
2856 aarch64_get_vec_u16 (cpu, second ? vn : vm, i * 2 + 1));
2857 }
2858 break;
2859
2860 case 2:
2861 aarch64_set_vec_u32
2862 (cpu, vd, 0, aarch64_get_vec_u32 (cpu, second ? vm : vn, 0));
2863 aarch64_set_vec_u32
2864 (cpu, vd, 1, aarch64_get_vec_u32 (cpu, second ? vn : vm, 1));
2865 aarch64_set_vec_u32
2866 (cpu, vd, 2, aarch64_get_vec_u32 (cpu, second ? vm : vn, 2));
2867 aarch64_set_vec_u32
2868 (cpu, vd, 3, aarch64_get_vec_u32 (cpu, second ? vn : vm, 3));
2869 break;
2870
2871 case 3:
2872 if (! full)
2873 HALT_UNALLOC;
2874
2875 aarch64_set_vec_u64 (cpu, vd, 0,
2876 aarch64_get_vec_u64 (cpu, second ? vm : vn, 0));
2877 aarch64_set_vec_u64 (cpu, vd, 1,
2878 aarch64_get_vec_u64 (cpu, second ? vn : vm, 1));
2879 break;
2880 }
2881 }
2882
2883 static void
2884 do_vec_DUP_scalar_into_vector (sim_cpu *cpu)
2885 {
2886 /* instr[31] = 0
2887 instr[30] = 0=> zero top 64-bits, 1=> duplicate into top 64-bits
2888 [must be 1 for 64-bit xfer]
2889 instr[29,20] = 00 1110 0000
2890 instr[19,16] = element size: 0001=> 8-bits, 0010=> 16-bits,
2891 0100=> 32-bits. 1000=>64-bits
2892 instr[15,10] = 0000 11
2893 instr[9,5] = W source
2894 instr[4,0] = V dest. */
2895
2896 unsigned i;
2897 unsigned Vd = INSTR (4, 0);
2898 unsigned Rs = INSTR (9, 5);
2899 int both = INSTR (30, 30);
2900
2901 NYI_assert (29, 20, 0x0E0);
2902 NYI_assert (15, 10, 0x03);
2903
2904 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2905 switch (INSTR (19, 16))
2906 {
2907 case 1:
2908 for (i = 0; i < (both ? 16 : 8); i++)
2909 aarch64_set_vec_u8 (cpu, Vd, i, aarch64_get_reg_u8 (cpu, Rs, NO_SP));
2910 break;
2911
2912 case 2:
2913 for (i = 0; i < (both ? 8 : 4); i++)
2914 aarch64_set_vec_u16 (cpu, Vd, i, aarch64_get_reg_u16 (cpu, Rs, NO_SP));
2915 break;
2916
2917 case 4:
2918 for (i = 0; i < (both ? 4 : 2); i++)
2919 aarch64_set_vec_u32 (cpu, Vd, i, aarch64_get_reg_u32 (cpu, Rs, NO_SP));
2920 break;
2921
2922 case 8:
2923 if (!both)
2924 HALT_NYI;
2925 aarch64_set_vec_u64 (cpu, Vd, 0, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
2926 aarch64_set_vec_u64 (cpu, Vd, 1, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
2927 break;
2928
2929 default:
2930 HALT_NYI;
2931 }
2932 }
2933
2934 static void
2935 do_vec_UZP (sim_cpu *cpu)
2936 {
2937 /* instr[31] = 0
2938 instr[30] = half(0)/full(1)
2939 instr[29,24] = 00 1110
2940 instr[23,22] = size: byte(00), half(01), word (10), long (11)
2941 instr[21] = 0
2942 instr[20,16] = Vm
2943 instr[15] = 0
2944 instr[14] = lower (0) / upper (1)
2945 instr[13,10] = 0110
2946 instr[9,5] = Vn
2947 instr[4,0] = Vd. */
2948
2949 int full = INSTR (30, 30);
2950 int upper = INSTR (14, 14);
2951
2952 unsigned vm = INSTR (20, 16);
2953 unsigned vn = INSTR (9, 5);
2954 unsigned vd = INSTR (4, 0);
2955
2956 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
2957 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
2958 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
2959 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
2960
2961 uint64_t val1 = 0;
2962 uint64_t val2 = 0;
2963
2964 uint64_t input2 = full ? val_n2 : val_m1;
2965
2966 NYI_assert (29, 24, 0x0E);
2967 NYI_assert (21, 21, 0);
2968 NYI_assert (15, 15, 0);
2969 NYI_assert (13, 10, 6);
2970
2971 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2972 switch (INSTR (23, 22))
2973 {
2974 case 0:
2975 val1 = (val_n1 >> (upper * 8)) & 0xFFULL;
2976 val1 |= (val_n1 >> ((upper * 8) + 8)) & 0xFF00ULL;
2977 val1 |= (val_n1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
2978 val1 |= (val_n1 >> ((upper * 8) + 24)) & 0xFF000000ULL;
2979
2980 val1 |= (input2 << (32 - (upper * 8))) & 0xFF00000000ULL;
2981 val1 |= (input2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
2982 val1 |= (input2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
2983 val1 |= (input2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
2984
2985 if (full)
2986 {
2987 val2 = (val_m1 >> (upper * 8)) & 0xFFULL;
2988 val2 |= (val_m1 >> ((upper * 8) + 8)) & 0xFF00ULL;
2989 val2 |= (val_m1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
2990 val2 |= (val_m1 >> ((upper * 8) + 24)) & 0xFF000000ULL;
2991
2992 val2 |= (val_m2 << (32 - (upper * 8))) & 0xFF00000000ULL;
2993 val2 |= (val_m2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
2994 val2 |= (val_m2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
2995 val2 |= (val_m2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
2996 }
2997 break;
2998
2999 case 1:
3000 val1 = (val_n1 >> (upper * 16)) & 0xFFFFULL;
3001 val1 |= (val_n1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;
3002
3003 val1 |= (input2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;
3004 val1 |= (input2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
3005
3006 if (full)
3007 {
3008 val2 = (val_m1 >> (upper * 16)) & 0xFFFFULL;
3009 val2 |= (val_m1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;
3010
3011 val2 |= (val_m2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;
3012 val2 |= (val_m2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
3013 }
3014 break;
3015
3016 case 2:
3017 val1 = (val_n1 >> (upper * 32)) & 0xFFFFFFFF;
3018 val1 |= (input2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
3019
3020 if (full)
3021 {
3022 val2 = (val_m1 >> (upper * 32)) & 0xFFFFFFFF;
3023 val2 |= (val_m2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
3024 }
3025 break;
3026
3027 case 3:
3028 if (! full)
3029 HALT_UNALLOC;
3030
3031 val1 = upper ? val_n2 : val_n1;
3032 val2 = upper ? val_m2 : val_m1;
3033 break;
3034 }
3035
3036 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3037 if (full)
3038 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3039 }
3040
3041 static void
3042 do_vec_ZIP (sim_cpu *cpu)
3043 {
3044 /* instr[31] = 0
3045 instr[30] = half(0)/full(1)
3046 instr[29,24] = 00 1110
3047 instr[23,22] = size: byte(00), half(01), word (10), long (11)
3048 instr[21] = 0
3049 instr[20,16] = Vm
3050 instr[15] = 0
3051 instr[14] = lower (0) / upper (1)
3052 instr[13,10] = 1110
3053 instr[9,5] = Vn
3054 instr[4,0] = Vd. */
3055
3056 int full = INSTR (30, 30);
3057 int upper = INSTR (14, 14);
3058
3059 unsigned vm = INSTR (20, 16);
3060 unsigned vn = INSTR (9, 5);
3061 unsigned vd = INSTR (4, 0);
3062
3063 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
3064 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
3065 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
3066 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
3067
3068 uint64_t val1 = 0;
3069 uint64_t val2 = 0;
3070
3071 uint64_t input1 = upper ? val_n1 : val_m1;
3072 uint64_t input2 = upper ? val_n2 : val_m2;
3073
3074 NYI_assert (29, 24, 0x0E);
3075 NYI_assert (21, 21, 0);
3076 NYI_assert (15, 15, 0);
3077 NYI_assert (13, 10, 0xE);
3078
3079 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3080 switch (INSTR (23, 22))
3081 {
3082 case 0:
3083 val1 =
3084 ((input1 << 0) & (0xFF << 0))
3085 | ((input2 << 8) & (0xFF << 8))
3086 | ((input1 << 8) & (0xFF << 16))
3087 | ((input2 << 16) & (0xFFULL << 24))
3088 | ((input1 << 16) & (0xFFULL << 32))
3089 | ((input2 << 24) & (0xFFULL << 40))
3090 | ((input1 << 24) & (0xFFULL << 48))
3091 | ((input2 << 32) & (0xFFULL << 56));
3092
3093 val2 =
3094 ((input1 >> 32) & (0xFF << 0))
3095 | ((input2 >> 24) & (0xFF << 8))
3096 | ((input1 >> 24) & (0xFF << 16))
3097 | ((input2 >> 16) & (0xFFULL << 24))
3098 | ((input1 >> 16) & (0xFFULL << 32))
3099 | ((input2 >> 8) & (0xFFULL << 40))
3100 | ((input1 >> 8) & (0xFFULL << 48))
3101 | ((input2 >> 0) & (0xFFULL << 56));
3102 break;
3103
3104 case 1:
3105 val1 =
3106 ((input1 << 0) & (0xFFFF << 0))
3107 | ((input2 << 16) & (0xFFFFULL << 16))
3108 | ((input1 << 16) & (0xFFFFULL << 32))
3109 | ((input2 << 32) & (0xFFFFULL << 48));
3110
3111 val2 =
3112 ((input1 >> 32) & (0xFFFF << 0))
3113 | ((input2 >> 16) & (0xFFFFULL << 16))
3114 | ((input1 >> 16) & (0xFFFFULL << 32))
3115 | ((input2 >> 0) & (0xFFFFULL << 48));
3116 break;
3117
3118 case 2:
3119 val1 = (input1 & 0xFFFFFFFFULL) | (input2 << 32);
3120 val2 = (input2 & 0xFFFFFFFFULL) | (input1 << 32);
3121 break;
3122
3123 case 3:
3124 val1 = input1;
3125 val2 = input2;
3126 break;
3127 }
3128
3129 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3130 if (full)
3131 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3132 }
3133
3134 /* Floating point immediates are encoded in 8 bits.
3135 fpimm[7] = sign bit.
3136 fpimm[6:4] = signed exponent.
3137 fpimm[3:0] = fraction (assuming leading 1).
3138 i.e. F = s * 1.f * 2^(e - b). */
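/* For example, imm8 == 0x00 (s = 0, e = 0, f = 0) encodes 2.0 and
   imm8 == 0x70 encodes 1.0, matching the expansions computed by the
   two helpers below.  */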
3139
3140 static float
3141 fp_immediate_for_encoding_32 (uint32_t imm8)
3142 {
3143 float u;
3144 uint32_t s, e, f, i;
3145
3146 s = (imm8 >> 7) & 0x1;
3147 e = (imm8 >> 4) & 0x7;
3148 f = imm8 & 0xf;
3149
3150 /* The fp value is s * n/16 * 2^r where n is 16+f. */
3151 u = (16.0 + f) / 16.0;
3152
3153 /* N.B. exponent is signed. */
3154 if (e < 4)
3155 {
3156 int epos = e;
3157
3158 for (i = 0; i <= epos; i++)
3159 u *= 2.0;
3160 }
3161 else
3162 {
3163 int eneg = 7 - e;
3164
3165 for (i = 0; i < eneg; i++)
3166 u /= 2.0;
3167 }
3168
3169 if (s)
3170 u = - u;
3171
3172 return u;
3173 }
3174
3175 static double
3176 fp_immediate_for_encoding_64 (uint32_t imm8)
3177 {
3178 double u;
3179 uint32_t s, e, f, i;
3180
3181 s = (imm8 >> 7) & 0x1;
3182 e = (imm8 >> 4) & 0x7;
3183 f = imm8 & 0xf;
3184
3185 /* The fp value is s * n/16 * 2^r where n is 16+f. */
3186 u = (16.0 + f) / 16.0;
3187
3188 /* N.B. exponent is signed. */
3189 if (e < 4)
3190 {
3191 int epos = e;
3192
3193 for (i = 0; i <= epos; i++)
3194 u *= 2.0;
3195 }
3196 else
3197 {
3198 int eneg = 7 - e;
3199
3200 for (i = 0; i < eneg; i++)
3201 u /= 2.0;
3202 }
3203
3204 if (s)
3205 u = - u;
3206
3207 return u;
3208 }
3209
3210 static void
3211 do_vec_MOV_immediate (sim_cpu *cpu)
3212 {
3213 /* instr[31] = 0
3214 instr[30] = full/half selector
3215 instr[29,19] = 00111100000
3216 instr[18,16] = high 3 bits of uimm8
3217 instr[15,12] = size & shift:
3218 0000 => 32-bit
3219 0010 => 32-bit + LSL#8
3220 0100 => 32-bit + LSL#16
3221 0110 => 32-bit + LSL#24
3222 1010 => 16-bit + LSL#8
3223 1000 => 16-bit
3224 1101 => 32-bit + MSL#16
3225 1100 => 32-bit + MSL#8
3226 1110 => 8-bit
3227 1111 => double
3228 instr[11,10] = 01
3229 instr[9,5] = low 5-bits of uimm8
3230 instr[4,0] = Vd. */
3231
3232 int full = INSTR (30, 30);
3233 unsigned vd = INSTR (4, 0);
3234 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3235 unsigned i;
3236
3237 NYI_assert (29, 19, 0x1E0);
3238 NYI_assert (11, 10, 1);
3239
3240 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3241 switch (INSTR (15, 12))
3242 {
3243 case 0x0: /* 32-bit, no shift. */
3244 case 0x2: /* 32-bit, shift by 8. */
3245 case 0x4: /* 32-bit, shift by 16. */
3246 case 0x6: /* 32-bit, shift by 24. */
3247 val <<= (8 * INSTR (14, 13));
3248 for (i = 0; i < (full ? 4 : 2); i++)
3249 aarch64_set_vec_u32 (cpu, vd, i, val);
3250 break;
3251
3252 case 0xa: /* 16-bit, shift by 8. */
3253 val <<= 8;
3254 /* Fall through. */
3255 case 0x8: /* 16-bit, no shift. */
3256 for (i = 0; i < (full ? 8 : 4); i++)
3257 aarch64_set_vec_u16 (cpu, vd, i, val);
3258 break;
3259
3260 case 0xd: /* 32-bit, mask shift by 16. */
3261 val <<= 8;
3262 val |= 0xFF;
3263 /* Fall through. */
3264 case 0xc: /* 32-bit, mask shift by 8. */
3265 val <<= 8;
3266 val |= 0xFF;
3267 for (i = 0; i < (full ? 4 : 2); i++)
3268 aarch64_set_vec_u32 (cpu, vd, i, val);
3269 break;
3270
3271 case 0xe: /* 8-bit, no shift. */
3272 for (i = 0; i < (full ? 16 : 8); i++)
3273 aarch64_set_vec_u8 (cpu, vd, i, val);
3274 break;
3275
3276 case 0xf: /* FMOV Vs.{2|4}S, #fpimm. */
3277 {
3278 float u = fp_immediate_for_encoding_32 (val);
3279 for (i = 0; i < (full ? 4 : 2); i++)
3280 aarch64_set_vec_float (cpu, vd, i, u);
3281 break;
3282 }
3283
3284 default:
3285 HALT_NYI;
3286 }
3287 }
3288
3289 static void
3290 do_vec_MVNI (sim_cpu *cpu)
3291 {
3292 /* instr[31] = 0
3293 instr[30] = full/half selector
3294 instr[29,19] = 10111100000
3295 instr[18,16] = high 3 bits of uimm8
3296 instr[15,12] = selector
3297 instr[11,10] = 01
3298 instr[9,5] = low 5-bits of uimm8
3299 instr[4,0] = Vd. */
3300
3301 int full = INSTR (30, 30);
3302 unsigned vd = INSTR (4, 0);
3303 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3304 unsigned i;
3305
3306 NYI_assert (29, 19, 0x5E0);
3307 NYI_assert (11, 10, 1);
3308
3309 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3310 switch (INSTR (15, 12))
3311 {
3312 case 0x0: /* 32-bit, no shift. */
3313 case 0x2: /* 32-bit, shift by 8. */
3314 case 0x4: /* 32-bit, shift by 16. */
3315 case 0x6: /* 32-bit, shift by 24. */
3316 val <<= (8 * INSTR (14, 13));
3317 val = ~ val;
3318 for (i = 0; i < (full ? 4 : 2); i++)
3319 aarch64_set_vec_u32 (cpu, vd, i, val);
3320 return;
3321
3322 case 0xa: /* 16-bit, 8 bit shift. */
3323 val <<= 8;
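/* Fall through. */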
3324 case 0x8: /* 16-bit, no shift. */
3325 val = ~ val;
3326 for (i = 0; i < (full ? 8 : 4); i++)
3327 aarch64_set_vec_u16 (cpu, vd, i, val);
3328 return;
3329
3330 case 0xd: /* 32-bit, mask shift by 16. */
3331 val <<= 8;
3332 val |= 0xFF;
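/* Fall through. */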
3333 case 0xc: /* 32-bit, mask shift by 8. */
3334 val <<= 8;
3335 val |= 0xFF;
3336 val = ~ val;
3337 for (i = 0; i < (full ? 4 : 2); i++)
3338 aarch64_set_vec_u32 (cpu, vd, i, val);
3339 return;
3340
3341 case 0xe: /* MOVI Dn, #mask64 */
3342 {
3343 uint64_t mask = 0;
3344
3345 for (i = 0; i < 8; i++)
3346 if (val & (1 << i))
3347 mask |= (0xFFUL << (i * 8));
3348 aarch64_set_vec_u64 (cpu, vd, 0, mask);
3349 aarch64_set_vec_u64 (cpu, vd, 1, mask);
3350 return;
3351 }
3352
3353 case 0xf: /* FMOV Vd.2D, #fpimm. */
3354 {
3355 double u = fp_immediate_for_encoding_64 (val);
3356
3357 if (! full)
3358 HALT_UNALLOC;
3359
3360 aarch64_set_vec_double (cpu, vd, 0, u);
3361 aarch64_set_vec_double (cpu, vd, 1, u);
3362 return;
3363 }
3364
3365 default:
3366 HALT_NYI;
3367 }
3368 }
3369
3370 #define ABS(A) ((A) < 0 ? - (A) : (A))
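/* N.B. for the most negative value the negation wraps back to the
   same value, matching the truncating behaviour of the hardware ABS
   instruction.  */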
3371
3372 static void
3373 do_vec_ABS (sim_cpu *cpu)
3374 {
3375 /* instr[31] = 0
3376 instr[30] = half(0)/full(1)
3377 instr[29,24] = 00 1110
3378 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3379 instr[21,10] = 10 0000 1011 10
3380 instr[9,5] = Vn
3381 instr[4,0] = Vd. */
3382
3383 unsigned vn = INSTR (9, 5);
3384 unsigned vd = INSTR (4, 0);
3385 unsigned full = INSTR (30, 30);
3386 unsigned i;
3387
3388 NYI_assert (29, 24, 0x0E);
3389 NYI_assert (21, 10, 0x82E);
3390
3391 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3392 switch (INSTR (23, 22))
3393 {
3394 case 0:
3395 for (i = 0; i < (full ? 16 : 8); i++)
3396 aarch64_set_vec_s8 (cpu, vd, i,
3397 ABS (aarch64_get_vec_s8 (cpu, vn, i)));
3398 break;
3399
3400 case 1:
3401 for (i = 0; i < (full ? 8 : 4); i++)
3402 aarch64_set_vec_s16 (cpu, vd, i,
3403 ABS (aarch64_get_vec_s16 (cpu, vn, i)));
3404 break;
3405
3406 case 2:
3407 for (i = 0; i < (full ? 4 : 2); i++)
3408 aarch64_set_vec_s32 (cpu, vd, i,
3409 ABS (aarch64_get_vec_s32 (cpu, vn, i)));
3410 break;
3411
3412 case 3:
3413 if (! full)
3414 HALT_NYI;
3415 for (i = 0; i < 2; i++)
3416 aarch64_set_vec_s64 (cpu, vd, i,
3417 ABS (aarch64_get_vec_s64 (cpu, vn, i)));
3418 break;
3419 }
3420 }
3421
3422 static void
3423 do_vec_ADDV (sim_cpu *cpu)
3424 {
3425 /* instr[31] = 0
3426 instr[30] = full/half selector
3427 instr[29,24] = 00 1110
3428 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3429 instr[21,10] = 11 0001 1011 10
3430 instr[9,5] = Vm
3431 instr[4,0] = Rd. */
3432
3433 unsigned vm = INSTR (9, 5);
3434 unsigned rd = INSTR (4, 0);
3435 unsigned i;
3436 uint64_t val = 0;
3437 int full = INSTR (30, 30);
3438
3439 NYI_assert (29, 24, 0x0E);
3440 NYI_assert (21, 10, 0xC6E);
3441
3442 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3443 switch (INSTR (23, 22))
3444 {
3445 case 0:
3446 for (i = 0; i < (full ? 16 : 8); i++)
3447 val += aarch64_get_vec_u8 (cpu, vm, i);
3448 aarch64_set_vec_u64 (cpu, rd, 0, val);
3449 return;
3450
3451 case 1:
3452 for (i = 0; i < (full ? 8 : 4); i++)
3453 val += aarch64_get_vec_u16 (cpu, vm, i);
3454 aarch64_set_vec_u64 (cpu, rd, 0, val);
3455 return;
3456
3457 case 2:
3458 if (! full)
3459 HALT_UNALLOC;
3460 for (i = 0; i < 4; i++)
3461 val += aarch64_get_vec_u32 (cpu, vm, i);
3462 aarch64_set_vec_u64 (cpu, rd, 0, val);
3463 return;
3464
3465 case 3:
3466 HALT_UNALLOC;
3467 }
3468 }
3469
3470 static void
3471 do_vec_ins_2 (sim_cpu *cpu)
3472 {
3473 /* instr[31,21] = 01001110000
3474 instr[20,18] = size & element selector
3475 instr[17,14] = 0000
3476 instr[13] = direction: to vec(0), from vec (1)
3477 instr[12,10] = 111
3478 instr[9,5] = Vm
3479 instr[4,0] = Vd. */
3480
3481 unsigned elem;
3482 unsigned vm = INSTR (9, 5);
3483 unsigned vd = INSTR (4, 0);
3484
3485 NYI_assert (31, 21, 0x270);
3486 NYI_assert (17, 14, 0);
3487 NYI_assert (12, 10, 7);
3488
3489 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3490 if (INSTR (13, 13) == 1)
3491 {
3492 if (INSTR (18, 18) == 1)
3493 {
3494 /* 32-bit moves. */
3495 elem = INSTR (20, 19);
3496 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3497 aarch64_get_vec_u32 (cpu, vm, elem));
3498 }
3499 else
3500 {
3501 /* 64-bit moves. */
3502 if (INSTR (19, 19) != 1)
3503 HALT_NYI;
3504
3505 elem = INSTR (20, 20);
3506 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3507 aarch64_get_vec_u64 (cpu, vm, elem));
3508 }
3509 }
3510 else
3511 {
3512 if (INSTR (18, 18) == 1)
3513 {
3514 /* 32-bit moves. */
3515 elem = INSTR (20, 19);
3516 aarch64_set_vec_u32 (cpu, vd, elem,
3517 aarch64_get_reg_u32 (cpu, vm, NO_SP));
3518 }
3519 else
3520 {
3521 /* 64-bit moves. */
3522 if (INSTR (19, 19) != 1)
3523 HALT_NYI;
3524
3525 elem = INSTR (20, 20);
3526 aarch64_set_vec_u64 (cpu, vd, elem,
3527 aarch64_get_reg_u64 (cpu, vm, NO_SP));
3528 }
3529 }
3530 }
3531
3532 #define DO_VEC_WIDENING_MUL(N, DST_TYPE, READ_TYPE, WRITE_TYPE) \
3533 do \
3534 { \
3535 DST_TYPE a[N], b[N]; \
3536 \
3537 for (i = 0; i < (N); i++) \
3538 { \
3539 a[i] = aarch64_get_vec_##READ_TYPE (cpu, vn, i + bias); \
3540 b[i] = aarch64_get_vec_##READ_TYPE (cpu, vm, i + bias); \
3541 } \
3542 for (i = 0; i < (N); i++) \
3543 aarch64_set_vec_##WRITE_TYPE (cpu, vd, i, a[i] * b[i]); \
3544 } \
3545 while (0)
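/* N.B. "bias" is the index of the first source element: zero for the
   xMULL forms, which read the lower halves of the source vectors, and
   N for the xMULL2 forms, which read the upper halves.  The sources
   are copied into temporaries first so that nothing is lost when the
   destination vector overlaps a source vector.  */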
3546
3547 static void
3548 do_vec_mull (sim_cpu *cpu)
3549 {
3550 /* instr[31] = 0
3551 instr[30] = lower(0)/upper(1) selector
3552 instr[29] = signed(0)/unsigned(1)
3553 instr[28,24] = 0 1110
3554 instr[23,22] = size: 8-bit (00), 16-bit (01), 32-bit (10)
3555 instr[21] = 1
3556 instr[20,16] = Vm
3557 instr[15,10] = 11 0000
3558 instr[9,5] = Vn
3559 instr[4,0] = Vd. */
3560
3561 int unsign = INSTR (29, 29);
3562 int bias = INSTR (30, 30);
3563 unsigned vm = INSTR (20, 16);
3564 unsigned vn = INSTR ( 9, 5);
3565 unsigned vd = INSTR ( 4, 0);
3566 unsigned i;
3567
3568 NYI_assert (28, 24, 0x0E);
3569 NYI_assert (15, 10, 0x30);
3570
3571 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3572 /* NB: Read source values before writing results, in case
3573 the source and destination vectors are the same. */
3574 switch (INSTR (23, 22))
3575 {
3576 case 0:
3577 if (bias)
3578 bias = 8;
3579 if (unsign)
3580 DO_VEC_WIDENING_MUL (8, uint16_t, u8, u16);
3581 else
3582 DO_VEC_WIDENING_MUL (8, int16_t, s8, s16);
3583 return;
3584
3585 case 1:
3586 if (bias)
3587 bias = 4;
3588 if (unsign)
3589 DO_VEC_WIDENING_MUL (4, uint32_t, u16, u32);
3590 else
3591 DO_VEC_WIDENING_MUL (4, int32_t, s16, s32);
3592 return;
3593
3594 case 2:
3595 if (bias)
3596 bias = 2;
3597 if (unsign)
3598 DO_VEC_WIDENING_MUL (2, uint64_t, u32, u64);
3599 else
3600 DO_VEC_WIDENING_MUL (2, int64_t, s32, s64);
3601 return;
3602
3603 case 3:
3604 HALT_NYI;
3605 }
3606 }
3607
3608 static void
3609 do_vec_fadd (sim_cpu *cpu)
3610 {
3611 /* instr[31] = 0
3612 instr[30] = half(0)/full(1)
3613 instr[29,24] = 001110
3614 instr[23] = FADD(0)/FSUB(1)
3615 instr[22] = float (0)/double(1)
3616 instr[21] = 1
3617 instr[20,16] = Vm
3618 instr[15,10] = 110101
3619 instr[9,5] = Vn
3620 instr[4,0] = Vd. */
3621
3622 unsigned vm = INSTR (20, 16);
3623 unsigned vn = INSTR (9, 5);
3624 unsigned vd = INSTR (4, 0);
3625 unsigned i;
3626 int full = INSTR (30, 30);
3627
3628 NYI_assert (29, 24, 0x0E);
3629 NYI_assert (21, 21, 1);
3630 NYI_assert (15, 10, 0x35);
3631
3632 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3633 if (INSTR (23, 23))
3634 {
3635 if (INSTR (22, 22))
3636 {
3637 if (! full)
3638 HALT_NYI;
3639
3640 for (i = 0; i < 2; i++)
3641 aarch64_set_vec_double (cpu, vd, i,
3642 aarch64_get_vec_double (cpu, vn, i)
3643 - aarch64_get_vec_double (cpu, vm, i));
3644 }
3645 else
3646 {
3647 for (i = 0; i < (full ? 4 : 2); i++)
3648 aarch64_set_vec_float (cpu, vd, i,
3649 aarch64_get_vec_float (cpu, vn, i)
3650 - aarch64_get_vec_float (cpu, vm, i));
3651 }
3652 }
3653 else
3654 {
3655 if (INSTR (22, 22))
3656 {
3657 if (! full)
3658 HALT_NYI;
3659
3660 for (i = 0; i < 2; i++)
3661 aarch64_set_vec_double (cpu, vd, i,
3662 aarch64_get_vec_double (cpu, vm, i)
3663 + aarch64_get_vec_double (cpu, vn, i));
3664 }
3665 else
3666 {
3667 for (i = 0; i < (full ? 4 : 2); i++)
3668 aarch64_set_vec_float (cpu, vd, i,
3669 aarch64_get_vec_float (cpu, vm, i)
3670 + aarch64_get_vec_float (cpu, vn, i));
3671 }
3672 }
3673 }
3674
3675 static void
3676 do_vec_add (sim_cpu *cpu)
3677 {
3678 /* instr[31] = 0
3679 instr[30] = full/half selector
3680 instr[29,24] = 001110
3681 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3682 instr[21] = 1
3683 instr[20,16] = Vm
3684 instr[15,10] = 100001
3685 instr[9,5] = Vn
3686 instr[4,0] = Vd. */
3687
3688 unsigned vm = INSTR (20, 16);
3689 unsigned vn = INSTR (9, 5);
3690 unsigned vd = INSTR (4, 0);
3691 unsigned i;
3692 int full = INSTR (30, 30);
3693
3694 NYI_assert (29, 24, 0x0E);
3695 NYI_assert (21, 21, 1);
3696 NYI_assert (15, 10, 0x21);
3697
3698 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3699 switch (INSTR (23, 22))
3700 {
3701 case 0:
3702 for (i = 0; i < (full ? 16 : 8); i++)
3703 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
3704 + aarch64_get_vec_u8 (cpu, vm, i));
3705 return;
3706
3707 case 1:
3708 for (i = 0; i < (full ? 8 : 4); i++)
3709 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
3710 + aarch64_get_vec_u16 (cpu, vm, i));
3711 return;
3712
3713 case 2:
3714 for (i = 0; i < (full ? 4 : 2); i++)
3715 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
3716 + aarch64_get_vec_u32 (cpu, vm, i));
3717 return;
3718
3719 case 3:
3720 if (! full)
3721 HALT_UNALLOC;
3722 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vn, 0)
3723 + aarch64_get_vec_u64 (cpu, vm, 0));
3724 aarch64_set_vec_u64 (cpu, vd, 1,
3725 aarch64_get_vec_u64 (cpu, vn, 1)
3726 + aarch64_get_vec_u64 (cpu, vm, 1));
3727 return;
3728 }
3729 }
3730
3731 static void
3732 do_vec_mul (sim_cpu *cpu)
3733 {
3734 /* instr[31] = 0
3735 instr[30] = full/half selector
3736 instr[29,24] = 00 1110
3737 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3738 instr[21] = 1
3739 instr[20,16] = Vm
3740 instr[15,10] = 10 0111
3741 instr[9,5] = Vn
3742 instr[4,0] = Vd. */
3743
3744 unsigned vm = INSTR (20, 16);
3745 unsigned vn = INSTR (9, 5);
3746 unsigned vd = INSTR (4, 0);
3747 unsigned i;
3748 int full = INSTR (30, 30);
3749 int bias = 0;
3750
3751 NYI_assert (29, 24, 0x0E);
3752 NYI_assert (21, 21, 1);
3753 NYI_assert (15, 10, 0x27);
3754
3755 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3756 switch (INSTR (23, 22))
3757 {
3758 case 0:
3759 DO_VEC_WIDENING_MUL (full ? 16 : 8, uint8_t, u8, u8);
3760 return;
3761
3762 case 1:
3763 DO_VEC_WIDENING_MUL (full ? 8 : 4, uint16_t, u16, u16);
3764 return;
3765
3766 case 2:
3767 DO_VEC_WIDENING_MUL (full ? 4 : 2, uint32_t, u32, u32);
3768 return;
3769
3770 case 3:
3771 HALT_UNALLOC;
3772 }
3773 }
3774
3775 static void
3776 do_vec_MLA (sim_cpu *cpu)
3777 {
3778 /* instr[31] = 0
3779 instr[30] = full/half selector
3780 instr[29,24] = 00 1110
3781 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3782 instr[21] = 1
3783 instr[20,16] = Vm
3784 instr[15,10] = 1001 01
3785 instr[9,5] = Vn
3786 instr[4,0] = Vd. */
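/* N.B. MLA is not a widening operation; each product is accumulated
   into a destination element of the same size as its sources.  */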
3787
3788 unsigned vm = INSTR (20, 16);
3789 unsigned vn = INSTR (9, 5);
3790 unsigned vd = INSTR (4, 0);
3791 unsigned i;
3792 int full = INSTR (30, 30);
3793
3794 NYI_assert (29, 24, 0x0E);
3795 NYI_assert (21, 21, 1);
3796 NYI_assert (15, 10, 0x25);
3797
3798 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3799 switch (INSTR (23, 22))
3800 {
3801 case 0:
3802 {
3803 uint16_t a[16], b[16];
3804
3805 for (i = 0; i < (full ? 16 : 8); i++)
3806 {
3807 a[i] = aarch64_get_vec_u8 (cpu, vn, i);
3808 b[i] = aarch64_get_vec_u8 (cpu, vm, i);
3809 }
3810
3811 for (i = 0; i < (full ? 16 : 8); i++)
3812 {
3813 uint16_t v = aarch64_get_vec_u8 (cpu, vd, i);
3814
3815 aarch64_set_vec_u8 (cpu, vd, i, v + (a[i] * b[i]));
3816 }
3817 }
3818 return;
3819
3820 case 1:
3821 {
3822 uint32_t a[8], b[8];
3823
3824 for (i = 0; i < (full ? 8 : 4); i++)
3825 {
3826 a[i] = aarch64_get_vec_u16 (cpu, vn, i);
3827 b[i] = aarch64_get_vec_u16 (cpu, vm, i);
3828 }
3829
3830 for (i = 0; i < (full ? 8 : 4); i++)
3831 {
3832 uint32_t v = aarch64_get_vec_u16 (cpu, vd, i);
3833
3834 aarch64_set_vec_u16 (cpu, vd, i, v + (a[i] * b[i]));
3835 }
3836 }
3837 return;
3838
3839 case 2:
3840 {
3841 uint64_t a[4], b[4];
3842
3843 for (i = 0; i < (full ? 4 : 2); i++)
3844 {
3845 a[i] = aarch64_get_vec_u32 (cpu, vn, i);
3846 b[i] = aarch64_get_vec_u32 (cpu, vm, i);
3847 }
3848
3849 for (i = 0; i < (full ? 4 : 2); i++)
3850 {
3851 uint64_t v = aarch64_get_vec_u32 (cpu, vd, i);
3852
3853 aarch64_set_vec_u32 (cpu, vd, i, v + (a[i] * b[i]));
3854 }
3855 }
3856 return;
3857
3858 case 3:
3859 HALT_UNALLOC;
3860 }
3861 }
3862
3863 static float
3864 fmaxnm (float a, float b)
3865 {
3866 if (! isnan (a))
3867 {
3868 if (! isnan (b))
3869 return a > b ? a : b;
3870 return a;
3871 }
3872 else if (! isnan (b))
3873 return b;
3874 return a;
3875 }
3876
3877 static float
3878 fminnm (float a, float b)
3879 {
3880 if (! isnan (a))
3881 {
3882 if (! isnan (b))
3883 return a < b ? a : b;
3884 return a;
3885 }
3886 else if (! isnan (b))
3887 return b;
3888 return a;
3889 }
3890
3891 static double
3892 dmaxnm (double a, double b)
3893 {
3894 if (! isnan (a))
3895 {
3896 if (! isnan (b))
3897 return a > b ? a : b;
3898 return a;
3899 }
3900 else if (! isnan (b))
3901 return b;
3902 return a;
3903 }
3904
3905 static double
3906 dminnm (double a, double b)
3907 {
3908 if (! isnan (a))
3909 {
3910 if (! isnan (b))
3911 return a < b ? a : b;
3912 return a;
3913 }
3914 else if (! isnan (b))
3915 return b;
3916 return a;
3917 }
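/* These helpers implement the FMAXNM/FMINNM treatment of NaNs: if
   exactly one operand is a NaN the numeric operand is returned, and
   a NaN results only when both operands are NaNs.  */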
3918
3919 static void
3920 do_vec_FminmaxNMP (sim_cpu *cpu)
3921 {
3922 /* instr [31] = 0
3923 instr [30] = half (0)/full (1)
3924 instr [29,24] = 10 1110
3925 instr [23] = max(0)/min(1)
3926 instr [22] = float (0)/double (1)
3927 instr [21] = 1
3928 instr [20,16] = Vm
3929 instr [15,10] = 1100 01
3930 instr [9,5] = Vn
3931 instr [4,0] = Vd. */
3932
3933 unsigned vm = INSTR (20, 16);
3934 unsigned vn = INSTR (9, 5);
3935 unsigned vd = INSTR (4, 0);
3936 int full = INSTR (30, 30);
3937
3938 NYI_assert (29, 24, 0x2E);
3939 NYI_assert (21, 21, 1);
3940 NYI_assert (15, 10, 0x31);
3941
3942 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3943 if (INSTR (22, 22))
3944 {
3945 double (* fn)(double, double) = INSTR (23, 23)
3946 ? dminnm : dmaxnm;
3947
3948 if (! full)
3949 HALT_NYI;
3950 aarch64_set_vec_double (cpu, vd, 0,
3951 fn (aarch64_get_vec_double (cpu, vn, 0),
3952 aarch64_get_vec_double (cpu, vn, 1)));
3953 aarch64_set_vec_double (cpu, vd, 1,
3954 fn (aarch64_get_vec_double (cpu, vm, 0),
3955 aarch64_get_vec_double (cpu, vm, 1)));
3956 }
3957 else
3958 {
3959 float (* fn)(float, float) = INSTR (23, 23)
3960 ? fminnm : fmaxnm;
3961
3962 aarch64_set_vec_float (cpu, vd, 0,
3963 fn (aarch64_get_vec_float (cpu, vn, 0),
3964 aarch64_get_vec_float (cpu, vn, 1)));
3965 if (full)
3966 aarch64_set_vec_float (cpu, vd, 1,
3967 fn (aarch64_get_vec_float (cpu, vn, 2),
3968 aarch64_get_vec_float (cpu, vn, 3)));
3969
3970 aarch64_set_vec_float (cpu, vd, (full ? 2 : 1),
3971 fn (aarch64_get_vec_float (cpu, vm, 0),
3972 aarch64_get_vec_float (cpu, vm, 1)));
3973 if (full)
3974 aarch64_set_vec_float (cpu, vd, 3,
3975 fn (aarch64_get_vec_float (cpu, vm, 2),
3976 aarch64_get_vec_float (cpu, vm, 3)));
3977 }
3978 }
3979
3980 static void
3981 do_vec_AND (sim_cpu *cpu)
3982 {
3983 /* instr[31] = 0
3984 instr[30] = half (0)/full (1)
3985 instr[29,21] = 001110001
3986 instr[20,16] = Vm
3987 instr[15,10] = 000111
3988 instr[9,5] = Vn
3989 instr[4,0] = Vd. */
3990
3991 unsigned vm = INSTR (20, 16);
3992 unsigned vn = INSTR (9, 5);
3993 unsigned vd = INSTR (4, 0);
3994 unsigned i;
3995 int full = INSTR (30, 30);
3996
3997 NYI_assert (29, 21, 0x071);
3998 NYI_assert (15, 10, 0x07);
3999
4000 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4001 for (i = 0; i < (full ? 4 : 2); i++)
4002 aarch64_set_vec_u32 (cpu, vd, i,
4003 aarch64_get_vec_u32 (cpu, vn, i)
4004 & aarch64_get_vec_u32 (cpu, vm, i));
4005 }
4006
4007 static void
4008 do_vec_BSL (sim_cpu *cpu)
4009 {
4010 /* instr[31] = 0
4011 instr[30] = half (0)/full (1)
4012 instr[29,21] = 101110011
4013 instr[20,16] = Vm
4014 instr[15,10] = 000111
4015 instr[9,5] = Vn
4016 instr[4,0] = Vd. */
4017
4018 unsigned vm = INSTR (20, 16);
4019 unsigned vn = INSTR (9, 5);
4020 unsigned vd = INSTR (4, 0);
4021 unsigned i;
4022 int full = INSTR (30, 30);
4023
4024 NYI_assert (29, 21, 0x173);
4025 NYI_assert (15, 10, 0x07);
4026
4027 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
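/* Bitwise select: each result bit comes from Vn where the corresponding
Vd (selector) bit is set, and from Vm where it is clear, i.e.
vd = (vd & vn) | (~vd & vm). */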
4028 for (i = 0; i < (full ? 16 : 8); i++)
4029 aarch64_set_vec_u8 (cpu, vd, i,
4030 ( aarch64_get_vec_u8 (cpu, vd, i)
4031 & aarch64_get_vec_u8 (cpu, vn, i))
4032 | ((~ aarch64_get_vec_u8 (cpu, vd, i))
4033 & aarch64_get_vec_u8 (cpu, vm, i)));
4034 }
4035
4036 static void
4037 do_vec_EOR (sim_cpu *cpu)
4038 {
4039 /* instr[31] = 0
4040 instr[30] = half (0)/full (1)
4041 instr[29,21] = 10 1110 001
4042 instr[20,16] = Vm
4043 instr[15,10] = 000111
4044 instr[9,5] = Vn
4045 instr[4,0] = Vd. */
4046
4047 unsigned vm = INSTR (20, 16);
4048 unsigned vn = INSTR (9, 5);
4049 unsigned vd = INSTR (4, 0);
4050 unsigned i;
4051 int full = INSTR (30, 30);
4052
4053 NYI_assert (29, 21, 0x171);
4054 NYI_assert (15, 10, 0x07);
4055
4056 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4057 for (i = 0; i < (full ? 4 : 2); i++)
4058 aarch64_set_vec_u32 (cpu, vd, i,
4059 aarch64_get_vec_u32 (cpu, vn, i)
4060 ^ aarch64_get_vec_u32 (cpu, vm, i));
4061 }
4062
4063 static void
4064 do_vec_bit (sim_cpu *cpu)
4065 {
4066 /* instr[31] = 0
4067 instr[30] = half (0)/full (1)
4068 instr[29,23] = 10 1110 1
4069 instr[22] = BIT (0) / BIF (1)
4070 instr[21] = 1
4071 instr[20,16] = Vm
4072 instr[15,10] = 0001 11
4073 instr[9,5] = Vn
4074 instr[4,0] = Vd. */
4075
4076 unsigned vm = INSTR (20, 16);
4077 unsigned vn = INSTR (9, 5);
4078 unsigned vd = INSTR (4, 0);
4079 unsigned full = INSTR (30, 30);
4080 unsigned test_false = INSTR (22, 22);
4081 unsigned i;
4082
4083 NYI_assert (29, 23, 0x5D);
4084 NYI_assert (21, 21, 1);
4085 NYI_assert (15, 10, 0x07);
4086
4087 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4088 for (i = 0; i < (full ? 4 : 2); i++)
4089 {
4090 /* BIT inserts the Vn bits where Vm is set, BIF where Vm is clear. */
4091 uint32_t vd_val = aarch64_get_vec_u32 (cpu, vd, i);
4092 uint32_t vn_val = aarch64_get_vec_u32 (cpu, vn, i);
4093 uint32_t vm_val = aarch64_get_vec_u32 (cpu, vm, i);
4094 uint32_t sel = test_false ? ~vm_val : vm_val;
4095 
4096 aarch64_set_vec_u32 (cpu, vd, i,
4097 (vd_val & ~sel) | (vn_val & sel));
4098 }
4099 }
4101
4102 static void
4103 do_vec_ORN (sim_cpu *cpu)
4104 {
4105 /* instr[31] = 0
4106 instr[30] = half (0)/full (1)
4107 instr[29,21] = 00 1110 111
4108 instr[20,16] = Vm
4109 instr[15,10] = 00 0111
4110 instr[9,5] = Vn
4111 instr[4,0] = Vd. */
4112
4113 unsigned vm = INSTR (20, 16);
4114 unsigned vn = INSTR (9, 5);
4115 unsigned vd = INSTR (4, 0);
4116 unsigned i;
4117 int full = INSTR (30, 30);
4118
4119 NYI_assert (29, 21, 0x077);
4120 NYI_assert (15, 10, 0x07);
4121
4122 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4123 for (i = 0; i < (full ? 16 : 8); i++)
4124 aarch64_set_vec_u8 (cpu, vd, i,
4125 aarch64_get_vec_u8 (cpu, vn, i)
4126 | ~ aarch64_get_vec_u8 (cpu, vm, i));
4127 }
4128
4129 static void
4130 do_vec_ORR (sim_cpu *cpu)
4131 {
4132 /* instr[31] = 0
4133 instr[30] = half (0)/full (1)
4134 instr[29,21] = 00 1110 101
4135 instr[20,16] = Vm
4136 instr[15,10] = 0001 11
4137 instr[9,5] = Vn
4138 instr[4,0] = Vd. */
4139
4140 unsigned vm = INSTR (20, 16);
4141 unsigned vn = INSTR (9, 5);
4142 unsigned vd = INSTR (4, 0);
4143 unsigned i;
4144 int full = INSTR (30, 30);
4145
4146 NYI_assert (29, 21, 0x075);
4147 NYI_assert (15, 10, 0x07);
4148
4149 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4150 for (i = 0; i < (full ? 16 : 8); i++)
4151 aarch64_set_vec_u8 (cpu, vd, i,
4152 aarch64_get_vec_u8 (cpu, vn, i)
4153 | aarch64_get_vec_u8 (cpu, vm, i));
4154 }
4155
4156 static void
4157 do_vec_BIC (sim_cpu *cpu)
4158 {
4159 /* instr[31] = 0
4160 instr[30] = half (0)/full (1)
4161 instr[29,21] = 00 1110 011
4162 instr[20,16] = Vm
4163 instr[15,10] = 00 0111
4164 instr[9,5] = Vn
4165 instr[4,0] = Vd. */
4166
4167 unsigned vm = INSTR (20, 16);
4168 unsigned vn = INSTR (9, 5);
4169 unsigned vd = INSTR (4, 0);
4170 unsigned i;
4171 int full = INSTR (30, 30);
4172
4173 NYI_assert (29, 21, 0x073);
4174 NYI_assert (15, 10, 0x07);
4175
4176 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4177 for (i = 0; i < (full ? 16 : 8); i++)
4178 aarch64_set_vec_u8 (cpu, vd, i,
4179 aarch64_get_vec_u8 (cpu, vn, i)
4180 & ~ aarch64_get_vec_u8 (cpu, vm, i));
4181 }
4182
4183 static void
4184 do_vec_XTN (sim_cpu *cpu)
4185 {
4186 /* instr[31] = 0
4187 instr[30] = first part (0)/ second part (1)
4188 instr[29,24] = 00 1110
4189 instr[23,22] = size: byte(00), half(01), word (10)
4190 instr[21,10] = 1000 0100 1010
4191 instr[9,5] = Vs
4192 instr[4,0] = Vd. */
4193
4194 unsigned vs = INSTR (9, 5);
4195 unsigned vd = INSTR (4, 0);
4196 unsigned bias = INSTR (30, 30);
4197 unsigned i;
4198
4199 NYI_assert (29, 24, 0x0E);
4200 NYI_assert (21, 10, 0x84A);
4201
4202 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4203 switch (INSTR (23, 22))
4204 {
4205 case 0:
4206 for (i = 0; i < 8; i++)
4207 aarch64_set_vec_u8 (cpu, vd, i + (bias * 8),
4208 aarch64_get_vec_u16 (cpu, vs, i));
4209 return;
4210
4211 case 1:
4212 for (i = 0; i < 4; i++)
4213 aarch64_set_vec_u16 (cpu, vd, i + (bias * 4),
4214 aarch64_get_vec_u32 (cpu, vs, i));
4215 return;
4216
4217 case 2:
4218 for (i = 0; i < 2; i++)
4219 aarch64_set_vec_u32 (cpu, vd, i + (bias * 2),
4220 aarch64_get_vec_u64 (cpu, vs, i));
4221 return;
4222 }
4223 }
4224
4225 static void
4226 do_vec_maxv (sim_cpu *cpu)
4227 {
4228 /* instr[31] = 0
4229 instr[30] = half(0)/full(1)
4230 instr[29] = signed (0)/unsigned(1)
4231 instr[28,24] = 0 1110
4232 instr[23,22] = size: byte(00), half(01), word (10)
4233 instr[21] = 1
4234 instr[20,17] = 1 000
4235 instr[16] = max(0)/min(1)
4236 instr[15,10] = 1010 10
4237 instr[9,5] = V source
4238 instr[4,0] = R dest. */
4239
4240 unsigned vs = INSTR (9, 5);
4241 unsigned rd = INSTR (4, 0);
4242 unsigned full = INSTR (30, 30);
4243 unsigned i;
4244
4245 NYI_assert (28, 24, 0x0E);
4246 NYI_assert (21, 21, 1);
4247 NYI_assert (20, 17, 8);
4248 NYI_assert (15, 10, 0x2A);
4249
4250 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4251 switch ((INSTR (29, 29) << 1) | INSTR (16, 16))
4252 {
4253 case 0: /* SMAXV. */
4254 {
4255 int64_t smax;
4256 switch (INSTR (23, 22))
4257 {
4258 case 0:
4259 smax = aarch64_get_vec_s8 (cpu, vs, 0);
4260 for (i = 1; i < (full ? 16 : 8); i++)
4261 smax = max (smax, aarch64_get_vec_s8 (cpu, vs, i));
4262 break;
4263 case 1:
4264 smax = aarch64_get_vec_s16 (cpu, vs, 0);
4265 for (i = 1; i < (full ? 8 : 4); i++)
4266 smax = max (smax, aarch64_get_vec_s16 (cpu, vs, i));
4267 break;
4268 case 2:
4269 smax = aarch64_get_vec_s32 (cpu, vs, 0);
4270 for (i = 1; i < (full ? 4 : 2); i++)
4271 smax = max (smax, aarch64_get_vec_s32 (cpu, vs, i));
4272 break;
4273 case 3:
4274 HALT_UNALLOC;
4275 }
4276 aarch64_set_reg_s64 (cpu, rd, NO_SP, smax);
4277 return;
4278 }
4279
4280 case 1: /* SMINV. */
4281 {
4282 int64_t smin;
4283 switch (INSTR (23, 22))
4284 {
4285 case 0:
4286 smin = aarch64_get_vec_s8 (cpu, vs, 0);
4287 for (i = 1; i < (full ? 16 : 8); i++)
4288 smin = min (smin, aarch64_get_vec_s8 (cpu, vs, i));
4289 break;
4290 case 1:
4291 smin = aarch64_get_vec_s16 (cpu, vs, 0);
4292 for (i = 1; i < (full ? 8 : 4); i++)
4293 smin = min (smin, aarch64_get_vec_s16 (cpu, vs, i));
4294 break;
4295 case 2:
4296 smin = aarch64_get_vec_s32 (cpu, vs, 0);
4297 for (i = 1; i < (full ? 4 : 2); i++)
4298 smin = min (smin, aarch64_get_vec_s32 (cpu, vs, i));
4299 break;
4300
4301 case 3:
4302 HALT_UNALLOC;
4303 }
4304 aarch64_set_reg_s64 (cpu, rd, NO_SP, smin);
4305 return;
4306 }
4307
4308 case 2: /* UMAXV. */
4309 {
4310 uint64_t umax;
4311 switch (INSTR (23, 22))
4312 {
4313 case 0:
4314 umax = aarch64_get_vec_u8 (cpu, vs, 0);
4315 for (i = 1; i < (full ? 16 : 8); i++)
4316 umax = max (umax, aarch64_get_vec_u8 (cpu, vs, i));
4317 break;
4318 case 1:
4319 umax = aarch64_get_vec_u16 (cpu, vs, 0);
4320 for (i = 1; i < (full ? 8 : 4); i++)
4321 umax = max (umax, aarch64_get_vec_u16 (cpu, vs, i));
4322 break;
4323 case 2:
4324 umax = aarch64_get_vec_u32 (cpu, vs, 0);
4325 for (i = 1; i < (full ? 4 : 2); i++)
4326 umax = max (umax, aarch64_get_vec_u32 (cpu, vs, i));
4327 break;
4328
4329 case 3:
4330 HALT_UNALLOC;
4331 }
4332 aarch64_set_reg_u64 (cpu, rd, NO_SP, umax);
4333 return;
4334 }
4335
4336 case 3: /* UMINV. */
4337 {
4338 uint64_t umin;
4339 switch (INSTR (23, 22))
4340 {
4341 case 0:
4342 umin = aarch64_get_vec_u8 (cpu, vs, 0);
4343 for (i = 1; i < (full ? 16 : 8); i++)
4344 umin = min (umin, aarch64_get_vec_u8 (cpu, vs, i));
4345 break;
4346 case 1:
4347 umin = aarch64_get_vec_u16 (cpu, vs, 0);
4348 for (i = 1; i < (full ? 8 : 4); i++)
4349 umin = min (umin, aarch64_get_vec_u16 (cpu, vs, i));
4350 break;
4351 case 2:
4352 umin = aarch64_get_vec_u32 (cpu, vs, 0);
4353 for (i = 1; i < (full ? 4 : 2); i++)
4354 umin = min (umin, aarch64_get_vec_u32 (cpu, vs, i));
4355 break;
4356
4357 case 3:
4358 HALT_UNALLOC;
4359 }
4360 aarch64_set_reg_u64 (cpu, rd, NO_SP, umin);
4361 return;
4362 }
4363 }
4364 }
4365
4366 static void
4367 do_vec_fminmaxV (sim_cpu *cpu)
4368 {
4369 /* instr[31,24] = 0110 1110
4370 instr[23] = max(0)/min(1)
4371 instr[22,14] = 011 0000 11
4372 instr[13,12] = nm(00)/normal(11)
4373 instr[11,10] = 10
4374 instr[9,5] = V source
4375 instr[4,0] = R dest. */
4376
4377 unsigned vs = INSTR (9, 5);
4378 unsigned rd = INSTR (4, 0);
4379 unsigned i;
4380 float res = aarch64_get_vec_float (cpu, vs, 0);
4381
4382 NYI_assert (31, 24, 0x6E);
4383 NYI_assert (22, 14, 0x0C3);
4384 NYI_assert (11, 10, 2);
4385
4386 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4387 if (INSTR (23, 23))
4388 {
4389 switch (INSTR (13, 12))
4390 {
4391 case 0: /* FMINNMV. */
4392 for (i = 1; i < 4; i++)
4393 res = fminnm (res, aarch64_get_vec_float (cpu, vs, i));
4394 break;
4395
4396 case 3: /* FMINV. */
4397 for (i = 1; i < 4; i++)
4398 res = min (res, aarch64_get_vec_float (cpu, vs, i));
4399 break;
4400
4401 default:
4402 HALT_NYI;
4403 }
4404 }
4405 else
4406 {
4407 switch (INSTR (13, 12))
4408 {
4409 case 0: /* FMAXNMV. */
4410 for (i = 1; i < 4; i++)
4411 res = fmaxnm (res, aarch64_get_vec_float (cpu, vs, i));
4412 break;
4413
4414 case 3: /* FMAXV. */
4415 for (i = 1; i < 4; i++)
4416 res = max (res, aarch64_get_vec_float (cpu, vs, i));
4417 break;
4418
4419 default:
4420 HALT_NYI;
4421 }
4422 }
4423
4424 aarch64_set_FP_float (cpu, rd, res);
4425 }
4426
4427 static void
4428 do_vec_Fminmax (sim_cpu *cpu)
4429 {
4430 /* instr[31] = 0
4431 instr[30] = half(0)/full(1)
4432 instr[29,24] = 00 1110
4433 instr[23] = max(0)/min(1)
4434 instr[22] = float(0)/double(1)
4435 instr[21] = 1
4436 instr[20,16] = Vm
4437 instr[15,14] = 11
4438 instr[13,12] = nm(00)/normal(11)
4439 instr[11,10] = 01
4440 instr[9,5] = Vn
4441 instr[4,0] = Vd. */
4442
4443 unsigned vm = INSTR (20, 16);
4444 unsigned vn = INSTR (9, 5);
4445 unsigned vd = INSTR (4, 0);
4446 unsigned full = INSTR (30, 30);
4447 unsigned min = INSTR (23, 23);
4448 unsigned i;
4449
4450 NYI_assert (29, 24, 0x0E);
4451 NYI_assert (21, 21, 1);
4452 NYI_assert (15, 14, 3);
4453 NYI_assert (11, 10, 1);
4454
4455 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4456 if (INSTR (22, 22))
4457 {
4458 double (* func)(double, double);
4459
4460 if (! full)
4461 HALT_NYI;
4462
4463 if (INSTR (13, 12) == 0)
4464 func = min ? dminnm : dmaxnm;
4465 else if (INSTR (13, 12) == 3)
4466 func = min ? fmin : fmax;
4467 else
4468 HALT_NYI;
4469
4470 for (i = 0; i < 2; i++)
4471 aarch64_set_vec_double (cpu, vd, i,
4472 func (aarch64_get_vec_double (cpu, vn, i),
4473 aarch64_get_vec_double (cpu, vm, i)));
4474 }
4475 else
4476 {
4477 float (* func)(float, float);
4478
4479 if (INSTR (13, 12) == 0)
4480 func = min ? fminnm : fmaxnm;
4481 else if (INSTR (13, 12) == 3)
4482 func = min ? fminf : fmaxf;
4483 else
4484 HALT_NYI;
4485
4486 for (i = 0; i < (full ? 4 : 2); i++)
4487 aarch64_set_vec_float (cpu, vd, i,
4488 func (aarch64_get_vec_float (cpu, vn, i),
4489 aarch64_get_vec_float (cpu, vm, i)));
4490 }
4491 }
4492
4493 static void
4494 do_vec_SCVTF (sim_cpu *cpu)
4495 {
4496 /* instr[31] = 0
4497 instr[30] = Q
4498 instr[29,23] = 00 1110 0
4499 instr[22] = float(0)/double(1)
4500 instr[21,10] = 10 0001 1101 10
4501 instr[9,5] = Vn
4502 instr[4,0] = Vd. */
4503
4504 unsigned vn = INSTR (9, 5);
4505 unsigned vd = INSTR (4, 0);
4506 unsigned full = INSTR (30, 30);
4507 unsigned size = INSTR (22, 22);
4508 unsigned i;
4509
4510 NYI_assert (29, 23, 0x1C);
4511 NYI_assert (21, 10, 0x876);
4512
4513 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4514 if (size)
4515 {
4516 if (! full)
4517 HALT_UNALLOC;
4518
4519 for (i = 0; i < 2; i++)
4520 {
4521 double val = (double) aarch64_get_vec_s64 (cpu, vn, i);
4522 aarch64_set_vec_double (cpu, vd, i, val);
4523 }
4524 }
4525 else
4526 {
4527 for (i = 0; i < (full ? 4 : 2); i++)
4528 {
4529 float val = (float) aarch64_get_vec_s32 (cpu, vn, i);
4530 aarch64_set_vec_float (cpu, vd, i, val);
4531 }
4532 }
4533 }
4534
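/* The comparison macros below write an all-ones element (-1) into the
destination lane when the comparison holds and zero otherwise.
VEC_CMP and VEC_CMP0 implement the integer compares against a
register or against zero; VEC_FCMP and VEC_FCMP0 are the float and
double counterparts. */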
4535 #define VEC_CMP(SOURCE, CMP) \
4536 do \
4537 { \
4538 switch (size) \
4539 { \
4540 case 0: \
4541 for (i = 0; i < (full ? 16 : 8); i++) \
4542 aarch64_set_vec_u8 (cpu, vd, i, \
4543 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4544 CMP \
4545 aarch64_get_vec_##SOURCE##8 (cpu, vm, i) \
4546 ? -1 : 0); \
4547 return; \
4548 case 1: \
4549 for (i = 0; i < (full ? 8 : 4); i++) \
4550 aarch64_set_vec_u16 (cpu, vd, i, \
4551 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4552 CMP \
4553 aarch64_get_vec_##SOURCE##16 (cpu, vm, i) \
4554 ? -1 : 0); \
4555 return; \
4556 case 2: \
4557 for (i = 0; i < (full ? 4 : 2); i++) \
4558 aarch64_set_vec_u32 (cpu, vd, i, \
4559 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4560 CMP \
4561 aarch64_get_vec_##SOURCE##32 (cpu, vm, i) \
4562 ? -1 : 0); \
4563 return; \
4564 case 3: \
4565 if (! full) \
4566 HALT_UNALLOC; \
4567 for (i = 0; i < 2; i++) \
4568 aarch64_set_vec_u64 (cpu, vd, i, \
4569 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4570 CMP \
4571 aarch64_get_vec_##SOURCE##64 (cpu, vm, i) \
4572 ? -1ULL : 0); \
4573 return; \
4574 } \
4575 } \
4576 while (0)
4577
4578 #define VEC_CMP0(SOURCE, CMP) \
4579 do \
4580 { \
4581 switch (size) \
4582 { \
4583 case 0: \
4584 for (i = 0; i < (full ? 16 : 8); i++) \
4585 aarch64_set_vec_u8 (cpu, vd, i, \
4586 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4587 CMP 0 ? -1 : 0); \
4588 return; \
4589 case 1: \
4590 for (i = 0; i < (full ? 8 : 4); i++) \
4591 aarch64_set_vec_u16 (cpu, vd, i, \
4592 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4593 CMP 0 ? -1 : 0); \
4594 return; \
4595 case 2: \
4596 for (i = 0; i < (full ? 4 : 2); i++) \
4597 aarch64_set_vec_u32 (cpu, vd, i, \
4598 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4599 CMP 0 ? -1 : 0); \
4600 return; \
4601 case 3: \
4602 if (! full) \
4603 HALT_UNALLOC; \
4604 for (i = 0; i < 2; i++) \
4605 aarch64_set_vec_u64 (cpu, vd, i, \
4606 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4607 CMP 0 ? -1ULL : 0); \
4608 return; \
4609 } \
4610 } \
4611 while (0)
4612
4613 #define VEC_FCMP0(CMP) \
4614 do \
4615 { \
4616 if (vm != 0) \
4617 HALT_NYI; \
4618 if (INSTR (22, 22)) \
4619 { \
4620 if (! full) \
4621 HALT_NYI; \
4622 for (i = 0; i < 2; i++) \
4623 aarch64_set_vec_u64 (cpu, vd, i, \
4624 aarch64_get_vec_double (cpu, vn, i) \
4625 CMP 0.0 ? -1 : 0); \
4626 } \
4627 else \
4628 { \
4629 for (i = 0; i < (full ? 4 : 2); i++) \
4630 aarch64_set_vec_u32 (cpu, vd, i, \
4631 aarch64_get_vec_float (cpu, vn, i) \
4632 CMP 0.0 ? -1 : 0); \
4633 } \
4634 return; \
4635 } \
4636 while (0)
4637
4638 #define VEC_FCMP(CMP) \
4639 do \
4640 { \
4641 if (INSTR (22, 22)) \
4642 { \
4643 if (! full) \
4644 HALT_NYI; \
4645 for (i = 0; i < 2; i++) \
4646 aarch64_set_vec_u64 (cpu, vd, i, \
4647 aarch64_get_vec_double (cpu, vn, i) \
4648 CMP \
4649 aarch64_get_vec_double (cpu, vm, i) \
4650 ? -1 : 0); \
4651 } \
4652 else \
4653 { \
4654 for (i = 0; i < (full ? 4 : 2); i++) \
4655 aarch64_set_vec_u32 (cpu, vd, i, \
4656 aarch64_get_vec_float (cpu, vn, i) \
4657 CMP \
4658 aarch64_get_vec_float (cpu, vm, i) \
4659 ? -1 : 0); \
4660 } \
4661 return; \
4662 } \
4663 while (0)
4664
4665 static void
4666 do_vec_compare (sim_cpu *cpu)
4667 {
4668 /* instr[31] = 0
4669 instr[30] = half(0)/full(1)
4670 instr[29] = part-of-comparison-type
4671 instr[28,24] = 0 1110
4672 instr[23,22] = size of integer compares: byte(00), half(01), word (10), long (11)
4673 type of float compares: single (-0) / double (-1)
4674 instr[21] = 1
4675 instr[20,16] = Vm or 00000 (compare vs 0)
4676 instr[15,10] = part-of-comparison-type
4677 instr[9,5] = Vn
4678 instr[4,0] = Vd. */
4679
4680 int full = INSTR (30, 30);
4681 int size = INSTR (23, 22);
4682 unsigned vm = INSTR (20, 16);
4683 unsigned vn = INSTR (9, 5);
4684 unsigned vd = INSTR (4, 0);
4685 unsigned i;
4686
4687 NYI_assert (28, 24, 0x0E);
4688 NYI_assert (21, 21, 1);
4689
4690 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4691 if ((INSTR (11, 11)
4692 && INSTR (14, 14))
4693 || ((INSTR (11, 11) == 0
4694 && INSTR (10, 10) == 0)))
4695 {
4696 /* A compare vs 0. */
4697 if (vm != 0)
4698 {
4699 if (INSTR (15, 10) == 0x2A)
4700 do_vec_maxv (cpu);
4701 else if (INSTR (15, 10) == 0x32
4702 || INSTR (15, 10) == 0x3E)
4703 do_vec_fminmaxV (cpu);
4704 else if (INSTR (29, 23) == 0x1C
4705 && INSTR (21, 10) == 0x876)
4706 do_vec_SCVTF (cpu);
4707 else
4708 HALT_NYI;
4709 return;
4710 }
4711 }
4712
4713 if (INSTR (14, 14))
4714 {
4715 /* A floating point compare. */
4716 unsigned decode = (INSTR (29, 29) << 5) | (INSTR (23, 23) << 4)
4717 | INSTR (13, 10);
4718
4719 NYI_assert (15, 15, 1);
4720
4721 switch (decode)
4722 {
4723 case /* 0b010010: GT#0 */ 0x12: VEC_FCMP0 (>);
4724 case /* 0b110010: GE#0 */ 0x32: VEC_FCMP0 (>=);
4725 case /* 0b010110: EQ#0 */ 0x16: VEC_FCMP0 (==);
4726 case /* 0b110110: LE#0 */ 0x36: VEC_FCMP0 (<=);
4727 case /* 0b011010: LT#0 */ 0x1A: VEC_FCMP0 (<);
4728 case /* 0b111001: GT */ 0x39: VEC_FCMP (>);
4729 case /* 0b101001: GE */ 0x29: VEC_FCMP (>=);
4730 case /* 0b001001: EQ */ 0x09: VEC_FCMP (==);
4731
4732 default:
4733 HALT_NYI;
4734 }
4735 }
4736 else
4737 {
4738 unsigned decode = (INSTR (29, 29) << 6) | INSTR (15, 10);
4739
4740 switch (decode)
4741 {
4742 case 0x0D: /* 0001101 GT */ VEC_CMP (s, > );
4743 case 0x0F: /* 0001111 GE */ VEC_CMP (s, >= );
4744 case 0x22: /* 0100010 GT #0 */ VEC_CMP0 (s, > );
4745 case 0x23: /* 0100011 TST */ VEC_CMP (u, & );
4746 case 0x26: /* 0100110 EQ #0 */ VEC_CMP0 (s, == );
4747 case 0x2A: /* 0101010 LT #0 */ VEC_CMP0 (s, < );
4748 case 0x4D: /* 1001101 HI */ VEC_CMP (u, > );
4749 case 0x4F: /* 1001111 HS */ VEC_CMP (u, >= );
4750 case 0x62: /* 1100010 GE #0 */ VEC_CMP0 (s, >= );
4751 case 0x63: /* 1100011 EQ */ VEC_CMP (u, == );
4752 case 0x66: /* 1100110 LE #0 */ VEC_CMP0 (s, <= );
4753 default:
4754 if (vm == 0)
4755 HALT_NYI;
4756 do_vec_maxv (cpu);
4757 }
4758 }
4759 }
4760
4761 static void
4762 do_vec_SSHL (sim_cpu *cpu)
4763 {
4764 /* instr[31] = 0
4765 instr[30] = first part (0)/ second part (1)
4766 instr[29,24] = 00 1110
4767 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4768 instr[21] = 1
4769 instr[20,16] = Vm
4770 instr[15,10] = 0100 01
4771 instr[9,5] = Vn
4772 instr[4,0] = Vd. */
4773
4774 unsigned full = INSTR (30, 30);
4775 unsigned vm = INSTR (20, 16);
4776 unsigned vn = INSTR (9, 5);
4777 unsigned vd = INSTR (4, 0);
4778 unsigned i;
4779 signed int shift;
4780
4781 NYI_assert (29, 24, 0x0E);
4782 NYI_assert (21, 21, 1);
4783 NYI_assert (15, 10, 0x11);
4784
4785 /* A negative shift count in Vm selects a signed shift right. */
4786
4787 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4788 switch (INSTR (23, 22))
4789 {
4790 case 0:
4791 for (i = 0; i < (full ? 16 : 8); i++)
4792 {
4793 shift = aarch64_get_vec_s8 (cpu, vm, i);
4794 if (shift >= 0)
4795 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4796 << shift);
4797 else
4798 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4799 >> - shift);
4800 }
4801 return;
4802
4803 case 1:
4804 for (i = 0; i < (full ? 8 : 4); i++)
4805 {
4806 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
4807 if (shift >= 0)
4808 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4809 << shift);
4810 else
4811 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4812 >> - shift);
4813 }
4814 return;
4815
4816 case 2:
4817 for (i = 0; i < (full ? 4 : 2); i++)
4818 {
4819 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
4820 if (shift >= 0)
4821 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4822 << shift);
4823 else
4824 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4825 >> - shift);
4826 }
4827 return;
4828
4829 case 3:
4830 if (! full)
4831 HALT_UNALLOC;
4832 for (i = 0; i < 2; i++)
4833 {
4834 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
4835 if (shift >= 0)
4836 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4837 << shift);
4838 else
4839 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4840 >> - shift);
4841 }
4842 return;
4843 }
4844 }
4845
4846 static void
4847 do_vec_USHL (sim_cpu *cpu)
4848 {
4849 /* instr[31] = 0
4850 instr[30] = first part (0)/ second part (1)
4851 instr[29,24] = 10 1110
4852 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4853 instr[21] = 1
4854 instr[20,16] = Vm
4855 instr[15,10] = 0100 01
4856 instr[9,5] = Vn
4857 instr[4,0] = Vd */
4858
4859 unsigned full = INSTR (30, 30);
4860 unsigned vm = INSTR (20, 16);
4861 unsigned vn = INSTR (9, 5);
4862 unsigned vd = INSTR (4, 0);
4863 unsigned i;
4864 signed int shift;
4865
4866 NYI_assert (29, 24, 0x2E);
4867 NYI_assert (15, 10, 0x11);
4868
4869 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4870 switch (INSTR (23, 22))
4871 {
4872 case 0:
4873 for (i = 0; i < (full ? 16 : 8); i++)
4874 {
4875 shift = aarch64_get_vec_s8 (cpu, vm, i);
4876 if (shift >= 0)
4877 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
4878 << shift);
4879 else
4880 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
4881 >> - shift);
4882 }
4883 return;
4884
4885 case 1:
4886 for (i = 0; i < (full ? 8 : 4); i++)
4887 {
4888 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
4889 if (shift >= 0)
4890 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
4891 << shift);
4892 else
4893 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
4894 >> - shift);
4895 }
4896 return;
4897
4898 case 2:
4899 for (i = 0; i < (full ? 4 : 2); i++)
4900 {
4901 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
4902 if (shift >= 0)
4903 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
4904 << shift);
4905 else
4906 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
4907 >> - shift);
4908 }
4909 return;
4910
4911 case 3:
4912 if (! full)
4913 HALT_UNALLOC;
4914 for (i = 0; i < 2; i++)
4915 {
4916 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
4917 if (shift >= 0)
4918 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
4919 << shift);
4920 else
4921 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
4922 >> - shift);
4923 }
4924 return;
4925 }
4926 }
4927
4928 static void
4929 do_vec_FMLA (sim_cpu *cpu)
4930 {
4931 /* instr[31] = 0
4932 instr[30] = full/half selector
4933 instr[29,23] = 0011100
4934 instr[22] = size: 0=>float, 1=>double
4935 instr[21] = 1
4936 instr[20,16] = Vm
4937 instr[15,10] = 1100 11
4938 instr[9,5] = Vn
4939 instr[4,0] = Vd. */
4940
4941 unsigned vm = INSTR (20, 16);
4942 unsigned vn = INSTR (9, 5);
4943 unsigned vd = INSTR (4, 0);
4944 unsigned i;
4945 int full = INSTR (30, 30);
4946
4947 NYI_assert (29, 23, 0x1C);
4948 NYI_assert (21, 21, 1);
4949 NYI_assert (15, 10, 0x33);
4950
4951 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4952 if (INSTR (22, 22))
4953 {
4954 if (! full)
4955 HALT_UNALLOC;
4956 for (i = 0; i < 2; i++)
4957 aarch64_set_vec_double (cpu, vd, i,
4958 aarch64_get_vec_double (cpu, vn, i) *
4959 aarch64_get_vec_double (cpu, vm, i) +
4960 aarch64_get_vec_double (cpu, vd, i));
4961 }
4962 else
4963 {
4964 for (i = 0; i < (full ? 4 : 2); i++)
4965 aarch64_set_vec_float (cpu, vd, i,
4966 aarch64_get_vec_float (cpu, vn, i) *
4967 aarch64_get_vec_float (cpu, vm, i) +
4968 aarch64_get_vec_float (cpu, vd, i));
4969 }
4970 }
4971
4972 static void
4973 do_vec_max (sim_cpu *cpu)
4974 {
4975 /* instr[31] = 0
4976 instr[30] = full/half selector
4977 instr[29] = SMAX (0) / UMAX (1)
4978 instr[28,24] = 0 1110
4979 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
4980 instr[21] = 1
4981 instr[20,16] = Vm
4982 instr[15,10] = 0110 01
4983 instr[9,5] = Vn
4984 instr[4,0] = Vd. */
4985
4986 unsigned vm = INSTR (20, 16);
4987 unsigned vn = INSTR (9, 5);
4988 unsigned vd = INSTR (4, 0);
4989 unsigned i;
4990 int full = INSTR (30, 30);
4991
4992 NYI_assert (28, 24, 0x0E);
4993 NYI_assert (21, 21, 1);
4994 NYI_assert (15, 10, 0x19);
4995
4996 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4997 if (INSTR (29, 29))
4998 {
4999 switch (INSTR (23, 22))
5000 {
5001 case 0:
5002 for (i = 0; i < (full ? 16 : 8); i++)
5003 aarch64_set_vec_u8 (cpu, vd, i,
5004 aarch64_get_vec_u8 (cpu, vn, i)
5005 > aarch64_get_vec_u8 (cpu, vm, i)
5006 ? aarch64_get_vec_u8 (cpu, vn, i)
5007 : aarch64_get_vec_u8 (cpu, vm, i));
5008 return;
5009
5010 case 1:
5011 for (i = 0; i < (full ? 8 : 4); i++)
5012 aarch64_set_vec_u16 (cpu, vd, i,
5013 aarch64_get_vec_u16 (cpu, vn, i)
5014 > aarch64_get_vec_u16 (cpu, vm, i)
5015 ? aarch64_get_vec_u16 (cpu, vn, i)
5016 : aarch64_get_vec_u16 (cpu, vm, i));
5017 return;
5018
5019 case 2:
5020 for (i = 0; i < (full ? 4 : 2); i++)
5021 aarch64_set_vec_u32 (cpu, vd, i,
5022 aarch64_get_vec_u32 (cpu, vn, i)
5023 > aarch64_get_vec_u32 (cpu, vm, i)
5024 ? aarch64_get_vec_u32 (cpu, vn, i)
5025 : aarch64_get_vec_u32 (cpu, vm, i));
5026 return;
5027
5028 case 3:
5029 HALT_UNALLOC;
5030 }
5031 }
5032 else
5033 {
5034 switch (INSTR (23, 22))
5035 {
5036 case 0:
5037 for (i = 0; i < (full ? 16 : 8); i++)
5038 aarch64_set_vec_s8 (cpu, vd, i,
5039 aarch64_get_vec_s8 (cpu, vn, i)
5040 > aarch64_get_vec_s8 (cpu, vm, i)
5041 ? aarch64_get_vec_s8 (cpu, vn, i)
5042 : aarch64_get_vec_s8 (cpu, vm, i));
5043 return;
5044
5045 case 1:
5046 for (i = 0; i < (full ? 8 : 4); i++)
5047 aarch64_set_vec_s16 (cpu, vd, i,
5048 aarch64_get_vec_s16 (cpu, vn, i)
5049 > aarch64_get_vec_s16 (cpu, vm, i)
5050 ? aarch64_get_vec_s16 (cpu, vn, i)
5051 : aarch64_get_vec_s16 (cpu, vm, i));
5052 return;
5053
5054 case 2:
5055 for (i = 0; i < (full ? 4 : 2); i++)
5056 aarch64_set_vec_s32 (cpu, vd, i,
5057 aarch64_get_vec_s32 (cpu, vn, i)
5058 > aarch64_get_vec_s32 (cpu, vm, i)
5059 ? aarch64_get_vec_s32 (cpu, vn, i)
5060 : aarch64_get_vec_s32 (cpu, vm, i));
5061 return;
5062
5063 case 3:
5064 HALT_UNALLOC;
5065 }
5066 }
5067 }
5068
5069 static void
5070 do_vec_min (sim_cpu *cpu)
5071 {
5072 /* instr[31] = 0
5073 instr[30] = full/half selector
5074 instr[29] = SMIN (0) / UMIN (1)
5075 instr[28,24] = 0 1110
5076 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
5077 instr[21] = 1
5078 instr[20,16] = Vm
5079 instr[15,10] = 0110 11
5080 instr[9,5] = Vn
5081 instr[4,0] = Vd. */
5082
5083 unsigned vm = INSTR (20, 16);
5084 unsigned vn = INSTR (9, 5);
5085 unsigned vd = INSTR (4, 0);
5086 unsigned i;
5087 int full = INSTR (30, 30);
5088
5089 NYI_assert (28, 24, 0x0E);
5090 NYI_assert (21, 21, 1);
5091 NYI_assert (15, 10, 0x1B);
5092
5093 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5094 if (INSTR (29, 29))
5095 {
5096 switch (INSTR (23, 22))
5097 {
5098 case 0:
5099 for (i = 0; i < (full ? 16 : 8); i++)
5100 aarch64_set_vec_u8 (cpu, vd, i,
5101 aarch64_get_vec_u8 (cpu, vn, i)
5102 < aarch64_get_vec_u8 (cpu, vm, i)
5103 ? aarch64_get_vec_u8 (cpu, vn, i)
5104 : aarch64_get_vec_u8 (cpu, vm, i));
5105 return;
5106
5107 case 1:
5108 for (i = 0; i < (full ? 8 : 4); i++)
5109 aarch64_set_vec_u16 (cpu, vd, i,
5110 aarch64_get_vec_u16 (cpu, vn, i)
5111 < aarch64_get_vec_u16 (cpu, vm, i)
5112 ? aarch64_get_vec_u16 (cpu, vn, i)
5113 : aarch64_get_vec_u16 (cpu, vm, i));
5114 return;
5115
5116 case 2:
5117 for (i = 0; i < (full ? 4 : 2); i++)
5118 aarch64_set_vec_u32 (cpu, vd, i,
5119 aarch64_get_vec_u32 (cpu, vn, i)
5120 < aarch64_get_vec_u32 (cpu, vm, i)
5121 ? aarch64_get_vec_u32 (cpu, vn, i)
5122 : aarch64_get_vec_u32 (cpu, vm, i));
5123 return;
5124
5125 case 3:
5126 HALT_UNALLOC;
5127 }
5128 }
5129 else
5130 {
5131 switch (INSTR (23, 22))
5132 {
5133 case 0:
5134 for (i = 0; i < (full ? 16 : 8); i++)
5135 aarch64_set_vec_s8 (cpu, vd, i,
5136 aarch64_get_vec_s8 (cpu, vn, i)
5137 < aarch64_get_vec_s8 (cpu, vm, i)
5138 ? aarch64_get_vec_s8 (cpu, vn, i)
5139 : aarch64_get_vec_s8 (cpu, vm, i));
5140 return;
5141
5142 case 1:
5143 for (i = 0; i < (full ? 8 : 4); i++)
5144 aarch64_set_vec_s16 (cpu, vd, i,
5145 aarch64_get_vec_s16 (cpu, vn, i)
5146 < aarch64_get_vec_s16 (cpu, vm, i)
5147 ? aarch64_get_vec_s16 (cpu, vn, i)
5148 : aarch64_get_vec_s16 (cpu, vm, i));
5149 return;
5150
5151 case 2:
5152 for (i = 0; i < (full ? 4 : 2); i++)
5153 aarch64_set_vec_s32 (cpu, vd, i,
5154 aarch64_get_vec_s32 (cpu, vn, i)
5155 < aarch64_get_vec_s32 (cpu, vm, i)
5156 ? aarch64_get_vec_s32 (cpu, vn, i)
5157 : aarch64_get_vec_s32 (cpu, vm, i));
5158 return;
5159
5160 case 3:
5161 HALT_UNALLOC;
5162 }
5163 }
5164 }
5165
5166 static void
5167 do_vec_sub_long (sim_cpu *cpu)
5168 {
5169 /* instr[31] = 0
5170 instr[30] = lower (0) / upper (1)
5171 instr[29] = signed (0) / unsigned (1)
5172 instr[28,24] = 0 1110
5173 instr[23,22] = size: bytes (00), half (01), word (10)
5174 instr[21] = 1
5175 instr[20,16] = Vm
5176 instr[15,10] = 0010 00
5177 instr[9,5] = Vn
5178 instr[4,0] = V dest. */
5179
5180 unsigned size = INSTR (23, 22);
5181 unsigned vm = INSTR (20, 16);
5182 unsigned vn = INSTR (9, 5);
5183 unsigned vd = INSTR (4, 0);
5184 unsigned bias = 0;
5185 unsigned i;
5186
5187 NYI_assert (28, 24, 0x0E);
5188 NYI_assert (21, 21, 1);
5189 NYI_assert (15, 10, 0x08);
5190
5191 if (size == 3)
5192 HALT_UNALLOC;
5193
5194 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5195 switch (INSTR (30, 29))
5196 {
5197 case 2: /* SSUBL2. */
5198 bias = 2;
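/* Fall through. */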
5199 case 0: /* SSUBL. */
5200 switch (size)
5201 {
5202 case 0:
5203 bias *= 4;
5204 for (i = 0; i < 8; i++)
5205 aarch64_set_vec_s16 (cpu, vd, i,
5206 aarch64_get_vec_s8 (cpu, vn, i + bias)
5207 - aarch64_get_vec_s8 (cpu, vm, i + bias));
5208 break;
5209
5210 case 1:
5211 bias *= 2;
5212 for (i = 0; i < 4; i++)
5213 aarch64_set_vec_s32 (cpu, vd, i,
5214 aarch64_get_vec_s16 (cpu, vn, i + bias)
5215 - aarch64_get_vec_s16 (cpu, vm, i + bias));
5216 break;
5217
5218 case 2:
5219 for (i = 0; i < 2; i++)
5220 aarch64_set_vec_s64 (cpu, vd, i,
5221 aarch64_get_vec_s32 (cpu, vn, i + bias)
5222 - aarch64_get_vec_s32 (cpu, vm, i + bias));
5223 break;
5224
5225 default:
5226 HALT_UNALLOC;
5227 }
5228 break;
5229
5230 case 3: /* USUBL2. */
5231 bias = 2;
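/* Fall through. */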
5232 case 1: /* USUBL. */
5233 switch (size)
5234 {
5235 case 0:
5236 bias *= 4;
5237 for (i = 0; i < 8; i++)
5238 aarch64_set_vec_u16 (cpu, vd, i,
5239 aarch64_get_vec_u8 (cpu, vn, i + bias)
5240 - aarch64_get_vec_u8 (cpu, vm, i + bias));
5241 break;
5242
5243 case 1:
5244 bias *= 2;
5245 for (i = 0; i < 4; i++)
5246 aarch64_set_vec_u32 (cpu, vd, i,
5247 aarch64_get_vec_u16 (cpu, vn, i + bias)
5248 - aarch64_get_vec_u16 (cpu, vm, i + bias));
5249 break;
5250
5251 case 2:
5252 for (i = 0; i < 2; i++)
5253 aarch64_set_vec_u64 (cpu, vd, i,
5254 aarch64_get_vec_u32 (cpu, vn, i + bias)
5255 - aarch64_get_vec_u32 (cpu, vm, i + bias));
5256 break;
5257
5258 default:
5259 HALT_UNALLOC;
5260 }
5261 break;
5262 }
5263 }
5264
5265 static void
5266 do_vec_ADDP (sim_cpu *cpu)
5267 {
5268 /* instr[31] = 0
5269 instr[30] = half(0)/full(1)
5270 instr[29,24] = 00 1110
5271 instr[23,22] = size: bytes (00), half (01), word (10), long (11)
5272 instr[21] = 1
5273 instr[20,16] = Vm
5274 instr[15,10] = 1011 11
5275 instr[9,5] = Vn
5276 instr[4,0] = V dest. */
5277
5278 FRegister copy_vn;
5279 FRegister copy_vm;
5280 unsigned full = INSTR (30, 30);
5281 unsigned size = INSTR (23, 22);
5282 unsigned vm = INSTR (20, 16);
5283 unsigned vn = INSTR (9, 5);
5284 unsigned vd = INSTR (4, 0);
5285 unsigned i, range;
5286
5287 NYI_assert (29, 24, 0x0E);
5288 NYI_assert (21, 21, 1);
5289 NYI_assert (15, 10, 0x2F);
5290
5291 /* Make copies of the source registers in case vd == vn/vm. */
5292 copy_vn = cpu->fr[vn];
5293 copy_vm = cpu->fr[vm];
5294
5295 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5296 switch (size)
5297 {
5298 case 0:
5299 range = full ? 8 : 4;
5300 for (i = 0; i < range; i++)
5301 {
5302 aarch64_set_vec_u8 (cpu, vd, i,
5303 copy_vn.b[i * 2] + copy_vn.b[i * 2 + 1]);
5304 aarch64_set_vec_u8 (cpu, vd, i + range,
5305 copy_vm.b[i * 2] + copy_vm.b[i * 2 + 1]);
5306 }
5307 return;
5308
5309 case 1:
5310 range = full ? 4 : 2;
5311 for (i = 0; i < range; i++)
5312 {
5313 aarch64_set_vec_u16 (cpu, vd, i,
5314 copy_vn.h[i * 2] + copy_vn.h[i * 2 + 1]);
5315 aarch64_set_vec_u16 (cpu, vd, i + range,
5316 copy_vm.h[i * 2] + copy_vm.h[i * 2 + 1]);
5317 }
5318 return;
5319
5320 case 2:
5321 range = full ? 2 : 1;
5322 for (i = 0; i < range; i++)
5323 {
5324 aarch64_set_vec_u32 (cpu, vd, i,
5325 copy_vn.w[i * 2] + copy_vn.w[i * 2 + 1]);
5326 aarch64_set_vec_u32 (cpu, vd, i + range,
5327 copy_vm.w[i * 2] + copy_vm.w[i * 2 + 1]);
5328 }
5329 return;
5330
5331 case 3:
5332 if (! full)
5333 HALT_UNALLOC;
5334 aarch64_set_vec_u64 (cpu, vd, 0, copy_vn.v[0] + copy_vn.v[1]);
5335 aarch64_set_vec_u64 (cpu, vd, 1, copy_vm.v[0] + copy_vm.v[1]);
5336 return;
5337 }
5338 }
5339
5340 static void
5341 do_vec_UMOV (sim_cpu *cpu)
5342 {
5343 /* instr[31] = 0
5344 instr[30] = 32-bit(0)/64-bit(1)
5345 instr[29,21] = 00 1110 000
5346 instr[20,16] = size & index
5347 instr[15,10] = 0011 11
5348 instr[9,5] = V source
5349 instr[4,0] = R dest. */
5350
5351 unsigned vs = INSTR (9, 5);
5352 unsigned rd = INSTR (4, 0);
5353 unsigned index;
5354
5355 NYI_assert (29, 21, 0x070);
5356 NYI_assert (15, 10, 0x0F);
5357
5358 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
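/* In the size & index (imm5) field the position of the lowest set bit
selects the element size and the bits above it hold the element
index. */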
5359 if (INSTR (16, 16))
5360 {
5361 /* Byte transfer. */
5362 index = INSTR (20, 17);
5363 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5364 aarch64_get_vec_u8 (cpu, vs, index));
5365 }
5366 else if (INSTR (17, 17))
5367 {
5368 index = INSTR (20, 18);
5369 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5370 aarch64_get_vec_u16 (cpu, vs, index));
5371 }
5372 else if (INSTR (18, 18))
5373 {
5374 index = INSTR (20, 19);
5375 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5376 aarch64_get_vec_u32 (cpu, vs, index));
5377 }
5378 else
5379 {
5380 if (INSTR (30, 30) != 1)
5381 HALT_UNALLOC;
5382
5383 index = INSTR (20, 20);
5384 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5385 aarch64_get_vec_u64 (cpu, vs, index));
5386 }
5387 }
5388
5389 static void
5390 do_vec_FABS (sim_cpu *cpu)
5391 {
5392 /* instr[31] = 0
5393 instr[30] = half(0)/full(1)
5394 instr[29,23] = 00 1110 1
5395 instr[22] = float(0)/double(1)
5396 instr[21,16] = 10 0000
5397 instr[15,10] = 1111 10
5398 instr[9,5] = Vn
5399 instr[4,0] = Vd. */
5400
5401 unsigned vn = INSTR (9, 5);
5402 unsigned vd = INSTR (4, 0);
5403 unsigned full = INSTR (30, 30);
5404 unsigned i;
5405
5406 NYI_assert (29, 23, 0x1D);
5407 NYI_assert (21, 10, 0x83E);
5408
5409 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5410 if (INSTR (22, 22))
5411 {
5412 if (! full)
5413 HALT_NYI;
5414
5415 for (i = 0; i < 2; i++)
5416 aarch64_set_vec_double (cpu, vd, i,
5417 fabs (aarch64_get_vec_double (cpu, vn, i)));
5418 }
5419 else
5420 {
5421 for (i = 0; i < (full ? 4 : 2); i++)
5422 aarch64_set_vec_float (cpu, vd, i,
5423 fabsf (aarch64_get_vec_float (cpu, vn, i)));
5424 }
5425 }
5426
5427 static void
5428 do_vec_FCVTZS (sim_cpu *cpu)
5429 {
5430 /* instr[31] = 0
5431 instr[30] = half (0) / all (1)
5432 instr[29,23] = 00 1110 1
5433 instr[22] = single (0) / double (1)
5434 instr[21,10] = 10 0001 1011 10
5435 instr[9,5] = Rn
5436 instr[4,0] = Rd. */
5437
5438 unsigned rn = INSTR (9, 5);
5439 unsigned rd = INSTR (4, 0);
5440 unsigned full = INSTR (30, 30);
5441 unsigned i;
5442
5443 NYI_assert (31, 31, 0);
5444 NYI_assert (29, 23, 0x1D);
5445 NYI_assert (21, 10, 0x86E);
5446
5447 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5448 if (INSTR (22, 22))
5449 {
5450 if (! full)
5451 HALT_UNALLOC;
5452
5453 for (i = 0; i < 2; i++)
5454 aarch64_set_vec_s64 (cpu, rd, i,
5455 (int64_t) aarch64_get_vec_double (cpu, rn, i));
5456 }
5457 else
5458 for (i = 0; i < (full ? 4 : 2); i++)
5459 aarch64_set_vec_s32 (cpu, rd, i,
5460 (int32_t) aarch64_get_vec_float (cpu, rn, i));
5461 }
5462
5463 static void
5464 do_vec_REV64 (sim_cpu *cpu)
5465 {
5466 /* instr[31] = 0
5467 instr[30] = full/half
5468 instr[29,24] = 00 1110
5469 instr[23,22] = size
5470 instr[21,10] = 10 0000 0000 10
5471 instr[9,5] = Rn
5472 instr[4,0] = Rd. */
5473
5474 unsigned rn = INSTR (9, 5);
5475 unsigned rd = INSTR (4, 0);
5476 unsigned size = INSTR (23, 22);
5477 unsigned full = INSTR (30, 30);
5478 unsigned i;
5479 FRegister val;
5480
5481 NYI_assert (29, 24, 0x0E);
5482 NYI_assert (21, 10, 0x802);
5483
5484 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5485 switch (size)
5486 {
5487 case 0:
5488 for (i = 0; i < (full ? 16 : 8); i++)
5489 val.b[i ^ 0x7] = aarch64_get_vec_u8 (cpu, rn, i);
5490 break;
5491
5492 case 1:
5493 for (i = 0; i < (full ? 8 : 4); i++)
5494 val.h[i ^ 0x3] = aarch64_get_vec_u16 (cpu, rn, i);
5495 break;
5496
5497 case 2:
5498 for (i = 0; i < (full ? 4 : 2); i++)
5499 val.w[i ^ 0x1] = aarch64_get_vec_u32 (cpu, rn, i);
5500 break;
5501
5502 case 3:
5503 HALT_UNALLOC;
5504 }
5505
5506 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5507 if (full)
5508 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5509 }
5510
5511 static void
5512 do_vec_REV16 (sim_cpu *cpu)
5513 {
5514 /* instr[31] = 0
5515 instr[30] = full/half
5516 instr[29,24] = 00 1110
5517 instr[23,22] = size
5518 instr[21,10] = 10 0000 0001 10
5519 instr[9,5] = Rn
5520 instr[4,0] = Rd. */
5521
5522 unsigned rn = INSTR (9, 5);
5523 unsigned rd = INSTR (4, 0);
5524 unsigned size = INSTR (23, 22);
5525 unsigned full = INSTR (30, 30);
5526 unsigned i;
5527 FRegister val;
5528
5529 NYI_assert (29, 24, 0x0E);
5530 NYI_assert (21, 10, 0x806);
5531
5532 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5533 switch (size)
5534 {
5535 case 0:
5536 for (i = 0; i < (full ? 16 : 8); i++)
5537 val.b[i ^ 0x1] = aarch64_get_vec_u8 (cpu, rn, i);
5538 break;
5539
5540 default:
5541 HALT_UNALLOC;
5542 }
5543
5544 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5545 if (full)
5546 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5547 }
5548
5549 static void
5550 do_vec_op1 (sim_cpu *cpu)
5551 {
5552 /* instr[31] = 0
5553 instr[30] = half/full
5554 instr[29,24] = 00 1110
5555 instr[23,21] = ???
5556 instr[20,16] = Vm
5557 instr[15,10] = sub-opcode
5558 instr[9,5] = Vn
5559 instr[4,0] = Vd */
5560 NYI_assert (29, 24, 0x0E);
5561
5562 if (INSTR (21, 21) == 0)
5563 {
5564 if (INSTR (23, 22) == 0)
5565 {
5566 if (INSTR (30, 30) == 1
5567 && INSTR (17, 14) == 0
5568 && INSTR (12, 10) == 7)
5569 return do_vec_ins_2 (cpu);
5570
5571 switch (INSTR (15, 10))
5572 {
5573 case 0x01: do_vec_DUP_vector_into_vector (cpu); return;
5574 case 0x03: do_vec_DUP_scalar_into_vector (cpu); return;
5575 case 0x07: do_vec_INS (cpu); return;
5576 case 0x0A: do_vec_TRN (cpu); return;
5577
5578 case 0x0F:
5579 if (INSTR (17, 16) == 0)
5580 {
5581 do_vec_MOV_into_scalar (cpu);
5582 return;
5583 }
5584 break;
5585
5586 case 0x00:
5587 case 0x08:
5588 case 0x10:
5589 case 0x18:
5590 do_vec_TBL (cpu); return;
5591
5592 case 0x06:
5593 case 0x16:
5594 do_vec_UZP (cpu); return;
5595
5596 case 0x0E:
5597 case 0x1E:
5598 do_vec_ZIP (cpu); return;
5599
5600 default:
5601 HALT_NYI;
5602 }
5603 }
5604
5605 switch (INSTR (13, 10))
5606 {
5607 case 0x6: do_vec_UZP (cpu); return;
5608 case 0xE: do_vec_ZIP (cpu); return;
5609 case 0xA: do_vec_TRN (cpu); return;
5610 case 0xF: do_vec_UMOV (cpu); return;
5611 default: HALT_NYI;
5612 }
5613 }
5614
5615 switch (INSTR (15, 10))
5616 {
5617 case 0x02: do_vec_REV64 (cpu); return;
5618 case 0x06: do_vec_REV16 (cpu); return;
5619
5620 case 0x07:
5621 switch (INSTR (23, 21))
5622 {
5623 case 1: do_vec_AND (cpu); return;
5624 case 3: do_vec_BIC (cpu); return;
5625 case 5: do_vec_ORR (cpu); return;
5626 case 7: do_vec_ORN (cpu); return;
5627 default: HALT_NYI;
5628 }
5629
5630 case 0x08: do_vec_sub_long (cpu); return;
5631 case 0x0a: do_vec_XTN (cpu); return;
5632 case 0x11: do_vec_SSHL (cpu); return;
5633 case 0x19: do_vec_max (cpu); return;
5634 case 0x1B: do_vec_min (cpu); return;
5635 case 0x21: do_vec_add (cpu); return;
5636 case 0x25: do_vec_MLA (cpu); return;
5637 case 0x27: do_vec_mul (cpu); return;
5638 case 0x2F: do_vec_ADDP (cpu); return;
5639 case 0x30: do_vec_mull (cpu); return;
5640 case 0x33: do_vec_FMLA (cpu); return;
5641 case 0x35: do_vec_fadd (cpu); return;
5642
5643 case 0x2E:
5644 switch (INSTR (20, 16))
5645 {
5646 case 0x00: do_vec_ABS (cpu); return;
5647 case 0x01: do_vec_FCVTZS (cpu); return;
5648 case 0x11: do_vec_ADDV (cpu); return;
5649 default: HALT_NYI;
5650 }
5651
5652 case 0x31:
5653 case 0x3B:
5654 do_vec_Fminmax (cpu); return;
5655
5656 case 0x0D:
5657 case 0x0F:
5658 case 0x22:
5659 case 0x23:
5660 case 0x26:
5661 case 0x2A:
5662 case 0x32:
5663 case 0x36:
5664 case 0x39:
5665 case 0x3A:
5666 do_vec_compare (cpu); return;
5667
5668 case 0x3E:
5669 do_vec_FABS (cpu); return;
5670
5671 default:
5672 HALT_NYI;
5673 }
5674 }
5675
5676 static void
5677 do_vec_xtl (sim_cpu *cpu)
5678 {
5679 /* instr[31] = 0
5680 instr[30,29] = SXTL (00), UXTL (01), SXTL2 (10), UXTL2 (11)
5681 instr[28,22] = 0 1111 00
5682 instr[21,16] = size & shift (USHLL, SSHLL, USHLL2, SSHLL2)
5683 instr[15,10] = 1010 01
5684 instr[9,5] = V source
5685 instr[4,0] = V dest. */
5686
5687 unsigned vs = INSTR (9, 5);
5688 unsigned vd = INSTR (4, 0);
5689 unsigned i, shift, bias = 0;
5690
5691 NYI_assert (28, 22, 0x3C);
5692 NYI_assert (15, 10, 0x29);
5693
5694 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
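/* Bits [21,16] hold immh:immb: the position of the leading one in
bits [21,19] selects the source element size and the bits below it
hold the left shift amount. The XTL aliases are simply SHLL with a
zero shift. */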
5695 switch (INSTR (30, 29))
5696 {
5697 case 2: /* SXTL2, SSHLL2. */
5698 bias = 2;
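/* Fall through. */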
5699 case 0: /* SXTL, SSHLL. */
5700 if (INSTR (21, 21))
5701 {
5702 int64_t val1, val2;
5703
5704 shift = INSTR (20, 16);
5705 /* Get the source values before setting the destination values
5706 in case the source and destination are the same. */
5707 val1 = aarch64_get_vec_s32 (cpu, vs, bias) << shift;
5708 val2 = aarch64_get_vec_s32 (cpu, vs, bias + 1) << shift;
5709 aarch64_set_vec_s64 (cpu, vd, 0, val1);
5710 aarch64_set_vec_s64 (cpu, vd, 1, val2);
5711 }
5712 else if (INSTR (20, 20))
5713 {
5714 int32_t v[4];
5716
5717 shift = INSTR (19, 16);
5718 bias *= 2;
5719 for (i = 0; i < 4; i++)
5720 v[i] = aarch64_get_vec_s16 (cpu, vs, bias + i) << shift;
5721 for (i = 0; i < 4; i++)
5722 aarch64_set_vec_s32 (cpu, vd, i, v[i]);
5723 }
5724 else
5725 {
5726 int16_t v[8];
5727 NYI_assert (19, 19, 1);
5728
5729 shift = INSTR (18, 16);
5730 bias *= 4;
5731 for (i = 0; i < 8; i++)
5732 v[i] = aarch64_get_vec_s8 (cpu, vs, i + bias) << shift;
5733 for (i = 0; i < 8; i++)
5734 aarch64_set_vec_s16 (cpu, vd, i, v[i]);
5735 }
5736 return;
5737
5738 case 3: /* UXTL2, USHLL2. */
5739 bias = 2;
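/* Fall through. */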
5740 case 1: /* UXTL, USHLL. */
5741 if (INSTR (21, 21))
5742 {
5743 uint64_t v1, v2;
5744 shift = INSTR (20, 16);
5745 v1 = aarch64_get_vec_u32 (cpu, vs, bias) << shift;
5746 v2 = aarch64_get_vec_u32 (cpu, vs, bias + 1) << shift;
5747 aarch64_set_vec_u64 (cpu, vd, 0, v1);
5748 aarch64_set_vec_u64 (cpu, vd, 1, v2);
5749 }
5750 else if (INSTR (20, 20))
5751 {
5752 uint32_t v[4];
5753 shift = INSTR (19, 16);
5754 bias *= 2;
5755 for (i = 0; i < 4; i++)
5756 v[i] = aarch64_get_vec_u16 (cpu, vs, i + bias) << shift;
5757 for (i = 0; i < 4; i++)
5758 aarch64_set_vec_u32 (cpu, vd, i, v[i]);
5759 }
5760 else
5761 {
5762 uint16_t v[8];
5763 NYI_assert (19, 19, 1);
5764
5765 shift = INSTR (18, 16);
5766 bias *= 4;
5767 for (i = 0; i < 8; i++)
5768 v[i] = aarch64_get_vec_u8 (cpu, vs, i + bias) << shift;
5769 for (i = 0; i < 8; i++)
5770 aarch64_set_vec_u16 (cpu, vd, i, v[i]);
5771 }
5772 return;
5773 }
5774 }
5775
5776 static void
5777 do_vec_SHL (sim_cpu *cpu)
5778 {
5779 /* instr [31] = 0
5780 instr [30] = half(0)/full(1)
5781 instr [29,23] = 001 1110
5782 instr [22,16] = size and shift amount
5783 instr [15,10] = 01 0101
5784 instr [9, 5] = Vs
5785 instr [4, 0] = Vd. */
5786
5787 int shift;
5788 int full = INSTR (30, 30);
5789 unsigned vs = INSTR (9, 5);
5790 unsigned vd = INSTR (4, 0);
5791 unsigned i;
5792
5793 NYI_assert (29, 23, 0x1E);
5794 NYI_assert (15, 10, 0x15);
5795
5796 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
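/* Bits [22,16] hold immh:immb: the position of the leading one selects
the element size and shift = immh:immb - esize, which is why the
shift amount is read from progressively fewer low bits below. */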
5797 if (INSTR (22, 22))
5798 {
5799 shift = INSTR (21, 16);
5800
5801 if (full == 0)
5802 HALT_UNALLOC;
5803
5804 for (i = 0; i < 2; i++)
5805 {
5806 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5807 aarch64_set_vec_u64 (cpu, vd, i, val << shift);
5808 }
5809
5810 return;
5811 }
5812
5813 if (INSTR (21, 21))
5814 {
5815 shift = INSTR (20, 16);
5816
5817 for (i = 0; i < (full ? 4 : 2); i++)
5818 {
5819 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5820 aarch64_set_vec_u32 (cpu, vd, i, val << shift);
5821 }
5822
5823 return;
5824 }
5825
5826 if (INSTR (20, 20))
5827 {
5828 shift = INSTR (19, 16);
5829
5830 for (i = 0; i < (full ? 8 : 4); i++)
5831 {
5832 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5833 aarch64_set_vec_u16 (cpu, vd, i, val << shift);
5834 }
5835
5836 return;
5837 }
5838
5839 if (INSTR (19, 19) == 0)
5840 HALT_UNALLOC;
5841
5842 shift = INSTR (18, 16);
5843
5844 for (i = 0; i < (full ? 16 : 8); i++)
5845 {
5846 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5847 aarch64_set_vec_u8 (cpu, vd, i, val << shift);
5848 }
5849 }
5850
5851 static void
5852 do_vec_SSHR_USHR (sim_cpu *cpu)
5853 {
5854 /* instr [31] = 0
5855 instr [30] = half(0)/full(1)
5856 instr [29] = signed(0)/unsigned(1)
5857 instr [28,23] = 0 1111 0
5858 instr [22,16] = size and shift amount
5859 instr [15,10] = 0000 01
5860 instr [9, 5] = Vs
5861 instr [4, 0] = Vd. */
5862
5863 int full = INSTR (30, 30);
5864 int sign = ! INSTR (29, 29);
5865 unsigned shift = INSTR (22, 16);
5866 unsigned vs = INSTR (9, 5);
5867 unsigned vd = INSTR (4, 0);
5868 unsigned i;
5869
5870 NYI_assert (28, 23, 0x1E);
5871 NYI_assert (15, 10, 0x01);
5872
5873 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
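/* Right shifts encode the amount as (2 * esize) - immh:immb, hence
the 128/64/32/16 corrections applied below. */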
5874 if (INSTR (22, 22))
5875 {
5876 shift = 128 - shift;
5877
5878 if (full == 0)
5879 HALT_UNALLOC;
5880
5881 if (sign)
5882 for (i = 0; i < 2; i++)
5883 {
5884 int64_t val = aarch64_get_vec_s64 (cpu, vs, i);
5885 aarch64_set_vec_s64 (cpu, vd, i, val >> shift);
5886 }
5887 else
5888 for (i = 0; i < 2; i++)
5889 {
5890 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5891 aarch64_set_vec_u64 (cpu, vd, i, val >> shift);
5892 }
5893
5894 return;
5895 }
5896
5897 if (INSTR (21, 21))
5898 {
5899 shift = 64 - shift;
5900
5901 if (sign)
5902 for (i = 0; i < (full ? 4 : 2); i++)
5903 {
5904 int32_t val = aarch64_get_vec_s32 (cpu, vs, i);
5905 aarch64_set_vec_s32 (cpu, vd, i, val >> shift);
5906 }
5907 else
5908 for (i = 0; i < (full ? 4 : 2); i++)
5909 {
5910 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5911 aarch64_set_vec_u32 (cpu, vd, i, val >> shift);
5912 }
5913
5914 return;
5915 }
5916
5917 if (INSTR (20, 20))
5918 {
5919 shift = 32 - shift;
5920
5921 if (sign)
5922 for (i = 0; i < (full ? 8 : 4); i++)
5923 {
5924 int16_t val = aarch64_get_vec_s16 (cpu, vs, i);
5925 aarch64_set_vec_s16 (cpu, vd, i, val >> shift);
5926 }
5927 else
5928 for (i = 0; i < (full ? 8 : 4); i++)
5929 {
5930 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5931 aarch64_set_vec_u16 (cpu, vd, i, val >> shift);
5932 }
5933
5934 return;
5935 }
5936
5937 if (INSTR (19, 19) == 0)
5938 HALT_UNALLOC;
5939
5940 shift = 16 - shift;
5941
5942 if (sign)
5943 for (i = 0; i < (full ? 16 : 8); i++)
5944 {
5945 int8_t val = aarch64_get_vec_s8 (cpu, vs, i);
5946 aarch64_set_vec_s8 (cpu, vd, i, val >> shift);
5947 }
5948 else
5949 for (i = 0; i < (full ? 16 : 8); i++)
5950 {
5951 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5952 aarch64_set_vec_u8 (cpu, vd, i, val >> shift);
5953 }
5954 }
5955
5956 static void
5957 do_vec_MUL_by_element (sim_cpu *cpu)
5958 {
5959 /* instr[31] = 0
5960 instr[30] = half/full
5961 instr[29,24] = 00 1111
5962 instr[23,22] = size
5963 instr[21] = L
5964 instr[20] = M
5965 instr[19,16] = m
5966 instr[15,12] = 1000
5967 instr[11] = H
5968 instr[10] = 0
5969 instr[9,5] = Vn
5970 instr[4,0] = Vd */
5971
5972 unsigned full = INSTR (30, 30);
5973 unsigned L = INSTR (21, 21);
5974 unsigned H = INSTR (11, 11);
5975 unsigned vn = INSTR (9, 5);
5976 unsigned vd = INSTR (4, 0);
5977 unsigned size = INSTR (23, 22);
5978 unsigned index;
5979 unsigned vm;
5980 unsigned e;
5981
5982 NYI_assert (29, 24, 0x0F);
5983 NYI_assert (15, 12, 0x8);
5984 NYI_assert (10, 10, 0);
5985
5986 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
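/* The element index is built from the H, L and M bits: H:L:M for
16-bit elements (with Vm restricted to V0-V15) and H:L for 32-bit
elements, where M instead extends the register number. */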
5987 switch (size)
5988 {
5989 case 1:
5990 {
5991 /* 16 bit products. */
5992 uint16_t product;
5993 uint16_t element1;
5994 uint16_t element2;
5995
5996 index = (H << 2) | (L << 1) | INSTR (20, 20);
5997 vm = INSTR (19, 16);
5998 element2 = aarch64_get_vec_u16 (cpu, vm, index);
5999
6000 for (e = 0; e < (full ? 8 : 4); e ++)
6001 {
6002 element1 = aarch64_get_vec_u16 (cpu, vn, e);
6003 product = element1 * element2;
6004 aarch64_set_vec_u16 (cpu, vd, e, product);
6005 }
6006 }
6007 break;
6008
6009 case 2:
6010 {
6011 /* 32 bit products. */
6012 uint32_t product;
6013 uint32_t element1;
6014 uint32_t element2;
6015
6016 index = (H << 1) | L;
6017 vm = INSTR (20, 16);
6018 element2 = aarch64_get_vec_u32 (cpu, vm, index);
6019
6020 for (e = 0; e < (full ? 4 : 2); e ++)
6021 {
6022 element1 = aarch64_get_vec_u32 (cpu, vn, e);
6023 product = element1 * element2;
6024 aarch64_set_vec_u32 (cpu, vd, e, product);
6025 }
6026 }
6027 break;
6028
6029 default:
6030 HALT_UNALLOC;
6031 }
6032 }
6033
6034 static void
6035 do_FMLA_by_element (sim_cpu *cpu)
6036 {
6037 /* instr[31] = 0
6038 instr[30] = half/full
6039 instr[29,23] = 00 1111 1
6040 instr[22] = size
6041 instr[21] = L
6042 instr[20,16] = m
6043 instr[15,12] = 0001
6044 instr[11] = H
6045 instr[10] = 0
6046 instr[9,5] = Vn
6047 instr[4,0] = Vd */
6048
6049 unsigned full = INSTR (30, 30);
6050 unsigned size = INSTR (22, 22);
6051 unsigned L = INSTR (21, 21);
6052 unsigned vm = INSTR (20, 16);
6053 unsigned H = INSTR (11, 11);
6054 unsigned vn = INSTR (9, 5);
6055 unsigned vd = INSTR (4, 0);
6056 unsigned e;
6057
6058 NYI_assert (29, 23, 0x1F);
6059 NYI_assert (15, 12, 0x1);
6060 NYI_assert (10, 10, 0);
6061
6062 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
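/* The element index is H for the double variant and H:L for the float
variant; L must be zero when double elements are selected. */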
6063 if (size)
6064 {
6065 double element1, element2;
6066
6067 if (! full || L)
6068 HALT_UNALLOC;
6069
6070 element2 = aarch64_get_vec_double (cpu, vm, H);
6071
6072 for (e = 0; e < 2; e++)
6073 {
6074 element1 = aarch64_get_vec_double (cpu, vn, e);
6075 element1 *= element2;
6076 element1 += aarch64_get_vec_double (cpu, vd, e);
6077 aarch64_set_vec_double (cpu, vd, e, element1);
6078 }
6079 }
6080 else
6081 {
6082 float element1;
6083 float element2 = aarch64_get_vec_float (cpu, vm, (H << 1) | L);
6084
6085 for (e = 0; e < (full ? 4 : 2); e++)
6086 {
6087 element1 = aarch64_get_vec_float (cpu, vn, e);
6088 element1 *= element2;
6089 element1 += aarch64_get_vec_float (cpu, vd, e);
6090 aarch64_set_vec_float (cpu, vd, e, element1);
6091 }
6092 }
6093 }
6094
6095 static void
6096 do_vec_op2 (sim_cpu *cpu)
6097 {
6098 /* instr[31] = 0
6099 instr[30] = half/full
6100 instr[29,24] = 00 1111
6101 instr[23] = ?
6102 instr[22,16] = element size & index
6103 instr[15,10] = sub-opcode
6104 instr[9,5] = Vn
6105 instr[4,0] = Vd */
6106
6107 NYI_assert (29, 24, 0x0F);
6108
6109 if (INSTR (23, 23) != 0)
6110 {
6111 switch (INSTR (15, 10))
6112 {
6113 case 0x04:
6114 case 0x06:
6115 do_FMLA_by_element (cpu);
6116 return;
6117
6118 case 0x20:
6119 case 0x22:
6120 do_vec_MUL_by_element (cpu);
6121 return;
6122
6123 default:
6124 HALT_NYI;
6125 }
6126 }
6127 else
6128 {
6129 switch (INSTR (15, 10))
6130 {
6131 case 0x01: do_vec_SSHR_USHR (cpu); return;
6132 case 0x15: do_vec_SHL (cpu); return;
6133 case 0x20:
6134 case 0x22: do_vec_MUL_by_element (cpu); return;
6135 case 0x29: do_vec_xtl (cpu); return;
6136 default: HALT_NYI;
6137 }
6138 }
6139 }
6140
6141 static void
6142 do_vec_neg (sim_cpu *cpu)
6143 {
6144 /* instr[31] = 0
6145 instr[30] = full(1)/half(0)
6146 instr[29,24] = 10 1110
6147 instr[23,22] = size: byte(00), half (01), word (10), long (11)
6148 instr[21,10] = 1000 0010 1110
6149 instr[9,5] = Vs
6150 instr[4,0] = Vd */
6151
6152 int full = INSTR (30, 30);
6153 unsigned vs = INSTR (9, 5);
6154 unsigned vd = INSTR (4, 0);
6155 unsigned i;
6156
6157 NYI_assert (29, 24, 0x2E);
6158 NYI_assert (21, 10, 0x82E);
6159
6160 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6161 switch (INSTR (23, 22))
6162 {
6163 case 0:
6164 for (i = 0; i < (full ? 16 : 8); i++)
6165 aarch64_set_vec_s8 (cpu, vd, i, - aarch64_get_vec_s8 (cpu, vs, i));
6166 return;
6167
6168 case 1:
6169 for (i = 0; i < (full ? 8 : 4); i++)
6170 aarch64_set_vec_s16 (cpu, vd, i, - aarch64_get_vec_s16 (cpu, vs, i));
6171 return;
6172
6173 case 2:
6174 for (i = 0; i < (full ? 4 : 2); i++)
6175 aarch64_set_vec_s32 (cpu, vd, i, - aarch64_get_vec_s32 (cpu, vs, i));
6176 return;
6177
6178 case 3:
6179 if (! full)
6180 HALT_NYI;
6181 for (i = 0; i < 2; i++)
6182 aarch64_set_vec_s64 (cpu, vd, i, - aarch64_get_vec_s64 (cpu, vs, i));
6183 return;
6184 }
6185 }
6186
6187 static void
6188 do_vec_sqrt (sim_cpu *cpu)
6189 {
6190 /* instr[31] = 0
6191 instr[30] = full(1)/half(0)
6192 instr[29,23] = 101 1101
6193 instr[22] = single(0)/double(1)
6194 instr[21,10] = 1000 0111 1110
6195 instr[9,5] = Vs
6196 instr[4,0] = Vd. */
6197
6198 int full = INSTR (30, 30);
6199 unsigned vs = INSTR (9, 5);
6200 unsigned vd = INSTR (4, 0);
6201 unsigned i;
6202
6203 NYI_assert (29, 23, 0x5D);
6204 NYI_assert (21, 10, 0x87E);
6205
6206 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6207 if (INSTR (22, 22) == 0)
6208 for (i = 0; i < (full ? 4 : 2); i++)
6209 aarch64_set_vec_float (cpu, vd, i,
6210 sqrtf (aarch64_get_vec_float (cpu, vs, i)));
6211 else
6212 for (i = 0; i < 2; i++)
6213 aarch64_set_vec_double (cpu, vd, i,
6214 sqrt (aarch64_get_vec_double (cpu, vs, i)));
6215 }
6216
6217 static void
6218 do_vec_mls_indexed (sim_cpu *cpu)
6219 {
6220 /* instr[31] = 0
6221 instr[30] = half(0)/full(1)
6222 instr[29,24] = 10 1111
6223 instr[23,22] = 16-bit(01)/32-bit(10)
6224 instr[11],instr[21,20] = index H:L:M (if 16-bit)
6225 instr[11],instr[21] = index H:L (if 32-bit)
6226 instr[20,16] = Vm
6227 instr[15,12] = 0100
6228 instr[11] = part of index
6229 instr[10] = 0
6230 instr[9,5] = Vs
6231 instr[4,0] = Vd. */
6232
6233 int full = INSTR (30, 30);
6234 unsigned vs = INSTR (9, 5);
6235 unsigned vd = INSTR (4, 0);
6236 unsigned vm = INSTR (20, 16);
6237 unsigned i;
6238
6239 NYI_assert (15, 12, 4);
6240 NYI_assert (10, 10, 0);
6241
6242 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6243 switch (INSTR (23, 22))
6244 {
6245 case 1:
6246 {
6247 unsigned elem;
6248 uint32_t val;
6249
6250 if (vm > 15)
6251 HALT_NYI;
6252
6253 elem = (INSTR (21, 20) << 1) | INSTR (11, 11);
6254 val = aarch64_get_vec_u16 (cpu, vm, elem);
6255
6256 for (i = 0; i < (full ? 8 : 4); i++)
6257 aarch64_set_vec_u32 (cpu, vd, i,
6258 aarch64_get_vec_u32 (cpu, vd, i) -
6259 (aarch64_get_vec_u32 (cpu, vs, i) * val));
6260 return;
6261 }
6262
6263 case 2:
6264 {
6265 unsigned elem = (INSTR (21, 21) << 1) | INSTR (11, 11);
6266 uint64_t val = aarch64_get_vec_u32 (cpu, vm, elem);
6267
6268 for (i = 0; i < (full ? 4 : 2); i++)
6269 aarch64_set_vec_u64 (cpu, vd, i,
6270 aarch64_get_vec_u64 (cpu, vd, i) -
6271 (aarch64_get_vec_u64 (cpu, vs, i) * val));
6272 return;
6273 }
6274
6275 case 0:
6276 case 3:
6277 default:
6278 HALT_NYI;
6279 }
6280 }
6281
6282 static void
6283 do_vec_SUB (sim_cpu *cpu)
6284 {
6285 /* instr [31] = 0
6286 instr [30] = half(0)/full(1)
6287 instr [29,24] = 10 1110
6288 instr [23,22] = size: byte(00), half(01), word (10), long (11)
6289 instr [21] = 1
6290 instr [20,16] = Vm
6291 instr [15,10] = 10 0001
6292 instr [9, 5] = Vn
6293 instr [4, 0] = Vd. */
6294
6295 unsigned full = INSTR (30, 30);
6296 unsigned vm = INSTR (20, 16);
6297 unsigned vn = INSTR (9, 5);
6298 unsigned vd = INSTR (4, 0);
6299 unsigned i;
6300
6301 NYI_assert (29, 24, 0x2E);
6302 NYI_assert (21, 21, 1);
6303 NYI_assert (15, 10, 0x21);
6304
6305 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6306 switch (INSTR (23, 22))
6307 {
6308 case 0:
6309 for (i = 0; i < (full ? 16 : 8); i++)
6310 aarch64_set_vec_s8 (cpu, vd, i,
6311 aarch64_get_vec_s8 (cpu, vn, i)
6312 - aarch64_get_vec_s8 (cpu, vm, i));
6313 return;
6314
6315 case 1:
6316 for (i = 0; i < (full ? 8 : 4); i++)
6317 aarch64_set_vec_s16 (cpu, vd, i,
6318 aarch64_get_vec_s16 (cpu, vn, i)
6319 - aarch64_get_vec_s16 (cpu, vm, i));
6320 return;
6321
6322 case 2:
6323 for (i = 0; i < (full ? 4 : 2); i++)
6324 aarch64_set_vec_s32 (cpu, vd, i,
6325 aarch64_get_vec_s32 (cpu, vn, i)
6326 - aarch64_get_vec_s32 (cpu, vm, i));
6327 return;
6328
6329 case 3:
6330 if (full == 0)
6331 HALT_UNALLOC;
6332
6333 for (i = 0; i < 2; i++)
6334 aarch64_set_vec_s64 (cpu, vd, i,
6335 aarch64_get_vec_s64 (cpu, vn, i)
6336 - aarch64_get_vec_s64 (cpu, vm, i));
6337 return;
6338 }
6339 }
6340
6341 static void
6342 do_vec_MLS (sim_cpu *cpu)
6343 {
6344 /* instr [31] = 0
6345 instr [30] = half(0)/full(1)
6346 instr [29,24] = 10 1110
6347 instr [23,22] = size: byte(00), half(01), word (10)
6348 instr [21] = 1
6349 instr [20,16] = Vm
6350 instr [15,10] = 10 0101
6351 instr [9, 5] = Vn
6352 instr [4, 0] = Vd. */
6353
6354 unsigned full = INSTR (30, 30);
6355 unsigned vm = INSTR (20, 16);
6356 unsigned vn = INSTR (9, 5);
6357 unsigned vd = INSTR (4, 0);
6358 unsigned i;
6359
6360 NYI_assert (29, 24, 0x2E);
6361 NYI_assert (21, 21, 1);
6362 NYI_assert (15, 10, 0x25);
6363
6364 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6365 switch (INSTR (23, 22))
6366 {
6367 case 0:
6368 for (i = 0; i < (full ? 16 : 8); i++)
6369 aarch64_set_vec_u8 (cpu, vd, i,
6370 aarch64_get_vec_u8 (cpu, vd, i)
6371 - (aarch64_get_vec_u8 (cpu, vn, i)
6372 * aarch64_get_vec_u8 (cpu, vm, i)));
6373 return;
6374
6375 case 1:
6376 for (i = 0; i < (full ? 8 : 4); i++)
6377 aarch64_set_vec_u16 (cpu, vd, i,
6378 aarch64_get_vec_u16 (cpu, vd, i)
6379 - (aarch64_get_vec_u16 (cpu, vn, i)
6380 * aarch64_get_vec_u16 (cpu, vm, i)));
6381 return;
6382
6383 case 2:
6384 for (i = 0; i < (full ? 4 : 2); i++)
6385 aarch64_set_vec_u32 (cpu, vd, i,
6386 aarch64_get_vec_u32 (cpu, vd, i)
6387 - (aarch64_get_vec_u32 (cpu, vn, i)
6388 * aarch64_get_vec_u32 (cpu, vm, i)));
6389 return;
6390
6391 default:
6392 HALT_UNALLOC;
6393 }
6394 }
6395
6396 static void
6397 do_vec_FDIV (sim_cpu *cpu)
6398 {
6399 /* instr [31] = 0
6400 instr [30] = half(0)/full(1)
6401 instr [29,23] = 10 1110 0
6402 instr [22] = float(0)/double(1)
6403 instr [21] = 1
6404 instr [20,16] = Vm
6405 instr [15,10] = 1111 11
6406 instr [9, 5] = Vn
6407 instr [4, 0] = Vd. */
6408
6409 unsigned full = INSTR (30, 30);
6410 unsigned vm = INSTR (20, 16);
6411 unsigned vn = INSTR (9, 5);
6412 unsigned vd = INSTR (4, 0);
6413 unsigned i;
6414
6415 NYI_assert (29, 23, 0x5C);
6416 NYI_assert (21, 21, 1);
6417 NYI_assert (15, 10, 0x3F);
6418
6419 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6420 if (INSTR (22, 22))
6421 {
6422 if (! full)
6423 HALT_UNALLOC;
6424
6425 for (i = 0; i < 2; i++)
6426 aarch64_set_vec_double (cpu, vd, i,
6427 aarch64_get_vec_double (cpu, vn, i)
6428 / aarch64_get_vec_double (cpu, vm, i));
6429 }
6430 else
6431 for (i = 0; i < (full ? 4 : 2); i++)
6432 aarch64_set_vec_float (cpu, vd, i,
6433 aarch64_get_vec_float (cpu, vn, i)
6434 / aarch64_get_vec_float (cpu, vm, i));
6435 }
6436
6437 static void
6438 do_vec_FMUL (sim_cpu *cpu)
6439 {
6440 /* instr [31] = 0
6441 instr [30] = half(0)/full(1)
6442 instr [29,23] = 10 1110 0
6443 instr [22] = float(0)/double(1)
6444 instr [21] = 1
6445 instr [20,16] = Vm
6446 instr [15,10] = 1101 11
6447 instr [9, 5] = Vn
6448 instr [4, 0] = Vd. */
6449
6450 unsigned full = INSTR (30, 30);
6451 unsigned vm = INSTR (20, 16);
6452 unsigned vn = INSTR (9, 5);
6453 unsigned vd = INSTR (4, 0);
6454 unsigned i;
6455
6456 NYI_assert (29, 23, 0x5C);
6457 NYI_assert (21, 21, 1);
6458 NYI_assert (15, 10, 0x37);
6459
6460 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6461 if (INSTR (22, 22))
6462 {
6463 if (! full)
6464 HALT_UNALLOC;
6465
6466 for (i = 0; i < 2; i++)
6467 aarch64_set_vec_double (cpu, vd, i,
6468 aarch64_get_vec_double (cpu, vn, i)
6469 * aarch64_get_vec_double (cpu, vm, i));
6470 }
6471 else
6472 for (i = 0; i < (full ? 4 : 2); i++)
6473 aarch64_set_vec_float (cpu, vd, i,
6474 aarch64_get_vec_float (cpu, vn, i)
6475 * aarch64_get_vec_float (cpu, vm, i));
6476 }
6477
6478 static void
6479 do_vec_FADDP (sim_cpu *cpu)
6480 {
6481 /* instr [31] = 0
6482 instr [30] = half(0)/full(1)
6483 instr [29,23] = 10 1110 0
6484 instr [22] = float(0)/double(1)
6485 instr [21] = 1
6486 instr [20,16] = Vm
6487 instr [15,10] = 1101 01
6488 instr [9, 5] = Vn
6489 instr [4, 0] = Vd. */
6490
6491 unsigned full = INSTR (30, 30);
6492 unsigned vm = INSTR (20, 16);
6493 unsigned vn = INSTR (9, 5);
6494 unsigned vd = INSTR (4, 0);
6495
6496 NYI_assert (29, 23, 0x5C);
6497 NYI_assert (21, 21, 1);
6498 NYI_assert (15, 10, 0x35);
6499
6500 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6501 if (INSTR (22, 22))
6502 {
6503 /* Extract values before adding them in case vd == vn/vm. */
6504 double tmp1 = aarch64_get_vec_double (cpu, vn, 0);
6505 double tmp2 = aarch64_get_vec_double (cpu, vn, 1);
6506 double tmp3 = aarch64_get_vec_double (cpu, vm, 0);
6507 double tmp4 = aarch64_get_vec_double (cpu, vm, 1);
6508
6509 if (! full)
6510 HALT_UNALLOC;
6511
6512 aarch64_set_vec_double (cpu, vd, 0, tmp1 + tmp2);
6513 aarch64_set_vec_double (cpu, vd, 1, tmp3 + tmp4);
6514 }
6515 else
6516 {
6517 /* Extract values before adding them in case vd == vn/vm. */
6518 float tmp1 = aarch64_get_vec_float (cpu, vn, 0);
6519 float tmp2 = aarch64_get_vec_float (cpu, vn, 1);
6520 float tmp5 = aarch64_get_vec_float (cpu, vm, 0);
6521 float tmp6 = aarch64_get_vec_float (cpu, vm, 1);
6522
6523 if (full)
6524 {
6525 float tmp3 = aarch64_get_vec_float (cpu, vn, 2);
6526 float tmp4 = aarch64_get_vec_float (cpu, vn, 3);
6527 float tmp7 = aarch64_get_vec_float (cpu, vm, 2);
6528 float tmp8 = aarch64_get_vec_float (cpu, vm, 3);
6529
6530 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6531 aarch64_set_vec_float (cpu, vd, 1, tmp3 + tmp4);
6532 aarch64_set_vec_float (cpu, vd, 2, tmp5 + tmp6);
6533 aarch64_set_vec_float (cpu, vd, 3, tmp7 + tmp8);
6534 }
6535 else
6536 {
6537 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6538 aarch64_set_vec_float (cpu, vd, 1, tmp5 + tmp6);
6539 }
6540 }
6541 }
6542
6543 static void
6544 do_vec_FSQRT (sim_cpu *cpu)
6545 {
6546 /* instr[31] = 0
6547 instr[30] = half(0)/full(1)
6548 instr[29,23] = 10 1110 1
6549 instr[22] = single(0)/double(1)
6550 instr[21,10] = 10 0001 1111 10
6551 instr[9,5] = Vsrc
6552 instr[4,0] = Vdest. */
6553
6554 unsigned vn = INSTR (9, 5);
6555 unsigned vd = INSTR (4, 0);
6556 unsigned full = INSTR (30, 30);
6557 int i;
6558
6559 NYI_assert (29, 23, 0x5D);
6560 NYI_assert (21, 10, 0x87E);
6561
6562 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6563 if (INSTR (22, 22))
6564 {
6565 if (! full)
6566 HALT_UNALLOC;
6567
6568 for (i = 0; i < 2; i++)
6569 aarch64_set_vec_double (cpu, vd, i,
6570 sqrt (aarch64_get_vec_double (cpu, vn, i)));
6571 }
6572 else
6573 {
6574 for (i = 0; i < (full ? 4 : 2); i++)
6575 aarch64_set_vec_float (cpu, vd, i,
6576 sqrtf (aarch64_get_vec_float (cpu, vn, i)));
6577 }
6578 }
6579
6580 static void
6581 do_vec_FNEG (sim_cpu *cpu)
6582 {
6583 /* instr[31] = 0
6584 instr[30] = half (0)/full (1)
6585 instr[29,23] = 10 1110 1
6586 instr[22] = single (0)/double (1)
6587 instr[21,10] = 10 0000 1111 10
6588 instr[9,5] = Vsrc
6589 instr[4,0] = Vdest. */
6590
6591 unsigned vn = INSTR (9, 5);
6592 unsigned vd = INSTR (4, 0);
6593 unsigned full = INSTR (30, 30);
6594 int i;
6595
6596 NYI_assert (29, 23, 0x5D);
6597 NYI_assert (21, 10, 0x83E);
6598
6599 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6600 if (INSTR (22, 22))
6601 {
6602 if (! full)
6603 HALT_UNALLOC;
6604
6605 for (i = 0; i < 2; i++)
6606 aarch64_set_vec_double (cpu, vd, i,
6607 - aarch64_get_vec_double (cpu, vn, i));
6608 }
6609 else
6610 {
6611 for (i = 0; i < (full ? 4 : 2); i++)
6612 aarch64_set_vec_float (cpu, vd, i,
6613 - aarch64_get_vec_float (cpu, vn, i));
6614 }
6615 }
6616
6617 static void
6618 do_vec_NOT (sim_cpu *cpu)
6619 {
6620 /* instr[31] = 0
6621 instr[30] = half (0)/full (1)
6622 instr[29,10] = 10 1110 0010 0000 0101 10
6623 instr[9,5] = Vn
6624 instr[4,0] = Vd. */
6625
6626 unsigned vn = INSTR (9, 5);
6627 unsigned vd = INSTR (4, 0);
6628 unsigned i;
6629 int full = INSTR (30, 30);
6630
6631 NYI_assert (29, 10, 0xB8816);
6632
6633 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6634 for (i = 0; i < (full ? 16 : 8); i++)
6635 aarch64_set_vec_u8 (cpu, vd, i, ~ aarch64_get_vec_u8 (cpu, vn, i));
6636 }
6637
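/* Count the leading zero bits in the least significant SIZE bits of VAL.
   Returns SIZE when VAL is zero, since the mask is exhausted without
   finding a set bit.  */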
6638 static unsigned int
6639 clz (uint64_t val, unsigned size)
6640 {
6641 uint64_t mask = 1;
6642 int count;
6643
6644 mask <<= (size - 1);
6645 count = 0;
6646 do
6647 {
6648 if (val & mask)
6649 break;
6650 mask >>= 1;
6651 count ++;
6652 }
6653 while (mask);
6654
6655 return count;
6656 }
6657
6658 static void
6659 do_vec_CLZ (sim_cpu *cpu)
6660 {
6661 /* instr[31] = 0
6662 instr[30] = half (0)/full (1)
6663 instr[29,24] = 10 1110
6664 instr[23,22] = size
6665 instr[21,10] = 10 0000 0100 10
6666 instr[9,5] = Vn
6667 instr[4,0] = Vd. */
6668
6669 unsigned vn = INSTR (9, 5);
6670 unsigned vd = INSTR (4, 0);
6671 unsigned i;
6672 int full = INSTR (30,30);
6673
6674 NYI_assert (29, 24, 0x2E);
6675 NYI_assert (21, 10, 0x812);
6676
6677 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6678 switch (INSTR (23, 22))
6679 {
6680 case 0:
6681 for (i = 0; i < (full ? 16 : 8); i++)
6682 aarch64_set_vec_u8 (cpu, vd, i, clz (aarch64_get_vec_u8 (cpu, vn, i), 8));
6683 break;
6684 case 1:
6685 for (i = 0; i < (full ? 8 : 4); i++)
6686 aarch64_set_vec_u16 (cpu, vd, i, clz (aarch64_get_vec_u16 (cpu, vn, i), 16));
6687 break;
6688 case 2:
6689 for (i = 0; i < (full ? 4 : 2); i++)
6690 aarch64_set_vec_u32 (cpu, vd, i, clz (aarch64_get_vec_u32 (cpu, vn, i), 32));
6691 break;
6692 case 3:
6693 if (! full)
6694 HALT_UNALLOC;
6695 aarch64_set_vec_u64 (cpu, vd, 0, clz (aarch64_get_vec_u64 (cpu, vn, 0), 64));
6696 aarch64_set_vec_u64 (cpu, vd, 1, clz (aarch64_get_vec_u64 (cpu, vn, 1), 64));
6697 break;
6698 }
6699 }
6700
6701 static void
6702 do_vec_MOV_element (sim_cpu *cpu)
6703 {
6704 /* instr[31,21] = 0110 1110 000
6705 instr[20,16] = size & dest index
6706 instr[15] = 0
6707 instr[14,11] = source index
6708 instr[10] = 1
6709 instr[9,5] = Vs
6710 instr[4,0] = Vd. */
6711
6712 unsigned vs = INSTR (9, 5);
6713 unsigned vd = INSTR (4, 0);
6714 unsigned src_index;
6715 unsigned dst_index;
6716
6717 NYI_assert (31, 21, 0x370);
6718 NYI_assert (15, 15, 0);
6719 NYI_assert (10, 10, 1);
6720
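/* imm5 (instr[20,16]) is a priority encoding, as in the INS (element)
   instruction: the position of its lowest set bit selects the element
   size, and the bits above it supply the destination index.  */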
6721 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6722 if (INSTR (16, 16))
6723 {
6724 /* Move a byte. */
6725 src_index = INSTR (14, 11);
6726 dst_index = INSTR (20, 17);
6727 aarch64_set_vec_u8 (cpu, vd, dst_index,
6728 aarch64_get_vec_u8 (cpu, vs, src_index));
6729 }
6730 else if (INSTR (17, 17))
6731 {
6732 /* Move 16-bits. */
6733 NYI_assert (11, 11, 0);
6734 src_index = INSTR (14, 12);
6735 dst_index = INSTR (20, 18);
6736 aarch64_set_vec_u16 (cpu, vd, dst_index,
6737 aarch64_get_vec_u16 (cpu, vs, src_index));
6738 }
6739 else if (INSTR (18, 18))
6740 {
6741 /* Move 32-bits. */
6742 NYI_assert (12, 11, 0);
6743 src_index = INSTR (14, 13);
6744 dst_index = INSTR (20, 19);
6745 aarch64_set_vec_u32 (cpu, vd, dst_index,
6746 aarch64_get_vec_u32 (cpu, vs, src_index));
6747 }
6748 else
6749 {
6750 NYI_assert (19, 19, 1);
6751 NYI_assert (13, 11, 0);
6752 src_index = INSTR (14, 14);
6753 dst_index = INSTR (20, 20);
6754 aarch64_set_vec_u64 (cpu, vd, dst_index,
6755 aarch64_get_vec_u64 (cpu, vs, src_index));
6756 }
6757 }
6758
6759 static void
6760 do_vec_REV32 (sim_cpu *cpu)
6761 {
6762 /* instr[31] = 0
6763 instr[30] = full/half
6764 instr[29,24] = 10 1110
6765 instr[23,22] = size
6766 instr[21,10] = 10 0000 0000 10
6767 instr[9,5] = Rn
6768 instr[4,0] = Rd. */
6769
6770 unsigned rn = INSTR (9, 5);
6771 unsigned rd = INSTR (4, 0);
6772 unsigned size = INSTR (23, 22);
6773 unsigned full = INSTR (30, 30);
6774 unsigned i;
6775 FRegister val;
6776
6777 NYI_assert (29, 24, 0x2E);
6778 NYI_assert (21, 10, 0x802);
6779
6780 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6781 switch (size)
6782 {
6783 case 0:
6784 for (i = 0; i < (full ? 16 : 8); i++)
6785 val.b[i ^ 0x3] = aarch64_get_vec_u8 (cpu, rn, i);
6786 break;
6787
6788 case 1:
6789 for (i = 0; i < (full ? 8 : 4); i++)
6790 val.h[i ^ 0x1] = aarch64_get_vec_u16 (cpu, rn, i);
6791 break;
6792
6793 default:
6794 HALT_UNALLOC;
6795 }
6796
6797 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
6798 if (full)
6799 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
6800 }
6801
6802 static void
6803 do_vec_EXT (sim_cpu *cpu)
6804 {
6805 /* instr[31] = 0
6806 instr[30] = full/half
6807 instr[29,21] = 10 1110 000
6808 instr[20,16] = Vm
6809 instr[15] = 0
6810 instr[14,11] = source index
6811 instr[10] = 0
6812 instr[9,5] = Vn
6813 instr[4,0] = Vd. */
6814
6815 unsigned vm = INSTR (20, 16);
6816 unsigned vn = INSTR (9, 5);
6817 unsigned vd = INSTR (4, 0);
6818 unsigned src_index = INSTR (14, 11);
6819 unsigned full = INSTR (30, 30);
6820 unsigned i;
6821 unsigned j;
6822 FRegister val;
6823
6824 NYI_assert (31, 21, 0x370);
6825 NYI_assert (15, 15, 0);
6826 NYI_assert (10, 10, 0);
6827
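/* EXT copies the bytes of Vn from src_index upwards and then fills the
   remaining destination bytes from the bottom of Vm, i.e. it performs
   a byte-wise extract from the concatenation Vm:Vn.  */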
6828 if (!full && (src_index & 0x8))
6829 HALT_UNALLOC;
6830
6831 j = 0;
6832
6833 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6834 for (i = src_index; i < (full ? 16 : 8); i++)
6835 val.b[j ++] = aarch64_get_vec_u8 (cpu, vn, i);
6836 for (i = 0; i < src_index; i++)
6837 val.b[j ++] = aarch64_get_vec_u8 (cpu, vm, i);
6838
6839 aarch64_set_vec_u64 (cpu, vd, 0, val.v[0]);
6840 if (full)
6841 aarch64_set_vec_u64 (cpu, vd, 1, val.v[1]);
6842 }
6843
6844 static void
6845 dexAdvSIMD0 (sim_cpu *cpu)
6846 {
6847 /* instr [28,25] = 0 111. */
6848 if (INSTR (15, 10) == 0x07
6849 && (INSTR (9, 5) ==
6850 INSTR (20, 16)))
6851 {
6852 if (INSTR (31, 21) == 0x075
6853 || INSTR (31, 21) == 0x275)
6854 {
6855 do_vec_MOV_whole_vector (cpu);
6856 return;
6857 }
6858 }
6859
6860 if (INSTR (29, 19) == 0x1E0)
6861 {
6862 do_vec_MOV_immediate (cpu);
6863 return;
6864 }
6865
6866 if (INSTR (29, 19) == 0x5E0)
6867 {
6868 do_vec_MVNI (cpu);
6869 return;
6870 }
6871
6872 if (INSTR (29, 19) == 0x1C0
6873 || INSTR (29, 19) == 0x1C1)
6874 {
6875 if (INSTR (15, 10) == 0x03)
6876 {
6877 do_vec_DUP_scalar_into_vector (cpu);
6878 return;
6879 }
6880 }
6881
6882 switch (INSTR (29, 24))
6883 {
6884 case 0x0E: do_vec_op1 (cpu); return;
6885 case 0x0F: do_vec_op2 (cpu); return;
6886
6887 case 0x2E:
6888 if (INSTR (21, 21) == 1)
6889 {
6890 switch (INSTR (15, 10))
6891 {
6892 case 0x02:
6893 do_vec_REV32 (cpu);
6894 return;
6895
6896 case 0x07:
6897 switch (INSTR (23, 22))
6898 {
6899 case 0: do_vec_EOR (cpu); return;
6900 case 1: do_vec_BSL (cpu); return;
6901 case 2:
6902 case 3: do_vec_bit (cpu); return;
6903 }
6904 break;
6905
6906 case 0x08: do_vec_sub_long (cpu); return;
6907 case 0x11: do_vec_USHL (cpu); return;
6908 case 0x12: do_vec_CLZ (cpu); return;
6909 case 0x16: do_vec_NOT (cpu); return;
6910 case 0x19: do_vec_max (cpu); return;
6911 case 0x1B: do_vec_min (cpu); return;
6912 case 0x21: do_vec_SUB (cpu); return;
6913 case 0x25: do_vec_MLS (cpu); return;
6914 case 0x31: do_vec_FminmaxNMP (cpu); return;
6915 case 0x35: do_vec_FADDP (cpu); return;
6916 case 0x37: do_vec_FMUL (cpu); return;
6917 case 0x3F: do_vec_FDIV (cpu); return;
6918
6919 case 0x3E:
6920 switch (INSTR (20, 16))
6921 {
6922 case 0x00: do_vec_FNEG (cpu); return;
6923 case 0x01: do_vec_FSQRT (cpu); return;
6924 default: HALT_NYI;
6925 }
6926
6927 case 0x0D:
6928 case 0x0F:
6929 case 0x22:
6930 case 0x23:
6931 case 0x26:
6932 case 0x2A:
6933 case 0x32:
6934 case 0x36:
6935 case 0x39:
6936 case 0x3A:
6937 do_vec_compare (cpu); return;
6938
6939 default:
6940 break;
6941 }
6942 }
6943
6944 if (INSTR (31, 21) == 0x370)
6945 {
6946 if (INSTR (10, 10))
6947 do_vec_MOV_element (cpu);
6948 else
6949 do_vec_EXT (cpu);
6950 return;
6951 }
6952
6953 switch (INSTR (21, 10))
6954 {
6955 case 0x82E: do_vec_neg (cpu); return;
6956 case 0x87E: do_vec_sqrt (cpu); return;
6957 default:
6958 if (INSTR (15, 10) == 0x30)
6959 {
6960 do_vec_mull (cpu);
6961 return;
6962 }
6963 break;
6964 }
6965 break;
6966
6967 case 0x2F:
6968 switch (INSTR (15, 10))
6969 {
6970 case 0x01: do_vec_SSHR_USHR (cpu); return;
6971 case 0x10:
6972 case 0x12: do_vec_mls_indexed (cpu); return;
6973 case 0x29: do_vec_xtl (cpu); return;
6974 default:
6975 HALT_NYI;
6976 }
6977
6978 default:
6979 break;
6980 }
6981
6982 HALT_NYI;
6983 }
6984
6985 /* 3 sources. */
6986
6987 /* Float multiply add. */
6988 static void
6989 fmadds (sim_cpu *cpu)
6990 {
6991 unsigned sa = INSTR (14, 10);
6992 unsigned sm = INSTR (20, 16);
6993 unsigned sn = INSTR ( 9, 5);
6994 unsigned sd = INSTR ( 4, 0);
6995
6996 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6997 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
6998 + aarch64_get_FP_float (cpu, sn)
6999 * aarch64_get_FP_float (cpu, sm));
7000 }
7001
7002 /* Double multiply add. */
7003 static void
7004 fmaddd (sim_cpu *cpu)
7005 {
7006 unsigned sa = INSTR (14, 10);
7007 unsigned sm = INSTR (20, 16);
7008 unsigned sn = INSTR ( 9, 5);
7009 unsigned sd = INSTR ( 4, 0);
7010
7011 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7012 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
7013 + aarch64_get_FP_double (cpu, sn)
7014 * aarch64_get_FP_double (cpu, sm));
7015 }
7016
7017 /* Float multiply subtract. */
7018 static void
7019 fmsubs (sim_cpu *cpu)
7020 {
7021 unsigned sa = INSTR (14, 10);
7022 unsigned sm = INSTR (20, 16);
7023 unsigned sn = INSTR ( 9, 5);
7024 unsigned sd = INSTR ( 4, 0);
7025
7026 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7027 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
7028 - aarch64_get_FP_float (cpu, sn)
7029 * aarch64_get_FP_float (cpu, sm));
7030 }
7031
7032 /* Double multiply subtract. */
7033 static void
7034 fmsubd (sim_cpu *cpu)
7035 {
7036 unsigned sa = INSTR (14, 10);
7037 unsigned sm = INSTR (20, 16);
7038 unsigned sn = INSTR ( 9, 5);
7039 unsigned sd = INSTR ( 4, 0);
7040
7041 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7042 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
7043 - aarch64_get_FP_double (cpu, sn)
7044 * aarch64_get_FP_double (cpu, sm));
7045 }
7046
7047 /* Float negative multiply add. */
7048 static void
7049 fnmadds (sim_cpu *cpu)
7050 {
7051 unsigned sa = INSTR (14, 10);
7052 unsigned sm = INSTR (20, 16);
7053 unsigned sn = INSTR ( 9, 5);
7054 unsigned sd = INSTR ( 4, 0);
7055
7056 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7057 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7058 + (- aarch64_get_FP_float (cpu, sn))
7059 * aarch64_get_FP_float (cpu, sm));
7060 }
7061
7062 /* Double negative multiply add. */
7063 static void
7064 fnmaddd (sim_cpu *cpu)
7065 {
7066 unsigned sa = INSTR (14, 10);
7067 unsigned sm = INSTR (20, 16);
7068 unsigned sn = INSTR ( 9, 5);
7069 unsigned sd = INSTR ( 4, 0);
7070
7071 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7072 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7073 + (- aarch64_get_FP_double (cpu, sn))
7074 * aarch64_get_FP_double (cpu, sm));
7075 }
7076
7077 /* Float negative multiply subtract. */
7078 static void
7079 fnmsubs (sim_cpu *cpu)
7080 {
7081 unsigned sa = INSTR (14, 10);
7082 unsigned sm = INSTR (20, 16);
7083 unsigned sn = INSTR ( 9, 5);
7084 unsigned sd = INSTR ( 4, 0);
7085
7086 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7087 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7088 + aarch64_get_FP_float (cpu, sn)
7089 * aarch64_get_FP_float (cpu, sm));
7090 }
7091
7092 /* Double negative multiply subtract. */
7093 static void
7094 fnmsubd (sim_cpu *cpu)
7095 {
7096 unsigned sa = INSTR (14, 10);
7097 unsigned sm = INSTR (20, 16);
7098 unsigned sn = INSTR ( 9, 5);
7099 unsigned sd = INSTR ( 4, 0);
7100
7101 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7102 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7103 + aarch64_get_FP_double (cpu, sn)
7104 * aarch64_get_FP_double (cpu, sm));
7105 }
7106
7107 static void
7108 dexSimpleFPDataProc3Source (sim_cpu *cpu)
7109 {
7110 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7111 instr[30] = 0
7112 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7113 instr[28,25] = 1111
7114 instr[24] = 1
7115 instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
7116 instr[21] ==> o1 : 0 ==> unnegated, 1 ==> negated
7117 instr[15] ==> o2 : 0 ==> ADD, 1 ==> SUB */
7118
7119 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7120 /* dispatch on combined type:o1:o2. */
7121 uint32_t dispatch = (INSTR (23, 21) << 1) | INSTR (15, 15);
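/* For example, dispatch == 5 is type 01 (double), o1 == 0, o2 == 1,
   i.e. FMSUB (double), handled by fmsubd below.  */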
7122
7123 if (M_S != 0)
7124 HALT_UNALLOC;
7125
7126 switch (dispatch)
7127 {
7128 case 0: fmadds (cpu); return;
7129 case 1: fmsubs (cpu); return;
7130 case 2: fnmadds (cpu); return;
7131 case 3: fnmsubs (cpu); return;
7132 case 4: fmaddd (cpu); return;
7133 case 5: fmsubd (cpu); return;
7134 case 6: fnmaddd (cpu); return;
7135 case 7: fnmsubd (cpu); return;
7136 default:
7137 /* type > 1 is currently unallocated. */
7138 HALT_UNALLOC;
7139 }
7140 }
7141
7142 static void
7143 dexSimpleFPFixedConvert (sim_cpu *cpu)
7144 {
7145 HALT_NYI;
7146 }
7147
7148 static void
7149 dexSimpleFPCondCompare (sim_cpu *cpu)
7150 {
7151 /* instr [31,23] = 0001 1110 0
7152 instr [22] = type
7153 instr [21] = 1
7154 instr [20,16] = Rm
7155 instr [15,12] = condition
7156 instr [11,10] = 01
7157 instr [9,5] = Rn
7158 instr [4] = 0
7159 instr [3,0] = nzcv */
7160
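/* This implements FCCMP: if the condition does not hold, NZCV is
   loaded directly from the nzcv immediate field; otherwise the two
   registers are compared as for FCMP.  */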
7161 unsigned rm = INSTR (20, 16);
7162 unsigned rn = INSTR (9, 5);
7163
7164 NYI_assert (31, 23, 0x3C);
7165 NYI_assert (11, 10, 0x1);
7166 NYI_assert (4, 4, 0);
7167
7168 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7169 if (! testConditionCode (cpu, INSTR (15, 12)))
7170 {
7171 aarch64_set_CPSR (cpu, INSTR (3, 0));
7172 return;
7173 }
7174
7175 if (INSTR (22, 22))
7176 {
7177 /* Double precision. */
7178 double val1 = aarch64_get_vec_double (cpu, rn, 0);
7179 double val2 = aarch64_get_vec_double (cpu, rm, 0);
7180
7181 /* FIXME: Check for NaNs. */
7182 if (val1 == val2)
7183 aarch64_set_CPSR (cpu, (Z | C));
7184 else if (val1 < val2)
7185 aarch64_set_CPSR (cpu, N);
7186 else /* val1 > val2 */
7187 aarch64_set_CPSR (cpu, C);
7188 }
7189 else
7190 {
7191 /* Single precision. */
7192 float val1 = aarch64_get_vec_float (cpu, rn, 0);
7193 float val2 = aarch64_get_vec_float (cpu, rm, 0);
7194
7195 /* FIXME: Check for NaNs. */
7196 if (val1 == val2)
7197 aarch64_set_CPSR (cpu, (Z | C));
7198 else if (val1 < val2)
7199 aarch64_set_CPSR (cpu, N);
7200 else /* val1 > val2 */
7201 aarch64_set_CPSR (cpu, C);
7202 }
7203 }
7204
7205 /* 2 sources. */
7206
7207 /* Float add. */
7208 static void
7209 fadds (sim_cpu *cpu)
7210 {
7211 unsigned sm = INSTR (20, 16);
7212 unsigned sn = INSTR ( 9, 5);
7213 unsigned sd = INSTR ( 4, 0);
7214
7215 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7216 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7217 + aarch64_get_FP_float (cpu, sm));
7218 }
7219
7220 /* Double add. */
7221 static void
7222 faddd (sim_cpu *cpu)
7223 {
7224 unsigned sm = INSTR (20, 16);
7225 unsigned sn = INSTR ( 9, 5);
7226 unsigned sd = INSTR ( 4, 0);
7227
7228 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7229 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7230 + aarch64_get_FP_double (cpu, sm));
7231 }
7232
7233 /* Float divide. */
7234 static void
7235 fdivs (sim_cpu *cpu)
7236 {
7237 unsigned sm = INSTR (20, 16);
7238 unsigned sn = INSTR ( 9, 5);
7239 unsigned sd = INSTR ( 4, 0);
7240
7241 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7242 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7243 / aarch64_get_FP_float (cpu, sm));
7244 }
7245
7246 /* Double divide. */
7247 static void
7248 fdivd (sim_cpu *cpu)
7249 {
7250 unsigned sm = INSTR (20, 16);
7251 unsigned sn = INSTR ( 9, 5);
7252 unsigned sd = INSTR ( 4, 0);
7253
7254 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7255 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7256 / aarch64_get_FP_double (cpu, sm));
7257 }
7258
7259 /* Float multiply. */
7260 static void
7261 fmuls (sim_cpu *cpu)
7262 {
7263 unsigned sm = INSTR (20, 16);
7264 unsigned sn = INSTR ( 9, 5);
7265 unsigned sd = INSTR ( 4, 0);
7266
7267 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7268 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7269 * aarch64_get_FP_float (cpu, sm));
7270 }
7271
7272 /* Double multiply. */
7273 static void
7274 fmuld (sim_cpu *cpu)
7275 {
7276 unsigned sm = INSTR (20, 16);
7277 unsigned sn = INSTR ( 9, 5);
7278 unsigned sd = INSTR ( 4, 0);
7279
7280 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7281 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7282 * aarch64_get_FP_double (cpu, sm));
7283 }
7284
7285 /* Float negate and multiply. */
7286 static void
7287 fnmuls (sim_cpu *cpu)
7288 {
7289 unsigned sm = INSTR (20, 16);
7290 unsigned sn = INSTR ( 9, 5);
7291 unsigned sd = INSTR ( 4, 0);
7292
7293 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7294 aarch64_set_FP_float (cpu, sd, - (aarch64_get_FP_float (cpu, sn)
7295 * aarch64_get_FP_float (cpu, sm)));
7296 }
7297
7298 /* Double negate and multiply. */
7299 static void
7300 fnmuld (sim_cpu *cpu)
7301 {
7302 unsigned sm = INSTR (20, 16);
7303 unsigned sn = INSTR ( 9, 5);
7304 unsigned sd = INSTR ( 4, 0);
7305
7306 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7307 aarch64_set_FP_double (cpu, sd, - (aarch64_get_FP_double (cpu, sn)
7308 * aarch64_get_FP_double (cpu, sm)));
7309 }
7310
7311 /* Float subtract. */
7312 static void
7313 fsubs (sim_cpu *cpu)
7314 {
7315 unsigned sm = INSTR (20, 16);
7316 unsigned sn = INSTR ( 9, 5);
7317 unsigned sd = INSTR ( 4, 0);
7318
7319 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7320 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7321 - aarch64_get_FP_float (cpu, sm));
7322 }
7323
7324 /* Double subtract. */
7325 static void
7326 fsubd (sim_cpu *cpu)
7327 {
7328 unsigned sm = INSTR (20, 16);
7329 unsigned sn = INSTR ( 9, 5);
7330 unsigned sd = INSTR ( 4, 0);
7331
7332 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7333 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7334 - aarch64_get_FP_double (cpu, sm));
7335 }
7336
7337 static void
7338 do_FMINNM (sim_cpu *cpu)
7339 {
7340 /* instr[31,23] = 0 0011 1100
7341 instr[22] = float(0)/double(1)
7342 instr[21] = 1
7343 instr[20,16] = Sm
7344 instr[15,10] = 01 1110
7345 instr[9,5] = Sn
7346 instr[4,0] = Sd */
7347
7348 unsigned sm = INSTR (20, 16);
7349 unsigned sn = INSTR ( 9, 5);
7350 unsigned sd = INSTR ( 4, 0);
7351
7352 NYI_assert (31, 23, 0x03C);
7353 NYI_assert (15, 10, 0x1E);
7354
7355 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7356 if (INSTR (22, 22))
7357 aarch64_set_FP_double (cpu, sd,
7358 dminnm (aarch64_get_FP_double (cpu, sn),
7359 aarch64_get_FP_double (cpu, sm)));
7360 else
7361 aarch64_set_FP_float (cpu, sd,
7362 fminnm (aarch64_get_FP_float (cpu, sn),
7363 aarch64_get_FP_float (cpu, sm)));
7364 }
7365
7366 static void
7367 do_FMAXNM (sim_cpu *cpu)
7368 {
7369 /* instr[31,23] = 0 0011 1100
7370 instr[22] = float(0)/double(1)
7371 instr[21] = 1
7372 instr[20,16] = Sm
7373 instr[15,10] = 01 1010
7374 instr[9,5] = Sn
7375 instr[4,0] = Sd */
7376
7377 unsigned sm = INSTR (20, 16);
7378 unsigned sn = INSTR ( 9, 5);
7379 unsigned sd = INSTR ( 4, 0);
7380
7381 NYI_assert (31, 23, 0x03C);
7382 NYI_assert (15, 10, 0x1A);
7383
7384 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7385 if (INSTR (22, 22))
7386 aarch64_set_FP_double (cpu, sd,
7387 dmaxnm (aarch64_get_FP_double (cpu, sn),
7388 aarch64_get_FP_double (cpu, sm)));
7389 else
7390 aarch64_set_FP_float (cpu, sd,
7391 fmaxnm (aarch64_get_FP_float (cpu, sn),
7392 aarch64_get_FP_float (cpu, sm)));
7393 }
7394
7395 static void
7396 dexSimpleFPDataProc2Source (sim_cpu *cpu)
7397 {
7398 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7399 instr[30] = 0
7400 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7401 instr[28,25] = 1111
7402 instr[24] = 0
7403 instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
7404 instr[21] = 1
7405 instr[20,16] = Vm
7406 instr[15,12] ==> opcode : 0000 ==> FMUL, 0001 ==> FDIV
7407 0010 ==> FADD, 0011 ==> FSUB,
7408 0100 ==> FMAX, 0101 ==> FMIN
7409 0110 ==> FMAXNM, 0111 ==> FMINNM
7410 1000 ==> FNMUL, ow ==> UNALLOC
7411 instr[11,10] = 10
7412 instr[9,5] = Vn
7413 instr[4,0] = Vd */
7414
7415 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7416 uint32_t type = INSTR (23, 22);
7417 /* Dispatch on opcode. */
7418 uint32_t dispatch = INSTR (15, 12);
7419
7420 if (type > 1)
7421 HALT_UNALLOC;
7422
7423 if (M_S != 0)
7424 HALT_UNALLOC;
7425
7426 if (type)
7427 switch (dispatch)
7428 {
7429 case 0: fmuld (cpu); return;
7430 case 1: fdivd (cpu); return;
7431 case 2: faddd (cpu); return;
7432 case 3: fsubd (cpu); return;
7433 case 6: do_FMAXNM (cpu); return;
7434 case 7: do_FMINNM (cpu); return;
7435 case 8: fnmuld (cpu); return;
7436
7437 /* Have not yet implemented fmax and fmin. */
7438 case 4:
7439 case 5:
7440 HALT_NYI;
7441
7442 default:
7443 HALT_UNALLOC;
7444 }
7445 else /* type == 0 => floats. */
7446 switch (dispatch)
7447 {
7448 case 0: fmuls (cpu); return;
7449 case 1: fdivs (cpu); return;
7450 case 2: fadds (cpu); return;
7451 case 3: fsubs (cpu); return;
7452 case 6: do_FMAXNM (cpu); return;
7453 case 7: do_FMINNM (cpu); return;
7454 case 8: fnmuls (cpu); return;
7455
7456 case 4:
7457 case 5:
7458 HALT_NYI;
7459
7460 default:
7461 HALT_UNALLOC;
7462 }
7463 }
7464
7465 static void
7466 dexSimpleFPCondSelect (sim_cpu *cpu)
7467 {
7468 /* FCSEL
7469 instr[31,23] = 0 0011 1100
7470 instr[22] = 0=>single 1=>double
7471 instr[21] = 1
7472 instr[20,16] = Sm
7473 instr[15,12] = cond
7474 instr[11,10] = 11
7475 instr[9,5] = Sn
7476 instr[4,0] = Sd */
7477 unsigned sm = INSTR (20, 16);
7478 unsigned sn = INSTR ( 9, 5);
7479 unsigned sd = INSTR ( 4, 0);
7480 uint32_t set = testConditionCode (cpu, INSTR (15, 12));
7481
7482 NYI_assert (31, 23, 0x03C);
7483 NYI_assert (11, 10, 0x3);
7484
7485 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7486 if (INSTR (22, 22))
7487 aarch64_set_FP_double (cpu, sd, (set ? aarch64_get_FP_double (cpu, sn)
7488 : aarch64_get_FP_double (cpu, sm)));
7489 else
7490 aarch64_set_FP_float (cpu, sd, (set ? aarch64_get_FP_float (cpu, sn)
7491 : aarch64_get_FP_float (cpu, sm)));
7492 }
7493
7494 /* Store 32 bit unscaled signed 9 bit. */
7495 static void
7496 fsturs (sim_cpu *cpu, int32_t offset)
7497 {
7498 unsigned int rn = INSTR (9, 5);
7499 unsigned int st = INSTR (4, 0);
7500
7501 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7502 aarch64_set_mem_u32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7503 aarch64_get_vec_u32 (cpu, st, 0));
7504 }
7505
7506 /* Store 64 bit unscaled signed 9 bit. */
7507 static void
7508 fsturd (sim_cpu *cpu, int32_t offset)
7509 {
7510 unsigned int rn = INSTR (9, 5);
7511 unsigned int st = INSTR (4, 0);
7512
7513 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7514 aarch64_set_mem_u64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7515 aarch64_get_vec_u64 (cpu, st, 0));
7516 }
7517
7518 /* Store 128 bit unscaled signed 9 bit. */
7519 static void
7520 fsturq (sim_cpu *cpu, int32_t offset)
7521 {
7522 unsigned int rn = INSTR (9, 5);
7523 unsigned int st = INSTR (4, 0);
7524 FRegister a;
7525
7526 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7527 aarch64_get_FP_long_double (cpu, st, & a);
7528 aarch64_set_mem_long_double (cpu,
7529 aarch64_get_reg_u64 (cpu, rn, SP_OK)
7530 + offset, a);
7531 }
7532
7533 /* TODO FP move register. */
7534
7535 /* 32 bit fp to fp move register. */
7536 static void
7537 ffmovs (sim_cpu *cpu)
7538 {
7539 unsigned int rn = INSTR (9, 5);
7540 unsigned int st = INSTR (4, 0);
7541
7542 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7543 aarch64_set_FP_float (cpu, st, aarch64_get_FP_float (cpu, rn));
7544 }
7545
7546 /* 64 bit fp to fp move register. */
7547 static void
7548 ffmovd (sim_cpu *cpu)
7549 {
7550 unsigned int rn = INSTR (9, 5);
7551 unsigned int st = INSTR (4, 0);
7552
7553 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7554 aarch64_set_FP_double (cpu, st, aarch64_get_FP_double (cpu, rn));
7555 }
7556
7557 /* 32 bit GReg to Vec move register. */
7558 static void
7559 fgmovs (sim_cpu *cpu)
7560 {
7561 unsigned int rn = INSTR (9, 5);
7562 unsigned int st = INSTR (4, 0);
7563
7564 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7565 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_reg_u32 (cpu, rn, NO_SP));
7566 }
7567
7568 /* 64 bit g to fp move register. */
7569 static void
7570 fgmovd (sim_cpu *cpu)
7571 {
7572 unsigned int rn = INSTR (9, 5);
7573 unsigned int st = INSTR (4, 0);
7574
7575 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7576 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_reg_u64 (cpu, rn, NO_SP));
7577 }
7578
7579 /* 32 bit fp to g move register. */
7580 static void
7581 gfmovs (sim_cpu *cpu)
7582 {
7583 unsigned int rn = INSTR (9, 5);
7584 unsigned int st = INSTR (4, 0);
7585
7586 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7587 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u32 (cpu, rn, 0));
7588 }
7589
7590 /* 64 bit fp to g move register. */
7591 static void
7592 gfmovd (sim_cpu *cpu)
7593 {
7594 unsigned int rn = INSTR (9, 5);
7595 unsigned int st = INSTR (4, 0);
7596
7597 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7598 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u64 (cpu, rn, 0));
7599 }
7600
7601 /* FP move immediate
7602
7603 These install an immediate 8 bit value in the target register
7604 where the 8 bits comprise 1 sign bit, 4 bits of fraction and a 3
7605 bit exponent. */
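
/* For example, an imm8 of 0x70 expands to 1.0 and 0xF0 to -1.0.  */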
7606
7607 static void
7608 fmovs (sim_cpu *cpu)
7609 {
7610 unsigned int sd = INSTR (4, 0);
7611 uint32_t imm = INSTR (20, 13);
7612 float f = fp_immediate_for_encoding_32 (imm);
7613
7614 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7615 aarch64_set_FP_float (cpu, sd, f);
7616 }
7617
7618 static void
7619 fmovd (sim_cpu *cpu)
7620 {
7621 unsigned int sd = INSTR (4, 0);
7622 uint32_t imm = INSTR (20, 13);
7623 double d = fp_immediate_for_encoding_64 (imm);
7624
7625 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7626 aarch64_set_FP_double (cpu, sd, d);
7627 }
7628
7629 static void
7630 dexSimpleFPImmediate (sim_cpu *cpu)
7631 {
7632 /* instr[31,23] == 00111100
7633 instr[22] == type : single(0)/double(1)
7634 instr[21] == 1
7635 instr[20,13] == imm8
7636 instr[12,10] == 100
7637 instr[9,5] == imm5 : 00000 ==> OK, ow ==> UNALLOC
7638 instr[4,0] == Rd */
7639 uint32_t imm5 = INSTR (9, 5);
7640
7641 NYI_assert (31, 23, 0x3C);
7642
7643 if (imm5 != 0)
7644 HALT_UNALLOC;
7645
7646 if (INSTR (22, 22))
7647 fmovd (cpu);
7648 else
7649 fmovs (cpu);
7650 }
7651
7652 /* TODO specific decode and execute for group Load Store. */
7653
7654 /* TODO FP load/store single register (unscaled offset). */
7655
7656 /* TODO load 8 bit unscaled signed 9 bit. */
7657 /* TODO load 16 bit unscaled signed 9 bit. */
7658
7659 /* Load 32 bit unscaled signed 9 bit. */
7660 static void
7661 fldurs (sim_cpu *cpu, int32_t offset)
7662 {
7663 unsigned int rn = INSTR (9, 5);
7664 unsigned int st = INSTR (4, 0);
7665
7666 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7667 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
7668 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7669 }
7670
7671 /* Load 64 bit unscaled signed 9 bit. */
7672 static void
7673 fldurd (sim_cpu *cpu, int32_t offset)
7674 {
7675 unsigned int rn = INSTR (9, 5);
7676 unsigned int st = INSTR (4, 0);
7677
7678 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7679 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64
7680 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7681 }
7682
7683 /* Load 128 bit unscaled signed 9 bit. */
7684 static void
7685 fldurq (sim_cpu *cpu, int32_t offset)
7686 {
7687 unsigned int rn = INSTR (9, 5);
7688 unsigned int st = INSTR (4, 0);
7689 FRegister a;
7690 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
7691
7692 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7693 aarch64_get_mem_long_double (cpu, addr, & a);
7694 aarch64_set_FP_long_double (cpu, st, a);
7695 }
7696
7697 /* TODO store 8 bit unscaled signed 9 bit. */
7698 /* TODO store 16 bit unscaled signed 9 bit. */
7699
7700
7701 /* 1 source. */
7702
7703 /* Float absolute value. */
7704 static void
7705 fabss (sim_cpu *cpu)
7706 {
7707 unsigned sn = INSTR (9, 5);
7708 unsigned sd = INSTR (4, 0);
7709 float value = aarch64_get_FP_float (cpu, sn);
7710
7711 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7712 aarch64_set_FP_float (cpu, sd, fabsf (value));
7713 }
7714
7715 /* Double absolute value. */
7716 static void
7717 fabcpu (sim_cpu *cpu)
7718 {
7719 unsigned sn = INSTR (9, 5);
7720 unsigned sd = INSTR (4, 0);
7721 double value = aarch64_get_FP_double (cpu, sn);
7722
7723 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7724 aarch64_set_FP_double (cpu, sd, fabs (value));
7725 }
7726
7727 /* Float negative value. */
7728 static void
7729 fnegs (sim_cpu *cpu)
7730 {
7731 unsigned sn = INSTR (9, 5);
7732 unsigned sd = INSTR (4, 0);
7733
7734 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7735 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sn));
7736 }
7737
7738 /* Double negative value. */
7739 static void
7740 fnegd (sim_cpu *cpu)
7741 {
7742 unsigned sn = INSTR (9, 5);
7743 unsigned sd = INSTR (4, 0);
7744
7745 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7746 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sn));
7747 }
7748
7749 /* Float square root. */
7750 static void
7751 fsqrts (sim_cpu *cpu)
7752 {
7753 unsigned sn = INSTR (9, 5);
7754 unsigned sd = INSTR (4, 0);
7755
7756 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7757 aarch64_set_FP_float (cpu, sd, sqrtf (aarch64_get_FP_float (cpu, sn)));
7758 }
7759
7760 /* Double square root. */
7761 static void
7762 fsqrtd (sim_cpu *cpu)
7763 {
7764 unsigned sn = INSTR (9, 5);
7765 unsigned sd = INSTR (4, 0);
7766
7767 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7768 aarch64_set_FP_double (cpu, sd,
7769 sqrt (aarch64_get_FP_double (cpu, sn)));
7770 }
7771
7772 /* Convert double to float. */
7773 static void
7774 fcvtds (sim_cpu *cpu)
7775 {
7776 unsigned sn = INSTR (9, 5);
7777 unsigned sd = INSTR (4, 0);
7778
7779 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7780 aarch64_set_FP_float (cpu, sd, (float) aarch64_get_FP_double (cpu, sn));
7781 }
7782
7783 /* Convert float to double. */
7784 static void
7785 fcvtcpu (sim_cpu *cpu)
7786 {
7787 unsigned sn = INSTR (9, 5);
7788 unsigned sd = INSTR (4, 0);
7789
7790 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7791 aarch64_set_FP_double (cpu, sd, (double) aarch64_get_FP_float (cpu, sn));
7792 }
7793
7794 static void
7795 do_FRINT (sim_cpu *cpu)
7796 {
7797 /* instr[31,23] = 0001 1110 0
7798 instr[22] = single(0)/double(1)
7799 instr[21,18] = 1001
7800 instr[17,15] = rounding mode
7801 instr[14,10] = 10000
7802 instr[9,5] = source
7803 instr[4,0] = dest */
7804
7805 float val;
7806 unsigned rs = INSTR (9, 5);
7807 unsigned rd = INSTR (4, 0);
7808 unsigned int rmode = INSTR (17, 15);
7809
7810 NYI_assert (31, 23, 0x03C);
7811 NYI_assert (21, 18, 0x9);
7812 NYI_assert (14, 10, 0x10);
7813
7814 if (rmode == 6 || rmode == 7)
7815 /* FIXME: Add support for rmode == 6 exactness check. */
7816 rmode = uimm (aarch64_get_FPSR (cpu), 23, 22);
7817
7818 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7819 if (INSTR (22, 22))
7820 {
7821 double val = aarch64_get_FP_double (cpu, rs);
7822
7823 switch (rmode)
7824 {
7825 case 0: /* mode N: nearest or even. */
7826 {
7827 double rval = round (val);
7828
7829 /* round () rounds ties away from zero, so steer exact halfway
7830    cases back to the even neighbour by hand.  */
7831 if (fabs (val - rval) == 0.5 && fmod (rval, 2.0) != 0.0)
7832 rval -= copysign (1.0, rval);
7833
7834 aarch64_set_FP_double (cpu, rd, rval);
7835 return;
7837 }
7838
7839 case 1: /* mode P: towards +inf. */
7840 aarch64_set_FP_double (cpu, rd, ceil (val));
7841 return;
7842
7843 case 2: /* mode M: towards -inf. */
7844 aarch64_set_FP_double (cpu, rd, floor (val));
7845 return;
7852
7853 case 3: /* mode Z: towards 0. */
7854 aarch64_set_FP_double (cpu, rd, trunc (val));
7855 return;
7856
7857 case 4: /* mode A: away from 0. */
7858 aarch64_set_FP_double (cpu, rd, round (val));
7859 return;
7860
7861 case 6: /* mode X: use FPCR with exactness check. */
7862 case 7: /* mode I: use FPCR mode. */
7863 HALT_NYI;
7864
7865 default:
7866 HALT_UNALLOC;
7867 }
7868 }
7869
7870 val = aarch64_get_FP_float (cpu, rs);
7871
7872 switch (rmode)
7873 {
7874 case 0: /* mode N: nearest or even. */
7875 {
7876 float rval = roundf (val);
7877
7878 /* roundf () rounds ties away from zero, so steer exact halfway
7879    cases back to the even neighbour by hand.  */
7880 if (fabsf (val - rval) == 0.5f && fmodf (rval, 2.0f) != 0.0f)
7881 rval -= copysignf (1.0f, rval);
7883
7884 aarch64_set_FP_float (cpu, rd, rval);
7885 return;
7886 }
7887
7888 case 1: /* mode P: towards +inf. */
7889 aarch64_set_FP_float (cpu, rd, ceilf (val));
7890 return;
7891
7892 case 2: /* mode M: towards -inf. */
7893 aarch64_set_FP_float (cpu, rd, floorf (val));
7894 return;
7901
7902 case 3: /* mode Z: towards 0. */
7903 aarch64_set_FP_float (cpu, rd, truncf (val));
7904 return;
7905
7906 case 4: /* mode A: away from 0. */
7907 aarch64_set_FP_float (cpu, rd, roundf (val));
7908 return;
7909
7910 case 6: /* mode X: use FPCR with exactness check. */
7911 case 7: /* mode I: use FPCR mode. */
7912 HALT_NYI;
7913
7914 default:
7915 HALT_UNALLOC;
7916 }
7917 }
7918
7919 /* Convert half to float. */
7920 static void
7921 do_FCVT_half_to_single (sim_cpu *cpu)
7922 {
7923 unsigned rn = INSTR (9, 5);
7924 unsigned rd = INSTR (4, 0);
7925
7926 NYI_assert (31, 10, 0x7B890);
7927
7928 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7929 aarch64_set_FP_float (cpu, rd, (float) aarch64_get_FP_half (cpu, rn));
7930 }
7931
7932 /* Convert half to double. */
7933 static void
7934 do_FCVT_half_to_double (sim_cpu *cpu)
7935 {
7936 unsigned rn = INSTR (9, 5);
7937 unsigned rd = INSTR (4, 0);
7938
7939 NYI_assert (31, 10, 0x7B8B0);
7940
7941 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7942 aarch64_set_FP_double (cpu, rd, (double) aarch64_get_FP_half (cpu, rn));
7943 }
7944
7945 static void
7946 do_FCVT_single_to_half (sim_cpu *cpu)
7947 {
7948 unsigned rn = INSTR (9, 5);
7949 unsigned rd = INSTR (4, 0);
7950
7951 NYI_assert (31, 10, 0x788F0);
7952
7953 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7954 aarch64_set_FP_half (cpu, rd, aarch64_get_FP_float (cpu, rn));
7955 }
7956
7957 /* Convert double to half. */
7958 static void
7959 do_FCVT_double_to_half (sim_cpu *cpu)
7960 {
7961 unsigned rn = INSTR (9, 5);
7962 unsigned rd = INSTR (4, 0);
7963
7964 NYI_assert (31, 10, 0x798F0);
7965
7966 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7967 aarch64_set_FP_half (cpu, rd, (float) aarch64_get_FP_double (cpu, rn));
7968 }
7969
7970 static void
7971 dexSimpleFPDataProc1Source (sim_cpu *cpu)
7972 {
7973 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7974 instr[30] = 0
7975 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7976 instr[28,25] = 1111
7977 instr[24] = 0
7978 instr[23,22] ==> type : 00 ==> source is single,
7979 01 ==> source is double
7980 10 ==> UNALLOC
7981 11 ==> UNALLOC or source is half
7982 instr[21] = 1
7983 instr[20,15] ==> opcode : with type 00 or 01
7984 000000 ==> FMOV, 000001 ==> FABS,
7985 000010 ==> FNEG, 000011 ==> FSQRT,
7986 000100 ==> UNALLOC, 000101 ==> FCVT,(to single/double)
7987 000110 ==> UNALLOC, 000111 ==> FCVT (to half)
7988 001000 ==> FRINTN, 001001 ==> FRINTP,
7989 001010 ==> FRINTM, 001011 ==> FRINTZ,
7990 001100 ==> FRINTA, 001101 ==> UNALLOC
7991 001110 ==> FRINTX, 001111 ==> FRINTI
7992 with type 11
7993 000100 ==> FCVT (half-to-single)
7994 000101 ==> FCVT (half-to-double)
7995 instr[14,10] = 10000. */
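
/* For example, opcode 000011 with type 01 selects fsqrtd below, and
   with type 00 selects fsqrts.  */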
7996
7997 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7998 uint32_t type = INSTR (23, 22);
7999 uint32_t opcode = INSTR (20, 15);
8000
8001 if (M_S != 0)
8002 HALT_UNALLOC;
8003
8004 if (type == 3)
8005 {
8006 if (opcode == 4)
8007 do_FCVT_half_to_single (cpu);
8008 else if (opcode == 5)
8009 do_FCVT_half_to_double (cpu);
8010 else
8011 HALT_UNALLOC;
8012 return;
8013 }
8014
8015 if (type == 2)
8016 HALT_UNALLOC;
8017
8018 switch (opcode)
8019 {
8020 case 0:
8021 if (type)
8022 ffmovd (cpu);
8023 else
8024 ffmovs (cpu);
8025 return;
8026
8027 case 1:
8028 if (type)
8029 fabcpu (cpu);
8030 else
8031 fabss (cpu);
8032 return;
8033
8034 case 2:
8035 if (type)
8036 fnegd (cpu);
8037 else
8038 fnegs (cpu);
8039 return;
8040
8041 case 3:
8042 if (type)
8043 fsqrtd (cpu);
8044 else
8045 fsqrts (cpu);
8046 return;
8047
8048 case 4:
8049 if (type)
8050 fcvtds (cpu);
8051 else
8052 HALT_UNALLOC;
8053 return;
8054
8055 case 5:
8056 if (type)
8057 HALT_UNALLOC;
8058 fcvtcpu (cpu);
8059 return;
8060
8061 case 8: /* FRINTN etc. */
8062 case 9:
8063 case 10:
8064 case 11:
8065 case 12:
8066 case 14:
8067 case 15:
8068 do_FRINT (cpu);
8069 return;
8070
8071 case 7:
8072 if (INSTR (22, 22))
8073 do_FCVT_double_to_half (cpu);
8074 else
8075 do_FCVT_single_to_half (cpu);
8076 return;
8077
8078 case 13:
8079 HALT_NYI;
8080
8081 default:
8082 HALT_UNALLOC;
8083 }
8084 }
8085
8086 /* 32 bit signed int to float. */
8087 static void
8088 scvtf32 (sim_cpu *cpu)
8089 {
8090 unsigned rn = INSTR (9, 5);
8091 unsigned sd = INSTR (4, 0);
8092
8093 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8094 aarch64_set_FP_float
8095 (cpu, sd, (float) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8096 }
8097
8098 /* 64 bit signed int to float. */
8099 static void
8100 scvtf (sim_cpu *cpu)
8101 {
8102 unsigned rn = INSTR (9, 5);
8103 unsigned sd = INSTR (4, 0);
8104
8105 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8106 aarch64_set_FP_float
8107 (cpu, sd, (float) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8108 }
8109
8110 /* 32 bit signed int to double. */
8111 static void
8112 scvtd32 (sim_cpu *cpu)
8113 {
8114 unsigned rn = INSTR (9, 5);
8115 unsigned sd = INSTR (4, 0);
8116
8117 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8118 aarch64_set_FP_double
8119 (cpu, sd, (double) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8120 }
8121
8122 /* 64 bit signed int to double. */
8123 static void
8124 scvtd (sim_cpu *cpu)
8125 {
8126 unsigned rn = INSTR (9, 5);
8127 unsigned sd = INSTR (4, 0);
8128
8129 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8130 aarch64_set_FP_double
8131 (cpu, sd, (double) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8132 }
8133
8134 static const float FLOAT_INT_MAX = (float) INT_MAX;
8135 static const float FLOAT_INT_MIN = (float) INT_MIN;
8136 static const double DOUBLE_INT_MAX = (double) INT_MAX;
8137 static const double DOUBLE_INT_MIN = (double) INT_MIN;
8138 static const float FLOAT_LONG_MAX = (float) LONG_MAX;
8139 static const float FLOAT_LONG_MIN = (float) LONG_MIN;
8140 static const double DOUBLE_LONG_MAX = (double) LONG_MAX;
8141 static const double DOUBLE_LONG_MIN = (double) LONG_MIN;
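
/* Note: the LONG and ULONG bounds in this table are derived from the
   host's long type, so they assume an LP64 host; on an ILP32 host they
   would be too narrow for the 64-bit conversions.  */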
8142
8143 #define UINT_MIN 0
8144 #define ULONG_MIN 0
8145 static const float FLOAT_UINT_MAX = (float) UINT_MAX;
8146 static const float FLOAT_UINT_MIN = (float) UINT_MIN;
8147 static const double DOUBLE_UINT_MAX = (double) UINT_MAX;
8148 static const double DOUBLE_UINT_MIN = (double) UINT_MIN;
8149 static const float FLOAT_ULONG_MAX = (float) ULONG_MAX;
8150 static const float FLOAT_ULONG_MIN = (float) ULONG_MIN;
8151 static const double DOUBLE_ULONG_MAX = (double) ULONG_MAX;
8152 static const double DOUBLE_ULONG_MIN = (double) ULONG_MIN;
8153
8154 /* Check for FP exception conditions:
8155 NaN raises IO
8156 Infinity raises IO
8157 Out of Range raises IO and IX and saturates value
8158 Denormal raises ID and IX and sets to zero. */
8159 #define RAISE_EXCEPTIONS(F, VALUE, FTYPE, ITYPE) \
8160 do \
8161 { \
8162 switch (fpclassify (F)) \
8163 { \
8164 case FP_INFINITE: \
8165 case FP_NAN: \
8166 aarch64_set_FPSR (cpu, IO); \
8167 if (signbit (F)) \
8168 VALUE = ITYPE##_MIN; \
8169 else \
8170 VALUE = ITYPE##_MAX; \
8171 break; \
8172 \
8173 case FP_NORMAL: \
8174 if (F >= FTYPE##_##ITYPE##_MAX) \
8175 { \
8176 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8177 VALUE = ITYPE##_MAX; \
8178 } \
8179 else if (F <= FTYPE##_##ITYPE##_MIN) \
8180 { \
8181 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8182 VALUE = ITYPE##_MIN; \
8183 } \
8184 break; \
8185 \
8186 case FP_SUBNORMAL: \
8187 aarch64_set_FPSR_bits (cpu, IO | IX | ID, IX | ID); \
8188 VALUE = 0; \
8189 break; \
8190 \
8191 default: \
8192 case FP_ZERO: \
8193 VALUE = 0; \
8194 break; \
8195 } \
8196 } \
8197 while (0)
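
/* For example, converting 3.0e38f to int32_t takes the FP_NORMAL arm:
   3.0e38f >= FLOAT_INT_MAX, so IO and IX are raised and the result
   saturates to INT_MAX.  */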
8198
8199 /* 32 bit convert float to signed int truncate towards zero. */
8200 static void
8201 fcvtszs32 (sim_cpu *cpu)
8202 {
8203 unsigned sn = INSTR (9, 5);
8204 unsigned rd = INSTR (4, 0);
8205 /* C converts float to int by truncating toward zero (C99 6.3.1.4). */
8206 float f = aarch64_get_FP_float (cpu, sn);
8207 int32_t value = (int32_t) f;
8208
8209 RAISE_EXCEPTIONS (f, value, FLOAT, INT);
8210
8211 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8212 /* Avoid sign extension to 64 bit. */
8213 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8214 }
8215
8216 /* 64 bit convert float to signed int truncate towards zero. */
8217 static void
8218 fcvtszs (sim_cpu *cpu)
8219 {
8220 unsigned sn = INSTR (9, 5);
8221 unsigned rd = INSTR (4, 0);
8222 float f = aarch64_get_FP_float (cpu, sn);
8223 int64_t value = (int64_t) f;
8224
8225 RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
8226
8227 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8228 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8229 }
8230
8231 /* 32 bit convert double to signed int truncate towards zero. */
8232 static void
8233 fcvtszd32 (sim_cpu *cpu)
8234 {
8235 unsigned sn = INSTR (9, 5);
8236 unsigned rd = INSTR (4, 0);
8237 /* C converts double to int by truncating toward zero (C99 6.3.1.4). */
8238 double d = aarch64_get_FP_double (cpu, sn);
8239 int32_t value = (int32_t) d;
8240
8241 RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
8242
8243 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8244 /* Avoid sign extension to 64 bit. */
8245 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8246 }
8247
8248 /* 64 bit convert double to signed int truncate towards zero. */
8249 static void
8250 fcvtszd (sim_cpu *cpu)
8251 {
8252 unsigned sn = INSTR (9, 5);
8253 unsigned rd = INSTR (4, 0);
8254 /* C converts double to int by truncating toward zero (C99 6.3.1.4). */
8255 double d = aarch64_get_FP_double (cpu, sn);
8256 int64_t value;
8257
8258 value = (int64_t) d;
8259
8260 RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
8261
8262 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8263 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8264 }
8265
8266 static void
8267 do_fcvtzu (sim_cpu *cpu)
8268 {
8269 /* instr[31] = size: 32-bit (0), 64-bit (1)
8270 instr[30,23] = 00111100
8271 instr[22] = type: single (0)/ double (1)
8272 instr[21] = 1 ==> integer form, 0 ==> fixed-point form (scale below)
8273 instr[20,16] = 11001
8274 instr[15,10] = scale (fixed-point form only)
8275 instr[9,5] = Rs
8276 instr[4,0] = Rd. */
8277
8278 unsigned rs = INSTR (9, 5);
8279 unsigned rd = INSTR (4, 0);
8280
8281 NYI_assert (30, 23, 0x3C);
8282 NYI_assert (20, 16, 0x19);
8283
8284 if (INSTR (21, 21) != 1)
8285 /* Convert to fixed point. */
8286 HALT_NYI;
8287
8288 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8289 if (INSTR (31, 31))
8290 {
8291 /* Convert to unsigned 64-bit integer. */
8292 if (INSTR (22, 22))
8293 {
8294 double d = aarch64_get_FP_double (cpu, rs);
8295 uint64_t value = (uint64_t) d;
8296
8297 /* Do not raise an exception if we have reached ULONG_MAX. */
8298 if (value != (1UL << 63))
8299 RAISE_EXCEPTIONS (d, value, DOUBLE, ULONG);
8300
8301 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8302 }
8303 else
8304 {
8305 float f = aarch64_get_FP_float (cpu, rs);
8306 uint64_t value = (uint64_t) f;
8307
8308 /* Do not raise an exception if we have reached ULONG_MAX. */
8309 if (value != (1UL << 63))
8310 RAISE_EXCEPTIONS (f, value, FLOAT, ULONG);
8311
8312 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8313 }
8314 }
8315 else
8316 {
8317 uint32_t value;
8318
8319 /* Convert to unsigned 32-bit integer. */
8320 if (INSTR (22, 22))
8321 {
8322 double d = aarch64_get_FP_double (cpu, rs);
8323
8324 value = (uint32_t) d;
8325 /* Do not raise an exception if we have reached UINT_MAX. */
8326 if (value != (1UL << 31))
8327 RAISE_EXCEPTIONS (d, value, DOUBLE, UINT);
8328 }
8329 else
8330 {
8331 float f = aarch64_get_FP_float (cpu, rs);
8332
8333 value = (uint32_t) f;
8334 /* Do not raise an exception if we have reached UINT_MAX. */
8335 if (value != (1UL << 31))
8336 RAISE_EXCEPTIONS (f, value, FLOAT, UINT);
8337 }
8338
8339 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8340 }
8341 }
8342
8343 static void
8344 do_UCVTF (sim_cpu *cpu)
8345 {
8346 /* instr[31] = size: 32-bit (0), 64-bit (1)
8347 instr[30,23] = 001 1110 0
8348 instr[22] = type: single (0)/ double (1)
8349 instr[21] = 1 ==> integer form, 0 ==> fixed-point form (scale below)
8350 instr[20,16] = 0 0011
8351 instr[15,10] = scale (fixed-point form only)
8352 instr[9,5] = Rs
8353 instr[4,0] = Rd. */
8354
8355 unsigned rs = INSTR (9, 5);
8356 unsigned rd = INSTR (4, 0);
8357
8358 NYI_assert (30, 23, 0x3C);
8359 NYI_assert (20, 16, 0x03);
8360
8361 if (INSTR (21, 21) != 1)
8362 HALT_NYI;
8363
8364 /* FIXME: Add exception raising. */
8365 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8366 if (INSTR (31, 31))
8367 {
8368 uint64_t value = aarch64_get_reg_u64 (cpu, rs, NO_SP);
8369
8370 if (INSTR (22, 22))
8371 aarch64_set_FP_double (cpu, rd, (double) value);
8372 else
8373 aarch64_set_FP_float (cpu, rd, (float) value);
8374 }
8375 else
8376 {
8377 uint32_t value = aarch64_get_reg_u32 (cpu, rs, NO_SP);
8378
8379 if (INSTR (22, 22))
8380 aarch64_set_FP_double (cpu, rd, (double) value);
8381 else
8382 aarch64_set_FP_float (cpu, rd, (float) value);
8383 }
8384 }
8385
8386 static void
8387 float_vector_move (sim_cpu *cpu)
8388 {
8389 /* instr[31,17] == 100 1111 0101 0111
8390 instr[16] ==> direction 0=> to GR, 1=> from GR
8391 instr[15,10] ==> must be 00 0000, else UNALLOC
8392 instr[9,5] ==> source
8393 instr[4,0] ==> dest. */
8394
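/* This is the FMOV variant that accesses the upper 64 bits of the
   vector register, i.e. FMOV Vd.D[1], Xn and FMOV Xd, Vn.D[1], hence
   the element index of 1 below.  */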
8395 unsigned rn = INSTR (9, 5);
8396 unsigned rd = INSTR (4, 0);
8397
8398 NYI_assert (31, 17, 0x4F57);
8399
8400 if (INSTR (15, 10) != 0)
8401 HALT_UNALLOC;
8402
8403 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8404 if (INSTR (16, 16))
8405 aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_reg_u64 (cpu, rn, NO_SP));
8406 else
8407 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, rn, 1));
8408 }
8409
8410 static void
8411 dexSimpleFPIntegerConvert (sim_cpu *cpu)
8412 {
8413 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
8414	   instr[30]    = 0
8415 instr[29] = S : 0 ==> OK, 1 ==> UNALLOC
8416 instr[28,25] = 1111
8417 instr[24] = 0
8418 instr[23,22] = type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8419 instr[21] = 1
8420 instr[20,19] = rmode
8421 instr[18,16] = opcode
8422	   instr[15,10] = 00 0000  */
8423
8424 uint32_t rmode_opcode;
8425 uint32_t size_type;
8426 uint32_t type;
8427 uint32_t size;
8428 uint32_t S;
8429
8430 if (INSTR (31, 17) == 0x4F57)
8431 {
8432 float_vector_move (cpu);
8433 return;
8434 }
8435
8436 size = INSTR (31, 31);
8437 S = INSTR (29, 29);
8438 if (S != 0)
8439 HALT_UNALLOC;
8440
8441 type = INSTR (23, 22);
8442 if (type > 1)
8443 HALT_UNALLOC;
8444
8445 rmode_opcode = INSTR (20, 16);
8446 size_type = (size << 1) | type; /* 0==32f, 1==32d, 2==64f, 3==64d. */
8447
8448 switch (rmode_opcode)
8449 {
8450 case 2: /* SCVTF. */
8451 switch (size_type)
8452 {
8453 case 0: scvtf32 (cpu); return;
8454 case 1: scvtd32 (cpu); return;
8455 case 2: scvtf (cpu); return;
8456 case 3: scvtd (cpu); return;
8457 }
8458
8459 case 6: /* FMOV GR, Vec. */
8460 switch (size_type)
8461 {
8462 case 0: gfmovs (cpu); return;
8463 case 3: gfmovd (cpu); return;
8464 default: HALT_UNALLOC;
8465 }
8466
8467 case 7: /* FMOV vec, GR. */
8468 switch (size_type)
8469 {
8470 case 0: fgmovs (cpu); return;
8471 case 3: fgmovd (cpu); return;
8472 default: HALT_UNALLOC;
8473 }
8474
8475 case 24: /* FCVTZS. */
8476 switch (size_type)
8477 {
8478 case 0: fcvtszs32 (cpu); return;
8479 case 1: fcvtszd32 (cpu); return;
8480 case 2: fcvtszs (cpu); return;
8481 case 3: fcvtszd (cpu); return;
8482 }
8483
8484 case 25: do_fcvtzu (cpu); return;
8485 case 3: do_UCVTF (cpu); return;
8486
8487 case 0: /* FCVTNS. */
8488 case 1: /* FCVTNU. */
8489 case 4: /* FCVTAS. */
8490 case 5: /* FCVTAU. */
8491	    case 8:	/* FCVTPS. */
8492 case 9: /* FCVTPU. */
8493 case 16: /* FCVTMS. */
8494 case 17: /* FCVTMU. */
8495 default:
8496 HALT_NYI;
8497 }
8498 }
8499
8500 static void
8501 set_flags_for_float_compare (sim_cpu *cpu, float fvalue1, float fvalue2)
8502 {
8503 uint32_t flags;
8504
8505 /* FIXME: Add exception raising. */
8506 if (isnan (fvalue1) || isnan (fvalue2))
8507 flags = C|V;
8508 else if (isinf (fvalue1) && isinf (fvalue2))
8509 {
8510	      /* Subtracting two infinities may give a NaN.  We only need to compare
8511		 the signs, which we can get from isinf (+1 or -1 under glibc).  */
8512 int result = isinf (fvalue1) - isinf (fvalue2);
8513
8514 if (result == 0)
8515 flags = Z|C;
8516 else if (result < 0)
8517 flags = N;
8518 else /* (result > 0). */
8519 flags = C;
8520 }
8521 else
8522 {
8523 float result = fvalue1 - fvalue2;
8524
8525 if (result == 0.0)
8526 flags = Z|C;
8527 else if (result < 0)
8528 flags = N;
8529 else /* (result > 0). */
8530 flags = C;
8531 }
8532
8533 aarch64_set_CPSR (cpu, flags);
8534 }
8535
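/* Illustrative flag outcomes for the comparison helper above
   (operand values are editorial examples):
     1.0f vs 2.0f  ==>  N    (less than)
     2.0f vs 2.0f  ==>  Z|C  (equal)
     3.0f vs 2.0f  ==>  C    (greater than)
     NaN  vs 2.0f  ==>  C|V  (unordered).  */
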
8536 static void
8537 fcmps (sim_cpu *cpu)
8538 {
8539 unsigned sm = INSTR (20, 16);
8540 unsigned sn = INSTR ( 9, 5);
8541
8542 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8543 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8544
8545 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8546 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8547 }
8548
8549 /* Float compare to zero -- Invalid Operation exception
8550 only on signaling NaNs. */
8551 static void
8552 fcmpzs (sim_cpu *cpu)
8553 {
8554 unsigned sn = INSTR ( 9, 5);
8555 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8556
8557 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8558 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8559 }
8560
8561 /* Float compare -- Invalid Operation exception on all NaNs. */
8562 static void
8563 fcmpes (sim_cpu *cpu)
8564 {
8565 unsigned sm = INSTR (20, 16);
8566 unsigned sn = INSTR ( 9, 5);
8567
8568 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8569 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8570
8571 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8572 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8573 }
8574
8575 /* Float compare to zero -- Invalid Operation exception on all NaNs. */
8576 static void
8577 fcmpzes (sim_cpu *cpu)
8578 {
8579 unsigned sn = INSTR ( 9, 5);
8580 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8581
8582 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8583 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8584 }
8585
8586 static void
8587 set_flags_for_double_compare (sim_cpu *cpu, double dval1, double dval2)
8588 {
8589 uint32_t flags;
8590
8591 /* FIXME: Add exception raising. */
8592 if (isnan (dval1) || isnan (dval2))
8593 flags = C|V;
8594 else if (isinf (dval1) && isinf (dval2))
8595 {
8596	      /* Subtracting two infinities may give a NaN.  We only need to compare
8597		 the signs, which we can get from isinf (+1 or -1 under glibc).  */
8598 int result = isinf (dval1) - isinf (dval2);
8599
8600 if (result == 0)
8601 flags = Z|C;
8602 else if (result < 0)
8603 flags = N;
8604 else /* (result > 0). */
8605 flags = C;
8606 }
8607 else
8608 {
8609 double result = dval1 - dval2;
8610
8611 if (result == 0.0)
8612 flags = Z|C;
8613 else if (result < 0)
8614 flags = N;
8615 else /* (result > 0). */
8616 flags = C;
8617 }
8618
8619 aarch64_set_CPSR (cpu, flags);
8620 }
8621
8622 /* Double compare -- Invalid Operation exception only on signaling NaNs. */
8623 static void
8624 fcmpd (sim_cpu *cpu)
8625 {
8626 unsigned sm = INSTR (20, 16);
8627 unsigned sn = INSTR ( 9, 5);
8628
8629 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8630 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8631
8632 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8633 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8634 }
8635
8636 /* Double compare to zero -- Invalid Operation exception
8637 only on signaling NaNs. */
8638 static void
8639 fcmpzd (sim_cpu *cpu)
8640 {
8641 unsigned sn = INSTR ( 9, 5);
8642 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8643
8644 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8645 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8646 }
8647
8648 /* Double compare -- Invalid Operation exception on all NaNs. */
8649 static void
8650 fcmped (sim_cpu *cpu)
8651 {
8652 unsigned sm = INSTR (20, 16);
8653 unsigned sn = INSTR ( 9, 5);
8654
8655 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8656 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8657
8658 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8659 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8660 }
8661
8662 /* Double compare to zero -- Invalid Operation exception on all NaNs. */
8663 static void
8664 fcmpzed (sim_cpu *cpu)
8665 {
8666 unsigned sn = INSTR ( 9, 5);
8667 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8668
8669 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8670 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8671 }
8672
8673 static void
8674 dexSimpleFPCompare (sim_cpu *cpu)
8675 {
8676 /* assert instr[28,25] == 1111
8677 instr[30:24:21:13,10] = 0011000
8678 instr[31] = M : 0 ==> OK, 1 ==> UNALLOC
8679 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
8680	     instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8681 instr[15,14] ==> op : 00 ==> OK, ow ==> UNALLOC
8682 instr[4,0] ==> opcode2 : 00000 ==> FCMP, 10000 ==> FCMPE,
8683 01000 ==> FCMPZ, 11000 ==> FCMPEZ,
8684 ow ==> UNALLOC */
8685 uint32_t dispatch;
8686 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
8687 uint32_t type = INSTR (23, 22);
8688 uint32_t op = INSTR (15, 14);
8689 uint32_t op2_2_0 = INSTR (2, 0);
8690
8691 if (op2_2_0 != 0)
8692 HALT_UNALLOC;
8693
8694 if (M_S != 0)
8695 HALT_UNALLOC;
8696
8697 if (type > 1)
8698 HALT_UNALLOC;
8699
8700 if (op != 0)
8701 HALT_UNALLOC;
8702
8703 /* dispatch on type and top 2 bits of opcode. */
8704 dispatch = (type << 2) | INSTR (4, 3);
8705
8706 switch (dispatch)
8707 {
8708 case 0: fcmps (cpu); return;
8709 case 1: fcmpzs (cpu); return;
8710 case 2: fcmpes (cpu); return;
8711 case 3: fcmpzes (cpu); return;
8712 case 4: fcmpd (cpu); return;
8713 case 5: fcmpzd (cpu); return;
8714 case 6: fcmped (cpu); return;
8715 case 7: fcmpzed (cpu); return;
8716 }
8717 }
8718
8719 static void
8720 do_scalar_FADDP (sim_cpu *cpu)
8721 {
8722 /* instr [31,23] = 0111 1110 0
8723 instr [22] = single(0)/double(1)
8724 instr [21,10] = 11 0000 1101 10
8725 instr [9,5] = Fn
8726 instr [4,0] = Fd. */
8727
8728 unsigned Fn = INSTR (9, 5);
8729 unsigned Fd = INSTR (4, 0);
8730
8731 NYI_assert (31, 23, 0x0FC);
8732 NYI_assert (21, 10, 0xC36);
8733
8734 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8735 if (INSTR (22, 22))
8736 {
8737 double val1 = aarch64_get_vec_double (cpu, Fn, 0);
8738 double val2 = aarch64_get_vec_double (cpu, Fn, 1);
8739
8740 aarch64_set_FP_double (cpu, Fd, val1 + val2);
8741 }
8742 else
8743 {
8744 float val1 = aarch64_get_vec_float (cpu, Fn, 0);
8745 float val2 = aarch64_get_vec_float (cpu, Fn, 1);
8746
8747 aarch64_set_FP_float (cpu, Fd, val1 + val2);
8748 }
8749 }
8750
8751 /* Floating point absolute difference. */
8752
8753 static void
8754 do_scalar_FABD (sim_cpu *cpu)
8755 {
8756 /* instr [31,23] = 0111 1110 1
8757 instr [22] = float(0)/double(1)
8758 instr [21] = 1
8759 instr [20,16] = Rm
8760 instr [15,10] = 1101 01
8761 instr [9, 5] = Rn
8762 instr [4, 0] = Rd. */
8763
8764 unsigned rm = INSTR (20, 16);
8765 unsigned rn = INSTR (9, 5);
8766 unsigned rd = INSTR (4, 0);
8767
8768 NYI_assert (31, 23, 0x0FD);
8769 NYI_assert (21, 21, 1);
8770 NYI_assert (15, 10, 0x35);
8771
8772 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8773 if (INSTR (22, 22))
8774 aarch64_set_FP_double (cpu, rd,
8775 fabs (aarch64_get_FP_double (cpu, rn)
8776 - aarch64_get_FP_double (cpu, rm)));
8777 else
8778 aarch64_set_FP_float (cpu, rd,
8779 fabsf (aarch64_get_FP_float (cpu, rn)
8780 - aarch64_get_FP_float (cpu, rm)));
8781 }
8782
8783 static void
8784 do_scalar_CMGT (sim_cpu *cpu)
8785 {
8786 /* instr [31,21] = 0101 1110 111
8787 instr [20,16] = Rm
8788 instr [15,10] = 00 1101
8789 instr [9, 5] = Rn
8790 instr [4, 0] = Rd. */
8791
8792 unsigned rm = INSTR (20, 16);
8793 unsigned rn = INSTR (9, 5);
8794 unsigned rd = INSTR (4, 0);
8795
8796 NYI_assert (31, 21, 0x2F7);
8797 NYI_assert (15, 10, 0x0D);
8798
8799 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8800 aarch64_set_vec_u64 (cpu, rd, 0,
8801 aarch64_get_vec_u64 (cpu, rn, 0) >
8802 aarch64_get_vec_u64 (cpu, rm, 0) ? -1L : 0L);
8803 }
8804
8805 static void
8806 do_scalar_USHR (sim_cpu *cpu)
8807 {
8808 /* instr [31,23] = 0111 1111 0
8809 instr [22,16] = shift amount
8810 instr [15,10] = 0000 01
8811 instr [9, 5] = Rn
8812 instr [4, 0] = Rd. */
8813
8814 unsigned amount = 128 - INSTR (22, 16);
8815 unsigned rn = INSTR (9, 5);
8816 unsigned rd = INSTR (4, 0);
8817
8818 NYI_assert (31, 23, 0x0FE);
8819 NYI_assert (15, 10, 0x01);
8820
8821 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8822 aarch64_set_vec_u64 (cpu, rd, 0,
8823 aarch64_get_vec_u64 (cpu, rn, 0) >> amount);
8824 }
8825
8826 static void
8827 do_scalar_SSHL (sim_cpu *cpu)
8828 {
8829 /* instr [31,21] = 0101 1110 111
8830 instr [20,16] = Rm
8831 instr [15,10] = 0100 01
8832 instr [9, 5] = Rn
8833 instr [4, 0] = Rd. */
8834
8835 unsigned rm = INSTR (20, 16);
8836 unsigned rn = INSTR (9, 5);
8837 unsigned rd = INSTR (4, 0);
8838 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
8839
8840 NYI_assert (31, 21, 0x2F7);
8841 NYI_assert (15, 10, 0x11);
8842
8843 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8844 if (shift >= 0)
8845 aarch64_set_vec_s64 (cpu, rd, 0,
8846 aarch64_get_vec_s64 (cpu, rn, 0) << shift);
8847 else
8848 aarch64_set_vec_s64 (cpu, rd, 0,
8849 aarch64_get_vec_s64 (cpu, rn, 0) >> - shift);
8850 }
8851
8852 static void
8853 do_scalar_shift (sim_cpu *cpu)
8854 {
8855 /* instr [31,23] = 0101 1111 0
8856 instr [22,16] = shift amount
8857 instr [15,10] = 0101 01 [SHL]
8858 instr [15,10] = 0000 01 [SSHR]
8859 instr [9, 5] = Rn
8860 instr [4, 0] = Rd. */
8861
8862 unsigned rn = INSTR (9, 5);
8863 unsigned rd = INSTR (4, 0);
8864 unsigned amount;
8865
8866 NYI_assert (31, 23, 0x0BE);
8867
8868 if (INSTR (22, 22) == 0)
8869 HALT_UNALLOC;
8870
8871 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8872 switch (INSTR (15, 10))
8873 {
8874 case 0x01: /* SSHR */
8875 amount = 128 - INSTR (22, 16);
8876 aarch64_set_vec_s64 (cpu, rd, 0,
8877 aarch64_get_vec_s64 (cpu, rn, 0) >> amount);
8878 return;
8879 case 0x15: /* SHL */
8880 amount = INSTR (22, 16) - 64;
8881 aarch64_set_vec_u64 (cpu, rd, 0,
8882 aarch64_get_vec_u64 (cpu, rn, 0) << amount);
8883 return;
8884 default:
8885 HALT_NYI;
8886 }
8887 }
8888
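/* Worked decode example for do_scalar_shift above (editorial): for
   SSHR D0, D1, #4 the immh:immb field instr[22,16] holds
   128 - 4 = 0x7C, so the code recovers amount = 128 - 0x7C = 4.
   For SHL D0, D1, #4 the field holds 64 + 4 = 0x44 and
   amount = 0x44 - 64 = 4.  */
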
8889 /* FCMEQ FCMGT FCMGE. */
8890 static void
8891 do_scalar_FCM (sim_cpu *cpu)
8892 {
8893 /* instr [31,30] = 01
8894 instr [29] = U
8895 instr [28,24] = 1 1110
8896 instr [23] = E
8897 instr [22] = size
8898 instr [21] = 1
8899 instr [20,16] = Rm
8900 instr [15,12] = 1110
8901 instr [11] = AC
8902 instr [10] = 1
8903 instr [9, 5] = Rn
8904 instr [4, 0] = Rd. */
8905
8906 unsigned rm = INSTR (20, 16);
8907 unsigned rn = INSTR (9, 5);
8908 unsigned rd = INSTR (4, 0);
8909 unsigned EUac = (INSTR (23, 23) << 2) | (INSTR (29, 29) << 1) | INSTR (11, 11);
8910 unsigned result;
8911 float val1;
8912 float val2;
8913
8914 NYI_assert (31, 30, 1);
8915 NYI_assert (28, 24, 0x1E);
8916 NYI_assert (21, 21, 1);
8917 NYI_assert (15, 12, 0xE);
8918 NYI_assert (10, 10, 1);
8919
8920 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8921 if (INSTR (22, 22))
8922 {
8923 double val1 = aarch64_get_FP_double (cpu, rn);
8924 double val2 = aarch64_get_FP_double (cpu, rm);
8925
8926 switch (EUac)
8927 {
8928 case 0: /* 000 */
8929 result = val1 == val2;
8930 break;
8931
8932 case 3: /* 011 */
8933 val1 = fabs (val1);
8934 val2 = fabs (val2);
8935 /* Fall through. */
8936 case 2: /* 010 */
8937 result = val1 >= val2;
8938 break;
8939
8940 case 7: /* 111 */
8941 val1 = fabs (val1);
8942 val2 = fabs (val2);
8943 /* Fall through. */
8944 case 6: /* 110 */
8945 result = val1 > val2;
8946 break;
8947
8948 default:
8949 HALT_UNALLOC;
8950 }
8951
8952 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
8953 return;
8954 }
8955
8956 val1 = aarch64_get_FP_float (cpu, rn);
8957 val2 = aarch64_get_FP_float (cpu, rm);
8958
8959 switch (EUac)
8960 {
8961 case 0: /* 000 */
8962 result = val1 == val2;
8963 break;
8964
8965 case 3: /* 011 */
8966 val1 = fabsf (val1);
8967 val2 = fabsf (val2);
8968 /* Fall through. */
8969 case 2: /* 010 */
8970 result = val1 >= val2;
8971 break;
8972
8973 case 7: /* 111 */
8974 val1 = fabsf (val1);
8975 val2 = fabsf (val2);
8976 /* Fall through. */
8977 case 6: /* 110 */
8978 result = val1 > val2;
8979 break;
8980
8981 default:
8982 HALT_UNALLOC;
8983 }
8984
8985 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
8986 }
8987
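/* The EUac value computed in do_scalar_FCM above maps onto the
   scalar mnemonics as follows (E = instr[23], U = instr[29],
   ac = instr[11]):
     EUac 0 (E=0 U=0 ac=0)  ==>  FCMEQ
     EUac 2 (E=0 U=1 ac=0)  ==>  FCMGE
     EUac 3 (E=0 U=1 ac=1)  ==>  FACGE
     EUac 6 (E=1 U=1 ac=0)  ==>  FCMGT
     EUac 7 (E=1 U=1 ac=1)  ==>  FACGT.  */
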
8988 /* An alias of DUP. */
8989 static void
8990 do_scalar_MOV (sim_cpu *cpu)
8991 {
8992 /* instr [31,21] = 0101 1110 000
8993 instr [20,16] = imm5
8994 instr [15,10] = 0000 01
8995 instr [9, 5] = Rn
8996 instr [4, 0] = Rd. */
8997
8998 unsigned rn = INSTR (9, 5);
8999 unsigned rd = INSTR (4, 0);
9000 unsigned index;
9001
9002 NYI_assert (31, 21, 0x2F0);
9003 NYI_assert (15, 10, 0x01);
9004
9005 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9006 if (INSTR (16, 16))
9007 {
9008 /* 8-bit. */
9009 index = INSTR (20, 17);
9010 aarch64_set_vec_u8
9011 (cpu, rd, 0, aarch64_get_vec_u8 (cpu, rn, index));
9012 }
9013 else if (INSTR (17, 17))
9014 {
9015 /* 16-bit. */
9016 index = INSTR (20, 18);
9017 aarch64_set_vec_u16
9018 (cpu, rd, 0, aarch64_get_vec_u16 (cpu, rn, index));
9019 }
9020 else if (INSTR (18, 18))
9021 {
9022 /* 32-bit. */
9023 index = INSTR (20, 19);
9024 aarch64_set_vec_u32
9025 (cpu, rd, 0, aarch64_get_vec_u32 (cpu, rn, index));
9026 }
9027 else if (INSTR (19, 19))
9028 {
9029 /* 64-bit. */
9030 index = INSTR (20, 20);
9031 aarch64_set_vec_u64
9032 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, index));
9033 }
9034 else
9035 HALT_UNALLOC;
9036 }
9037
9038 static void
9039 do_scalar_NEG (sim_cpu *cpu)
9040 {
9041 /* instr [31,10] = 0111 1110 1110 0000 1011 10
9042 instr [9, 5] = Rn
9043 instr [4, 0] = Rd. */
9044
9045 unsigned rn = INSTR (9, 5);
9046 unsigned rd = INSTR (4, 0);
9047
9048 NYI_assert (31, 10, 0x1FB82E);
9049
9050 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9051 aarch64_set_vec_u64 (cpu, rd, 0, - aarch64_get_vec_u64 (cpu, rn, 0));
9052 }
9053
9054 static void
9055 do_scalar_USHL (sim_cpu *cpu)
9056 {
9057 /* instr [31,21] = 0111 1110 111
9058 instr [20,16] = Rm
9059 instr [15,10] = 0100 01
9060 instr [9, 5] = Rn
9061 instr [4, 0] = Rd. */
9062
9063 unsigned rm = INSTR (20, 16);
9064 unsigned rn = INSTR (9, 5);
9065 unsigned rd = INSTR (4, 0);
9066 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
9067
9068 NYI_assert (31, 21, 0x3F7);
9069 NYI_assert (15, 10, 0x11);
9070
9071 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9072 if (shift >= 0)
9073 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) << shift);
9074 else
9075 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) >> - shift);
9076 }
9077
9078 static void
9079 do_double_add (sim_cpu *cpu)
9080 {
9081 /* instr [31,21] = 0101 1110 111
9082 instr [20,16] = Fn
9083 instr [15,10] = 1000 01
9084 instr [9,5] = Fm
9085 instr [4,0] = Fd. */
9086 unsigned Fd;
9087 unsigned Fm;
9088 unsigned Fn;
9089 double val1;
9090 double val2;
9091
9092 NYI_assert (31, 21, 0x2F7);
9093 NYI_assert (15, 10, 0x21);
9094
9095 Fd = INSTR (4, 0);
9096 Fm = INSTR (9, 5);
9097 Fn = INSTR (20, 16);
9098
9099 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9100 val1 = aarch64_get_FP_double (cpu, Fm);
9101 val2 = aarch64_get_FP_double (cpu, Fn);
9102
9103 aarch64_set_FP_double (cpu, Fd, val1 + val2);
9104 }
9105
9106 static void
9107 do_scalar_UCVTF (sim_cpu *cpu)
9108 {
9109 /* instr [31,23] = 0111 1110 0
9110 instr [22] = single(0)/double(1)
9111 instr [21,10] = 10 0001 1101 10
9112 instr [9,5] = rn
9113 instr [4,0] = rd. */
9114
9115 unsigned rn = INSTR (9, 5);
9116 unsigned rd = INSTR (4, 0);
9117
9118 NYI_assert (31, 23, 0x0FC);
9119 NYI_assert (21, 10, 0x876);
9120
9121 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9122 if (INSTR (22, 22))
9123 {
9124 uint64_t val = aarch64_get_vec_u64 (cpu, rn, 0);
9125
9126 aarch64_set_vec_double (cpu, rd, 0, (double) val);
9127 }
9128 else
9129 {
9130 uint32_t val = aarch64_get_vec_u32 (cpu, rn, 0);
9131
9132 aarch64_set_vec_float (cpu, rd, 0, (float) val);
9133 }
9134 }
9135
9136 static void
9137 do_scalar_vec (sim_cpu *cpu)
9138 {
9139 /* instr [30] = 1. */
9140 /* instr [28,25] = 1111. */
9141 switch (INSTR (31, 23))
9142 {
9143 case 0xBC:
9144 switch (INSTR (15, 10))
9145 {
9146 case 0x01: do_scalar_MOV (cpu); return;
9147 case 0x39: do_scalar_FCM (cpu); return;
9148 case 0x3B: do_scalar_FCM (cpu); return;
9149 }
9150 break;
9151
9152 case 0xBE: do_scalar_shift (cpu); return;
9153
9154 case 0xFC:
9155 switch (INSTR (15, 10))
9156 {
9157 case 0x36:
9158 switch (INSTR (21, 16))
9159 {
9160 case 0x30: do_scalar_FADDP (cpu); return;
9161 case 0x21: do_scalar_UCVTF (cpu); return;
9162 }
9163 HALT_NYI;
9164 case 0x39: do_scalar_FCM (cpu); return;
9165 case 0x3B: do_scalar_FCM (cpu); return;
9166 }
9167 break;
9168
9169 case 0xFD:
9170 switch (INSTR (15, 10))
9171 {
9172 case 0x0D: do_scalar_CMGT (cpu); return;
9173 case 0x11: do_scalar_USHL (cpu); return;
9174 case 0x2E: do_scalar_NEG (cpu); return;
9175 case 0x35: do_scalar_FABD (cpu); return;
9176 case 0x39: do_scalar_FCM (cpu); return;
9177 case 0x3B: do_scalar_FCM (cpu); return;
9178 default:
9179 HALT_NYI;
9180 }
9181
9182 case 0xFE: do_scalar_USHR (cpu); return;
9183
9184 case 0xBD:
9185 switch (INSTR (15, 10))
9186 {
9187 case 0x21: do_double_add (cpu); return;
9188 case 0x11: do_scalar_SSHL (cpu); return;
9189 default:
9190 HALT_NYI;
9191 }
9192
9193 default:
9194 HALT_NYI;
9195 }
9196 }
9197
9198 static void
9199 dexAdvSIMD1 (sim_cpu *cpu)
9200 {
9201 /* instr [28,25] = 1 111. */
9202
9203 /* We are currently only interested in the basic
9204 scalar fp routines which all have bit 30 = 0. */
9205 if (INSTR (30, 30))
9206 do_scalar_vec (cpu);
9207
9208 /* instr[24] is set for FP data processing 3-source and clear for
9209 all other basic scalar fp instruction groups. */
9210 else if (INSTR (24, 24))
9211 dexSimpleFPDataProc3Source (cpu);
9212
9213 /* instr[21] is clear for floating <-> fixed conversions and set for
9214 all other basic scalar fp instruction groups. */
9215 else if (!INSTR (21, 21))
9216 dexSimpleFPFixedConvert (cpu);
9217
9218 /* instr[11,10] : 01 ==> cond compare, 10 ==> Data Proc 2 Source
9219 11 ==> cond select, 00 ==> other. */
9220 else
9221 switch (INSTR (11, 10))
9222 {
9223 case 1: dexSimpleFPCondCompare (cpu); return;
9224 case 2: dexSimpleFPDataProc2Source (cpu); return;
9225 case 3: dexSimpleFPCondSelect (cpu); return;
9226
9227 default:
9228 /* Now an ordered cascade of tests.
9229 FP immediate has instr [12] == 1.
9230 FP compare has instr [13] == 1.
9231 FP Data Proc 1 Source has instr [14] == 1.
9232 FP floating <--> integer conversions has instr [15] == 0. */
9233 if (INSTR (12, 12))
9234 dexSimpleFPImmediate (cpu);
9235
9236 else if (INSTR (13, 13))
9237 dexSimpleFPCompare (cpu);
9238
9239 else if (INSTR (14, 14))
9240 dexSimpleFPDataProc1Source (cpu);
9241
9242 else if (!INSTR (15, 15))
9243 dexSimpleFPIntegerConvert (cpu);
9244
9245 else
9246 /* If we get here then instr[15] == 1 which means UNALLOC. */
9247 HALT_UNALLOC;
9248 }
9249 }
9250
9251 /* PC relative addressing. */
9252
9253 static void
9254 pcadr (sim_cpu *cpu)
9255 {
9256 /* instr[31] = op : 0 ==> ADR, 1 ==> ADRP
9257 instr[30,29] = immlo
9258 instr[23,5] = immhi. */
9259 uint64_t address;
9260 unsigned rd = INSTR (4, 0);
9261 uint32_t isPage = INSTR (31, 31);
9262	  union { int64_t s64; uint64_t u64; } imm;
9263 uint64_t offset;
9264
9265 imm.s64 = simm64 (aarch64_get_instr (cpu), 23, 5);
9266 offset = imm.u64;
9267 offset = (offset << 2) | INSTR (30, 29);
9268
9269 address = aarch64_get_PC (cpu);
9270
9271 if (isPage)
9272 {
9273 offset <<= 12;
9274 address &= ~0xfff;
9275 }
9276
9277 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9278 aarch64_set_reg_u64 (cpu, rd, NO_SP, address + offset);
9279 }
9280
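/* Worked example for pcadr above (editorial values): with
   PC = 0x400653 and an assembled immhi:immlo of 1, an ADR yields
   0x400653 + 1 = 0x400654, while an ADRP with the same encoded
   immediate yields (0x400653 & ~0xfff) + (1 << 12) = 0x401000,
   since ADRP clears the low 12 bits of the PC and shifts the
   immediate left by 12.  */
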
9281 /* Specific decode and execute for group Data Processing Immediate. */
9282
9283 static void
9284 dexPCRelAddressing (sim_cpu *cpu)
9285 {
9286 /* assert instr[28,24] = 10000. */
9287 pcadr (cpu);
9288 }
9289
9290 /* Immediate logical.
9291 The bimm32/64 argument is constructed by replicating a 2, 4, 8,
9292 16, 32 or 64 bit sequence pulled out at decode and possibly
9293	   inverting it.
9294	
9295	   N.B. the output register (dest) can normally be Xn or SP;
9296	   the exception occurs for flag setting instructions which may
9297 only use Xn for the output (dest). The input register can
9298 never be SP. */
9299
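/* Worked decode example (editorial): for a 32 bit operation with
   N = 0, immr = 0 and imms = 0b111100, the expansion used to build
   LITable selects a 2 bit element containing 0b01, rotates it right
   by 0 and replicates it 16 times, giving bimm32 = 0x55555555.  */
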
9300 /* 32 bit and immediate. */
9301 static void
9302 and32 (sim_cpu *cpu, uint32_t bimm)
9303 {
9304 unsigned rn = INSTR (9, 5);
9305 unsigned rd = INSTR (4, 0);
9306
9307 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9308 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9309 aarch64_get_reg_u32 (cpu, rn, NO_SP) & bimm);
9310 }
9311
9312 /* 64 bit and immediate. */
9313 static void
9314 and64 (sim_cpu *cpu, uint64_t bimm)
9315 {
9316 unsigned rn = INSTR (9, 5);
9317 unsigned rd = INSTR (4, 0);
9318
9319 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9320 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9321 aarch64_get_reg_u64 (cpu, rn, NO_SP) & bimm);
9322 }
9323
9324 /* 32 bit and immediate set flags. */
9325 static void
9326 ands32 (sim_cpu *cpu, uint32_t bimm)
9327 {
9328 unsigned rn = INSTR (9, 5);
9329 unsigned rd = INSTR (4, 0);
9330
9331 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9332 uint32_t value2 = bimm;
9333
9334 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9335 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9336 set_flags_for_binop32 (cpu, value1 & value2);
9337 }
9338
9339 /* 64 bit and immediate set flags. */
9340 static void
9341 ands64 (sim_cpu *cpu, uint64_t bimm)
9342 {
9343 unsigned rn = INSTR (9, 5);
9344 unsigned rd = INSTR (4, 0);
9345
9346 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9347 uint64_t value2 = bimm;
9348
9349 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9350 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9351 set_flags_for_binop64 (cpu, value1 & value2);
9352 }
9353
9354 /* 32 bit exclusive or immediate. */
9355 static void
9356 eor32 (sim_cpu *cpu, uint32_t bimm)
9357 {
9358 unsigned rn = INSTR (9, 5);
9359 unsigned rd = INSTR (4, 0);
9360
9361 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9362 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9363 aarch64_get_reg_u32 (cpu, rn, NO_SP) ^ bimm);
9364 }
9365
9366 /* 64 bit exclusive or immediate. */
9367 static void
9368 eor64 (sim_cpu *cpu, uint64_t bimm)
9369 {
9370 unsigned rn = INSTR (9, 5);
9371 unsigned rd = INSTR (4, 0);
9372
9373 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9374 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9375 aarch64_get_reg_u64 (cpu, rn, NO_SP) ^ bimm);
9376 }
9377
9378 /* 32 bit or immediate. */
9379 static void
9380 orr32 (sim_cpu *cpu, uint32_t bimm)
9381 {
9382 unsigned rn = INSTR (9, 5);
9383 unsigned rd = INSTR (4, 0);
9384
9385 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9386 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9387 aarch64_get_reg_u32 (cpu, rn, NO_SP) | bimm);
9388 }
9389
9390 /* 64 bit or immediate. */
9391 static void
9392 orr64 (sim_cpu *cpu, uint64_t bimm)
9393 {
9394 unsigned rn = INSTR (9, 5);
9395 unsigned rd = INSTR (4, 0);
9396
9397 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9398 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9399 aarch64_get_reg_u64 (cpu, rn, NO_SP) | bimm);
9400 }
9401
9402 /* Logical shifted register.
9403 These allow an optional LSL, ASR, LSR or ROR to the second source
9404 register with a count up to the register bit count.
9405	   N.B. register args may not be SP.  */
9406
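/* For example (editorial), AND X0, X1, X2, LSL #4 reaches
   and64_shift below with shift = LSL and count = 4 and computes
   X0 = X1 & (X2 << 4).  */
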
9407 /* 32 bit AND shifted register. */
9408 static void
9409 and32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9410 {
9411 unsigned rm = INSTR (20, 16);
9412 unsigned rn = INSTR (9, 5);
9413 unsigned rd = INSTR (4, 0);
9414
9415 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9416 aarch64_set_reg_u64
9417 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9418 & shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9419 }
9420
9421 /* 64 bit AND shifted register. */
9422 static void
9423 and64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9424 {
9425 unsigned rm = INSTR (20, 16);
9426 unsigned rn = INSTR (9, 5);
9427 unsigned rd = INSTR (4, 0);
9428
9429 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9430 aarch64_set_reg_u64
9431 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9432 & shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9433 }
9434
9435 /* 32 bit AND shifted register setting flags. */
9436 static void
9437 ands32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9438 {
9439 unsigned rm = INSTR (20, 16);
9440 unsigned rn = INSTR (9, 5);
9441 unsigned rd = INSTR (4, 0);
9442
9443 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9444 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9445 shift, count);
9446
9447 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9448 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9449 set_flags_for_binop32 (cpu, value1 & value2);
9450 }
9451
9452 /* 64 bit AND shifted register setting flags. */
9453 static void
9454 ands64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9455 {
9456 unsigned rm = INSTR (20, 16);
9457 unsigned rn = INSTR (9, 5);
9458 unsigned rd = INSTR (4, 0);
9459
9460 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9461 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9462 shift, count);
9463
9464 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9465 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9466 set_flags_for_binop64 (cpu, value1 & value2);
9467 }
9468
9469 /* 32 bit BIC shifted register. */
9470 static void
9471 bic32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9472 {
9473 unsigned rm = INSTR (20, 16);
9474 unsigned rn = INSTR (9, 5);
9475 unsigned rd = INSTR (4, 0);
9476
9477 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9478 aarch64_set_reg_u64
9479 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9480 & ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9481 }
9482
9483 /* 64 bit BIC shifted register. */
9484 static void
9485 bic64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9486 {
9487 unsigned rm = INSTR (20, 16);
9488 unsigned rn = INSTR (9, 5);
9489 unsigned rd = INSTR (4, 0);
9490
9491 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9492 aarch64_set_reg_u64
9493 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9494 & ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9495 }
9496
9497 /* 32 bit BIC shifted register setting flags. */
9498 static void
9499 bics32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9500 {
9501 unsigned rm = INSTR (20, 16);
9502 unsigned rn = INSTR (9, 5);
9503 unsigned rd = INSTR (4, 0);
9504
9505 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9506 uint32_t value2 = ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9507 shift, count);
9508
9509 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9510 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9511 set_flags_for_binop32 (cpu, value1 & value2);
9512 }
9513
9514 /* 64 bit BIC shifted register setting flags. */
9515 static void
9516 bics64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9517 {
9518 unsigned rm = INSTR (20, 16);
9519 unsigned rn = INSTR (9, 5);
9520 unsigned rd = INSTR (4, 0);
9521
9522 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9523 uint64_t value2 = ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9524 shift, count);
9525
9526 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9527 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9528 set_flags_for_binop64 (cpu, value1 & value2);
9529 }
9530
9531 /* 32 bit EON shifted register. */
9532 static void
9533 eon32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9534 {
9535 unsigned rm = INSTR (20, 16);
9536 unsigned rn = INSTR (9, 5);
9537 unsigned rd = INSTR (4, 0);
9538
9539 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9540 aarch64_set_reg_u64
9541 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9542 ^ ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9543 }
9544
9545 /* 64 bit EON shifted register. */
9546 static void
9547 eon64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9548 {
9549 unsigned rm = INSTR (20, 16);
9550 unsigned rn = INSTR (9, 5);
9551 unsigned rd = INSTR (4, 0);
9552
9553 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9554 aarch64_set_reg_u64
9555 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9556 ^ ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9557 }
9558
9559 /* 32 bit EOR shifted register. */
9560 static void
9561 eor32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9562 {
9563 unsigned rm = INSTR (20, 16);
9564 unsigned rn = INSTR (9, 5);
9565 unsigned rd = INSTR (4, 0);
9566
9567 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9568 aarch64_set_reg_u64
9569 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9570 ^ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9571 }
9572
9573 /* 64 bit EOR shifted register. */
9574 static void
9575 eor64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9576 {
9577 unsigned rm = INSTR (20, 16);
9578 unsigned rn = INSTR (9, 5);
9579 unsigned rd = INSTR (4, 0);
9580
9581 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9582 aarch64_set_reg_u64
9583 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9584 ^ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9585 }
9586
9587 /* 32 bit ORR shifted register. */
9588 static void
9589 orr32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9590 {
9591 unsigned rm = INSTR (20, 16);
9592 unsigned rn = INSTR (9, 5);
9593 unsigned rd = INSTR (4, 0);
9594
9595 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9596 aarch64_set_reg_u64
9597 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9598 | shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9599 }
9600
9601 /* 64 bit ORR shifted register. */
9602 static void
9603 orr64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9604 {
9605 unsigned rm = INSTR (20, 16);
9606 unsigned rn = INSTR (9, 5);
9607 unsigned rd = INSTR (4, 0);
9608
9609 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9610 aarch64_set_reg_u64
9611 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9612 | shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9613 }
9614
9615 /* 32 bit ORN shifted register. */
9616 static void
9617 orn32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9618 {
9619 unsigned rm = INSTR (20, 16);
9620 unsigned rn = INSTR (9, 5);
9621 unsigned rd = INSTR (4, 0);
9622
9623 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9624 aarch64_set_reg_u64
9625 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9626 | ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9627 }
9628
9629 /* 64 bit ORN shifted register. */
9630 static void
9631 orn64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9632 {
9633 unsigned rm = INSTR (20, 16);
9634 unsigned rn = INSTR (9, 5);
9635 unsigned rd = INSTR (4, 0);
9636
9637 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9638 aarch64_set_reg_u64
9639 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9640 | ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9641 }
9642
9643 static void
9644 dexLogicalImmediate (sim_cpu *cpu)
9645 {
9646	  /* assert instr[28,23] = 100100
9647 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9648 instr[30,29] = op : 0 ==> AND, 1 ==> ORR, 2 ==> EOR, 3 ==> ANDS
9649 instr[22] = N : used to construct immediate mask
9650 instr[21,16] = immr
9651 instr[15,10] = imms
9652 instr[9,5] = Rn
9653 instr[4,0] = Rd */
9654
9655 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
9656 uint32_t size = INSTR (31, 31);
9657 uint32_t N = INSTR (22, 22);
9658	  /* uint32_t immr = INSTR (21, 16); */
9659	  /* uint32_t imms = INSTR (15, 10); */
9660 uint32_t index = INSTR (22, 10);
9661 uint64_t bimm64 = LITable [index];
9662 uint32_t dispatch = INSTR (30, 29);
9663
9664 if (~size & N)
9665 HALT_UNALLOC;
9666
9667 if (!bimm64)
9668 HALT_UNALLOC;
9669
9670 if (size == 0)
9671 {
9672 uint32_t bimm = (uint32_t) bimm64;
9673
9674 switch (dispatch)
9675 {
9676 case 0: and32 (cpu, bimm); return;
9677 case 1: orr32 (cpu, bimm); return;
9678 case 2: eor32 (cpu, bimm); return;
9679 case 3: ands32 (cpu, bimm); return;
9680 }
9681 }
9682 else
9683 {
9684 switch (dispatch)
9685 {
9686 case 0: and64 (cpu, bimm64); return;
9687 case 1: orr64 (cpu, bimm64); return;
9688 case 2: eor64 (cpu, bimm64); return;
9689 case 3: ands64 (cpu, bimm64); return;
9690 }
9691 }
9692 HALT_UNALLOC;
9693 }
9694
9695 /* Immediate move.
9696	   The uimm argument is a 16 bit value to be inserted into the
9697	   target register.  The pos argument locates the 16 bit word in the
9698	   dest register, i.e. it is in {0, 1} for 32 bit and {0, 1, 2,
9699	   3} for 64 bit.
9700	   N.B. the register arg may not be SP so it should be
9701	   accessed using the setGZRegisterXXX accessors.  */
9702
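/* Worked example (editorial): the 32 bit constant 0x12345678 can be
   materialised as
     MOVZ W0, #0x5678            ==>  movz32 (cpu, 0x5678, 0)
     MOVK W0, #0x1234, LSL #16   ==>  movk32 (cpu, 0x1234, 1)
   leaving W0 = 0x12345678.  MOVN instead writes the bitwise NOT of
   the shifted immediate.  */
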
9703 /* 32 bit move 16 bit immediate zero remaining shorts. */
9704 static void
9705 movz32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9706 {
9707 unsigned rd = INSTR (4, 0);
9708
9709 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9710 aarch64_set_reg_u64 (cpu, rd, NO_SP, val << (pos * 16));
9711 }
9712
9713 /* 64 bit move 16 bit immediate zero remaining shorts. */
9714 static void
9715 movz64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9716 {
9717 unsigned rd = INSTR (4, 0);
9718
9719 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9720 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((uint64_t) val) << (pos * 16));
9721 }
9722
9723 /* 32 bit move 16 bit immediate negated. */
9724 static void
9725 movn32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9726 {
9727 unsigned rd = INSTR (4, 0);
9728
9729 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9730 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((val << (pos * 16)) ^ 0xffffffffU));
9731 }
9732
9733 /* 64 bit move 16 bit immediate negated. */
9734 static void
9735 movn64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9736 {
9737 unsigned rd = INSTR (4, 0);
9738
9739 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9740 aarch64_set_reg_u64
9741 (cpu, rd, NO_SP, ((((uint64_t) val) << (pos * 16))
9742 ^ 0xffffffffffffffffULL));
9743 }
9744
9745 /* 32 bit move 16 bit immediate keep remaining shorts. */
9746 static void
9747 movk32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9748 {
9749 unsigned rd = INSTR (4, 0);
9750 uint32_t current = aarch64_get_reg_u32 (cpu, rd, NO_SP);
9751 uint32_t value = val << (pos * 16);
9752 uint32_t mask = ~(0xffffU << (pos * 16));
9753
9754 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9755 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
9756 }
9757
9758	/* 64 bit move 16 bit immediate keep remaining shorts. */
9759 static void
9760 movk64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9761 {
9762 unsigned rd = INSTR (4, 0);
9763 uint64_t current = aarch64_get_reg_u64 (cpu, rd, NO_SP);
9764 uint64_t value = (uint64_t) val << (pos * 16);
9765 uint64_t mask = ~(0xffffULL << (pos * 16));
9766
9767 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9768 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
9769 }
9770
9771 static void
9772 dexMoveWideImmediate (sim_cpu *cpu)
9773 {
9774 /* assert instr[28:23] = 100101
9775 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9776 instr[30,29] = op : 0 ==> MOVN, 1 ==> UNALLOC, 2 ==> MOVZ, 3 ==> MOVK
9777 instr[22,21] = shift : 00 == LSL#0, 01 = LSL#16, 10 = LSL#32, 11 = LSL#48
9778 instr[20,5] = uimm16
9779 instr[4,0] = Rd */
9780
9781 /* N.B. the (multiple of 16) shift is applied by the called routine,
9782 we just pass the multiplier. */
9783
9784 uint32_t imm;
9785 uint32_t size = INSTR (31, 31);
9786 uint32_t op = INSTR (30, 29);
9787 uint32_t shift = INSTR (22, 21);
9788
9789 /* 32 bit can only shift 0 or 1 lot of 16.
9790	     Anything else is an unallocated instruction. */
9791 if (size == 0 && (shift > 1))
9792 HALT_UNALLOC;
9793
9794 if (op == 1)
9795 HALT_UNALLOC;
9796
9797 imm = INSTR (20, 5);
9798
9799 if (size == 0)
9800 {
9801 if (op == 0)
9802 movn32 (cpu, imm, shift);
9803 else if (op == 2)
9804 movz32 (cpu, imm, shift);
9805 else
9806 movk32 (cpu, imm, shift);
9807 }
9808 else
9809 {
9810 if (op == 0)
9811 movn64 (cpu, imm, shift);
9812 else if (op == 2)
9813 movz64 (cpu, imm, shift);
9814 else
9815 movk64 (cpu, imm, shift);
9816 }
9817 }
9818
9819 /* Bitfield operations.
9820 These take a pair of bit positions r and s which are in {0..31}
9821 or {0..63} depending on the instruction word size.
9822 N.B register args may not be SP. */
9823
9824	/* OK, we start with ubfm which just needs to pick
9825	   some bits out of the source, zero the rest, and write
9826	   the result to dest.  Just two logical shifts are needed. */
9827
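/* Worked example (editorial): LSR W0, W1, #8 is an alias of
   UBFM W0, W1, #8, #31, i.e. ubfm32 below with r = 8 and s = 31.
   Since r <= s the code computes
     value = (value << (31 - 31)) >> (31 + 8 - 31)  ==>  value >> 8.  */
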
9828 /* 32 bit bitfield move, left and right of affected zeroed
9829 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
9830 static void
9831 ubfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9832 {
9833 unsigned rd;
9834 unsigned rn = INSTR (9, 5);
9835 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9836
9837 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
9838 if (r <= s)
9839 {
9840 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
9841 We want only bits s:xxx:r at the bottom of the word
9842 so we LSL bit s up to bit 31 i.e. by 31 - s
9843 and then we LSR to bring bit 31 down to bit s - r
9844 i.e. by 31 + r - s. */
9845 value <<= 31 - s;
9846 value >>= 31 + r - s;
9847 }
9848 else
9849 {
9850 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0
9851	 We want only bits s:xxx:0 starting at bit 31-(r-1)
9852 so we LSL bit s up to bit 31 i.e. by 31 - s
9853 and then we LSL to bring bit 31 down to 31-(r-1)+s
9854 i.e. by r - (s + 1). */
9855 value <<= 31 - s;
9856 value >>= r - (s + 1);
9857 }
9858
9859 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9860 rd = INSTR (4, 0);
9861 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
9862 }
9863
9864 /* 64 bit bitfield move, left and right of affected zeroed
9865 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
9866 static void
9867 ubfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9868 {
9869 unsigned rd;
9870 unsigned rn = INSTR (9, 5);
9871 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9872
9873 if (r <= s)
9874 {
9875 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
9876 We want only bits s:xxx:r at the bottom of the word.
9877 So we LSL bit s up to bit 63 i.e. by 63 - s
9878 and then we LSR to bring bit 63 down to bit s - r
9879 i.e. by 63 + r - s. */
9880 value <<= 63 - s;
9881 value >>= 63 + r - s;
9882 }
9883 else
9884 {
9885 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0.
9886	 We want only bits s:xxx:0 starting at bit 63-(r-1).
9887 So we LSL bit s up to bit 63 i.e. by 63 - s
9888 and then we LSL to bring bit 63 down to 63-(r-1)+s
9889 i.e. by r - (s + 1). */
9890 value <<= 63 - s;
9891 value >>= r - (s + 1);
9892 }
9893
9894 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9895 rd = INSTR (4, 0);
9896 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
9897 }
9898
9899 /* The signed versions need to insert sign bits
9900 on the left of the inserted bit field. so we do
9901 much the same as the unsigned version except we
9902 use an arithmetic shift right -- this just means
9903 we need to operate on signed values. */
9904
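/* For example (editorial), ASR W0, W1, #16 is an alias of
   SBFM W0, W1, #16, #31, i.e. sbfm32 below with r = 16 and s = 31;
   applied to W1 = 0x80000000 the arithmetic shifts give
     (0x80000000 << 0) >> 16  ==>  0xffff8000.  */
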
9905 /* 32 bit bitfield move, left of affected sign-extended, right zeroed. */
9906 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
9907 static void
9908 sbfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9909 {
9910 unsigned rd;
9911 unsigned rn = INSTR (9, 5);
9912 /* as per ubfm32 but use an ASR instead of an LSR. */
9913 int32_t value = aarch64_get_reg_s32 (cpu, rn, NO_SP);
9914
9915 if (r <= s)
9916 {
9917 value <<= 31 - s;
9918 value >>= 31 + r - s;
9919 }
9920 else
9921 {
9922 value <<= 31 - s;
9923 value >>= r - (s + 1);
9924 }
9925
9926 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9927 rd = INSTR (4, 0);
9928 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
9929 }
9930
9931 /* 64 bit bitfield move, left of affected sign-extended, right zeroed. */
9932 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
9933 static void
9934 sbfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9935 {
9936 unsigned rd;
9937 unsigned rn = INSTR (9, 5);
9938	  /* as per ubfm but use an ASR instead of an LSR. */
9939 int64_t value = aarch64_get_reg_s64 (cpu, rn, NO_SP);
9940
9941 if (r <= s)
9942 {
9943 value <<= 63 - s;
9944 value >>= 63 + r - s;
9945 }
9946 else
9947 {
9948 value <<= 63 - s;
9949 value >>= r - (s + 1);
9950 }
9951
9952 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9953 rd = INSTR (4, 0);
9954 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
9955 }
9956
9957 /* Finally, these versions leave non-affected bits
9958	   as is.  So we need to generate the bits as per
9959 ubfm and also generate a mask to pick the
9960 bits from the original and computed values. */
9961
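/* Worked example (editorial): BFXIL W0, W1, #0, #8 is an alias of
   BFM W0, W1, #0, #7, i.e. bfm32 below with r = 0 and s = 7.  The
   shifts give mask = 0xff, so only the low byte of the destination
   is replaced:  W0 = (W0 & ~0xff) | (W1 & 0xff).  */
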
9962 /* 32 bit bitfield move, non-affected bits left as is.
9963 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
9964 static void
9965 bfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9966 {
9967 unsigned rn = INSTR (9, 5);
9968 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9969 uint32_t mask = -1;
9970 unsigned rd;
9971 uint32_t value2;
9972
9973 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
9974 if (r <= s)
9975 {
9976 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
9977 We want only bits s:xxx:r at the bottom of the word
9978 so we LSL bit s up to bit 31 i.e. by 31 - s
9979 and then we LSR to bring bit 31 down to bit s - r
9980 i.e. by 31 + r - s. */
9981 value <<= 31 - s;
9982 value >>= 31 + r - s;
9983 /* the mask must include the same bits. */
9984 mask <<= 31 - s;
9985 mask >>= 31 + r - s;
9986 }
9987 else
9988 {
9989 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0.
9990	 We want only bits s:xxx:0 starting at bit 31-(r-1)
9991 so we LSL bit s up to bit 31 i.e. by 31 - s
9992 and then we LSL to bring bit 31 down to 31-(r-1)+s
9993 i.e. by r - (s + 1). */
9994 value <<= 31 - s;
9995 value >>= r - (s + 1);
9996 /* The mask must include the same bits. */
9997 mask <<= 31 - s;
9998 mask >>= r - (s + 1);
9999 }
10000
10001 rd = INSTR (4, 0);
10002 value2 = aarch64_get_reg_u32 (cpu, rd, NO_SP);
10003
10004 value2 &= ~mask;
10005 value2 |= value;
10006
10007 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10008	  aarch64_set_reg_u64 (cpu, rd, NO_SP, value2);
10010 }
10011
10012 /* 64 bit bitfield move, non-affected bits left as is.
10013 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
10014 static void
10015 bfm (sim_cpu *cpu, uint32_t r, uint32_t s)
10016 {
10017 unsigned rd;
10018 unsigned rn = INSTR (9, 5);
10019 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
10020 uint64_t mask = 0xffffffffffffffffULL;
10021
10022 if (r <= s)
10023 {
10024 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
10025 We want only bits s:xxx:r at the bottom of the word
10026 so we LSL bit s up to bit 63 i.e. by 63 - s
10027 and then we LSR to bring bit 63 down to bit s - r
10028 i.e. by 63 + r - s. */
10029 value <<= 63 - s;
10030 value >>= 63 + r - s;
10031 /* The mask must include the same bits. */
10032 mask <<= 63 - s;
10033 mask >>= 63 + r - s;
10034 }
10035 else
10036 {
10037 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0
10038	 We want only bits s:xxx:0 starting at bit 63-(r-1)
10039 so we LSL bit s up to bit 63 i.e. by 63 - s
10040 and then we LSL to bring bit 63 down to 63-(r-1)+s
10041 i.e. by r - (s + 1). */
10042 value <<= 63 - s;
10043 value >>= r - (s + 1);
10044 /* The mask must include the same bits. */
10045 mask <<= 63 - s;
10046 mask >>= r - (s + 1);
10047 }
10048
10049 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10050 rd = INSTR (4, 0);
10051 aarch64_set_reg_u64
10052 (cpu, rd, NO_SP, (aarch64_get_reg_u64 (cpu, rd, NO_SP) & ~mask) | value);
10053 }
10054
10055 static void
10056 dexBitfieldImmediate (sim_cpu *cpu)
10057 {
10058 /* assert instr[28:23] = 100110
10059 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10060 instr[30,29] = op : 0 ==> SBFM, 1 ==> BFM, 2 ==> UBFM, 3 ==> UNALLOC
10061 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit ow UNALLOC
10062 instr[21,16] = immr : 0xxxxx for 32 bit, xxxxxx for 64 bit
10063 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
10064 instr[9,5] = Rn
10065 instr[4,0] = Rd */
10066
10067 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
10068 uint32_t dispatch;
10069 uint32_t imms;
10070 uint32_t size = INSTR (31, 31);
10071 uint32_t N = INSTR (22, 22);
10072 /* 32 bit operations must have immr[5] = 0 and imms[5] = 0. */
10073 /* or else we have an UNALLOC. */
10074 uint32_t immr = INSTR (21, 16);
10075
10076 if (~size & N)
10077 HALT_UNALLOC;
10078
10079 if (!size && uimm (immr, 5, 5))
10080 HALT_UNALLOC;
10081
10082 imms = INSTR (15, 10);
10083 if (!size && uimm (imms, 5, 5))
10084 HALT_UNALLOC;
10085
10086 /* Switch on combined size and op. */
10087 dispatch = INSTR (31, 29);
10088 switch (dispatch)
10089 {
10090 case 0: sbfm32 (cpu, immr, imms); return;
10091 case 1: bfm32 (cpu, immr, imms); return;
10092 case 2: ubfm32 (cpu, immr, imms); return;
10093 case 4: sbfm (cpu, immr, imms); return;
10094 case 5: bfm (cpu, immr, imms); return;
10095 case 6: ubfm (cpu, immr, imms); return;
10096 default: HALT_UNALLOC;
10097 }
10098 }
10099
10100 static void
10101 do_EXTR_32 (sim_cpu *cpu)
10102 {
10103 /* instr[31:21] = 00010011100
10104 instr[20,16] = Rm
10105 instr[15,10] = imms : 0xxxxx for 32 bit
10106 instr[9,5] = Rn
10107 instr[4,0] = Rd */
10108 unsigned rm = INSTR (20, 16);
10109 unsigned imms = INSTR (15, 10) & 31;
10110 unsigned rn = INSTR ( 9, 5);
10111 unsigned rd = INSTR ( 4, 0);
10112 uint64_t val1;
10113 uint64_t val2;
10114
10115 val1 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
10116 val1 >>= imms;
10117 val2 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
10118 val2 <<= (32 - imms);
10119
10120 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10121	  aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) (val1 | val2));
10122 }
10123
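/* Worked example for do_EXTR_32 above (editorial values): with
   Wn = 0xAABBCCDD, Wm = 0x11223344 and imms = 8, EXTR extracts
   bits [39,8] of the 64 bit concatenation Wn:Wm:
     val1 = 0x11223344 >> 8   =   0x00112233
     val2 = 0xAABBCCDD << 24  ==> low 32 bits 0xDD000000
     result                   =   0xDD112233.  */
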
10124 static void
10125 do_EXTR_64 (sim_cpu *cpu)
10126 {
10127 /* instr[31:21] = 10010011100
10128 instr[20,16] = Rm
10129 instr[15,10] = imms
10130 instr[9,5] = Rn
10131 instr[4,0] = Rd */
10132 unsigned rm = INSTR (20, 16);
10133 unsigned imms = INSTR (15, 10) & 63;
10134 unsigned rn = INSTR ( 9, 5);
10135 unsigned rd = INSTR ( 4, 0);
10136 uint64_t val;
10137
10138 val = aarch64_get_reg_u64 (cpu, rm, NO_SP);
10139 val >>= imms;
10140	  /* N.B. a shift by 64 would be undefined behaviour in C;
	     EXTR with imms == 0 simply yields Rm.  */
	  if (imms)
	    val |= (aarch64_get_reg_u64 (cpu, rn, NO_SP) << (64 - imms));
10141
10142 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
10143 }
10144
10145 static void
10146 dexExtractImmediate (sim_cpu *cpu)
10147 {
10148 /* assert instr[28:23] = 100111
10149 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10150 instr[30,29] = op21 : 0 ==> EXTR, 1,2,3 ==> UNALLOC
10151 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit or UNALLOC
10152 instr[21] = op0 : must be 0 or UNALLOC
10153 instr[20,16] = Rm
10154 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
10155 instr[9,5] = Rn
10156 instr[4,0] = Rd */
10157
10158 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
10159 /* 64 bit operations must have N = 1 or else we have an UNALLOC. */
10160 uint32_t dispatch;
10161 uint32_t size = INSTR (31, 31);
10162 uint32_t N = INSTR (22, 22);
10163 /* 32 bit operations must have imms[5] = 0
10164 or else we have an UNALLOC. */
10165 uint32_t imms = INSTR (15, 10);
10166
10167 if (size ^ N)
10168 HALT_UNALLOC;
10169
10170 if (!size && uimm (imms, 5, 5))
10171 HALT_UNALLOC;
10172
10173 /* Switch on combined size and op. */
10174 dispatch = INSTR (31, 29);
10175
10176 if (dispatch == 0)
10177 do_EXTR_32 (cpu);
10178
10179 else if (dispatch == 4)
10180 do_EXTR_64 (cpu);
10181
10182 else if (dispatch == 1)
10183 HALT_NYI;
10184 else
10185 HALT_UNALLOC;
10186 }
10187
10188 static void
10189 dexDPImm (sim_cpu *cpu)
10190 {
10191 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
10192	   assert group == GROUP_DPIMM_1000 || group == GROUP_DPIMM_1001
10193 bits [25,23] of a DPImm are the secondary dispatch vector. */
10194 uint32_t group2 = dispatchDPImm (aarch64_get_instr (cpu));
10195
10196 switch (group2)
10197 {
10198 case DPIMM_PCADR_000:
10199 case DPIMM_PCADR_001:
10200 dexPCRelAddressing (cpu);
10201 return;
10202
10203 case DPIMM_ADDSUB_010:
10204 case DPIMM_ADDSUB_011:
10205 dexAddSubtractImmediate (cpu);
10206 return;
10207
10208 case DPIMM_LOG_100:
10209 dexLogicalImmediate (cpu);
10210 return;
10211
10212 case DPIMM_MOV_101:
10213 dexMoveWideImmediate (cpu);
10214 return;
10215
10216 case DPIMM_BITF_110:
10217 dexBitfieldImmediate (cpu);
10218 return;
10219
10220 case DPIMM_EXTR_111:
10221 dexExtractImmediate (cpu);
10222 return;
10223
10224 default:
10225 /* Should never reach here. */
10226 HALT_NYI;
10227 }
10228 }
10229
10230 static void
10231 dexLoadUnscaledImmediate (sim_cpu *cpu)
10232 {
10233 /* instr[29,24] == 111_00
10234 instr[21] == 0
10235 instr[11,10] == 00
10236 instr[31,30] = size
10237 instr[26] = V
10238 instr[23,22] = opc
10239 instr[20,12] = simm9
10240 instr[9,5] = rn may be SP. */
10241 /* unsigned rt = INSTR (4, 0); */
10242 uint32_t V = INSTR (26, 26);
10243 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10244 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10245
10246 if (!V)
10247 {
10248 /* GReg operations. */
10249 switch (dispatch)
10250 {
10251 case 0: sturb (cpu, imm); return;
10252 case 1: ldurb32 (cpu, imm); return;
10253 case 2: ldursb64 (cpu, imm); return;
10254 case 3: ldursb32 (cpu, imm); return;
10255 case 4: sturh (cpu, imm); return;
10256 case 5: ldurh32 (cpu, imm); return;
10257 case 6: ldursh64 (cpu, imm); return;
10258 case 7: ldursh32 (cpu, imm); return;
10259 case 8: stur32 (cpu, imm); return;
10260 case 9: ldur32 (cpu, imm); return;
10261 case 10: ldursw (cpu, imm); return;
10262 case 12: stur64 (cpu, imm); return;
10263 case 13: ldur64 (cpu, imm); return;
10264
10265 case 14:
10266 /* PRFUM NYI. */
10267 HALT_NYI;
10268
10269 default:
10270 case 11:
10271 case 15:
10272 HALT_UNALLOC;
10273 }
10274 }
10275
10276 /* FReg operations. */
10277 switch (dispatch)
10278 {
10279 case 2: fsturq (cpu, imm); return;
10280 case 3: fldurq (cpu, imm); return;
10281 case 8: fsturs (cpu, imm); return;
10282 case 9: fldurs (cpu, imm); return;
10283 case 12: fsturd (cpu, imm); return;
10284 case 13: fldurd (cpu, imm); return;
10285
10286 case 0: /* STUR 8 bit FP. */
10287 case 1: /* LDUR 8 bit FP. */
10288 case 4: /* STUR 16 bit FP. */
10289	    case 5: /* LDUR 16 bit FP. */
10290 HALT_NYI;
10291
10292 default:
10293 case 6:
10294 case 7:
10295 case 10:
10296 case 11:
10297 case 14:
10298 case 15:
10299 HALT_UNALLOC;
10300 }
10301 }
10302
10303 /* N.B. A preliminary note regarding all the ldrs<x>32
10304 instructions
10305
10306	   The signed value loaded by these instructions is cast to unsigned
10307	   before being passed to aarch64_set_reg_u64 (cpu, N), i.e. to the
10308	   64 bit element of the GReg union.  This performs a 32 bit sign
10309	   extension (as required) but avoids 64 bit sign extension, thus
10310	   ensuring that the top half of the register word is zero.  This is
10311	   what the spec demands when a 32 bit load occurs. */
10312
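/* A sketch of the casting idiom described above (editorial, not part
   of the simulator): loading the byte 0x80 with an ldrsb32 variant
   should leave 0x00000000ffffff80 in the 64 bit register:
     int8_t   b = -128;           (the byte 0x80)
     uint32_t w = (uint32_t) b;   (0xffffff80 : 32 bit sign extension)
     uint64_t x = w;              (top half zero).  */
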
10313 /* 32 bit load sign-extended byte scaled unsigned 12 bit. */
10314 static void
10315 ldrsb32_abs (sim_cpu *cpu, uint32_t offset)
10316 {
10317 unsigned int rn = INSTR (9, 5);
10318 unsigned int rt = INSTR (4, 0);
10319
10320	  /* The target register may not be SP but the source may be.
10321	     There is no scaling required for a byte load.  */
10322 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
10323 aarch64_set_reg_u64 (cpu, rt, NO_SP,
10324		       (uint32_t) aarch64_get_mem_s8 (cpu, address));
10325 }
10326
10327 /* 32 bit load sign-extended byte scaled or unscaled zero-
10328 or sign-extended 32-bit register offset. */
10329 static void
10330 ldrsb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10331 {
10332 unsigned int rm = INSTR (20, 16);
10333 unsigned int rn = INSTR (9, 5);
10334 unsigned int rt = INSTR (4, 0);
10335
10336 /* rn may reference SP, rm and rt must reference ZR. */
10337
10338 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10339 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10340 extension);
10341
10342 /* There is no scaling required for a byte load. */
10343 aarch64_set_reg_u64
10344	    (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8 (cpu, address
10345 + displacement));
10346 }
10347
10348 /* 32 bit load sign-extended byte unscaled signed 9 bit with
10349 pre- or post-writeback. */
10350 static void
10351 ldrsb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10352 {
10353 uint64_t address;
10354 unsigned int rn = INSTR (9, 5);
10355 unsigned int rt = INSTR (4, 0);
10356
10357 if (rn == rt && wb != NoWriteBack)
10358 HALT_UNALLOC;
10359
10360 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10361
10362 if (wb == Pre)
10363 address += offset;
10364
10365 aarch64_set_reg_u64 (cpu, rt, NO_SP,
10366		       (uint32_t) aarch64_get_mem_s8 (cpu, address));
10367
10368 if (wb == Post)
10369 address += offset;
10370
10371 if (wb != NoWriteBack)
10372	    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10373 }
10374
10375 /* 8 bit store scaled. */
10376 static void
10377 fstrb_abs (sim_cpu *cpu, uint32_t offset)
10378 {
10379 unsigned st = INSTR (4, 0);
10380 unsigned rn = INSTR (9, 5);
10381
10382 aarch64_set_mem_u8 (cpu,
10383 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
10384 aarch64_get_vec_u8 (cpu, st, 0));
10385 }
10386
10387 /* 8 bit store scaled or unscaled zero- or
10388 sign-extended 8-bit register offset. */
10389 static void
10390 fstrb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10391 {
10392 unsigned rm = INSTR (20, 16);
10393 unsigned rn = INSTR (9, 5);
10394 unsigned st = INSTR (4, 0);
10395
10396 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10397 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10398 extension);
10399	  uint64_t displacement = extended;	/* No scaling for a byte store.  */
10400
10401 aarch64_set_mem_u8
10402 (cpu, address + displacement, aarch64_get_vec_u8 (cpu, st, 0));
10403 }
10404
10405 /* 16 bit store scaled. */
10406 static void
10407 fstrh_abs (sim_cpu *cpu, uint32_t offset)
10408 {
10409 unsigned st = INSTR (4, 0);
10410 unsigned rn = INSTR (9, 5);
10411
10412 aarch64_set_mem_u16
10413 (cpu,
10414 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16),
10415 aarch64_get_vec_u16 (cpu, st, 0));
10416 }
10417
10418 /* 16 bit store scaled or unscaled zero-
10419    or sign-extended 32-bit register offset.  */
10420 static void
10421 fstrh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10422 {
10423 unsigned rm = INSTR (20, 16);
10424 unsigned rn = INSTR (9, 5);
10425 unsigned st = INSTR (4, 0);
10426
10427 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10428 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10429 extension);
10430 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
10431
10432 aarch64_set_mem_u16
10433 (cpu, address + displacement, aarch64_get_vec_u16 (cpu, st, 0));
10434 }
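
/* Illustrative sketch (not built): the register-offset address arithmetic
   used by the *_scale_ext helpers.  The 32-bit index register is zero- or
   sign-extended (the extend() helper) and, for the Scaled variant, shifted
   left by log2 of the access size in bytes, which is what the OPT_SCALE
   macro used above is expected to compute.  The names below are invented
   and only the W-register extensions are shown.  */
#if 0
#include <stdint.h>

typedef enum { EX_UXTW, EX_SXTW } ex_extension;
typedef enum { EX_Unscaled, EX_Scaled } ex_scaling;

static int64_t
ex_extend (uint32_t value, ex_extension ext)
{
  return ext == EX_SXTW ? (int64_t) (int32_t) value : (int64_t) value;
}

static uint64_t
ex_reg_offset_address (uint64_t base, uint32_t rm_value, ex_extension ext,
		       ex_scaling scaling, unsigned access_bits)
{
  int64_t extended = ex_extend (rm_value, ext);
  unsigned size, shift = 0;

  /* 8-bit accesses shift by 0, 16-bit by 1, ... 128-bit by 4.  */
  for (size = access_bits; size > 8; size >>= 1)
    shift++;

  return base + (scaling == EX_Scaled ? (extended << shift) : extended);
}
#endif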
10435
10436 /* 32 bit store scaled unsigned 12 bit. */
10437 static void
10438 fstrs_abs (sim_cpu *cpu, uint32_t offset)
10439 {
10440 unsigned st = INSTR (4, 0);
10441 unsigned rn = INSTR (9, 5);
10442
10443 aarch64_set_mem_u32
10444 (cpu,
10445 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32),
10446 aarch64_get_vec_u32 (cpu, st, 0));
10447 }
10448
10449 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
10450 static void
10451 fstrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10452 {
10453 unsigned rn = INSTR (9, 5);
10454 unsigned st = INSTR (4, 0);
10455
10456 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10457
10458 if (wb != Post)
10459 address += offset;
10460
10461 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, st, 0));
10462
10463 if (wb == Post)
10464 address += offset;
10465
10466 if (wb != NoWriteBack)
10467 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10468 }
10469
10470 /* 32 bit store scaled or unscaled zero-
10471 or sign-extended 32-bit register offset. */
10472 static void
10473 fstrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10474 {
10475 unsigned rm = INSTR (20, 16);
10476 unsigned rn = INSTR (9, 5);
10477 unsigned st = INSTR (4, 0);
10478
10479 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10480 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10481 extension);
10482 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
10483
10484 aarch64_set_mem_u32
10485 (cpu, address + displacement, aarch64_get_vec_u32 (cpu, st, 0));
10486 }
10487
10488 /* 64 bit store scaled unsigned 12 bit. */
10489 static void
10490 fstrd_abs (sim_cpu *cpu, uint32_t offset)
10491 {
10492 unsigned st = INSTR (4, 0);
10493 unsigned rn = INSTR (9, 5);
10494
10495 aarch64_set_mem_u64
10496 (cpu,
10497 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64),
10498 aarch64_get_vec_u64 (cpu, st, 0));
10499 }
10500
10501 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
10502 static void
10503 fstrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10504 {
10505 unsigned rn = INSTR (9, 5);
10506 unsigned st = INSTR (4, 0);
10507
10508 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10509
10510 if (wb != Post)
10511 address += offset;
10512
10513 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, st, 0));
10514
10515 if (wb == Post)
10516 address += offset;
10517
10518 if (wb != NoWriteBack)
10519 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10520 }
10521
10522 /* 64 bit store scaled or unscaled zero-
10523 or sign-extended 32-bit register offset. */
10524 static void
10525 fstrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10526 {
10527 unsigned rm = INSTR (20, 16);
10528 unsigned rn = INSTR (9, 5);
10529 unsigned st = INSTR (4, 0);
10530
10531 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10532 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10533 extension);
10534 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
10535
10536 aarch64_set_mem_u64
10537 (cpu, address + displacement, aarch64_get_vec_u64 (cpu, st, 0));
10538 }
10539
10540 /* 128 bit store scaled unsigned 12 bit. */
10541 static void
10542 fstrq_abs (sim_cpu *cpu, uint32_t offset)
10543 {
10544 FRegister a;
10545 unsigned st = INSTR (4, 0);
10546 unsigned rn = INSTR (9, 5);
10547 uint64_t addr;
10548
10549 aarch64_get_FP_long_double (cpu, st, & a);
10550
10551 addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
10552 aarch64_set_mem_long_double (cpu, addr, a);
10553 }
10554
10555 /* 128 bit store unscaled signed 9 bit with pre- or post-writeback. */
10556 static void
10557 fstrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10558 {
10559 FRegister a;
10560 unsigned rn = INSTR (9, 5);
10561 unsigned st = INSTR (4, 0);
10562 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10563
10564 if (wb != Post)
10565 address += offset;
10566
10567 aarch64_get_FP_long_double (cpu, st, & a);
10568 aarch64_set_mem_long_double (cpu, address, a);
10569
10570 if (wb == Post)
10571 address += offset;
10572
10573 if (wb != NoWriteBack)
10574 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10575 }
10576
10577 /* 128 bit store scaled or unscaled zero-
10578 or sign-extended 32-bit register offset. */
10579 static void
10580 fstrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10581 {
10582 unsigned rm = INSTR (20, 16);
10583 unsigned rn = INSTR (9, 5);
10584 unsigned st = INSTR (4, 0);
10585
10586 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10587 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10588 extension);
10589 uint64_t displacement = OPT_SCALE (extended, 128, scaling);
10590
10591 FRegister a;
10592
10593 aarch64_get_FP_long_double (cpu, st, & a);
10594 aarch64_set_mem_long_double (cpu, address + displacement, a);
10595 }
10596
10597 static void
10598 dexLoadImmediatePrePost (sim_cpu *cpu)
10599 {
10600 /* instr[31,30] = size
10601 instr[29,27] = 111
10602 instr[26] = V
10603 instr[25,24] = 00
10604 instr[23,22] = opc
10605 instr[21] = 0
10606 instr[20,12] = simm9
10607 instr[11] = wb : 0 ==> Post, 1 ==> Pre
10608      instr[10] = 1
10609 instr[9,5] = Rn may be SP.
10610 instr[4,0] = Rt */
10611
10612 uint32_t V = INSTR (26, 26);
10613 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10614 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10615 WriteBack wb = INSTR (11, 11);
10616
10617 if (!V)
10618 {
10619 /* GReg operations. */
10620 switch (dispatch)
10621 {
10622 case 0: strb_wb (cpu, imm, wb); return;
10623 case 1: ldrb32_wb (cpu, imm, wb); return;
10624 case 2: ldrsb_wb (cpu, imm, wb); return;
10625 case 3: ldrsb32_wb (cpu, imm, wb); return;
10626 case 4: strh_wb (cpu, imm, wb); return;
10627 case 5: ldrh32_wb (cpu, imm, wb); return;
10628 case 6: ldrsh64_wb (cpu, imm, wb); return;
10629 case 7: ldrsh32_wb (cpu, imm, wb); return;
10630 case 8: str32_wb (cpu, imm, wb); return;
10631 case 9: ldr32_wb (cpu, imm, wb); return;
10632 case 10: ldrsw_wb (cpu, imm, wb); return;
10633 case 12: str_wb (cpu, imm, wb); return;
10634 case 13: ldr_wb (cpu, imm, wb); return;
10635
10636 default:
10637 case 11:
10638 case 14:
10639 case 15:
10640 HALT_UNALLOC;
10641 }
10642 }
10643
10644 /* FReg operations. */
10645 switch (dispatch)
10646 {
10647 case 2: fstrq_wb (cpu, imm, wb); return;
10648 case 3: fldrq_wb (cpu, imm, wb); return;
10649 case 8: fstrs_wb (cpu, imm, wb); return;
10650 case 9: fldrs_wb (cpu, imm, wb); return;
10651 case 12: fstrd_wb (cpu, imm, wb); return;
10652 case 13: fldrd_wb (cpu, imm, wb); return;
10653
10654 case 0: /* STUR 8 bit FP. */
10655 case 1: /* LDUR 8 bit FP. */
10656 case 4: /* STUR 16 bit FP. */
10657     case 5: /* LDUR 16 bit FP.  */
10658 HALT_NYI;
10659
10660 default:
10661 case 6:
10662 case 7:
10663 case 10:
10664 case 11:
10665 case 14:
10666 case 15:
10667 HALT_UNALLOC;
10668 }
10669 }
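
/* Illustrative sketch (not built): extracting the size:opc dispatch value
   used above from a raw instruction word.  For example, 0xB8404401 should
   encode "ldr w1, [x0], #4" (post-index), giving size = 2, opc = 1 and
   dispatch 9, i.e. ldr32_wb.  The helper names are invented.  */
#if 0
#include <stdint.h>

static uint32_t
ex_bits (uint32_t insn, unsigned hi, unsigned lo)
{
  return (insn >> lo) & ((1u << (hi - lo + 1)) - 1);
}

static uint32_t
ex_prepost_dispatch (uint32_t insn)
{
  /* size is instr[31,30], opc is instr[23,22]; dispatch = size:opc.  */
  return (ex_bits (insn, 31, 30) << 2) | ex_bits (insn, 23, 22);
}
#endif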
10670
10671 static void
10672 dexLoadRegisterOffset (sim_cpu *cpu)
10673 {
10674 /* instr[31,30] = size
10675 instr[29,27] = 111
10676 instr[26] = V
10677 instr[25,24] = 00
10678 instr[23,22] = opc
10679 instr[21] = 1
10680 instr[20,16] = rm
10681 instr[15,13] = option : 010 ==> UXTW, 011 ==> UXTX/LSL,
10682 110 ==> SXTW, 111 ==> SXTX,
10683 ow ==> RESERVED
10684 instr[12] = scaled
10685 instr[11,10] = 10
10686 instr[9,5] = rn
10687 instr[4,0] = rt. */
10688
10689 uint32_t V = INSTR (26, 26);
10690 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10691 Scaling scale = INSTR (12, 12);
10692 Extension extensionType = INSTR (15, 13);
10693
10694 /* Check for illegal extension types. */
10695 if (uimm (extensionType, 1, 1) == 0)
10696 HALT_UNALLOC;
10697
10698 if (extensionType == UXTX || extensionType == SXTX)
10699 extensionType = NoExtension;
10700
10701 if (!V)
10702 {
10703 /* GReg operations. */
10704 switch (dispatch)
10705 {
10706 case 0: strb_scale_ext (cpu, scale, extensionType); return;
10707 case 1: ldrb32_scale_ext (cpu, scale, extensionType); return;
10708 case 2: ldrsb_scale_ext (cpu, scale, extensionType); return;
10709 case 3: ldrsb32_scale_ext (cpu, scale, extensionType); return;
10710 case 4: strh_scale_ext (cpu, scale, extensionType); return;
10711 case 5: ldrh32_scale_ext (cpu, scale, extensionType); return;
10712 case 6: ldrsh_scale_ext (cpu, scale, extensionType); return;
10713 case 7: ldrsh32_scale_ext (cpu, scale, extensionType); return;
10714 case 8: str32_scale_ext (cpu, scale, extensionType); return;
10715 case 9: ldr32_scale_ext (cpu, scale, extensionType); return;
10716 case 10: ldrsw_scale_ext (cpu, scale, extensionType); return;
10717 case 12: str_scale_ext (cpu, scale, extensionType); return;
10718 case 13: ldr_scale_ext (cpu, scale, extensionType); return;
10719 case 14: prfm_scale_ext (cpu, scale, extensionType); return;
10720
10721 default:
10722 case 11:
10723 case 15:
10724 HALT_UNALLOC;
10725 }
10726 }
10727
10728 /* FReg operations. */
10729 switch (dispatch)
10730 {
10731 case 1: /* LDUR 8 bit FP. */
10732 HALT_NYI;
10733 case 3: fldrq_scale_ext (cpu, scale, extensionType); return;
10734     case 5: /* LDUR 16 bit FP.  */
10735 HALT_NYI;
10736 case 9: fldrs_scale_ext (cpu, scale, extensionType); return;
10737 case 13: fldrd_scale_ext (cpu, scale, extensionType); return;
10738
10739 case 0: fstrb_scale_ext (cpu, scale, extensionType); return;
10740 case 2: fstrq_scale_ext (cpu, scale, extensionType); return;
10741 case 4: fstrh_scale_ext (cpu, scale, extensionType); return;
10742 case 8: fstrs_scale_ext (cpu, scale, extensionType); return;
10743 case 12: fstrd_scale_ext (cpu, scale, extensionType); return;
10744
10745 default:
10746 case 6:
10747 case 7:
10748 case 10:
10749 case 11:
10750 case 14:
10751 case 15:
10752 HALT_UNALLOC;
10753 }
10754 }
10755
10756 static void
10757 dexLoadUnsignedImmediate (sim_cpu *cpu)
10758 {
10759 /* instr[29,24] == 111_01
10760 instr[31,30] = size
10761 instr[26] = V
10762 instr[23,22] = opc
10763 instr[21,10] = uimm12 : unsigned immediate offset
10764 instr[9,5] = rn may be SP.
10765 instr[4,0] = rt. */
10766
10767 uint32_t V = INSTR (26,26);
10768 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10769 uint32_t imm = INSTR (21, 10);
10770
10771 if (!V)
10772 {
10773 /* GReg operations. */
10774 switch (dispatch)
10775 {
10776 case 0: strb_abs (cpu, imm); return;
10777 case 1: ldrb32_abs (cpu, imm); return;
10778 case 2: ldrsb_abs (cpu, imm); return;
10779 case 3: ldrsb32_abs (cpu, imm); return;
10780 case 4: strh_abs (cpu, imm); return;
10781 case 5: ldrh32_abs (cpu, imm); return;
10782 case 6: ldrsh_abs (cpu, imm); return;
10783 case 7: ldrsh32_abs (cpu, imm); return;
10784 case 8: str32_abs (cpu, imm); return;
10785 case 9: ldr32_abs (cpu, imm); return;
10786 case 10: ldrsw_abs (cpu, imm); return;
10787 case 12: str_abs (cpu, imm); return;
10788 case 13: ldr_abs (cpu, imm); return;
10789 case 14: prfm_abs (cpu, imm); return;
10790
10791 default:
10792 case 11:
10793 case 15:
10794 HALT_UNALLOC;
10795 }
10796 }
10797
10798 /* FReg operations. */
10799 switch (dispatch)
10800 {
10801 case 0: fstrb_abs (cpu, imm); return;
10802 case 4: fstrh_abs (cpu, imm); return;
10803 case 8: fstrs_abs (cpu, imm); return;
10804 case 12: fstrd_abs (cpu, imm); return;
10805 case 2: fstrq_abs (cpu, imm); return;
10806
10807 case 1: fldrb_abs (cpu, imm); return;
10808 case 5: fldrh_abs (cpu, imm); return;
10809 case 9: fldrs_abs (cpu, imm); return;
10810 case 13: fldrd_abs (cpu, imm); return;
10811 case 3: fldrq_abs (cpu, imm); return;
10812
10813 default:
10814 case 6:
10815 case 7:
10816 case 10:
10817 case 11:
10818 case 14:
10819 case 15:
10820 HALT_UNALLOC;
10821 }
10822 }
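
/* Illustrative sketch (not built): for the unsigned-immediate forms the
   12-bit field is an element count, not a byte count, so the *_abs helpers
   scale it by the access size (the SCALE macro used above).  E.g. with a
   64-bit access, imm12 = 3 addresses base + 24.  The helper name is
   invented.  */
#if 0
#include <stdint.h>

static uint64_t
ex_abs_address (uint64_t base, uint32_t imm12, unsigned access_bits)
{
  unsigned size, shift = 0;

  for (size = access_bits; size > 8; size >>= 1)
    shift++;			/* 8->0, 16->1, 32->2, 64->3, 128->4.  */

  return base + ((uint64_t) imm12 << shift);
}
#endif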
10823
10824 static void
10825 dexLoadExclusive (sim_cpu *cpu)
10826 {
10827 /* assert instr[29:24] = 001000;
10828 instr[31,30] = size
10829 instr[23] = 0 if exclusive
10830 instr[22] = L : 1 if load, 0 if store
10831 instr[21] = 1 if pair
10832 instr[20,16] = Rs
10833 instr[15] = o0 : 1 if ordered
10834 instr[14,10] = Rt2
10835 instr[9,5] = Rn
10836      instr[4,0] = Rt.  */
10837
10838 switch (INSTR (22, 21))
10839 {
10840 case 2: ldxr (cpu); return;
10841 case 0: stxr (cpu); return;
10842 default: HALT_NYI;
10843 }
10844 }
10845
10846 static void
10847 dexLoadOther (sim_cpu *cpu)
10848 {
10849 uint32_t dispatch;
10850
10851 /* instr[29,25] = 111_0
10852 instr[24] == 0 ==> dispatch, 1 ==> ldst reg unsigned immediate
10853      instr[21] and instr[11,10] form the secondary dispatch.  */
10854 if (INSTR (24, 24))
10855 {
10856 dexLoadUnsignedImmediate (cpu);
10857 return;
10858 }
10859
10860 dispatch = ((INSTR (21, 21) << 2) | INSTR (11, 10));
10861 switch (dispatch)
10862 {
10863 case 0: dexLoadUnscaledImmediate (cpu); return;
10864 case 1: dexLoadImmediatePrePost (cpu); return;
10865 case 3: dexLoadImmediatePrePost (cpu); return;
10866 case 6: dexLoadRegisterOffset (cpu); return;
10867
10868 default:
10869 case 2:
10870 case 4:
10871 case 5:
10872 case 7:
10873 HALT_NYI;
10874 }
10875 }
10876
10877 static void
10878 store_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10879 {
10880 unsigned rn = INSTR (14, 10);
10881 unsigned rd = INSTR (9, 5);
10882 unsigned rm = INSTR (4, 0);
10883 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10884
10885 if ((rn == rd || rm == rd) && wb != NoWriteBack)
10886 HALT_UNALLOC; /* ??? */
10887
10888 offset <<= 2;
10889
10890 if (wb != Post)
10891 address += offset;
10892
10893 aarch64_set_mem_u32 (cpu, address,
10894 aarch64_get_reg_u32 (cpu, rm, NO_SP));
10895 aarch64_set_mem_u32 (cpu, address + 4,
10896 aarch64_get_reg_u32 (cpu, rn, NO_SP));
10897
10898 if (wb == Post)
10899 address += offset;
10900
10901 if (wb != NoWriteBack)
10902 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10903 }
10904
10905 static void
10906 store_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10907 {
10908 unsigned rn = INSTR (14, 10);
10909 unsigned rd = INSTR (9, 5);
10910 unsigned rm = INSTR (4, 0);
10911 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10912
10913 if ((rn == rd || rm == rd) && wb != NoWriteBack)
10914 HALT_UNALLOC; /* ??? */
10915
10916 offset <<= 3;
10917
10918 if (wb != Post)
10919 address += offset;
10920
10921 aarch64_set_mem_u64 (cpu, address,
10922 aarch64_get_reg_u64 (cpu, rm, NO_SP));
10923 aarch64_set_mem_u64 (cpu, address + 8,
10924 aarch64_get_reg_u64 (cpu, rn, NO_SP));
10925
10926 if (wb == Post)
10927 address += offset;
10928
10929 if (wb != NoWriteBack)
10930 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10931 }
10932
10933 static void
10934 load_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10935 {
10936 unsigned rn = INSTR (14, 10);
10937 unsigned rd = INSTR (9, 5);
10938 unsigned rm = INSTR (4, 0);
10939 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10940
10941 /* Treat this as unalloc to make sure we don't do it. */
10942 if (rn == rm)
10943 HALT_UNALLOC;
10944
10945 offset <<= 2;
10946
10947 if (wb != Post)
10948 address += offset;
10949
10950 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u32 (cpu, address));
10951 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u32 (cpu, address + 4));
10952
10953 if (wb == Post)
10954 address += offset;
10955
10956 if (wb != NoWriteBack)
10957 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10958 }
10959
10960 static void
10961 load_pair_s32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10962 {
10963 unsigned rn = INSTR (14, 10);
10964 unsigned rd = INSTR (9, 5);
10965 unsigned rm = INSTR (4, 0);
10966 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10967
10968 /* Treat this as unalloc to make sure we don't do it. */
10969 if (rn == rm)
10970 HALT_UNALLOC;
10971
10972 offset <<= 2;
10973
10974 if (wb != Post)
10975 address += offset;
10976
10977 aarch64_set_reg_s64 (cpu, rm, SP_OK, aarch64_get_mem_s32 (cpu, address));
10978 aarch64_set_reg_s64 (cpu, rn, SP_OK, aarch64_get_mem_s32 (cpu, address + 4));
10979
10980 if (wb == Post)
10981 address += offset;
10982
10983 if (wb != NoWriteBack)
10984 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10985 }
10986
10987 static void
10988 load_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10989 {
10990 unsigned rn = INSTR (14, 10);
10991 unsigned rd = INSTR (9, 5);
10992 unsigned rm = INSTR (4, 0);
10993 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10994
10995 /* Treat this as unalloc to make sure we don't do it. */
10996 if (rn == rm)
10997 HALT_UNALLOC;
10998
10999 offset <<= 3;
11000
11001 if (wb != Post)
11002 address += offset;
11003
11004 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u64 (cpu, address));
11005 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u64 (cpu, address + 8));
11006
11007 if (wb == Post)
11008 address += offset;
11009
11010 if (wb != NoWriteBack)
11011 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11012 }
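
/* Illustrative sketch (not built): LDP/STP carry a signed 7-bit *element*
   offset in instr[21,15], which the pair helpers above turn into a byte
   offset with "offset <<= 2" (word pairs) or "offset <<= 3" (doubleword
   pairs).  So "stp x0, x1, [sp, #-16]!" encodes imm7 = -2, not -16.  The
   helper name is invented.  */
#if 0
#include <stdint.h>

static int32_t
ex_pair_byte_offset (uint32_t insn, unsigned elt_shift)
{
  int32_t imm7 = (insn >> 15) & 0x7F;	/* instr[21,15].  */

  if (imm7 & 0x40)			/* Sign-extend 7 -> 32 bits.  */
    imm7 -= 0x80;

  return imm7 << elt_shift;		/* 2 for 32-bit, 3 for 64-bit pairs.  */
}
#endif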
11013
11014 static void
11015 dex_load_store_pair_gr (sim_cpu *cpu)
11016 {
11017 /* instr[31,30] = size (10=> 64-bit, 01=> signed 32-bit, 00=> 32-bit)
11018 instr[29,25] = instruction encoding: 101_0
11019      instr[26]    = V : 1 if fp, 0 if gp
11020 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11021 instr[22] = load/store (1=> load)
11022 instr[21,15] = signed, scaled, offset
11023 instr[14,10] = Rn
11024 instr[ 9, 5] = Rd
11025 instr[ 4, 0] = Rm. */
11026
11027 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11028 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11029
11030 switch (dispatch)
11031 {
11032 case 2: store_pair_u32 (cpu, offset, Post); return;
11033 case 3: load_pair_u32 (cpu, offset, Post); return;
11034 case 4: store_pair_u32 (cpu, offset, NoWriteBack); return;
11035 case 5: load_pair_u32 (cpu, offset, NoWriteBack); return;
11036 case 6: store_pair_u32 (cpu, offset, Pre); return;
11037 case 7: load_pair_u32 (cpu, offset, Pre); return;
11038
11039 case 11: load_pair_s32 (cpu, offset, Post); return;
11040 case 13: load_pair_s32 (cpu, offset, NoWriteBack); return;
11041 case 15: load_pair_s32 (cpu, offset, Pre); return;
11042
11043 case 18: store_pair_u64 (cpu, offset, Post); return;
11044 case 19: load_pair_u64 (cpu, offset, Post); return;
11045 case 20: store_pair_u64 (cpu, offset, NoWriteBack); return;
11046 case 21: load_pair_u64 (cpu, offset, NoWriteBack); return;
11047 case 22: store_pair_u64 (cpu, offset, Pre); return;
11048 case 23: load_pair_u64 (cpu, offset, Pre); return;
11049
11050 default:
11051 HALT_UNALLOC;
11052 }
11053 }
11054
11055 static void
11056 store_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
11057 {
11058 unsigned rn = INSTR (14, 10);
11059 unsigned rd = INSTR (9, 5);
11060 unsigned rm = INSTR (4, 0);
11061 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11062
11063 offset <<= 2;
11064
11065 if (wb != Post)
11066 address += offset;
11067
11068 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, rm, 0));
11069 aarch64_set_mem_u32 (cpu, address + 4, aarch64_get_vec_u32 (cpu, rn, 0));
11070
11071 if (wb == Post)
11072 address += offset;
11073
11074 if (wb != NoWriteBack)
11075 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11076 }
11077
11078 static void
11079 store_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11080 {
11081 unsigned rn = INSTR (14, 10);
11082 unsigned rd = INSTR (9, 5);
11083 unsigned rm = INSTR (4, 0);
11084 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11085
11086 offset <<= 3;
11087
11088 if (wb != Post)
11089 address += offset;
11090
11091 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, rm, 0));
11092 aarch64_set_mem_u64 (cpu, address + 8, aarch64_get_vec_u64 (cpu, rn, 0));
11093
11094 if (wb == Post)
11095 address += offset;
11096
11097 if (wb != NoWriteBack)
11098 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11099 }
11100
11101 static void
11102 store_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11103 {
11104 FRegister a;
11105 unsigned rn = INSTR (14, 10);
11106 unsigned rd = INSTR (9, 5);
11107 unsigned rm = INSTR (4, 0);
11108 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11109
11110 offset <<= 4;
11111
11112 if (wb != Post)
11113 address += offset;
11114
11115 aarch64_get_FP_long_double (cpu, rm, & a);
11116 aarch64_set_mem_long_double (cpu, address, a);
11117 aarch64_get_FP_long_double (cpu, rn, & a);
11118 aarch64_set_mem_long_double (cpu, address + 16, a);
11119
11120 if (wb == Post)
11121 address += offset;
11122
11123 if (wb != NoWriteBack)
11124 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11125 }
11126
11127 static void
11128 load_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
11129 {
11130 unsigned rn = INSTR (14, 10);
11131 unsigned rd = INSTR (9, 5);
11132 unsigned rm = INSTR (4, 0);
11133 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11134
11135 if (rm == rn)
11136 HALT_UNALLOC;
11137
11138 offset <<= 2;
11139
11140 if (wb != Post)
11141 address += offset;
11142
11143 aarch64_set_vec_u32 (cpu, rm, 0, aarch64_get_mem_u32 (cpu, address));
11144 aarch64_set_vec_u32 (cpu, rn, 0, aarch64_get_mem_u32 (cpu, address + 4));
11145
11146 if (wb == Post)
11147 address += offset;
11148
11149 if (wb != NoWriteBack)
11150 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11151 }
11152
11153 static void
11154 load_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11155 {
11156 unsigned rn = INSTR (14, 10);
11157 unsigned rd = INSTR (9, 5);
11158 unsigned rm = INSTR (4, 0);
11159 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11160
11161 if (rm == rn)
11162 HALT_UNALLOC;
11163
11164 offset <<= 3;
11165
11166 if (wb != Post)
11167 address += offset;
11168
11169 aarch64_set_vec_u64 (cpu, rm, 0, aarch64_get_mem_u64 (cpu, address));
11170 aarch64_set_vec_u64 (cpu, rn, 0, aarch64_get_mem_u64 (cpu, address + 8));
11171
11172 if (wb == Post)
11173 address += offset;
11174
11175 if (wb != NoWriteBack)
11176 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11177 }
11178
11179 static void
11180 load_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11181 {
11182 FRegister a;
11183 unsigned rn = INSTR (14, 10);
11184 unsigned rd = INSTR (9, 5);
11185 unsigned rm = INSTR (4, 0);
11186 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11187
11188 if (rm == rn)
11189 HALT_UNALLOC;
11190
11191 offset <<= 4;
11192
11193 if (wb != Post)
11194 address += offset;
11195
11196 aarch64_get_mem_long_double (cpu, address, & a);
11197 aarch64_set_FP_long_double (cpu, rm, a);
11198 aarch64_get_mem_long_double (cpu, address + 16, & a);
11199 aarch64_set_FP_long_double (cpu, rn, a);
11200
11201 if (wb == Post)
11202 address += offset;
11203
11204 if (wb != NoWriteBack)
11205 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11206 }
11207
11208 static void
11209 dex_load_store_pair_fp (sim_cpu *cpu)
11210 {
11211 /* instr[31,30] = size (10=> 128-bit, 01=> 64-bit, 00=> 32-bit)
11212      instr[29,25] = instruction encoding: 101_0
11213 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11214 instr[22] = load/store (1=> load)
11215 instr[21,15] = signed, scaled, offset
11216 instr[14,10] = Rn
11217 instr[ 9, 5] = Rd
11218 instr[ 4, 0] = Rm */
11219
11220 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11221 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11222
11223 switch (dispatch)
11224 {
11225 case 2: store_pair_float (cpu, offset, Post); return;
11226 case 3: load_pair_float (cpu, offset, Post); return;
11227 case 4: store_pair_float (cpu, offset, NoWriteBack); return;
11228 case 5: load_pair_float (cpu, offset, NoWriteBack); return;
11229 case 6: store_pair_float (cpu, offset, Pre); return;
11230 case 7: load_pair_float (cpu, offset, Pre); return;
11231
11232 case 10: store_pair_double (cpu, offset, Post); return;
11233 case 11: load_pair_double (cpu, offset, Post); return;
11234 case 12: store_pair_double (cpu, offset, NoWriteBack); return;
11235 case 13: load_pair_double (cpu, offset, NoWriteBack); return;
11236 case 14: store_pair_double (cpu, offset, Pre); return;
11237 case 15: load_pair_double (cpu, offset, Pre); return;
11238
11239 case 18: store_pair_long_double (cpu, offset, Post); return;
11240 case 19: load_pair_long_double (cpu, offset, Post); return;
11241 case 20: store_pair_long_double (cpu, offset, NoWriteBack); return;
11242 case 21: load_pair_long_double (cpu, offset, NoWriteBack); return;
11243 case 22: store_pair_long_double (cpu, offset, Pre); return;
11244 case 23: load_pair_long_double (cpu, offset, Pre); return;
11245
11246 default:
11247 HALT_UNALLOC;
11248 }
11249 }
11250
11251 static inline unsigned
11252 vec_reg (unsigned v, unsigned o)
11253 {
11254   return (v + o) & 0x1F;	/* Vector registers wrap modulo 32.  */
11255 }
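
/* Usage example: vec_reg implements the modulo-32 register wrap-around the
   multi-register forms require, so e.g. an LD4 starting at V30 targets
   V30, V31, V0, V1.  A quick self-check (sketch, not built):  */
#if 0
#include <assert.h>

static void
ex_vec_reg_check (void)
{
  assert (vec_reg (30, 0) == 30);
  assert (vec_reg (30, 3) == 1);	/* (30 + 3) & 0x1F == 1.  */
}
#endif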
11256
11257 /* Load multiple N-element structures to N consecutive registers. */
11258 static void
11259 vec_load (sim_cpu *cpu, uint64_t address, unsigned N)
11260 {
11261 int all = INSTR (30, 30);
11262 unsigned size = INSTR (11, 10);
11263 unsigned vd = INSTR (4, 0);
11264 unsigned i;
11265
11266 switch (size)
11267 {
11268 case 0: /* 8-bit operations. */
11269 if (all)
11270 for (i = 0; i < (16 * N); i++)
11271 aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15,
11272 aarch64_get_mem_u8 (cpu, address + i));
11273 else
11274 for (i = 0; i < (8 * N); i++)
11275 aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7,
11276 aarch64_get_mem_u8 (cpu, address + i));
11277 return;
11278
11279 case 1: /* 16-bit operations. */
11280 if (all)
11281 for (i = 0; i < (8 * N); i++)
11282 aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7,
11283 aarch64_get_mem_u16 (cpu, address + i * 2));
11284 else
11285 for (i = 0; i < (4 * N); i++)
11286 aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3,
11287 aarch64_get_mem_u16 (cpu, address + i * 2));
11288 return;
11289
11290 case 2: /* 32-bit operations. */
11291 if (all)
11292 for (i = 0; i < (4 * N); i++)
11293 aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3,
11294 aarch64_get_mem_u32 (cpu, address + i * 4));
11295 else
11296 for (i = 0; i < (2 * N); i++)
11297 aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1,
11298 aarch64_get_mem_u32 (cpu, address + i * 4));
11299 return;
11300
11301 case 3: /* 64-bit operations. */
11302 if (all)
11303 for (i = 0; i < (2 * N); i++)
11304 aarch64_set_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1,
11305 aarch64_get_mem_u64 (cpu, address + i * 8));
11306 else
11307 for (i = 0; i < N; i++)
11308 aarch64_set_vec_u64 (cpu, vec_reg (vd, i), 0,
11309 aarch64_get_mem_u64 (cpu, address + i * 8));
11310 return;
11311 }
11312 }
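
/* Illustrative example (not built) of the indexing scheme above: for a
   full-width (all == 1) 16-bit load with N == 2, stream element i lands in
   register vec_reg (vd, i >> 3), lane i & 7, so the first eight halfwords
   fill Vd completely before Vd+1 is touched.  This is the consecutive
   LD1-style layout; see the FIXME comments on the LD1_<n> helpers below.  */
#if 0
#include <stdio.h>

static void
ex_vec_load_layout (void)
{
  int i;

  /* Map stream index -> (register offset, lane) for all==1, 16-bit, N==2.  */
  for (i = 0; i < 16; i++)
    printf ("element %2d -> V(vd+%d)[%d]\n", i, i >> 3, i & 7);
}
#endif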
11313
11314 /* LD4: load multiple 4-element structures to four consecutive registers.  */
11315 static void
11316 LD4 (sim_cpu *cpu, uint64_t address)
11317 {
11318 vec_load (cpu, address, 4);
11319 }
11320
11321 /* LD3: load multiple 3-element structures to three consecutive registers. */
11322 static void
11323 LD3 (sim_cpu *cpu, uint64_t address)
11324 {
11325 vec_load (cpu, address, 3);
11326 }
11327
11328 /* LD2: load multiple 2-element structures to two consecutive registers. */
11329 static void
11330 LD2 (sim_cpu *cpu, uint64_t address)
11331 {
11332 vec_load (cpu, address, 2);
11333 }
11334
11335 /* Load multiple 1-element structures into one register. */
11336 static void
11337 LD1_1 (sim_cpu *cpu, uint64_t address)
11338 {
11339 int all = INSTR (30, 30);
11340 unsigned size = INSTR (11, 10);
11341 unsigned vd = INSTR (4, 0);
11342 unsigned i;
11343
11344 switch (size)
11345 {
11346 case 0:
11347 /* LD1 {Vd.16b}, addr, #16 */
11348 /* LD1 {Vd.8b}, addr, #8 */
11349 for (i = 0; i < (all ? 16 : 8); i++)
11350 aarch64_set_vec_u8 (cpu, vd, i,
11351 aarch64_get_mem_u8 (cpu, address + i));
11352 return;
11353
11354 case 1:
11355 /* LD1 {Vd.8h}, addr, #16 */
11356 /* LD1 {Vd.4h}, addr, #8 */
11357 for (i = 0; i < (all ? 8 : 4); i++)
11358 aarch64_set_vec_u16 (cpu, vd, i,
11359 aarch64_get_mem_u16 (cpu, address + i * 2));
11360 return;
11361
11362 case 2:
11363 /* LD1 {Vd.4s}, addr, #16 */
11364 /* LD1 {Vd.2s}, addr, #8 */
11365 for (i = 0; i < (all ? 4 : 2); i++)
11366 aarch64_set_vec_u32 (cpu, vd, i,
11367 aarch64_get_mem_u32 (cpu, address + i * 4));
11368 return;
11369
11370 case 3:
11371 /* LD1 {Vd.2d}, addr, #16 */
11372 /* LD1 {Vd.1d}, addr, #8 */
11373 for (i = 0; i < (all ? 2 : 1); i++)
11374 aarch64_set_vec_u64 (cpu, vd, i,
11375 aarch64_get_mem_u64 (cpu, address + i * 8));
11376 return;
11377 }
11378 }
11379
11380 /* Load multiple 1-element structures into two registers. */
11381 static void
11382 LD1_2 (sim_cpu *cpu, uint64_t address)
11383 {
11384   /* FIXME: This shares vec_load with LD2, but the two are not
11385      architecturally identical: LD2 de-interleaves elements across the
11386      two registers, while LD1 (the layout vec_load implements) does not.  */
11387 vec_load (cpu, address, 2);
11388 }
11389
11390 /* Load multiple 1-element structures into three registers. */
11391 static void
11392 LD1_3 (sim_cpu *cpu, uint64_t address)
11393 {
11394   /* FIXME: This shares vec_load with LD3, but the two are not
11395      architecturally identical: LD3 de-interleaves elements across the
11396      three registers, while LD1 (the layout vec_load implements) does not.  */
11397 vec_load (cpu, address, 3);
11398 }
11399
11400 /* Load multiple 1-element structures into four registers. */
11401 static void
11402 LD1_4 (sim_cpu *cpu, uint64_t address)
11403 {
11404   /* FIXME: This shares vec_load with LD4, but the two are not
11405      architecturally identical: LD4 de-interleaves elements across the
11406      four registers, while LD1 (the layout vec_load implements) does not.  */
11407 vec_load (cpu, address, 4);
11408 }
11409
11410 /* Store multiple N-element structures from N consecutive registers.  */
11411 static void
11412 vec_store (sim_cpu *cpu, uint64_t address, unsigned N)
11413 {
11414 int all = INSTR (30, 30);
11415 unsigned size = INSTR (11, 10);
11416 unsigned vd = INSTR (4, 0);
11417 unsigned i;
11418
11419 switch (size)
11420 {
11421 case 0: /* 8-bit operations. */
11422 if (all)
11423 for (i = 0; i < (16 * N); i++)
11424 aarch64_set_mem_u8
11425 (cpu, address + i,
11426 aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15));
11427 else
11428 for (i = 0; i < (8 * N); i++)
11429 aarch64_set_mem_u8
11430 (cpu, address + i,
11431 aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7));
11432 return;
11433
11434 case 1: /* 16-bit operations. */
11435 if (all)
11436 for (i = 0; i < (8 * N); i++)
11437 aarch64_set_mem_u16
11438 (cpu, address + i * 2,
11439 aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7));
11440 else
11441 for (i = 0; i < (4 * N); i++)
11442 aarch64_set_mem_u16
11443 (cpu, address + i * 2,
11444 aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3));
11445 return;
11446
11447 case 2: /* 32-bit operations. */
11448 if (all)
11449 for (i = 0; i < (4 * N); i++)
11450 aarch64_set_mem_u32
11451 (cpu, address + i * 4,
11452 aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3));
11453 else
11454 for (i = 0; i < (2 * N); i++)
11455 aarch64_set_mem_u32
11456 (cpu, address + i * 4,
11457 aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1));
11458 return;
11459
11460 case 3: /* 64-bit operations. */
11461 if (all)
11462 for (i = 0; i < (2 * N); i++)
11463 aarch64_set_mem_u64
11464 (cpu, address + i * 8,
11465 aarch64_get_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1));
11466 else
11467 for (i = 0; i < N; i++)
11468 aarch64_set_mem_u64
11469 (cpu, address + i * 8,
11470 aarch64_get_vec_u64 (cpu, vec_reg (vd, i), 0));
11471 return;
11472 }
11473 }
11474
11475 /* Store multiple 4-element structures from four consecutive registers.  */
11476 static void
11477 ST4 (sim_cpu *cpu, uint64_t address)
11478 {
11479 vec_store (cpu, address, 4);
11480 }
11481
11482 /* Store multiple 3-element structures from three consecutive registers.  */
11483 static void
11484 ST3 (sim_cpu *cpu, uint64_t address)
11485 {
11486 vec_store (cpu, address, 3);
11487 }
11488
11489 /* Store multiple 2-element structures from two consecutive registers.  */
11490 static void
11491 ST2 (sim_cpu *cpu, uint64_t address)
11492 {
11493 vec_store (cpu, address, 2);
11494 }
11495
11496 /* Store multiple 1-element structures from one register.  */
11497 static void
11498 ST1_1 (sim_cpu *cpu, uint64_t address)
11499 {
11500 int all = INSTR (30, 30);
11501 unsigned size = INSTR (11, 10);
11502 unsigned vd = INSTR (4, 0);
11503 unsigned i;
11504
11505 switch (size)
11506 {
11507 case 0:
11508 for (i = 0; i < (all ? 16 : 8); i++)
11509 aarch64_set_mem_u8 (cpu, address + i,
11510 aarch64_get_vec_u8 (cpu, vd, i));
11511 return;
11512
11513 case 1:
11514 for (i = 0; i < (all ? 8 : 4); i++)
11515 aarch64_set_mem_u16 (cpu, address + i * 2,
11516 aarch64_get_vec_u16 (cpu, vd, i));
11517 return;
11518
11519 case 2:
11520 for (i = 0; i < (all ? 4 : 2); i++)
11521 aarch64_set_mem_u32 (cpu, address + i * 4,
11522 aarch64_get_vec_u32 (cpu, vd, i));
11523 return;
11524
11525 case 3:
11526 for (i = 0; i < (all ? 2 : 1); i++)
11527 aarch64_set_mem_u64 (cpu, address + i * 8,
11528 aarch64_get_vec_u64 (cpu, vd, i));
11529 return;
11530 }
11531 }
11532
11533 /* Store multiple 1-element structures from two registers.  */
11534 static void
11535 ST1_2 (sim_cpu *cpu, uint64_t address)
11536 {
11537   /* FIXME: This shares vec_store with ST2, but the two are not
11538      architecturally identical: ST2 interleaves elements from the two
11539      registers, while ST1 (the layout vec_store implements) does not.  */
11540 vec_store (cpu, address, 2);
11541 }
11542
11543 /* Store multiple 1-element structures from three registers.  */
11544 static void
11545 ST1_3 (sim_cpu *cpu, uint64_t address)
11546 {
11547   /* FIXME: This shares vec_store with ST3, but the two are not
11548      architecturally identical: ST3 interleaves elements from the three
11549      registers, while ST1 (the layout vec_store implements) does not.  */
11550 vec_store (cpu, address, 3);
11551 }
11552
11553 /* Store multiple 1-element structures from four registers.  */
11554 static void
11555 ST1_4 (sim_cpu *cpu, uint64_t address)
11556 {
11557   /* FIXME: This shares vec_store with ST4, but the two are not
11558      architecturally identical: ST4 interleaves elements from the four
11559      registers, while ST1 (the layout vec_store implements) does not.  */
11560 vec_store (cpu, address, 4);
11561 }
11562
11563 #define LDn_STn_SINGLE_LANE_AND_SIZE() \
11564 do \
11565 { \
11566 switch (INSTR (15, 14)) \
11567 { \
11568 case 0: \
11569 lane = (full << 3) | (s << 2) | size; \
11570 size = 0; \
11571 break; \
11572 \
11573 case 1: \
11574 if ((size & 1) == 1) \
11575 HALT_UNALLOC; \
11576 lane = (full << 2) | (s << 1) | (size >> 1); \
11577 size = 1; \
11578 break; \
11579 \
11580 case 2: \
11581 if ((size & 2) == 2) \
11582 HALT_UNALLOC; \
11583 \
11584 if ((size & 1) == 0) \
11585 { \
11586 lane = (full << 1) | s; \
11587 size = 2; \
11588 } \
11589 else \
11590 { \
11591 if (s) \
11592 HALT_UNALLOC; \
11593 lane = full; \
11594 size = 3; \
11595 } \
11596 break; \
11597 \
11598 default: \
11599 HALT_UNALLOC; \
11600 } \
11601 } \
11602 while (0)
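
/* Worked example of the macro above: "ld1 {v2.b}[11], [x0]" has
   instr[15,14] = 0 (byte element), Q = 1, S = 0, size = 3, so
   lane = (1 << 3) | (0 << 2) | 3 = 11 and size collapses to 0 (one byte).
   A standalone rendering of the byte case (name invented, not built):  */
#if 0
static unsigned
ex_byte_lane (unsigned q, unsigned s, unsigned size)
{
  return (q << 3) | (s << 2) | size;	/* The INSTR (15, 14) == 0 case.  */
}
#endif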
11603
11604 /* Load single structure into one lane of N registers. */
11605 static void
11606 do_vec_LDn_single (sim_cpu *cpu, uint64_t address)
11607 {
11608 /* instr[31] = 0
11609 instr[30] = element selector 0=>half, 1=>all elements
11610 instr[29,24] = 00 1101
11611 instr[23] = 0=>simple, 1=>post
11612 instr[22] = 1
11613 instr[21] = width: LD1-or-LD3 (0) / LD2-or-LD4 (1)
11614 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11615 11111 (immediate post inc)
11616 instr[15,13] = opcode
11617 instr[12] = S, used for lane number
11618 instr[11,10] = size, also used for lane number
11619 instr[9,5] = address
11620 instr[4,0] = Vd */
11621
11622 unsigned full = INSTR (30, 30);
11623 unsigned vd = INSTR (4, 0);
11624 unsigned size = INSTR (11, 10);
11625 unsigned s = INSTR (12, 12);
11626 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11627 int lane = 0;
11628 int i;
11629
11630 NYI_assert (29, 24, 0x0D);
11631 NYI_assert (22, 22, 1);
11632
11633 /* Compute the lane number first (using size), and then compute size. */
11634 LDn_STn_SINGLE_LANE_AND_SIZE ();
11635
11636 for (i = 0; i < nregs; i++)
11637 switch (size)
11638 {
11639 case 0:
11640 {
11641 uint8_t val = aarch64_get_mem_u8 (cpu, address + i);
11642 aarch64_set_vec_u8 (cpu, vd + i, lane, val);
11643 break;
11644 }
11645
11646 case 1:
11647 {
11648 uint16_t val = aarch64_get_mem_u16 (cpu, address + (i * 2));
11649 aarch64_set_vec_u16 (cpu, vd + i, lane, val);
11650 break;
11651 }
11652
11653 case 2:
11654 {
11655 uint32_t val = aarch64_get_mem_u32 (cpu, address + (i * 4));
11656 aarch64_set_vec_u32 (cpu, vd + i, lane, val);
11657 break;
11658 }
11659
11660 case 3:
11661 {
11662 uint64_t val = aarch64_get_mem_u64 (cpu, address + (i * 8));
11663 aarch64_set_vec_u64 (cpu, vd + i, lane, val);
11664 break;
11665 }
11666 }
11667 }
11668
11669 /* Store single structure from one lane from N registers. */
11670 static void
11671 do_vec_STn_single (sim_cpu *cpu, uint64_t address)
11672 {
11673 /* instr[31] = 0
11674 instr[30] = element selector 0=>half, 1=>all elements
11675 instr[29,24] = 00 1101
11676 instr[23] = 0=>simple, 1=>post
11677 instr[22] = 0
11678 instr[21] = width: LD1-or-LD3 (0) / LD2-or-LD4 (1)
11679 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11680 11111 (immediate post inc)
11681 instr[15,13] = opcode
11682 instr[12] = S, used for lane number
11683 instr[11,10] = size, also used for lane number
11684 instr[9,5] = address
11685 instr[4,0] = Vd */
11686
11687 unsigned full = INSTR (30, 30);
11688 unsigned vd = INSTR (4, 0);
11689 unsigned size = INSTR (11, 10);
11690 unsigned s = INSTR (12, 12);
11691 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11692 int lane = 0;
11693 int i;
11694
11695 NYI_assert (29, 24, 0x0D);
11696 NYI_assert (22, 22, 0);
11697
11698 /* Compute the lane number first (using size), and then compute size. */
11699 LDn_STn_SINGLE_LANE_AND_SIZE ();
11700
11701 for (i = 0; i < nregs; i++)
11702 switch (size)
11703 {
11704 case 0:
11705 {
11706 uint8_t val = aarch64_get_vec_u8 (cpu, vd + i, lane);
11707 aarch64_set_mem_u8 (cpu, address + i, val);
11708 break;
11709 }
11710
11711 case 1:
11712 {
11713 uint16_t val = aarch64_get_vec_u16 (cpu, vd + i, lane);
11714 aarch64_set_mem_u16 (cpu, address + (i * 2), val);
11715 break;
11716 }
11717
11718 case 2:
11719 {
11720 uint32_t val = aarch64_get_vec_u32 (cpu, vd + i, lane);
11721 aarch64_set_mem_u32 (cpu, address + (i * 4), val);
11722 break;
11723 }
11724
11725 case 3:
11726 {
11727 uint64_t val = aarch64_get_vec_u64 (cpu, vd + i, lane);
11728 aarch64_set_mem_u64 (cpu, address + (i * 8), val);
11729 break;
11730 }
11731 }
11732 }
11733
11734 /* Load single structure into all lanes of N registers. */
11735 static void
11736 do_vec_LDnR (sim_cpu *cpu, uint64_t address)
11737 {
11738 /* instr[31] = 0
11739 instr[30] = element selector 0=>half, 1=>all elements
11740 instr[29,24] = 00 1101
11741 instr[23] = 0=>simple, 1=>post
11742 instr[22] = 1
11743 instr[21] = width: LD1R-or-LD3R (0) / LD2R-or-LD4R (1)
11744 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11745 11111 (immediate post inc)
11746 instr[15,14] = 11
11747 instr[13] = width: LD1R-or-LD2R (0) / LD3R-or-LD4R (1)
11748 instr[12] = 0
11749 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11750 10=> word(s), 11=> double(d)
11751 instr[9,5] = address
11752 instr[4,0] = Vd */
11753
11754 unsigned full = INSTR (30, 30);
11755 unsigned vd = INSTR (4, 0);
11756 unsigned size = INSTR (11, 10);
11757 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11758 int i, n;
11759
11760 NYI_assert (29, 24, 0x0D);
11761 NYI_assert (22, 22, 1);
11762 NYI_assert (15, 14, 3);
11763 NYI_assert (12, 12, 0);
11764
11765 for (n = 0; n < nregs; n++)
11766 switch (size)
11767 {
11768 case 0:
11769 {
11770 uint8_t val = aarch64_get_mem_u8 (cpu, address + n);
11771 for (i = 0; i < (full ? 16 : 8); i++)
11772 aarch64_set_vec_u8 (cpu, vd + n, i, val);
11773 break;
11774 }
11775
11776 case 1:
11777 {
11778 uint16_t val = aarch64_get_mem_u16 (cpu, address + (n * 2));
11779 for (i = 0; i < (full ? 8 : 4); i++)
11780 aarch64_set_vec_u16 (cpu, vd + n, i, val);
11781 break;
11782 }
11783
11784 case 2:
11785 {
11786 uint32_t val = aarch64_get_mem_u32 (cpu, address + (n * 4));
11787 for (i = 0; i < (full ? 4 : 2); i++)
11788 aarch64_set_vec_u32 (cpu, vd + n, i, val);
11789 break;
11790 }
11791
11792 case 3:
11793 {
11794 uint64_t val = aarch64_get_mem_u64 (cpu, address + (n * 8));
11795 for (i = 0; i < (full ? 2 : 1); i++)
11796 aarch64_set_vec_u64 (cpu, vd + n, i, val);
11797 break;
11798 }
11799
11800 default:
11801 HALT_UNALLOC;
11802 }
11803 }
11804
11805 static void
11806 do_vec_load_store (sim_cpu *cpu)
11807 {
11808 /* {LD|ST}<N> {Vd..Vd+N}, vaddr
11809
11810 instr[31] = 0
11811 instr[30] = element selector 0=>half, 1=>all elements
11812 instr[29,25] = 00110
11813 instr[24] = 0=>multiple struct, 1=>single struct
11814 instr[23] = 0=>simple, 1=>post
11815 instr[22] = 0=>store, 1=>load
11816 instr[21] = 0 (LDn) / small(0)-large(1) selector (LDnR)
11817 instr[20,16] = 00000 (simple), Vinc (reg-post-inc, no SP),
11818 11111 (immediate post inc)
11819 instr[15,12] = elements and destinations. eg for load:
11820 0000=>LD4 => load multiple 4-element to
11821 four consecutive registers
11822 0100=>LD3 => load multiple 3-element to
11823 three consecutive registers
11824 1000=>LD2 => load multiple 2-element to
11825 two consecutive registers
11826 0010=>LD1 => load multiple 1-element to
11827 four consecutive registers
11828 0110=>LD1 => load multiple 1-element to
11829 three consecutive registers
11830 1010=>LD1 => load multiple 1-element to
11831 two consecutive registers
11832 0111=>LD1 => load multiple 1-element to
11833 one register
11834                      1100=>LD1R,LD2R
11835                      1110=>LD3R,LD4R
11836 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11837 10=> word(s), 11=> double(d)
11838 instr[9,5] = Vn, can be SP
11839 instr[4,0] = Vd */
11840
11841 int single;
11842 int post;
11843 int load;
11844 unsigned vn;
11845 uint64_t address;
11846 int type;
11847
11848 if (INSTR (31, 31) != 0 || INSTR (29, 25) != 0x06)
11849 HALT_NYI;
11850
11851 single = INSTR (24, 24);
11852 post = INSTR (23, 23);
11853 load = INSTR (22, 22);
11854 type = INSTR (15, 12);
11855 vn = INSTR (9, 5);
11856 address = aarch64_get_reg_u64 (cpu, vn, SP_OK);
11857
11858 if (! single && INSTR (21, 21) != 0)
11859 HALT_UNALLOC;
11860
11861 if (post)
11862 {
11863 unsigned vm = INSTR (20, 16);
11864
11865 if (vm == R31)
11866 {
11867 unsigned sizeof_operation;
11868
11869 if (single)
11870 {
11871 if ((type >= 0) && (type <= 11))
11872 {
11873 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11874 switch (INSTR (15, 14))
11875 {
11876 case 0:
11877 sizeof_operation = nregs * 1;
11878 break;
11879 case 1:
11880 sizeof_operation = nregs * 2;
11881 break;
11882 case 2:
11883 if (INSTR (10, 10) == 0)
11884 sizeof_operation = nregs * 4;
11885 else
11886 sizeof_operation = nregs * 8;
11887 break;
11888 default:
11889 HALT_UNALLOC;
11890 }
11891 }
11892 else if (type == 0xC)
11893 {
11894 sizeof_operation = INSTR (21, 21) ? 2 : 1;
11895 sizeof_operation <<= INSTR (11, 10);
11896 }
11897 else if (type == 0xE)
11898 {
11899 sizeof_operation = INSTR (21, 21) ? 4 : 3;
11900 sizeof_operation <<= INSTR (11, 10);
11901 }
11902 else
11903 HALT_UNALLOC;
11904 }
11905 else
11906 {
11907 switch (type)
11908 {
11909 case 0: sizeof_operation = 32; break;
11910 case 4: sizeof_operation = 24; break;
11911 case 8: sizeof_operation = 16; break;
11912
11913 case 7:
11914 /* One register, immediate offset variant. */
11915 sizeof_operation = 8;
11916 break;
11917
11918 case 10:
11919 /* Two registers, immediate offset variant. */
11920 sizeof_operation = 16;
11921 break;
11922
11923 case 6:
11924 /* Three registers, immediate offset variant. */
11925 sizeof_operation = 24;
11926 break;
11927
11928 case 2:
11929 /* Four registers, immediate offset variant. */
11930 sizeof_operation = 32;
11931 break;
11932
11933 default:
11934 HALT_UNALLOC;
11935 }
11936
11937 if (INSTR (30, 30))
11938 sizeof_operation *= 2;
11939 }
11940
11941 aarch64_set_reg_u64 (cpu, vn, SP_OK, address + sizeof_operation);
11942 }
11943 else
11944 aarch64_set_reg_u64 (cpu, vn, SP_OK,
11945 address + aarch64_get_reg_u64 (cpu, vm, NO_SP));
11946 }
11947 else
11948 {
11949 NYI_assert (20, 16, 0);
11950 }
11951
11952 if (single)
11953 {
11954 if (load)
11955 {
11956 if ((type >= 0) && (type <= 11))
11957 do_vec_LDn_single (cpu, address);
11958 else if ((type == 0xC) || (type == 0xE))
11959 do_vec_LDnR (cpu, address);
11960 else
11961 HALT_UNALLOC;
11962 return;
11963 }
11964
11965 /* Stores. */
11966 if ((type >= 0) && (type <= 11))
11967 {
11968 do_vec_STn_single (cpu, address);
11969 return;
11970 }
11971
11972 HALT_UNALLOC;
11973 }
11974
11975 if (load)
11976 {
11977 switch (type)
11978 {
11979 case 0: LD4 (cpu, address); return;
11980 case 4: LD3 (cpu, address); return;
11981 case 8: LD2 (cpu, address); return;
11982 case 2: LD1_4 (cpu, address); return;
11983 case 6: LD1_3 (cpu, address); return;
11984 case 10: LD1_2 (cpu, address); return;
11985 case 7: LD1_1 (cpu, address); return;
11986
11987 default:
11988 HALT_UNALLOC;
11989 }
11990 }
11991
11992 /* Stores. */
11993 switch (type)
11994 {
11995 case 0: ST4 (cpu, address); return;
11996 case 4: ST3 (cpu, address); return;
11997 case 8: ST2 (cpu, address); return;
11998 case 2: ST1_4 (cpu, address); return;
11999 case 6: ST1_3 (cpu, address); return;
12000 case 10: ST1_2 (cpu, address); return;
12001 case 7: ST1_1 (cpu, address); return;
12002 default:
12003 HALT_UNALLOC;
12004 }
12005 }
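
/* Worked example of the post-increment sizing above: for the immediate
   post-indexed "ld4 {v0.16b-v3.16b}, [x0], #64" the multiple-structure
   branch picks sizeof_operation = 32 (type 0) and doubles it for the
   full-width form, so X0 advances by 64; for "ld1r {v0.8b}, [x0], #1"
   the LDnR branch computes 1 << 0 = 1 byte.  */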
12006
12007 static void
12008 dexLdSt (sim_cpu *cpu)
12009 {
12010 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
12011 assert group == GROUP_LDST_0100 || group == GROUP_LDST_0110 ||
12012 group == GROUP_LDST_1100 || group == GROUP_LDST_1110
12013 bits [29,28:26] of a LS are the secondary dispatch vector. */
12014 uint32_t group2 = dispatchLS (aarch64_get_instr (cpu));
12015
12016 switch (group2)
12017 {
12018 case LS_EXCL_000:
12019 dexLoadExclusive (cpu); return;
12020
12021 case LS_LIT_010:
12022 case LS_LIT_011:
12023 dexLoadLiteral (cpu); return;
12024
12025 case LS_OTHER_110:
12026 case LS_OTHER_111:
12027 dexLoadOther (cpu); return;
12028
12029 case LS_ADVSIMD_001:
12030 do_vec_load_store (cpu); return;
12031
12032 case LS_PAIR_100:
12033 dex_load_store_pair_gr (cpu); return;
12034
12035 case LS_PAIR_101:
12036 dex_load_store_pair_fp (cpu); return;
12037
12038 default:
12039 /* Should never reach here. */
12040 HALT_NYI;
12041 }
12042 }
12043
12044 /* Specific decode and execute for group Data Processing Register. */
12045
12046 static void
12047 dexLogicalShiftedRegister (sim_cpu *cpu)
12048 {
12049 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12050 instr[30,29] = op
12051 instr[28:24] = 01010
12052 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> ROR
12053 instr[21] = N
12054 instr[20,16] = Rm
12055 instr[15,10] = count : must be 0xxxxx for 32 bit
12056 instr[9,5] = Rn
12057 instr[4,0] = Rd */
12058
12059 uint32_t size = INSTR (31, 31);
12060 Shift shiftType = INSTR (23, 22);
12061 uint32_t count = INSTR (15, 10);
12062
12063 /* 32 bit operations must have count[5] = 0.
12064 or else we have an UNALLOC. */
12065 if (size == 0 && uimm (count, 5, 5))
12066 HALT_UNALLOC;
12067
12068 /* Dispatch on size:op:N. */
12069 switch ((INSTR (31, 29) << 1) | INSTR (21, 21))
12070 {
12071 case 0: and32_shift (cpu, shiftType, count); return;
12072 case 1: bic32_shift (cpu, shiftType, count); return;
12073 case 2: orr32_shift (cpu, shiftType, count); return;
12074 case 3: orn32_shift (cpu, shiftType, count); return;
12075 case 4: eor32_shift (cpu, shiftType, count); return;
12076 case 5: eon32_shift (cpu, shiftType, count); return;
12077 case 6: ands32_shift (cpu, shiftType, count); return;
12078 case 7: bics32_shift (cpu, shiftType, count); return;
12079 case 8: and64_shift (cpu, shiftType, count); return;
12080 case 9: bic64_shift (cpu, shiftType, count); return;
12081 case 10:orr64_shift (cpu, shiftType, count); return;
12082 case 11:orn64_shift (cpu, shiftType, count); return;
12083 case 12:eor64_shift (cpu, shiftType, count); return;
12084 case 13:eon64_shift (cpu, shiftType, count); return;
12085 case 14:ands64_shift (cpu, shiftType, count); return;
12086 case 15:bics64_shift (cpu, shiftType, count); return;
12087 }
12088 }
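
/* Illustrative sketch (not built): the size:op:N dispatch above separates
   the eight logical ops; e.g. for "bic w0, w1, w2" size = 0, op = 00 and
   N = 1, giving dispatch 1 and hence bic32_shift.  Helper names invented.  */
#if 0
#include <stdint.h>

static uint32_t
ex_logical_dispatch (uint32_t insn)
{
  uint32_t size_op = (insn >> 29) & 0x7;	/* instr[31,29].  */
  uint32_t n = (insn >> 21) & 0x1;		/* instr[21].  */

  return (size_op << 1) | n;
}
#endif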
12089
12090 /* 32 bit conditional select. */
12091 static void
12092 csel32 (sim_cpu *cpu, CondCode cc)
12093 {
12094 unsigned rm = INSTR (20, 16);
12095 unsigned rn = INSTR (9, 5);
12096 unsigned rd = INSTR (4, 0);
12097
12098 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12099 testConditionCode (cpu, cc)
12100 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12101 : aarch64_get_reg_u32 (cpu, rm, NO_SP));
12102 }
12103
12104 /* 64 bit conditional select. */
12105 static void
12106 csel64 (sim_cpu *cpu, CondCode cc)
12107 {
12108 unsigned rm = INSTR (20, 16);
12109 unsigned rn = INSTR (9, 5);
12110 unsigned rd = INSTR (4, 0);
12111
12112 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12113 testConditionCode (cpu, cc)
12114 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12115 : aarch64_get_reg_u64 (cpu, rm, NO_SP));
12116 }
12117
12118 /* 32 bit conditional increment. */
12119 static void
12120 csinc32 (sim_cpu *cpu, CondCode cc)
12121 {
12122 unsigned rm = INSTR (20, 16);
12123 unsigned rn = INSTR (9, 5);
12124 unsigned rd = INSTR (4, 0);
12125
12126 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12127 testConditionCode (cpu, cc)
12128 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12129 : aarch64_get_reg_u32 (cpu, rm, NO_SP) + 1);
12130 }
12131
12132 /* 64 bit conditional increment. */
12133 static void
12134 csinc64 (sim_cpu *cpu, CondCode cc)
12135 {
12136 unsigned rm = INSTR (20, 16);
12137 unsigned rn = INSTR (9, 5);
12138 unsigned rd = INSTR (4, 0);
12139
12140 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12141 testConditionCode (cpu, cc)
12142 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12143 : aarch64_get_reg_u64 (cpu, rm, NO_SP) + 1);
12144 }
12145
12146 /* 32 bit conditional invert. */
12147 static void
12148 csinv32 (sim_cpu *cpu, CondCode cc)
12149 {
12150 unsigned rm = INSTR (20, 16);
12151 unsigned rn = INSTR (9, 5);
12152 unsigned rd = INSTR (4, 0);
12153
12154 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12155 testConditionCode (cpu, cc)
12156 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12157 : ~ aarch64_get_reg_u32 (cpu, rm, NO_SP));
12158 }
12159
12160 /* 64 bit conditional invert. */
12161 static void
12162 csinv64 (sim_cpu *cpu, CondCode cc)
12163 {
12164 unsigned rm = INSTR (20, 16);
12165 unsigned rn = INSTR (9, 5);
12166 unsigned rd = INSTR (4, 0);
12167
12168 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12169 testConditionCode (cpu, cc)
12170 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12171 : ~ aarch64_get_reg_u64 (cpu, rm, NO_SP));
12172 }
12173
12174 /* 32 bit conditional negate. */
12175 static void
12176 csneg32 (sim_cpu *cpu, CondCode cc)
12177 {
12178 unsigned rm = INSTR (20, 16);
12179 unsigned rn = INSTR (9, 5);
12180 unsigned rd = INSTR (4, 0);
12181
12182 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12183 testConditionCode (cpu, cc)
12184 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12185 : - aarch64_get_reg_u32 (cpu, rm, NO_SP));
12186 }
12187
12188 /* 64 bit conditional negate. */
12189 static void
12190 csneg64 (sim_cpu *cpu, CondCode cc)
12191 {
12192 unsigned rm = INSTR (20, 16);
12193 unsigned rn = INSTR (9, 5);
12194 unsigned rd = INSTR (4, 0);
12195
12196 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12197 testConditionCode (cpu, cc)
12198 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12199 : - aarch64_get_reg_u64 (cpu, rm, NO_SP));
12200 }
12201
12202 static void
12203 dexCondSelect (sim_cpu *cpu)
12204 {
12205   /* instr[28,21] = 11010100
12206      instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12207      instr[30],instr[11,10] = op : 000 ==> CSEL, 001 ==> CSINC,
12208                               100 ==> CSINV, 101 ==> CSNEG,
12209                               _1_ ==> UNALLOC
12210      instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12211      instr[20,16] = Rm
12212      instr[15,12] = cond  */
12213
12214 CondCode cc = INSTR (15, 12);
12215 uint32_t S = INSTR (29, 29);
12216 uint32_t op2 = INSTR (11, 10);
12217
12218 if (S == 1)
12219 HALT_UNALLOC;
12220
12221 if (op2 & 0x2)
12222 HALT_UNALLOC;
12223
12224 switch ((INSTR (31, 30) << 1) | op2)
12225 {
12226 case 0: csel32 (cpu, cc); return;
12227 case 1: csinc32 (cpu, cc); return;
12228 case 2: csinv32 (cpu, cc); return;
12229 case 3: csneg32 (cpu, cc); return;
12230 case 4: csel64 (cpu, cc); return;
12231 case 5: csinc64 (cpu, cc); return;
12232 case 6: csinv64 (cpu, cc); return;
12233 case 7: csneg64 (cpu, cc); return;
12234 }
12235 }
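
/* Illustrative note: the dispatch above also covers the usual aliases,
   e.g. "cinc w0, w1, eq" is CSINC with Rm == Rn and the inverted
   condition, so it lands in csinc32 like any other CSINC.  A plain-C
   rendering of the four selects (names invented, not built):  */
#if 0
#include <stdint.h>

static uint32_t
ex_cond_select_32 (int cond_holds, uint32_t n, uint32_t m, unsigned op)
{
  if (cond_holds)
    return n;

  switch (op)
    {
    case 0: return m;		/* CSEL  */
    case 1: return m + 1;	/* CSINC */
    case 2: return ~m;		/* CSINV */
    default: return 0u - m;	/* CSNEG */
    }
}
#endif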
12236
12237 /* Some helpers for counting leading 1 or 0 bits. */
12238
12239 /* Counts the number of leading bits which are the same
12240 in a 32 bit value in the range 1 to 32. */
12241 static uint32_t
12242 leading32 (uint32_t value)
12243 {
12244   int32_t mask = 0xffff0000;
12245   uint32_t count = 16; /* Counts number of bits set in mask.  */
12246 uint32_t lo = 1; /* Lower bound for number of sign bits. */
12247 uint32_t hi = 32; /* Upper bound for number of sign bits. */
12248
12249 while (lo + 1 < hi)
12250 {
12251 int32_t test = (value & mask);
12252
12253 if (test == 0 || test == mask)
12254 {
12255 lo = count;
12256 count = (lo + hi) / 2;
12257 mask >>= (count - lo);
12258 }
12259 else
12260 {
12261 hi = count;
12262 count = (lo + hi) / 2;
12263 mask <<= hi - count;
12264 }
12265 }
12266
12267 if (lo != hi)
12268 {
12269 int32_t test;
12270
12271 mask >>= 1;
12272 test = (value & mask);
12273
12274 if (test == 0 || test == mask)
12275 count = hi;
12276 else
12277 count = lo;
12278 }
12279
12280 return count;
12281 }
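
/* Worked examples for leading32 (the count is how many of the most
   significant bits are identical, in the range 1 to 32):
     leading32 (0x00000001) == 31   (31 leading zeros, then a one)
     leading32 (0xFFFFFFFE) == 31   (31 leading ones, then a zero)
     leading32 (0x80000000) == 1    (the sign bit differs from bit 30).
   A cross-check against the obvious linear scan (sketch, not built):  */
#if 0
#include <stdint.h>
#include <assert.h>

static uint32_t
ex_leading32_ref (uint32_t v)
{
  uint32_t top = v >> 31, n = 1;
  int i;

  for (i = 30; i >= 0 && ((v >> i) & 1) == top; i--)
    n++;

  return n;
}

static void
ex_leading32_check (void)
{
  assert (ex_leading32_ref (0x00000001) == 31);
  assert (ex_leading32_ref (0x80000000) == 1);
}
#endif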
12282
12283 /* Counts the number of leading bits which are the same
12284 in a 64 bit value in the range 1 to 64. */
12285 static uint64_t
12286 leading64 (uint64_t value)
12287 {
12288   int64_t mask = 0xffffffff00000000LL;
12289 uint64_t count = 32; /* Counts number of bits set in mask. */
12290 uint64_t lo = 1; /* Lower bound for number of sign bits. */
12291 uint64_t hi = 64; /* Upper bound for number of sign bits. */
12292
12293 while (lo + 1 < hi)
12294 {
12295 int64_t test = (value & mask);
12296
12297 if (test == 0 || test == mask)
12298 {
12299 lo = count;
12300 count = (lo + hi) / 2;
12301 mask >>= (count - lo);
12302 }
12303 else
12304 {
12305 hi = count;
12306 count = (lo + hi) / 2;
12307 mask <<= hi - count;
12308 }
12309 }
12310
12311 if (lo != hi)
12312 {
12313 int64_t test;
12314
12315 mask >>= 1;
12316 test = (value & mask);
12317
12318 if (test == 0 || test == mask)
12319 count = hi;
12320 else
12321 count = lo;
12322 }
12323
12324 return count;
12325 }
12326
12327 /* Bit operations. */
12328 /* N.B register args may not be SP. */
12329
12330 /* 32 bit count leading sign bits. */
12331 static void
12332 cls32 (sim_cpu *cpu)
12333 {
12334 unsigned rn = INSTR (9, 5);
12335 unsigned rd = INSTR (4, 0);
12336
12337 /* N.B. the result needs to exclude the leading bit. */
12338 aarch64_set_reg_u64
12339 (cpu, rd, NO_SP, leading32 (aarch64_get_reg_u32 (cpu, rn, NO_SP)) - 1);
12340 }
12341
12342 /* 64 bit count leading sign bits. */
12343 static void
12344 cls64 (sim_cpu *cpu)
12345 {
12346 unsigned rn = INSTR (9, 5);
12347 unsigned rd = INSTR (4, 0);
12348
12349 /* N.B. the result needs to exclude the leading bit. */
12350 aarch64_set_reg_u64
12351 (cpu, rd, NO_SP, leading64 (aarch64_get_reg_u64 (cpu, rn, NO_SP)) - 1);
12352 }
12353
12354 /* 32 bit count leading zero bits. */
12355 static void
12356 clz32 (sim_cpu *cpu)
12357 {
12358 unsigned rn = INSTR (9, 5);
12359 unsigned rd = INSTR (4, 0);
12360 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12361
12362   /* If the sign (top) bit is set then the count is 0.  */
12363 if (pick32 (value, 31, 31))
12364 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12365 else
12366 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading32 (value));
12367 }
12368
12369 /* 64 bit count leading zero bits. */
12370 static void
12371 clz64 (sim_cpu *cpu)
12372 {
12373 unsigned rn = INSTR (9, 5);
12374 unsigned rd = INSTR (4, 0);
12375 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12376
12377   /* If the sign (top) bit is set then the count is 0.  */
12378 if (pick64 (value, 63, 63))
12379 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12380 else
12381 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading64 (value));
12382 }
12383
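/* On GCC/Clang hosts __builtin_clz computes the same leading zero
count without a search; a guarded sketch, since the builtin is not
standard C and is undefined at zero. Illustrative only, unused by
the decoder. */
#ifdef __GNUC__
static uint32_t
clz32_ref (uint32_t value)
{
/* __builtin_clz (0) is undefined, so zero is handled explicitly. */
return value == 0 ? 32 : (uint32_t) __builtin_clz (value);
}
#endif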
12384 /* 32 bit reverse bits. */
12385 static void
12386 rbit32 (sim_cpu *cpu)
12387 {
12388 unsigned rn = INSTR (9, 5);
12389 unsigned rd = INSTR (4, 0);
12390 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12391 uint32_t result = 0;
12392 int i;
12393
12394 for (i = 0; i < 32; i++)
12395 {
12396 result <<= 1;
12397 result |= (value & 1);
12398 value >>= 1;
12399 }
12400 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12401 }
12402
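/* The loop above takes 32 iterations; the classic mask-and-swap
formulation reverses the bits in five steps. A sketch for comparison
only -- rbit32_ref is a hypothetical name and is not called anywhere. */
static uint32_t
rbit32_ref (uint32_t v)
{
v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1); /* Swap adjacent bits. */
v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2); /* Swap bit pairs. */
v = ((v >> 4) & 0x0f0f0f0f) | ((v & 0x0f0f0f0f) << 4); /* Swap nibbles. */
v = ((v >> 8) & 0x00ff00ff) | ((v & 0x00ff00ff) << 8); /* Swap bytes. */
return (v >> 16) | (v << 16); /* Swap half words. */
}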
12403 /* 64 bit reverse bits. */
12404 static void
12405 rbit64 (sim_cpu *cpu)
12406 {
12407 unsigned rn = INSTR (9, 5);
12408 unsigned rd = INSTR (4, 0);
12409 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12410 uint64_t result = 0;
12411 int i;
12412
12413 for (i = 0; i < 64; i++)
12414 {
12415 result <<= 1;
12416 result |= (value & 1UL);
12417 value >>= 1;
12418 }
12419 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12420 }
12421
12422 /* 32 bit reverse bytes. */
12423 static void
12424 rev32 (sim_cpu *cpu)
12425 {
12426 unsigned rn = INSTR (9, 5);
12427 unsigned rd = INSTR (4, 0);
12428 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12429 uint32_t result = 0;
12430 int i;
12431
12432 for (i = 0; i < 4; i++)
12433 {
12434 result <<= 8;
12435 result |= (value & 0xff);
12436 value >>= 8;
12437 }
12438 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12439 }
12440
12441 /* 64 bit reverse bytes. */
12442 static void
12443 rev64 (sim_cpu *cpu)
12444 {
12445 unsigned rn = INSTR (9, 5);
12446 unsigned rd = INSTR (4, 0);
12447 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12448 uint64_t result = 0;
12449 int i;
12450
12451 for (i = 0; i < 8; i++)
12452 {
12453 result <<= 8;
12454 result |= (value & 0xffULL);
12455 value >>= 8;
12456 }
12457 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12458 }
12459
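/* Architecturally, REV32 on a 64 bit register byte-reverses each of
its two 32 bit words independently; the 64 bit case 10 dispatch in
dexDataProc1Source below reuses rev32 and so only produces the
reversed low word. A sketch of the full-width behaviour under the
hypothetical name rev32_64; it is not wired into the decoder. */
static void
rev32_64 (sim_cpu *cpu)
{
unsigned rn = INSTR (9, 5);
unsigned rd = INSTR (4, 0);
uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
uint64_t result = 0;
int w, i;

/* Byte-reverse the low and high 32 bit words separately. */
for (w = 0; w < 2; w++)
{
uint32_t word = (uint32_t) (value >> (w * 32));
uint32_t rev = 0;

for (i = 0; i < 4; i++)
{
rev <<= 8;
rev |= (word & 0xff);
word >>= 8;
}
result |= ((uint64_t) rev) << (w * 32);
}
aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
}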
12460 /* 32 bit reverse shorts. */
12461 /* N.B. this reverses the order of the bytes in each half word. */
12462 static void
12463 revh32 (sim_cpu *cpu)
12464 {
12465 unsigned rn = INSTR (9, 5);
12466 unsigned rd = INSTR (4, 0);
12467 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12468 uint32_t result = 0;
12469 int i;
12470
12471 for (i = 0; i < 2; i++)
12472 {
12473 result <<= 8;
12474 result |= (value & 0x00ff00ff);
12475 value >>= 8;
12476 }
12477 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12478 }
12479
12480 /* 64 bit reverse shorts. */
12481 /* N.B. this reverses the order of the bytes in each half word. */
12482 static void
12483 revh64 (sim_cpu *cpu)
12484 {
12485 unsigned rn = INSTR (9, 5);
12486 unsigned rd = INSTR (4, 0);
12487 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12488 uint64_t result = 0;
12489 int i;
12490
12491 for (i = 0; i < 2; i++)
12492 {
12493 result <<= 8;
12494 result |= (value & 0x00ff00ff00ff00ffULL);
12495 value >>= 8;
12496 }
12497 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12498 }
12499
12500 static void
12501 dexDataProc1Source (sim_cpu *cpu)
12502 {
12503 /* instr[30] = 1
12504 instr[28,21] = 11010110
12505 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12506 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12507 instr[20,16] = opcode2 : 00000 ==> ok, ow ==> UNALLOC
12508 instr[15,10] = opcode : 000000 ==> RBIT, 000001 ==> REV16,
12509 000010 ==> REV (REV32 for 64 bit), 000011 ==> REV (64 bit only),
12510 000100 ==> CLZ, 000101 ==> CLS
12511 ow ==> UNALLOC
12512 instr[9,5] = rn : may not be SP
12513 instr[4,0] = rd : may not be SP. */
12514
12515 uint32_t S = INSTR (29, 29);
12516 uint32_t opcode2 = INSTR (20, 16);
12517 uint32_t opcode = INSTR (15, 10);
12518 uint32_t dispatch = ((INSTR (31, 31) << 3) | opcode);
12519
12520 if (S == 1)
12521 HALT_UNALLOC;
12522
12523 if (opcode2 != 0)
12524 HALT_UNALLOC;
12525
12526 if (opcode & 0x38)
12527 HALT_UNALLOC;
12528
12529 switch (dispatch)
12530 {
12531 case 0: rbit32 (cpu); return;
12532 case 1: revh32 (cpu); return;
12533 case 2: rev32 (cpu); return;
12534 case 4: clz32 (cpu); return;
12535 case 5: cls32 (cpu); return;
12536 case 8: rbit64 (cpu); return;
12537 case 9: revh64 (cpu); return;
12538 case 10: rev32 (cpu); return;
12539 case 11: rev64 (cpu); return;
12540 case 12:clz64 (cpu); return;
12541 case 13:cls64 (cpu); return;
12542 default: HALT_UNALLOC;
12543 }
12544 }
12545
12546 /* Variable shift.
12547 Shifts by count supplied in register.
12548 N.B. register args may not be SP.
12549 These all use the shifted auxiliary function for simplicity
12550 and clarity. Writing the actual shift inline would avoid a
12551 branch and so be faster, but would also necessitate getting
12552 signs right; a sketch of that inline variant follows asrv32. */
12553
12554 /* 32 bit arithmetic shift right. */
12555 static void
12556 asrv32 (sim_cpu *cpu)
12557 {
12558 unsigned rm = INSTR (20, 16);
12559 unsigned rn = INSTR (9, 5);
12560 unsigned rd = INSTR (4, 0);
12561
12562 aarch64_set_reg_u64
12563 (cpu, rd, NO_SP,
12564 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ASR,
12565 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12566 }
12567
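/* The inline variant alluded to above, as a sketch for the 32 bit
arithmetic case: it relies on the host compiler implementing signed
right shift arithmetically (near-universal, but implementation-defined
in C), which is exactly the "getting signs right" caveat. Hypothetical
name, unused by the decoder. */
static void
asrv32_inline (sim_cpu *cpu)
{
unsigned rm = INSTR (20, 16);
unsigned rn = INSTR (9, 5);
unsigned rd = INSTR (4, 0);
int32_t value = (int32_t) aarch64_get_reg_u32 (cpu, rn, NO_SP);
uint32_t amount = aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f;

/* A signed >> replicates the sign bit on arithmetic-shift hosts. */
aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) (value >> amount));
}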
12568 /* 64 bit arithmetic shift right. */
12569 static void
12570 asrv64 (sim_cpu *cpu)
12571 {
12572 unsigned rm = INSTR (20, 16);
12573 unsigned rn = INSTR (9, 5);
12574 unsigned rd = INSTR (4, 0);
12575
12576 aarch64_set_reg_u64
12577 (cpu, rd, NO_SP,
12578 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ASR,
12579 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12580 }
12581
12582 /* 32 bit logical shift left. */
12583 static void
12584 lslv32 (sim_cpu *cpu)
12585 {
12586 unsigned rm = INSTR (20, 16);
12587 unsigned rn = INSTR (9, 5);
12588 unsigned rd = INSTR (4, 0);
12589
12590 aarch64_set_reg_u64
12591 (cpu, rd, NO_SP,
12592 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSL,
12593 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12594 }
12595
12596 /* 64 bit logical shift left. */
12597 static void
12598 lslv64 (sim_cpu *cpu)
12599 {
12600 unsigned rm = INSTR (20, 16);
12601 unsigned rn = INSTR (9, 5);
12602 unsigned rd = INSTR (4, 0);
12603
12604 aarch64_set_reg_u64
12605 (cpu, rd, NO_SP,
12606 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSL,
12607 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12608 }
12609
12610 /* 32 bit logical shift right. */
12611 static void
12612 lsrv32 (sim_cpu *cpu)
12613 {
12614 unsigned rm = INSTR (20, 16);
12615 unsigned rn = INSTR (9, 5);
12616 unsigned rd = INSTR (4, 0);
12617
12618 aarch64_set_reg_u64
12619 (cpu, rd, NO_SP,
12620 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSR,
12621 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12622 }
12623
12624 /* 64 bit logical shift right. */
12625 static void
12626 lsrv64 (sim_cpu *cpu)
12627 {
12628 unsigned rm = INSTR (20, 16);
12629 unsigned rn = INSTR (9, 5);
12630 unsigned rd = INSTR (4, 0);
12631
12632 aarch64_set_reg_u64
12633 (cpu, rd, NO_SP,
12634 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSR,
12635 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12636 }
12637
12638 /* 32 bit rotate right. */
12639 static void
12640 rorv32 (sim_cpu *cpu)
12641 {
12642 unsigned rm = INSTR (20, 16);
12643 unsigned rn = INSTR (9, 5);
12644 unsigned rd = INSTR (4, 0);
12645
12646 aarch64_set_reg_u64
12647 (cpu, rd, NO_SP,
12648 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ROR,
12649 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12650 }
12651
12652 /* 64 bit rotate right. */
12653 static void
12654 rorv64 (sim_cpu *cpu)
12655 {
12656 unsigned rm = INSTR (20, 16);
12657 unsigned rn = INSTR (9, 5);
12658 unsigned rd = INSTR (4, 0);
12659
12660 aarch64_set_reg_u64
12661 (cpu, rd, NO_SP,
12662 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ROR,
12663 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12664 }
12665
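/* Rotation has no C operator; shifted32 presumably composes it from
two shifts. As a sketch, the usual branch-free expression, written so
that a rotate by zero never shifts by the full width (undefined in C).
ror32_ref is a hypothetical helper, unused by the decoder. */
static uint32_t
ror32_ref (uint32_t value, unsigned n)
{
n &= 31;
return (value >> n) | (value << ((32 - n) & 31));
}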
12666
12667 /* Divide. */
12668
12669 /* 32 bit signed divide. */
12670 static void
12671 sdiv32 (sim_cpu *cpu)
12672 {
12673 unsigned rm = INSTR (20, 16);
12674 unsigned rn = INSTR (9, 5);
12675 unsigned rd = INSTR (4, 0);
12676 /* N.B. the pseudo-code does the divide using 64 bit data. */
12677 /* N.B. integer division in C99 truncates towards zero, as required. */
12678 int64_t dividend = aarch64_get_reg_s32 (cpu, rn, NO_SP);
12679 int64_t divisor = aarch64_get_reg_s32 (cpu, rm, NO_SP);
12680
12681 aarch64_set_reg_s64 (cpu, rd, NO_SP,
12682 divisor ? ((int32_t) (dividend / divisor)) : 0);
12683 }
12684
12685 /* 64 bit signed divide. */
12686 static void
12687 sdiv64 (sim_cpu *cpu)
12688 {
12689 unsigned rm = INSTR (20, 16);
12690 unsigned rn = INSTR (9, 5);
12691 unsigned rd = INSTR (4, 0);
12692 int64_t dividend = aarch64_get_reg_s64 (cpu, rn, NO_SP);
12693 int64_t divisor = aarch64_get_reg_s64 (cpu, rm, NO_SP);
12694 /* C99 division truncates towards zero as required, but INT64_MIN / -1
12695 overflows; doing division by -1 as an unsigned negation makes that
12696 case wrap to INT64_MIN instead of trapping the host. */
12697 aarch64_set_reg_s64 (cpu, rd, NO_SP, divisor == 0 ? 0
12698 : divisor == -1 ? -(uint64_t) dividend : dividend / divisor);
12699 }
12700
12701 /* 32 bit unsigned divide. */
12702 static void
12703 udiv32 (sim_cpu *cpu)
12704 {
12705 unsigned rm = INSTR (20, 16);
12706 unsigned rn = INSTR (9, 5);
12707 unsigned rd = INSTR (4, 0);
12708
12709 /* N.B. the pseudo-code does the divide using 64 bit data. */
12710 uint64_t dividend = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12711 uint64_t divisor = aarch64_get_reg_u32 (cpu, rm, NO_SP);
12712
12713 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12714 divisor ? (uint32_t) (dividend / divisor) : 0);
12715 }
12716
12717 /* 64 bit unsigned divide. */
12718 static void
12719 udiv64 (sim_cpu *cpu)
12720 {
12721 unsigned rm = INSTR (20, 16);
12722 unsigned rn = INSTR (9, 5);
12723 unsigned rd = INSTR (4, 0);
12724
12725 /* N.B. unsigned division truncates (rounds towards zero) by definition. */
12726 uint64_t divisor = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12727
12728 aarch64_set_reg_u64
12729 (cpu, rd, NO_SP,
12730 divisor ? (aarch64_get_reg_u64 (cpu, rn, NO_SP) / divisor) : 0);
12731 }
12732
12733 static void
12734 dexDataProc2Source (sim_cpu *cpu)
12735 {
12736 /* assert instr[30] == 0
12737 instr[28,21] == 11010110
12738 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12739 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12740 instr[15,10] = opcode : 000010 ==> UDIV, 000011 ==> SDIV,
12741 001000 ==> LSLV, 001001 ==> LSRV
12742 001010 ==> ASRV, 001011 ==> RORV
12743 ow ==> UNALLOC. */
12744
12745 uint32_t dispatch;
12746 uint32_t S = INSTR (29, 29);
12747 uint32_t opcode = INSTR (15, 10);
12748
12749 if (S == 1)
12750 HALT_UNALLOC;
12751
12752 if (opcode & 0x34)
12753 HALT_UNALLOC;
12754
12755 dispatch = ( (INSTR (31, 31) << 3)
12756 | (uimm (opcode, 3, 3) << 2)
12757 | uimm (opcode, 1, 0));
12758 switch (dispatch)
12759 {
12760 case 2: udiv32 (cpu); return;
12761 case 3: sdiv32 (cpu); return;
12762 case 4: lslv32 (cpu); return;
12763 case 5: lsrv32 (cpu); return;
12764 case 6: asrv32 (cpu); return;
12765 case 7: rorv32 (cpu); return;
12766 case 10: udiv64 (cpu); return;
12767 case 11: sdiv64 (cpu); return;
12768 case 12: lslv64 (cpu); return;
12769 case 13: lsrv64 (cpu); return;
12770 case 14: asrv64 (cpu); return;
12771 case 15: rorv64 (cpu); return;
12772 default: HALT_UNALLOC;
12773 }
12774 }
12775
12776
12777 /* Multiply. */
12778
12779 /* 32 bit multiply and add. */
12780 static void
12781 madd32 (sim_cpu *cpu)
12782 {
12783 unsigned rm = INSTR (20, 16);
12784 unsigned ra = INSTR (14, 10);
12785 unsigned rn = INSTR (9, 5);
12786 unsigned rd = INSTR (4, 0);
12787
12788 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12789 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12790 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12791 + aarch64_get_reg_u32 (cpu, rn, NO_SP)
12792 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12793 }
12794
12795 /* 64 bit multiply and add. */
12796 static void
12797 madd64 (sim_cpu *cpu)
12798 {
12799 unsigned rm = INSTR (20, 16);
12800 unsigned ra = INSTR (14, 10);
12801 unsigned rn = INSTR (9, 5);
12802 unsigned rd = INSTR (4, 0);
12803
12804 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12805 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12806 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12807 + (aarch64_get_reg_u64 (cpu, rn, NO_SP)
12808 * aarch64_get_reg_u64 (cpu, rm, NO_SP)));
12809 }
12810
12811 /* 32 bit multiply and sub. */
12812 static void
12813 msub32 (sim_cpu *cpu)
12814 {
12815 unsigned rm = INSTR (20, 16);
12816 unsigned ra = INSTR (14, 10);
12817 unsigned rn = INSTR (9, 5);
12818 unsigned rd = INSTR (4, 0);
12819
12820 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12821 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12822 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12823 - aarch64_get_reg_u32 (cpu, rn, NO_SP)
12824 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12825 }
12826
12827 /* 64 bit multiply and sub. */
12828 static void
12829 msub64 (sim_cpu *cpu)
12830 {
12831 unsigned rm = INSTR (20, 16);
12832 unsigned ra = INSTR (14, 10);
12833 unsigned rn = INSTR (9, 5);
12834 unsigned rd = INSTR (4, 0);
12835
12836 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12837 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12838 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12839 - aarch64_get_reg_u64 (cpu, rn, NO_SP)
12840 * aarch64_get_reg_u64 (cpu, rm, NO_SP));
12841 }
12842
12843 /* Signed multiply add long -- source, source2 : 32 bit, source3 : 64 bit. */
12844 static void
12845 smaddl (sim_cpu *cpu)
12846 {
12847 unsigned rm = INSTR (20, 16);
12848 unsigned ra = INSTR (14, 10);
12849 unsigned rn = INSTR (9, 5);
12850 unsigned rd = INSTR (4, 0);
12851
12852 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12853 obtain a 64 bit product. */
12854 aarch64_set_reg_s64
12855 (cpu, rd, NO_SP,
12856 aarch64_get_reg_s64 (cpu, ra, NO_SP)
12857 + ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
12858 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
12859 }
12860
12861 /* Signed multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
12862 static void
12863 smsubl (sim_cpu *cpu)
12864 {
12865 unsigned rm = INSTR (20, 16);
12866 unsigned ra = INSTR (14, 10);
12867 unsigned rn = INSTR (9, 5);
12868 unsigned rd = INSTR (4, 0);
12869
12870 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12871 obtain a 64 bit product. */
12872 aarch64_set_reg_s64
12873 (cpu, rd, NO_SP,
12874 aarch64_get_reg_s64 (cpu, ra, NO_SP)
12875 - ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
12876 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
12877 }
12878
12879 /* Integer Multiply/Divide. */
12880
12881 /* First some macros and a helper function. */
12882 /* Macros to test or access elements of 64 bit words. */
12883
12884 /* Mask used to access lo 32 bits of 64 bit unsigned int. */
12885 #define LOW_WORD_MASK ((1ULL << 32) - 1)
12886 /* Return the lo 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
12887 #define lowWordToU64(_value_u64) ((_value_u64) & LOW_WORD_MASK)
12888 /* Return the hi 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
12889 #define highWordToU64(_value_u64) ((_value_u64) >> 32)
12890
12891 /* Offset of sign bit in 64 bit signed integer. */
12892 #define SIGN_SHIFT_U64 63
12893 /* The sign bit itself -- also identifies the minimum negative int value. */
12894 #define SIGN_BIT_U64 (1ULL << SIGN_SHIFT_U64)
12895 /* Return true if a 64 bit signed int presented as an unsigned int is the
12896 most negative value. */
12897 #define isMinimumU64(_value_u64) ((_value_u64) == SIGN_BIT_U64)
12898 /* Return true (non-zero) if a 64 bit signed int presented as an unsigned
12899 int has its sign bit set. */
12900 #define isSignSetU64(_value_u64) ((_value_u64) & SIGN_BIT_U64)
12901 /* Return 1L or -1L according to whether a 64 bit signed int presented as
12902 an unsigned int has its sign bit set or not. */
12903 #define signOfU64(_value_u64) (1L + (((_value_u64) >> SIGN_SHIFT_U64) * -2L))
12904 /* Clear the sign bit of a 64 bit signed int presented as an unsigned int. */
12905 #define clearSignU64(_value_u64) ((_value_u64) &= ~SIGN_BIT_U64)
12906
12907 /* Multiply two 64 bit ints and return
12908 the hi 64 bits of the 128 bit product. */
12909
12910 static uint64_t
12911 mul64hi (uint64_t value1, uint64_t value2)
12912 {
12913 uint64_t resultmid1;
12914 uint64_t result;
12915 uint64_t value1_lo = lowWordToU64 (value1);
12916 uint64_t value1_hi = highWordToU64 (value1) ;
12917 uint64_t value2_lo = lowWordToU64 (value2);
12918 uint64_t value2_hi = highWordToU64 (value2);
12919
12920 /* Cross-multiply and collect results. */
12921 uint64_t xproductlo = value1_lo * value2_lo;
12922 uint64_t xproductmid1 = value1_lo * value2_hi;
12923 uint64_t xproductmid2 = value1_hi * value2_lo;
12924 uint64_t xproducthi = value1_hi * value2_hi;
12925 uint64_t carry = 0;
12926 /* Start accumulating 64 bit results. */
12927 /* Drop bottom half of lowest cross-product. */
12928 uint64_t resultmid = xproductlo >> 32;
12929 /* Add in middle products. */
12930 resultmid = resultmid + xproductmid1;
12931
12932 /* Check for overflow. */
12933 if (resultmid < xproductmid1)
12934 /* Carry over 1 into top cross-product. */
12935 carry++;
12936
12937 resultmid1 = resultmid + xproductmid2;
12938
12939 /* Check for overflow. */
12940 if (resultmid1 < xproductmid2)
12941 /* Carry over 1 into top cross-product. */
12942 carry++;
12943
12944 /* Drop lowest 32 bits of middle cross-product. */
12945 result = resultmid1 >> 32;
12946
12947 /* Add in the top cross-product and any carry. */
12948 result += xproducthi + carry;
12949
12950 return result;
12951 }
12952
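/* Where the host compiler provides the non-standard unsigned __int128
type (advertised by GCC/Clang via __SIZEOF_INT128__), the
cross-multiplication above can be checked against a direct 128 bit
product. Illustrative only; nothing calls mul64hi_ref. */
#ifdef __SIZEOF_INT128__
static uint64_t
mul64hi_ref (uint64_t value1, uint64_t value2)
{
/* Widen, multiply, and keep the high 64 bits. */
return (uint64_t) (((unsigned __int128) value1 * value2) >> 64);
}
#endif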
12953 /* Signed multiply high, source, source2 :
12954 64 bit, dest <-- high 64-bit of result. */
12955 static void
12956 smulh (sim_cpu *cpu)
12957 {
12958 uint64_t uresult;
12959 int64_t result;
12960 unsigned rm = INSTR (20, 16);
12961 unsigned rn = INSTR (9, 5);
12962 unsigned rd = INSTR (4, 0);
12963 GReg ra = INSTR (14, 10);
12964 int64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12965 int64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12966 uint64_t uvalue1;
12967 uint64_t uvalue2;
12968 int64_t signum = 1;
12969
12970 if (ra != R31)
12971 HALT_UNALLOC;
12972
12973 /* Convert to unsigned and use the unsigned mul64hi routine,
12974 then fix the sign up afterwards. */
12975 if (value1 < 0)
12976 {
12977 signum *= -1L;
12978 uvalue1 = -(uint64_t) value1; /* Unsigned negate avoids INT64_MIN overflow. */
12979 }
12980 else
12981 {
12982 uvalue1 = value1;
12983 }
12984
12985 if (value2 < 0)
12986 {
12987 signum *= -1L;
12988 uvalue2 = -(uint64_t) value2; /* Unsigned negate avoids INT64_MIN overflow. */
12989 }
12990 else
12991 {
12992 uvalue2 = value2;
12993 }
12994
12995 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12996 uresult = mul64hi (uvalue1, uvalue2);
12997 /* Negating the 128 bit product inverts its high half, with a carry
12998 in only when its low half (uvalue1 * uvalue2) is zero. */
12999 result = signum < 0 ? ~uresult + (uvalue1 * uvalue2 == 0) : uresult;
13000 aarch64_set_reg_s64 (cpu, rd, NO_SP, result);
13001 }
13002
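/* An alternative formulation of the signed high half, as a sketch:
in two's complement the signed high word is the unsigned high word
minus each operand whenever the other is negative, which avoids the
sign/magnitude juggling above. smulh_ref is a hypothetical helper,
not used by the decoder. */
static int64_t
smulh_ref (int64_t a, int64_t b)
{
uint64_t hi = mul64hi ((uint64_t) a, (uint64_t) b);

/* Correction terms follow from a_signed = a_unsigned - 2^64 when
a < 0, and likewise for b. */
if (a < 0)
hi -= (uint64_t) b;
if (b < 0)
hi -= (uint64_t) a;

return (int64_t) hi;
}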
13003 /* Unsigned multiply add long -- source, source2 :
13004 32 bit, source3 : 64 bit. */
13005 static void
13006 umaddl (sim_cpu *cpu)
13007 {
13008 unsigned rm = INSTR (20, 16);
13009 unsigned ra = INSTR (14, 10);
13010 unsigned rn = INSTR (9, 5);
13011 unsigned rd = INSTR (4, 0);
13012
13013 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13014 /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
13015 obtain a 64 bit product. */
13016 aarch64_set_reg_u64
13017 (cpu, rd, NO_SP,
13018 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13019 + ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
13020 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
13021 }
13022
13023 /* Unsigned multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
13024 static void
13025 umsubl (sim_cpu *cpu)
13026 {
13027 unsigned rm = INSTR (20, 16);
13028 unsigned ra = INSTR (14, 10);
13029 unsigned rn = INSTR (9, 5);
13030 unsigned rd = INSTR (4, 0);
13031
13032 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13033 /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
13034 obtain a 64 bit product. */
13035 aarch64_set_reg_u64
13036 (cpu, rd, NO_SP,
13037 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13038 - ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
13039 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
13040 }
13041
13042 /* Unsigned multiply high, source, source2 :
13043 64 bit, dest <-- high 64-bit of result. */
13044 static void
13045 umulh (sim_cpu *cpu)
13046 {
13047 unsigned rm = INSTR (20, 16);
13048 unsigned rn = INSTR (9, 5);
13049 unsigned rd = INSTR (4, 0);
13050 GReg ra = INSTR (14, 10);
13051
13052 if (ra != R31)
13053 HALT_UNALLOC;
13054
13055 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13056 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13057 mul64hi (aarch64_get_reg_u64 (cpu, rn, NO_SP),
13058 aarch64_get_reg_u64 (cpu, rm, NO_SP)));
13059 }
13060
13061 static void
13062 dexDataProc3Source (sim_cpu *cpu)
13063 {
13064 /* assert instr[28,24] == 11011. */
13065 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit (for rd at least)
13066 instr[30,29] = op54 : 00 ==> ok, ow ==> UNALLOC
13067 instr[23,21] = op31 : 111 ==> UNALLOC, ow ==> ok
13068 instr[15] = o0 : 0/1 ==> ok
13069 instr[23,21:15] ==> op : 0000 ==> MADD, 0001 ==> MSUB, (32/64 bit)
13070 0010 ==> SMADDL, 0011 ==> SMSUBL, (64 bit only)
13071 0100 ==> SMULH, (64 bit only)
13072 1010 ==> UMADDL, 1011 ==> UMSUBL, (64 bit only)
13073 1100 ==> UMULH (64 bit only)
13074 ow ==> UNALLOC. */
13075
13076 uint32_t dispatch;
13077 uint32_t size = INSTR (31, 31);
13078 uint32_t op54 = INSTR (30, 29);
13079 uint32_t op31 = INSTR (23, 21);
13080 uint32_t o0 = INSTR (15, 15);
13081
13082 if (op54 != 0)
13083 HALT_UNALLOC;
13084
13085 if (size == 0)
13086 {
13087 if (op31 != 0)
13088 HALT_UNALLOC;
13089
13090 if (o0 == 0)
13091 madd32 (cpu);
13092 else
13093 msub32 (cpu);
13094 return;
13095 }
13096
13097 dispatch = (op31 << 1) | o0;
13098
13099 switch (dispatch)
13100 {
13101 case 0: madd64 (cpu); return;
13102 case 1: msub64 (cpu); return;
13103 case 2: smaddl (cpu); return;
13104 case 3: smsubl (cpu); return;
13105 case 4: smulh (cpu); return;
13106 case 10: umaddl (cpu); return;
13107 case 11: umsubl (cpu); return;
13108 case 12: umulh (cpu); return;
13109 default: HALT_UNALLOC;
13110 }
13111 }
13112
13113 static void
13114 dexDPReg (sim_cpu *cpu)
13115 {
13116 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
13117 assert group == GROUP_DPREG_0101 || group == GROUP_DPREG_1101
13118 bits [28:24:21] of a DPReg are the secondary dispatch vector. */
13119 uint32_t group2 = dispatchDPReg (aarch64_get_instr (cpu));
13120
13121 switch (group2)
13122 {
13123 case DPREG_LOG_000:
13124 case DPREG_LOG_001:
13125 dexLogicalShiftedRegister (cpu); return;
13126
13127 case DPREG_ADDSHF_010:
13128 dexAddSubtractShiftedRegister (cpu); return;
13129
13130 case DPREG_ADDEXT_011:
13131 dexAddSubtractExtendedRegister (cpu); return;
13132
13133 case DPREG_ADDCOND_100:
13134 {
13135 /* This set bundles a variety of different operations. */
13136 /* Check for. */
13137 /* 1) add/sub w carry. */
13138 uint32_t mask1 = 0x1FE00000U;
13139 uint32_t val1 = 0x1A000000U;
13140 /* 2) cond compare register/immediate. */
13141 uint32_t mask2 = 0x1FE00000U;
13142 uint32_t val2 = 0x1A400000U;
13143 /* 3) cond select. */
13144 uint32_t mask3 = 0x1FE00000U;
13145 uint32_t val3 = 0x1A800000U;
13146 /* 4) data proc 1/2 source. */
13147 uint32_t mask4 = 0x1FE00000U;
13148 uint32_t val4 = 0x1AC00000U;
13149
13150 if ((aarch64_get_instr (cpu) & mask1) == val1)
13151 dexAddSubtractWithCarry (cpu);
13152
13153 else if ((aarch64_get_instr (cpu) & mask2) == val2)
13154 CondCompare (cpu);
13155
13156 else if ((aarch64_get_instr (cpu) & mask3) == val3)
13157 dexCondSelect (cpu);
13158
13159 else if ((aarch64_get_instr (cpu) & mask4) == val4)
13160 {
13161 /* Bit 30 is clear for data proc 2 source
13162 and set for data proc 1 source. */
13163 if (aarch64_get_instr (cpu) & (1U << 30))
13164 dexDataProc1Source (cpu);
13165 else
13166 dexDataProc2Source (cpu);
13167 }
13168
13169 else
13170 /* Should not reach here. */
13171 HALT_NYI;
13172
13173 return;
13174 }
13175
13176 case DPREG_3SRC_110:
13177 dexDataProc3Source (cpu); return;
13178
13179 case DPREG_UNALLOC_101:
13180 HALT_UNALLOC;
13181
13182 case DPREG_3SRC_111:
13183 dexDataProc3Source (cpu); return;
13184
13185 default:
13186 /* Should never reach here. */
13187 HALT_NYI;
13188 }
13189 }
13190
13191 /* Unconditional Branch immediate.
13192 Offset is a PC-relative byte offset in the range +/- 128MiB.
13193 The offset is assumed to be raw from the decode, i.e. the
13194 simulator is expected to scale it from a word offset to a byte offset. */
13195
13196 /* Unconditional branch. */
13197 static void
13198 buc (sim_cpu *cpu, int32_t offset)
13199 {
13200 aarch64_set_next_PC_by_offset (cpu, offset);
13201 }
13202
13203 static unsigned stack_depth = 0;
13204
13205 /* Unconditional branch and link -- writes return PC to LR. */
13206 static void
13207 bl (sim_cpu *cpu, int32_t offset)
13208 {
13209 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13210 aarch64_save_LR (cpu);
13211 aarch64_set_next_PC_by_offset (cpu, offset);
13212
13213 if (TRACE_BRANCH_P (cpu))
13214 {
13215 ++ stack_depth;
13216 TRACE_BRANCH (cpu,
13217 " %*scall %" PRIx64 " [%s]"
13218 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13219 stack_depth, " ", aarch64_get_next_PC (cpu),
13220 aarch64_get_func (CPU_STATE (cpu),
13221 aarch64_get_next_PC (cpu)),
13222 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13223 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13224 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13225 );
13226 }
13227 }
13228
13229 /* Unconditional Branch register.
13230 Branch/return address is in source register. */
13231
13232 /* Unconditional branch. */
13233 static void
13234 br (sim_cpu *cpu)
13235 {
13236 unsigned rn = INSTR (9, 5);
13237 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13238 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13239 }
13240
13241 /* Unconditional branch and link -- writes return PC to LR. */
13242 static void
13243 blr (sim_cpu *cpu)
13244 {
13245 unsigned rn = INSTR (9, 5);
13246
13247 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13248 /* The pseudo code in the spec says we update LR before fetching
13249 the value from rn. */
13250 aarch64_save_LR (cpu);
13251 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13252
13253 if (TRACE_BRANCH_P (cpu))
13254 {
13255 ++ stack_depth;
13256 TRACE_BRANCH (cpu,
13257 " %*scall %" PRIx64 " [%s]"
13258 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13259 stack_depth, " ", aarch64_get_next_PC (cpu),
13260 aarch64_get_func (CPU_STATE (cpu),
13261 aarch64_get_next_PC (cpu)),
13262 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13263 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13264 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13265 );
13266 }
13267 }
13268
13269 /* Return -- the assembler will default the source to LR. This is
13270 functionally equivalent to br but, presumably, unlike br it side
13271 effects the branch predictor. */
13272 static void
13273 ret (sim_cpu *cpu)
13274 {
13275 unsigned rn = INSTR (9, 5);
13276 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13277
13278 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13279 if (TRACE_BRANCH_P (cpu))
13280 {
13281 TRACE_BRANCH (cpu,
13282 " %*sreturn [result: %" PRIx64 "]",
13283 stack_depth, " ", aarch64_get_reg_u64 (cpu, 0, NO_SP));
13284 -- stack_depth;
13285 }
13286 }
13287
13288 /* NOP -- we implement this and call it from the decode in case we
13289 want to intercept it later. */
13290
13291 static void
13292 nop (sim_cpu *cpu)
13293 {
13294 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13295 }
13296
13297 /* Data synchronization barrier. */
13298
13299 static void
13300 dsb (sim_cpu *cpu)
13301 {
13302 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13303 }
13304
13305 /* Data memory barrier. */
13306
13307 static void
13308 dmb (sim_cpu *cpu)
13309 {
13310 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13311 }
13312
13313 /* Instruction synchronization barrier. */
13314
13315 static void
13316 isb (sim_cpu *cpu)
13317 {
13318 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13319 }
13320
13321 static void
13322 dexBranchImmediate (sim_cpu *cpu)
13323 {
13324 /* assert instr[30,26] == 00101
13325 instr[31] ==> 0 == B, 1 == BL
13326 instr[25,0] == imm26 branch offset counted in words. */
13327
13328 uint32_t top = INSTR (31, 31);
13329 /* We have a 26 bit signed word offset which we need to pass to the
13330 execute routine as a signed byte offset. */
13331 int32_t offset = simm32 (aarch64_get_instr (cpu), 25, 0) << 2;
13332
13333 if (top)
13334 bl (cpu, offset);
13335 else
13336 buc (cpu, offset);
13337 }
13338
13339 /* Control Flow. */
13340
13341 /* Conditional branch
13342
13343 Offset is a PC-relative byte offset in the range +/- 1MiB. Pos is
13344 a bit position in the range 0 .. 63 (used by the tbz/tbnz tests below).
13345
13346 cc is a CondCode enum value as pulled out of the decode.
13347
13348 N.B. any offset register (source) can only be Xn or Wn. */
13349
13350 static void
13351 bcc (sim_cpu *cpu, int32_t offset, CondCode cc)
13352 {
13353 /* The test returns TRUE if CC is met. */
13354 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13355 if (testConditionCode (cpu, cc))
13356 aarch64_set_next_PC_by_offset (cpu, offset);
13357 }
13358
13359 /* 32 bit branch on register non-zero. */
13360 static void
13361 cbnz32 (sim_cpu *cpu, int32_t offset)
13362 {
13363 unsigned rt = INSTR (4, 0);
13364
13365 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13366 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) != 0)
13367 aarch64_set_next_PC_by_offset (cpu, offset);
13368 }
13369
13370 /* 64 bit branch on register non-zero. */
13371 static void
13372 cbnz (sim_cpu *cpu, int32_t offset)
13373 {
13374 unsigned rt = INSTR (4, 0);
13375
13376 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13377 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) != 0)
13378 aarch64_set_next_PC_by_offset (cpu, offset);
13379 }
13380
13381 /* 32 bit branch on register zero. */
13382 static void
13383 cbz32 (sim_cpu *cpu, int32_t offset)
13384 {
13385 unsigned rt = INSTR (4, 0);
13386
13387 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13388 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) == 0)
13389 aarch64_set_next_PC_by_offset (cpu, offset);
13390 }
13391
13392 /* 64 bit branch on register zero. */
13393 static void
13394 cbz (sim_cpu *cpu, int32_t offset)
13395 {
13396 unsigned rt = INSTR (4, 0);
13397
13398 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13399 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) == 0)
13400 aarch64_set_next_PC_by_offset (cpu, offset);
13401 }
13402
13403 /* Branch on register bit test non-zero -- one size fits all. */
13404 static void
13405 tbnz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13406 {
13407 unsigned rt = INSTR (4, 0);
13408
13409 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13410 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos))
13411 aarch64_set_next_PC_by_offset (cpu, offset);
13412 }
13413
13414 /* Branch on register bit test zero -- one size fits all. */
13415 static void
13416 tbz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13417 {
13418 unsigned rt = INSTR (4, 0);
13419
13420 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13421 if (!(aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos)))
13422 aarch64_set_next_PC_by_offset (cpu, offset);
13423 }
13424
13425 static void
13426 dexCompareBranchImmediate (sim_cpu *cpu)
13427 {
13428 /* instr[30,25] = 01 1010
13429 instr[31] = size : 0 ==> 32, 1 ==> 64
13430 instr[24] = op : 0 ==> CBZ, 1 ==> CBNZ
13431 instr[23,5] = simm19 branch offset counted in words
13432 instr[4,0] = rt */
13433
13434 uint32_t size = INSTR (31, 31);
13435 uint32_t op = INSTR (24, 24);
13436 int32_t offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13437
13438 if (size == 0)
13439 {
13440 if (op == 0)
13441 cbz32 (cpu, offset);
13442 else
13443 cbnz32 (cpu, offset);
13444 }
13445 else
13446 {
13447 if (op == 0)
13448 cbz (cpu, offset);
13449 else
13450 cbnz (cpu, offset);
13451 }
13452 }
13453
13454 static void
13455 dexTestBranchImmediate (sim_cpu *cpu)
13456 {
13457 /* instr[31] = b5 : bit 5 of test bit idx
13458 instr[30,25] = 01 1011
13459 instr[24] = op : 0 ==> TBZ, 1 ==> TBNZ
13460 instr[23,19] = b40 : bits 4 to 0 of test bit idx
13461 instr[18,5] = simm14 : signed offset counted in words
13462 instr[4,0] = uimm5 */
13463
13464 uint32_t pos = ((INSTR (31, 31) << 5) | INSTR (23, 19));
13465 int32_t offset = simm32 (aarch64_get_instr (cpu), 18, 5) << 2;
13466
13467 NYI_assert (30, 25, 0x1b);
13468
13469 if (INSTR (24, 24) == 0)
13470 tbz (cpu, pos, offset);
13471 else
13472 tbnz (cpu, pos, offset);
13473 }
13474
13475 static void
13476 dexCondBranchImmediate (sim_cpu *cpu)
13477 {
13478 /* instr[31,25] = 010 1010
13479 instr[24] = op1 : op1:op0 == 00 ==> B.cond, ow ==> UNALLOC
13480 instr[23,5] = simm19 : signed offset counted in words
13481 instr[4] = op0
13482 instr[3,0] = cond */
13483
13484 int32_t offset;
13485 uint32_t op = ((INSTR (24, 24) << 1) | INSTR (4, 4));
13486
13487 NYI_assert (31, 25, 0x2a);
13488
13489 if (op != 0)
13490 HALT_UNALLOC;
13491
13492 offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13493
13494 bcc (cpu, offset, INSTR (3, 0));
13495 }
13496
13497 static void
13498 dexBranchRegister (sim_cpu *cpu)
13499 {
13500 /* instr[31,25] = 110 1011
13501 instr[24,21] = op : 0 ==> BR, 1 ==> BLR, 2 ==> RET, 4 ==> ERET, 5 ==> DRPS
13502 instr[20,16] = op2 : must be 11111
13503 instr[15,10] = op3 : must be 000000
13504 instr[4,0] = op4 : must be 00000. */
13505
13506 uint32_t op = INSTR (24, 21);
13507 uint32_t op2 = INSTR (20, 16);
13508 uint32_t op3 = INSTR (15, 10);
13509 uint32_t op4 = INSTR (4, 0);
13510
13511 NYI_assert (31, 25, 0x6b);
13512
13513 if (op2 != 0x1F || op3 != 0 || op4 != 0)
13514 HALT_UNALLOC;
13515
13516 if (op == 0)
13517 br (cpu);
13518
13519 else if (op == 1)
13520 blr (cpu);
13521
13522 else if (op == 2)
13523 ret (cpu);
13524
13525 else
13526 {
13527 /* ERET and DRPS require 0b11111 for rn = instr [9,5]. */
13528 /* anything else is unallocated. */
13529 uint32_t rn = INSTR (9, 5);
13530
13531 if (rn != 0x1f)
13532 HALT_UNALLOC;
13533
13534 if (op == 4 || op == 5)
13535 HALT_NYI;
13536
13537 HALT_UNALLOC;
13538 }
13539 }
13540
13541 /* FIXME: We should get the Angel SWI values from ../../libgloss/aarch64/svc.h
13542 but this may not be available. So instead we define the values we need
13543 here. */
13544 #define AngelSVC_Reason_Open 0x01
13545 #define AngelSVC_Reason_Close 0x02
13546 #define AngelSVC_Reason_Write 0x05
13547 #define AngelSVC_Reason_Read 0x06
13548 #define AngelSVC_Reason_IsTTY 0x09
13549 #define AngelSVC_Reason_Seek 0x0A
13550 #define AngelSVC_Reason_FLen 0x0C
13551 #define AngelSVC_Reason_Remove 0x0E
13552 #define AngelSVC_Reason_Rename 0x0F
13553 #define AngelSVC_Reason_Clock 0x10
13554 #define AngelSVC_Reason_Time 0x11
13555 #define AngelSVC_Reason_System 0x12
13556 #define AngelSVC_Reason_Errno 0x13
13557 #define AngelSVC_Reason_GetCmdLine 0x15
13558 #define AngelSVC_Reason_HeapInfo 0x16
13559 #define AngelSVC_Reason_ReportException 0x18
13560 #define AngelSVC_Reason_Elapsed 0x30
13561
13562
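/* A sketch of the guest-side convention the handler below assumes:
w0 carries the reason code, x1 points at a parameter block and the
result is returned in x0. The struct mirrors the three reads made by
the AngelSVC_Reason_Write case (ptr + 0, + 8, + 16); the type name is
illustrative and nothing references it. */
struct angel_write_block
{
uint64_t fd; /* File descriptor. */
uint64_t buf; /* Address of the data to write. */
uint64_t len; /* Number of bytes to write. */
};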
13563 static void
13564 handle_halt (sim_cpu *cpu, uint32_t val)
13565 {
13566 uint64_t result = 0;
13567
13568 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13569 if (val != 0xf000)
13570 {
13571 TRACE_SYSCALL (cpu, " HLT [0x%x]", val);
13572 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13573 sim_stopped, SIM_SIGTRAP);
13574 }
13575
13576 /* We have encountered an Angel SVC call. See if we can process it. */
13577 switch (aarch64_get_reg_u32 (cpu, 0, NO_SP))
13578 {
13579 case AngelSVC_Reason_HeapInfo:
13580 {
13581 /* Get the values. */
13582 uint64_t stack_top = aarch64_get_stack_start (cpu);
13583 uint64_t heap_base = aarch64_get_heap_start (cpu);
13584
13585 /* Get the pointer */
13586 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13587 ptr = aarch64_get_mem_u64 (cpu, ptr);
13588
13589 /* Fill in the memory block. */
13590 /* Start addr of heap. */
13591 aarch64_set_mem_u64 (cpu, ptr + 0, heap_base);
13592 /* End addr of heap. */
13593 aarch64_set_mem_u64 (cpu, ptr + 8, stack_top);
13594 /* Lowest stack addr. */
13595 aarch64_set_mem_u64 (cpu, ptr + 16, heap_base);
13596 /* Initial stack addr. */
13597 aarch64_set_mem_u64 (cpu, ptr + 24, stack_top);
13598
13599 TRACE_SYSCALL (cpu, " AngelSVC: Get Heap Info");
13600 }
13601 break;
13602
13603 case AngelSVC_Reason_Open:
13604 {
13605 /* Get the pointer */
13606 /* uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK); */
13607 /* FIXME: For now we just assume that we will only be asked
13608 to open the standard file descriptors. */
13609 static int fd = 0;
13610 result = fd ++;
13611
13612 TRACE_SYSCALL (cpu, " AngelSVC: Open file %d", fd - 1);
13613 }
13614 break;
13615
13616 case AngelSVC_Reason_Close:
13617 {
13618 uint64_t fh = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13619 TRACE_SYSCALL (cpu, " AngelSVC: Close file %d", (int) fh);
13620 result = 0;
13621 }
13622 break;
13623
13624 case AngelSVC_Reason_Errno:
13625 result = 0;
13626 TRACE_SYSCALL (cpu, " AngelSVC: Get Errno");
13627 break;
13628
13629 case AngelSVC_Reason_Clock:
13630 result =
13631 #ifdef CLOCKS_PER_SEC
13632 (CLOCKS_PER_SEC >= 100)
13633 ? (clock () / (CLOCKS_PER_SEC / 100))
13634 : ((clock () * 100) / CLOCKS_PER_SEC)
13635 #else
13636 /* Presume unix... clock() returns microseconds. */
13637 (clock () / 10000)
13638 #endif
13639 ;
13640 TRACE_SYSCALL (cpu, " AngelSVC: Get Clock");
13641 break;
13642
13643 case AngelSVC_Reason_GetCmdLine:
13644 {
13645 /* Get the pointer */
13646 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13647 ptr = aarch64_get_mem_u64 (cpu, ptr);
13648
13649 /* FIXME: No command line for now. */
13650 aarch64_set_mem_u64 (cpu, ptr, 0);
13651 TRACE_SYSCALL (cpu, " AngelSVC: Get Command Line");
13652 }
13653 break;
13654
13655 case AngelSVC_Reason_IsTTY:
13656 result = 1;
13657 TRACE_SYSCALL (cpu, " AngelSVC: IsTTY ?");
13658 break;
13659
13660 case AngelSVC_Reason_Write:
13661 {
13662 /* Get the pointer */
13663 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13664 /* Get the write control block. */
13665 uint64_t fd = aarch64_get_mem_u64 (cpu, ptr);
13666 uint64_t buf = aarch64_get_mem_u64 (cpu, ptr + 8);
13667 uint64_t len = aarch64_get_mem_u64 (cpu, ptr + 16);
13668
13669 TRACE_SYSCALL (cpu, "write of %" PRIx64 " bytes from %"
13670 PRIx64 " on descriptor %" PRIx64,
13671 len, buf, fd);
13672
13673 if (len > 1280)
13674 {
13675 TRACE_SYSCALL (cpu,
13676 " AngelSVC: Write: Suspiciously long write: %ld",
13677 (long) len);
13678 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13679 sim_stopped, SIM_SIGBUS);
13680 }
13681 else if (fd == 1)
13682 {
13683 printf ("%.*s", (int) len, aarch64_get_mem_ptr (cpu, buf));
13684 }
13685 else if (fd == 2)
13686 {
13687 TRACE (cpu, 0, "\n");
13688 sim_io_eprintf (CPU_STATE (cpu), "%.*s",
13689 (int) len, aarch64_get_mem_ptr (cpu, buf));
13690 TRACE (cpu, 0, "\n");
13691 }
13692 else
13693 {
13694 TRACE_SYSCALL (cpu,
13695 " AngelSVC: Write: Unexpected file handle: %d",
13696 (int) fd);
13697 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13698 sim_stopped, SIM_SIGABRT);
13699 }
13700 }
13701 break;
13702
13703 case AngelSVC_Reason_ReportException:
13704 {
13705 /* Get the pointer */
13706 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13707 /* ptr = aarch64_get_mem_u64 (cpu, ptr); */
13708 uint64_t type = aarch64_get_mem_u64 (cpu, ptr);
13709 uint64_t state = aarch64_get_mem_u64 (cpu, ptr + 8);
13710
13711 TRACE_SYSCALL (cpu,
13712 "Angel Exception: type 0x%" PRIx64 " state %" PRIx64,
13713 type, state);
13714
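/* Type 0x20026 is ADP_Stopped_ApplicationExit, i.e. a normal exit. */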
13715 if (type == 0x20026)
13716 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13717 sim_exited, state);
13718 else
13719 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13720 sim_stopped, SIM_SIGINT);
13721 }
13722 break;
13723
13724 case AngelSVC_Reason_Read:
13725 case AngelSVC_Reason_FLen:
13726 case AngelSVC_Reason_Seek:
13727 case AngelSVC_Reason_Remove:
13728 case AngelSVC_Reason_Time:
13729 case AngelSVC_Reason_System:
13730 case AngelSVC_Reason_Rename:
13731 case AngelSVC_Reason_Elapsed:
13732 default:
13733 TRACE_SYSCALL (cpu, " HLT [Unknown angel %x]",
13734 aarch64_get_reg_u32 (cpu, 0, NO_SP));
13735 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13736 sim_stopped, SIM_SIGTRAP);
13737 }
13738
13739 aarch64_set_reg_u64 (cpu, 0, NO_SP, result);
13740 }
13741
13742 static void
13743 dexExcpnGen (sim_cpu *cpu)
13744 {
13745 /* instr[31:24] = 11010100
13746 instr[23,21] = opc : 000 ==> GEN EXCPN, 001 ==> BRK
13747 010 ==> HLT, 101 ==> DBG GEN EXCPN
13748 instr[20,5] = imm16
13749 instr[4,2] = opc2 000 ==> OK, ow ==> UNALLOC
13750 instr[1,0] = LL : discriminates opc */
13751
13752 uint32_t opc = INSTR (23, 21);
13753 uint32_t imm16 = INSTR (20, 5);
13754 uint32_t opc2 = INSTR (4, 2);
13755 uint32_t LL;
13756
13757 NYI_assert (31, 24, 0xd4);
13758
13759 if (opc2 != 0)
13760 HALT_UNALLOC;
13761
13762 LL = INSTR (1, 0);
13763
13764 /* We only implement HLT and BRK for now. */
13765 if (opc == 1 && LL == 0)
13766 {
13767 TRACE_EVENTS (cpu, " BRK [0x%x]", imm16);
13768 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13769 sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
13770 }
13771
13772 if (opc == 2 && LL == 0)
13773 handle_halt (cpu, imm16);
13774
13775 else if (opc == 0 || opc == 5)
13776 HALT_NYI;
13777
13778 else
13779 HALT_UNALLOC;
13780 }
13781
13782 /* Stub for accessing system registers. */
13783
13784 static uint64_t
13785 system_get (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13786 unsigned crm, unsigned op2)
13787 {
13788 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 7)
13789 /* DCZID_EL0 - the Data Cache Zero ID register.
13790 We do not support DC ZVA at the moment, so
13791 we return a value with the disable bit set.
13792 We implement support for the DCZID register since
13793 it is used by the C library's memset function. */
13794 return ((uint64_t) 1) << 4;
13795
13796 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 1)
13797 /* Cache Type Register. */
13798 return 0x80008000UL;
13799
13800 if (crn == 13 && op1 == 3 && crm == 0 && op2 == 2)
13801 /* TPIDR_EL0 - thread pointer id. */
13802 return aarch64_get_thread_id (cpu);
13803
13804 if (op1 == 3 && crm == 4 && op2 == 0)
13805 return aarch64_get_FPCR (cpu);
13806
13807 if (op1 == 3 && crm == 4 && op2 == 1)
13808 return aarch64_get_FPSR (cpu);
13809
13810 else if (op1 == 3 && crm == 2 && op2 == 0)
13811 return aarch64_get_CPSR (cpu);
13812
13813 HALT_NYI;
13814 }
13815
13816 static void
13817 system_set (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13818 unsigned crm, unsigned op2, uint64_t val)
13819 {
13820 if (op1 == 3 && crm == 4 && op2 == 0)
13821 aarch64_set_FPCR (cpu, val);
13822
13823 else if (op1 == 3 && crm == 4 && op2 == 1)
13824 aarch64_set_FPSR (cpu, val);
13825
13826 else if (op1 == 3 && crm == 2 && op2 == 0)
13827 aarch64_set_CPSR (cpu, val);
13828
13829 else
13830 HALT_NYI;
13831 }
13832
13833 static void
13834 do_mrs (sim_cpu *cpu)
13835 {
13836 /* instr[31:20] = 1101 0101 0011
13837 instr[19] = op0
13838 instr[18,16] = op1
13839 instr[15,12] = CRn
13840 instr[11,8] = CRm
13841 instr[7,5] = op2
13842 instr[4,0] = Rt */
13843 unsigned sys_op0 = INSTR (19, 19) + 2;
13844 unsigned sys_op1 = INSTR (18, 16);
13845 unsigned sys_crn = INSTR (15, 12);
13846 unsigned sys_crm = INSTR (11, 8);
13847 unsigned sys_op2 = INSTR (7, 5);
13848 unsigned rt = INSTR (4, 0);
13849
13850 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13851 aarch64_set_reg_u64 (cpu, rt, NO_SP,
13852 system_get (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2));
13853 }
13854
13855 static void
13856 do_MSR_immediate (sim_cpu *cpu)
13857 {
13858 /* instr[31:19] = 1101 0101 0000 0
13859 instr[18,16] = op1
13860 instr[15,12] = 0100
13861 instr[11,8] = CRm
13862 instr[7,5] = op2
13863 instr[4,0] = 1 1111 */
13864
13865 unsigned op1 = INSTR (18, 16);
13866 /*unsigned crm = INSTR (11, 8);*/
13867 unsigned op2 = INSTR (7, 5);
13868
13869 NYI_assert (31, 19, 0x1AA0);
13870 NYI_assert (15, 12, 0x4);
13871 NYI_assert (4, 0, 0x1F);
13872
13873 if (op1 == 0)
13874 {
13875 if (op2 == 5)
13876 HALT_NYI; /* set SPSel. */
13877 else
13878 HALT_UNALLOC;
13879 }
13880 else if (op1 == 3)
13881 {
13882 if (op2 == 6)
13883 HALT_NYI; /* set DAIFset. */
13884 else if (op2 == 7)
13885 HALT_NYI; /* set DAIFclr. */
13886 else
13887 HALT_UNALLOC;
13888 }
13889 else
13890 HALT_UNALLOC;
13891 }
13892
13893 static void
13894 do_MSR_reg (sim_cpu *cpu)
13895 {
13896 /* instr[31:20] = 1101 0101 0001
13897 instr[19] = op0
13898 instr[18,16] = op1
13899 instr[15,12] = CRn
13900 instr[11,8] = CRm
13901 instr[7,5] = op2
13902 instr[4,0] = Rt */
13903
13904 unsigned sys_op0 = INSTR (19, 19) + 2;
13905 unsigned sys_op1 = INSTR (18, 16);
13906 unsigned sys_crn = INSTR (15, 12);
13907 unsigned sys_crm = INSTR (11, 8);
13908 unsigned sys_op2 = INSTR (7, 5);
13909 unsigned rt = INSTR (4, 0);
13910
13911 NYI_assert (31, 20, 0xD51);
13912
13913 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13914 system_set (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2,
13915 aarch64_get_reg_u64 (cpu, rt, NO_SP));
13916 }
13917
13918 static void
13919 do_SYS (sim_cpu *cpu)
13920 {
13921 /* instr[31,19] = 1101 0101 0000 1
13922 instr[18,16] = op1
13923 instr[15,12] = CRn
13924 instr[11,8] = CRm
13925 instr[7,5] = op2
13926 instr[4,0] = Rt */
13927 NYI_assert (31, 19, 0x1AA1);
13928
13929 /* FIXME: For now we just silently accept system ops. */
13930 }
13931
13932 static void
13933 dexSystem (sim_cpu *cpu)
13934 {
13935 /* instr[31:22] = 1101 01010 0
13936 instr[21] = L
13937 instr[20,19] = op0
13938 instr[18,16] = op1
13939 instr[15,12] = CRn
13940 instr[11,8] = CRm
13941 instr[7,5] = op2
13942 instr[4,0] = uimm5 */
13943
13944 /* We are interested in HINT, DSB, DMB and ISB
13945
13946 Hint #0 encodes NOOP (this is the only hint we care about)
13947 L == 0, op0 == 0, op1 = 011, CRn = 0010, Rt = 11111,
13948 CRm != 0000 OR (CRm == 0000 AND (op2 == 000 OR op2 > 101))
13949
13950 DSB, DMB, ISB are data synchronization barrier, data memory
13951 barrier and instruction synchronization barrier, respectively,
13952 where
13953 L == 0, op0 == 0, op1 = 011, CRn = 0011, Rt = 11111,
13954 op2 : DSB ==> 100, DMB ==> 101, ISB ==> 110
13955 CRm<3:2> ==> domain, CRm<1:0> ==> types,
13956 domain : 00 ==> OuterShareable, 01 ==> Nonshareable,
13957 10 ==> InnerShareable, 11 ==> FullSystem
13958 types : 01 ==> Reads, 10 ==> Writes,
13959 11 ==> All, 00 ==> All (domain == FullSystem). */
13960
13961 unsigned rt = INSTR (4, 0);
13962
13963 NYI_assert (31, 22, 0x354);
13964
13965 switch (INSTR (21, 12))
13966 {
13967 case 0x032:
13968 if (rt == 0x1F)
13969 {
13970 /* NOP has CRm != 0000 OR
13971 (CRm == 0000 AND (op2 == 000 OR op2 > 101)). */
13972 uint32_t crm = INSTR (11, 8);
13973 uint32_t op2 = INSTR (7, 5);
13974
13975 if (crm != 0 || (op2 == 0 || op2 > 5))
13976 {
13977 /* Actually call nop method so we can reimplement it later. */
13978 nop (cpu);
13979 return;
13980 }
13981 }
13982 HALT_NYI;
13983
13984 case 0x033:
13985 {
13986 uint32_t op2 = INSTR (7, 5);
13987
13988 switch (op2)
13989 {
13990 case 2: HALT_NYI;
13991 case 4: dsb (cpu); return;
13992 case 5: dmb (cpu); return;
13993 case 6: isb (cpu); return;
13994 default: HALT_UNALLOC;
13995 }
13996 }
13997
13998 case 0x3B0:
13999 case 0x3B4:
14000 case 0x3BD:
14001 do_mrs (cpu);
14002 return;
14003
14004 case 0x0B7:
14005 do_SYS (cpu); /* DC is an alias of SYS. */
14006 return;
14007
14008 default:
14009 if (INSTR (21, 20) == 0x1)
14010 do_MSR_reg (cpu);
14011 else if (INSTR (21, 19) == 0 && INSTR (15, 12) == 0x4)
14012 do_MSR_immediate (cpu);
14013 else
14014 HALT_NYI;
14015 return;
14016 }
14017 }
14018
14019 static void
14020 dexBr (sim_cpu *cpu)
14021 {
14022 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
14023 assert group == GROUP_BREXSYS_1010 || group == GROUP_BREXSYS_1011
14024 bits [31,29] of a BrExSys are the secondary dispatch vector. */
14025 uint32_t group2 = dispatchBrExSys (aarch64_get_instr (cpu));
14026
14027 switch (group2)
14028 {
14029 case BR_IMM_000:
14030 return dexBranchImmediate (cpu);
14031
14032 case BR_IMMCMP_001:
14033 /* Compare has bit 25 clear while test has it set. */
14034 if (!INSTR (25, 25))
14035 dexCompareBranchImmediate (cpu);
14036 else
14037 dexTestBranchImmediate (cpu);
14038 return;
14039
14040 case BR_IMMCOND_010:
14041 /* This is a conditional branch if bit 25 is clear, otherwise
14042 unallocated. */
14043 if (!INSTR (25, 25))
14044 dexCondBranchImmediate (cpu);
14045 else
14046 HALT_UNALLOC;
14047 return;
14048
14049 case BR_UNALLOC_011:
14050 HALT_UNALLOC;
14051
14052 case BR_IMM_100:
14053 dexBranchImmediate (cpu);
14054 return;
14055
14056 case BR_IMMCMP_101:
14057 /* Compare has bit 25 clear while test has it set. */
14058 if (!INSTR (25, 25))
14059 dexCompareBranchImmediate (cpu);
14060 else
14061 dexTestBranchImmediate (cpu);
14062 return;
14063
14064 case BR_REG_110:
14065 /* Unconditional branch reg has bit 25 set. */
14066 if (INSTR (25, 25))
14067 dexBranchRegister (cpu);
14068
14069 /* This includes Excpn Gen, System and unalloc operations.
14070 We need to decode the Excpn Gen operation BRK so we can plant
14071 debugger entry points.
14072 Excpn Gen operations have instr [24] = 0.
14073 We need to decode at least one of the System operations, NOP,
14074 which is an alias for HINT #0.
14075 System operations have instr [24,22] = 100. */
14076 else if (INSTR (24, 24) == 0)
14077 dexExcpnGen (cpu);
14078
14079 else if (INSTR (24, 22) == 4)
14080 dexSystem (cpu);
14081
14082 else
14083 HALT_UNALLOC;
14084
14085 return;
14086
14087 case BR_UNALLOC_111:
14088 HALT_UNALLOC;
14089
14090 default:
14091 /* Should never reach here. */
14092 HALT_NYI;
14093 }
14094 }
14095
14096 static void
14097 aarch64_decode_and_execute (sim_cpu *cpu, uint64_t pc)
14098 {
14099 /* We need to check if gdb wants in here. */
14100 /* checkBreak (cpu); */
14101
14102 uint64_t group = dispatchGroup (aarch64_get_instr (cpu));
14103
14104 switch (group)
14105 {
14106 case GROUP_PSEUDO_0000: dexPseudo (cpu); break;
14107 case GROUP_LDST_0100: dexLdSt (cpu); break;
14108 case GROUP_DPREG_0101: dexDPReg (cpu); break;
14109 case GROUP_LDST_0110: dexLdSt (cpu); break;
14110 case GROUP_ADVSIMD_0111: dexAdvSIMD0 (cpu); break;
14111 case GROUP_DPIMM_1000: dexDPImm (cpu); break;
14112 case GROUP_DPIMM_1001: dexDPImm (cpu); break;
14113 case GROUP_BREXSYS_1010: dexBr (cpu); break;
14114 case GROUP_BREXSYS_1011: dexBr (cpu); break;
14115 case GROUP_LDST_1100: dexLdSt (cpu); break;
14116 case GROUP_DPREG_1101: dexDPReg (cpu); break;
14117 case GROUP_LDST_1110: dexLdSt (cpu); break;
14118 case GROUP_ADVSIMD_1111: dexAdvSIMD1 (cpu); break;
14119
14120 case GROUP_UNALLOC_0001:
14121 case GROUP_UNALLOC_0010:
14122 case GROUP_UNALLOC_0011:
14123 HALT_UNALLOC;
14124
14125 default:
14126 /* Should never reach here. */
14127 HALT_NYI;
14128 }
14129 }
14130
14131 static bfd_boolean
14132 aarch64_step (sim_cpu *cpu)
14133 {
14134 uint64_t pc = aarch64_get_PC (cpu);
14135
14136 if (pc == TOP_LEVEL_RETURN_PC)
14137 return FALSE;
14138
14139 aarch64_set_next_PC (cpu, pc + 4);
14140
14141 /* Code is always little-endian. */
14142 sim_core_read_buffer (CPU_STATE (cpu), cpu, read_map,
14143 & aarch64_get_instr (cpu), pc, 4);
14144 aarch64_get_instr (cpu) = endian_le2h_4 (aarch64_get_instr (cpu));
14145
14146 TRACE_INSN (cpu, " pc = %" PRIx64 " instr = %08x", pc,
14147 aarch64_get_instr (cpu));
14148 TRACE_DISASM (cpu, pc);
14149
14150 aarch64_decode_and_execute (cpu, pc);
14151
14152 return TRUE;
14153 }
14154
14155 void
14156 aarch64_run (SIM_DESC sd)
14157 {
14158 sim_cpu *cpu = STATE_CPU (sd, 0);
14159
14160 while (aarch64_step (cpu))
14161 {
14162 aarch64_update_PC (cpu);
14163
14164 if (sim_events_tick (sd))
14165 sim_events_process (sd);
14166 }
14167
14168 sim_engine_halt (sd, cpu, NULL, aarch64_get_PC (cpu),
14169 sim_exited, aarch64_get_reg_s32 (cpu, R0, NO_SP));
14170 }
14171
14172 void
14173 aarch64_init (sim_cpu *cpu, uint64_t pc)
14174 {
14175 uint64_t sp = aarch64_get_stack_start (cpu);
14176
14177 /* Install SP, FP and PC and set LR to -20
14178 so we can detect a top-level return. */
14179 aarch64_set_reg_u64 (cpu, SP, SP_OK, sp);
14180 aarch64_set_reg_u64 (cpu, FP, SP_OK, sp);
14181 aarch64_set_reg_u64 (cpu, LR, SP_OK, TOP_LEVEL_RETURN_PC);
14182 aarch64_set_next_PC (cpu, pc);
14183 aarch64_update_PC (cpu);
14184 aarch64_init_LIT_table ();
14185 }