fc12948bffa66034ab8750ee0b5efe22b09865ae
1 /* rx.c --- opcode semantics for stand-alone RX simulator.
3 Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc.
4 Contributed by Red Hat, Inc.
6 This file is part of the GNU simulators.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
27 #include "opcode/rx.h"
36 static const char * id_names
[] = {
38 "RXO_mov", /* d = s (signed) */
39 "RXO_movbi", /* d = [s,s2] (signed) */
40 "RXO_movbir", /* [s,s2] = d (signed) */
41 "RXO_pushm", /* s..s2 */
42 "RXO_popm", /* s..s2 */
43 "RXO_xchg", /* s <-> d */
44 "RXO_stcc", /* d = s if cond(s2) */
45 "RXO_rtsd", /* rtsd, 1=imm, 2-0 = reg if reg type */
47 /* These are all either d OP= s or, if s2 is set, d = s OP s2. Note
48 that d may be "None". */
61 "RXO_adc", /* d = d + s + carry */
62 "RXO_sbb", /* d = d - s - ~carry */
63 "RXO_abs", /* d = |s| */
64 "RXO_max", /* d = max(d,s) */
65 "RXO_min", /* d = min(d,s) */
66 "RXO_emul", /* d:64 = d:32 * s */
67 "RXO_emulu", /* d:64 = d:32 * s (unsigned) */
69 "RXO_rolc", /* d <<= 1 through carry */
70 "RXO_rorc", /* d >>= 1 through carry*/
71 "RXO_rotl", /* d <<= #s without carry */
72 "RXO_rotr", /* d >>= #s without carry*/
73 "RXO_revw", /* d = revw(s) */
74 "RXO_revl", /* d = revl(s) */
75 "RXO_branch", /* pc = d if cond(s) */
76 "RXO_branchrel",/* pc += d if cond(s) */
77 "RXO_jsr", /* pc = d */
78 "RXO_jsrrel", /* pc += d */
104 "RXO_sat", /* sat(d) */
107 "RXO_fadd", /* d op= s */
116 "RXO_bset", /* d |= (1<<s) */
117 "RXO_bclr", /* d &= ~(1<<s) */
118 "RXO_btst", /* s & (1<<s2) */
119 "RXO_bnot", /* d ^= (1<<s) */
120 "RXO_bmcc", /* d<s> = cond(s2) */
122 "RXO_clrpsw", /* flag index in d */
123 "RXO_setpsw", /* flag index in d */
124 "RXO_mvtipl", /* new IPL in s */
128 "RXO_rtd", /* undocumented */
130 "RXO_dbt", /* undocumented */
131 "RXO_int", /* vector id in s */
135 "RXO_sccnd", /* d = cond(s) ? 1 : 0 */
138 static const char * optype_names
[] = {
140 "#Imm", /* #addend */
142 "[Rn]", /* [Rn + addend] */
145 " cc ", /* eq, gtu, etc */
146 "Flag", /* [UIOSZC] */
147 "RbRi" /* [Rb + scale * Ri] */
/* Entry counts of the id_names and optype_names string tables, computed
   from the array sizes so they follow the tables automatically.

   N_RXT dimensions the three-way op_cache lookup table.  N_RXO
   dimensions the per-opcode statistics arrays, which are indexed by
   opcode->id, and id_names itself is indexed by opcode->id when
   reporting.  Presumably id_names must therefore keep one entry per
   RXO_* opcode id, in enum order -- TODO confirm against the enum's
   declaration.  Both macros have type size_t, so comparisons against
   signed loop counters are done in unsigned arithmetic.  */
150 #define N_RXO (sizeof(id_names)/sizeof(id_names[0]))
151 #define N_RXT (sizeof(optype_names)/sizeof(optype_names[0]))
154 static unsigned long long benchmark_start_cycle
;
155 static unsigned long long benchmark_end_cycle
;
157 static int op_cache
[N_RXT
][N_RXT
][N_RXT
];
158 static int op_cache_rev
[N_MAP
];
159 static int op_cache_idx
= 0;
/* Map a triple of operand types (A, B, C) to a small dense index used as
   the second dimension of the statistics arrays.

   op_cache[a][b][c] holds the index already assigned to the triple; a
   value of zero is treated as "not assigned yet".  op_cache_rev is the
   reverse mapping: for each assigned index it stores the triple packed
   as (a << 8) | (b << 4) | c.

   NOTE(review): because zero means "empty", index 0 can never be handed
   out usefully; presumably op_cache_idx is advanced before it is stored
   (that statement is not visible here) -- confirm.  */
162 op_lookup (int a
, int b
, int c
)
/* Fast path: the triple has been seen before.  */
164 if (op_cache
[a
][b
][c
])
165 return op_cache
[a
][b
][c
];
/* Guard against running out of slots in the N_MAP-sized tables.  */
167 if (op_cache_idx
>= N_MAP
)
169 printf("op_cache_idx exceeds %d\n", N_MAP
);
/* Record the forward mapping ...  */
172 op_cache
[a
][b
][c
] = op_cache_idx
;
/* ... and the reverse one.  B and C get four bits each, so the packing
   is only unambiguous while both are below 16.  */
173 op_cache_rev
[op_cache_idx
] = (a
<<8) | (b
<<4) | c
;
/* Format the operand-type triple behind index MAP as text of the form
   "<type> <type> <type>", using the optype_names table.

   MAP is first translated through op_cache_rev into the packed triple.
   The text is written into one of five static 20-byte buffers selected
   by ci; presumably the buffers are used in rotation so that several
   results can coexist inside a single printf call (the pair report
   passes two results to one printf) -- TODO confirm.

   NOTE(review): sprintf is unbounded.  The optype_names entries visible
   in this file are four characters each, which gives 14 characters plus
   the terminator and fits in 20 bytes; any longer name added to the
   table would need this buffer size re-checked.  */
178 op_cache_string (int map
)
181 static char cb
[5][20];
184 map
= op_cache_rev
[map
];
189 sprintf(cb
[ci
], "%s %s %s", optype_names
[a
], optype_names
[b
], optype_names
[c
]);
193 static unsigned long long cycles_per_id
[N_RXO
][N_MAP
];
194 static unsigned long long times_per_id
[N_RXO
][N_MAP
];
195 static unsigned long long memory_stalls
;
196 static unsigned long long register_stalls
;
197 static unsigned long long branch_stalls
;
198 static unsigned long long branch_alignment_stalls
;
199 static unsigned long long fast_returns
;
201 static unsigned long times_per_pair
[N_RXO
][N_MAP
][N_RXO
][N_MAP
];
202 static int prev_opcode_id
= RXO_unknown
;
209 #endif /* CYCLE_STATS */
212 #ifdef CYCLE_ACCURATE
214 static int new_rt
= -1;
216 /* Number of cycles to add if an insn spans an 8-byte boundary. */
217 static int branch_alignment_penalty
= 0;
221 static int running_benchmark
= 1;
/* Trace output: behaves like printf, but only prints while both trace
   and running_benchmark are nonzero.  When tracing is off the argument
   expressions are not evaluated at all.

   NOTE(review): this expands to a bare `if', so
       if (x) tprintf (...); else ...
   would attach the `else' to the hidden `if'.  Only use it as a
   complete statement inside braces.  */
223 #define tprintf if (trace && running_benchmark) printf
225 jmp_buf decode_jmp_buf
;
226 unsigned int rx_cycles
= 0;
228 #ifdef CYCLE_ACCURATE
229 /* If nonzero, memory was read at some point and cycle latency might
231 static int memory_source
= 0;
232 /* If nonzero, memory was written and extra cycles might be
234 static int memory_dest
= 0;
237 cycles (int throughput
)
239 tprintf("%d cycles\n", throughput
);
240 regs
.cycle_count
+= throughput
;
243 /* Number of execution (E) cycles the op uses. For memory sources, we
244 include the load micro-op stall as two extra E cycles. */
/* E(c): charge C execution cycles, plus two more when the current insn
   has a memory source (memory_source nonzero).
   NOTE(review): C is not parenthesised in the expansion, so an argument
   containing a conditional or other low-precedence operator would be
   grouped incorrectly; pass simple expressions only.  */
245 #define E(c) cycles (memory_source ? c + 2 : c)
/* E1 / E2: charge exactly one / two cycles regardless of memory_source.  */
246 #define E1 cycles (1)
247 #define E2 cycles (2)
/* EBIT: charge two cycles with a memory source, otherwise one.  */
248 #define EBIT cycles (memory_source ? 2 : 1)
250 /* Check to see if a read latency must be applied for a given register. */
254 tprintf("register %d load stall\n", r); \
255 regs.cycle_count ++; \
256 STATS(register_stalls ++); \
263 tprintf ("Rt now %d\n", r); \
267 #else /* !CYCLE_ACCURATE */
277 #endif /* else CYCLE_ACCURATE */
279 static int size2bytes
[] = {
280 4, 1, 1, 1, 2, 2, 2, 3, 4
/* rx_abort (): report an internal failure, tagging it with the source
   file and line of the call site.  */
287 #define rx_abort() _rx_abort(__FILE__, __LINE__)
/* Worker for rx_abort.  FILE and LINE identify the caller.
   NOTE(review): file-scope identifiers beginning with an underscore are
   reserved for the implementation in ISO C; a rename would be cleaner
   but would have to cover every user.  */
289 _rx_abort (const char *file
, int line
)
/* Strip any leading directory components so only the base name of the
   source file is printed.  */
291 if (strrchr (file
, '/'))
292 file
= strrchr (file
, '/') + 1;
293 fprintf(stderr
, "abort at %s:%d\n", file
, line
);
297 static unsigned char *get_byte_base
;
298 static RX_Opcode_Decoded
**decode_cache_base
;
299 static SI get_byte_page
;
/* Make sure the cached page pointers cover address TPC.

   get_byte_page remembers the page (TPC & NONPAGE_MASK) the cache
   currently describes.  The cache is refreshed when TPC lies in a
   different page, and unconditionally whenever enable_counting is
   nonzero.

   Both cached pointers are biased by -get_byte_page so that callers can
   index them directly with a full address (get_byte_base[tpc],
   decode_cache_base[pc]) instead of a page offset.
   NOTE(review): the biased pointers may point outside the objects
   returned by rx_mem_ptr / rx_mem_decode_cache, which strict ISO C
   pointer arithmetic does not allow; this relies on a flat address
   space -- confirm this is acceptable for all supported hosts.  */
302 maybe_get_mem_page (SI tpc
)
304 if (((tpc
^ get_byte_page
) & NONPAGE_MASK
) || enable_counting
)
306 get_byte_page
= tpc
& NONPAGE_MASK
;
307 get_byte_base
= rx_mem_ptr (get_byte_page
, MPA_READING
) - get_byte_page
;
308 decode_cache_base
= rx_mem_decode_cache (get_byte_page
) - get_byte_page
;
312 /* This gets called a *lot* so optimize it. */
314 rx_get_byte (void *vdata
)
316 RX_Data
*rx_data
= (RX_Data
*)vdata
;
317 SI tpc
= rx_data
->dpc
;
319 /* See load.c for an explanation of this. */
323 maybe_get_mem_page (tpc
);
326 return get_byte_base
[tpc
];
330 get_op (const RX_Opcode_Decoded
*rd
, int i
)
332 const RX_Opcode_Operand
*o
= rd
->op
+ i
;
337 case RX_Operand_None
:
340 case RX_Operand_Immediate
: /* #addend */
343 case RX_Operand_Register
: /* Rn */
345 rv
= get_reg (o
->reg
);
348 case RX_Operand_Predec
: /* [-Rn] */
349 put_reg (o
->reg
, get_reg (o
->reg
) - size2bytes
[o
->size
]);
351 case RX_Operand_Postinc
: /* [Rn+] */
352 case RX_Operand_Indirect
: /* [Rn + addend] */
353 case RX_Operand_TwoReg
: /* [Rn + scale * R2] */
354 #ifdef CYCLE_ACCURATE
356 if (o
->type
== RX_Operand_TwoReg
)
359 if (regs
.m2m
== M2M_BOTH
)
361 tprintf("src memory stall\n");
372 if (o
->type
== RX_Operand_TwoReg
)
373 addr
= get_reg (o
->reg
) * size2bytes
[rd
->size
] + get_reg (rd
->op
[2].reg
);
375 addr
= get_reg (o
->reg
) + o
->addend
;
382 case RX_Byte
: /* undefined extension */
385 rv
= mem_get_qi (addr
);
388 case RX_Word
: /* undefined extension */
391 rv
= mem_get_hi (addr
);
395 rv
= mem_get_psi (addr
);
399 rv
= mem_get_si (addr
);
403 if (o
->type
== RX_Operand_Postinc
)
404 put_reg (o
->reg
, get_reg (o
->reg
) + size2bytes
[o
->size
]);
408 case RX_Operand_Condition
: /* eq, gtu, etc */
409 return condition_true (o
->reg
);
411 case RX_Operand_Flag
: /* [UIOSZC] */
412 return (regs
.r_psw
& (1 << o
->reg
)) ? 1 : 0;
415 /* if we've gotten here, we need to clip/extend the value according
422 case RX_Byte
: /* undefined extension */
423 rv
|= 0xdeadbe00; /* keep them honest */
431 rv
= sign_ext (rv
, 8);
434 case RX_Word
: /* undefined extension */
435 rv
|= 0xdead0000; /* keep them honest */
443 rv
= sign_ext (rv
, 16);
457 put_op (const RX_Opcode_Decoded
*rd
, int i
, int v
)
459 const RX_Opcode_Operand
*o
= rd
->op
+ i
;
465 if (o
->type
!= RX_Operand_Register
)
469 case RX_Byte
: /* undefined extension */
470 v
|= 0xdeadbe00; /* keep them honest */
481 case RX_Word
: /* undefined extension */
482 v
|= 0xdead0000; /* keep them honest */
490 v
= sign_ext (v
, 16);
503 case RX_Operand_None
:
504 /* Opcodes like TST and CMP use this. */
507 case RX_Operand_Immediate
: /* #addend */
508 case RX_Operand_Condition
: /* eq, gtu, etc */
511 case RX_Operand_Register
: /* Rn */
516 case RX_Operand_Predec
: /* [-Rn] */
517 put_reg (o
->reg
, get_reg (o
->reg
) - size2bytes
[o
->size
]);
519 case RX_Operand_Postinc
: /* [Rn+] */
520 case RX_Operand_Indirect
: /* [Rn + addend] */
521 case RX_Operand_TwoReg
: /* [Rn + scale * R2] */
523 #ifdef CYCLE_ACCURATE
524 if (regs
.m2m
== M2M_BOTH
)
526 tprintf("dst memory stall\n");
536 if (o
->type
== RX_Operand_TwoReg
)
537 addr
= get_reg (o
->reg
) * size2bytes
[rd
->size
] + get_reg (rd
->op
[2].reg
);
539 addr
= get_reg (o
->reg
) + o
->addend
;
546 case RX_Byte
: /* undefined extension */
549 mem_put_qi (addr
, v
);
552 case RX_Word
: /* undefined extension */
555 mem_put_hi (addr
, v
);
559 mem_put_psi (addr
, v
);
563 mem_put_si (addr
, v
);
567 if (o
->type
== RX_Operand_Postinc
)
568 put_reg (o
->reg
, get_reg (o
->reg
) + size2bytes
[o
->size
]);
572 case RX_Operand_Flag
: /* [UIOSZC] */
574 regs
.r_psw
|= (1 << o
->reg
);
576 regs
.r_psw
&= ~(1 << o
->reg
);
/* Shorthand accessors for the operands of the insn being executed.  All
   of them expect a variable named `opcode' to be in scope.

   PD/PS/PS2 store X into operand 0, 1 or 2 through put_op; GD/GS/GS2
   fetch operand 0, 1 or 2 through get_op.  get_op and put_op update the
   address register for pre-decrement and post-increment operands, so
   every expansion repeats that side effect: fetch an operand once into
   a local rather than writing GS () twice.  */
581 #define PD(x) put_op (opcode, 0, x)
582 #define PS(x) put_op (opcode, 1, x)
583 #define PS2(x) put_op (opcode, 2, x)
584 #define GD() get_op (opcode, 0)
585 #define GS() get_op (opcode, 1)
586 #define GS2() get_op (opcode, 2)
/* Operand sizes in bytes, via the size2bytes table.
   NOTE(review): SSZ and S2SZ index op[0] exactly like DSZ.  Their names
   suggest op[1] and op[2] were intended; this looks like a copy-paste
   slip -- confirm before relying on either macro.  */
587 #define DSZ() size2bytes[opcode->op[0].size]
588 #define SSZ() size2bytes[opcode->op[0].size]
589 #define S2SZ() size2bytes[opcode->op[0].size]
591 /* "Universal" sources. */
/* US1/US2 yield the two inputs of an operation in either encoding: when
   operand 2 is absent (RX_Operand_None) the insn is "d OP= s" and the
   inputs are operands 0 and 1; otherwise it is "d = s OP s2" and the
   inputs are operands 1 and 2.  */
592 #define US1() ((opcode->op[2].type == RX_Operand_None) ? GD() : GS())
593 #define US2() ((opcode->op[2].type == RX_Operand_None) ? GS() : GS2())
598 int rsp
= get_reg (sp
);
601 mem_put_si (rsp
, val
);
604 /* Just like the above, but tag the memory as "pushed pc" so if anyone
605 tries to write to it, it will cause an error. */
609 int rsp
= get_reg (sp
);
612 mem_put_si (rsp
, val
);
613 mem_set_content_range (rsp
, rsp
+3, MC_PUSHED_PC
);
620 int rsp
= get_reg (sp
);
621 rv
= mem_get_si (rsp
);
631 int rsp
= get_reg (sp
);
632 if (mem_get_content_type (rsp
) != MC_PUSHED_PC
)
633 execution_error (SIM_ERR_CORRUPT_STACK
, rsp
);
634 rv
= mem_get_si (rsp
);
635 mem_set_content_range (rsp
, rsp
+3, MC_UNINIT
);
641 #define MATH_OP(vop,c) \
645 ll = (unsigned long long) uma vop (unsigned long long) umb vop c; \
646 tprintf ("0x%x " #vop " 0x%x " #vop " 0x%x = 0x%llx\n", uma, umb, c, ll); \
647 ma = sign_ext (uma, DSZ() * 8); \
648 mb = sign_ext (umb, DSZ() * 8); \
649 sll = (long long) ma vop (long long) mb vop c; \
650 tprintf ("%d " #vop " %d " #vop " %d = %lld\n", ma, mb, c, sll); \
651 set_oszc (sll, DSZ(), (long long) ll > ((1 vop 1) ? (long long) b2mask[DSZ()] : (long long) -1)); \
656 #define LOGIC_OP(vop) \
661 tprintf("0x%x " #vop " 0x%x = 0x%x\n", ma, mb, v); \
667 #define SHIFT_OP(val, type, count, OP, carry_mask) \
672 tprintf("%lld " #OP " %d\n", val, count); \
673 for (i = 0; i < count; i ++) \
675 c = val & carry_mask; \
679 set_oszc (val, 4, c); \
705 fop_fadd (fp_t s1
, fp_t s2
, fp_t
*d
)
707 *d
= rxfp_add (s1
, s2
);
712 fop_fmul (fp_t s1
, fp_t s2
, fp_t
*d
)
714 *d
= rxfp_mul (s1
, s2
);
719 fop_fdiv (fp_t s1
, fp_t s2
, fp_t
*d
)
721 *d
= rxfp_div (s1
, s2
);
726 fop_fsub (fp_t s1
, fp_t s2
, fp_t
*d
)
728 *d
= rxfp_sub (s1
, s2
);
/* FPPENDING (): nonzero when regs.r_fpsw reports a floating-point
   condition that must be raised.  The result keeps the FPSWBITS_CE bit
   and those FPSWBITS_FMASK bits that are also set in r_fpsw shifted
   left by FPSW_EFSH -- presumably this pairs each enable bit with its
   flag bit; TODO confirm against the FPSW register layout.  The value
   can be tested against the individual FPSWBITS_F* flag masks.  */
732 #define FPPENDING() (regs.r_fpsw & (FPSWBITS_CE | (FPSWBITS_FMASK & (regs.r_fpsw << FPSW_EFSH))))
/* FPCLEAR (): keep only the r_fpsw bits selected by FPSWBITS_CLEAR.
   The expansion is an unparenthesised assignment expression, so use it
   only as a complete statement.  */
733 #define FPCLEAR() regs.r_fpsw &= FPSWBITS_CLEAR
736 return do_fp_exception (opcode_pc)
738 #define FLOAT_OP(func) \
745 do_store = fop_##func (fa, fb, &fc); \
746 tprintf("%g " #func " %g = %g %08x\n", int2float(fa), int2float(fb), int2float(fc), fc); \
751 if ((fc & 0x80000000UL) != 0) \
753 if ((fc & 0x7fffffffUL) == 0) \
755 set_flags (FLAGBIT_S | FLAGBIT_Z, mb); \
758 #define carry (FLAG_C ? 1 : 0)
764 } exception_info
[] = {
765 { 0xFFFFFFD0UL
, "priviledged opcode", SIGILL
},
766 { 0xFFFFFFD4UL
, "access violation", SIGSEGV
},
767 { 0xFFFFFFDCUL
, "undefined opcode", SIGILL
},
768 { 0xFFFFFFE4UL
, "floating point", SIGFPE
}
/* Exception kinds.  The values are indices into the exception_info
   table (generate_exception reads exception_info[type]), so they must
   stay in step with the order of that table's entries.
   NOTE(review): "PRIVILEDGED" is a misspelling of "PRIVILEGED"; it is
   kept because the name is used elsewhere in the file.  */
770 #define EX_PRIVILEDGED 0
772 #define EX_UNDEFINED 2
773 #define EX_FLOATING 3
/* EXCEPTION (n): raise exception N for the current insn and leave the
   enclosing function with the status from generate_exception.  This
   hides a `return' and needs `opcode_pc' in scope; no statement after
   it in the same path is executed.  */
774 #define EXCEPTION(n) \
775 return generate_exception (n, opcode_pc)
777 #define PRIVILEDGED() \
779 EXCEPTION (EX_PRIVILEDGED)
782 generate_exception (unsigned long type
, SI opcode_pc
)
784 SI old_psw
, old_pc
, new_pc
;
786 new_pc
= mem_get_si (exception_info
[type
].vaddr
);
787 /* 0x00020000 is the value used to initialise the known
788 exception vectors (see rx.ld), but it is a reserved
789 area of memory so do not try to access it, and if the
790 value has not been changed by the program then the
791 vector has not been installed. */
792 if (new_pc
== 0 || new_pc
== 0x00020000)
795 return RX_MAKE_STOPPED (exception_info
[type
].signal
);
797 fprintf(stderr
, "Unhandled %s exception at pc = %#lx\n",
798 exception_info
[type
].str
, (unsigned long) opcode_pc
);
799 if (type
== EX_FLOATING
)
801 int mask
= FPPENDING ();
802 fprintf (stderr
, "Pending FP exceptions:");
803 if (mask
& FPSWBITS_FV
)
804 fprintf(stderr
, " Invalid");
805 if (mask
& FPSWBITS_FO
)
806 fprintf(stderr
, " Overflow");
807 if (mask
& FPSWBITS_FZ
)
808 fprintf(stderr
, " Division-by-zero");
809 if (mask
& FPSWBITS_FU
)
810 fprintf(stderr
, " Underflow");
811 if (mask
& FPSWBITS_FX
)
812 fprintf(stderr
, " Inexact");
813 if (mask
& FPSWBITS_CE
)
814 fprintf(stderr
, " Unimplemented");
815 fprintf(stderr
, "\n");
817 return RX_MAKE_EXITED (1);
820 tprintf ("Triggering %s exception\n", exception_info
[type
].str
);
822 old_psw
= regs
.r_psw
;
823 regs
.r_psw
&= ~ (FLAGBIT_I
| FLAGBIT_U
| FLAGBIT_PM
);
828 return RX_MAKE_STEPPED ();
832 generate_access_exception (void)
836 rv
= generate_exception (EX_ACCESS
, regs
.r_pc
);
838 longjmp (decode_jmp_buf
, rv
);
842 do_fp_exception (unsigned long opcode_pc
)
845 EXCEPTION (EX_FLOATING
);
846 return RX_MAKE_STEPPED ();
/* Classify operand I of decoded insn RD by its addressing type.  The
   pre-decrement, post-increment and register-indirect forms are grouped
   under one outcome; going by the function name that outcome is
   "operand lives in memory", though the statement for it is not visible
   here.
   NOTE(review): RX_Operand_TwoReg is not among the visible case labels,
   although get_op and put_op treat it as a memory access -- confirm
   whether leaving it out is intentional.  */
850 op_is_memory (const RX_Opcode_Decoded
*rd
, int i
)
852 switch (rd
->op
[i
].type
)
854 case RX_Operand_Predec
:
855 case RX_Operand_Postinc
:
856 case RX_Operand_Indirect
:
/* OM (i): op_is_memory for operand I of the insn in `opcode'.  */
862 #define OM(i) op_is_memory (opcode, i)
/* DO_RETURN (x): abandon the current insn and deliver status X to the
   setjmp point recorded in decode_jmp_buf.
   NOTE(review): longjmp turns a value of 0 into 1, so X must never be
   zero for the status to arrive intact -- presumably the RX_MAKE_*
   encodings guarantee that; confirm.  The expansion is a brace block,
   so "DO_RETURN (x);" followed by `else' will not compile.  */
864 #define DO_RETURN(x) { longjmp (decode_jmp_buf, x); }
869 unsigned int uma
=0, umb
=0;
872 unsigned long long ll
;
874 unsigned long opcode_pc
;
876 const RX_Opcode_Decoded
*opcode
;
878 unsigned long long prev_cycle_count
;
880 #ifdef CYCLE_ACCURATE
885 prev_cycle_count
= regs
.cycle_count
;
888 #ifdef CYCLE_ACCURATE
895 maybe_get_mem_page (regs
.r_pc
);
897 opcode_pc
= regs
.r_pc
;
899 /* Note that we don't word-swap this point, there's no point. */
900 if (decode_cache_base
[opcode_pc
] == NULL
)
902 RX_Opcode_Decoded
*opcode_w
;
903 rx_data
.dpc
= opcode_pc
;
904 opcode_w
= decode_cache_base
[opcode_pc
] = calloc (1, sizeof (RX_Opcode_Decoded
));
905 opcode_size
= rx_decode_opcode (opcode_pc
, opcode_w
,
906 rx_get_byte
, &rx_data
);
911 opcode
= decode_cache_base
[opcode_pc
];
912 opcode_size
= opcode
->n_bytes
;
915 #ifdef CYCLE_ACCURATE
916 if (branch_alignment_penalty
)
918 if ((regs
.r_pc
^ (regs
.r_pc
+ opcode_size
- 1)) & ~7)
920 tprintf("1 cycle branch alignment penalty\n");
921 cycles (branch_alignment_penalty
);
923 branch_alignment_stalls
++;
926 branch_alignment_penalty
= 0;
930 regs
.r_pc
+= opcode_size
;
932 rx_flagmask
= opcode
->flags_s
;
933 rx_flagand
= ~(int)opcode
->flags_0
;
934 rx_flagor
= opcode
->flags_1
;
940 tprintf("|%lld| = ", sll
);
943 tprintf("%lld\n", sll
);
964 if (opcode
->op
[0].type
== RX_Operand_Register
)
976 if (opcode
->op
[0].type
== RX_Operand_Register
)
991 if (opcode
->op
[0].type
== RX_Operand_Register
)
1001 if (opcode
->op
[1].type
== RX_Operand_None
|| GS())
1003 #ifdef CYCLE_ACCURATE
1004 SI old_pc
= regs
.r_pc
;
1008 #ifdef CYCLE_ACCURATE
1009 delta
= regs
.r_pc
- old_pc
;
1010 if (delta
>= 0 && delta
< 16
1013 tprintf("near forward branch bonus\n");
1019 branch_alignment_penalty
= 1;
1026 #ifdef CYCLE_ACCURATE
1037 #ifdef CYCLE_ACCURATE
1038 /* Note: specs say 3, chip says 2. */
1039 if (delta
>= 0 && delta
< 16
1042 tprintf("near forward branch bonus\n");
1048 branch_alignment_penalty
= 1;
1055 #ifdef CYCLE_ACCURATE
1063 int old_psw
= regs
.r_psw
;
1065 DO_RETURN (RX_MAKE_HIT_BREAK ());
1066 if (regs
.r_intb
== 0)
1068 tprintf("BREAK hit, no vector table.\n");
1069 DO_RETURN (RX_MAKE_EXITED(1));
1071 regs
.r_psw
&= ~(FLAGBIT_I
| FLAGBIT_U
| FLAGBIT_PM
);
1074 regs
.r_pc
= mem_get_si (regs
.r_intb
);
1082 if (opcode
->op
[0].type
== RX_Operand_Register
)
1094 if (opcode
->op
[1].type
== RX_Operand_Register
)
1098 umb
= ma
& (1 << mb
);
1099 set_zc (! umb
, umb
);
1104 v
= 1 << opcode
->op
[0].reg
;
1113 case RXO_div
: /* d = d / s */
1116 tprintf("%d / %d = ", mb
, ma
);
1117 if (ma
== 0 || (ma
== -1 && (unsigned int) mb
== 0x80000000))
1120 set_flags (FLAGBIT_O
, FLAGBIT_O
);
1126 set_flags (FLAGBIT_O
, 0);
1129 /* Note: spec says 3 to 22 cycles, we are pessimistic. */
1133 case RXO_divu
: /* d = d / s */
1136 tprintf("%u / %u = ", umb
, uma
);
1140 set_flags (FLAGBIT_O
, FLAGBIT_O
);
1146 set_flags (FLAGBIT_O
, 0);
1149 /* Note: spec says 2 to 20 cycles, we are pessimistic. */
1156 sll
= (long long)ma
* (long long)mb
;
1157 tprintf("%d * %d = %lld\n", ma
, mb
, sll
);
1158 put_reg (opcode
->op
[0].reg
, sll
);
1159 put_reg (opcode
->op
[0].reg
+ 1, sll
>> 32);
1166 ll
= (long long)uma
* (long long)umb
;
1167 tprintf("%#x * %#x = %#llx\n", uma
, umb
, ll
);
1168 put_reg (opcode
->op
[0].reg
, ll
);
1169 put_reg (opcode
->op
[0].reg
+ 1, ll
>> 32);
1199 regs
.r_psw
= regs
.r_bpsw
;
1200 regs
.r_pc
= regs
.r_bpc
;
1201 #ifdef CYCLE_ACCURATE
1202 regs
.fast_return
= 0;
1215 mb
= rxfp_ftoi (ma
, FPRM_ZERO
);
1218 tprintf("(int) %g = %d\n", int2float(ma
), mb
);
1227 DO_RETURN (rx_syscall (regs
.r
[5]));
1231 int old_psw
= regs
.r_psw
;
1232 regs
.r_psw
&= ~(FLAGBIT_I
| FLAGBIT_U
| FLAGBIT_PM
);
1235 regs
.r_pc
= mem_get_si (regs
.r_intb
+ 4 * v
);
1243 mb
= rxfp_itof (ma
, regs
.r_fpsw
);
1245 tprintf("(float) %d = %x\n", ma
, mb
);
1254 #ifdef CYCLE_ACCURATE
1259 #ifdef CYCLE_ACCURATE
1260 regs
.link_register
= regs
.r_pc
;
1262 pushpc (get_reg (pc
));
1263 if (opcode
->id
== RXO_jsrrel
)
1265 #ifdef CYCLE_ACCURATE
1266 delta
= v
- regs
.r_pc
;
1269 #ifdef CYCLE_ACCURATE
1270 /* Note: docs say 3, chip says 2 */
1271 if (delta
>= 0 && delta
< 16)
1273 tprintf ("near forward jsr bonus\n");
1278 branch_alignment_penalty
= 1;
1281 regs
.fast_return
= 1;
1287 ll
= (long long)(signed short)(GS() >> 16) * (long long)(signed short)(GS2 () >> 16);
1289 put_reg64 (acc64
, ll
+ regs
.r_acc
);
1294 ll
= (long long)(signed short)(GS()) * (long long)(signed short)(GS2 ());
1296 put_reg64 (acc64
, ll
+ regs
.r_acc
);
1323 if (opcode
->op
[0].type
== RX_Operand_Register
1324 && opcode
->op
[0].reg
== 16 /* PSW */)
1326 /* Special case, LDC and POPC can't ever modify PM. */
1327 int pm
= regs
.r_psw
& FLAGBIT_PM
;
1332 v
&= ~ (FLAGBIT_I
| FLAGBIT_U
| FLAGBITS_IPL
);
1338 /* various things can't be changed in user mode. */
1339 if (opcode
->op
[0].type
== RX_Operand_Register
)
1340 if (opcode
->op
[0].reg
== 32)
1342 v
&= ~ (FLAGBIT_I
| FLAGBIT_U
| FLAGBITS_IPL
);
1343 v
|= regs
.r_psw
& (FLAGBIT_I
| FLAGBIT_U
| FLAGBITS_IPL
);
1345 if (opcode
->op
[0].reg
== 34 /* ISP */
1346 || opcode
->op
[0].reg
== 37 /* BPSW */
1347 || opcode
->op
[0].reg
== 39 /* INTB */
1348 || opcode
->op
[0].reg
== 38 /* VCT */)
1349 /* These are ignored. */
1359 #ifdef CYCLE_ACCURATE
1360 if ((opcode
->op
[0].type
== RX_Operand_Predec
1361 && opcode
->op
[1].type
== RX_Operand_Register
)
1362 || (opcode
->op
[0].type
== RX_Operand_Postinc
1363 && opcode
->op
[1].type
== RX_Operand_Register
))
1365 /* Special case: push reg doesn't cause a memory stall. */
1367 tprintf("push special case\n");
1386 ll
= (unsigned long long) US1() * (unsigned long long) v
;
1393 ll
= (long long)(signed short)(GS() >> 16) * (long long)(signed short)(v
>> 16);
1395 put_reg64 (acc64
, ll
);
1401 ll
= (long long)(signed short)(GS()) * (long long)(signed short)(v
);
1403 put_reg64 (acc64
, ll
);
1408 PD (get_reg (acchi
));
1413 PD (get_reg (acclo
));
1418 PD (get_reg (accmi
));
1423 put_reg (acchi
, GS ());
1428 put_reg (acclo
, GS ());
1433 regs
.r_psw
&= ~ FLAGBITS_IPL
;
1434 regs
.r_psw
|= (GS () << FLAGSHIFT_IPL
) & FLAGBITS_IPL
;
1449 /* POPM cannot pop R0 (sp). */
1450 if (opcode
->op
[1].reg
== 0 || opcode
->op
[2].reg
== 0)
1451 EXCEPTION (EX_UNDEFINED
);
1452 if (opcode
->op
[1].reg
>= opcode
->op
[2].reg
)
1454 regs
.r_pc
= opcode_pc
;
1455 DO_RETURN (RX_MAKE_STOPPED (SIGILL
));
1457 for (v
= opcode
->op
[1].reg
; v
<= opcode
->op
[2].reg
; v
++)
1461 put_reg (v
, pop ());
1466 /* PUSHM cannot push R0 (sp). */
1467 if (opcode
->op
[1].reg
== 0 || opcode
->op
[2].reg
== 0)
1468 EXCEPTION (EX_UNDEFINED
);
1469 if (opcode
->op
[1].reg
>= opcode
->op
[2].reg
)
1471 regs
.r_pc
= opcode_pc
;
1472 return RX_MAKE_STOPPED (SIGILL
);
1474 for (v
= opcode
->op
[2].reg
; v
>= opcode
->op
[1].reg
; v
--)
1479 cycles (opcode
->op
[2].reg
- opcode
->op
[1].reg
+ 1);
1483 ll
= get_reg64 (acc64
) << GS ();
1484 ll
+= 0x80000000ULL
;
1485 if ((signed long long)ll
> (signed long long)0x00007fff00000000ULL
)
1486 ll
= 0x00007fff00000000ULL
;
1487 else if ((signed long long)ll
< (signed long long)0xffff800000000000ULL
)
1488 ll
= 0xffff800000000000ULL
;
1490 ll
&= 0xffffffff00000000ULL
;
1491 put_reg64 (acc64
, ll
);
1497 regs
.r_pc
= poppc ();
1498 regs
.r_psw
= poppc ();
1500 regs
.r_psw
|= FLAGBIT_U
;
1501 #ifdef CYCLE_ACCURATE
1502 regs
.fast_return
= 0;
1509 umb
= (((uma
>> 24) & 0xff)
1510 | ((uma
>> 8) & 0xff00)
1511 | ((uma
<< 8) & 0xff0000)
1512 | ((uma
<< 24) & 0xff000000UL
));
1519 umb
= (((uma
>> 8) & 0x00ff00ff)
1520 | ((uma
<< 8) & 0xff00ff00UL
));
1528 #ifdef CYCLE_ACCURATE
1532 while (regs
.r
[3] != 0)
1536 switch (opcode
->size
)
1539 ma
= mem_get_si (regs
.r
[1]);
1540 mb
= mem_get_si (regs
.r
[2]);
1545 ma
= sign_ext (mem_get_hi (regs
.r
[1]), 16);
1546 mb
= sign_ext (mem_get_hi (regs
.r
[2]), 16);
1551 ma
= sign_ext (mem_get_qi (regs
.r
[1]), 8);
1552 mb
= sign_ext (mem_get_qi (regs
.r
[2]), 8);
1559 /* We do the multiply as a signed value. */
1560 sll
= (long long)ma
* (long long)mb
;
1561 tprintf(" %016llx = %d * %d\n", sll
, ma
, mb
);
1562 /* but we do the sum as unsigned, while sign extending the operands. */
1563 tmp
= regs
.r
[4] + (sll
& 0xffffffffUL
);
1564 regs
.r
[4] = tmp
& 0xffffffffUL
;
1567 tmp
+= regs
.r
[5] + (sll
& 0xffffffffUL
);
1568 regs
.r
[5] = tmp
& 0xffffffffUL
;
1571 tmp
+= regs
.r
[6] + (sll
& 0xffffffffUL
);
1572 regs
.r
[6] = tmp
& 0xffffffffUL
;
1573 tprintf("%08lx\033[36m%08lx\033[0m%08lx\n",
1574 (unsigned long) regs
.r
[6],
1575 (unsigned long) regs
.r
[5],
1576 (unsigned long) regs
.r
[4]);
1580 if (regs
.r
[6] & 0x00008000)
1581 regs
.r
[6] |= 0xffff0000UL
;
1583 regs
.r
[6] &= 0x0000ffff;
1584 ma
= (regs
.r
[6] & 0x80000000UL
) ? FLAGBIT_S
: 0;
1585 if (regs
.r
[6] != 0 && regs
.r
[6] != 0xffffffffUL
)
1586 set_flags (FLAGBIT_O
|FLAGBIT_S
, ma
| FLAGBIT_O
);
1588 set_flags (FLAGBIT_O
|FLAGBIT_S
, ma
);
1589 #ifdef CYCLE_ACCURATE
1590 switch (opcode
->size
)
1593 cycles (6 + 4 * tx
);
1596 cycles (6 + 5 * (tx
/ 2) + 4 * (tx
% 2));
1599 cycles (6 + 7 * (tx
/ 4) + 4 * (tx
% 4));
1609 ma
= v
& 0x80000000UL
;
1621 uma
|= (carry
? 0x80000000UL
: 0);
1622 set_szc (uma
, 4, mb
);
1632 uma
= (uma
<< mb
) | (uma
>> (32-mb
));
1635 set_szc (uma
, 4, mb
);
1645 uma
= (uma
>> mb
) | (uma
<< (32-mb
));
1646 mb
= uma
& 0x80000000;
1648 set_szc (uma
, 4, mb
);
1656 mb
= rxfp_ftoi (ma
, regs
.r_fpsw
);
1659 tprintf("(int) %g = %d\n", int2float(ma
), mb
);
1666 #ifdef CYCLE_ACCURATE
1669 regs
.r_pc
= poppc ();
1670 #ifdef CYCLE_ACCURATE
1671 /* Note: specs say 5, chip says 3. */
1672 if (regs
.fast_return
&& regs
.link_register
== regs
.r_pc
)
1677 tprintf("fast return bonus\n");
1681 regs
.fast_return
= 0;
1682 branch_alignment_penalty
= 1;
1688 if (opcode
->op
[2].type
== RX_Operand_Register
)
1691 /* RTSD cannot pop R0 (sp). */
1692 put_reg (0, get_reg (0) + GS() - (opcode
->op
[0].reg
-opcode
->op
[2].reg
+1)*4);
1693 if (opcode
->op
[2].reg
== 0)
1694 EXCEPTION (EX_UNDEFINED
);
1695 #ifdef CYCLE_ACCURATE
1696 tx
= opcode
->op
[0].reg
- opcode
->op
[2].reg
+ 1;
1698 for (i
= opcode
->op
[2].reg
; i
<= opcode
->op
[0].reg
; i
++)
1701 put_reg (i
, pop ());
1706 #ifdef CYCLE_ACCURATE
1709 put_reg (0, get_reg (0) + GS());
1711 put_reg (pc
, poppc());
1712 #ifdef CYCLE_ACCURATE
1713 if (regs
.fast_return
&& regs
.link_register
== regs
.r_pc
)
1715 tprintf("fast return bonus\n");
1719 cycles (tx
< 3 ? 3 : tx
+ 1);
1723 cycles (tx
< 5 ? 5 : tx
+ 1);
1725 regs
.fast_return
= 0;
1726 branch_alignment_penalty
= 1;
1731 if (FLAG_O
&& FLAG_S
)
1733 else if (FLAG_O
&& ! FLAG_S
)
1739 MATH_OP (-, ! carry
);
1751 #ifdef CYCLE_ACCURATE
1754 while (regs
.r
[3] != 0)
1756 uma
= mem_get_qi (regs
.r
[1] ++);
1757 umb
= mem_get_qi (regs
.r
[2] ++);
1759 if (uma
!= umb
|| uma
== 0)
1765 set_zc (0, ((int)uma
- (int)umb
) >= 0);
1766 cycles (2 + 4 * (tx
/ 4) + 4 * (tx
% 4));
1770 v
= 1 << opcode
->op
[0].reg
;
1781 #ifdef CYCLE_ACCURATE
1786 uma
= mem_get_qi (regs
.r
[2] --);
1787 mem_put_qi (regs
.r
[1]--, uma
);
1790 #ifdef CYCLE_ACCURATE
1792 cycles (6 + 3 * (tx
/ 4) + 3 * (tx
% 4));
1794 cycles (2 + 3 * (tx
% 4));
1800 #ifdef CYCLE_ACCURATE
1805 uma
= mem_get_qi (regs
.r
[2] ++);
1806 mem_put_qi (regs
.r
[1]++, uma
);
1809 cycles (2 + 3 * (int)(tx
/ 4) + 3 * (tx
% 4));
1813 while (regs
.r
[3] != 0)
1815 uma
= mem_get_qi (regs
.r
[2] ++);
1816 mem_put_qi (regs
.r
[1]++, uma
);
1823 case RXO_shar
: /* d = ma >> mb */
1824 SHIFT_OP (sll
, int, mb
, >>=, 1);
1828 case RXO_shll
: /* d = ma << mb */
1829 SHIFT_OP (ll
, int, mb
, <<=, 0x80000000UL
);
1833 case RXO_shlr
: /* d = ma >> mb */
1834 SHIFT_OP (ll
, unsigned int, mb
, >>=, 1);
1840 #ifdef CYCLE_ACCURATE
1843 switch (opcode
->size
)
1846 while (regs
.r
[3] != 0)
1848 mem_put_si (regs
.r
[1], regs
.r
[2]);
1855 while (regs
.r
[3] != 0)
1857 mem_put_hi (regs
.r
[1], regs
.r
[2]);
1861 cycles (2 + (int)(tx
/ 2) + tx
% 2);
1864 while (regs
.r
[3] != 0)
1866 mem_put_qi (regs
.r
[1], regs
.r
[2]);
1870 cycles (2 + (int)(tx
/ 4) + tx
% 4);
1885 regs
.r_psw
|= FLAGBIT_I
;
1886 DO_RETURN (RX_MAKE_STOPPED(0));
1894 #ifdef CYCLE_ACCURATE
1902 switch (opcode
->size
)
1906 while (regs
.r
[3] != 0)
1909 umb
= mem_get_si (get_reg (1));
1914 cycles (3 + 3 * tx
);
1917 uma
= get_reg (2) & 0xffff;
1918 while (regs
.r
[3] != 0)
1921 umb
= mem_get_hi (get_reg (1));
1926 cycles (3 + 3 * (tx
/ 2) + 3 * (tx
% 2));
1929 uma
= get_reg (2) & 0xff;
1930 while (regs
.r
[3] != 0)
1933 umb
= mem_get_qi (regs
.r
[1]);
1938 cycles (3 + 3 * (tx
/ 4) + 3 * (tx
% 4));
1946 set_zc (0, ((int)uma
- (int)umb
) >= 0);
1951 #ifdef CYCLE_ACCURATE
1956 switch (opcode
->size
)
1960 while (regs
.r
[3] != 0)
1963 umb
= mem_get_si (get_reg (1));
1968 cycles (3 + 3 * tx
);
1971 uma
= get_reg (2) & 0xffff;
1972 while (regs
.r
[3] != 0)
1975 umb
= mem_get_hi (get_reg (1));
1980 cycles (3 + 3 * (tx
/ 2) + 3 * (tx
% 2));
1983 uma
= get_reg (2) & 0xff;
1984 while (regs
.r
[3] != 0)
1987 umb
= mem_get_qi (regs
.r
[1]);
1992 cycles (3 + 3 * (tx
/ 4) + 3 * (tx
% 4));
2000 set_zc (0, ((int)uma
- (int)umb
) >= 0);
2005 regs
.r_psw
|= FLAGBIT_I
;
2006 DO_RETURN (RX_MAKE_STOPPED(0));
2009 #ifdef CYCLE_ACCURATE
2012 v
= GS (); /* This is the memory operand, if any. */
2013 PS (GD ()); /* and this may change the address register. */
2016 #ifdef CYCLE_ACCURATE
2017 /* all M cycles happen during xchg's cycles. */
2028 EXCEPTION (EX_UNDEFINED
);
2031 #ifdef CYCLE_ACCURATE
2034 regs
.m2m
|= M2M_SRC
;
2036 regs
.m2m
|= M2M_DST
;
2043 if (prev_cycle_count
== regs
.cycle_count
)
2045 printf("Cycle count not updated! id %s\n", id_names
[opcode
->id
]);
2051 if (running_benchmark
)
2053 int omap
= op_lookup (opcode
->op
[0].type
, opcode
->op
[1].type
, opcode
->op
[2].type
);
2056 cycles_per_id
[opcode
->id
][omap
] += regs
.cycle_count
- prev_cycle_count
;
2057 times_per_id
[opcode
->id
][omap
] ++;
2059 times_per_pair
[prev_opcode_id
][po0
][opcode
->id
][omap
] ++;
2061 prev_opcode_id
= opcode
->id
;
2066 return RX_MAKE_STEPPED ();
/* Start a fresh measurement: clear the per-opcode cycle and count
   tables, the opcode-pair table and the stall counters shown below,
   turn benchmark accounting on, and remember the current cycle count as
   the start of the benchmark window.
   NOTE(review): only register_stalls and branch_alignment_stalls are
   visibly reset here; the other counters (memory_stalls, branch_stalls,
   fast_returns) are presumably cleared by statements not shown --
   confirm.  */
2071 reset_pipeline_stats (void)
2073 memset (cycles_per_id
, 0, sizeof(cycles_per_id
));
2074 memset (times_per_id
, 0, sizeof(times_per_id
));
2076 register_stalls
= 0;
2078 branch_alignment_stalls
= 0;
2080 memset (times_per_pair
, 0, sizeof(times_per_pair
));
/* Re-enable tracing and statistics gathering.  */
2081 running_benchmark
= 1;
2083 benchmark_start_cycle
= regs
.cycle_count
;
/* End the measurement window: turn benchmark accounting off and record
   the current cycle count as the end of the benchmark.
   NOTE(review): pipeline_stats prints the benchmark length only when
   benchmark_start_cycle is nonzero, so a window that began at cycle 0
   would go unreported.  */
2087 halt_pipeline_stats (void)
2089 running_benchmark
= 0;
2090 benchmark_end_cycle
= regs
.cycle_count
;
2095 pipeline_stats (void)
2102 #ifdef CYCLE_ACCURATE
2105 printf ("cycles: %llu\n", regs
.cycle_count
);
2109 printf ("cycles: %13s\n", comma (regs
.cycle_count
));
2113 if (benchmark_start_cycle
)
2114 printf ("bmark: %13s\n", comma (benchmark_end_cycle
- benchmark_start_cycle
));
2117 for (i
= 0; i
< N_RXO
; i
++)
2118 for (o1
= 0; o1
< N_MAP
; o1
++)
2119 if (times_per_id
[i
][o1
])
2120 printf("%13s %13s %7.2f %s %s\n",
2121 comma (cycles_per_id
[i
][o1
]),
2122 comma (times_per_id
[i
][o1
]),
2123 (double)cycles_per_id
[i
][o1
] / times_per_id
[i
][o1
],
2124 op_cache_string(o1
),
2128 for (p
= 0; p
< N_RXO
; p
++)
2129 for (p1
= 0; p1
< N_MAP
; p1
++)
2130 for (i
= 0; i
< N_RXO
; i
++)
2131 for (o1
= 0; o1
< N_MAP
; o1
++)
2132 if (times_per_pair
[p
][p1
][i
][o1
])
2134 printf("%13s %s %-9s -> %s %s\n",
2135 comma (times_per_pair
[p
][p1
][i
][o1
]),
2136 op_cache_string(p1
),
2138 op_cache_string(o1
),
2143 printf("%13s memory stalls\n", comma (memory_stalls
));
2144 printf("%13s register stalls\n", comma (register_stalls
));
2145 printf("%13s branches taken (non-return)\n", comma (branch_stalls
));
2146 printf("%13s branch alignment stalls\n", comma (branch_alignment_stalls
));
2147 printf("%13s fast returns\n", comma (fast_returns
));
This page took 0.072861 seconds and 4 git commands to generate.