1 /* Copyright (C) 1998, Cygnus Solutions */
9 #include "sim-assert.h"
13 #include "sky-gpuif.h"
14 #include "sky-device.h"
26 /* Internal function declarations */
28 static int pke_io_read_buffer(device
*, void*, int, address_word
,
29 unsigned, sim_cpu
*, sim_cia
);
30 static int pke_io_write_buffer(device
*, const void*, int, address_word
,
31 unsigned, sim_cpu
*, sim_cia
);
32 static void pke_reset(struct pke_device
*);
33 static void pke_issue(SIM_DESC
, struct pke_device
*);
34 static void pke_pc_advance(struct pke_device
*, int num_words
);
35 static struct fifo_quadword
* pke_pcrel_fifo(struct pke_device
*, int operand_num
,
36 unsigned_4
** operand
);
37 static unsigned_4
* pke_pcrel_operand(struct pke_device
*, int operand_num
);
38 static unsigned_4
pke_pcrel_operand_bits(struct pke_device
*, int bit_offset
,
39 int bit_width
, unsigned_4
* sourceaddr
);
40 static void pke_attach(SIM_DESC sd
, struct pke_device
* me
);
41 enum pke_check_target
{ chk_vu
, chk_path1
, chk_path2
, chk_path3
};
42 static int pke_check_stall(struct pke_device
* me
, enum pke_check_target what
);
43 static void pke_flip_dbf(struct pke_device
* me
);
44 static void pke_begin_interrupt_stall(struct pke_device
* me
);
45 /* PKEcode handlers */
46 static void pke_code_nop(struct pke_device
* me
, unsigned_4 pkecode
);
47 static void pke_code_stcycl(struct pke_device
* me
, unsigned_4 pkecode
);
48 static void pke_code_offset(struct pke_device
* me
, unsigned_4 pkecode
);
49 static void pke_code_base(struct pke_device
* me
, unsigned_4 pkecode
);
50 static void pke_code_itop(struct pke_device
* me
, unsigned_4 pkecode
);
51 static void pke_code_stmod(struct pke_device
* me
, unsigned_4 pkecode
);
52 static void pke_code_mskpath3(struct pke_device
* me
, unsigned_4 pkecode
);
53 static void pke_code_pkemark(struct pke_device
* me
, unsigned_4 pkecode
);
54 static void pke_code_flushe(struct pke_device
* me
, unsigned_4 pkecode
);
55 static void pke_code_flush(struct pke_device
* me
, unsigned_4 pkecode
);
56 static void pke_code_flusha(struct pke_device
* me
, unsigned_4 pkecode
);
57 static void pke_code_pkemscal(struct pke_device
* me
, unsigned_4 pkecode
);
58 static void pke_code_pkemscnt(struct pke_device
* me
, unsigned_4 pkecode
);
59 static void pke_code_pkemscalf(struct pke_device
* me
, unsigned_4 pkecode
);
60 static void pke_code_stmask(struct pke_device
* me
, unsigned_4 pkecode
);
61 static void pke_code_strow(struct pke_device
* me
, unsigned_4 pkecode
);
62 static void pke_code_stcol(struct pke_device
* me
, unsigned_4 pkecode
);
63 static void pke_code_mpg(struct pke_device
* me
, unsigned_4 pkecode
);
64 static void pke_code_direct(struct pke_device
* me
, unsigned_4 pkecode
);
65 static void pke_code_directhl(struct pke_device
* me
, unsigned_4 pkecode
);
66 static void pke_code_unpack(struct pke_device
* me
, unsigned_4 pkecode
);
67 static void pke_code_error(struct pke_device
* me
, unsigned_4 pkecode
);
68 unsigned_4
pke_fifo_flush(struct pke_fifo
*);
69 void pke_fifo_reset(struct pke_fifo
*);
70 struct fifo_quadword
* pke_fifo_fit(struct pke_fifo
*);
71 struct fifo_quadword
* pke_fifo_access(struct pke_fifo
*, unsigned_4 qwnum
);
72 void pke_fifo_old(struct pke_fifo
*, unsigned_4 qwnum
);
78 struct pke_device pke0_device
=
80 { "pke0", &pke_io_read_buffer
, &pke_io_write_buffer
}, /* device */
83 {}, 0, /* FIFO write buffer */
84 { NULL
, 0, 0, 0 }, /* FIFO */
85 NULL
, /* FIFO trace file */
86 -1, -1, 0, 0, 0, /* invalid FIFO cache */
91 struct pke_device pke1_device
=
93 { "pke1", &pke_io_read_buffer
, &pke_io_write_buffer
}, /* device */
96 {}, 0, /* FIFO write buffer */
97 { NULL
, 0, 0, 0 }, /* FIFO */
98 NULL
, /* FIFO trace file */
99 -1, -1, 0, 0, 0, /* invalid FIFO cache */
105 /* External functions */
108 /* Attach PKE addresses to main memory */
111 pke0_attach(SIM_DESC sd
)
113 pke_attach(sd
, & pke0_device
);
114 pke_reset(& pke0_device
);
118 pke1_attach(SIM_DESC sd
)
120 pke_attach(sd
, & pke1_device
);
121 pke_reset(& pke1_device
);
126 /* Issue a PKE instruction if possible */
129 pke0_issue(SIM_DESC sd
)
131 pke_issue(sd
, & pke0_device
);
135 pke1_issue(SIM_DESC sd
)
137 pke_issue(sd
, & pke1_device
);
142 /* Internal functions */
145 /* Attach PKE memory regions to simulator */
148 pke_attach(SIM_DESC sd
, struct pke_device
* me
)
151 sim_core_attach (sd
, NULL
, 0, access_read_write
, 0,
152 (me
->pke_number
== 0) ? PKE0_REGISTER_WINDOW_START
: PKE1_REGISTER_WINDOW_START
,
153 PKE_REGISTER_WINDOW_SIZE
/*nr_bytes*/,
159 sim_core_attach (sd
, NULL
, 0, access_read_write
, 0,
160 (me
->pke_number
== 0) ? PKE0_FIFO_ADDR
: PKE1_FIFO_ADDR
,
161 sizeof(quadword
) /*nr_bytes*/,
166 /* VU MEM0 tracking table */
167 sim_core_attach (sd
, NULL
, 0, access_read_write
, 0,
168 ((me
->pke_number
== 0) ? VU0_MEM0_SRCADDR_START
: VU1_MEM0_SRCADDR_START
),
169 ((me
->pke_number
== 0) ? VU0_MEM0_SIZE
: VU1_MEM0_SIZE
) / 2,
174 /* VU MEM1 tracking table */
175 sim_core_attach (sd
, NULL
, 0, access_read_write
, 0,
176 ((me
->pke_number
== 0) ? VU0_MEM1_SRCADDR_START
: VU1_MEM1_SRCADDR_START
),
177 ((me
->pke_number
== 0) ? VU0_MEM1_SIZE
: VU1_MEM1_SIZE
) / 4,
183 /* attach to trace file if appropriate */
185 char trace_envvar
[80];
186 char* trace_filename
= NULL
;
187 sprintf(trace_envvar
, "VIF%d_TRACE_FILE", me
->pke_number
);
188 trace_filename
= getenv(trace_envvar
);
189 if(trace_filename
!= NULL
)
191 me
->fifo_trace_file
= fopen(trace_filename
, "w");
192 if(me
->fifo_trace_file
== NULL
)
193 perror("VIF FIFO trace error on fopen");
195 setvbuf(me
->fifo_trace_file
, NULL
, _IOLBF
, 0);
201 /* Read PKE Pseudo-PC into buf in target order */
203 read_pke_pc (struct pke_device
*me
, void *buf
)
205 *((int *) buf
) = H2T_4( (me
->fifo_pc
<< 2) | me
->qw_pc
);
210 /* Read PKE reg into buf in target order */
212 read_pke_reg (struct pke_device
*me
, int reg_num
, void *buf
)
214 /* handle reads to individual registers; clear `readable' on error */
217 /* handle common case of register reading, side-effect free */
218 /* PKE1-only registers */
224 if (me
->pke_number
== 0)
231 /* PKE0 & PKE1 common registers */
250 *((int *) buf
) = H2T_4(me
->regs
[reg_num
][0]);
253 /* handle common case of write-only registers */
259 ASSERT(0); /* tests above should prevent this possibility */
266 /* Handle a PKE read; return no. of bytes read */
269 pke_io_read_buffer(device
*me_
,
277 /* downcast to gather embedding pke_device struct */
278 struct pke_device
* me
= (struct pke_device
*) me_
;
280 /* find my address ranges */
281 address_word my_reg_start
=
282 (me
->pke_number
== 0) ? PKE0_REGISTER_WINDOW_START
: PKE1_REGISTER_WINDOW_START
;
283 address_word my_fifo_addr
=
284 (me
->pke_number
== 0) ? PKE0_FIFO_ADDR
: PKE1_FIFO_ADDR
;
286 /* enforce that an access does not span more than one quadword */
287 address_word low
= ADDR_TRUNC_QW(addr
);
288 address_word high
= ADDR_TRUNC_QW(addr
+ nr_bytes
- 1);
292 /* classify address & handle */
293 if((addr
>= my_reg_start
) && (addr
< my_reg_start
+ PKE_REGISTER_WINDOW_SIZE
))
296 int reg_num
= ADDR_TRUNC_QW(addr
- my_reg_start
) >> 4;
297 int reg_byte
= ADDR_OFFSET_QW(addr
); /* find byte-offset inside register bank */
301 result
[0] = result
[1] = result
[2] = result
[3] = 0;
303 read_pke_reg (me
, reg_num
, result
);
305 /* perform transfer & return */
306 memcpy(dest
, ((unsigned_1
*) &result
) + reg_byte
, nr_bytes
);
311 else if(addr
>= my_fifo_addr
&&
312 addr
< my_fifo_addr
+ sizeof(quadword
))
316 /* FIFO is not readable: return a word of zeroes */
317 memset(dest
, 0, nr_bytes
);
325 /* Write PKE reg from buf, which is in target order */
327 write_pke_reg (struct pke_device
*me
, int reg_num
, const void *buf
)
330 /* make words host-endian */
331 unsigned_4 input
= T2H_4( *((unsigned_4
*) buf
) );
333 /* handle writes to individual registers; clear `writeable' on error */
337 /* Order these tests from least to most overriding, in case
338 multiple bits are set. */
339 if(BIT_MASK_GET(input
, PKE_REG_FBRST_STC_B
, PKE_REG_FBRST_STC_E
))
341 /* clear a bunch of status bits */
342 PKE_REG_MASK_SET(me
, STAT
, PSS
, 0);
343 PKE_REG_MASK_SET(me
, STAT
, PFS
, 0);
344 PKE_REG_MASK_SET(me
, STAT
, PIS
, 0);
345 PKE_REG_MASK_SET(me
, STAT
, INT
, 0);
346 PKE_REG_MASK_SET(me
, STAT
, ER0
, 0);
347 PKE_REG_MASK_SET(me
, STAT
, ER1
, 0);
348 me
->flags
&= ~PKE_FLAG_PENDING_PSS
;
349 /* will allow resumption of possible stalled instruction */
351 if(BIT_MASK_GET(input
, PKE_REG_FBRST_STP_B
, PKE_REG_FBRST_STP_E
))
353 me
->flags
|= PKE_FLAG_PENDING_PSS
;
355 if(BIT_MASK_GET(input
, PKE_REG_FBRST_FBK_B
, PKE_REG_FBRST_FBK_E
))
357 PKE_REG_MASK_SET(me
, STAT
, PFS
, 1);
359 if(BIT_MASK_GET(input
, PKE_REG_FBRST_RST_B
, PKE_REG_FBRST_RST_E
))
366 /* copy bottom three bits */
367 BIT_MASK_SET(me
->regs
[PKE_REG_ERR
][0], 0, 2, BIT_MASK_GET(input
, 0, 2));
371 /* copy bottom sixteen bits */
372 PKE_REG_MASK_SET(me
, MARK
, MARK
, BIT_MASK_GET(input
, 0, 15));
373 /* reset MRK bit in STAT */
374 PKE_REG_MASK_SET(me
, STAT
, MRK
, 0);
377 /* handle common case of read-only registers */
378 /* PKE1-only registers - not really necessary to handle separately */
384 if(me
->pke_number
== 0)
387 /* PKE0 & PKE1 common registers */
389 /* ignore FDR bit for PKE1_STAT -- simulator does not implement PKE->RAM transfers */
409 ASSERT(0); /* test above should prevent this possibility */
415 return 0; /* error */
422 /* Handle a PKE write; return no. of bytes written */
425 pke_io_write_buffer(device
*me_
,
433 /* downcast to gather embedding pke_device struct */
434 struct pke_device
* me
= (struct pke_device
*) me_
;
436 /* find my address ranges */
437 address_word my_reg_start
=
438 (me
->pke_number
== 0) ? PKE0_REGISTER_WINDOW_START
: PKE1_REGISTER_WINDOW_START
;
439 address_word my_fifo_addr
=
440 (me
->pke_number
== 0) ? PKE0_FIFO_ADDR
: PKE1_FIFO_ADDR
;
442 /* enforce that an access does not span more than one quadword */
443 address_word low
= ADDR_TRUNC_QW(addr
);
444 address_word high
= ADDR_TRUNC_QW(addr
+ nr_bytes
- 1);
448 /* classify address & handle */
449 if((addr
>= my_reg_start
) && (addr
< my_reg_start
+ PKE_REGISTER_WINDOW_SIZE
))
452 int reg_num
= ADDR_TRUNC_QW(addr
- my_reg_start
) >> 4;
453 int reg_byte
= ADDR_OFFSET_QW(addr
); /* find byte-offset inside register bank */
457 input
[0] = input
[1] = input
[2] = input
[3] = 0;
459 /* write user-given bytes into input */
460 memcpy(((unsigned_1
*) &input
) + reg_byte
, src
, nr_bytes
);
462 write_pke_reg (me
, reg_num
, input
);
467 else if(addr
>= my_fifo_addr
&&
468 addr
< my_fifo_addr
+ sizeof(quadword
))
471 struct fifo_quadword
* fqw
;
472 int fifo_byte
= ADDR_OFFSET_QW(addr
); /* find byte-offset inside fifo quadword */
473 unsigned_4 dma_tag_present
= 0;
476 /* collect potentially-partial quadword in write buffer; LE byte order */
477 memcpy(((unsigned_1
*)& me
->fifo_qw_in_progress
) + fifo_byte
, src
, nr_bytes
);
478 /* mark bytes written */
479 for(i
= fifo_byte
; i
< fifo_byte
+ nr_bytes
; i
++)
480 BIT_MASK_SET(me
->fifo_qw_done
, i
, i
, 1);
482 /* return if quadword not quite written yet */
483 if(BIT_MASK_GET(me
->fifo_qw_done
, 0, sizeof(quadword
)-1) !=
484 BIT_MASK_BTW(0, sizeof(quadword
)-1))
487 /* all done - process quadword after clearing flag */
488 BIT_MASK_SET(me
->fifo_qw_done
, 0, sizeof(quadword
)-1, 0);
490 /* allocate required address in FIFO */
491 fqw
= pke_fifo_fit(& me
->fifo
);
494 /* fill in unclassified FIFO quadword data in host byte order */
495 fqw
->word_class
[0] = fqw
->word_class
[1] =
496 fqw
->word_class
[2] = fqw
->word_class
[3] = wc_unknown
;
497 fqw
->data
[0] = T2H_4(me
->fifo_qw_in_progress
[0]);
498 fqw
->data
[1] = T2H_4(me
->fifo_qw_in_progress
[1]);
499 fqw
->data
[2] = T2H_4(me
->fifo_qw_in_progress
[2]);
500 fqw
->data
[3] = T2H_4(me
->fifo_qw_in_progress
[3]);
502 /* read DMAC-supplied indicators */
503 ASSERT(sizeof(unsigned_4
) == 4);
504 PKE_MEM_READ(me
, (me
->pke_number
== 0 ? DMA_D0_MADR
: DMA_D1_MADR
),
505 & fqw
->source_address
, /* converted to host-endian */
507 PKE_MEM_READ(me
, (me
->pke_number
== 0 ? DMA_D0_PKTFLAG
: DMA_D1_PKTFLAG
),
513 /* lower two words are DMA tags */
514 fqw
->word_class
[0] = fqw
->word_class
[1] = wc_dma
;
517 /* set FQC to "1" as FIFO is now not empty */
518 PKE_REG_MASK_SET(me
, STAT
, FQC
, 1);
532 pke_reset(struct pke_device
* me
)
534 /* advance PC over last quadword in FIFO; keep previous FIFO history */
535 me
->fifo_pc
= pke_fifo_flush(& me
->fifo
);
537 /* clear registers, flag, other state */
538 memset(me
->regs
, 0, sizeof(me
->regs
));
539 me
->fifo_qw_done
= 0;
545 /* Issue & swallow next PKE opcode if possible/available */
548 pke_issue(SIM_DESC sd
, struct pke_device
* me
)
550 struct fifo_quadword
* fqw
;
552 unsigned_4 cmd
, intr
;
554 /* 1 -- fetch PKE instruction */
556 /* confirm availability of new quadword of PKE instructions */
557 fqw
= pke_fifo_access(& me
->fifo
, me
->fifo_pc
);
561 /* skip over DMA tag, if present */
562 pke_pc_advance(me
, 0);
563 /* note: this can only change qw_pc from 0 to 2 and will not
566 /* "fetch" instruction quadword and word */
567 fw
= fqw
->data
[me
->qw_pc
];
569 /* store word in PKECODE register */
570 me
->regs
[PKE_REG_CODE
][0] = fw
;
573 /* 2 -- test go / no-go for PKE execution */
575 /* switch on STAT:PSS if PSS-pending and in idle state */
576 if((PKE_REG_MASK_GET(me
, STAT
, PPS
) == PKE_REG_STAT_PPS_IDLE
) &&
577 (me
->flags
& PKE_FLAG_PENDING_PSS
) != 0)
579 me
->flags
&= ~PKE_FLAG_PENDING_PSS
;
580 PKE_REG_MASK_SET(me
, STAT
, PSS
, 1);
583 /* check for stall/halt control bits */
584 if(PKE_REG_MASK_GET(me
, STAT
, PFS
) ||
585 PKE_REG_MASK_GET(me
, STAT
, PSS
) || /* note special treatment below */
586 /* PEW bit not a reason to keep stalling - it's just an indication, re-computed below */
587 /* PGW bit not a reason to keep stalling - it's just an indication, re-computed below */
588 /* ER0/ER1 not a reason to keep stalling - it's just an indication */
589 PKE_REG_MASK_GET(me
, STAT
, PIS
))
591 /* (still) stalled */
592 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_STALL
);
593 /* try again next cycle */
598 /* 3 -- decode PKE instruction */
601 if(PKE_REG_MASK_GET(me
, STAT
, PPS
) == PKE_REG_STAT_PPS_IDLE
)
602 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_DECODE
);
604 /* Extract relevant bits from PKEcode */
605 intr
= BIT_MASK_GET(fw
, PKE_OPCODE_I_B
, PKE_OPCODE_I_E
);
606 cmd
= BIT_MASK_GET(fw
, PKE_OPCODE_CMD_B
, PKE_OPCODE_CMD_E
);
608 /* handle interrupts */
611 /* are we resuming an interrupt-stalled instruction? */
612 if(me
->flags
& PKE_FLAG_INT_NOLOOP
)
614 /* clear loop-prevention flag */
615 me
->flags
&= ~PKE_FLAG_INT_NOLOOP
;
617 /* fall through to decode & execute */
618 /* The pke_code_* functions should not check the MSB in the
621 else /* new interrupt-flagged instruction */
623 /* set INT flag in STAT register */
624 PKE_REG_MASK_SET(me
, STAT
, INT
, 1);
625 /* set loop-prevention flag */
626 me
->flags
|= PKE_FLAG_INT_NOLOOP
;
628 /* set PIS if stall not masked */
629 if(!PKE_REG_MASK_GET(me
, ERR
, MII
))
630 pke_begin_interrupt_stall(me
);
632 /* suspend this instruction unless it's PKEMARK */
633 if(!IS_PKE_CMD(cmd
, PKEMARK
))
635 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_STALL
);
640 ; /* fall through to decode & execute */
646 /* decode & execute */
647 if(IS_PKE_CMD(cmd
, PKENOP
))
648 pke_code_nop(me
, fw
);
649 else if(IS_PKE_CMD(cmd
, STCYCL
))
650 pke_code_stcycl(me
, fw
);
651 else if(me
->pke_number
== 1 && IS_PKE_CMD(cmd
, OFFSET
))
652 pke_code_offset(me
, fw
);
653 else if(me
->pke_number
== 1 && IS_PKE_CMD(cmd
, BASE
))
654 pke_code_base(me
, fw
);
655 else if(IS_PKE_CMD(cmd
, ITOP
))
656 pke_code_itop(me
, fw
);
657 else if(IS_PKE_CMD(cmd
, STMOD
))
658 pke_code_stmod(me
, fw
);
659 else if(me
->pke_number
== 1 && IS_PKE_CMD(cmd
, MSKPATH3
))
660 pke_code_mskpath3(me
, fw
);
661 else if(IS_PKE_CMD(cmd
, PKEMARK
))
662 pke_code_pkemark(me
, fw
);
663 else if(IS_PKE_CMD(cmd
, FLUSHE
))
664 pke_code_flushe(me
, fw
);
665 else if(me
->pke_number
== 1 && IS_PKE_CMD(cmd
, FLUSH
))
666 pke_code_flush(me
, fw
);
667 else if(me
->pke_number
== 1 && IS_PKE_CMD(cmd
, FLUSHA
))
668 pke_code_flusha(me
, fw
);
669 else if(IS_PKE_CMD(cmd
, PKEMSCAL
))
670 pke_code_pkemscal(me
, fw
);
671 else if(IS_PKE_CMD(cmd
, PKEMSCNT
))
672 pke_code_pkemscnt(me
, fw
);
673 else if(me
->pke_number
== 1 && IS_PKE_CMD(cmd
, PKEMSCALF
))
674 pke_code_pkemscalf(me
, fw
);
675 else if(IS_PKE_CMD(cmd
, STMASK
))
676 pke_code_stmask(me
, fw
);
677 else if(IS_PKE_CMD(cmd
, STROW
))
678 pke_code_strow(me
, fw
);
679 else if(IS_PKE_CMD(cmd
, STCOL
))
680 pke_code_stcol(me
, fw
);
681 else if(IS_PKE_CMD(cmd
, MPG
))
682 pke_code_mpg(me
, fw
);
683 else if(IS_PKE_CMD(cmd
, DIRECT
))
684 pke_code_direct(me
, fw
);
685 else if(IS_PKE_CMD(cmd
, DIRECTHL
))
686 pke_code_directhl(me
, fw
);
687 else if(IS_PKE_CMD(cmd
, UNPACK
))
688 pke_code_unpack(me
, fw
);
689 /* ... no other commands ... */
691 pke_code_error(me
, fw
);
696 /* Clear out contents of FIFO; act as if it was empty. Return PC
697 pointing to one-past-last word. */
700 pke_fifo_flush(struct pke_fifo
* fifo
)
702 /* don't modify any state! */
703 return fifo
->origin
+ fifo
->next
;
708 /* Clear out contents of FIFO; make it really empty. */
711 pke_fifo_reset(struct pke_fifo
* fifo
)
715 /* clear fifo quadwords */
716 for(i
=0; i
<fifo
->next
; i
++)
718 zfree(fifo
->quadwords
[i
]);
719 fifo
->quadwords
[i
] = NULL
;
729 /* Make space for the next quadword in the FIFO. Allocate/enlarge
730 FIFO pointer block if necessary. Return a pointer to it. */
732 struct fifo_quadword
*
733 pke_fifo_fit(struct pke_fifo
* fifo
)
735 struct fifo_quadword
* fqw
;
737 /* out of space on quadword pointer array? */
738 if(fifo
->next
== fifo
->length
) /* also triggered before fifo->quadwords allocated */
740 struct fifo_quadword
** new_qw
;
741 unsigned_4 new_length
= fifo
->length
+ PKE_FIFO_GROW_SIZE
;
743 /* allocate new pointer block */
744 new_qw
= zalloc(new_length
* sizeof(struct fifo_quadword
*));
745 ASSERT(new_qw
!= NULL
);
747 /* copy over old contents, if any */
748 if(fifo
->quadwords
!= NULL
)
750 /* copy over old pointers to beginning of new block */
751 memcpy(new_qw
, fifo
->quadwords
,
752 fifo
->length
* sizeof(struct fifo_quadword
*));
755 zfree(fifo
->quadwords
);
758 /* replace pointers & counts */
759 fifo
->quadwords
= new_qw
;
760 fifo
->length
= new_length
;
764 ASSERT(fifo
->quadwords
!= NULL
);
766 /* allocate new quadword from heap */
767 fqw
= zalloc(sizeof(struct fifo_quadword
));
770 /* push quadword onto fifo */
771 fifo
->quadwords
[fifo
->next
] = fqw
;
778 /* Return a pointer to the FIFO quadword with given absolute index, or
779 NULL if it is out of range */
781 struct fifo_quadword
*
782 pke_fifo_access(struct pke_fifo
* fifo
, unsigned_4 qwnum
)
784 struct fifo_quadword
* fqw
;
786 if((qwnum
< fifo
->origin
) || /* before history */
787 (qwnum
>= fifo
->origin
+ fifo
->next
)) /* after last available quadword */
791 ASSERT(fifo
->quadwords
!= NULL
); /* must be allocated already */
792 fqw
= fifo
->quadwords
[qwnum
- fifo
->origin
]; /* pull out pointer from array */
793 ASSERT(fqw
!= NULL
); /* must be allocated already */
800 /* Authorize release of any FIFO entries older than given absolute quadword. */
802 pke_fifo_old(struct pke_fifo
* fifo
, unsigned_4 qwnum
)
804 /* do we have any too-old FIFO elements? */
805 if(fifo
->origin
+ PKE_FIFO_ARCHEOLOGY
< qwnum
)
807 /* count quadwords to forget */
808 int horizon
= qwnum
- (fifo
->origin
+ PKE_FIFO_ARCHEOLOGY
);
811 /* free quadwords at indices below horizon */
812 for(i
=0; i
< horizon
; i
++)
813 zfree(fifo
->quadwords
[i
]);
815 /* move surviving quadword pointers down to beginning of array */
816 for(i
=horizon
; i
< fifo
->next
; i
++)
817 fifo
->quadwords
[i
-horizon
] = fifo
->quadwords
[i
];
819 /* clear duplicate pointers */
820 for(i
=fifo
->next
- horizon
; i
< fifo
->next
; i
++)
821 fifo
->quadwords
[i
] = NULL
;
823 /* adjust FIFO pointers */
824 fifo
->origin
= fifo
->origin
+ horizon
;
825 fifo
->next
= fifo
->next
- horizon
;
832 /* advance the PC by given number of data words; update STAT/FQC
833 field; assume FIFO is filled enough; classify passed-over words;
834 write FIFO trace line */
837 pke_pc_advance(struct pke_device
* me
, int num_words
)
840 struct fifo_quadword
* fq
= NULL
;
841 unsigned_4 old_fifo_pc
= me
->fifo_pc
;
843 ASSERT(num_words
>= 0);
845 /* printf("pke %d pc_advance num_words %d\n", me->pke_number, num_words); */
849 /* find next quadword, if any */
850 fq
= pke_fifo_access(& me
->fifo
, me
->fifo_pc
);
852 /* skip over DMA tag words if present in word 0 or 1 */
853 if(fq
!= NULL
&& fq
->word_class
[me
->qw_pc
] == wc_dma
)
855 /* skip by going around loop an extra time */
859 /* nothing left to skip / no DMA tag here */
863 /* we are supposed to skip existing words */
866 /* one word skipped */
869 /* point to next word */
876 /* trace the consumption of the FIFO quadword we just skipped over */
877 /* fq still points to it */
878 if(me
->fifo_trace_file
!= NULL
)
880 /* assert complete classification */
881 ASSERT(fq
->word_class
[3] != wc_unknown
);
882 ASSERT(fq
->word_class
[2] != wc_unknown
);
883 ASSERT(fq
->word_class
[1] != wc_unknown
);
884 ASSERT(fq
->word_class
[0] != wc_unknown
);
886 /* print trace record */
887 fprintf(me
->fifo_trace_file
,
888 "%d 0x%08x_%08x_%08x_%08x 0x%08x %c%c%c%c\n",
889 (me
->pke_number
== 0 ? 0 : 1),
890 (unsigned) fq
->data
[3], (unsigned) fq
->data
[2],
891 (unsigned) fq
->data
[1], (unsigned) fq
->data
[0],
892 (unsigned) fq
->source_address
,
893 fq
->word_class
[3], fq
->word_class
[2],
894 fq
->word_class
[1], fq
->word_class
[0]);
896 } /* next quadword */
899 /* age old entries before PC */
900 if(me
->fifo_pc
!= old_fifo_pc
)
902 /* we advanced the fifo-pc; authorize disposal of anything
903 before previous PKEcode */
904 pke_fifo_old(& me
->fifo
, old_fifo_pc
);
907 /* clear FQC if FIFO is now empty */
908 fq
= pke_fifo_access(& me
->fifo
, me
->fifo_pc
);
911 PKE_REG_MASK_SET(me
, STAT
, FQC
, 0);
913 else /* annote the word where the PC lands as an PKEcode */
915 ASSERT(fq
->word_class
[me
->qw_pc
] == wc_pkecode
|| fq
->word_class
[me
->qw_pc
] == wc_unknown
);
916 fq
->word_class
[me
->qw_pc
] = wc_pkecode
;
924 /* Return pointer to FIFO quadword containing given operand# in FIFO.
925 `operand_num' starts at 1. Return pointer to operand word in last
926 argument, if non-NULL. If FIFO is not full enough, return 0.
927 Signal an ER0 indication upon skipping a DMA tag. */
929 struct fifo_quadword
*
930 pke_pcrel_fifo(struct pke_device
* me
, int operand_num
, unsigned_4
** operand
)
933 int new_qw_pc
, new_fifo_pc
;
934 struct fifo_quadword
* fq
= NULL
;
936 /* check for validity of last search results in cache */
937 if(me
->last_fifo_pc
== me
->fifo_pc
&&
938 me
->last_qw_pc
== me
->qw_pc
&&
939 operand_num
> me
->last_num
)
941 /* continue search from last stop */
942 new_fifo_pc
= me
->last_new_fifo_pc
;
943 new_qw_pc
= me
->last_new_qw_pc
;
944 num
= operand_num
- me
->last_num
;
948 /* start search from scratch */
949 new_fifo_pc
= me
->fifo_pc
;
950 new_qw_pc
= me
->qw_pc
;
956 /* printf("pke %d pcrel_fifo operand_num %d\n", me->pke_number, operand_num); */
960 /* one word skipped */
963 /* point to next word */
971 fq
= pke_fifo_access(& me
->fifo
, new_fifo_pc
);
973 /* check for FIFO underflow */
977 /* skip over DMA tag words if present in word 0 or 1 */
978 if(fq
->word_class
[new_qw_pc
] == wc_dma
)
981 PKE_REG_MASK_SET(me
, STAT
, ER0
, 1);
983 /* mismatch error! */
984 if(! PKE_REG_MASK_GET(me
, ERR
, ME0
))
986 pke_begin_interrupt_stall(me
);
987 /* don't stall just yet -- finish this instruction */
988 /* the PPS_STALL state will be entered by pke_issue() next time */
990 /* skip by going around loop an extra time */
996 /* return pointer to operand word itself */
999 *operand
= & fq
->data
[new_qw_pc
];
1001 /* annotate the word where the pseudo-PC lands as a PKE operand */
1002 ASSERT(fq
->word_class
[new_qw_pc
] == wc_pkedata
|| fq
->word_class
[new_qw_pc
] == wc_unknown
);
1003 fq
->word_class
[new_qw_pc
] = wc_pkedata
;
1005 /* store search results in cache */
1007 me
->last_fifo_pc
= me
->fifo_pc
;
1008 me
->last_qw_pc
= me
->qw_pc
;
1010 me
->last_num
= operand_num
;
1011 me
->last_new_fifo_pc
= new_fifo_pc
;
1012 me
->last_new_qw_pc
= new_qw_pc
;
1019 /* Return pointer to given operand# in FIFO. `operand_num' starts at 1.
1020 If FIFO is not full enough, return 0. Skip over DMA tags, but mark
1021 them as an error (ER0). */
1024 pke_pcrel_operand(struct pke_device
* me
, int operand_num
)
1026 unsigned_4
* operand
= NULL
;
1027 struct fifo_quadword
* fifo_operand
;
1029 fifo_operand
= pke_pcrel_fifo(me
, operand_num
, & operand
);
1031 if(fifo_operand
== NULL
)
1032 ASSERT(operand
== NULL
); /* pke_pcrel_fifo() ought leave it untouched */
1038 /* Return a bit-field extract of given operand# in FIFO, and its
1039 word-accurate source-addr. `bit_offset' starts at 0, referring to
1040 LSB after PKE instruction word. Width must be >0, <=32. Assume
1041 FIFO is full enough. Skip over DMA tags, but mark them as an error
1045 pke_pcrel_operand_bits(struct pke_device
* me
, int bit_offset
, int bit_width
, unsigned_4
* source_addr
)
1047 unsigned_4
* word
= NULL
;
1049 struct fifo_quadword
* fifo_operand
;
1050 int wordnumber
, bitnumber
;
1053 wordnumber
= bit_offset
/32;
1054 bitnumber
= bit_offset
%32;
1056 /* find operand word with bitfield */
1057 fifo_operand
= pke_pcrel_fifo(me
, wordnumber
+ 1, &word
);
1058 ASSERT(word
!= NULL
);
1060 /* extract bitfield from word */
1061 value
= BIT_MASK_GET(*word
, bitnumber
, bitnumber
+ bit_width
- 1);
1063 /* extract source addr from fifo word */
1064 *source_addr
= fifo_operand
->source_address
;
1066 /* add word offset */
1068 if(word
== & fifo_operand
->data
[i
])
1069 *source_addr
+= sizeof(unsigned_4
) * i
;
1076 /* check for stall conditions on indicated devices (path* only on
1077 PKE1), do not change status; return 0 iff no stall */
1079 pke_check_stall(struct pke_device
* me
, enum pke_check_target what
)
1082 unsigned_4 cop2_stat
, gpuif_stat
;
1084 /* read status words */
1085 ASSERT(sizeof(unsigned_4
) == 4);
1086 PKE_MEM_READ(me
, (GIF_REG_STAT
),
1089 PKE_MEM_READ(me
, (COP2_REG_STAT_ADDR
),
1093 /* perform checks */
1096 if(me
->pke_number
== 0)
1097 any_stall
= BIT_MASK_GET(cop2_stat
, COP2_REG_STAT_VBS0_B
, COP2_REG_STAT_VBS0_E
);
1098 else /* if(me->pke_number == 1) */
1099 any_stall
= BIT_MASK_GET(cop2_stat
, COP2_REG_STAT_VBS1_B
, COP2_REG_STAT_VBS1_E
);
1101 else if(what
== chk_path1
) /* VU -> GPUIF */
1103 ASSERT(me
->pke_number
== 1);
1104 if(BIT_MASK_GET(gpuif_stat
, GPUIF_REG_STAT_APATH_B
, GPUIF_REG_STAT_APATH_E
) == 1)
1107 else if(what
== chk_path2
) /* PKE -> GPUIF */
1109 ASSERT(me
->pke_number
== 1);
1110 if(BIT_MASK_GET(gpuif_stat
, GPUIF_REG_STAT_APATH_B
, GPUIF_REG_STAT_APATH_E
) == 2)
1113 else if(what
== chk_path3
) /* DMA -> GPUIF */
1115 ASSERT(me
->pke_number
== 1);
1116 if(BIT_MASK_GET(gpuif_stat
, GPUIF_REG_STAT_APATH_B
, GPUIF_REG_STAT_APATH_E
) == 3)
1125 /* any stall reasons? */
1130 /* PKE1 only: flip the DBF bit; recompute TOPS, TOP */
1132 pke_flip_dbf(struct pke_device
* me
)
1135 /* compute new TOP */
1136 PKE_REG_MASK_SET(me
, TOP
, TOP
,
1137 PKE_REG_MASK_GET(me
, TOPS
, TOPS
));
1139 newdf
= PKE_REG_MASK_GET(me
, DBF
, DF
) ? 0 : 1;
1140 PKE_REG_MASK_SET(me
, DBF
, DF
, newdf
);
1141 PKE_REG_MASK_SET(me
, STAT
, DBF
, newdf
);
1142 /* compute new TOPS */
1143 PKE_REG_MASK_SET(me
, TOPS
, TOPS
,
1144 (PKE_REG_MASK_GET(me
, BASE
, BASE
) +
1145 newdf
* PKE_REG_MASK_GET(me
, OFST
, OFFSET
)));
1147 /* this is equivalent to last word from okadaa (98-02-25):
1149 2) TOPS=BASE + !DBF*OFFSET
1154 /* set the STAT:PIS bit and send an interrupt to the 5900 */
1156 pke_begin_interrupt_stall(struct pke_device
* me
)
1159 PKE_REG_MASK_SET(me
, STAT
, PIS
, 1);
1161 /* XXX: send interrupt to 5900? */
1167 /* PKEcode handler functions -- responsible for checking and
1168 confirming old stall conditions, executing pkecode, updating PC and
1169 status registers -- may assume being run on correct PKE unit */
1172 pke_code_nop(struct pke_device
* me
, unsigned_4 pkecode
)
1175 pke_pc_advance(me
, 1);
1176 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1181 pke_code_stcycl(struct pke_device
* me
, unsigned_4 pkecode
)
1183 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1185 /* copy immediate value into CYCLE reg */
1186 PKE_REG_MASK_SET(me
, CYCLE
, WL
, BIT_MASK_GET(imm
, 8, 15));
1187 PKE_REG_MASK_SET(me
, CYCLE
, CL
, BIT_MASK_GET(imm
, 0, 7));
1189 pke_pc_advance(me
, 1);
1190 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1195 pke_code_offset(struct pke_device
* me
, unsigned_4 pkecode
)
1197 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1199 /* copy 10 bits to OFFSET field */
1200 PKE_REG_MASK_SET(me
, OFST
, OFFSET
, BIT_MASK_GET(imm
, 0, 9));
1202 PKE_REG_MASK_SET(me
, DBF
, DF
, 0);
1203 /* clear other DBF bit */
1204 PKE_REG_MASK_SET(me
, STAT
, DBF
, 0);
1205 /* set TOPS = BASE */
1206 PKE_REG_MASK_SET(me
, TOPS
, TOPS
, PKE_REG_MASK_GET(me
, BASE
, BASE
));
1208 pke_pc_advance(me
, 1);
1209 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1214 pke_code_base(struct pke_device
* me
, unsigned_4 pkecode
)
1216 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1218 /* copy 10 bits to BASE field */
1219 PKE_REG_MASK_SET(me
, BASE
, BASE
, BIT_MASK_GET(imm
, 0, 9));
1221 pke_pc_advance(me
, 1);
1222 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1227 pke_code_itop(struct pke_device
* me
, unsigned_4 pkecode
)
1229 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1231 /* copy 10 bits to ITOPS field */
1232 PKE_REG_MASK_SET(me
, ITOPS
, ITOPS
, BIT_MASK_GET(imm
, 0, 9));
1234 pke_pc_advance(me
, 1);
1235 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1240 pke_code_stmod(struct pke_device
* me
, unsigned_4 pkecode
)
1242 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1244 /* copy 2 bits to MODE register */
1245 PKE_REG_MASK_SET(me
, MODE
, MDE
, BIT_MASK_GET(imm
, 0, 2));
1247 pke_pc_advance(me
, 1);
1248 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1253 pke_code_mskpath3(struct pke_device
* me
, unsigned_4 pkecode
)
1255 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1256 unsigned_4 gif_mode
;
1258 /* set appropriate bit */
1259 if(BIT_MASK_GET(imm
, PKE_REG_MSKPATH3_B
, PKE_REG_MSKPATH3_E
) != 0)
1260 gif_mode
= GIF_REG_STAT_M3P
;
1264 /* write register to "read-only" register; gpuif code will look at M3P bit only */
1265 PKE_MEM_WRITE(me
, GIF_REG_VIF_M3P
, & gif_mode
, 4);
1268 pke_pc_advance(me
, 1);
1269 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1274 pke_code_pkemark(struct pke_device
* me
, unsigned_4 pkecode
)
1276 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1277 /* copy 16 bits to MARK register */
1278 PKE_REG_MASK_SET(me
, MARK
, MARK
, BIT_MASK_GET(imm
, 0, 15));
1279 /* set MRK bit in STAT register - CPU2 v2.1 docs incorrect */
1280 PKE_REG_MASK_SET(me
, STAT
, MRK
, 1);
1282 pke_pc_advance(me
, 1);
1283 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1288 pke_code_flushe(struct pke_device
* me
, unsigned_4 pkecode
)
1290 /* compute next PEW bit */
1291 if(pke_check_stall(me
, chk_vu
))
1294 PKE_REG_MASK_SET(me
, STAT
, PEW
, 1);
1295 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_STALL
);
1296 /* try again next cycle */
1301 PKE_REG_MASK_SET(me
, STAT
, PEW
, 0);
1302 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1303 pke_pc_advance(me
, 1);
1309 pke_code_flush(struct pke_device
* me
, unsigned_4 pkecode
)
1311 int something_busy
= 0;
1313 /* compute next PEW, PGW bits */
1314 if(pke_check_stall(me
, chk_vu
))
1317 PKE_REG_MASK_SET(me
, STAT
, PEW
, 1);
1320 PKE_REG_MASK_SET(me
, STAT
, PEW
, 0);
1323 if(pke_check_stall(me
, chk_path1
) ||
1324 pke_check_stall(me
, chk_path2
))
1327 PKE_REG_MASK_SET(me
, STAT
, PGW
, 1);
1330 PKE_REG_MASK_SET(me
, STAT
, PGW
, 0);
1335 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1336 /* try again next cycle */
1341 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1342 pke_pc_advance(me
, 1);
1348 pke_code_flusha(struct pke_device
* me
, unsigned_4 pkecode
)
1350 int something_busy
= 0;
1352 /* compute next PEW, PGW bits */
1353 if(pke_check_stall(me
, chk_vu
))
1356 PKE_REG_MASK_SET(me
, STAT
, PEW
, 1);
1359 PKE_REG_MASK_SET(me
, STAT
, PEW
, 0);
1362 if(pke_check_stall(me
, chk_path1
) ||
1363 pke_check_stall(me
, chk_path2
) ||
1364 pke_check_stall(me
, chk_path3
))
1367 PKE_REG_MASK_SET(me
, STAT
, PGW
, 1);
1370 PKE_REG_MASK_SET(me
, STAT
, PGW
, 0);
1374 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1375 /* try again next cycle */
1380 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1381 pke_pc_advance(me
, 1);
1387 pke_code_pkemscal(struct pke_device
* me
, unsigned_4 pkecode
)
1389 /* compute next PEW bit */
1390 if(pke_check_stall(me
, chk_vu
))
1393 PKE_REG_MASK_SET(me
, STAT
, PEW
, 1);
1394 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_STALL
);
1395 /* try again next cycle */
1400 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1403 PKE_REG_MASK_SET(me
, STAT
, PEW
, 0);
1405 /* flip DBF on PKE1 */
1406 if(me
->pke_number
== 1)
1409 /* compute new PC for VU (host byte-order) */
1410 vu_pc
= BIT_MASK_GET(imm
, 0, 15);
1411 vu_pc
= T2H_4(vu_pc
);
1413 /* write new PC; callback function gets VU running */
1414 ASSERT(sizeof(unsigned_4
) == 4);
1415 PKE_MEM_WRITE(me
, (me
->pke_number
== 0 ? VU0_CIA
: VU1_CIA
),
1419 /* copy ITOPS field to ITOP */
1420 PKE_REG_MASK_SET(me
, ITOP
, ITOP
, PKE_REG_MASK_GET(me
, ITOPS
, ITOPS
));
1423 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1424 pke_pc_advance(me
, 1);
1431 pke_code_pkemscnt(struct pke_device
* me
, unsigned_4 pkecode
)
1433 /* compute next PEW bit */
1434 if(pke_check_stall(me
, chk_vu
))
1437 PKE_REG_MASK_SET(me
, STAT
, PEW
, 1);
1438 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_STALL
);
1439 /* try again next cycle */
1446 PKE_REG_MASK_SET(me
, STAT
, PEW
, 0);
1448 /* flip DBF on PKE1 */
1449 if(me
->pke_number
== 1)
1453 ASSERT(sizeof(unsigned_4
) == 4);
1454 PKE_MEM_READ(me
, (me
->pke_number
== 0 ? VU0_CIA
: VU1_CIA
),
1458 /* rewrite new PC; callback function gets VU running */
1459 ASSERT(sizeof(unsigned_4
) == 4);
1460 PKE_MEM_WRITE(me
, (me
->pke_number
== 0 ? VU0_CIA
: VU1_CIA
),
1464 /* copy ITOPS field to ITOP */
1465 PKE_REG_MASK_SET(me
, ITOP
, ITOP
, PKE_REG_MASK_GET(me
, ITOPS
, ITOPS
));
1468 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1469 pke_pc_advance(me
, 1);
1475 pke_code_pkemscalf(struct pke_device
* me
, unsigned_4 pkecode
)
1477 int something_busy
= 0;
1479 /* compute next PEW, PGW bits */
1480 if(pke_check_stall(me
, chk_vu
))
1483 PKE_REG_MASK_SET(me
, STAT
, PEW
, 1);
1486 PKE_REG_MASK_SET(me
, STAT
, PEW
, 0);
1489 if(pke_check_stall(me
, chk_path1
) ||
1490 pke_check_stall(me
, chk_path2
) ||
1491 pke_check_stall(me
, chk_path3
))
1494 PKE_REG_MASK_SET(me
, STAT
, PGW
, 1);
1497 PKE_REG_MASK_SET(me
, STAT
, PGW
, 0);
1502 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1503 /* try again next cycle */
1508 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1510 /* flip DBF on PKE1 */
1511 if(me
->pke_number
== 1)
1514 /* compute new PC for VU (host byte-order) */
1515 vu_pc
= BIT_MASK_GET(imm
, 0, 15);
1516 vu_pc
= T2H_4(vu_pc
);
1518 /* rewrite new PC; callback function gets VU running */
1519 ASSERT(sizeof(unsigned_4
) == 4);
1520 PKE_MEM_WRITE(me
, (me
->pke_number
== 0 ? VU0_CIA
: VU1_CIA
),
1524 /* copy ITOPS field to ITOP */
1525 PKE_REG_MASK_SET(me
, ITOP
, ITOP
, PKE_REG_MASK_GET(me
, ITOPS
, ITOPS
));
1528 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1529 pke_pc_advance(me
, 1);
1535 pke_code_stmask(struct pke_device
* me
, unsigned_4 pkecode
)
1539 /* check that FIFO has one more word for STMASK operand */
1540 mask
= pke_pcrel_operand(me
, 1);
1543 /* "transferring" operand */
1544 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_XFER
);
1547 PKE_REG_MASK_SET(me
, NUM
, NUM
, 1);
1549 /* fill the register */
1550 PKE_REG_MASK_SET(me
, MASK
, MASK
, *mask
);
1553 PKE_REG_MASK_SET(me
, NUM
, NUM
, 0);
1556 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1557 pke_pc_advance(me
, 2);
1561 /* need to wait for another word */
1562 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1563 /* try again next cycle */
1569 pke_code_strow(struct pke_device
* me
, unsigned_4 pkecode
)
1571 /* check that FIFO has four more words for STROW operand */
1572 unsigned_4
* last_op
;
1574 last_op
= pke_pcrel_operand(me
, 4);
1577 /* "transferring" operand */
1578 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_XFER
);
1581 PKE_REG_MASK_SET(me
, NUM
, NUM
, 1);
1583 /* copy ROW registers: must all exist if 4th operand exists */
1584 me
->regs
[PKE_REG_R0
][0] = * pke_pcrel_operand(me
, 1);
1585 me
->regs
[PKE_REG_R1
][0] = * pke_pcrel_operand(me
, 2);
1586 me
->regs
[PKE_REG_R2
][0] = * pke_pcrel_operand(me
, 3);
1587 me
->regs
[PKE_REG_R3
][0] = * pke_pcrel_operand(me
, 4);
1590 PKE_REG_MASK_SET(me
, NUM
, NUM
, 0);
1593 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1594 pke_pc_advance(me
, 5);
1598 /* need to wait for another word */
1599 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1600 /* try again next cycle */
1606 pke_code_stcol(struct pke_device
* me
, unsigned_4 pkecode
)
1608 /* check that FIFO has four more words for STCOL operand */
1609 unsigned_4
* last_op
;
1611 last_op
= pke_pcrel_operand(me
, 4);
1614 /* "transferring" operand */
1615 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_XFER
);
1618 PKE_REG_MASK_SET(me
, NUM
, NUM
, 1);
1620 /* copy COL registers: must all exist if 4th operand exists */
1621 me
->regs
[PKE_REG_C0
][0] = * pke_pcrel_operand(me
, 1);
1622 me
->regs
[PKE_REG_C1
][0] = * pke_pcrel_operand(me
, 2);
1623 me
->regs
[PKE_REG_C2
][0] = * pke_pcrel_operand(me
, 3);
1624 me
->regs
[PKE_REG_C3
][0] = * pke_pcrel_operand(me
, 4);
1627 PKE_REG_MASK_SET(me
, NUM
, NUM
, 0);
1630 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1631 pke_pc_advance(me
, 5);
1635 /* need to wait for another word */
1636 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1637 /* try again next cycle */
1643 pke_code_mpg(struct pke_device
* me
, unsigned_4 pkecode
)
1645 unsigned_4
* last_mpg_word
;
1646 int num
= BIT_MASK_GET(pkecode
, PKE_OPCODE_NUM_B
, PKE_OPCODE_NUM_E
);
1647 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1649 /* assert 64-bit alignment of MPG operand */
1650 if(me
->qw_pc
!= 3 && me
->qw_pc
!= 1)
1651 return pke_code_error(me
, pkecode
);
1653 /* map zero to max+1 */
1654 if(num
==0) num
=0x100;
1656 /* check that FIFO has a few more words for MPG operand */
1657 last_mpg_word
= pke_pcrel_operand(me
, num
*2); /* num: number of 64-bit words */
1658 if(last_mpg_word
!= NULL
)
1660 /* perform implied FLUSHE */
1661 if(pke_check_stall(me
, chk_vu
))
1664 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_STALL
);
1665 /* retry this instruction next clock */
1672 /* "transferring" operand */
1673 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_XFER
);
1676 PKE_REG_MASK_SET(me
, NUM
, NUM
, num
);
1678 /* transfer VU instructions, one word-pair per iteration */
1679 for(i
=0; i
<num
; i
++)
1681 address_word vu_addr_base
, vu_addr
;
1682 address_word vutrack_addr_base
, vutrack_addr
;
1683 address_word vu_addr_max_size
;
1684 unsigned_4 vu_lower_opcode
, vu_upper_opcode
;
1685 unsigned_4
* operand
;
1686 unsigned_4 source_addr
;
1687 struct fifo_quadword
* fq
;
1692 next_num
= PKE_REG_MASK_GET(me
, NUM
, NUM
) - 1;
1693 PKE_REG_MASK_SET(me
, NUM
, NUM
, next_num
);
1695 /* imm: in 64-bit units for MPG instruction */
1696 /* VU*_MEM0 : instruction memory */
1697 vu_addr_base
= (me
->pke_number
== 0) ?
1698 VU0_MEM0_WINDOW_START
: VU1_MEM0_WINDOW_START
;
1699 vu_addr_max_size
= (me
->pke_number
== 0) ?
1700 VU0_MEM0_SIZE
: VU1_MEM0_SIZE
;
1701 vutrack_addr_base
= (me
->pke_number
== 0) ?
1702 VU0_MEM0_SRCADDR_START
: VU1_MEM0_SRCADDR_START
;
1704 /* compute VU address for this word-pair */
1705 vu_addr
= vu_addr_base
+ (imm
+ i
) * 8;
1706 /* check for vu_addr overflow */
1707 while(vu_addr
>= vu_addr_base
+ vu_addr_max_size
)
1708 vu_addr
-= vu_addr_max_size
;
1710 /* compute VU tracking address */
1711 vutrack_addr
= vutrack_addr_base
+ ((signed_8
)vu_addr
- (signed_8
)vu_addr_base
) / 2;
1713 /* Fetch operand words; assume they are already little-endian for VU imem */
1714 fq
= pke_pcrel_fifo(me
, i
*2 + 1, & operand
);
1715 vu_lower_opcode
= *operand
;
1717 source_addr
= fq
->source_address
;
1718 /* add word offset */
1720 if(operand
== & fq
->data
[j
])
1721 source_addr
+= sizeof(unsigned_4
) * j
;
1723 fq
= pke_pcrel_fifo(me
, i
*2 + 2, & operand
);
1724 vu_upper_opcode
= *operand
;
1726 /* write data into VU memory */
1727 /* lower (scalar) opcode comes in first word ; macro performs H2T! */
1728 PKE_MEM_WRITE(me
, vu_addr
,
1731 /* upper (vector) opcode comes in second word ; H2T */
1732 ASSERT(sizeof(unsigned_4
) == 4);
1733 PKE_MEM_WRITE(me
, vu_addr
+ 4,
1737 /* write tracking address in target byte-order */
1738 ASSERT(sizeof(unsigned_4
) == 4);
1739 PKE_MEM_WRITE(me
, vutrack_addr
,
1742 } /* VU xfer loop */
1745 ASSERT(PKE_REG_MASK_GET(me
, NUM
, NUM
) == 0);
1748 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1749 pke_pc_advance(me
, 1 + num
*2);
1751 } /* if FIFO full enough */
1754 /* need to wait for another word */
1755 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1756 /* retry this instruction next clock */
1762 pke_code_direct(struct pke_device
* me
, unsigned_4 pkecode
)
1764 /* check that FIFO has a few more words for DIRECT operand */
1765 unsigned_4
* last_direct_word
;
1766 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1768 /* assert 128-bit alignment of DIRECT operand */
1770 return pke_code_error(me
, pkecode
);
1772 /* map zero to max+1 */
1773 if(imm
==0) imm
=0x10000;
1775 last_direct_word
= pke_pcrel_operand(me
, imm
*4); /* imm: number of 128-bit words */
1776 if(last_direct_word
!= NULL
)
1780 unsigned_16 fifo_data
;
1782 /* "transferring" operand */
1783 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_XFER
);
1785 /* transfer GPUIF quadwords, one word per iteration */
1786 for(i
=0; i
<imm
*4; i
++)
1788 unsigned_4
* operand
= pke_pcrel_operand(me
, 1+i
);
1790 /* collect word into quadword */
1791 *A4_16(&fifo_data
, 3 - (i
% 4)) = *operand
;
1793 /* write to GPUIF FIFO only with full quadword */
1796 ASSERT(sizeof(fifo_data
) == 16);
1797 PKE_MEM_WRITE(me
, GIF_PATH2_FIFO_ADDR
,
1800 } /* write collected quadword */
1801 } /* GPUIF xfer loop */
1804 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1805 pke_pc_advance(me
, 1 + imm
*4);
1806 } /* if FIFO full enough */
1809 /* need to wait for another word */
1810 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1811 /* retry this instruction next clock */
1817 pke_code_directhl(struct pke_device
* me
, unsigned_4 pkecode
)
1819 /* treat the same as DIRECTH */
1820 pke_code_direct(me
, pkecode
);
1825 pke_code_unpack(struct pke_device
* me
, unsigned_4 pkecode
)
1827 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1828 int cmd
= BIT_MASK_GET(pkecode
, PKE_OPCODE_CMD_B
, PKE_OPCODE_CMD_E
);
1829 int num
= BIT_MASK_GET(pkecode
, PKE_OPCODE_NUM_B
, PKE_OPCODE_NUM_E
);
1830 int nummx
= (num
== 0) ? 0x0100 : num
;
1831 short vn
= BIT_MASK_GET(cmd
, 2, 3); /* unpack shape controls */
1832 short vl
= BIT_MASK_GET(cmd
, 0, 1);
1833 int m
= BIT_MASK_GET(cmd
, 4, 4);
1834 short cl
= PKE_REG_MASK_GET(me
, CYCLE
, CL
); /* cycle controls */
1835 short wl
= PKE_REG_MASK_GET(me
, CYCLE
, WL
);
1836 short addrwl
= (wl
== 0) ? 0x0100 : wl
;
1837 int r
= BIT_MASK_GET(imm
, 15, 15); /* indicator bits in imm value */
1838 int usn
= BIT_MASK_GET(imm
, 14, 14);
1840 int n
, num_operands
;
1841 unsigned_4
* last_operand_word
= NULL
;
1843 /* catch all illegal UNPACK variants */
1844 if(vl
== 3 && vn
< 3)
1846 pke_code_error(me
, pkecode
);
1850 /* compute PKEcode length, as given in CPU2 spec, v2.1 pg. 11 */
1854 n
= cl
* (nummx
/ addrwl
) + PKE_LIMIT(nummx
% addrwl
, cl
);
1855 num_operands
= (31 + (32 >> vl
) * (vn
+1) * n
)/32; /* round up to next word */
1857 /* confirm that FIFO has enough words in it */
1858 if(num_operands
> 0)
1859 last_operand_word
= pke_pcrel_operand(me
, num_operands
);
1860 if(last_operand_word
!= NULL
|| num_operands
== 0)
1862 address_word vu_addr_base
, vutrack_addr_base
;
1863 address_word vu_addr_max_size
;
1864 int vector_num_out
, vector_num_in
;
1866 /* "transferring" operand */
1867 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_XFER
);
1869 /* don't check whether VU is idle */
1871 /* compute VU address base */
1872 if(me
->pke_number
== 0)
1874 vu_addr_base
= VU0_MEM1_WINDOW_START
;
1875 vu_addr_max_size
= VU0_MEM1_SIZE
;
1876 vutrack_addr_base
= VU0_MEM1_SRCADDR_START
;
1881 vu_addr_base
= VU1_MEM1_WINDOW_START
;
1882 vu_addr_max_size
= VU1_MEM1_SIZE
;
1883 vutrack_addr_base
= VU1_MEM1_SRCADDR_START
;
1887 PKE_REG_MASK_SET(me
, NUM
, NUM
, nummx
);
1889 /* transfer given number of vectors */
1890 vector_num_out
= 0; /* output vector number being processed */
1891 vector_num_in
= 0; /* argument vector number being processed */
1894 quadword vu_old_data
;
1895 quadword vu_new_data
;
1896 quadword unpacked_data
;
1897 address_word vu_addr
;
1898 address_word vutrack_addr
;
1899 unsigned_4 source_addr
= 0;
1904 next_num
= PKE_REG_MASK_GET(me
, NUM
, NUM
) - 1;
1905 PKE_REG_MASK_SET(me
, NUM
, NUM
, next_num
);
1907 /* compute VU destination address, as bytes in R5900 memory */
1910 /* map zero to max+1 */
1911 vu_addr
= vu_addr_base
+ 16 * (BIT_MASK_GET(imm
, 0, 9) +
1912 (vector_num_out
/ addrwl
) * cl
+
1913 (vector_num_out
% addrwl
));
1916 vu_addr
= vu_addr_base
+ 16 * (BIT_MASK_GET(imm
, 0, 9) +
1919 /* handle "R" double-buffering bit */
1921 vu_addr
+= 16 * PKE_REG_MASK_GET(me
, TOPS
, TOPS
);
1923 /* check for vu_addr overflow */
1924 while(vu_addr
>= vu_addr_base
+ vu_addr_max_size
)
1925 vu_addr
-= vu_addr_max_size
;
1927 /* compute address of tracking table entry */
1928 vutrack_addr
= vutrack_addr_base
+ ((signed_8
)vu_addr
- (signed_8
)vu_addr_base
) / 4;
1930 /* read old VU data word at address; reverse words if needed */
1932 unsigned_16 vu_old_badwords
;
1933 ASSERT(sizeof(vu_old_badwords
) == 16);
1934 PKE_MEM_READ(me
, vu_addr
,
1935 &vu_old_badwords
, 16);
1936 vu_old_data
[0] = * A4_16(& vu_old_badwords
, 3);
1937 vu_old_data
[1] = * A4_16(& vu_old_badwords
, 2);
1938 vu_old_data
[2] = * A4_16(& vu_old_badwords
, 1);
1939 vu_old_data
[3] = * A4_16(& vu_old_badwords
, 0);
1942 /* For cyclic unpack, next operand quadword may come from instruction stream
1945 (vector_num_out
% addrwl
) >= cl
)
1947 /* clear operand - used only in a "indeterminate" state */
1948 for(i
= 0; i
< 4; i
++)
1949 unpacked_data
[i
] = 0;
1953 /* compute packed vector dimensions */
1954 int vectorbits
= 0, unitbits
= 0;
1956 if(vl
< 3) /* PKE_UNPACK_*_{32,16,8} */
1958 unitbits
= (32 >> vl
);
1959 vectorbits
= unitbits
* (vn
+1);
1961 else if(vl
== 3 && vn
== 3) /* PKE_UNPACK_V4_5 */
1966 else /* illegal unpack variant */
1968 /* should have been caught at top of function */
1972 /* loop over columns */
1973 for(i
=0; i
<=vn
; i
++)
1977 /* offset in bits in current operand word */
1979 (vector_num_in
* vectorbits
) + (i
* unitbits
); /* # of bits from PKEcode */
1981 /* last unit of V4_5 is only one bit wide */
1982 if(vl
== 3 && vn
== 3 && i
== 3) /* PKE_UNPACK_V4_5 */
1985 /* confirm we're not reading more than we said we needed */
1986 if(vector_num_in
* vectorbits
>= num_operands
* 32)
1988 /* this condition may be triggered by illegal
1989 PKEcode / CYCLE combinations. */
1990 pke_code_error(me
, pkecode
);
1991 /* XXX: this case needs to be better understood,
1992 and detected at a better time. */
1996 /* fetch bitfield operand */
1997 operand
= pke_pcrel_operand_bits(me
, bitoffset
, unitbits
, & source_addr
);
1999 /* selectively sign-extend; not for V4_5 1-bit value */
2000 if(usn
|| unitbits
== 1)
2001 unpacked_data
[i
] = operand
;
2003 unpacked_data
[i
] = SEXT32(operand
, unitbits
-1);
2006 /* set remaining top words in vector */
2007 for(i
=vn
+1; i
<4; i
++)
2009 if(vn
== 0) /* S_{32,16,8}: copy lowest element */
2010 unpacked_data
[i
] = unpacked_data
[0];
2012 unpacked_data
[i
] = 0;
2015 /* consumed a vector from the PKE instruction stream */
2017 } /* unpack word from instruction operand */
2019 /* process STMOD register for accumulation operations */
2020 switch(PKE_REG_MASK_GET(me
, MODE
, MDE
))
2022 case PKE_MODE_ADDROW
: /* add row registers to output data */
2023 case PKE_MODE_ACCROW
: /* same .. later conditionally accumulate */
2025 /* exploit R0..R3 contiguity */
2026 unpacked_data
[i
] += me
->regs
[PKE_REG_R0
+ i
][0];
2029 case PKE_MODE_INPUT
: /* pass data through */
2030 default: /* specified as undefined */
2034 /* compute replacement word */
2035 if(m
) /* use mask register? */
2037 /* compute index into mask register for this word */
2038 int mask_index
= PKE_LIMIT(vector_num_out
% addrwl
, 3);
2040 for(i
=0; i
<4; i
++) /* loop over columns */
2042 int mask_op
= PKE_MASKREG_GET(me
, mask_index
, i
);
2043 unsigned_4
* masked_value
= NULL
;
2047 case PKE_MASKREG_INPUT
:
2048 masked_value
= & unpacked_data
[i
];
2050 /* conditionally accumulate */
2051 if(PKE_REG_MASK_GET(me
, MODE
, MDE
) == PKE_MODE_ACCROW
)
2052 me
->regs
[PKE_REG_R0
+ i
][0] = unpacked_data
[i
];
2056 case PKE_MASKREG_ROW
: /* exploit R0..R3 contiguity */
2057 masked_value
= & me
->regs
[PKE_REG_R0
+ i
][0];
2060 case PKE_MASKREG_COLUMN
: /* exploit C0..C3 contiguity */
2061 masked_value
= & me
->regs
[PKE_REG_C0
+ mask_index
][0];
2064 case PKE_MASKREG_NOTHING
:
2065 /* "write inhibit" by re-copying old data */
2066 masked_value
= & vu_old_data
[i
];
2071 /* no other cases possible */
2074 /* copy masked value for column */
2075 vu_new_data
[i
] = *masked_value
;
2076 } /* loop over columns */
2080 /* no mask - just copy over entire unpacked quadword */
2081 memcpy(vu_new_data
, unpacked_data
, sizeof(unpacked_data
));
2083 /* conditionally store accumulated row results */
2084 if(PKE_REG_MASK_GET(me
, MODE
, MDE
) == PKE_MODE_ACCROW
)
2086 me
->regs
[PKE_REG_R0
+ i
][0] = unpacked_data
[i
];
2089 /* write new VU data word at address; reverse words if needed */
2091 unsigned_16 vu_new_badwords
;
2092 * A4_16(& vu_new_badwords
, 3) = vu_new_data
[0];
2093 * A4_16(& vu_new_badwords
, 2) = vu_new_data
[1];
2094 * A4_16(& vu_new_badwords
, 1) = vu_new_data
[2];
2095 * A4_16(& vu_new_badwords
, 0) = vu_new_data
[3];
2096 ASSERT(sizeof(vu_new_badwords
) == 16);
2097 PKE_MEM_WRITE(me
, vu_addr
,
2098 &vu_new_badwords
, 16);
2101 /* write tracking address */
2102 ASSERT(sizeof(unsigned_4
) == 4);
2103 PKE_MEM_WRITE(me
, vutrack_addr
,
2107 /* next vector please */
2109 } /* vector transfer loop */
2110 while(PKE_REG_MASK_GET(me
, NUM
, NUM
) > 0);
2112 /* confirm we've written as many vectors as told */
2113 ASSERT(nummx
== vector_num_out
);
2116 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
2117 pke_pc_advance(me
, 1 + num_operands
);
2118 } /* PKE FIFO full enough */
2121 /* need to wait for another word */
2122 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
2123 /* retry this instruction next clock */
2129 pke_code_error(struct pke_device
* me
, unsigned_4 pkecode
)
2131 /* set ER1 flag in STAT register */
2132 PKE_REG_MASK_SET(me
, STAT
, ER1
, 1);
2134 if(! PKE_REG_MASK_GET(me
, ERR
, ME1
))
2136 pke_begin_interrupt_stall(me
);
2137 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_STALL
);
2141 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
2144 /* advance over faulty word */
2145 pke_pc_advance(me
, 1);