1 /* Copyright (C) 1998, Cygnus Solutions */
12 #include "sim-assert.h"
15 #include "sky-gpuif.h"
18 /* Imported functions */
20 void device_error (device
*me
, char* message
); /* device.c */
23 /* Internal function declarations */
25 static int pke_io_read_buffer(device
*, void*, int, address_word
,
26 unsigned, sim_cpu
*, sim_cia
);
27 static int pke_io_write_buffer(device
*, const void*, int, address_word
,
28 unsigned, sim_cpu
*, sim_cia
);
29 static void pke_issue(SIM_DESC
, struct pke_device
*);
30 static void pke_pc_advance(struct pke_device
*, int num_words
);
31 static unsigned_4
* pke_pc_operand(struct pke_device
*, int operand_num
);
32 static unsigned_4
pke_pc_operand_bits(struct pke_device
*, int bit_offset
,
33 int bit_width
, unsigned_4
* sourceaddr
);
34 static struct fifo_quadword
* pke_pc_fifo(struct pke_device
*, int operand_num
,
35 unsigned_4
** operand
);
36 static void pke_attach(SIM_DESC sd
, struct pke_device
* me
);
/* Target selector passed as the `what' argument of pke_check_stall();
   per that function's header comment the path* targets apply to PKE1 only. */
37 enum pke_check_target
{ chk_vu /* NOTE(review): presumably the VU side, tested via the COP2 status VBS0/VBS1 bits -- confirm in pke_check_stall() */
, chk_path1 /* VU -> GPUIF */
, chk_path2 /* PKE -> GPUIF */
, chk_path3 /* DMA -> GPUIF */
};
38 static int pke_check_stall(struct pke_device
* me
, enum pke_check_target what
);
39 static void pke_flip_dbf(struct pke_device
* me
);
40 /* PKEcode handlers */
41 static void pke_code_nop(struct pke_device
* me
, unsigned_4 pkecode
);
42 static void pke_code_stcycl(struct pke_device
* me
, unsigned_4 pkecode
);
43 static void pke_code_offset(struct pke_device
* me
, unsigned_4 pkecode
);
44 static void pke_code_base(struct pke_device
* me
, unsigned_4 pkecode
);
45 static void pke_code_itop(struct pke_device
* me
, unsigned_4 pkecode
);
46 static void pke_code_stmod(struct pke_device
* me
, unsigned_4 pkecode
);
47 static void pke_code_mskpath3(struct pke_device
* me
, unsigned_4 pkecode
);
48 static void pke_code_pkemark(struct pke_device
* me
, unsigned_4 pkecode
);
49 static void pke_code_flushe(struct pke_device
* me
, unsigned_4 pkecode
);
50 static void pke_code_flush(struct pke_device
* me
, unsigned_4 pkecode
);
51 static void pke_code_flusha(struct pke_device
* me
, unsigned_4 pkecode
);
52 static void pke_code_pkemscal(struct pke_device
* me
, unsigned_4 pkecode
);
53 static void pke_code_pkemscnt(struct pke_device
* me
, unsigned_4 pkecode
);
54 static void pke_code_pkemscalf(struct pke_device
* me
, unsigned_4 pkecode
);
55 static void pke_code_stmask(struct pke_device
* me
, unsigned_4 pkecode
);
56 static void pke_code_strow(struct pke_device
* me
, unsigned_4 pkecode
);
57 static void pke_code_stcol(struct pke_device
* me
, unsigned_4 pkecode
);
58 static void pke_code_mpg(struct pke_device
* me
, unsigned_4 pkecode
);
59 static void pke_code_direct(struct pke_device
* me
, unsigned_4 pkecode
);
60 static void pke_code_directhl(struct pke_device
* me
, unsigned_4 pkecode
);
61 static void pke_code_unpack(struct pke_device
* me
, unsigned_4 pkecode
);
62 static void pke_code_error(struct pke_device
* me
, unsigned_4 pkecode
);
68 struct pke_device pke0_device
=
70 { "pke0", &pke_io_read_buffer
, &pke_io_write_buffer
}, /* device */
73 {}, 0, /* FIFO write buffer */
74 NULL
, 0, 0, NULL
, /* FIFO */
79 struct pke_device pke1_device
=
81 { "pke1", &pke_io_read_buffer
, &pke_io_write_buffer
}, /* device */
84 {}, 0, /* FIFO write buffer */
85 NULL
, 0, 0, NULL
, /* FIFO */
91 /* External functions */
94 /* Attach PKE addresses to main memory */
97 pke0_attach(SIM_DESC sd
)
99 pke_attach(sd
, & pke0_device
);
103 pke1_attach(SIM_DESC sd
)
105 pke_attach(sd
, & pke1_device
);
110 /* Issue a PKE instruction if possible */
113 pke0_issue(SIM_DESC sd
)
115 pke_issue(sd
, & pke0_device
);
119 pke1_issue(SIM_DESC sd
)
121 pke_issue(sd
, & pke1_device
);
126 /* Internal functions */
129 /* Attach PKE memory regions to simulator */
132 pke_attach(SIM_DESC sd
, struct pke_device
* me
)
135 sim_core_attach (sd
, NULL
, 0, access_read_write
, 0,
136 (me
->pke_number
== 0) ? PKE0_REGISTER_WINDOW_START
: PKE1_REGISTER_WINDOW_START
,
137 PKE_REGISTER_WINDOW_SIZE
/*nr_bytes*/,
143 sim_core_attach (sd
, NULL
, 0, access_read_write
, 0,
144 (me
->pke_number
== 0) ? PKE0_FIFO_ADDR
: PKE1_FIFO_ADDR
,
145 sizeof(quadword
) /*nr_bytes*/,
150 /* VU MEM0 tracking table */
151 sim_core_attach (sd
, NULL
, 0, access_read_write
, 0,
152 ((me
->pke_number
== 0) ? VU0_MEM0_SRCADDR_START
: VU1_MEM0_SRCADDR_START
),
153 ((me
->pke_number
== 0) ? VU0_MEM0_SIZE
: VU1_MEM0_SIZE
) / 2,
158 /* VU MEM1 tracking table */
159 sim_core_attach (sd
, NULL
, 0, access_read_write
, 0,
160 ((me
->pke_number
== 0) ? VU0_MEM1_SRCADDR_START
: VU1_MEM1_SRCADDR_START
),
161 ((me
->pke_number
== 0) ? VU0_MEM1_SIZE
: VU1_MEM1_SIZE
) / 4,
167 /* attach to trace file if appropriate */
169 char trace_envvar
[80];
170 char* trace_filename
= NULL
;
171 sprintf(trace_envvar
, "VIF%d_TRACE_FILE", me
->pke_number
);
172 trace_filename
= getenv(trace_envvar
);
173 if(trace_filename
!= NULL
)
175 me
->fifo_trace_file
= fopen(trace_filename
, "w");
176 if(me
->fifo_trace_file
== NULL
)
177 perror("VIF FIFO trace error on fopen");
179 setvbuf(me
->fifo_trace_file
, NULL
, _IOLBF
, 0);
186 /* Handle a PKE read; return no. of bytes read */
189 pke_io_read_buffer(device
*me_
,
197 /* downcast to gather embedding pke_device struct */
198 struct pke_device
* me
= (struct pke_device
*) me_
;
200 /* find my address ranges */
201 address_word my_reg_start
=
202 (me
->pke_number
== 0) ? PKE0_REGISTER_WINDOW_START
: PKE1_REGISTER_WINDOW_START
;
203 address_word my_fifo_addr
=
204 (me
->pke_number
== 0) ? PKE0_FIFO_ADDR
: PKE1_FIFO_ADDR
;
206 /* enforce that an access does not span more than one quadword */
207 address_word low
= ADDR_TRUNC_QW(addr
);
208 address_word high
= ADDR_TRUNC_QW(addr
+ nr_bytes
- 1);
212 /* classify address & handle */
213 if((addr
>= my_reg_start
) && (addr
< my_reg_start
+ PKE_REGISTER_WINDOW_SIZE
))
216 int reg_num
= ADDR_TRUNC_QW(addr
- my_reg_start
) >> 4;
217 int reg_byte
= ADDR_OFFSET_QW(addr
); /* find byte-offset inside register bank */
222 result
[0] = result
[1] = result
[2] = result
[3] = 0;
224 /* handle reads to individual registers; clear `readable' on error */
227 /* handle common case of register reading, side-effect free */
228 /* PKE1-only registers*/
234 if(me
->pke_number
== 0)
237 /* PKE0 & PKE1 common registers*/
256 result
[0] = H2T_4(me
->regs
[reg_num
][0]);
259 /* handle common case of write-only registers */
265 ASSERT(0); /* test above should prevent this possibility */
268 /* perform transfer & return */
272 memcpy(dest
, ((unsigned_1
*) &result
) + reg_byte
, nr_bytes
);
277 /* return zero bits */
278 memset(dest
, 0, nr_bytes
);
284 else if(addr
>= my_fifo_addr
&&
285 addr
< my_fifo_addr
+ sizeof(quadword
))
289 /* FIFO is not readable: return a word of zeroes */
290 memset(dest
, 0, nr_bytes
);
299 /* Handle a PKE write; return no. of bytes written */
302 pke_io_write_buffer(device
*me_
,
310 /* downcast to gather embedding pke_device struct */
311 struct pke_device
* me
= (struct pke_device
*) me_
;
313 /* find my address ranges */
314 address_word my_reg_start
=
315 (me
->pke_number
== 0) ? PKE0_REGISTER_WINDOW_START
: PKE1_REGISTER_WINDOW_START
;
316 address_word my_fifo_addr
=
317 (me
->pke_number
== 0) ? PKE0_FIFO_ADDR
: PKE1_FIFO_ADDR
;
319 /* enforce that an access does not span more than one quadword */
320 address_word low
= ADDR_TRUNC_QW(addr
);
321 address_word high
= ADDR_TRUNC_QW(addr
+ nr_bytes
- 1);
325 /* classify address & handle */
326 if((addr
>= my_reg_start
) && (addr
< my_reg_start
+ PKE_REGISTER_WINDOW_SIZE
))
329 int reg_num
= ADDR_TRUNC_QW(addr
- my_reg_start
) >> 4;
330 int reg_byte
= ADDR_OFFSET_QW(addr
); /* find byte-offset inside register bank */
335 input
[0] = input
[1] = input
[2] = input
[3] = 0;
337 /* write user-given bytes into input */
338 memcpy(((unsigned_1
*) &input
) + reg_byte
, src
, nr_bytes
);
340 /* make words host-endian */
341 input
[0] = T2H_4(input
[0]);
342 /* we may ignore other words */
344 /* handle writes to individual registers; clear `writeable' on error */
348 /* Order these tests from least to most overriding, in case
349 multiple bits are set. */
350 if(BIT_MASK_GET(input
[0], PKE_REG_FBRST_STC_B
, PKE_REG_FBRST_STC_E
))
352 /* clear a bunch of status bits */
353 PKE_REG_MASK_SET(me
, STAT
, PSS
, 0);
354 PKE_REG_MASK_SET(me
, STAT
, PFS
, 0);
355 PKE_REG_MASK_SET(me
, STAT
, PIS
, 0);
356 PKE_REG_MASK_SET(me
, STAT
, INT
, 0);
357 PKE_REG_MASK_SET(me
, STAT
, ER0
, 0);
358 PKE_REG_MASK_SET(me
, STAT
, ER1
, 0);
359 me
->flags
&= ~PKE_FLAG_PENDING_PSS
;
360 /* will allow resumption of possible stalled instruction */
362 if(BIT_MASK_GET(input
[0], PKE_REG_FBRST_STP_B
, PKE_REG_FBRST_STP_E
))
364 me
->flags
|= PKE_FLAG_PENDING_PSS
;
366 if(BIT_MASK_GET(input
[0], PKE_REG_FBRST_FBK_B
, PKE_REG_FBRST_FBK_E
))
368 PKE_REG_MASK_SET(me
, STAT
, PFS
, 1);
370 if(BIT_MASK_GET(input
[0], PKE_REG_FBRST_RST_B
, PKE_REG_FBRST_RST_E
))
372 /* clear FIFO by skipping to word after PC: also
373 prevents re-execution attempt of possible stalled instruction */
375 me
->fifo_num_elements
= me
->fifo_pc
;
376 /* clear registers, flag, other state */
377 memset(me
->regs
, 0, sizeof(me
->regs
));
378 me
->fifo_qw_done
= 0;
385 /* copy bottom three bits */
386 BIT_MASK_SET(me
->regs
[PKE_REG_ERR
][0], 0, 2, BIT_MASK_GET(input
[0], 0, 2));
390 /* copy bottom sixteen bits */
391 PKE_REG_MASK_SET(me
, MARK
, MARK
, BIT_MASK_GET(input
[0], 0, 15));
392 /* reset MRK bit in STAT */
393 PKE_REG_MASK_SET(me
, STAT
, MRK
, 0);
396 /* handle common case of read-only registers */
397 /* PKE1-only registers - not really necessary to handle separately */
403 if(me
->pke_number
== 0)
406 /* PKE0 & PKE1 common registers*/
408 /* ignore FDR bit for PKE1_STAT -- simulator does not implement PKE->RAM transfers */
428 ASSERT(0); /* test above should prevent this possibility */
441 else if(addr
>= my_fifo_addr
&&
442 addr
< my_fifo_addr
+ sizeof(quadword
))
445 struct fifo_quadword
* fqw
;
446 int fifo_byte
= ADDR_OFFSET_QW(addr
); /* find byte-offset inside fifo quadword */
447 unsigned_4 dma_tag_present
= 0;
450 /* collect potentially-partial quadword in write buffer; LE byte order */
451 memcpy(((unsigned_1
*)& me
->fifo_qw_in_progress
) + fifo_byte
, src
, nr_bytes
);
452 /* mark bytes written */
453 for(i
= fifo_byte
; i
< fifo_byte
+ nr_bytes
; i
++)
454 BIT_MASK_SET(me
->fifo_qw_done
, i
, i
, 1);
456 /* return if quadword not quite written yet */
457 if(BIT_MASK_GET(me
->fifo_qw_done
, 0, sizeof(quadword
)-1) !=
458 BIT_MASK_BTW(0, sizeof(quadword
)-1))
461 /* all done - process quadword after clearing flag */
462 BIT_MASK_SET(me
->fifo_qw_done
, 0, sizeof(quadword
)-1, 0);
464 /* ensure FIFO has enough elements */
465 if(me
->fifo_num_elements
== me
->fifo_buffer_size
)
468 int new_fifo_buffer_size
= me
->fifo_buffer_size
+ 20;
469 void* ptr
= realloc((void*) me
->fifo
, new_fifo_buffer_size
*sizeof(struct fifo_quadword
));
473 /* oops, cannot enlarge FIFO any more */
474 device_error(me_
, "Cannot enlarge FIFO buffer\n");
479 me
->fifo_buffer_size
= new_fifo_buffer_size
;
482 /* add new quadword at end of FIFO; store data in host-endian */
483 fqw
= & me
->fifo
[me
->fifo_num_elements
];
484 fqw
->word_class
[0] = fqw
->word_class
[1] =
485 fqw
->word_class
[2] = fqw
->word_class
[3] = wc_unknown
;
486 fqw
->data
[0] = T2H_4(me
->fifo_qw_in_progress
[0]);
487 fqw
->data
[1] = T2H_4(me
->fifo_qw_in_progress
[1]);
488 fqw
->data
[2] = T2H_4(me
->fifo_qw_in_progress
[2]);
489 fqw
->data
[3] = T2H_4(me
->fifo_qw_in_progress
[3]);
490 ASSERT(sizeof(unsigned_4
) == 4);
491 PKE_MEM_READ(me
, (me
->pke_number
== 0 ? DMA_D0_MADR
: DMA_D1_MADR
),
492 & fqw
->source_address
, /* converted to host-endian */
494 PKE_MEM_READ(me
, (me
->pke_number
== 0 ? DMA_D0_PKTFLAG
: DMA_D1_PKTFLAG
),
500 /* lower two words are DMA tags */
501 fqw
->word_class
[0] = fqw
->word_class
[1] = wc_dma
;
504 me
->fifo_num_elements
++;
506 /* set FQC to "1" as FIFO is now not empty */
507 PKE_REG_MASK_SET(me
, STAT
, FQC
, 1);
519 /* Issue & swallow next PKE opcode if possible/available */
522 pke_issue(SIM_DESC sd
, struct pke_device
* me
)
524 struct fifo_quadword
* fqw
;
526 unsigned_4 cmd
, intr
, num
;
529 /* 1 -- test go / no-go for PKE execution */
531 /* switch on STAT:PSS if PSS-pending and in idle state */
532 if((PKE_REG_MASK_GET(me
, STAT
, PPS
) == PKE_REG_STAT_PPS_IDLE
) &&
533 (me
->flags
& PKE_FLAG_PENDING_PSS
) != 0)
535 me
->flags
&= ~PKE_FLAG_PENDING_PSS
;
536 PKE_REG_MASK_SET(me
, STAT
, PSS
, 1);
539 /* check for stall/halt control bits */
540 if(PKE_REG_MASK_GET(me
, STAT
, PFS
) ||
541 PKE_REG_MASK_GET(me
, STAT
, PSS
) || /* note special treatment below */
542 /* PEW bit not a reason to keep stalling - it's re-checked below */
543 /* PGW bit not a reason to keep stalling - it's re-checked below */
544 /* maskable stall controls: ER0, ER1, PIS */
545 (PKE_REG_MASK_GET(me
, STAT
, ER0
) && !PKE_REG_MASK_GET(me
, ERR
, ME0
)) ||
546 (PKE_REG_MASK_GET(me
, STAT
, ER1
) && !PKE_REG_MASK_GET(me
, ERR
, ME1
)) ||
547 (PKE_REG_MASK_GET(me
, STAT
, PIS
) && !PKE_REG_MASK_GET(me
, ERR
, MII
)))
549 /* try again next cycle; no state change */
553 /* confirm availability of new quadword of PKE instructions */
554 if(me
->fifo_num_elements
<= me
->fifo_pc
)
558 /* 2 -- fetch PKE instruction */
560 /* skip over DMA tag, if present */
561 pke_pc_advance(me
, 0);
563 /* "fetch" instruction quadword and word */
564 fqw
= & me
->fifo
[me
->fifo_pc
];
565 fw
= fqw
->data
[me
->qw_pc
];
567 /* store word in PKECODE register */
568 me
->regs
[PKE_REG_CODE
][0] = fw
;
571 /* 3 -- decode PKE instruction */
573 /* PKE instruction format: [intr 0:0][pke-command 6:0][num 7:0][immediate 15:0],
574 so op-code is in top byte. */
575 intr
= BIT_MASK_GET(fw
, PKE_OPCODE_I_B
, PKE_OPCODE_I_E
);
576 cmd
= BIT_MASK_GET(fw
, PKE_OPCODE_CMD_B
, PKE_OPCODE_CMD_E
);
577 num
= BIT_MASK_GET(fw
, PKE_OPCODE_NUM_B
, PKE_OPCODE_NUM_E
);
578 imm
= BIT_MASK_GET(fw
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
580 /* handle interrupts */
583 /* are we resuming an interrupt-flagged instruction? */
584 if(me
->flags
& PKE_FLAG_INT_NOLOOP
)
586 /* clear loop-prevention flag */
587 me
->flags
&= ~PKE_FLAG_INT_NOLOOP
;
588 /* mask interrupt bit from instruction word so re-decoded instructions don't stall */
589 BIT_MASK_SET(fw
, PKE_OPCODE_I_B
, PKE_OPCODE_I_E
, 0);
592 else /* new interrupt-flagged instruction */
594 /* set INT flag in STAT register */
595 PKE_REG_MASK_SET(me
, STAT
, INT
, 1);
596 /* set loop-prevention flag */
597 me
->flags
|= PKE_FLAG_INT_NOLOOP
;
599 /* XXX: send interrupt to 5900? */
604 if(PKE_REG_MASK_GET(me
, STAT
, PPS
) == PKE_REG_STAT_PPS_IDLE
)
605 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_DECODE
);
607 /* handle [new - first time] interrupts */
610 PKE_REG_MASK_SET(me
, STAT
, PIS
, 1);
611 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_STALL
);
612 /* presume stall state follows; only PKEMARK may go ahead anyway */
615 /* decode & execute */
616 if(IS_PKE_CMD(cmd
, PKENOP
) && !intr
)
617 pke_code_nop(me
, fw
);
618 else if(IS_PKE_CMD(cmd
, STCYCL
) && !intr
)
619 pke_code_stcycl(me
, fw
);
620 else if(me
->pke_number
== 1 && IS_PKE_CMD(cmd
, OFFSET
) && !intr
)
621 pke_code_offset(me
, fw
);
622 else if(me
->pke_number
== 1 && IS_PKE_CMD(cmd
, BASE
) && !intr
)
623 pke_code_base(me
, fw
);
624 else if(IS_PKE_CMD(cmd
, ITOP
) && !intr
)
625 pke_code_itop(me
, fw
);
626 else if(IS_PKE_CMD(cmd
, STMOD
) && !intr
)
627 pke_code_stmod(me
, fw
);
628 else if(me
->pke_number
== 1 && IS_PKE_CMD(cmd
, MSKPATH3
) && !intr
)
629 pke_code_mskpath3(me
, fw
);
630 else if(IS_PKE_CMD(cmd
, PKEMARK
))
631 pke_code_pkemark(me
, fw
);
632 else if(IS_PKE_CMD(cmd
, FLUSHE
) && !intr
)
633 pke_code_flushe(me
, fw
);
634 else if(me
->pke_number
== 1 && IS_PKE_CMD(cmd
, FLUSH
) && !intr
)
635 pke_code_flush(me
, fw
);
636 else if(me
->pke_number
== 1 && IS_PKE_CMD(cmd
, FLUSHA
) && !intr
)
637 pke_code_flusha(me
, fw
);
638 else if(IS_PKE_CMD(cmd
, PKEMSCAL
) && !intr
)
639 pke_code_pkemscal(me
, fw
);
640 else if(IS_PKE_CMD(cmd
, PKEMSCNT
) && !intr
)
641 pke_code_pkemscnt(me
, fw
);
642 else if(me
->pke_number
== 1 && IS_PKE_CMD(cmd
, PKEMSCALF
) && !intr
)
643 pke_code_pkemscalf(me
, fw
);
644 else if(IS_PKE_CMD(cmd
, STMASK
) && !intr
)
645 pke_code_stmask(me
, fw
);
646 else if(IS_PKE_CMD(cmd
, STROW
) && !intr
)
647 pke_code_strow(me
, fw
);
648 else if(IS_PKE_CMD(cmd
, STCOL
) && !intr
)
649 pke_code_stcol(me
, fw
);
650 else if(IS_PKE_CMD(cmd
, MPG
) && !intr
)
651 pke_code_mpg(me
, fw
);
652 else if(IS_PKE_CMD(cmd
, DIRECT
) && !intr
)
653 pke_code_direct(me
, fw
);
654 else if(IS_PKE_CMD(cmd
, DIRECTHL
) && !intr
)
655 pke_code_directhl(me
, fw
);
656 else if(IS_PKE_CMD(cmd
, UNPACK
) && !intr
)
657 pke_code_unpack(me
, fw
);
658 /* ... no other commands ... */
660 pke_code_error(me
, fw
);
665 /* advance the PC by given number of data words; update STAT/FQC
666 field; assume FIFO is filled enough; classify passed-over words;
667 write FIFO trace line */
670 pke_pc_advance(struct pke_device
* me
, int num_words
)
673 struct fifo_quadword
* fq
= NULL
;
674 ASSERT(num_words
>= 0);
676 /* printf("pke %d pc_advance num_words %d\n", me->pke_number, num_words); */
680 fq
= & me
->fifo
[me
->fifo_pc
];
682 /* skip over DMA tag words if present in word 0 or 1 */
683 if(fq
->word_class
[me
->qw_pc
] == wc_dma
)
685 /* skip by going around loop an extra time */
689 /* nothing left to skip / no DMA tag here */
693 /* one word skipped */
696 /* point to next word */
703 /* trace the consumption of the FIFO quadword we just skipped over */
704 /* fq still points to it */
705 if(me
->fifo_trace_file
!= NULL
)
707 /* assert complete classification */
708 ASSERT(fq
->word_class
[3] != wc_unknown
);
709 ASSERT(fq
->word_class
[2] != wc_unknown
);
710 ASSERT(fq
->word_class
[1] != wc_unknown
);
711 ASSERT(fq
->word_class
[0] != wc_unknown
);
713 /* print trace record */
714 fprintf(me
->fifo_trace_file
,
715 "%d 0x%08x_%08x_%08x_%08x 0x%08x %c%c%c%c\n",
716 (me
->pke_number
== 0 ? 0 : 1),
717 (unsigned) fq
->data
[3], (unsigned) fq
->data
[2],
718 (unsigned) fq
->data
[1], (unsigned) fq
->data
[0],
719 (unsigned) fq
->source_address
,
720 fq
->word_class
[3], fq
->word_class
[2],
721 fq
->word_class
[1], fq
->word_class
[0]);
724 /* XXX: zap old entries in FIFO */
725 } /* next quadword */
728 /* clear FQC if FIFO is now empty */
729 if(me
->fifo_num_elements
== me
->fifo_pc
)
731 PKE_REG_MASK_SET(me
, STAT
, FQC
, 0);
733 else /* annote the word where the PC lands as an PKEcode */
735 fq
= & me
->fifo
[me
->fifo_pc
];
736 ASSERT(fq
->word_class
[me
->qw_pc
] == wc_pkecode
||
737 fq
->word_class
[me
->qw_pc
] == wc_unknown
);
738 fq
->word_class
[me
->qw_pc
] = wc_pkecode
;
744 /* Return pointer to FIFO quadword containing given operand# in FIFO.
745 `operand_num' starts at 1. Return pointer to operand word in last
746 argument, if non-NULL. If FIFO is not full enough, return 0.
747 Signal an ER0 indication upon skipping a DMA tag. */
749 struct fifo_quadword
*
750 pke_pc_fifo(struct pke_device
* me
, int operand_num
, unsigned_4
** operand
)
752 int num
= operand_num
;
753 int new_qw_pc
, new_fifo_pc
;
754 struct fifo_quadword
* fq
= NULL
;
758 /* snapshot current pointers */
759 new_fifo_pc
= me
->fifo_pc
;
760 new_qw_pc
= me
->qw_pc
;
762 /* printf("pke %d pc_fifo operand_num %d\n", me->pke_number, operand_num); */
766 /* one word skipped */
769 /* point to next word */
777 /* check for FIFO underflow */
778 if(me
->fifo_num_elements
== new_fifo_pc
)
784 /* skip over DMA tag words if present in word 0 or 1 */
785 fq
= & me
->fifo
[new_fifo_pc
];
786 if(fq
->word_class
[new_qw_pc
] == wc_dma
)
788 /* mismatch error! */
789 PKE_REG_MASK_SET(me
, STAT
, ER0
, 1);
790 /* skip by going around loop an extra time */
796 /* return pointer to operand word itself */
799 *operand
= & fq
->data
[new_qw_pc
];
801 /* annotate the word where the pseudo-PC lands as a PKE operand */
802 ASSERT(fq
->word_class
[new_qw_pc
] == wc_pkedata
||
803 fq
->word_class
[new_qw_pc
] == wc_unknown
);
804 fq
->word_class
[new_qw_pc
] = wc_pkedata
;
811 /* Return pointer to given operand# in FIFO. `operand_num' starts at 1.
812 If FIFO is not full enough, return 0. Skip over DMA tags, but mark
813 them as an error (ER0). */
816 pke_pc_operand(struct pke_device
* me
, int operand_num
)
818 unsigned_4
* operand
= NULL
;
819 struct fifo_quadword
* fifo_operand
;
821 fifo_operand
= pke_pc_fifo(me
, operand_num
, & operand
);
823 if(fifo_operand
== NULL
)
824 ASSERT(operand
== NULL
); /* pke_pc_fifo() ought leave it untouched */
830 /* Return a bit-field extract of given operand# in FIFO, and its
831 source-addr. `bit_offset' starts at 0, referring to LSB after PKE
832 instruction word. Width must be >0, <=32. Assume FIFO is full
833 enough. Skip over DMA tags, but mark them as an error (ER0). */
836 pke_pc_operand_bits(struct pke_device
* me
, int bit_offset
, int bit_width
, unsigned_4
* source_addr
)
838 unsigned_4
* word
= NULL
;
840 struct fifo_quadword
* fifo_operand
;
841 int wordnumber
, bitnumber
;
843 wordnumber
= bit_offset
/32;
844 bitnumber
= bit_offset
%32;
846 /* find operand word with bitfield */
847 fifo_operand
= pke_pc_fifo(me
, wordnumber
+ 1, &word
);
848 ASSERT(word
!= NULL
);
850 /* extract bitfield from word */
851 value
= BIT_MASK_GET(*word
, bitnumber
, bitnumber
+ bit_width
- 1);
853 /* extract source addr from fifo word */
854 *source_addr
= fifo_operand
->source_address
;
861 /* check for stall conditions on indicated devices (path* only on
862 PKE1), do not change status; return 0 iff no stall */
864 pke_check_stall(struct pke_device
* me
, enum pke_check_target what
)
867 unsigned_4 cop2_stat
, gpuif_stat
;
869 /* read status words */
870 ASSERT(sizeof(unsigned_4
) == 4);
871 PKE_MEM_READ(me
, (GIF_REG_STAT
),
874 PKE_MEM_READ(me
, (COP2_REG_STAT_ADDR
),
881 if(me
->pke_number
== 0)
882 any_stall
= BIT_MASK_GET(cop2_stat
, COP2_REG_STAT_VBS0_B
, COP2_REG_STAT_VBS0_E
);
883 else /* if(me->pke_number == 1) */
884 any_stall
= BIT_MASK_GET(cop2_stat
, COP2_REG_STAT_VBS1_B
, COP2_REG_STAT_VBS1_E
);
886 else if(what
== chk_path1
) /* VU -> GPUIF */
888 if(BIT_MASK_GET(gpuif_stat
, GPUIF_REG_STAT_APATH_B
, GPUIF_REG_STAT_APATH_E
) == 1)
891 else if(what
== chk_path2
) /* PKE -> GPUIF */
893 if(BIT_MASK_GET(gpuif_stat
, GPUIF_REG_STAT_APATH_B
, GPUIF_REG_STAT_APATH_E
) == 2)
896 else if(what
== chk_path3
) /* DMA -> GPUIF */
898 if(BIT_MASK_GET(gpuif_stat
, GPUIF_REG_STAT_APATH_B
, GPUIF_REG_STAT_APATH_E
) == 3)
907 /* any stall reasons? */
912 /* PKE1 only: flip the DBF bit; recompute TOPS, TOP */
914 pke_flip_dbf(struct pke_device
* me
)
917 /* compute new TOP */
918 PKE_REG_MASK_SET(me
, TOP
, TOP
,
919 PKE_REG_MASK_GET(me
, TOPS
, TOPS
));
921 newdf
= PKE_REG_MASK_GET(me
, DBF
, DF
) ? 0 : 1;
922 PKE_REG_MASK_SET(me
, DBF
, DF
, newdf
);
923 PKE_REG_MASK_SET(me
, STAT
, DBF
, newdf
);
924 /* compute new TOPS */
925 PKE_REG_MASK_SET(me
, TOPS
, TOPS
,
926 (PKE_REG_MASK_GET(me
, BASE
, BASE
) +
927 newdf
* PKE_REG_MASK_GET(me
, OFST
, OFFSET
)));
929 /* this is equivalent to last word from okadaa (98-02-25):
931 2) TOPS=BASE + !DBF*OFFSET
937 /* PKEcode handler functions -- responsible for checking and
938 confirming old stall conditions, executing pkecode, updating PC and
939 status registers -- may assume being run on correct PKE unit */
942 pke_code_nop(struct pke_device
* me
, unsigned_4 pkecode
)
945 pke_pc_advance(me
, 1);
946 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
951 pke_code_stcycl(struct pke_device
* me
, unsigned_4 pkecode
)
953 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
955 /* copy immediate value into CYCLE reg */
956 PKE_REG_MASK_SET(me
, CYCLE
, WL
, BIT_MASK_GET(imm
, 8, 15));
957 PKE_REG_MASK_SET(me
, CYCLE
, CL
, BIT_MASK_GET(imm
, 0, 7));
959 pke_pc_advance(me
, 1);
960 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
965 pke_code_offset(struct pke_device
* me
, unsigned_4 pkecode
)
967 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
969 /* copy 10 bits to OFFSET field */
970 PKE_REG_MASK_SET(me
, OFST
, OFFSET
, BIT_MASK_GET(imm
, 0, 9));
972 PKE_REG_MASK_SET(me
, DBF
, DF
, 0);
973 /* clear other DBF bit */
974 PKE_REG_MASK_SET(me
, STAT
, DBF
, 0);
975 /* set TOPS = BASE */
976 PKE_REG_MASK_SET(me
, TOPS
, TOPS
, PKE_REG_MASK_GET(me
, BASE
, BASE
));
978 pke_pc_advance(me
, 1);
979 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
984 pke_code_base(struct pke_device
* me
, unsigned_4 pkecode
)
986 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
988 /* copy 10 bits to BASE field */
989 PKE_REG_MASK_SET(me
, BASE
, BASE
, BIT_MASK_GET(imm
, 0, 9));
991 pke_pc_advance(me
, 1);
992 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
997 pke_code_itop(struct pke_device
* me
, unsigned_4 pkecode
)
999 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1001 /* copy 10 bits to ITOPS field */
1002 PKE_REG_MASK_SET(me
, ITOPS
, ITOPS
, BIT_MASK_GET(imm
, 0, 9));
1004 pke_pc_advance(me
, 1);
1005 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
/* PKEcode handler for STMOD: takes the immediate field of the PKEcode word,
   stores its low bits into the MDE field of the MODE register, then
   advances the PC by one word and sets STAT:PPS back to idle. */
1010 pke_code_stmod(struct pke_device
* me
, unsigned_4 pkecode
)
1012 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1014 /* copy 2 bits to MODE register */
/* NOTE(review): the extraction below uses bounds 0, 2.  Elsewhere in this
   file BIT_MASK_GET bounds are inclusive ("copy bottom three bits" uses
   0, 2; "copy 10 bits" uses 0, 9), so three bits are selected here, not
   the two the comment above promises -- confirm whether 0, 1 was intended
   and whether PKE_REG_MASK_SET truncates the value to the MDE field. */
1015 PKE_REG_MASK_SET(me
, MODE
, MDE
, BIT_MASK_GET(imm
, 0, 2));
/* instruction has no operands: consume the single PKEcode word */
1017 pke_pc_advance(me
, 1);
1018 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1023 pke_code_mskpath3(struct pke_device
* me
, unsigned_4 pkecode
)
1025 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1026 unsigned_4 gif_mode
;
1028 /* set appropriate bit */
1029 if(BIT_MASK_GET(imm
, PKE_REG_MSKPATH3_B
, PKE_REG_MSKPATH3_E
) != 0)
1030 gif_mode
= GIF_REG_MODE_M3R_MASK
;
1034 /* write register; patrickm code will look at M3R bit only */
1035 PKE_MEM_WRITE(me
, GIF_REG_MODE
, & gif_mode
, 4);
1038 pke_pc_advance(me
, 1);
1039 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1044 pke_code_pkemark(struct pke_device
* me
, unsigned_4 pkecode
)
1046 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1047 /* copy 16 bits to MARK register */
1048 PKE_REG_MASK_SET(me
, MARK
, MARK
, BIT_MASK_GET(imm
, 0, 15));
1049 /* set MRK bit in STAT register - CPU2 v2.1 docs incorrect */
1050 PKE_REG_MASK_SET(me
, STAT
, MRK
, 1);
1052 pke_pc_advance(me
, 1);
1053 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1058 pke_code_flushe(struct pke_device
* me
, unsigned_4 pkecode
)
1060 /* compute next PEW bit */
1061 if(pke_check_stall(me
, chk_vu
))
1064 PKE_REG_MASK_SET(me
, STAT
, PEW
, 1);
1065 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_STALL
);
1066 /* try again next cycle */
1071 PKE_REG_MASK_SET(me
, STAT
, PEW
, 0);
1072 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1073 pke_pc_advance(me
, 1);
1079 pke_code_flush(struct pke_device
* me
, unsigned_4 pkecode
)
1081 int something_busy
= 0;
1083 /* compute next PEW, PGW bits */
1084 if(pke_check_stall(me
, chk_vu
))
1087 PKE_REG_MASK_SET(me
, STAT
, PEW
, 1);
1090 PKE_REG_MASK_SET(me
, STAT
, PEW
, 0);
1093 if(pke_check_stall(me
, chk_path1
) ||
1094 pke_check_stall(me
, chk_path2
))
1097 PKE_REG_MASK_SET(me
, STAT
, PGW
, 1);
1100 PKE_REG_MASK_SET(me
, STAT
, PGW
, 0);
1105 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1106 /* try again next cycle */
1111 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1112 pke_pc_advance(me
, 1);
1118 pke_code_flusha(struct pke_device
* me
, unsigned_4 pkecode
)
1120 int something_busy
= 0;
1122 /* compute next PEW, PGW bits */
1123 if(pke_check_stall(me
, chk_vu
))
1126 PKE_REG_MASK_SET(me
, STAT
, PEW
, 1);
1129 PKE_REG_MASK_SET(me
, STAT
, PEW
, 0);
1132 if(pke_check_stall(me
, chk_path1
) ||
1133 pke_check_stall(me
, chk_path2
) ||
1134 pke_check_stall(me
, chk_path3
))
1137 PKE_REG_MASK_SET(me
, STAT
, PGW
, 1);
1140 PKE_REG_MASK_SET(me
, STAT
, PGW
, 0);
1144 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1145 /* try again next cycle */
1150 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1151 pke_pc_advance(me
, 1);
1157 pke_code_pkemscal(struct pke_device
* me
, unsigned_4 pkecode
)
1159 /* compute next PEW bit */
1160 if(pke_check_stall(me
, chk_vu
))
1163 PKE_REG_MASK_SET(me
, STAT
, PEW
, 1);
1164 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_STALL
);
1165 /* try again next cycle */
1170 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1173 PKE_REG_MASK_SET(me
, STAT
, PEW
, 0);
1175 /* flip DBF on PKE1 */
1176 if(me
->pke_number
== 1)
1179 /* compute new PC for VU (host byte-order) */
1180 vu_pc
= BIT_MASK_GET(imm
, 0, 15);
1181 vu_pc
= T2H_4(vu_pc
);
1183 /* write new PC; callback function gets VU running */
1184 ASSERT(sizeof(unsigned_4
) == 4);
1185 PKE_MEM_WRITE(me
, (me
->pke_number
== 0 ? VU0_CIA
: VU1_CIA
),
1189 /* copy ITOPS field to ITOP */
1190 PKE_REG_MASK_SET(me
, ITOP
, ITOP
, PKE_REG_MASK_GET(me
, ITOPS
, ITOPS
));
1193 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1194 pke_pc_advance(me
, 1);
1201 pke_code_pkemscnt(struct pke_device
* me
, unsigned_4 pkecode
)
1203 /* compute next PEW bit */
1204 if(pke_check_stall(me
, chk_vu
))
1207 PKE_REG_MASK_SET(me
, STAT
, PEW
, 1);
1208 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_STALL
);
1209 /* try again next cycle */
1216 PKE_REG_MASK_SET(me
, STAT
, PEW
, 0);
1218 /* flip DBF on PKE1 */
1219 if(me
->pke_number
== 1)
1223 ASSERT(sizeof(unsigned_4
) == 4);
1224 PKE_MEM_READ(me
, (me
->pke_number
== 0 ? VU0_CIA
: VU1_CIA
),
1228 /* rewrite new PC; callback function gets VU running */
1229 ASSERT(sizeof(unsigned_4
) == 4);
1230 PKE_MEM_WRITE(me
, (me
->pke_number
== 0 ? VU0_CIA
: VU1_CIA
),
1234 /* copy ITOPS field to ITOP */
1235 PKE_REG_MASK_SET(me
, ITOP
, ITOP
, PKE_REG_MASK_GET(me
, ITOPS
, ITOPS
));
1238 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1239 pke_pc_advance(me
, 1);
1245 pke_code_pkemscalf(struct pke_device
* me
, unsigned_4 pkecode
)
1247 int something_busy
= 0;
1249 /* compute next PEW, PGW bits */
1250 if(pke_check_stall(me
, chk_vu
))
1253 PKE_REG_MASK_SET(me
, STAT
, PEW
, 1);
1256 PKE_REG_MASK_SET(me
, STAT
, PEW
, 0);
1259 if(pke_check_stall(me
, chk_path1
) ||
1260 pke_check_stall(me
, chk_path2
) ||
1261 pke_check_stall(me
, chk_path3
))
1264 PKE_REG_MASK_SET(me
, STAT
, PGW
, 1);
1267 PKE_REG_MASK_SET(me
, STAT
, PGW
, 0);
1272 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1273 /* try again next cycle */
1278 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1280 /* flip DBF on PKE1 */
1281 if(me
->pke_number
== 1)
1284 /* compute new PC for VU (host byte-order) */
1285 vu_pc
= BIT_MASK_GET(imm
, 0, 15);
1286 vu_pc
= T2H_4(vu_pc
);
1288 /* rewrite new PC; callback function gets VU running */
1289 ASSERT(sizeof(unsigned_4
) == 4);
1290 PKE_MEM_WRITE(me
, (me
->pke_number
== 0 ? VU0_CIA
: VU1_CIA
),
1294 /* copy ITOPS field to ITOP */
1295 PKE_REG_MASK_SET(me
, ITOP
, ITOP
, PKE_REG_MASK_GET(me
, ITOPS
, ITOPS
));
1298 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1299 pke_pc_advance(me
, 1);
1305 pke_code_stmask(struct pke_device
* me
, unsigned_4 pkecode
)
1309 /* check that FIFO has one more word for STMASK operand */
1310 mask
= pke_pc_operand(me
, 1);
1313 /* "transferring" operand */
1314 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_XFER
);
1317 PKE_REG_MASK_SET(me
, NUM
, NUM
, 1);
1319 /* fill the register */
1320 PKE_REG_MASK_SET(me
, MASK
, MASK
, *mask
);
1323 PKE_REG_MASK_SET(me
, NUM
, NUM
, 0);
1326 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1327 pke_pc_advance(me
, 2);
1331 /* need to wait for another word */
1332 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1333 /* try again next cycle */
/* STROW handler: load row registers R0..R3 from the four words following
   the PKEcode.
   me:      PKE device whose registers and PC are updated.
   pkecode: the PKEcode word (not otherwise read in the visible lines).

   Visible behaviour: the fourth operand word is looked up with
   pke_pc_operand to find out whether all four operands are available.  On
   the transfer path STAT.PPS goes to XFER, NUM is set to 1, operand words
   1..4 are copied into word 0 of regs[PKE_REG_R0..PKE_REG_R3], NUM is
   cleared, STAT.PPS goes to IDLE and the PC advances five words.  On the
   other path STAT.PPS is set to WAIT so the instruction is retried.

   NOTE(review): the test on last_op that selects the path is not visible
   in this listing; presumably last_op != NULL -- confirm. */
1339 pke_code_strow(struct pke_device
* me
, unsigned_4 pkecode
)
1341 /* check that FIFO has four more words for STROW operand */
1342 unsigned_4
* last_op
;
1344 last_op
= pke_pc_operand(me
, 4);
1347 /* "transferring" operand */
1348 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_XFER
);
1351 PKE_REG_MASK_SET(me
, NUM
, NUM
, 1);
1353 /* copy ROW registers: must all exist if 4th operand exists */
1354 me
->regs
[PKE_REG_R0
][0] = * pke_pc_operand(me
, 1);
1355 me
->regs
[PKE_REG_R1
][0] = * pke_pc_operand(me
, 2);
1356 me
->regs
[PKE_REG_R2
][0] = * pke_pc_operand(me
, 3);
1357 me
->regs
[PKE_REG_R3
][0] = * pke_pc_operand(me
, 4);
1360 PKE_REG_MASK_SET(me
, NUM
, NUM
, 0);
/* done: go idle and step over the PKEcode plus its four operand words */
1363 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1364 pke_pc_advance(me
, 5);
1368 /* need to wait for another word */
1369 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1370 /* try again next cycle */
/* STCOL handler: load column registers C0..C3 from the four words
   following the PKEcode.
   me:      PKE device whose registers and PC are updated.
   pkecode: the PKEcode word (not otherwise read in the visible lines).

   Visible behaviour mirrors pke_code_strow: the fourth operand word is
   looked up with pke_pc_operand; on the transfer path STAT.PPS goes to
   XFER, NUM is set to 1, operand words 1..4 are copied into word 0 of
   regs[PKE_REG_C0..PKE_REG_C3], NUM is cleared, STAT.PPS goes to IDLE and
   the PC advances five words.  On the other path STAT.PPS is set to WAIT
   so the instruction is retried.

   NOTE(review): the test on last_op that selects the path is not visible
   in this listing; presumably last_op != NULL -- confirm. */
1376 pke_code_stcol(struct pke_device
* me
, unsigned_4 pkecode
)
1378 /* check that FIFO has four more words for STCOL operand */
1379 unsigned_4
* last_op
;
1381 last_op
= pke_pc_operand(me
, 4);
1384 /* "transferring" operand */
1385 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_XFER
);
1388 PKE_REG_MASK_SET(me
, NUM
, NUM
, 1);
1390 /* copy COL registers: must all exist if 4th operand exists */
1391 me
->regs
[PKE_REG_C0
][0] = * pke_pc_operand(me
, 1);
1392 me
->regs
[PKE_REG_C1
][0] = * pke_pc_operand(me
, 2);
1393 me
->regs
[PKE_REG_C2
][0] = * pke_pc_operand(me
, 3);
1394 me
->regs
[PKE_REG_C3
][0] = * pke_pc_operand(me
, 4);
1397 PKE_REG_MASK_SET(me
, NUM
, NUM
, 0);
/* done: go idle and step over the PKEcode plus its four operand words */
1400 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1401 pke_pc_advance(me
, 5);
1405 /* need to wait for another word */
1406 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1407 /* try again next cycle */
/* MPG handler: copy VU instruction word-pairs from the PKE FIFO into VU
   instruction memory.
   me:      PKE device whose registers and PC are updated.
   pkecode: the PKEcode word; NUM gives the count of 64-bit word-pairs
            (0 is mapped to 0x100) and IMM the destination offset in
            64-bit units.

   Visible behaviour:
   - If qw_pc is neither 1 nor 3 the instruction is handed to
     pke_code_error.
   - pke_pc_operand(me, num*2) is used to check that the whole operand is
     in the FIFO.  If it is not, STAT.PPS is set to WAIT for a retry.
   - With the operand present, a VU stall (pke_check_stall on chk_vu) sets
     STAT.PPS to STALL for a retry.
   - Otherwise STAT.PPS goes to XFER and the NUM register is loaded with
     num.  Each loop iteration decrements NUM, computes the wrapped
     destination address and the tracking-table address, fetches the lower
     and upper opcode words and issues writes at vu_addr, vu_addr + 4 and
     vutrack_addr.
   - Afterwards NUM is asserted to be zero, STAT.PPS goes to IDLE and the
     PC advances 1 + num*2 words.

   NOTE(review): braces, else keywords, the declarations of i and next_num
   and the trailing arguments of the PKE_MEM_WRITE calls are not visible in
   this listing. */
1413 pke_code_mpg(struct pke_device
* me
, unsigned_4 pkecode
)
1415 unsigned_4
* last_mpg_word
;
1416 int num
= BIT_MASK_GET(pkecode
, PKE_OPCODE_NUM_B
, PKE_OPCODE_NUM_E
);
1417 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1419 /* assert 64-bit alignment of MPG operand */
1420 if(me
->qw_pc
!= 3 && me
->qw_pc
!= 1)
1421 return pke_code_error(me
, pkecode
);
1423 /* map zero to max+1 */
1424 if(num
==0) num
=0x100;
1426 /* check that FIFO has a few more words for MPG operand */
1427 last_mpg_word
= pke_pc_operand(me
, num
*2); /* num: number of 64-bit words */
1428 if(last_mpg_word
!= NULL
)
1430 /* perform implied FLUSHE */
1431 if(pke_check_stall(me
, chk_vu
))
1434 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_STALL
);
1435 /* retry this instruction next clock */
1442 /* "transferring" operand */
1443 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_XFER
);
/* NUM register counts down the word-pairs still to be transferred */
1446 PKE_REG_MASK_SET(me
, NUM
, NUM
, num
);
1448 /* transfer VU instructions, one word-pair per iteration */
1449 for(i
=0; i
<num
; i
++)
1451 address_word vu_addr_base
, vu_addr
;
1452 address_word vutrack_addr_base
, vutrack_addr
;
1453 address_word vu_addr_max_size
;
1454 unsigned_4 vu_lower_opcode
, vu_upper_opcode
;
1455 unsigned_4
* operand
;
1456 struct fifo_quadword
* fq
;
/* one fewer word-pair outstanding */
1460 next_num
= PKE_REG_MASK_GET(me
, NUM
, NUM
) - 1;
1461 PKE_REG_MASK_SET(me
, NUM
, NUM
, next_num
);
1463 /* imm: in 64-bit units for MPG instruction */
1464 /* VU*_MEM0 : instruction memory */
1465 vu_addr_base
= (me
->pke_number
== 0) ?
1466 VU0_MEM0_WINDOW_START
: VU1_MEM0_WINDOW_START
;
1467 vu_addr_max_size
= (me
->pke_number
== 0) ?
1468 VU0_MEM0_SIZE
: VU1_MEM0_SIZE
;
1469 vutrack_addr_base
= (me
->pke_number
== 0) ?
1470 VU0_MEM0_SRCADDR_START
: VU1_MEM0_SRCADDR_START
;
1472 /* compute VU address for this word-pair */
1473 vu_addr
= vu_addr_base
+ (imm
+ i
) * 8;
1474 /* check for vu_addr overflow */
/* wrap back into the window rather than writing past its end */
1475 while(vu_addr
>= vu_addr_base
+ vu_addr_max_size
)
1476 vu_addr
-= vu_addr_max_size
;
1478 /* compute VU tracking address */
/* the tracking offset is half the byte offset into the window */
1479 vutrack_addr
= vutrack_addr_base
+ ((signed_8
)vu_addr
- (signed_8
)vu_addr_base
) / 2;
1481 /* Fetch operand words; assume they are already little-endian for VU imem */
/* pke_pc_fifo also yields the FIFO quadword, whose source_address is
   referenced by the tracking write below */
1482 fq
= pke_pc_fifo(me
, i
*2 + 1, & operand
);
1483 vu_lower_opcode
= *operand
;
1484 vu_upper_opcode
= *pke_pc_operand(me
, i
*2 + 2);
1486 /* write data into VU memory */
1487 /* lower (scalar) opcode comes in first word ; macro performs H2T! */
1488 PKE_MEM_WRITE(me
, vu_addr
,
1491 /* upper (vector) opcode comes in second word ; H2T */
1492 ASSERT(sizeof(unsigned_4
) == 4);
1493 PKE_MEM_WRITE(me
, vu_addr
+ 4,
1497 /* write tracking address in target byte-order */
1498 ASSERT(sizeof(unsigned_4
) == 4);
1499 PKE_MEM_WRITE(me
, vutrack_addr
,
1500 & fq
->source_address
,
1502 } /* VU xfer loop */
/* every word-pair must have been accounted for */
1505 ASSERT(PKE_REG_MASK_GET(me
, NUM
, NUM
) == 0);
1508 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1509 pke_pc_advance(me
, 1 + num
*2);
1511 } /* if FIFO full enough */
1514 /* need to wait for another word */
1515 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1516 /* retry this instruction next clock */
/* DIRECT handler: forward quadwords from the PKE FIFO to the GPUIF PATH2
   FIFO.
   me:      PKE device whose registers and PC are updated.
   pkecode: the PKEcode word; IMM gives the number of 128-bit quadwords
            (0 is mapped to 0x10000).

   Visible behaviour:
   - An alignment check can divert the instruction to pke_code_error.
   - pke_pc_operand(me, imm*4) is used to check that the whole operand is
     in the FIFO.  If it is not, STAT.PPS is set to WAIT for a retry.
   - Otherwise STAT.PPS goes to XFER and the operand words are visited one
     at a time; word i is stored into slot 3 - (i % 4) of fifo_data, and a
     write to GIF_PATH2_FIFO_ADDR is issued for collected quadwords.
   - Finally STAT.PPS goes to IDLE and the PC advances 1 + imm*4 words.

   NOTE(review): the alignment condition, the declaration of i, the test
   that decides when a quadword is complete and the trailing arguments of
   PKE_MEM_WRITE are not visible in this listing. */
1522 pke_code_direct(struct pke_device
* me
, unsigned_4 pkecode
)
1524 /* check that FIFO has a few more words for DIRECT operand */
1525 unsigned_4
* last_direct_word
;
1526 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1528 /* assert 128-bit alignment of DIRECT operand */
1530 return pke_code_error(me
, pkecode
);
1532 /* map zero to max+1 */
1533 if(imm
==0) imm
=0x10000;
1535 last_direct_word
= pke_pc_operand(me
, imm
*4); /* imm: number of 128-bit words */
1536 if(last_direct_word
!= NULL
)
1540 unsigned_16 fifo_data
;
1542 /* "transferring" operand */
1543 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_XFER
);
1545 /* transfer GPUIF quadwords, one word per iteration */
1546 for(i
=0; i
<imm
*4; i
++)
/* operand numbering starts at 1, the word right after the PKEcode */
1548 unsigned_4
* operand
= pke_pc_operand(me
, 1+i
);
1550 /* collect word into quadword */
/* words fill the quadword from slot 3 down to slot 0 */
1551 *A4_16(&fifo_data
, 3 - (i
% 4)) = *operand
;
1553 /* write to GPUIF FIFO only with full quadword */
1556 ASSERT(sizeof(fifo_data
) == 16);
1557 PKE_MEM_WRITE(me
, GIF_PATH2_FIFO_ADDR
,
1560 } /* write collected quadword */
1562 } /* GPUIF xfer loop */
1565 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1566 pke_pc_advance(me
, 1 + imm
*4);
1567 } /* if FIFO full enough */
1570 /* need to wait for another word */
1571 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1572 /* retry this instruction next clock */
/* DIRECTHL handler: delegates unchanged to pke_code_direct, passing the
   same device and PKEcode word. */
1578 pke_code_directhl(struct pke_device
* me
, unsigned_4 pkecode
)
1580 /* treat the same as DIRECT */
1581 pke_code_direct(me
, pkecode
);
/* UNPACK handler: expand packed operand data from the PKE FIFO into
   quadwords and store them in VU data memory (the VU*_MEM1 window).
   me:      PKE device whose registers and PC are updated.
   pkecode: the PKEcode word.  IMM carries the destination offset in bits
            0..9 plus the r (bit 15) and usn (bit 14) flags; CMD carries
            vn (bits 2..3), vl (bits 0..1) and the mask flag m (bit 4);
            NUM is the vector count (0 is mapped to 0x100 when the NUM
            register is loaded).

   Visible behaviour:
   - The operand length in words is derived from cl, wl, num, vl and vn.
     When it is non-zero, pke_pc_operand checks that the last operand word
     is in the FIFO; if it is not, STAT.PPS is set to WAIT for a retry.
   - Otherwise STAT.PPS goes to XFER, the VU0 or VU1 window is selected by
     pke_number and the vector loop runs until the NUM register reaches
     zero.  Each pass decrements NUM, computes the wrapped destination
     address and its tracking-table address, reads the old quadword with
     its words reversed, builds unpacked_data (either zeros or bitfields
     fetched with pke_pc_operand_bits), optionally applies the mask
     register, applies the MODE register, and writes the new quadword with
     its words reversed followed by the tracking address.
   - Finally STAT.PPS goes to IDLE and the PC advances 1 + num_operands
     words.

   NOTE(review): many structural lines (braces, else, do, switch, break,
   several declarations and assignments, and the guard around the operand
   length formula) are not visible in this listing; read the inline notes
   as descriptions of the visible statements only. */
1586 pke_code_unpack(struct pke_device
* me
, unsigned_4 pkecode
)
1588 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1589 int cmd
= BIT_MASK_GET(pkecode
, PKE_OPCODE_CMD_B
, PKE_OPCODE_CMD_E
);
1590 int num
= BIT_MASK_GET(pkecode
, PKE_OPCODE_NUM_B
, PKE_OPCODE_NUM_E
);
1591 short vn
= BIT_MASK_GET(cmd
, 2, 3); /* unpack shape controls */
1592 short vl
= BIT_MASK_GET(cmd
, 0, 1);
1593 int m
= BIT_MASK_GET(cmd
, 4, 4);
1594 short cl
= PKE_REG_MASK_GET(me
, CYCLE
, CL
); /* cycle controls */
1595 short wl
= PKE_REG_MASK_GET(me
, CYCLE
, WL
);
1596 int r
= BIT_MASK_GET(imm
, 15, 15); /* indicator bits in imm value */
1597 int usn
= BIT_MASK_GET(imm
, 14, 14);
1599 int n
, num_operands
;
1600 unsigned_4
* last_operand_word
= NULL
;
1602 /* compute PKEcode length, as given in CPU2 spec, v2.1 pg. 11 */
/* NOTE(review): this formula divides by wl; whatever guards the wl == 0
   case is not visible in this listing -- confirm */
1606 n
= cl
* (num
/wl
) + PKE_LIMIT(num
% wl
, cl
);
/* operand length in 32-bit words: bits per vector times vectors, over 32 */
1607 num_operands
= ((32 >> vl
) * (vn
+1) * n
)/32;
1609 /* confirm that FIFO has enough words in it */
1610 if(num_operands
> 0)
1611 last_operand_word
= pke_pc_operand(me
, num_operands
);
/* an instruction with no operand words can always proceed */
1612 if(last_operand_word
!= NULL
|| num_operands
== 0)
1614 address_word vu_addr_base
, vutrack_addr_base
;
1615 address_word vu_addr_max_size
;
1616 int vector_num_out
, vector_num_in
;
1618 /* "transferring" operand */
1619 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_XFER
);
1621 /* don't check whether VU is idle */
1623 /* compute VU address base */
1624 if(me
->pke_number
== 0)
1626 vu_addr_base
= VU0_MEM1_WINDOW_START
;
1627 vu_addr_max_size
= VU0_MEM1_SIZE
;
1628 vutrack_addr_base
= VU0_MEM1_SRCADDR_START
;
1633 vu_addr_base
= VU1_MEM1_WINDOW_START
;
1634 vu_addr_max_size
= VU1_MEM1_SIZE
;
1635 vutrack_addr_base
= VU1_MEM1_SRCADDR_START
;
/* NUM register counts down the vectors still to be written */
1639 PKE_REG_MASK_SET(me
, NUM
, NUM
, num
== 0 ? 0x100 : num
);
1641 /* transfer given number of vectors */
1642 vector_num_out
= 0; /* output vector number being processed */
1643 vector_num_in
= 0; /* argument vector number being processed */
1646 quadword vu_old_data
;
1647 quadword vu_new_data
;
1648 quadword unpacked_data
;
1649 address_word vu_addr
;
1650 address_word vutrack_addr
;
1651 unsigned_4 source_addr
= 0;
/* one fewer vector outstanding */
1656 next_num
= PKE_REG_MASK_GET(me
, NUM
, NUM
) - 1;
1657 PKE_REG_MASK_SET(me
, NUM
, NUM
, next_num
);
1659 /* compute VU destination address, as bytes in R5900 memory */
/* NOTE(review): two alternative address computations follow; the test
   that chooses between them and the tail of the second one are not
   visible in this listing */
1662 /* map zero to max+1 */
1663 int addrwl
= (wl
== 0) ? 0x0100 : wl
;
1664 vu_addr
= vu_addr_base
+ 16 * (BIT_MASK_GET(imm
, 0, 9) +
1665 (r
? PKE_REG_MASK_GET(me
, TOPS
, TOPS
) : 0) +
1666 cl
*(vector_num_out
/addrwl
) +
1667 (vector_num_out
%addrwl
));
1670 vu_addr
= vu_addr_base
+ 16 * (BIT_MASK_GET(imm
, 0, 9) +
1671 (r
? PKE_REG_MASK_GET(me
, TOPS
, TOPS
) : 0) +
1674 /* check for vu_addr overflow */
/* wrap back into the window rather than writing past its end */
1675 while(vu_addr
>= vu_addr_base
+ vu_addr_max_size
)
1676 vu_addr
-= vu_addr_max_size
;
1678 /* compute address of tracking table entry */
/* the tracking offset is a quarter of the byte offset into the window */
1679 vutrack_addr
= vutrack_addr_base
+ ((signed_8
)vu_addr
- (signed_8
)vu_addr_base
) / 4;
1681 /* read old VU data word at address; reverse words if needed */
1683 unsigned_16 vu_old_badwords
;
1684 ASSERT(sizeof(vu_old_badwords
) == 16);
1685 PKE_MEM_READ(me
, vu_addr
,
1686 &vu_old_badwords
, 16);
/* word order is reversed between the memory image and vu_old_data */
1687 vu_old_data
[0] = * A4_16(& vu_old_badwords
, 3);
1688 vu_old_data
[1] = * A4_16(& vu_old_badwords
, 2);
1689 vu_old_data
[2] = * A4_16(& vu_old_badwords
, 1);
1690 vu_old_data
[3] = * A4_16(& vu_old_badwords
, 0);
/* NOTE(review): the comment opened on the next line is not closed within
   this listing because its continuation line is not shown; the text is
   left untouched here */
1693 /* For cyclic unpack, next operand quadword may come from instruction stream
1695 if((num
== 0 && cl
== 0 && wl
== 0) || /* shortcut clear */
1696 ((cl
< wl
) && ((vector_num_out
% wl
) >= cl
))) /* wl != 0, set above */
1698 /* clear operand - used only in an "indeterminate" state */
1699 for(i
= 0; i
< 4; i
++)
1700 unpacked_data
[i
] = 0;
1704 /* compute packed vector dimensions */
1705 int vectorbits
, unitbits
;
1707 if(vl
< 3) /* PKE_UNPACK_*_{32,16,8} */
1709 unitbits
= (32 >> vl
);
1710 vectorbits
= unitbits
* (vn
+1);
/* NOTE(review): the V4_5 dimension assignments are not visible here */
1712 else if(vl
== 3 && vn
== 3) /* PKE_UNPACK_V4_5 */
1717 else /* illegal unpack variant */
1719 /* treat as illegal instruction */
1720 pke_code_error(me
, pkecode
);
1724 /* loop over columns */
1725 for(i
=0; i
<=vn
; i
++)
1729 /* offset in bits in current operand word */
1731 (vector_num_in
* vectorbits
) + (i
* unitbits
); /* # of bits from PKEcode */
1733 /* last unit of V4_5 is only one bit wide */
1734 if(vl
== 3 && vn
== 3 && i
== 3) /* PKE_UNPACK_V4_5 */
1737 /* fetch bitfield operand */
/* source_addr is passed by address so the callee can fill it in */
1738 operand
= pke_pc_operand_bits(me
, bitoffset
, unitbits
, & source_addr
);
1740 /* selectively sign-extend; not for V4_5 1-bit value */
1741 if(usn
|| unitbits
== 1)
1742 unpacked_data
[i
] = operand
;
1744 unpacked_data
[i
] = SEXT32(operand
, unitbits
-1);
1747 /* consumed a vector from the PKE instruction stream */
1749 } /* unpack word from instruction operand */
1751 /* compute replacement word */
1752 if(m
) /* use mask register? */
1754 /* compute index into mask register for this word */
1755 int addrwl
= (wl
== 0) ? 0x0100 : wl
;
/* the mask row index is clamped to 3 by PKE_LIMIT */
1756 int mask_index
= PKE_LIMIT(vector_num_out
% addrwl
, 3);
1758 for(i
=0; i
<4; i
++) /* loop over columns */
1760 int mask_op
= PKE_MASKREG_GET(me
, mask_index
, i
);
1761 unsigned_4
* masked_value
= NULL
;
1762 unsigned_4 zero
= 0;
/* NOTE(review): the switch statement and the tests inside the INPUT case
   are not visible in this listing */
1766 case PKE_MASKREG_INPUT
:
1767 /* for vn == 0, all columns are copied from column 0 */
1769 masked_value
= & unpacked_data
[0];
1771 masked_value
= & zero
; /* arbitrary data: undefined in spec */
1773 masked_value
= & unpacked_data
[i
];
1776 case PKE_MASKREG_ROW
: /* exploit R0..R3 contiguity */
1777 masked_value
= & me
->regs
[PKE_REG_R0
+ i
][0];
1780 case PKE_MASKREG_COLUMN
: /* exploit C0..C3 contiguity */
1781 masked_value
= & me
->regs
[PKE_REG_C0
+ mask_index
][0];
1784 case PKE_MASKREG_NOTHING
:
1785 /* "write inhibit" by re-copying old data */
1786 masked_value
= & vu_old_data
[i
];
1791 /* no other cases possible */
1794 /* copy masked value for column */
1795 vu_new_data
[i
] = *masked_value
;
1796 } /* loop over columns */
1800 /* no mask - just copy over entire unpacked quadword */
1801 memcpy(vu_new_data
, unpacked_data
, sizeof(unpacked_data
));
1804 /* process STMOD register for accumulation operations */
1805 switch(PKE_REG_MASK_GET(me
, MODE
, MDE
))
1807 case PKE_MODE_ADDROW
: /* add row registers to output data */
1809 /* exploit R0..R3 contiguity */
1810 vu_new_data
[i
] += me
->regs
[PKE_REG_R0
+ i
][0];
1813 case PKE_MODE_ACCROW
: /* add row registers to output data; accumulate */
1816 /* exploit R0..R3 contiguity */
1817 vu_new_data
[i
] += me
->regs
[PKE_REG_R0
+ i
][0];
/* the sum is stored back so the row register accumulates */
1818 me
->regs
[PKE_REG_R0
+ i
][0] = vu_new_data
[i
];
1822 case PKE_MODE_INPUT
: /* pass data through */
1827 /* write new VU data word at address; reverse words if needed */
1829 unsigned_16 vu_new_badwords
;
/* same word reversal as on the read side */
1830 * A4_16(& vu_new_badwords
, 3) = vu_new_data
[0];
1831 * A4_16(& vu_new_badwords
, 2) = vu_new_data
[1];
1832 * A4_16(& vu_new_badwords
, 1) = vu_new_data
[2];
1833 * A4_16(& vu_new_badwords
, 0) = vu_new_data
[3];
1834 ASSERT(sizeof(vu_new_badwords
) == 16);
1835 PKE_MEM_WRITE(me
, vu_addr
,
1836 &vu_new_badwords
, 16);
1839 /* write tracking address */
1840 ASSERT(sizeof(unsigned_4
) == 4);
/* NOTE(review): the data and length arguments of this write are not
   visible in this listing */
1841 PKE_MEM_WRITE(me
, vutrack_addr
,
1845 /* next vector please */
1847 } /* vector transfer loop */
1848 while(PKE_REG_MASK_GET(me
, NUM
, NUM
) > 0);
/* done: go idle and step over the PKEcode plus its operand words */
1851 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1852 pke_pc_advance(me
, 1 + num_operands
);
1853 } /* PKE FIFO full enough */
1856 /* need to wait for another word */
1857 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1858 /* retry this instruction next clock */
/* Handler for an invalid PKEcode: records the error by setting STAT.ER1,
   returns STAT.PPS to IDLE and advances the PC by one word so the faulty
   word is skipped.
   me:      PKE device whose registers and PC are updated.
   pkecode: the offending PKEcode word (not read in the visible lines). */
1864 pke_code_error(struct pke_device
* me
, unsigned_4 pkecode
)
1866 /* set ER1 flag in STAT register */
1867 PKE_REG_MASK_SET(me
, STAT
, ER1
, 1);
1868 /* advance over faulty word */
1869 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1870 pke_pc_advance(me
, 1);