/* Copyright (C) 1998, Cygnus Solutions */
-#include "sky-pke.h"
-#include <stdlib.h>
+#include "config.h"
+
+#include <stdlib.h>
+#include "sim-main.h"
+#include "sim-bits.h"
+#include "sim-assert.h"
+#include "sky-pke.h"
+#include "sky-dma.h"
+#include "sky-vu.h"
+#include "sky-gpuif.h"
+#include "sky-device.h"
-/* Imported functions */
-void device_error (device *me, char* message); /* device.c */
+#ifdef HAVE_STRING_H
+#include <string.h>
+#else
+#ifdef HAVE_STRINGS_H
+#include <strings.h>
+#endif
+#endif
/* Internal function declarations */
unsigned, sim_cpu*, sim_cia);
static int pke_io_write_buffer(device*, const void*, int, address_word,
unsigned, sim_cpu*, sim_cia);
-static void pke_issue(struct pke_device*);
+static void pke_reset(struct pke_device*);
+static void pke_issue(SIM_DESC, struct pke_device*);
+static void pke_pc_advance(struct pke_device*, int num_words);
+static struct fifo_quadword* pke_pcrel_fifo(struct pke_device*, int operand_num,
+ unsigned_4** operand);
+static unsigned_4* pke_pcrel_operand(struct pke_device*, int operand_num);
+static unsigned_4 pke_pcrel_operand_bits(struct pke_device*, int bit_offset,
+ int bit_width, unsigned_4* sourceaddr);
+static void pke_attach(SIM_DESC sd, struct pke_device* me);
+enum pke_check_target { chk_vu, chk_path1, chk_path2, chk_path3 };
+static int pke_check_stall(struct pke_device* me, enum pke_check_target what);
+static void pke_flip_dbf(struct pke_device* me);
+static void pke_begin_interrupt_stall(struct pke_device* me);
+/* PKEcode handlers */
+static void pke_code_nop(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_stcycl(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_offset(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_base(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_itop(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_stmod(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_mskpath3(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_pkemark(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_flushe(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_flush(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_flusha(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_pkemscal(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_pkemscnt(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_pkemscalf(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_stmask(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_strow(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_stcol(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_mpg(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_direct(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_directhl(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_unpack(struct pke_device* me, unsigned_4 pkecode);
+static void pke_code_error(struct pke_device* me, unsigned_4 pkecode);
+unsigned_4 pke_fifo_flush(struct pke_fifo*);
+void pke_fifo_reset(struct pke_fifo*);
+struct fifo_quadword* pke_fifo_fit(struct pke_fifo*);
+struct fifo_quadword* pke_fifo_access(struct pke_fifo*, unsigned_4 qwnum);
+void pke_fifo_old(struct pke_fifo*, unsigned_4 qwnum);
+
/* Static data */
{
{ "pke0", &pke_io_read_buffer, &pke_io_write_buffer }, /* device */
0, 0, /* ID, flags */
- PKE0_REGISTER_WINDOW_START, PKE0_FIFO_START, /* memory-mapping addresses */
{}, /* regs */
- NULL, 0, 0, NULL, /* FIFO */
- 0 /* pc */
+ {}, 0, /* FIFO write buffer */
+ { NULL, 0, 0, 0 }, /* FIFO */
+ NULL, /* FIFO trace file */
+ -1, -1, 0, 0, 0, /* invalid FIFO cache */
+ 0, 0 /* pc */
};
{
{ "pke1", &pke_io_read_buffer, &pke_io_write_buffer }, /* device */
1, 0, /* ID, flags */
- PKE1_REGISTER_WINDOW_START, PKE1_FIFO_START, /* memory-mapping addresses */
{}, /* regs */
- NULL, 0, 0, NULL, /* FIFO */
- 0 /* pc */
+ {}, 0, /* FIFO write buffer */
+ { NULL, 0, 0, 0 }, /* FIFO */
+ NULL, /* FIFO trace file */
+ -1, -1, 0, 0, 0, /* invalid FIFO cache */
+ 0, 0 /* pc */
};
/* External functions */
-/* Attach PKE0 addresses to main memory */
+/* Attach PKE addresses to main memory: map the PKE0 regions with
+   pke_attach(), then bring the device to its reset state with
+   pke_reset(). */
void
pke0_attach(SIM_DESC sd)
{
- sim_core_attach (sd,
- NULL,
- 0 /*level*/,
- access_read_write,
- 0 /*space ???*/,
- pke0_device.register_memory_addr,
- PKE_REGISTER_WINDOW_SIZE /*nr_bytes*/,
- 0 /*modulo*/,
- (device*) &pke0_device,
- NULL /*buffer*/);
+ pke_attach(sd, & pke0_device);
+ pke_reset(& pke0_device);
+}
- sim_core_attach (sd,
- NULL,
- 0 /*level*/,
- access_read_write,
- 0 /*space ???*/,
- pke0_device.fifo_memory_addr,
- sizeof(quadword) /*nr_bytes*/,
- 0 /*modulo*/,
- (device*) &pke1_device,
- NULL /*buffer*/);
+void
+pke1_attach(SIM_DESC sd)
+{
+ pke_attach(sd, & pke1_device); /* map PKE1 register window, FIFO port and VU tracking tables */
+ pke_reset(& pke1_device); /* then clear registers, flags and pseudo-PC state */
}
-/* Attach PKE1 addresses to main memory */
+
+/* Issue a PKE instruction if possible: public per-unit entry points
+   that forward to pke_issue() with the matching device. */
void
-pke1_attach(SIM_DESC sd)
+pke0_issue(SIM_DESC sd)
+{
+ pke_issue(sd, & pke0_device);
+}
+
+void
+pke1_issue(SIM_DESC sd)
+{
+ pke_issue(sd, & pke1_device); /* same as pke0_issue(), but for the PKE1 device */
+}
+
+
+
+/* Internal functions */
+
+
+/* Attach PKE memory regions to simulator: the register window, the
+   FIFO port and two VU source-address tracking tables are attached
+   (addresses chosen by me->pke_number), then an optional FIFO trace
+   file is opened. */
+
+void
+pke_attach(SIM_DESC sd, struct pke_device* me)
{
- sim_core_attach (sd,
- NULL,
- 0 /*level*/,
- access_read_write,
- 0 /*space ???*/,
- pke1_device.register_memory_addr,
+ /* register file: window selected by PKE number, with this PKE as the device */
+ sim_core_attach (sd, NULL, 0, access_read_write, 0,
+ (me->pke_number == 0) ? PKE0_REGISTER_WINDOW_START : PKE1_REGISTER_WINDOW_START,
PKE_REGISTER_WINDOW_SIZE /*nr_bytes*/,
0 /*modulo*/,
- (device*) &pke1_device,
+ (device*) me,
NULL /*buffer*/);
- sim_core_attach (sd,
- NULL,
- 0 /*level*/,
- access_read_write,
- 0 /*space ???*/,
- pke1_device.fifo_memory_addr,
+ /* FIFO port: a single quadword-sized window, with this PKE as the device */
+ sim_core_attach (sd, NULL, 0, access_read_write, 0,
+ (me->pke_number == 0) ? PKE0_FIFO_ADDR : PKE1_FIFO_ADDR,
sizeof(quadword) /*nr_bytes*/,
0 /*modulo*/,
- (device*) &pke1_device,
+ (device*) me,
NULL /*buffer*/);
-}
+ /* VU MEM0 tracking table: half the size of VU MEM0, attached with
+    neither a device nor a caller-supplied buffer */
+ sim_core_attach (sd, NULL, 0, access_read_write, 0,
+ ((me->pke_number == 0) ? VU0_MEM0_SRCADDR_START : VU1_MEM0_SRCADDR_START),
+ ((me->pke_number == 0) ? VU0_MEM0_SIZE : VU1_MEM0_SIZE) / 2,
+ 0 /*modulo*/,
+ NULL,
+ NULL /*buffer*/);
-/* Issue a PKE0 instruction if possible */
+ /* VU MEM1 tracking table: a quarter the size of VU MEM1, attached
+    with neither a device nor a caller-supplied buffer */
+ sim_core_attach (sd, NULL, 0, access_read_write, 0,
+ ((me->pke_number == 0) ? VU0_MEM1_SRCADDR_START : VU1_MEM1_SRCADDR_START),
+ ((me->pke_number == 0) ? VU0_MEM1_SIZE : VU1_MEM1_SIZE) / 4,
+ 0 /*modulo*/,
+ NULL,
+ NULL /*buffer*/);
-void
-pke0_issue()
-{
- pke_issue(& pke0_device);
-}
+ /* attach to trace file if appropriate: the file name is taken from
+    the environment variable VIF<n>_TRACE_FILE, where <n> is the PKE
+    number; nothing is opened when the variable is unset */
+ {
+ char trace_envvar[80];
+ char* trace_filename = NULL;
+ sprintf(trace_envvar, "VIF%d_TRACE_FILE", me->pke_number);
+ trace_filename = getenv(trace_envvar);
+ if(trace_filename != NULL)
+ {
+ me->fifo_trace_file = fopen(trace_filename, "w");
+ if(me->fifo_trace_file == NULL)
+ perror("VIF FIFO trace error on fopen"); /* report and carry on without tracing */
+ else
+ setvbuf(me->fifo_trace_file, NULL, _IOLBF, 0); /* line-buffer the trace stream */
+ }
+ }
+}
-/* Issue a PKE1 instruction if possible */
-void
-pke1_issue()
+/* Read PKE Pseudo-PC into buf in target order.
+   The stored word is (fifo_pc << 2) | qw_pc, i.e. the FIFO quadword
+   index with the word index inside that quadword in the low two bits.
+   Returns 4, the number of bytes stored at buf. */
+int
+read_pke_pc (struct pke_device *me, void *buf)
{
- pke_issue(& pke0_device);
+ *((int *) buf) = H2T_4( (me->fifo_pc << 2) | me->qw_pc );
+ return 4;
}
+/* Read PKE reg into buf in target order.
+   Stores one 32-bit word at buf: word 0 of register reg_num, except
+   that PKE1-only registers read as zero on PKE0 and the write-only
+   FBRST register always reads as zero.  An unrecognized reg_num trips
+   the ASSERT in the default case.  Returns 4. */
+int
+read_pke_reg (struct pke_device *me, int reg_num, void *buf)
+{
+ /* handle reads to individual registers */
+ switch (reg_num)
+ {
+ /* handle common case of register reading, side-effect free */
+ /* PKE1-only registers*/
+ case PKE_REG_BASE:
+ case PKE_REG_OFST:
+ case PKE_REG_TOPS:
+ case PKE_REG_TOP:
+ case PKE_REG_DBF:
+ if (me->pke_number == 0)
+ {
+ *((int *) buf) = 0; /* these registers do not exist on PKE0: read as zero */
+ break;
+ }
+ /* fall through */
+
+ /* PKE0 & PKE1 common registers*/
+ case PKE_REG_STAT:
+ case PKE_REG_ERR:
+ case PKE_REG_MARK:
+ case PKE_REG_CYCLE:
+ case PKE_REG_MODE:
+ case PKE_REG_NUM:
+ case PKE_REG_MASK:
+ case PKE_REG_CODE:
+ case PKE_REG_ITOPS:
+ case PKE_REG_ITOP:
+ case PKE_REG_R0:
+ case PKE_REG_R1:
+ case PKE_REG_R2:
+ case PKE_REG_R3:
+ case PKE_REG_C0:
+ case PKE_REG_C1:
+ case PKE_REG_C2:
+ case PKE_REG_C3:
+ *((int *) buf) = H2T_4(me->regs[reg_num][0]); /* only word 0 of the register is returned */
+ break;
+
+ /* handle common case of write-only registers */
+ case PKE_REG_FBRST:
+ *((int *) buf) = 0;
+ break;
+
+ default:
+ ASSERT(0); /* reg_num matched none of the known registers */
+ }
-/* Internal functions */
+ return 4;
+}
/* Handle a PKE read; return no. of bytes read */
int space,
address_word addr,
unsigned nr_bytes,
- sim_cpu *processor,
+ sim_cpu *cpu,
sim_cia cia)
{
/* downcast to gather embedding pke_device struct */
struct pke_device* me = (struct pke_device*) me_;
+ /* find my address ranges */
+ address_word my_reg_start =
+ (me->pke_number == 0) ? PKE0_REGISTER_WINDOW_START : PKE1_REGISTER_WINDOW_START;
+ address_word my_fifo_addr =
+ (me->pke_number == 0) ? PKE0_FIFO_ADDR : PKE1_FIFO_ADDR;
+
/* enforce that an access does not span more than one quadword */
address_word low = ADDR_TRUNC_QW(addr);
address_word high = ADDR_TRUNC_QW(addr + nr_bytes - 1);
return 0;
/* classify address & handle */
- if(addr >= me->register_memory_addr &&
- addr < me->register_memory_addr + PKE_REGISTER_WINDOW_SIZE)
+ if((addr >= my_reg_start) && (addr < my_reg_start + PKE_REGISTER_WINDOW_SIZE))
{
/* register bank */
- int reg_num = ADDR_TRUNC_QW(addr - me->register_memory_addr) >> 4;
- int readable = 1;
+ int reg_num = ADDR_TRUNC_QW(addr - my_reg_start) >> 4;
+ int reg_byte = ADDR_OFFSET_QW(addr); /* find byte-offset inside register bank */
+ quadword result;
- /* ensure readibility of register: all okay except PKE1-only ones read on PKE0 */
- switch(reg_num)
- {
- case PKE_REG_BASE:
- case PKE_REG_OFST:
- case PKE_REG_TOPS:
- case PKE_REG_TOP:
- case PKE_REG_DBF:
- if(me->pke_number == 0) /* PKE0 cannot access these registers */
- readable = 0;
- }
+ /* clear result */
+ result[0] = result[1] = result[2] = result[3] = 0;
- /* perform read & return */
- if(readable)
- {
- /* find byte-offset inside register bank */
- int reg_byte = ADDR_OFFSET_QW(addr);
- void* src = ((unsigned_1*) (& me->regs[reg_num])) + reg_byte;
- /* copy the bits */
- memcpy(dest, src, nr_bytes);
- /* okay */
- return nr_bytes;
- }
- else
- {
- /* error */
- return 0;
- }
+ read_pke_reg (me, reg_num, result);
+ /* perform transfer & return */
+ memcpy(dest, ((unsigned_1*) &result) + reg_byte, nr_bytes);
+
+ return nr_bytes;
/* NOTREACHED */
}
- else if(addr >= me->fifo_memory_addr &&
- addr < me->fifo_memory_addr + sizeof(quadword))
+ else if(addr >= my_fifo_addr &&
+ addr < my_fifo_addr + sizeof(quadword))
{
/* FIFO */
- /* XXX: FIFO is not readable. */
- return 0;
+ /* FIFO is not readable: return a word of zeroes */
+ memset(dest, 0, nr_bytes);
+ return nr_bytes;
}
/* NOTREACHED */
+ return 0;
+}
+
+/* Write PKE reg from buf, which is in target order.
+   Only the first 32-bit word at buf is examined.  FBRST, ERR and MARK
+   accept writes; every other known register is treated as read-only.
+   Returns 4 when the write was accepted, 0 when the register is not
+   writeable.  An unrecognized reg_num trips the ASSERT in the default
+   case. */
+int
+write_pke_reg (struct pke_device *me, int reg_num, const void *buf)
+{
+ int writeable = 1;
+ /* make words host-endian */
+ unsigned_4 input = T2H_4( *((unsigned_4 *) buf) );
+
+ /* handle writes to individual registers; clear `writeable' on error */
+ switch (reg_num)
+ {
+ case PKE_REG_FBRST:
+ /* Order these tests from least to most overriding, in case
+ multiple bits are set. */
+ if(BIT_MASK_GET(input, PKE_REG_FBRST_STC_B, PKE_REG_FBRST_STC_E))
+ {
+ /* clear a bunch of status bits */
+ PKE_REG_MASK_SET(me, STAT, PSS, 0);
+ PKE_REG_MASK_SET(me, STAT, PFS, 0);
+ PKE_REG_MASK_SET(me, STAT, PIS, 0);
+ PKE_REG_MASK_SET(me, STAT, INT, 0);
+ PKE_REG_MASK_SET(me, STAT, ER0, 0);
+ PKE_REG_MASK_SET(me, STAT, ER1, 0);
+ me->flags &= ~PKE_FLAG_PENDING_PSS;
+ /* will allow resumption of possible stalled instruction */
+ }
+ if(BIT_MASK_GET(input, PKE_REG_FBRST_STP_B, PKE_REG_FBRST_STP_E))
+ {
+ me->flags |= PKE_FLAG_PENDING_PSS; /* STAT:PSS itself is raised later, by pke_issue() */
+ }
+ if(BIT_MASK_GET(input, PKE_REG_FBRST_FBK_B, PKE_REG_FBRST_FBK_E))
+ {
+ PKE_REG_MASK_SET(me, STAT, PFS, 1);
+ }
+ if(BIT_MASK_GET(input, PKE_REG_FBRST_RST_B, PKE_REG_FBRST_RST_E))
+ {
+ pke_reset(me); /* tested last: a reset wipes whatever the bits above just set */
+ }
+ break;
+
+ case PKE_REG_ERR:
+ /* copy bottom three bits */
+ BIT_MASK_SET(me->regs[PKE_REG_ERR][0], 0, 2, BIT_MASK_GET(input, 0, 2));
+ break;
+
+ case PKE_REG_MARK:
+ /* copy bottom sixteen bits */
+ PKE_REG_MASK_SET(me, MARK, MARK, BIT_MASK_GET(input, 0, 15));
+ /* reset MRK bit in STAT */
+ PKE_REG_MASK_SET(me, STAT, MRK, 0);
+ break;
+
+ /* handle common case of read-only registers */
+ /* PKE1-only registers - not really necessary to handle separately */
+ case PKE_REG_BASE:
+ case PKE_REG_OFST:
+ case PKE_REG_TOPS:
+ case PKE_REG_TOP:
+ case PKE_REG_DBF:
+ if(me->pke_number == 0)
+ writeable = 0;
+ /* fall through */
+ /* PKE0 & PKE1 common registers*/
+ case PKE_REG_STAT:
+ /* ignore FDR bit for PKE1_STAT -- simulator does not implement PKE->RAM transfers */
+ case PKE_REG_CYCLE:
+ case PKE_REG_MODE:
+ case PKE_REG_NUM:
+ case PKE_REG_MASK:
+ case PKE_REG_CODE:
+ case PKE_REG_ITOPS:
+ case PKE_REG_ITOP:
+ case PKE_REG_R0:
+ case PKE_REG_R1:
+ case PKE_REG_R2:
+ case PKE_REG_R3:
+ case PKE_REG_C0:
+ case PKE_REG_C1:
+ case PKE_REG_C2:
+ case PKE_REG_C3:
+ writeable = 0; /* read-only: the register contents are left untouched */
+ break;
+
+ default:
+ ASSERT(0); /* reg_num matched none of the known registers */
+ }
+
+ /* perform return */
+ if(! writeable)
+ {
+ return 0; /* error */
+ }
+
+ return 4;
}
-/* Handle a PKE read; return no. of bytes written */
+/* Handle a PKE write; return no. of bytes written */
int
pke_io_write_buffer(device *me_,
int space,
address_word addr,
unsigned nr_bytes,
- sim_cpu *processor,
+ sim_cpu *cpu,
sim_cia cia)
{
/* downcast to gather embedding pke_device struct */
struct pke_device* me = (struct pke_device*) me_;
+ /* find my address ranges */
+ address_word my_reg_start =
+ (me->pke_number == 0) ? PKE0_REGISTER_WINDOW_START : PKE1_REGISTER_WINDOW_START;
+ address_word my_fifo_addr =
+ (me->pke_number == 0) ? PKE0_FIFO_ADDR : PKE1_FIFO_ADDR;
+
/* enforce that an access does not span more than one quadword */
address_word low = ADDR_TRUNC_QW(addr);
address_word high = ADDR_TRUNC_QW(addr + nr_bytes - 1);
return 0;
/* classify address & handle */
- if(addr >= me->register_memory_addr &&
- addr < me->register_memory_addr + PKE_REGISTER_WINDOW_SIZE)
+ if((addr >= my_reg_start) && (addr < my_reg_start + PKE_REGISTER_WINDOW_SIZE))
{
/* register bank */
- int reg_num = ADDR_TRUNC_QW(addr - me->register_memory_addr) >> 4;
- int writeable = 1;
+ int reg_num = ADDR_TRUNC_QW(addr - my_reg_start) >> 4;
+ int reg_byte = ADDR_OFFSET_QW(addr); /* find byte-offset inside register bank */
+ quadword input;
+
+ /* clear input */
+ input[0] = input[1] = input[2] = input[3] = 0;
+
+ /* write user-given bytes into input */
+ memcpy(((unsigned_1*) &input) + reg_byte, src, nr_bytes);
+
+ write_pke_reg (me, reg_num, input);
+ return nr_bytes;
- /* ensure readibility of register: all okay except PKE1-only ones read on PKE0 */
- switch(reg_num)
+ /* NOTREACHED */
+ }
+ else if(addr >= my_fifo_addr &&
+ addr < my_fifo_addr + sizeof(quadword))
+ {
+ /* FIFO */
+ struct fifo_quadword* fqw;
+ int fifo_byte = ADDR_OFFSET_QW(addr); /* find byte-offset inside fifo quadword */
+ unsigned_4 dma_tag_present = 0;
+ int i;
+
+ /* collect potentially-partial quadword in write buffer; LE byte order */
+ memcpy(((unsigned_1*)& me->fifo_qw_in_progress) + fifo_byte, src, nr_bytes);
+ /* mark bytes written */
+ for(i = fifo_byte; i < fifo_byte + nr_bytes; i++)
+ BIT_MASK_SET(me->fifo_qw_done, i, i, 1);
+
+ /* return if quadword not quite written yet */
+ if(BIT_MASK_GET(me->fifo_qw_done, 0, sizeof(quadword)-1) !=
+ BIT_MASK_BTW(0, sizeof(quadword)-1))
+ return nr_bytes;
+
+ /* all done - process quadword after clearing flag */
+ BIT_MASK_SET(me->fifo_qw_done, 0, sizeof(quadword)-1, 0);
+
+ /* allocate required address in FIFO */
+ fqw = pke_fifo_fit(& me->fifo);
+ ASSERT(fqw != NULL);
+
+ /* fill in unclassified FIFO quadword data in host byte order */
+ fqw->word_class[0] = fqw->word_class[1] =
+ fqw->word_class[2] = fqw->word_class[3] = wc_unknown;
+ fqw->data[0] = T2H_4(me->fifo_qw_in_progress[0]);
+ fqw->data[1] = T2H_4(me->fifo_qw_in_progress[1]);
+ fqw->data[2] = T2H_4(me->fifo_qw_in_progress[2]);
+ fqw->data[3] = T2H_4(me->fifo_qw_in_progress[3]);
+
+ /* read DMAC-supplied indicators */
+ ASSERT(sizeof(unsigned_4) == 4);
+ PKE_MEM_READ(me, (me->pke_number == 0 ? DMA_D0_MADR : DMA_D1_MADR),
+ & fqw->source_address, /* converted to host-endian */
+ 4);
+ PKE_MEM_READ(me, (me->pke_number == 0 ? DMA_D0_PKTFLAG : DMA_D1_PKTFLAG),
+ & dma_tag_present,
+ 4);
+
+ if(dma_tag_present)
{
- case PKE_REG_BASE:
- case PKE_REG_OFST:
- case PKE_REG_TOPS:
- case PKE_REG_TOP:
- case PKE_REG_DBF:
- if(me->pke_number == 0) /* PKE0 cannot access these registers */
- writeable = 0;
+ /* lower two words are DMA tags */
+ fqw->word_class[0] = fqw->word_class[1] = wc_dma;
}
- /* perform write & return */
- if(writeable)
+ /* set FQC to "1" as FIFO is now not empty */
+ PKE_REG_MASK_SET(me, STAT, FQC, 1);
+
+ /* okay */
+ return nr_bytes;
+ }
+
+ /* NOTREACHED */
+ return 0;
+}
+
+
+
+/* Reset the PKE: zero the register file, the device flags and the
+   partial-quadword write mask, and move the pseudo-PC past everything
+   currently queued in the FIFO. */
+void
+pke_reset(struct pke_device* me)
+{
+  /* wipe flags, write-buffer bookkeeping and all registers */
+  me->flags = 0;
+  me->fifo_qw_done = 0;
+  memset(me->regs, 0, sizeof(me->regs));
+
+  /* restart at word 0 of the quadword just past the last queued one;
+     earlier FIFO entries are kept as history */
+  me->qw_pc = 0;
+  me->fifo_pc = pke_fifo_flush(& me->fifo);
+}
+
+
+
+/* Issue & swallow next PKE opcode if possible/available.
+   Returns without executing anything when no quadword is available at
+   the pseudo-PC, when a stall bit (PFS, PSS or PIS) is set in STAT, or
+   when a newly seen interrupt-flagged instruction other than PKEMARK
+   has to be suspended.  Otherwise the command field selects one of the
+   pke_code_* handlers; anything unmatched goes to pke_code_error().
+   The `sd' argument is not referenced in this function body. */
+
+void
+pke_issue(SIM_DESC sd, struct pke_device* me)
+{
+ struct fifo_quadword* fqw;
+ unsigned_4 fw;
+ unsigned_4 cmd, intr;
+
+ /* 1 -- fetch PKE instruction */
+
+ /* confirm availability of new quadword of PKE instructions */
+ fqw = pke_fifo_access(& me->fifo, me->fifo_pc);
+ if(fqw == NULL)
+ return;
+
+ /* skip over DMA tag, if present */
+ pke_pc_advance(me, 0);
+ /* note: this can only change qw_pc from 0 to 2 and will not
+ invalidate fqw */
+
+ /* "fetch" instruction quadword and word */
+ fw = fqw->data[me->qw_pc];
+
+ /* store word in PKECODE register */
+ me->regs[PKE_REG_CODE][0] = fw;
+
+
+ /* 2 -- test go / no-go for PKE execution */
+
+ /* switch on STAT:PSS if PSS-pending and in idle state */
+ if((PKE_REG_MASK_GET(me, STAT, PPS) == PKE_REG_STAT_PPS_IDLE) &&
+ (me->flags & PKE_FLAG_PENDING_PSS) != 0)
+ {
+ me->flags &= ~PKE_FLAG_PENDING_PSS;
+ PKE_REG_MASK_SET(me, STAT, PSS, 1);
+ }
+
+ /* check for stall/halt control bits */
+ if(PKE_REG_MASK_GET(me, STAT, PFS) ||
+ PKE_REG_MASK_GET(me, STAT, PSS) || /* note special treatment below */
+ /* PEW bit not a reason to keep stalling - it's just an indication, re-computed below */
+ /* PGW bit not a reason to keep stalling - it's just an indication, re-computed below */
+ /* ER0/ER1 not a reason to keep stalling - it's just an indication */
+ PKE_REG_MASK_GET(me, STAT, PIS))
+ {
+ /* (still) stalled */
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
+ /* try again next cycle */
+ return;
+ }
+
+
+ /* 3 -- decode PKE instruction */
+
+ /* decoding */
+ if(PKE_REG_MASK_GET(me, STAT, PPS) == PKE_REG_STAT_PPS_IDLE)
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_DECODE);
+
+ /* Extract relevant bits from PKEcode */
+ intr = BIT_MASK_GET(fw, PKE_OPCODE_I_B, PKE_OPCODE_I_E);
+ cmd = BIT_MASK_GET(fw, PKE_OPCODE_CMD_B, PKE_OPCODE_CMD_E);
+
+ /* handle interrupts */
+ if(intr)
+ {
+ /* are we resuming an interrupt-stalled instruction? */
+ if(me->flags & PKE_FLAG_INT_NOLOOP)
+ {
+ /* clear loop-prevention flag */
+ me->flags &= ~PKE_FLAG_INT_NOLOOP;
+
+ /* fall through to decode & execute */
+ /* The pke_code_* functions should not check the MSB in the
+ pkecode. */
+ }
+ else /* new interrupt-flagged instruction */
{
- /* find byte-offset inside register bank */
- int reg_byte = ADDR_OFFSET_QW(addr);
- void* dest = ((unsigned_1*) (& me->regs[reg_num])) + reg_byte;
- /* copy the bits */
- memcpy(dest, src, nr_bytes);
- return nr_bytes;
+ /* set INT flag in STAT register */
+ PKE_REG_MASK_SET(me, STAT, INT, 1);
+ /* set loop-prevention flag, so the same instruction is not
+    treated as a new interrupt when it is seen again */
+ me->flags |= PKE_FLAG_INT_NOLOOP;
+
+ /* set PIS if stall not masked */
+ if(!PKE_REG_MASK_GET(me, ERR, MII))
+ pke_begin_interrupt_stall(me);
+
+ /* suspend this instruction unless it's PKEMARK */
+ if(!IS_PKE_CMD(cmd, PKEMARK))
+ {
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
+ return;
+ }
+ else
+ {
+ ; /* fall through to decode & execute */
+ }
}
- else
+ }
+
+
+ /* decode & execute; the branches guarded by pke_number == 1 are
+    only taken on PKE1 */
+ if(IS_PKE_CMD(cmd, PKENOP))
+ pke_code_nop(me, fw);
+ else if(IS_PKE_CMD(cmd, STCYCL))
+ pke_code_stcycl(me, fw);
+ else if(me->pke_number == 1 && IS_PKE_CMD(cmd, OFFSET))
+ pke_code_offset(me, fw);
+ else if(me->pke_number == 1 && IS_PKE_CMD(cmd, BASE))
+ pke_code_base(me, fw);
+ else if(IS_PKE_CMD(cmd, ITOP))
+ pke_code_itop(me, fw);
+ else if(IS_PKE_CMD(cmd, STMOD))
+ pke_code_stmod(me, fw);
+ else if(me->pke_number == 1 && IS_PKE_CMD(cmd, MSKPATH3))
+ pke_code_mskpath3(me, fw);
+ else if(IS_PKE_CMD(cmd, PKEMARK))
+ pke_code_pkemark(me, fw);
+ else if(IS_PKE_CMD(cmd, FLUSHE))
+ pke_code_flushe(me, fw);
+ else if(me->pke_number == 1 && IS_PKE_CMD(cmd, FLUSH))
+ pke_code_flush(me, fw);
+ else if(me->pke_number == 1 && IS_PKE_CMD(cmd, FLUSHA))
+ pke_code_flusha(me, fw);
+ else if(IS_PKE_CMD(cmd, PKEMSCAL))
+ pke_code_pkemscal(me, fw);
+ else if(IS_PKE_CMD(cmd, PKEMSCNT))
+ pke_code_pkemscnt(me, fw);
+ else if(me->pke_number == 1 && IS_PKE_CMD(cmd, PKEMSCALF))
+ pke_code_pkemscalf(me, fw);
+ else if(IS_PKE_CMD(cmd, STMASK))
+ pke_code_stmask(me, fw);
+ else if(IS_PKE_CMD(cmd, STROW))
+ pke_code_strow(me, fw);
+ else if(IS_PKE_CMD(cmd, STCOL))
+ pke_code_stcol(me, fw);
+ else if(IS_PKE_CMD(cmd, MPG))
+ pke_code_mpg(me, fw);
+ else if(IS_PKE_CMD(cmd, DIRECT))
+ pke_code_direct(me, fw);
+ else if(IS_PKE_CMD(cmd, DIRECTHL))
+ pke_code_directhl(me, fw);
+ else if(IS_PKE_CMD(cmd, UNPACK))
+ pke_code_unpack(me, fw);
+ /* ... no other commands ... */
+ else
+ pke_code_error(me, fw);
+}
+
+
+
+/* "Flush" the FIFO without touching it: compute the absolute index one
+   past the last queued quadword.  A caller that adopts the returned
+   value as its PC sees the FIFO as empty; nothing is freed or changed
+   here. */
+
+unsigned_4
+pke_fifo_flush(struct pke_fifo* fifo)
+{
+  /* read-only computation: queued quadwords stay in place */
+  unsigned_4 one_past_last = fifo->origin + fifo->next;
+
+  return one_past_last;
+}
+
+
+
+/* Empty the FIFO for real: release every queued quadword and restart
+   the origin/next bookkeeping at zero.  The pointer array itself is
+   not released here. */
+
+void
+pke_fifo_reset(struct pke_fifo* fifo)
+{
+  int slot = 0;
+
+  /* free each occupied slot and null it out */
+  while(slot < fifo->next)
+    {
+      zfree(fifo->quadwords[slot]);
+      fifo->quadwords[slot] = NULL;
+      slot++;
+    }
+
+  /* no history, no queued entries */
+  fifo->next = 0;
+  fifo->origin = 0;
+}
+
+
+
+/* Make space for the next quadword in the FIFO. Allocate/enlarge
+   FIFO pointer block if necessary (it grows by PKE_FIFO_GROW_SIZE
+   slots at a time). Return a pointer to the newly allocated quadword,
+   which has already been appended to the FIFO; its contents are left
+   for the caller to fill in. */
+
+struct fifo_quadword*
+pke_fifo_fit(struct pke_fifo* fifo)
+{
+ struct fifo_quadword* fqw;
+
+ /* out of space on quadword pointer array? */
+ if(fifo->next == fifo->length) /* also triggered before fifo->quadwords allocated */
+ {
+ struct fifo_quadword** new_qw;
+ unsigned_4 new_length = fifo->length + PKE_FIFO_GROW_SIZE;
+
+ /* allocate new pointer block */
+ new_qw = zalloc(new_length * sizeof(struct fifo_quadword*));
+ ASSERT(new_qw != NULL);
+
+ /* copy over old contents, if any */
+ if(fifo->quadwords != NULL)
{
- /* error */
- return 0;
- }
+ /* copy over old pointers to beginning of new block */
+ memcpy(new_qw, fifo->quadwords,
+ fifo->length * sizeof(struct fifo_quadword*));
+
+ /* free old block */
+ zfree(fifo->quadwords);
+ }
- /* NOTREACHED */
+ /* replace pointers & counts */
+ fifo->quadwords = new_qw;
+ fifo->length = new_length;
+ }
+
+ /* sanity check */
+ ASSERT(fifo->quadwords != NULL);
+
+ /* allocate new quadword from heap */
+ fqw = zalloc(sizeof(struct fifo_quadword));
+ ASSERT(fqw != NULL);
+
+ /* push quadword onto fifo */
+ fifo->quadwords[fifo->next] = fqw;
+ fifo->next++;
+ return fqw;
+}
+
+
+
+/* Return a pointer to the FIFO quadword with given absolute index, or
+   NULL if it is out of range.  Absolute indices run from fifo->origin
+   (oldest retained entry) up to, but not including,
+   fifo->origin + fifo->next. */
+
+struct fifo_quadword*
+pke_fifo_access(struct pke_fifo* fifo, unsigned_4 qwnum)
+{
+ struct fifo_quadword* fqw;
+
+ if((qwnum < fifo->origin) || /* before history */
+ (qwnum >= fifo->origin + fifo->next)) /* after last available quadword */
+ fqw = NULL;
+ else
+ {
+ ASSERT(fifo->quadwords != NULL); /* must be allocated already */
+ fqw = fifo->quadwords[qwnum - fifo->origin]; /* pull out pointer from array */
+ ASSERT(fqw != NULL); /* must be allocated already */
}
- else if(addr >= me->fifo_memory_addr &&
- addr < me->fifo_memory_addr + sizeof(quadword))
+
+ return fqw;
+}
+
+
+/* Authorize release of any FIFO entries older than given absolute quadword.
+   The most recent PKE_FIFO_ARCHEOLOGY entries before `qwnum' are kept
+   as history; anything older is freed, the surviving pointers are
+   shifted to the start of the array, and origin/next are adjusted so
+   that absolute indices stay valid. */
+void
+pke_fifo_old(struct pke_fifo* fifo, unsigned_4 qwnum)
+{
+ /* do we have any too-old FIFO elements? */
+ if(fifo->origin + PKE_FIFO_ARCHEOLOGY < qwnum)
{
- /* FIFO */
+ /* count quadwords to forget */
+ int horizon = qwnum - (fifo->origin + PKE_FIFO_ARCHEOLOGY);
+ int i;
+
+ /* free quadwords at indices below horizon */
+ for(i=0; i < horizon; i++)
+ zfree(fifo->quadwords[i]);
+
+ /* move surviving quadword pointers down to beginning of array */
+ for(i=horizon; i < fifo->next; i++)
+ fifo->quadwords[i-horizon] = fifo->quadwords[i];
+
+ /* clear duplicate pointers */
+ for(i=fifo->next - horizon; i < fifo->next; i++)
+ fifo->quadwords[i] = NULL;
+
+ /* adjust FIFO pointers */
+ fifo->origin = fifo->origin + horizon;
+ fifo->next = fifo->next - horizon;
+ }
+}
+
+
+
+
+/* advance the PC by given number of data words, not counting any DMA
+   tag words stepped over on the way; assume FIFO is filled enough.
+   Each time a quadword boundary is crossed and a trace file is open,
+   write a FIFO trace line for the quadword just consumed.  Afterwards
+   authorize disposal of old FIFO entries, clear STAT/FQC if no
+   quadword is available at the new PC, and otherwise classify the
+   word where the PC lands as a PKEcode. */
+
+void
+pke_pc_advance(struct pke_device* me, int num_words)
+{
+ int num = num_words;
+ struct fifo_quadword* fq = NULL;
+ unsigned_4 old_fifo_pc = me->fifo_pc;
+
+ ASSERT(num_words >= 0);
- /* assert transfer size == 128 bits */
- if(nr_bytes != sizeof(quadword))
- return 0;
+ /* printf("pke %d pc_advance num_words %d\n", me->pke_number, num_words); */
+
+ while(1)
+ {
+ /* find next quadword, if any */
+ fq = pke_fifo_access(& me->fifo, me->fifo_pc);
- /* ensure FIFO has enough elements */
- if(me->fifo_num_elements == me->fifo_buffer_size)
+ /* skip over DMA tag words if present in word 0 or 1 */
+ if(fq != NULL && fq->word_class[me->qw_pc] == wc_dma)
{
- /* time to grow */
- int new_fifo_buffer_size = me->fifo_buffer_size + 20;
- void* ptr = realloc((void*) me->fifo, new_fifo_buffer_size*sizeof(quadword));
+ /* skip by going around loop an extra time */
+ num ++;
+ }
+
+ /* nothing left to skip / no DMA tag here */
+ if(num == 0)
+ break;
- if(ptr == NULL)
+ /* we are supposed to skip existing words */
+ ASSERT(fq != NULL);
+
+ /* one word skipped */
+ num --;
+
+ /* point to next word */
+ me->qw_pc ++;
+ if(me->qw_pc == 4)
+ {
+ me->qw_pc = 0;
+ me->fifo_pc ++;
+
+ /* trace the consumption of the FIFO quadword we just skipped over */
+ /* fq still points to it */
+ if(me->fifo_trace_file != NULL)
{
- /* oops, cannot enlarge FIFO any more */
- device_error(me_, "Cannot enlarge FIFO buffer\n");
- return 0;
+ /* assert complete classification */
+ ASSERT(fq->word_class[3] != wc_unknown);
+ ASSERT(fq->word_class[2] != wc_unknown);
+ ASSERT(fq->word_class[1] != wc_unknown);
+ ASSERT(fq->word_class[0] != wc_unknown);
+
+ /* print trace record: PKE number, the four data words from
+    word 3 down to word 0, the source address, then one
+    class character per word in the same order */
+ fprintf(me->fifo_trace_file,
+ "%d 0x%08x_%08x_%08x_%08x 0x%08x %c%c%c%c\n",
+ (me->pke_number == 0 ? 0 : 1),
+ (unsigned) fq->data[3], (unsigned) fq->data[2],
+ (unsigned) fq->data[1], (unsigned) fq->data[0],
+ (unsigned) fq->source_address,
+ fq->word_class[3], fq->word_class[2],
+ fq->word_class[1], fq->word_class[0]);
}
+ } /* next quadword */
+ }
+
+ /* age old entries before PC */
+ if(me->fifo_pc != old_fifo_pc)
+ {
+ /* we advanced the fifo-pc; authorize disposal of anything
+ before previous PKEcode */
+ pke_fifo_old(& me->fifo, old_fifo_pc);
+ }
+
+ /* clear FQC if FIFO is now empty */
+ fq = pke_fifo_access(& me->fifo, me->fifo_pc);
+ if(fq == NULL)
+ {
+ PKE_REG_MASK_SET(me, STAT, FQC, 0);
+ }
+ else /* annotate the word where the PC lands as a PKEcode */
+ {
+ ASSERT(fq->word_class[me->qw_pc] == wc_pkecode || fq->word_class[me->qw_pc] == wc_unknown);
+ fq->word_class[me->qw_pc] = wc_pkecode;
+ }
+}
+
+
+
+
+
+/* Return pointer to FIFO quadword containing given operand# in FIFO.
+   `operand_num' starts at 1 and counts words after the current PC,
+   not counting DMA tag words.  When the result is non-NULL, a pointer
+   to the operand word itself is stored through `operand', which is
+   dereferenced unconditionally in that case and so must be a valid
+   pointer.  If FIFO is not full enough, return 0 and leave `*operand'
+   untouched.  Signal an ER0 indication upon skipping a DMA tag.
+   Successful searches are cached per PC so that a later request for a
+   higher operand number continues from the previous position. */
- me->fifo_buffer_size = new_fifo_buffer_size;
+struct fifo_quadword*
+pke_pcrel_fifo(struct pke_device* me, int operand_num, unsigned_4** operand)
+{
+ int num;
+ int new_qw_pc, new_fifo_pc;
+ struct fifo_quadword* fq = NULL;
+
+ /* check for validity of last search results in cache */
+ if(me->last_fifo_pc == me->fifo_pc &&
+ me->last_qw_pc == me->qw_pc &&
+ operand_num > me->last_num)
+ {
+ /* continue search from last stop */
+ new_fifo_pc = me->last_new_fifo_pc;
+ new_qw_pc = me->last_new_qw_pc;
+ num = operand_num - me->last_num;
+ }
+ else
+ {
+ /* start search from scratch */
+ new_fifo_pc = me->fifo_pc;
+ new_qw_pc = me->qw_pc;
+ num = operand_num;
+ }
+
+ ASSERT(num > 0);
+
+ /* printf("pke %d pcrel_fifo operand_num %d\n", me->pke_number, operand_num); */
+
+ do
+ {
+ /* one word skipped */
+ num --;
+
+ /* point to next word */
+ new_qw_pc ++;
+ if(new_qw_pc == 4)
+ {
+ new_qw_pc = 0;
+ new_fifo_pc ++;
}
- /* add new quadword at end of FIFO */
- memcpy(& me->fifo[++me->fifo_num_elements], src, nr_bytes);
-
- /* okay */
- return nr_bytes;
+ fq = pke_fifo_access(& me->fifo, new_fifo_pc);
+
+ /* check for FIFO underflow */
+ if(fq == NULL)
+ break;
+
+ /* skip over DMA tag words if present in word 0 or 1 */
+ if(fq->word_class[new_qw_pc] == wc_dma)
+ {
+ /* set ER0 */
+ PKE_REG_MASK_SET(me, STAT, ER0, 1);
+
+ /* mismatch error! */
+ if(! PKE_REG_MASK_GET(me, ERR, ME0))
+ {
+ pke_begin_interrupt_stall(me);
+ /* don't stall just yet -- finish this instruction */
+ /* the PPS_STALL state will be entered by pke_issue() next time */
+ }
+ /* skip by going around loop an extra time */
+ num ++;
+ }
}
+ while(num > 0);
- /* NOTREACHED */
-}
+ /* return pointer to operand word itself */
+ if(fq != NULL)
+ {
+ *operand = & fq->data[new_qw_pc];
+
+ /* annotate the word where the pseudo-PC lands as a PKE operand */
+ ASSERT(fq->word_class[new_qw_pc] == wc_pkedata || fq->word_class[new_qw_pc] == wc_unknown);
+ fq->word_class[new_qw_pc] = wc_pkedata;
+
+ /* store search results in cache */
+ /* keys */
+ me->last_fifo_pc = me->fifo_pc;
+ me->last_qw_pc = me->qw_pc;
+ /* values */
+ me->last_num = operand_num;
+ me->last_new_fifo_pc = new_fifo_pc;
+ me->last_new_qw_pc = new_qw_pc;
+ }
+ return fq;
+}
-/* Issue & swallow one PKE opcode if possible */
+/* Return pointer to given operand# in FIFO. `operand_num' starts at 1.
+   If FIFO is not full enough, return 0. Skip over DMA tags, but mark
+   them as an error (ER0).  This is a thin wrapper around
+   pke_pcrel_fifo() that discards the quadword pointer and keeps only
+   the operand-word pointer. */
-void
-pke_issue(struct pke_device* me)
+unsigned_4*
+pke_pcrel_operand(struct pke_device* me, int operand_num)
{
+ unsigned_4* operand = NULL;
+ struct fifo_quadword* fifo_operand;
+
+ fifo_operand = pke_pcrel_fifo(me, operand_num, & operand);
+ if(fifo_operand == NULL)
+ ASSERT(operand == NULL); /* pke_pcrel_fifo() ought to leave it untouched */
+ return operand;
}
+/* Return a bit-field extract of given operand# in FIFO, and its
+   word-accurate source-addr.  `bit_offset' starts at 0, referring to
+   LSB after PKE instruction word.  Width must be >0, <=32.  Assume
+   FIFO is full enough.  Skip over DMA tags, but mark them as an error
+   (ER0).
+
+   me          - PKE device whose FIFO is read
+   bit_offset  - offset of the field, in bits, from the first operand word
+   bit_width   - width of the field in bits
+   source_addr - out: source address recorded for the containing FIFO
+                 quadword, plus the byte offset of the operand word
+                 inside that quadword
+
+   Returns the extracted field.  The field is taken from the single
+   operand word selected by bit_offset/32. */
+
+unsigned_4
+pke_pcrel_operand_bits(struct pke_device* me, int bit_offset, int bit_width, unsigned_4* source_addr)
+{
+  unsigned_4* word = NULL;
+  unsigned_4 value;
+  struct fifo_quadword* fifo_operand;
+  int wordnumber, bitnumber;
+  int i;
+
+  wordnumber = bit_offset/32;
+  bitnumber = bit_offset%32;
+
+  /* find operand word with bitfield */
+  fifo_operand = pke_pcrel_fifo(me, wordnumber + 1, &word);
+  ASSERT(word != NULL);
+
+  /* extract bitfield from word */
+  value = BIT_MASK_GET(*word, bitnumber, bitnumber + bit_width - 1);
+
+  /* extract source addr from fifo word */
+  *source_addr = fifo_operand->source_address;
+
+  /* add word offset: a FIFO quadword carries four words (data[0..3]),
+     so all four slots are checked -- stopping at index 2 would leave
+     an operand in the last word without its 12-byte offset */
+  for(i=0; i<4; i++)
+    if(word == & fifo_operand->data[i])
+      *source_addr += sizeof(unsigned_4) * i;
+
+  return value;
+}
+
+
+
+/* Report whether the device selected by `what' is currently busy.
+   The chk_path* targets may only be queried on PKE1.  No PKE status
+   bits are modified.  Returns 0 iff there is no stall.  */
+int
+pke_check_stall(struct pke_device* me, enum pke_check_target what)
+{
+  unsigned_4 vu_status, gif_status;
+  int stalled = 0;
+
+  /* fetch both status words up front, whichever one is needed */
+  ASSERT(sizeof(unsigned_4) == 4);
+  PKE_MEM_READ(me, (GIF_REG_STAT),
+               & gif_status,
+               4);
+  PKE_MEM_READ(me, (COP2_REG_STAT_ADDR),
+               & vu_status,
+               4);
+
+  switch(what)
+    {
+    case chk_vu:
+      /* each PKE looks at the busy field of its own VU */
+      if(me->pke_number != 0) /* i.e. me->pke_number == 1 */
+        stalled = BIT_MASK_GET(vu_status, COP2_REG_STAT_VBS1_B, COP2_REG_STAT_VBS1_E);
+      else
+        stalled = BIT_MASK_GET(vu_status, COP2_REG_STAT_VBS0_B, COP2_REG_STAT_VBS0_E);
+      break;
+
+    case chk_path1: /* VU -> GPUIF */
+      ASSERT(me->pke_number == 1);
+      stalled = (BIT_MASK_GET(gif_status, GPUIF_REG_STAT_APATH_B, GPUIF_REG_STAT_APATH_E) == 1);
+      break;
+
+    case chk_path2: /* PKE -> GPUIF */
+      ASSERT(me->pke_number == 1);
+      stalled = (BIT_MASK_GET(gif_status, GPUIF_REG_STAT_APATH_B, GPUIF_REG_STAT_APATH_E) == 2);
+      break;
+
+    case chk_path3: /* DMA -> GPUIF */
+      ASSERT(me->pke_number == 1);
+      stalled = (BIT_MASK_GET(gif_status, GPUIF_REG_STAT_APATH_B, GPUIF_REG_STAT_APATH_E) == 3);
+      break;
+
+    default:
+      /* not a valid check target */
+      ASSERT(0);
+      break;
+    }
+
+  return stalled;
+}
+
+
+/* PKE1 only: advance the double-buffering state.  TOP takes over the
+   current TOPS, the DBF flag is inverted (in both the DBF and the
+   STAT register), and TOPS is recomputed from BASE and OFFSET for
+   the new flag value.
+
+   This is equivalent to last word from okadaa (98-02-25):
+     1) TOP=TOPS;
+     2) TOPS=BASE + !DBF*OFFSET
+     3) DBF=!DBF */
+void
+pke_flip_dbf(struct pke_device* me)
+{
+  int flipped;
+
+  /* TOP <- TOPS as it stands before the flip */
+  PKE_REG_MASK_SET(me, TOP, TOP,
+                   PKE_REG_MASK_GET(me, TOPS, TOPS));
+
+  /* invert the flag ... */
+  if(PKE_REG_MASK_GET(me, DBF, DF))
+    flipped = 0;
+  else
+    flipped = 1;
+
+  /* ... and store it in both places that carry it */
+  PKE_REG_MASK_SET(me, DBF, DF, flipped);
+  PKE_REG_MASK_SET(me, STAT, DBF, flipped);
+
+  /* TOPS <- BASE, plus OFFSET when the new flag is set */
+  PKE_REG_MASK_SET(me, TOPS, TOPS,
+                   (PKE_REG_MASK_GET(me, BASE, BASE) +
+                    flipped * PKE_REG_MASK_GET(me, OFST, OFFSET)));
+}
+
+
+/* Mark the start of an interrupt stall by raising the PIS bit in the
+   STAT register.  Actually signalling the 5900 is still an open
+   item (see XXX below).  */
+void
+pke_begin_interrupt_stall(struct pke_device* me)
+{
+  PKE_REG_MASK_SET(me, STAT, PIS, 1);
+
+  /* XXX: send interrupt to 5900? */
+}
+
+
+
+
+/* PKEcode handler functions -- responsible for checking and
+ confirming old stall conditions, executing pkecode, updating PC and
+ status registers -- may assume being run on correct PKE unit */
+
+/* NOP handler: nothing to execute; step over the instruction word and
+   report the unit as idle.  `pkecode' is not examined.  */
+void
+pke_code_nop(struct pke_device* me, unsigned_4 pkecode)
+{
+  /* consume the one-word instruction */
+  pke_pc_advance(me, 1);
+
+  /* back to idle */
+  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+}
+
+
+/* STCYCL handler: load the WL and CL fields of the CYCLE register
+   from the instruction's immediate value.  */
+void
+pke_code_stcycl(struct pke_device* me, unsigned_4 pkecode)
+{
+  int immediate = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
+  int new_wl = BIT_MASK_GET(immediate, 8, 15); /* bits 8..15 of immediate */
+  int new_cl = BIT_MASK_GET(immediate, 0, 7);  /* bits 0..7 of immediate */
+
+  /* update CYCLE, WL first and then CL */
+  PKE_REG_MASK_SET(me, CYCLE, WL, new_wl);
+  PKE_REG_MASK_SET(me, CYCLE, CL, new_cl);
+
+  /* consume the instruction word, then report idle */
+  pke_pc_advance(me, 1);
+  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+}
+
+
+/* OFFSET handler: load the OFFSET field from the immediate value,
+   reset the double-buffer flag (in both DBF and STAT), and restart
+   TOPS at BASE.  */
+void
+pke_code_offset(struct pke_device* me, unsigned_4 pkecode)
+{
+  int immediate = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
+  int new_offset = BIT_MASK_GET(immediate, 0, 9); /* low 10 bits */
+
+  PKE_REG_MASK_SET(me, OFST, OFFSET, new_offset);
+
+  /* the flag lives in two registers; clear both copies */
+  PKE_REG_MASK_SET(me, DBF, DF, 0);
+  PKE_REG_MASK_SET(me, STAT, DBF, 0);
+
+  /* TOPS <- BASE */
+  PKE_REG_MASK_SET(me, TOPS, TOPS, PKE_REG_MASK_GET(me, BASE, BASE));
+
+  /* consume the instruction word, then report idle */
+  pke_pc_advance(me, 1);
+  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+}
+
+
+/* BASE handler: load the BASE field from the low 10 bits of the
+   instruction's immediate value.  */
+void
+pke_code_base(struct pke_device* me, unsigned_4 pkecode)
+{
+  int immediate = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
+  int new_base = BIT_MASK_GET(immediate, 0, 9);
+
+  PKE_REG_MASK_SET(me, BASE, BASE, new_base);
+
+  /* consume the instruction word, then report idle */
+  pke_pc_advance(me, 1);
+  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+}
+
+
+/* ITOP handler: load the ITOPS field from the low 10 bits of the
+   instruction's immediate value.  */
+void
+pke_code_itop(struct pke_device* me, unsigned_4 pkecode)
+{
+  int immediate = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
+  int new_itops = BIT_MASK_GET(immediate, 0, 9);
+
+  PKE_REG_MASK_SET(me, ITOPS, ITOPS, new_itops);
+
+  /* consume the instruction word, then report idle */
+  pke_pc_advance(me, 1);
+  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+}
+
+
+/* STMOD handler: load the MDE field of the MODE register from the
+   two low bits of the instruction's immediate value.  */
+void
+pke_code_stmod(struct pke_device* me, unsigned_4 pkecode)
+{
+  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
+
+  /* copy 2 bits to MODE register; the bit range is inclusive, so the
+     two-bit field is bits 0..1 */
+  PKE_REG_MASK_SET(me, MODE, MDE, BIT_MASK_GET(imm, 0, 1));
+  /* done */
+  pke_pc_advance(me, 1);
+  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+}
+
+
+/* MSKPATH3 handler: forward the mask bit of the immediate value to
+   the GPUIF by writing GIF_REG_VIF_M3P.  */
+void
+pke_code_mskpath3(struct pke_device* me, unsigned_4 pkecode)
+{
+  int immediate = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
+
+  /* M3P bit set iff the mask field of the immediate is non-zero */
+  unsigned_4 m3p_word =
+    (BIT_MASK_GET(immediate, PKE_REG_MSKPATH3_B, PKE_REG_MSKPATH3_E) != 0)
+    ? GIF_REG_STAT_M3P
+    : 0;
+
+  /* write register to "read-only" register; gpuif code will look at M3P bit only */
+  PKE_MEM_WRITE(me, GIF_REG_VIF_M3P, & m3p_word, 4);
+
+  /* consume the instruction word, then report idle */
+  pke_pc_advance(me, 1);
+  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+}
+
+
+/* MARK handler: load the MARK register from the 16-bit immediate
+   value and raise the MRK flag in STAT.  */
+void
+pke_code_pkemark(struct pke_device* me, unsigned_4 pkecode)
+{
+  int immediate = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
+  int new_mark = BIT_MASK_GET(immediate, 0, 15);
+
+  PKE_REG_MASK_SET(me, MARK, MARK, new_mark);
+
+  /* set MRK bit in STAT register - CPU2 v2.1 docs incorrect */
+  PKE_REG_MASK_SET(me, STAT, MRK, 1);
+
+  /* consume the instruction word, then report idle */
+  pke_pc_advance(me, 1);
+  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+}
+
+
+/* FLUSHE handler: hold the PKE until the VU is no longer busy.  While
+   busy, STAT:PEW is set and the instruction is left in place so it
+   is retried; once idle, PEW is cleared and the PC moves on.  */
+void
+pke_code_flushe(struct pke_device* me, unsigned_4 pkecode)
+{
+  if(pke_check_stall(me, chk_vu))
+    {
+      /* VU busy: flag the wait and retry on the next cycle */
+      PKE_REG_MASK_SET(me, STAT, PEW, 1);
+      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
+      return;
+    }
+
+  /* VU idle: clear the wait flag and move on */
+  PKE_REG_MASK_SET(me, STAT, PEW, 0);
+  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+  pke_pc_advance(me, 1);
+}
+
+
+/* FLUSH handler: hold the PKE until the VU as well as GPUIF PATH1 and
+   PATH2 are idle.  STAT:PEW tracks the VU, STAT:PGW tracks the
+   paths; while anything is busy the instruction is retried.  */
+void
+pke_code_flush(struct pke_device* me, unsigned_4 pkecode)
+{
+  int vu_busy, gif_busy;
+
+  /* the VU state decides the next PEW bit */
+  vu_busy = (pke_check_stall(me, chk_vu) != 0);
+  PKE_REG_MASK_SET(me, STAT, PEW, vu_busy);
+
+  /* PATH1 / PATH2 state decides the next PGW bit */
+  gif_busy = (pke_check_stall(me, chk_path1) ||
+              pke_check_stall(me, chk_path2));
+  PKE_REG_MASK_SET(me, STAT, PGW, gif_busy);
+
+  /* go or no go */
+  if(vu_busy || gif_busy)
+    {
+      /* something is still busy: retry on the next cycle */
+      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
+      return;
+    }
+
+  /* all idle */
+  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+  pke_pc_advance(me, 1);
+}
+
+
+/* FLUSHA handler: hold the PKE until the VU and all three GPUIF
+   paths are idle.  STAT:PEW tracks the VU, STAT:PGW tracks the
+   paths; while anything is busy the instruction is retried.  */
+void
+pke_code_flusha(struct pke_device* me, unsigned_4 pkecode)
+{
+  int vu_busy, gif_busy;
+
+  /* the VU state decides the next PEW bit */
+  vu_busy = (pke_check_stall(me, chk_vu) != 0);
+  PKE_REG_MASK_SET(me, STAT, PEW, vu_busy);
+
+  /* PATH1 / PATH2 / PATH3 state decides the next PGW bit */
+  gif_busy = (pke_check_stall(me, chk_path1) ||
+              pke_check_stall(me, chk_path2) ||
+              pke_check_stall(me, chk_path3));
+  PKE_REG_MASK_SET(me, STAT, PGW, gif_busy);
+
+  if(vu_busy || gif_busy)
+    {
+      /* something is still busy: retry on the next cycle */
+      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
+      return;
+    }
+
+  /* all idle */
+  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+  pke_pc_advance(me, 1);
+}
+
+
+/* MSCAL handler: once the VU is idle, write the address given in the
+   immediate value to the VU's CIA register and copy ITOPS to ITOP.
+   While the VU is busy, STAT:PEW is set and the instruction is
+   retried.  On PKE1 the double-buffer flag is flipped first.  */
+void
+pke_code_pkemscal(struct pke_device* me, unsigned_4 pkecode)
+{
+  unsigned_4 start_pc;
+  int immediate;
+  address_word cia_addr;
+
+  if(pke_check_stall(me, chk_vu))
+    {
+      /* VU busy: flag the wait and retry on the next cycle */
+      PKE_REG_MASK_SET(me, STAT, PEW, 1);
+      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
+      return;
+    }
+
+  immediate = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
+
+  /* VU idle */
+  PKE_REG_MASK_SET(me, STAT, PEW, 0);
+
+  /* flip DBF on PKE1 */
+  if(me->pke_number == 1)
+    pke_flip_dbf(me);
+
+  /* compute new PC for VU (host byte-order) */
+  start_pc = BIT_MASK_GET(immediate, 0, 15);
+  start_pc = T2H_4(start_pc);
+
+  /* write new PC; callback function gets VU running */
+  cia_addr = (me->pke_number == 0 ? VU0_CIA : VU1_CIA);
+  ASSERT(sizeof(unsigned_4) == 4);
+  PKE_MEM_WRITE(me, cia_addr,
+                & start_pc,
+                4);
+
+  /* ITOP <- ITOPS */
+  PKE_REG_MASK_SET(me, ITOP, ITOP, PKE_REG_MASK_GET(me, ITOPS, ITOPS));
+
+  /* report idle, then consume the instruction word */
+  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+  pke_pc_advance(me, 1);
+}
+
+
+
+/* MSCNT handler: once the VU is idle, read the VU's CIA register and
+   write the same value back (the write is what gets the VU running
+   again), then copy ITOPS to ITOP.  While the VU is busy, STAT:PEW
+   is set and the instruction is retried.  On PKE1 the double-buffer
+   flag is flipped first.  */
+void
+pke_code_pkemscnt(struct pke_device* me, unsigned_4 pkecode)
+{
+  unsigned_4 current_pc;
+  address_word cia_addr;
+
+  if(pke_check_stall(me, chk_vu))
+    {
+      /* VU busy: flag the wait and retry on the next cycle */
+      PKE_REG_MASK_SET(me, STAT, PEW, 1);
+      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
+      return;
+    }
+
+  /* VU idle */
+  PKE_REG_MASK_SET(me, STAT, PEW, 0);
+
+  /* flip DBF on PKE1 */
+  if(me->pke_number == 1)
+    pke_flip_dbf(me);
+
+  cia_addr = (me->pke_number == 0 ? VU0_CIA : VU1_CIA);
+
+  /* read old PC */
+  ASSERT(sizeof(unsigned_4) == 4);
+  PKE_MEM_READ(me, cia_addr,
+               & current_pc,
+               4);
+
+  /* rewrite new PC; callback function gets VU running */
+  ASSERT(sizeof(unsigned_4) == 4);
+  PKE_MEM_WRITE(me, cia_addr,
+                & current_pc,
+                4);
+
+  /* ITOP <- ITOPS */
+  PKE_REG_MASK_SET(me, ITOP, ITOP, PKE_REG_MASK_GET(me, ITOPS, ITOPS));
+
+  /* report idle, then consume the instruction word */
+  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+  pke_pc_advance(me, 1);
+}
+
+
+/* MSCALF handler: like MSCAL, but first waits for the VU and all
+   three GPUIF paths to be idle.  STAT:PEW tracks the VU, STAT:PGW
+   tracks the paths; while anything is busy the instruction is
+   retried.  */
+void
+pke_code_pkemscalf(struct pke_device* me, unsigned_4 pkecode)
+{
+  int vu_busy, gif_busy;
+  unsigned_4 start_pc;
+  int immediate;
+  address_word cia_addr;
+
+  /* the VU state decides the next PEW bit */
+  vu_busy = (pke_check_stall(me, chk_vu) != 0);
+  PKE_REG_MASK_SET(me, STAT, PEW, vu_busy);
+
+  /* PATH1 / PATH2 / PATH3 state decides the next PGW bit */
+  gif_busy = (pke_check_stall(me, chk_path1) ||
+              pke_check_stall(me, chk_path2) ||
+              pke_check_stall(me, chk_path3));
+  PKE_REG_MASK_SET(me, STAT, PGW, gif_busy);
+
+  /* go or no go */
+  if(vu_busy || gif_busy)
+    {
+      /* something is still busy: retry on the next cycle */
+      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
+      return;
+    }
+
+  immediate = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
+
+  /* flip DBF on PKE1 */
+  if(me->pke_number == 1)
+    pke_flip_dbf(me);
+
+  /* compute new PC for VU (host byte-order) */
+  start_pc = BIT_MASK_GET(immediate, 0, 15);
+  start_pc = T2H_4(start_pc);
+
+  /* rewrite new PC; callback function gets VU running */
+  cia_addr = (me->pke_number == 0 ? VU0_CIA : VU1_CIA);
+  ASSERT(sizeof(unsigned_4) == 4);
+  PKE_MEM_WRITE(me, cia_addr,
+                & start_pc,
+                4);
+
+  /* ITOP <- ITOPS */
+  PKE_REG_MASK_SET(me, ITOP, ITOP, PKE_REG_MASK_GET(me, ITOPS, ITOPS));
+
+  /* report idle, then consume the instruction word */
+  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+  pke_pc_advance(me, 1);
+}
+
+
+/* STMASK handler: load the MASK register from the single operand
+   word that follows the instruction.  If that word is not yet in
+   the FIFO, enter the WAIT state so the instruction is retried.  */
+void
+pke_code_stmask(struct pke_device* me, unsigned_4 pkecode)
+{
+  unsigned_4* mask_word = pke_pcrel_operand(me, 1);
+
+  if(mask_word == NULL)
+    {
+      /* operand not available yet: retry on the next cycle */
+      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
+      return;
+    }
+
+  /* "transferring" operand */
+  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
+
+  /* NUM goes to 1 for the duration of the transfer ... */
+  PKE_REG_MASK_SET(me, NUM, NUM, 1);
+
+  /* fill the register */
+  PKE_REG_MASK_SET(me, MASK, MASK, *mask_word);
+
+  /* ... and back to 0 once it is complete */
+  PKE_REG_MASK_SET(me, NUM, NUM, 0);
+
+  /* report idle, then consume instruction + operand word */
+  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+  pke_pc_advance(me, 2);
+}
+
+
+/* STROW handler: load the four row registers R0..R3 from the four
+   operand words that follow the instruction.  If the fourth word is
+   not yet in the FIFO, enter the WAIT state so the instruction is
+   retried.  */
+void
+pke_code_strow(struct pke_device* me, unsigned_4 pkecode)
+{
+  int k;
+
+  /* all four operands exist iff the last one does */
+  if(pke_pcrel_operand(me, 4) == NULL)
+    {
+      /* operands not available yet: retry on the next cycle */
+      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
+      return;
+    }
+
+  /* "transferring" operand */
+  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
+
+  /* NUM goes to 1 for the duration of the transfer ... */
+  PKE_REG_MASK_SET(me, NUM, NUM, 1);
+
+  /* operand k+1 goes to row register k; exploit R0..R3 contiguity */
+  for(k=0; k<4; k++)
+    me->regs[PKE_REG_R0 + k][0] = * pke_pcrel_operand(me, k + 1);
+
+  /* ... and back to 0 once it is complete */
+  PKE_REG_MASK_SET(me, NUM, NUM, 0);
+
+  /* report idle, then consume instruction + four operand words */
+  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+  pke_pc_advance(me, 5);
+}
+
+
+/* STCOL handler: load the four column registers C0..C3 from the four
+   operand words that follow the instruction.  If the fourth word is
+   not yet in the FIFO, enter the WAIT state so the instruction is
+   retried.  */
+void
+pke_code_stcol(struct pke_device* me, unsigned_4 pkecode)
+{
+  int k;
+
+  /* all four operands exist iff the last one does */
+  if(pke_pcrel_operand(me, 4) == NULL)
+    {
+      /* operands not available yet: retry on the next cycle */
+      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
+      return;
+    }
+
+  /* "transferring" operand */
+  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
+
+  /* NUM goes to 1 for the duration of the transfer ... */
+  PKE_REG_MASK_SET(me, NUM, NUM, 1);
+
+  /* operand k+1 goes to column register k; exploit C0..C3 contiguity */
+  for(k=0; k<4; k++)
+    me->regs[PKE_REG_C0 + k][0] = * pke_pcrel_operand(me, k + 1);
+
+  /* ... and back to 0 once it is complete */
+  PKE_REG_MASK_SET(me, NUM, NUM, 0);
+
+  /* report idle, then consume instruction + four operand words */
+  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+  pke_pc_advance(me, 5);
+}
+
+
+/* MPG handler: copy `num' 64-bit VU instructions (a lower and an
+   upper opcode word each) from the operand stream into VU
+   instruction memory (VU*_MEM0), starting at 64-bit unit `imm', and
+   record the source address of each pair in the tracking table.
+
+   A NUM field of 0 stands for 0x100 instruction pairs.  The
+   instruction word must sit at word 1 or 3 of its quadword so that
+   the operand is 64-bit aligned; otherwise the PKEcode is treated as
+   an error.  The handler waits (PPS_WAIT) until the whole operand is
+   in the FIFO, and stalls (PPS_STALL) while the VU is busy; in both
+   cases the instruction is retried on the next clock.  */
+void
+pke_code_mpg(struct pke_device* me, unsigned_4 pkecode)
+{
+  unsigned_4* last_mpg_word;
+  int num = BIT_MASK_GET(pkecode, PKE_OPCODE_NUM_B, PKE_OPCODE_NUM_E);
+  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
+
+  /* assert 64-bit alignment of MPG operand */
+  if(me->qw_pc != 3 && me->qw_pc != 1)
+    {
+      /* a void function must not return an expression; call, then return */
+      pke_code_error(me, pkecode);
+      return;
+    }
+
+  /* map zero to max+1 */
+  if(num==0) num=0x100;
+
+  /* check that FIFO has a few more words for MPG operand */
+  last_mpg_word = pke_pcrel_operand(me, num*2); /* num: number of 64-bit words */
+  if(last_mpg_word != NULL)
+    {
+      /* perform implied FLUSHE */
+      if(pke_check_stall(me, chk_vu))
+        {
+          /* VU busy */
+          PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
+          /* retry this instruction next clock */
+        }
+      else
+        {
+          /* VU idle */
+          int i;
+          address_word vu_addr_base;
+          address_word vutrack_addr_base;
+          address_word vu_addr_max_size;
+
+          /* imm: in 64-bit units for MPG instruction */
+          /* VU*_MEM0 : instruction memory; these bases depend only on
+             the PKE unit, so compute them once for the whole transfer */
+          vu_addr_base = (me->pke_number == 0) ?
+            VU0_MEM0_WINDOW_START : VU1_MEM0_WINDOW_START;
+          vu_addr_max_size = (me->pke_number == 0) ?
+            VU0_MEM0_SIZE : VU1_MEM0_SIZE;
+          vutrack_addr_base = (me->pke_number == 0) ?
+            VU0_MEM0_SRCADDR_START : VU1_MEM0_SRCADDR_START;
+
+          /* "transferring" operand */
+          PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
+
+          /* set NUM */
+          PKE_REG_MASK_SET(me, NUM, NUM, num);
+
+          /* transfer VU instructions, one word-pair per iteration */
+          for(i=0; i<num; i++)
+            {
+              address_word vu_addr;
+              address_word vutrack_addr;
+              unsigned_4 vu_lower_opcode, vu_upper_opcode;
+              unsigned_4* operand;
+              unsigned_4 source_addr;
+              struct fifo_quadword* fq;
+              int next_num;
+              int j;
+
+              /* decrement NUM */
+              next_num = PKE_REG_MASK_GET(me, NUM, NUM) - 1;
+              PKE_REG_MASK_SET(me, NUM, NUM, next_num);
+
+              /* compute VU address for this word-pair */
+              vu_addr = vu_addr_base + (imm + i) * 8;
+              /* check for vu_addr overflow: wrap around within the window */
+              while(vu_addr >= vu_addr_base + vu_addr_max_size)
+                vu_addr -= vu_addr_max_size;
+
+              /* compute VU tracking address: one 4-byte entry per
+                 8-byte instruction pair */
+              vutrack_addr = vutrack_addr_base + ((signed_8)vu_addr - (signed_8)vu_addr_base) / 2;
+
+              /* Fetch operand words; assume they are already little-endian for VU imem */
+              fq = pke_pcrel_fifo(me, i*2 + 1, & operand);
+              vu_lower_opcode = *operand;
+
+              source_addr = fq->source_address;
+              /* add word offset; consider all four words data[0..3]
+                 of the quadword */
+              for(j=0; j<4; j++)
+                if(operand == & fq->data[j])
+                  {
+                    source_addr += sizeof(unsigned_4) * j;
+                    break;
+                  }
+
+              fq = pke_pcrel_fifo(me, i*2 + 2, & operand);
+              vu_upper_opcode = *operand;
+
+              /* write data into VU memory */
+              /* lower (scalar) opcode comes in first word ; macro performs H2T! */
+              PKE_MEM_WRITE(me, vu_addr,
+                            & vu_lower_opcode,
+                            4);
+              /* upper (vector) opcode comes in second word ; H2T */
+              ASSERT(sizeof(unsigned_4) == 4);
+              PKE_MEM_WRITE(me, vu_addr + 4,
+                            & vu_upper_opcode,
+                            4);
+
+              /* write tracking address in target byte-order */
+              ASSERT(sizeof(unsigned_4) == 4);
+              PKE_MEM_WRITE(me, vutrack_addr,
+                            & source_addr,
+                            4);
+            } /* VU xfer loop */
+
+          /* check NUM */
+          ASSERT(PKE_REG_MASK_GET(me, NUM, NUM) == 0);
+
+          /* done: skip the instruction word plus two words per pair */
+          PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+          pke_pc_advance(me, 1 + num*2);
+        }
+    } /* if FIFO full enough */
+  else
+    {
+      /* need to wait for another word */
+      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
+      /* retry this instruction next clock */
+    }
+}
+
+
+/* DIRECT handler: pass `imm' quadwords from the operand stream on to
+   the GPUIF PATH2 FIFO.
+
+   An IMM field of 0 stands for 0x10000 quadwords.  The instruction
+   word must be the last word (index 3) of its quadword so that the
+   operand is 128-bit aligned; otherwise the PKEcode is treated as an
+   error.  Until the whole operand is in the FIFO the handler enters
+   PPS_WAIT and the instruction is retried on the next clock.  */
+void
+pke_code_direct(struct pke_device* me, unsigned_4 pkecode)
+{
+  /* check that FIFO has a few more words for DIRECT operand */
+  unsigned_4* last_direct_word;
+  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
+
+  /* assert 128-bit alignment of DIRECT operand */
+  if(me->qw_pc != 3)
+    {
+      /* a void function must not return an expression; call, then return */
+      pke_code_error(me, pkecode);
+      return;
+    }
+
+  /* map zero to max+1 */
+  if(imm==0) imm=0x10000;
+
+  last_direct_word = pke_pcrel_operand(me, imm*4); /* imm: number of 128-bit words */
+  if(last_direct_word != NULL)
+    {
+      int i;
+      unsigned_16 fifo_data;
+
+      /* "transferring" operand */
+      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
+
+      /* transfer GPUIF quadwords, one word per iteration */
+      for(i=0; i<imm*4; i++)
+        {
+          unsigned_4* operand = pke_pcrel_operand(me, 1+i);
+
+          /* collect word into quadword; words are stored in reverse
+             index order (first operand word goes to slot 3) */
+          *A4_16(&fifo_data, 3 - (i % 4)) = *operand;
+
+          /* write to GPUIF FIFO only with full quadword */
+          if(i % 4 == 3)
+            {
+              ASSERT(sizeof(fifo_data) == 16);
+              PKE_MEM_WRITE(me, GIF_PATH2_FIFO_ADDR,
+                            & fifo_data,
+                            16);
+            } /* write collected quadword */
+        } /* GPUIF xfer loop */
+
+      /* done: skip the instruction word plus four words per quadword */
+      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+      pke_pc_advance(me, 1 + imm*4);
+    } /* if FIFO full enough */
+  else
+    {
+      /* need to wait for another word */
+      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
+      /* retry this instruction next clock */
+    }
+}
+
+
+void
+pke_code_directhl(struct pke_device* me, unsigned_4 pkecode)
+{
+ /* DIRECTHL is not handled separately: treat it the same as DIRECT by
+    delegating to that handler, which performs the alignment check,
+    the transfer, and the PC / status update */
+ pke_code_direct(me, pkecode);
+}
+
+
+/* UNPACK handler: expand packed vectors from the operand stream into
+   quadwords and store them in VU data memory (VU*_MEM1), recording
+   the source address of each vector in the tracking table.
+
+   The unpack shape (vn, vl), the mask-enable bit (m), the
+   destination address and the R / USN indicator bits are decoded
+   from `pkecode'; the cycle controls CL / WL come from the CYCLE
+   register.  A NUM field of 0 stands for 0x100 vectors, a WL of 0
+   for 0x100.  Illegal shapes are treated as a PKEcode error.  Until
+   all operand words are in the FIFO the handler enters PPS_WAIT and
+   the instruction is retried on the next clock.  */
+void
+pke_code_unpack(struct pke_device* me, unsigned_4 pkecode)
+{
+  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
+  int cmd = BIT_MASK_GET(pkecode, PKE_OPCODE_CMD_B, PKE_OPCODE_CMD_E);
+  int num = BIT_MASK_GET(pkecode, PKE_OPCODE_NUM_B, PKE_OPCODE_NUM_E);
+  int nummx = (num == 0) ? 0x0100 : num;
+  short vn = BIT_MASK_GET(cmd, 2, 3); /* unpack shape controls */
+  short vl = BIT_MASK_GET(cmd, 0, 1);
+  int m = BIT_MASK_GET(cmd, 4, 4);
+  short cl = PKE_REG_MASK_GET(me, CYCLE, CL); /* cycle controls */
+  short wl = PKE_REG_MASK_GET(me, CYCLE, WL);
+  short addrwl = (wl == 0) ? 0x0100 : wl;
+  int r = BIT_MASK_GET(imm, 15, 15); /* indicator bits in imm value */
+  int usn = BIT_MASK_GET(imm, 14, 14);
+
+  int n, num_operands;
+  unsigned_4* last_operand_word = NULL;
+
+  /* catch all illegal UNPACK variants */
+  if(vl == 3 && vn < 3)
+    {
+      pke_code_error(me, pkecode);
+      return;
+    }
+
+  /* compute PKEcode length, as given in CPU2 spec, v2.1 pg. 11 */
+  /* Use the zero-mapped vector count (nummx) in both branches: with
+     the raw NUM field, NUM==0 would yield zero operand words although
+     0x100 vectors are transferred below.  */
+  if(cl >= addrwl)
+    n = nummx;
+  else
+    n = cl * (nummx / addrwl) + PKE_LIMIT(nummx % addrwl, cl);
+  num_operands = (31 + (32 >> vl) * (vn+1) * n)/32; /* round up to next word */
+
+  /* confirm that FIFO has enough words in it */
+  if(num_operands > 0)
+    last_operand_word = pke_pcrel_operand(me, num_operands);
+  if(last_operand_word != NULL || num_operands == 0)
+    {
+      address_word vu_addr_base, vutrack_addr_base;
+      address_word vu_addr_max_size;
+      int vector_num_out, vector_num_in;
+
+      /* "transferring" operand */
+      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
+
+      /* don't check whether VU is idle */
+
+      /* compute VU address base */
+      if(me->pke_number == 0)
+        {
+          vu_addr_base = VU0_MEM1_WINDOW_START;
+          vu_addr_max_size = VU0_MEM1_SIZE;
+          vutrack_addr_base = VU0_MEM1_SRCADDR_START;
+          /* the R bit is ignored on PKE0 */
+          r = 0;
+        }
+      else
+        {
+          vu_addr_base = VU1_MEM1_WINDOW_START;
+          vu_addr_max_size = VU1_MEM1_SIZE;
+          vutrack_addr_base = VU1_MEM1_SRCADDR_START;
+        }
+
+      /* set NUM */
+      PKE_REG_MASK_SET(me, NUM, NUM, nummx);
+
+      /* transfer given number of vectors */
+      vector_num_out = 0;  /* output vector number being processed */
+      vector_num_in = 0;  /* argument vector number being processed */
+      do
+        {
+          quadword vu_old_data;
+          quadword vu_new_data;
+          quadword unpacked_data;
+          address_word vu_addr;
+          address_word vutrack_addr;
+          unsigned_4 source_addr = 0;
+          int i;
+          int next_num;
+
+          /* decrement NUM */
+          next_num = PKE_REG_MASK_GET(me, NUM, NUM) - 1;
+          PKE_REG_MASK_SET(me, NUM, NUM, next_num);
+
+          /* compute VU destination address, as bytes in R5900 memory */
+          if(cl >= wl)
+            {
+              /* skip CL quadwords for every block of (zero-mapped) WL
+                 output vectors */
+              vu_addr = vu_addr_base + 16 * (BIT_MASK_GET(imm, 0, 9) +
+                                             (vector_num_out / addrwl) * cl +
+                                             (vector_num_out % addrwl));
+            }
+          else
+            vu_addr = vu_addr_base + 16 * (BIT_MASK_GET(imm, 0, 9) +
+                                           vector_num_out);
+
+          /* handle "R" double-buffering bit */
+          if(r)
+            vu_addr += 16 * PKE_REG_MASK_GET(me, TOPS, TOPS);
+
+          /* check for vu_addr overflow: wrap around within the window */
+          while(vu_addr >= vu_addr_base + vu_addr_max_size)
+            vu_addr -= vu_addr_max_size;
+
+          /* compute address of tracking table entry: one 4-byte entry
+             per 16-byte quadword */
+          vutrack_addr = vutrack_addr_base + ((signed_8)vu_addr - (signed_8)vu_addr_base) / 4;
+
+          /* read old VU data word at address; reverse words if needed */
+          {
+            unsigned_16 vu_old_badwords;
+            ASSERT(sizeof(vu_old_badwords) == 16);
+            PKE_MEM_READ(me, vu_addr,
+                         &vu_old_badwords, 16);
+            vu_old_data[0] = * A4_16(& vu_old_badwords, 3);
+            vu_old_data[1] = * A4_16(& vu_old_badwords, 2);
+            vu_old_data[2] = * A4_16(& vu_old_badwords, 1);
+            vu_old_data[3] = * A4_16(& vu_old_badwords, 0);
+          }
+
+          /* For cyclic unpack, next operand quadword may come from instruction stream
+             or be zero. */
+          if((cl < addrwl) &&
+             (vector_num_out % addrwl) >= cl)
+            {
+              /* clear operand - used only in a "indeterminate" state */
+              for(i = 0; i < 4; i++)
+                unpacked_data[i] = 0;
+            }
+          else
+            {
+              /* compute packed vector dimensions */
+              int vectorbits = 0, unitbits = 0;
+
+              if(vl < 3) /* PKE_UNPACK_*_{32,16,8} */
+                {
+                  unitbits = (32 >> vl);
+                  vectorbits = unitbits * (vn+1);
+                }
+              else if(vl == 3 && vn == 3) /* PKE_UNPACK_V4_5 */
+                {
+                  unitbits = 5;
+                  vectorbits = 16;
+                }
+              else /* illegal unpack variant */
+                {
+                  /* should have been caught at top of function */
+                  ASSERT(0);
+                }
+
+              /* loop over columns */
+              for(i=0; i<=vn; i++)
+                {
+                  unsigned_4 operand;
+
+                  /* offset in bits in current operand word */
+                  int bitoffset =
+                    (vector_num_in * vectorbits) + (i * unitbits); /* # of bits from PKEcode */
+
+                  /* last unit of V4_5 is only one bit wide; the offset
+                     above was still computed with the 5-bit unit size */
+                  if(vl == 3 && vn == 3 && i == 3) /* PKE_UNPACK_V4_5 */
+                    unitbits = 1;
+
+                  /* confirm we're not reading more than we said we needed */
+                  if(vector_num_in * vectorbits >= num_operands * 32)
+                    {
+                      /* this condition may be triggered by illegal
+                         PKEcode / CYCLE combinations. */
+                      pke_code_error(me, pkecode);
+                      /* XXX: this case needs to be better understood,
+                         and detected at a better time. */
+                      return;
+                    }
+
+                  /* fetch bitfield operand */
+                  operand = pke_pcrel_operand_bits(me, bitoffset, unitbits, & source_addr);
+
+                  /* selectively sign-extend; not for V4_5 1-bit value */
+                  if(usn || unitbits == 1)
+                    unpacked_data[i] = operand;
+                  else
+                    unpacked_data[i] = SEXT32(operand, unitbits-1);
+                }
+
+              /* set remaining top words in vector */
+              for(i=vn+1; i<4; i++)
+                {
+                  if(vn == 0) /* S_{32,16,8}: copy lowest element */
+                    unpacked_data[i] = unpacked_data[0];
+                  else
+                    unpacked_data[i] = 0;
+                }
+
+              /* consumed a vector from the PKE instruction stream */
+              vector_num_in ++;
+            } /* unpack word from instruction operand */
+
+          /* process STMOD register for accumulation operations */
+          switch(PKE_REG_MASK_GET(me, MODE, MDE))
+            {
+            case PKE_MODE_ADDROW: /* add row registers to output data */
+            case PKE_MODE_ACCROW: /* same .. later conditionally accumulate */
+              for(i=0; i<4; i++)
+                /* exploit R0..R3 contiguity */
+                unpacked_data[i] += me->regs[PKE_REG_R0 + i][0];
+              break;
+
+            case PKE_MODE_INPUT: /* pass data through */
+            default: /* specified as undefined */
+              ;
+            }
+
+          /* compute replacement word */
+          if(m) /* use mask register? */
+            {
+              /* compute index into mask register for this word */
+              int mask_index = PKE_LIMIT(vector_num_out % addrwl, 3);
+
+              for(i=0; i<4; i++) /* loop over columns */
+                {
+                  int mask_op = PKE_MASKREG_GET(me, mask_index, i);
+                  unsigned_4* masked_value = NULL;
+
+                  switch(mask_op)
+                    {
+                    case PKE_MASKREG_INPUT:
+                      masked_value = & unpacked_data[i];
+
+                      /* conditionally accumulate */
+                      if(PKE_REG_MASK_GET(me, MODE, MDE) == PKE_MODE_ACCROW)
+                        me->regs[PKE_REG_R0 + i][0] = unpacked_data[i];
+
+                      break;
+
+                    case PKE_MASKREG_ROW: /* exploit R0..R3 contiguity */
+                      masked_value = & me->regs[PKE_REG_R0 + i][0];
+                      break;
+
+                    case PKE_MASKREG_COLUMN: /* exploit C0..C3 contiguity */
+                      masked_value = & me->regs[PKE_REG_C0 + mask_index][0];
+                      break;
+
+                    case PKE_MASKREG_NOTHING:
+                      /* "write inhibit" by re-copying old data */
+                      masked_value = & vu_old_data[i];
+                      break;
+
+                    default:
+                      ASSERT(0);
+                      /* no other cases possible */
+                    }
+
+                  /* copy masked value for column */
+                  vu_new_data[i] = *masked_value;
+                } /* loop over columns */
+            } /* mask */
+          else
+            {
+              /* no mask - just copy over entire unpacked quadword */
+              memcpy(vu_new_data, unpacked_data, sizeof(unpacked_data));
+
+              /* conditionally store accumulated row results */
+              if(PKE_REG_MASK_GET(me, MODE, MDE) == PKE_MODE_ACCROW)
+                for(i=0; i<4; i++)
+                  me->regs[PKE_REG_R0 + i][0] = unpacked_data[i];
+            }
+
+          /* write new VU data word at address; reverse words if needed */
+          {
+            unsigned_16 vu_new_badwords;
+            * A4_16(& vu_new_badwords, 3) = vu_new_data[0];
+            * A4_16(& vu_new_badwords, 2) = vu_new_data[1];
+            * A4_16(& vu_new_badwords, 1) = vu_new_data[2];
+            * A4_16(& vu_new_badwords, 0) = vu_new_data[3];
+            ASSERT(sizeof(vu_new_badwords) == 16);
+            PKE_MEM_WRITE(me, vu_addr,
+                          &vu_new_badwords, 16);
+          }
+
+          /* write tracking address; stays 0 for vectors that did not
+             come from the operand stream */
+          ASSERT(sizeof(unsigned_4) == 4);
+          PKE_MEM_WRITE(me, vutrack_addr,
+                        & source_addr,
+                        4);
+
+          /* next vector please */
+          vector_num_out ++;
+        } /* vector transfer loop */
+      while(PKE_REG_MASK_GET(me, NUM, NUM) > 0);
+
+      /* confirm we've written as many vectors as told */
+      ASSERT(nummx == vector_num_out);
+
+      /* done: skip the instruction word plus all operand words */
+      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+      pke_pc_advance(me, 1 + num_operands);
+    } /* PKE FIFO full enough */
+  else
+    {
+      /* need to wait for another word */
+      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
+      /* retry this instruction next clock */
+    }
+}
+
+
+/* Handler for faulty PKEcodes: raise STAT:ER1, then either carry on
+   in the idle state (when ERR:ME1 is set) or begin an interrupt
+   stall.  In both cases the PC is moved past the offending word.  */
+void
+pke_code_error(struct pke_device* me, unsigned_4 pkecode)
+{
+  /* record the error */
+  PKE_REG_MASK_SET(me, STAT, ER1, 1);
+
+  if(PKE_REG_MASK_GET(me, ERR, ME1))
+    {
+      /* ME1 set: no stall, just go idle */
+      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+    }
+  else
+    {
+      /* ME1 clear: stall on the error */
+      pke_begin_interrupt_stall(me);
+      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
+    }
+
+  /* step over the faulty word */
+  pke_pc_advance(me, 1);
+}