Commit | Line | Data |
---|---|---|
aea481da DE |
1 | /* Copyright (C) 1998, Cygnus Solutions */ |
2 | ||
e2306992 | 3 | |
10572b6a AC |
4 | #include "config.h" |
5 | ||
aea481da | 6 | #include <stdlib.h> |
15232df4 | 7 | #include "sim-main.h" |
db6dac32 | 8 | #include "sim-bits.h" |
fba9bfed | 9 | #include "sim-assert.h" |
15232df4 FCE |
10 | #include "sky-pke.h" |
11 | #include "sky-dma.h" | |
12 | #include "sky-vu.h" | |
fba9bfed | 13 | #include "sky-gpuif.h" |
fd909089 | 14 | #include "sky-device.h" |
aea481da | 15 | |
15232df4 | 16 | |
10572b6a AC |
17 | #ifdef HAVE_STRING_H |
18 | #include <string.h> | |
19 | #else | |
20 | #ifdef HAVE_STRINGS_H | |
21 | #include <strings.h> | |
22 | #endif | |
23 | #endif | |
24 | ||
aea481da DE |
25 | |
26 | /* Internal function declarations */ | |
27 | ||
28 | static int pke_io_read_buffer(device*, void*, int, address_word, | |
29 | unsigned, sim_cpu*, sim_cia); | |
30 | static int pke_io_write_buffer(device*, const void*, int, address_word, | |
31 | unsigned, sim_cpu*, sim_cia); | |
9614fb3c | 32 | static void pke_reset(struct pke_device*); |
e2306992 | 33 | static void pke_issue(SIM_DESC, struct pke_device*); |
fba9bfed | 34 | static void pke_pc_advance(struct pke_device*, int num_words); |
9614fb3c FCE |
35 | static struct fifo_quadword* pke_pcrel_fifo(struct pke_device*, int operand_num, |
36 | unsigned_4** operand); | |
37 | static unsigned_4* pke_pcrel_operand(struct pke_device*, int operand_num); | |
38 | static unsigned_4 pke_pcrel_operand_bits(struct pke_device*, int bit_offset, | |
39 | int bit_width, unsigned_4* sourceaddr); | |
fba9bfed | 40 | static void pke_attach(SIM_DESC sd, struct pke_device* me); |
43a6998b FCE |
41 | enum pke_check_target { chk_vu, chk_path1, chk_path2, chk_path3 }; |
42 | static int pke_check_stall(struct pke_device* me, enum pke_check_target what); | |
43 | static void pke_flip_dbf(struct pke_device* me); | |
9614fb3c | 44 | static void pke_begin_interrupt_stall(struct pke_device* me); |
43a6998b FCE |
45 | /* PKEcode handlers */ |
46 | static void pke_code_nop(struct pke_device* me, unsigned_4 pkecode); | |
47 | static void pke_code_stcycl(struct pke_device* me, unsigned_4 pkecode); | |
48 | static void pke_code_offset(struct pke_device* me, unsigned_4 pkecode); | |
49 | static void pke_code_base(struct pke_device* me, unsigned_4 pkecode); | |
50 | static void pke_code_itop(struct pke_device* me, unsigned_4 pkecode); | |
51 | static void pke_code_stmod(struct pke_device* me, unsigned_4 pkecode); | |
52 | static void pke_code_mskpath3(struct pke_device* me, unsigned_4 pkecode); | |
53 | static void pke_code_pkemark(struct pke_device* me, unsigned_4 pkecode); | |
54 | static void pke_code_flushe(struct pke_device* me, unsigned_4 pkecode); | |
55 | static void pke_code_flush(struct pke_device* me, unsigned_4 pkecode); | |
56 | static void pke_code_flusha(struct pke_device* me, unsigned_4 pkecode); | |
57 | static void pke_code_pkemscal(struct pke_device* me, unsigned_4 pkecode); | |
58 | static void pke_code_pkemscnt(struct pke_device* me, unsigned_4 pkecode); | |
59 | static void pke_code_pkemscalf(struct pke_device* me, unsigned_4 pkecode); | |
60 | static void pke_code_stmask(struct pke_device* me, unsigned_4 pkecode); | |
61 | static void pke_code_strow(struct pke_device* me, unsigned_4 pkecode); | |
62 | static void pke_code_stcol(struct pke_device* me, unsigned_4 pkecode); | |
63 | static void pke_code_mpg(struct pke_device* me, unsigned_4 pkecode); | |
64 | static void pke_code_direct(struct pke_device* me, unsigned_4 pkecode); | |
65 | static void pke_code_directhl(struct pke_device* me, unsigned_4 pkecode); | |
66 | static void pke_code_unpack(struct pke_device* me, unsigned_4 pkecode); | |
67 | static void pke_code_error(struct pke_device* me, unsigned_4 pkecode); | |
15232df4 FCE |
68 | unsigned_4 pke_fifo_flush(struct pke_fifo*); |
69 | void pke_fifo_reset(struct pke_fifo*); | |
70 | struct fifo_quadword* pke_fifo_fit(struct pke_fifo*); | |
71 | struct fifo_quadword* pke_fifo_access(struct pke_fifo*, unsigned_4 qwnum); | |
72 | void pke_fifo_old(struct pke_fifo*, unsigned_4 qwnum); | |
fba9bfed | 73 | |
aea481da DE |
74 | |
75 | ||
76 | /* Static data */ | |
77 | ||
78 | struct pke_device pke0_device = | |
79 | { | |
80 | { "pke0", &pke_io_read_buffer, &pke_io_write_buffer }, /* device */ | |
81 | 0, 0, /* ID, flags */ | |
aea481da | 82 | {}, /* regs */ |
db6dac32 | 83 | {}, 0, /* FIFO write buffer */ |
9614fb3c FCE |
84 | { NULL, 0, 0, 0 }, /* FIFO */ |
85 | NULL, /* FIFO trace file */ | |
121d6745 | 86 | -1, -1, 0, 0, 0, /* invalid FIFO cache */ |
fba9bfed | 87 | 0, 0 /* pc */ |
aea481da DE |
88 | }; |
89 | ||
90 | ||
91 | struct pke_device pke1_device = | |
92 | { | |
93 | { "pke1", &pke_io_read_buffer, &pke_io_write_buffer }, /* device */ | |
94 | 1, 0, /* ID, flags */ | |
aea481da | 95 | {}, /* regs */ |
db6dac32 | 96 | {}, 0, /* FIFO write buffer */ |
9614fb3c FCE |
97 | { NULL, 0, 0, 0 }, /* FIFO */ |
98 | NULL, /* FIFO trace file */ | |
121d6745 | 99 | -1, -1, 0, 0, 0, /* invalid FIFO cache */ |
fba9bfed | 100 | 0, 0 /* pc */ |
aea481da DE |
101 | }; |
102 | ||
103 | ||
104 | ||
105 | /* External functions */ | |
106 | ||
107 | ||
fba9bfed | 108 | /* Attach PKE addresses to main memory */ |
aea481da DE |
109 | |
110 | void | |
111 | pke0_attach(SIM_DESC sd) | |
112 | { | |
fba9bfed | 113 | pke_attach(sd, & pke0_device); |
9614fb3c | 114 | pke_reset(& pke0_device); |
fba9bfed | 115 | } |
aea481da | 116 | |
fba9bfed FCE |
117 | void |
118 | pke1_attach(SIM_DESC sd) | |
119 | { | |
120 | pke_attach(sd, & pke1_device); | |
9614fb3c | 121 | pke_reset(& pke1_device); |
aea481da DE |
122 | } |
123 | ||
124 | ||
fba9bfed FCE |
125 | |
126 | /* Issue a PKE instruction if possible */ | |
aea481da DE |
127 | |
128 | void | |
e2306992 | 129 | pke0_issue(SIM_DESC sd) |
aea481da | 130 | { |
e2306992 | 131 | pke_issue(sd, & pke0_device); |
fba9bfed FCE |
132 | } |
133 | ||
134 | void | |
e2306992 | 135 | pke1_issue(SIM_DESC sd) |
fba9bfed | 136 | { |
653c2590 | 137 | pke_issue(sd, & pke1_device); |
fba9bfed FCE |
138 | } |
139 | ||
140 | ||
141 | ||
142 | /* Internal functions */ | |
143 | ||
144 | ||
145 | /* Attach PKE memory regions to simulator */ | |
146 | ||
147 | void | |
148 | pke_attach(SIM_DESC sd, struct pke_device* me) | |
149 | { | |
150 | /* register file */ | |
e2306992 | 151 | sim_core_attach (sd, NULL, 0, access_read_write, 0, |
fba9bfed | 152 | (me->pke_number == 0) ? PKE0_REGISTER_WINDOW_START : PKE1_REGISTER_WINDOW_START, |
aea481da DE |
153 | PKE_REGISTER_WINDOW_SIZE /*nr_bytes*/, |
154 | 0 /*modulo*/, | |
e2306992 | 155 | (device*) me, |
aea481da DE |
156 | NULL /*buffer*/); |
157 | ||
fba9bfed | 158 | /* FIFO port */ |
e2306992 | 159 | sim_core_attach (sd, NULL, 0, access_read_write, 0, |
fba9bfed | 160 | (me->pke_number == 0) ? PKE0_FIFO_ADDR : PKE1_FIFO_ADDR, |
aea481da DE |
161 | sizeof(quadword) /*nr_bytes*/, |
162 | 0 /*modulo*/, | |
e2306992 | 163 | (device*) me, |
aea481da | 164 | NULL /*buffer*/); |
aea481da | 165 | |
534a3d5c | 166 | /* VU MEM0 tracking table */ |
e2306992 | 167 | sim_core_attach (sd, NULL, 0, access_read_write, 0, |
534a3d5c FCE |
168 | ((me->pke_number == 0) ? VU0_MEM0_SRCADDR_START : VU1_MEM0_SRCADDR_START), |
169 | ((me->pke_number == 0) ? VU0_MEM0_SIZE : VU1_MEM0_SIZE) / 2, | |
fba9bfed | 170 | 0 /*modulo*/, |
534a3d5c FCE |
171 | NULL, |
172 | NULL /*buffer*/); | |
173 | ||
174 | /* VU MEM1 tracking table */ | |
175 | sim_core_attach (sd, NULL, 0, access_read_write, 0, | |
176 | ((me->pke_number == 0) ? VU0_MEM1_SRCADDR_START : VU1_MEM1_SRCADDR_START), | |
177 | ((me->pke_number == 0) ? VU0_MEM1_SIZE : VU1_MEM1_SIZE) / 4, | |
178 | 0 /*modulo*/, | |
179 | NULL, | |
180 | NULL /*buffer*/); | |
181 | ||
e2306992 FCE |
182 | |
183 | /* attach to trace file if appropriate */ | |
184 | { | |
185 | char trace_envvar[80]; | |
186 | char* trace_filename = NULL; | |
187 | sprintf(trace_envvar, "VIF%d_TRACE_FILE", me->pke_number); | |
188 | trace_filename = getenv(trace_envvar); | |
189 | if(trace_filename != NULL) | |
190 | { | |
191 | me->fifo_trace_file = fopen(trace_filename, "w"); | |
192 | if(me->fifo_trace_file == NULL) | |
e2306992 | 193 | perror("VIF FIFO trace error on fopen"); |
b4d2f483 FCE |
194 | else |
195 | setvbuf(me->fifo_trace_file, NULL, _IOLBF, 0); | |
e2306992 FCE |
196 | } |
197 | } | |
aea481da DE |
198 | } |
199 | ||
c567d0b9 RU |
200 | /* Read PKE Pseudo-PC into buf in target order */ |
201 | int | |
202 | read_pke_pc (struct pke_device *me, void *buf) | |
203 | { | |
204 | *((int *) buf) = H2T_4( me->fifo_pc ); | |
205 | return 4; | |
206 | } | |
207 | ||
208 | /* Read PKE reg into buf in target order */ | |
209 | int | |
210 | read_pke_reg (struct pke_device *me, int reg_num, void *buf) | |
211 | { | |
212 | /* handle reads to individual registers; clear `readable' on error */ | |
213 | switch (reg_num) | |
214 | { | |
215 | /* handle common case of register reading, side-effect free */ | |
216 | /* PKE1-only registers*/ | |
217 | case PKE_REG_BASE: | |
218 | case PKE_REG_OFST: | |
219 | case PKE_REG_TOPS: | |
220 | case PKE_REG_TOP: | |
221 | case PKE_REG_DBF: | |
222 | if (me->pke_number == 0) | |
223 | { | |
224 | *((int *) buf) = 0; | |
225 | break; | |
226 | } | |
227 | /* fall through */ | |
228 | ||
229 | /* PKE0 & PKE1 common registers*/ | |
230 | case PKE_REG_STAT: | |
231 | case PKE_REG_ERR: | |
232 | case PKE_REG_MARK: | |
233 | case PKE_REG_CYCLE: | |
234 | case PKE_REG_MODE: | |
235 | case PKE_REG_NUM: | |
236 | case PKE_REG_MASK: | |
237 | case PKE_REG_CODE: | |
238 | case PKE_REG_ITOPS: | |
239 | case PKE_REG_ITOP: | |
240 | case PKE_REG_R0: | |
241 | case PKE_REG_R1: | |
242 | case PKE_REG_R2: | |
243 | case PKE_REG_R3: | |
244 | case PKE_REG_C0: | |
245 | case PKE_REG_C1: | |
246 | case PKE_REG_C2: | |
247 | case PKE_REG_C3: | |
248 | *((int *) buf) = H2T_4(me->regs[reg_num][0]); | |
249 | break; | |
250 | ||
251 | /* handle common case of write-only registers */ | |
252 | case PKE_REG_FBRST: | |
253 | *((int *) buf) = 0; | |
254 | break; | |
255 | ||
256 | default: | |
257 | ASSERT(0); /* tests above should prevent this possibility */ | |
258 | } | |
259 | ||
260 | return 4; | |
261 | } | |
aea481da DE |
262 | |
263 | ||
aea481da DE |
264 | /* Handle a PKE read; return no. of bytes read */ |
265 | ||
266 | int | |
267 | pke_io_read_buffer(device *me_, | |
268 | void *dest, | |
269 | int space, | |
270 | address_word addr, | |
271 | unsigned nr_bytes, | |
fba9bfed | 272 | sim_cpu *cpu, |
aea481da DE |
273 | sim_cia cia) |
274 | { | |
275 | /* downcast to gather embedding pke_device struct */ | |
276 | struct pke_device* me = (struct pke_device*) me_; | |
277 | ||
fba9bfed FCE |
278 | /* find my address ranges */ |
279 | address_word my_reg_start = | |
280 | (me->pke_number == 0) ? PKE0_REGISTER_WINDOW_START : PKE1_REGISTER_WINDOW_START; | |
281 | address_word my_fifo_addr = | |
282 | (me->pke_number == 0) ? PKE0_FIFO_ADDR : PKE1_FIFO_ADDR; | |
283 | ||
aea481da DE |
284 | /* enforce that an access does not span more than one quadword */ |
285 | address_word low = ADDR_TRUNC_QW(addr); | |
286 | address_word high = ADDR_TRUNC_QW(addr + nr_bytes - 1); | |
287 | if(low != high) | |
288 | return 0; | |
289 | ||
290 | /* classify address & handle */ | |
fba9bfed | 291 | if((addr >= my_reg_start) && (addr < my_reg_start + PKE_REGISTER_WINDOW_SIZE)) |
aea481da DE |
292 | { |
293 | /* register bank */ | |
fba9bfed FCE |
294 | int reg_num = ADDR_TRUNC_QW(addr - my_reg_start) >> 4; |
295 | int reg_byte = ADDR_OFFSET_QW(addr); /* find byte-offset inside register bank */ | |
fba9bfed FCE |
296 | quadword result; |
297 | ||
298 | /* clear result */ | |
299 | result[0] = result[1] = result[2] = result[3] = 0; | |
aea481da | 300 | |
c567d0b9 | 301 | read_pke_reg (me, reg_num, result); |
aea481da | 302 | |
fba9bfed | 303 | /* perform transfer & return */ |
c567d0b9 | 304 | memcpy(dest, ((unsigned_1*) &result) + reg_byte, nr_bytes); |
aea481da | 305 | |
b4d2f483 | 306 | return nr_bytes; |
aea481da DE |
307 | /* NOTREACHED */ |
308 | } | |
fba9bfed FCE |
309 | else if(addr >= my_fifo_addr && |
310 | addr < my_fifo_addr + sizeof(quadword)) | |
aea481da DE |
311 | { |
312 | /* FIFO */ | |
313 | ||
fba9bfed FCE |
314 | /* FIFO is not readable: return a word of zeroes */ |
315 | memset(dest, 0, nr_bytes); | |
316 | return nr_bytes; | |
aea481da DE |
317 | } |
318 | ||
319 | /* NOTREACHED */ | |
fba9bfed | 320 | return 0; |
aea481da DE |
321 | } |
322 | ||
c567d0b9 RU |
323 | /* Write PKE reg from buf, which is in target order */ |
324 | int | |
325 | write_pke_reg (struct pke_device *me, int reg_num, const void *buf) | |
326 | { | |
327 | int writeable = 1; | |
328 | /* make words host-endian */ | |
329 | unsigned_4 input = T2H_4( *((unsigned_4 *) buf) ); | |
330 | ||
331 | /* handle writes to individual registers; clear `writeable' on error */ | |
332 | switch (reg_num) | |
333 | { | |
334 | case PKE_REG_FBRST: | |
335 | /* Order these tests from least to most overriding, in case | |
336 | multiple bits are set. */ | |
337 | if(BIT_MASK_GET(input, PKE_REG_FBRST_STC_B, PKE_REG_FBRST_STC_E)) | |
338 | { | |
339 | /* clear a bunch of status bits */ | |
340 | PKE_REG_MASK_SET(me, STAT, PSS, 0); | |
341 | PKE_REG_MASK_SET(me, STAT, PFS, 0); | |
342 | PKE_REG_MASK_SET(me, STAT, PIS, 0); | |
343 | PKE_REG_MASK_SET(me, STAT, INT, 0); | |
344 | PKE_REG_MASK_SET(me, STAT, ER0, 0); | |
345 | PKE_REG_MASK_SET(me, STAT, ER1, 0); | |
346 | me->flags &= ~PKE_FLAG_PENDING_PSS; | |
347 | /* will allow resumption of possible stalled instruction */ | |
348 | } | |
349 | if(BIT_MASK_GET(input, PKE_REG_FBRST_STP_B, PKE_REG_FBRST_STP_E)) | |
350 | { | |
351 | me->flags |= PKE_FLAG_PENDING_PSS; | |
352 | } | |
353 | if(BIT_MASK_GET(input, PKE_REG_FBRST_FBK_B, PKE_REG_FBRST_FBK_E)) | |
354 | { | |
355 | PKE_REG_MASK_SET(me, STAT, PFS, 1); | |
356 | } | |
357 | if(BIT_MASK_GET(input, PKE_REG_FBRST_RST_B, PKE_REG_FBRST_RST_E)) | |
358 | { | |
359 | pke_reset(me); | |
360 | } | |
361 | break; | |
362 | ||
363 | case PKE_REG_ERR: | |
364 | /* copy bottom three bits */ | |
365 | BIT_MASK_SET(me->regs[PKE_REG_ERR][0], 0, 2, BIT_MASK_GET(input, 0, 2)); | |
366 | break; | |
367 | ||
368 | case PKE_REG_MARK: | |
369 | /* copy bottom sixteen bits */ | |
370 | PKE_REG_MASK_SET(me, MARK, MARK, BIT_MASK_GET(input, 0, 15)); | |
371 | /* reset MRK bit in STAT */ | |
372 | PKE_REG_MASK_SET(me, STAT, MRK, 0); | |
373 | break; | |
374 | ||
375 | /* handle common case of read-only registers */ | |
376 | /* PKE1-only registers - not really necessary to handle separately */ | |
377 | case PKE_REG_BASE: | |
378 | case PKE_REG_OFST: | |
379 | case PKE_REG_TOPS: | |
380 | case PKE_REG_TOP: | |
381 | case PKE_REG_DBF: | |
382 | if(me->pke_number == 0) | |
383 | writeable = 0; | |
384 | /* fall through */ | |
385 | /* PKE0 & PKE1 common registers*/ | |
386 | case PKE_REG_STAT: | |
387 | /* ignore FDR bit for PKE1_STAT -- simulator does not implement PKE->RAM transfers */ | |
388 | case PKE_REG_CYCLE: | |
389 | case PKE_REG_MODE: | |
390 | case PKE_REG_NUM: | |
391 | case PKE_REG_MASK: | |
392 | case PKE_REG_CODE: | |
393 | case PKE_REG_ITOPS: | |
394 | case PKE_REG_ITOP: | |
395 | case PKE_REG_R0: | |
396 | case PKE_REG_R1: | |
397 | case PKE_REG_R2: | |
398 | case PKE_REG_R3: | |
399 | case PKE_REG_C0: | |
400 | case PKE_REG_C1: | |
401 | case PKE_REG_C2: | |
402 | case PKE_REG_C3: | |
403 | writeable = 0; | |
404 | break; | |
405 | ||
406 | default: | |
407 | ASSERT(0); /* test above should prevent this possibility */ | |
408 | } | |
409 | ||
410 | /* perform return */ | |
411 | if(! writeable) | |
412 | { | |
413 | return 0; /* error */ | |
414 | } | |
aea481da | 415 | |
c567d0b9 RU |
416 | return 4; |
417 | } | |
418 | /* Handle a PKE write; return no. of bytes written */ | |
aea481da DE |
419 | |
420 | int | |
421 | pke_io_write_buffer(device *me_, | |
422 | const void *src, | |
423 | int space, | |
424 | address_word addr, | |
425 | unsigned nr_bytes, | |
fba9bfed | 426 | sim_cpu *cpu, |
aea481da DE |
427 | sim_cia cia) |
428 | { | |
429 | /* downcast to gather embedding pke_device struct */ | |
430 | struct pke_device* me = (struct pke_device*) me_; | |
431 | ||
fba9bfed FCE |
432 | /* find my address ranges */ |
433 | address_word my_reg_start = | |
434 | (me->pke_number == 0) ? PKE0_REGISTER_WINDOW_START : PKE1_REGISTER_WINDOW_START; | |
435 | address_word my_fifo_addr = | |
436 | (me->pke_number == 0) ? PKE0_FIFO_ADDR : PKE1_FIFO_ADDR; | |
437 | ||
aea481da DE |
438 | /* enforce that an access does not span more than one quadword */ |
439 | address_word low = ADDR_TRUNC_QW(addr); | |
440 | address_word high = ADDR_TRUNC_QW(addr + nr_bytes - 1); | |
441 | if(low != high) | |
442 | return 0; | |
443 | ||
444 | /* classify address & handle */ | |
fba9bfed | 445 | if((addr >= my_reg_start) && (addr < my_reg_start + PKE_REGISTER_WINDOW_SIZE)) |
aea481da DE |
446 | { |
447 | /* register bank */ | |
fba9bfed FCE |
448 | int reg_num = ADDR_TRUNC_QW(addr - my_reg_start) >> 4; |
449 | int reg_byte = ADDR_OFFSET_QW(addr); /* find byte-offset inside register bank */ | |
fba9bfed FCE |
450 | quadword input; |
451 | ||
452 | /* clear input */ | |
453 | input[0] = input[1] = input[2] = input[3] = 0; | |
aea481da | 454 | |
fba9bfed FCE |
455 | /* write user-given bytes into input */ |
456 | memcpy(((unsigned_1*) &input) + reg_byte, src, nr_bytes); | |
457 | ||
c567d0b9 | 458 | write_pke_reg (me, reg_num, input); |
b4d2f483 FCE |
459 | return nr_bytes; |
460 | ||
aea481da DE |
461 | /* NOTREACHED */ |
462 | } | |
fba9bfed FCE |
463 | else if(addr >= my_fifo_addr && |
464 | addr < my_fifo_addr + sizeof(quadword)) | |
aea481da DE |
465 | { |
466 | /* FIFO */ | |
fba9bfed | 467 | struct fifo_quadword* fqw; |
db6dac32 | 468 | int fifo_byte = ADDR_OFFSET_QW(addr); /* find byte-offset inside fifo quadword */ |
e2306992 | 469 | unsigned_4 dma_tag_present = 0; |
db6dac32 FCE |
470 | int i; |
471 | ||
f0bb94cd | 472 | /* collect potentially-partial quadword in write buffer; LE byte order */ |
db6dac32 FCE |
473 | memcpy(((unsigned_1*)& me->fifo_qw_in_progress) + fifo_byte, src, nr_bytes); |
474 | /* mark bytes written */ | |
475 | for(i = fifo_byte; i < fifo_byte + nr_bytes; i++) | |
476 | BIT_MASK_SET(me->fifo_qw_done, i, i, 1); | |
477 | ||
478 | /* return if quadword not quite written yet */ | |
479 | if(BIT_MASK_GET(me->fifo_qw_done, 0, sizeof(quadword)-1) != | |
e2306992 | 480 | BIT_MASK_BTW(0, sizeof(quadword)-1)) |
db6dac32 | 481 | return nr_bytes; |
aea481da | 482 | |
db6dac32 FCE |
483 | /* all done - process quadword after clearing flag */ |
484 | BIT_MASK_SET(me->fifo_qw_done, 0, sizeof(quadword)-1, 0); | |
aea481da | 485 | |
9614fb3c FCE |
486 | /* allocate required address in FIFO */ |
487 | fqw = pke_fifo_fit(& me->fifo); | |
488 | ASSERT(fqw != NULL); | |
aea481da | 489 | |
9614fb3c | 490 | /* fill in unclassified FIFO quadword data in host byte order */ |
e2306992 FCE |
491 | fqw->word_class[0] = fqw->word_class[1] = |
492 | fqw->word_class[2] = fqw->word_class[3] = wc_unknown; | |
f0bb94cd FCE |
493 | fqw->data[0] = T2H_4(me->fifo_qw_in_progress[0]); |
494 | fqw->data[1] = T2H_4(me->fifo_qw_in_progress[1]); | |
495 | fqw->data[2] = T2H_4(me->fifo_qw_in_progress[2]); | |
496 | fqw->data[3] = T2H_4(me->fifo_qw_in_progress[3]); | |
9614fb3c FCE |
497 | |
498 | /* read DMAC-supplied indicators */ | |
e2306992 | 499 | ASSERT(sizeof(unsigned_4) == 4); |
534a3d5c | 500 | PKE_MEM_READ(me, (me->pke_number == 0 ? DMA_D0_MADR : DMA_D1_MADR), |
f0bb94cd | 501 | & fqw->source_address, /* converted to host-endian */ |
e2306992 | 502 | 4); |
534a3d5c | 503 | PKE_MEM_READ(me, (me->pke_number == 0 ? DMA_D0_PKTFLAG : DMA_D1_PKTFLAG), |
e2306992 FCE |
504 | & dma_tag_present, |
505 | 4); | |
506 | ||
507 | if(dma_tag_present) | |
508 | { | |
509 | /* lower two words are DMA tags */ | |
510 | fqw->word_class[0] = fqw->word_class[1] = wc_dma; | |
511 | } | |
512 | ||
fba9bfed FCE |
513 | /* set FQC to "1" as FIFO is now not empty */ |
514 | PKE_REG_MASK_SET(me, STAT, FQC, 1); | |
aea481da DE |
515 | |
516 | /* okay */ | |
517 | return nr_bytes; | |
518 | } | |
519 | ||
520 | /* NOTREACHED */ | |
fba9bfed | 521 | return 0; |
aea481da DE |
522 | } |
523 | ||
524 | ||
525 | ||
9614fb3c FCE |
526 | /* Reset the PKE */ |
527 | void | |
528 | pke_reset(struct pke_device* me) | |
529 | { | |
530 | /* advance PC over last quadword in FIFO; keep previous FIFO history */ | |
531 | me->fifo_pc = pke_fifo_flush(& me->fifo); | |
532 | me->qw_pc = 0; | |
533 | /* clear registers, flag, other state */ | |
534 | memset(me->regs, 0, sizeof(me->regs)); | |
535 | me->fifo_qw_done = 0; | |
536 | me->flags = 0; | |
537 | } | |
538 | ||
539 | ||
540 | ||
fba9bfed | 541 | /* Issue & swallow next PKE opcode if possible/available */ |
aea481da DE |
542 | |
543 | void | |
e2306992 | 544 | pke_issue(SIM_DESC sd, struct pke_device* me) |
aea481da | 545 | { |
fba9bfed FCE |
546 | struct fifo_quadword* fqw; |
547 | unsigned_4 fw; | |
10572b6a | 548 | unsigned_4 cmd, intr; |
fba9bfed | 549 | |
fd909089 FCE |
550 | /* 1 -- fetch PKE instruction */ |
551 | ||
552 | /* confirm availability of new quadword of PKE instructions */ | |
9614fb3c FCE |
553 | fqw = pke_fifo_access(& me->fifo, me->fifo_pc); |
554 | if(fqw == NULL) | |
fd909089 FCE |
555 | return; |
556 | ||
557 | /* skip over DMA tag, if present */ | |
558 | pke_pc_advance(me, 0); | |
9614fb3c FCE |
559 | /* note: this can only change qw_pc from 0 to 2 and will not |
560 | invalidate fqw */ | |
fd909089 FCE |
561 | |
562 | /* "fetch" instruction quadword and word */ | |
fd909089 FCE |
563 | fw = fqw->data[me->qw_pc]; |
564 | ||
565 | /* store word in PKECODE register */ | |
566 | me->regs[PKE_REG_CODE][0] = fw; | |
567 | ||
568 | ||
569 | /* 2 -- test go / no-go for PKE execution */ | |
fba9bfed | 570 | |
db6dac32 FCE |
571 | /* switch on STAT:PSS if PSS-pending and in idle state */ |
572 | if((PKE_REG_MASK_GET(me, STAT, PPS) == PKE_REG_STAT_PPS_IDLE) && | |
573 | (me->flags & PKE_FLAG_PENDING_PSS) != 0) | |
574 | { | |
575 | me->flags &= ~PKE_FLAG_PENDING_PSS; | |
576 | PKE_REG_MASK_SET(me, STAT, PSS, 1); | |
577 | } | |
578 | ||
fba9bfed | 579 | /* check for stall/halt control bits */ |
db6dac32 FCE |
580 | if(PKE_REG_MASK_GET(me, STAT, PFS) || |
581 | PKE_REG_MASK_GET(me, STAT, PSS) || /* note special treatment below */ | |
9614fb3c FCE |
582 | /* PEW bit not a reason to keep stalling - it's just an indication, re-computed below */ |
583 | /* PGW bit not a reason to keep stalling - it's just an indication, re-computed below */ | |
584 | /* ER0/ER1 not a reason to keep stalling - it's just an indication */ | |
fd909089 | 585 | PKE_REG_MASK_GET(me, STAT, PIS)) |
fba9bfed | 586 | { |
fd909089 FCE |
587 | /* (still) stalled */ |
588 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL); | |
589 | /* try again next cycle */ | |
43a6998b | 590 | return; |
fba9bfed | 591 | } |
fba9bfed | 592 | |
fba9bfed FCE |
593 | |
594 | /* 3 -- decode PKE instruction */ | |
595 | ||
fd909089 FCE |
596 | /* decoding */ |
597 | if(PKE_REG_MASK_GET(me, STAT, PPS) == PKE_REG_STAT_PPS_IDLE) | |
598 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_DECODE); | |
599 | ||
600 | /* Extract relevant bits from PKEcode */ | |
fba9bfed FCE |
601 | intr = BIT_MASK_GET(fw, PKE_OPCODE_I_B, PKE_OPCODE_I_E); |
602 | cmd = BIT_MASK_GET(fw, PKE_OPCODE_CMD_B, PKE_OPCODE_CMD_E); | |
fba9bfed | 603 | |
d22ea5d0 | 604 | /* handle interrupts */ |
fba9bfed FCE |
605 | if(intr) |
606 | { | |
fd909089 | 607 | /* are we resuming an interrupt-stalled instruction? */ |
d22ea5d0 FCE |
608 | if(me->flags & PKE_FLAG_INT_NOLOOP) |
609 | { | |
610 | /* clear loop-prevention flag */ | |
611 | me->flags &= ~PKE_FLAG_INT_NOLOOP; | |
fd909089 FCE |
612 | |
613 | /* fall through to decode & execute */ | |
614 | /* The pke_code_* functions should not check the MSB in the | |
615 | pkecode. */ | |
d22ea5d0 FCE |
616 | } |
617 | else /* new interrupt-flagged instruction */ | |
618 | { | |
619 | /* set INT flag in STAT register */ | |
620 | PKE_REG_MASK_SET(me, STAT, INT, 1); | |
621 | /* set loop-prevention flag */ | |
622 | me->flags |= PKE_FLAG_INT_NOLOOP; | |
623 | ||
fd909089 FCE |
624 | /* set PIS if stall not masked */ |
625 | if(!PKE_REG_MASK_GET(me, ERR, MII)) | |
9614fb3c | 626 | pke_begin_interrupt_stall(me); |
fd909089 FCE |
627 | |
628 | /* suspend this instruction unless it's PKEMARK */ | |
629 | if(!IS_PKE_CMD(cmd, PKEMARK)) | |
630 | { | |
631 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL); | |
632 | return; | |
633 | } | |
634 | else | |
635 | { | |
636 | ; /* fall through to decode & execute */ | |
637 | } | |
d22ea5d0 | 638 | } |
fba9bfed FCE |
639 | } |
640 | ||
5068e793 | 641 | |
43a6998b | 642 | /* decode & execute */ |
fd909089 | 643 | if(IS_PKE_CMD(cmd, PKENOP)) |
43a6998b | 644 | pke_code_nop(me, fw); |
fd909089 | 645 | else if(IS_PKE_CMD(cmd, STCYCL)) |
43a6998b | 646 | pke_code_stcycl(me, fw); |
fd909089 | 647 | else if(me->pke_number == 1 && IS_PKE_CMD(cmd, OFFSET)) |
43a6998b | 648 | pke_code_offset(me, fw); |
fd909089 | 649 | else if(me->pke_number == 1 && IS_PKE_CMD(cmd, BASE)) |
43a6998b | 650 | pke_code_base(me, fw); |
fd909089 | 651 | else if(IS_PKE_CMD(cmd, ITOP)) |
43a6998b | 652 | pke_code_itop(me, fw); |
fd909089 | 653 | else if(IS_PKE_CMD(cmd, STMOD)) |
43a6998b | 654 | pke_code_stmod(me, fw); |
fd909089 | 655 | else if(me->pke_number == 1 && IS_PKE_CMD(cmd, MSKPATH3)) |
43a6998b | 656 | pke_code_mskpath3(me, fw); |
fba9bfed | 657 | else if(IS_PKE_CMD(cmd, PKEMARK)) |
43a6998b | 658 | pke_code_pkemark(me, fw); |
fd909089 | 659 | else if(IS_PKE_CMD(cmd, FLUSHE)) |
43a6998b | 660 | pke_code_flushe(me, fw); |
fd909089 | 661 | else if(me->pke_number == 1 && IS_PKE_CMD(cmd, FLUSH)) |
43a6998b | 662 | pke_code_flush(me, fw); |
fd909089 | 663 | else if(me->pke_number == 1 && IS_PKE_CMD(cmd, FLUSHA)) |
43a6998b | 664 | pke_code_flusha(me, fw); |
fd909089 | 665 | else if(IS_PKE_CMD(cmd, PKEMSCAL)) |
43a6998b | 666 | pke_code_pkemscal(me, fw); |
fd909089 | 667 | else if(IS_PKE_CMD(cmd, PKEMSCNT)) |
43a6998b | 668 | pke_code_pkemscnt(me, fw); |
fd909089 | 669 | else if(me->pke_number == 1 && IS_PKE_CMD(cmd, PKEMSCALF)) |
43a6998b | 670 | pke_code_pkemscalf(me, fw); |
fd909089 | 671 | else if(IS_PKE_CMD(cmd, STMASK)) |
43a6998b | 672 | pke_code_stmask(me, fw); |
fd909089 | 673 | else if(IS_PKE_CMD(cmd, STROW)) |
43a6998b | 674 | pke_code_strow(me, fw); |
fd909089 | 675 | else if(IS_PKE_CMD(cmd, STCOL)) |
43a6998b | 676 | pke_code_stcol(me, fw); |
fd909089 | 677 | else if(IS_PKE_CMD(cmd, MPG)) |
43a6998b | 678 | pke_code_mpg(me, fw); |
fd909089 | 679 | else if(IS_PKE_CMD(cmd, DIRECT)) |
43a6998b | 680 | pke_code_direct(me, fw); |
fd909089 | 681 | else if(IS_PKE_CMD(cmd, DIRECTHL)) |
43a6998b | 682 | pke_code_directhl(me, fw); |
fd909089 | 683 | else if(IS_PKE_CMD(cmd, UNPACK)) |
43a6998b | 684 | pke_code_unpack(me, fw); |
db6dac32 | 685 | /* ... no other commands ... */ |
43a6998b FCE |
686 | else |
687 | pke_code_error(me, fw); | |
688 | } | |
689 | ||
690 | ||
691 | ||
9614fb3c FCE |
692 | /* Clear out contents of FIFO; act as if it was empty. Return PC |
693 | pointing to one-past-last word. */ | |
694 | ||
695 | unsigned_4 | |
696 | pke_fifo_flush(struct pke_fifo* fifo) | |
697 | { | |
698 | /* don't modify any state! */ | |
699 | return fifo->origin + fifo->next; | |
700 | } | |
701 | ||
702 | ||
703 | ||
121d6745 FCE |
704 | /* Clear out contents of FIFO; make it really empty. */ |
705 | ||
706 | void | |
707 | pke_fifo_reset(struct pke_fifo* fifo) | |
708 | { | |
709 | int i; | |
710 | ||
711 | /* clear fifo quadwords */ | |
712 | for(i=0; i<fifo->next; i++) | |
713 | { | |
714 | zfree(fifo->quadwords[i]); | |
715 | fifo->quadwords[i] = NULL; | |
716 | } | |
717 | ||
718 | /* reset pointers */ | |
719 | fifo->origin = 0; | |
720 | fifo->next = 0; | |
721 | } | |
722 | ||
723 | ||
724 | ||
9614fb3c FCE |
725 | /* Make space for the next quadword in the FIFO. Allocate/enlarge |
726 | FIFO pointer block if necessary. Return a pointer to it. */ | |
727 | ||
728 | struct fifo_quadword* | |
729 | pke_fifo_fit(struct pke_fifo* fifo) | |
730 | { | |
731 | struct fifo_quadword* fqw; | |
732 | ||
733 | /* out of space on quadword pointer array? */ | |
734 | if(fifo->next == fifo->length) /* also triggered before fifo->quadwords allocated */ | |
735 | { | |
736 | struct fifo_quadword** new_qw; | |
737 | unsigned_4 new_length = fifo->length + PKE_FIFO_GROW_SIZE; | |
738 | ||
739 | /* allocate new pointer block */ | |
740 | new_qw = zalloc(new_length * sizeof(struct fifo_quadword*)); | |
741 | ASSERT(new_qw != NULL); | |
742 | ||
0b9843e5 FCE |
743 | /* copy over old contents, if any */ |
744 | if(fifo->quadwords != NULL) | |
745 | { | |
746 | /* copy over old pointers to beginning of new block */ | |
747 | memcpy(new_qw, fifo->quadwords, | |
748 | fifo->length * sizeof(struct fifo_quadword*)); | |
749 | ||
750 | /* free old block */ | |
751 | zfree(fifo->quadwords); | |
752 | } | |
9614fb3c FCE |
753 | |
754 | /* replace pointers & counts */ | |
755 | fifo->quadwords = new_qw; | |
756 | fifo->length = new_length; | |
757 | } | |
758 | ||
759 | /* sanity check */ | |
760 | ASSERT(fifo->quadwords != NULL); | |
761 | ||
762 | /* allocate new quadword from heap */ | |
763 | fqw = zalloc(sizeof(struct fifo_quadword)); | |
764 | ASSERT(fqw != NULL); | |
765 | ||
766 | /* push quadword onto fifo */ | |
767 | fifo->quadwords[fifo->next] = fqw; | |
768 | fifo->next++; | |
769 | return fqw; | |
770 | } | |
771 | ||
772 | ||
773 | ||
774 | /* Return a pointer to the FIFO quadword with given absolute index, or | |
775 | NULL if it is out of range */ | |
776 | ||
777 | struct fifo_quadword* | |
778 | pke_fifo_access(struct pke_fifo* fifo, unsigned_4 qwnum) | |
779 | { | |
780 | struct fifo_quadword* fqw; | |
781 | ||
782 | if((qwnum < fifo->origin) || /* before history */ | |
783 | (qwnum >= fifo->origin + fifo->next)) /* after last available quadword */ | |
784 | fqw = NULL; | |
785 | else | |
786 | { | |
787 | ASSERT(fifo->quadwords != NULL); /* must be allocated already */ | |
788 | fqw = fifo->quadwords[qwnum - fifo->origin]; /* pull out pointer from array */ | |
789 | ASSERT(fqw != NULL); /* must be allocated already */ | |
790 | } | |
791 | ||
792 | return fqw; | |
793 | } | |
794 | ||
795 | ||
796 | /* Authorize release of any FIFO entries older than given absolute quadword. */ | |
797 | void | |
798 | pke_fifo_old(struct pke_fifo* fifo, unsigned_4 qwnum) | |
799 | { | |
800 | /* do we have any too-old FIFO elements? */ | |
801 | if(fifo->origin + PKE_FIFO_ARCHEOLOGY < qwnum) | |
802 | { | |
803 | /* count quadwords to forget */ | |
804 | int horizon = qwnum - (fifo->origin + PKE_FIFO_ARCHEOLOGY); | |
805 | int i; | |
806 | ||
807 | /* free quadwords at indices below horizon */ | |
808 | for(i=0; i < horizon; i++) | |
809 | zfree(fifo->quadwords[i]); | |
810 | ||
811 | /* move surviving quadword pointers down to beginning of array */ | |
812 | for(i=horizon; i < fifo->next; i++) | |
813 | fifo->quadwords[i-horizon] = fifo->quadwords[i]; | |
814 | ||
815 | /* clear duplicate pointers */ | |
816 | for(i=fifo->next - horizon; i < fifo->next; i++) | |
817 | fifo->quadwords[i] = NULL; | |
818 | ||
819 | /* adjust FIFO pointers */ | |
820 | fifo->origin = fifo->origin + horizon; | |
821 | fifo->next = fifo->next - horizon; | |
822 | } | |
823 | } | |
824 | ||
825 | ||
826 | ||
827 | ||
43a6998b | 828 | /* advance the PC by given number of data words; update STAT/FQC |
e2306992 FCE |
829 | field; assume FIFO is filled enough; classify passed-over words; |
830 | write FIFO trace line */ | |
43a6998b FCE |
831 | |
832 | void | |
833 | pke_pc_advance(struct pke_device* me, int num_words) | |
834 | { | |
835 | int num = num_words; | |
e2306992 | 836 | struct fifo_quadword* fq = NULL; |
9614fb3c FCE |
837 | unsigned_4 old_fifo_pc = me->fifo_pc; |
838 | ||
e2306992 | 839 | ASSERT(num_words >= 0); |
43a6998b | 840 | |
b4d2f483 FCE |
841 | /* printf("pke %d pc_advance num_words %d\n", me->pke_number, num_words); */ |
842 | ||
843 | while(1) | |
fba9bfed | 844 | { |
9614fb3c FCE |
845 | /* find next quadword, if any */ |
846 | fq = pke_fifo_access(& me->fifo, me->fifo_pc); | |
43a6998b FCE |
847 | |
848 | /* skip over DMA tag words if present in word 0 or 1 */ | |
9614fb3c | 849 | if(fq != NULL && fq->word_class[me->qw_pc] == wc_dma) |
fba9bfed | 850 | { |
43a6998b FCE |
851 | /* skip by going around loop an extra time */ |
852 | num ++; | |
fba9bfed | 853 | } |
b4d2f483 FCE |
854 | |
855 | /* nothing left to skip / no DMA tag here */ | |
856 | if(num == 0) | |
857 | break; | |
e2306992 | 858 | |
9614fb3c FCE |
859 | /* we are supposed to skip existing words */ |
860 | ASSERT(fq != NULL); | |
861 | ||
b4d2f483 FCE |
862 | /* one word skipped */ |
863 | num --; | |
864 | ||
865 | /* point to next word */ | |
866 | me->qw_pc ++; | |
867 | if(me->qw_pc == 4) | |
e2306992 | 868 | { |
b4d2f483 FCE |
869 | me->qw_pc = 0; |
870 | me->fifo_pc ++; | |
e2306992 | 871 | |
b4d2f483 FCE |
872 | /* trace the consumption of the FIFO quadword we just skipped over */ |
873 | /* fq still points to it */ | |
874 | if(me->fifo_trace_file != NULL) | |
e2306992 | 875 | { |
b4d2f483 FCE |
876 | /* assert complete classification */ |
877 | ASSERT(fq->word_class[3] != wc_unknown); | |
878 | ASSERT(fq->word_class[2] != wc_unknown); | |
879 | ASSERT(fq->word_class[1] != wc_unknown); | |
880 | ASSERT(fq->word_class[0] != wc_unknown); | |
e2306992 | 881 | |
b4d2f483 FCE |
882 | /* print trace record */ |
883 | fprintf(me->fifo_trace_file, | |
884 | "%d 0x%08x_%08x_%08x_%08x 0x%08x %c%c%c%c\n", | |
885 | (me->pke_number == 0 ? 0 : 1), | |
886 | (unsigned) fq->data[3], (unsigned) fq->data[2], | |
887 | (unsigned) fq->data[1], (unsigned) fq->data[0], | |
888 | (unsigned) fq->source_address, | |
889 | fq->word_class[3], fq->word_class[2], | |
890 | fq->word_class[1], fq->word_class[0]); | |
891 | } | |
b4d2f483 FCE |
892 | } /* next quadword */ |
893 | } | |
43a6998b | 894 | |
9614fb3c FCE |
895 | /* age old entries before PC */ |
896 | if(me->fifo_pc != old_fifo_pc) | |
897 | { | |
898 | /* we advanced the fifo-pc; authorize disposal of anything | |
899 | before previous PKEcode */ | |
900 | pke_fifo_old(& me->fifo, old_fifo_pc); | |
901 | } | |
902 | ||
43a6998b | 903 | /* clear FQC if FIFO is now empty */ |
9614fb3c FCE |
904 | fq = pke_fifo_access(& me->fifo, me->fifo_pc); |
905 | if(fq == NULL) | |
fba9bfed | 906 | { |
43a6998b | 907 | PKE_REG_MASK_SET(me, STAT, FQC, 0); |
fba9bfed | 908 | } |
e2306992 FCE |
909 | else /* annote the word where the PC lands as an PKEcode */ |
910 | { | |
9614fb3c | 911 | ASSERT(fq->word_class[me->qw_pc] == wc_pkecode || fq->word_class[me->qw_pc] == wc_unknown); |
e2306992 FCE |
912 | fq->word_class[me->qw_pc] = wc_pkecode; |
913 | } | |
43a6998b | 914 | } |
fba9bfed | 915 | |
fba9bfed | 916 | |
fba9bfed | 917 | |
9614fb3c FCE |
918 | |
919 | ||
43a6998b FCE |
920 | /* Return pointer to FIFO quadword containing given operand# in FIFO. |
921 | `operand_num' starts at 1. Return pointer to operand word in last | |
922 | argument, if non-NULL. If FIFO is not full enough, return 0. | |
923 | Signal an ER0 indication upon skipping a DMA tag. */ | |
fba9bfed | 924 | |
43a6998b | 925 | struct fifo_quadword* |
9614fb3c | 926 | pke_pcrel_fifo(struct pke_device* me, int operand_num, unsigned_4** operand) |
43a6998b | 927 | { |
121d6745 | 928 | int num; |
43a6998b | 929 | int new_qw_pc, new_fifo_pc; |
e2306992 | 930 | struct fifo_quadword* fq = NULL; |
fba9bfed | 931 | |
121d6745 FCE |
932 | /* check for validity of last search results in cache */ |
933 | if(me->last_fifo_pc == me->fifo_pc && | |
934 | me->last_qw_pc == me->qw_pc && | |
935 | operand_num > me->last_num) | |
936 | { | |
937 | /* continue search from last stop */ | |
938 | new_fifo_pc = me->last_new_fifo_pc; | |
939 | new_qw_pc = me->last_new_qw_pc; | |
940 | num = operand_num - me->last_num; | |
941 | } | |
942 | else | |
943 | { | |
944 | /* start search from scratch */ | |
945 | new_fifo_pc = me->fifo_pc; | |
946 | new_qw_pc = me->qw_pc; | |
947 | num = operand_num; | |
948 | } | |
fba9bfed | 949 | |
121d6745 | 950 | ASSERT(num > 0); |
fba9bfed | 951 | |
9614fb3c | 952 | /* printf("pke %d pcrel_fifo operand_num %d\n", me->pke_number, operand_num); */ |
b4d2f483 | 953 | |
e2306992 | 954 | do |
43a6998b FCE |
955 | { |
956 | /* one word skipped */ | |
957 | num --; | |
fba9bfed | 958 | |
43a6998b FCE |
959 | /* point to next word */ |
960 | new_qw_pc ++; | |
961 | if(new_qw_pc == 4) | |
fba9bfed | 962 | { |
43a6998b FCE |
963 | new_qw_pc = 0; |
964 | new_fifo_pc ++; | |
fba9bfed | 965 | } |
fba9bfed | 966 | |
9614fb3c FCE |
967 | fq = pke_fifo_access(& me->fifo, new_fifo_pc); |
968 | ||
43a6998b | 969 | /* check for FIFO underflow */ |
9614fb3c FCE |
970 | if(fq == NULL) |
971 | break; | |
43a6998b FCE |
972 | |
973 | /* skip over DMA tag words if present in word 0 or 1 */ | |
e2306992 | 974 | if(fq->word_class[new_qw_pc] == wc_dma) |
fba9bfed | 975 | { |
9614fb3c FCE |
976 | /* set ER0 */ |
977 | PKE_REG_MASK_SET(me, STAT, ER0, 1); | |
978 | ||
43a6998b | 979 | /* mismatch error! */ |
fd909089 FCE |
980 | if(! PKE_REG_MASK_GET(me, ERR, ME0)) |
981 | { | |
9614fb3c | 982 | pke_begin_interrupt_stall(me); |
fd909089 FCE |
983 | /* don't stall just yet -- finish this instruction */ |
984 | /* the PPS_STALL state will be entered by pke_issue() next time */ | |
985 | } | |
43a6998b FCE |
986 | /* skip by going around loop an extra time */ |
987 | num ++; | |
fba9bfed FCE |
988 | } |
989 | } | |
e2306992 | 990 | while(num > 0); |
fba9bfed | 991 | |
43a6998b | 992 | /* return pointer to operand word itself */ |
e2306992 FCE |
993 | if(fq != NULL) |
994 | { | |
995 | *operand = & fq->data[new_qw_pc]; | |
fba9bfed | 996 | |
9614fb3c FCE |
997 | /* annote the word where the pseudo-PC lands as an PKE operand */ |
998 | ASSERT(fq->word_class[new_qw_pc] == wc_pkedata || fq->word_class[new_qw_pc] == wc_unknown); | |
e2306992 | 999 | fq->word_class[new_qw_pc] = wc_pkedata; |
121d6745 FCE |
1000 | |
1001 | /* store search results in cache */ | |
1002 | /* keys */ | |
1003 | me->last_fifo_pc = me->fifo_pc; | |
1004 | me->last_qw_pc = me->qw_pc; | |
1005 | /* values */ | |
1006 | me->last_num = operand_num; | |
1007 | me->last_new_fifo_pc = new_fifo_pc; | |
1008 | me->last_new_qw_pc = new_qw_pc; | |
e2306992 FCE |
1009 | } |
1010 | ||
1011 | return fq; | |
43a6998b | 1012 | } |
fba9bfed | 1013 | |
fba9bfed | 1014 | |
43a6998b FCE |
1015 | /* Return pointer to given operand# in FIFO. `operand_num' starts at 1. |
1016 | If FIFO is not full enough, return 0. Skip over DMA tags, but mark | |
1017 | them as an error (ER0). */ | |
fba9bfed | 1018 | |
43a6998b | 1019 | unsigned_4* |
9614fb3c | 1020 | pke_pcrel_operand(struct pke_device* me, int operand_num) |
43a6998b FCE |
1021 | { |
1022 | unsigned_4* operand = NULL; | |
1023 | struct fifo_quadword* fifo_operand; | |
fba9bfed | 1024 | |
9614fb3c | 1025 | fifo_operand = pke_pcrel_fifo(me, operand_num, & operand); |
fba9bfed | 1026 | |
43a6998b | 1027 | if(fifo_operand == NULL) |
9614fb3c | 1028 | ASSERT(operand == NULL); /* pke_pcrel_fifo() ought leave it untouched */ |
fba9bfed | 1029 | |
43a6998b FCE |
1030 | return operand; |
1031 | } | |
fba9bfed | 1032 | |
fba9bfed | 1033 | |
db6dac32 FCE |
1034 | /* Return a bit-field extract of given operand# in FIFO, and its |
1035 | source-addr. `bit_offset' starts at 0, referring to LSB after PKE | |
1036 | instruction word. Width must be >0, <=32. Assume FIFO is full | |
1037 | enough. Skip over DMA tags, but mark them as an error (ER0). */ | |
1038 | ||
1039 | unsigned_4 | |
9614fb3c | 1040 | pke_pcrel_operand_bits(struct pke_device* me, int bit_offset, int bit_width, unsigned_4* source_addr) |
db6dac32 FCE |
1041 | { |
1042 | unsigned_4* word = NULL; | |
1043 | unsigned_4 value; | |
1044 | struct fifo_quadword* fifo_operand; | |
534a3d5c FCE |
1045 | int wordnumber, bitnumber; |
1046 | ||
1047 | wordnumber = bit_offset/32; | |
1048 | bitnumber = bit_offset%32; | |
db6dac32 FCE |
1049 | |
1050 | /* find operand word with bitfield */ | |
9614fb3c | 1051 | fifo_operand = pke_pcrel_fifo(me, wordnumber + 1, &word); |
534a3d5c | 1052 | ASSERT(word != NULL); |
db6dac32 FCE |
1053 | |
1054 | /* extract bitfield from word */ | |
534a3d5c | 1055 | value = BIT_MASK_GET(*word, bitnumber, bitnumber + bit_width - 1); |
db6dac32 FCE |
1056 | |
1057 | /* extract source addr from fifo word */ | |
1058 | *source_addr = fifo_operand->source_address; | |
1059 | ||
1060 | return value; | |
1061 | } | |
1062 | ||
fba9bfed | 1063 | |
fba9bfed | 1064 | |
d22ea5d0 FCE |
1065 | /* check for stall conditions on indicated devices (path* only on |
1066 | PKE1), do not change status; return 0 iff no stall */ | |
43a6998b FCE |
1067 | int |
1068 | pke_check_stall(struct pke_device* me, enum pke_check_target what) | |
1069 | { | |
1070 | int any_stall = 0; | |
e2306992 | 1071 | unsigned_4 cop2_stat, gpuif_stat; |
43a6998b | 1072 | |
e2306992 | 1073 | /* read status words */ |
534a3d5c FCE |
1074 | ASSERT(sizeof(unsigned_4) == 4); |
1075 | PKE_MEM_READ(me, (GIF_REG_STAT), | |
1076 | & gpuif_stat, | |
1077 | 4); | |
1078 | PKE_MEM_READ(me, (COP2_REG_STAT_ADDR), | |
1079 | & cop2_stat, | |
1080 | 4); | |
e2306992 | 1081 | |
43a6998b FCE |
1082 | /* perform checks */ |
1083 | if(what == chk_vu) | |
1084 | { | |
e2306992 FCE |
1085 | if(me->pke_number == 0) |
1086 | any_stall = BIT_MASK_GET(cop2_stat, COP2_REG_STAT_VBS0_B, COP2_REG_STAT_VBS0_E); | |
1087 | else /* if(me->pke_number == 1) */ | |
1088 | any_stall = BIT_MASK_GET(cop2_stat, COP2_REG_STAT_VBS1_B, COP2_REG_STAT_VBS1_E); | |
db6dac32 FCE |
1089 | } |
1090 | else if(what == chk_path1) /* VU -> GPUIF */ | |
1091 | { | |
1092 | if(BIT_MASK_GET(gpuif_stat, GPUIF_REG_STAT_APATH_B, GPUIF_REG_STAT_APATH_E) == 1) | |
43a6998b | 1093 | any_stall = 1; |
fba9bfed | 1094 | } |
db6dac32 | 1095 | else if(what == chk_path2) /* PKE -> GPUIF */ |
fba9bfed | 1096 | { |
db6dac32 FCE |
1097 | if(BIT_MASK_GET(gpuif_stat, GPUIF_REG_STAT_APATH_B, GPUIF_REG_STAT_APATH_E) == 2) |
1098 | any_stall = 1; | |
1099 | } | |
1100 | else if(what == chk_path3) /* DMA -> GPUIF */ | |
1101 | { | |
1102 | if(BIT_MASK_GET(gpuif_stat, GPUIF_REG_STAT_APATH_B, GPUIF_REG_STAT_APATH_E) == 3) | |
43a6998b FCE |
1103 | any_stall = 1; |
1104 | } | |
1105 | else | |
1106 | { | |
db6dac32 FCE |
1107 | /* invalid what */ |
1108 | ASSERT(0); | |
43a6998b | 1109 | } |
fba9bfed | 1110 | |
43a6998b FCE |
1111 | /* any stall reasons? */ |
1112 | return any_stall; | |
1113 | } | |
fba9bfed | 1114 | |
fba9bfed | 1115 | |
d22ea5d0 | 1116 | /* PKE1 only: flip the DBF bit; recompute TOPS, TOP */ |
43a6998b FCE |
1117 | void |
1118 | pke_flip_dbf(struct pke_device* me) | |
1119 | { | |
370e0ef7 | 1120 | int newdf; |
d22ea5d0 | 1121 | /* compute new TOP */ |
733cfc78 IC |
1122 | PKE_REG_MASK_SET(me, TOP, TOP, |
1123 | PKE_REG_MASK_GET(me, TOPS, TOPS)); | |
43a6998b | 1124 | /* flip DBF */ |
370e0ef7 FCE |
1125 | newdf = PKE_REG_MASK_GET(me, DBF, DF) ? 0 : 1; |
1126 | PKE_REG_MASK_SET(me, DBF, DF, newdf); | |
1127 | PKE_REG_MASK_SET(me, STAT, DBF, newdf); | |
43a6998b FCE |
1128 | /* compute new TOPS */ |
1129 | PKE_REG_MASK_SET(me, TOPS, TOPS, | |
1130 | (PKE_REG_MASK_GET(me, BASE, BASE) + | |
370e0ef7 FCE |
1131 | newdf * PKE_REG_MASK_GET(me, OFST, OFFSET))); |
1132 | ||
d22ea5d0 FCE |
1133 | /* this is equivalent to last word from okadaa (98-02-25): |
1134 | 1) TOP=TOPS; | |
1135 | 2) TOPS=BASE + !DBF*OFFSET | |
1136 | 3) DBF=!DBF */ | |
43a6998b | 1137 | } |
fba9bfed | 1138 | |
fba9bfed | 1139 | |
9614fb3c FCE |
1140 | /* set the STAT:PIS bit and send an interrupt to the 5900 */ |
1141 | void | |
1142 | pke_begin_interrupt_stall(struct pke_device* me) | |
1143 | { | |
1144 | /* set PIS */ | |
1145 | PKE_REG_MASK_SET(me, STAT, PIS, 1); | |
1146 | ||
1147 | /* XXX: send interrupt to 5900? */ | |
1148 | } | |
1149 | ||
1150 | ||
1151 | ||
fba9bfed | 1152 | |
43a6998b FCE |
1153 | /* PKEcode handler functions -- responsible for checking and |
1154 | confirming old stall conditions, executing pkecode, updating PC and | |
1155 | status registers -- may assume being run on correct PKE unit */ | |
1156 | ||
1157 | void | |
1158 | pke_code_nop(struct pke_device* me, unsigned_4 pkecode) | |
1159 | { | |
1160 | /* done */ | |
1161 | pke_pc_advance(me, 1); | |
1162 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
1163 | } | |
fba9bfed | 1164 | |
fba9bfed | 1165 | |
43a6998b FCE |
1166 | void |
1167 | pke_code_stcycl(struct pke_device* me, unsigned_4 pkecode) | |
1168 | { | |
1169 | int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); | |
d22ea5d0 | 1170 | |
43a6998b | 1171 | /* copy immediate value into CYCLE reg */ |
534a3d5c FCE |
1172 | PKE_REG_MASK_SET(me, CYCLE, WL, BIT_MASK_GET(imm, 8, 15)); |
1173 | PKE_REG_MASK_SET(me, CYCLE, CL, BIT_MASK_GET(imm, 0, 7)); | |
43a6998b FCE |
1174 | /* done */ |
1175 | pke_pc_advance(me, 1); | |
1176 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
1177 | } | |
fba9bfed | 1178 | |
fba9bfed | 1179 | |
43a6998b FCE |
1180 | void |
1181 | pke_code_offset(struct pke_device* me, unsigned_4 pkecode) | |
1182 | { | |
1183 | int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); | |
d22ea5d0 | 1184 | |
43a6998b FCE |
1185 | /* copy 10 bits to OFFSET field */ |
1186 | PKE_REG_MASK_SET(me, OFST, OFFSET, BIT_MASK_GET(imm, 0, 9)); | |
1187 | /* clear DBF bit */ | |
1188 | PKE_REG_MASK_SET(me, DBF, DF, 0); | |
1189 | /* clear other DBF bit */ | |
1190 | PKE_REG_MASK_SET(me, STAT, DBF, 0); | |
1191 | /* set TOPS = BASE */ | |
1192 | PKE_REG_MASK_SET(me, TOPS, TOPS, PKE_REG_MASK_GET(me, BASE, BASE)); | |
1193 | /* done */ | |
1194 | pke_pc_advance(me, 1); | |
1195 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
1196 | } | |
fba9bfed | 1197 | |
fba9bfed | 1198 | |
43a6998b FCE |
1199 | void |
1200 | pke_code_base(struct pke_device* me, unsigned_4 pkecode) | |
1201 | { | |
1202 | int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); | |
d22ea5d0 | 1203 | |
43a6998b FCE |
1204 | /* copy 10 bits to BASE field */ |
1205 | PKE_REG_MASK_SET(me, BASE, BASE, BIT_MASK_GET(imm, 0, 9)); | |
43a6998b FCE |
1206 | /* done */ |
1207 | pke_pc_advance(me, 1); | |
1208 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
1209 | } | |
fba9bfed | 1210 | |
fba9bfed | 1211 | |
43a6998b FCE |
1212 | void |
1213 | pke_code_itop(struct pke_device* me, unsigned_4 pkecode) | |
1214 | { | |
1215 | int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); | |
d22ea5d0 | 1216 | |
43a6998b FCE |
1217 | /* copy 10 bits to ITOPS field */ |
1218 | PKE_REG_MASK_SET(me, ITOPS, ITOPS, BIT_MASK_GET(imm, 0, 9)); | |
1219 | /* done */ | |
1220 | pke_pc_advance(me, 1); | |
1221 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
1222 | } | |
fba9bfed | 1223 | |
fba9bfed | 1224 | |
43a6998b FCE |
1225 | void |
1226 | pke_code_stmod(struct pke_device* me, unsigned_4 pkecode) | |
1227 | { | |
1228 | int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); | |
d22ea5d0 | 1229 | |
43a6998b FCE |
1230 | /* copy 2 bits to MODE register */ |
1231 | PKE_REG_MASK_SET(me, MODE, MDE, BIT_MASK_GET(imm, 0, 2)); | |
1232 | /* done */ | |
1233 | pke_pc_advance(me, 1); | |
1234 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
1235 | } | |
fba9bfed | 1236 | |
43a6998b FCE |
1237 | |
1238 | void | |
1239 | pke_code_mskpath3(struct pke_device* me, unsigned_4 pkecode) | |
1240 | { | |
b4d2f483 FCE |
1241 | int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); |
1242 | unsigned_4 gif_mode; | |
1243 | ||
89154e47 | 1244 | /* set appropriate bit */ |
b4d2f483 | 1245 | if(BIT_MASK_GET(imm, PKE_REG_MSKPATH3_B, PKE_REG_MSKPATH3_E) != 0) |
89154e47 | 1246 | gif_mode = GIF_REG_MODE_M3R_MASK; |
b4d2f483 | 1247 | else |
89154e47 | 1248 | gif_mode = 0; |
b4d2f483 | 1249 | |
89154e47 | 1250 | /* write register; patrickm code will look at M3R bit only */ |
b4d2f483 FCE |
1251 | PKE_MEM_WRITE(me, GIF_REG_MODE, & gif_mode, 4); |
1252 | ||
b4d2f483 FCE |
1253 | /* done */ |
1254 | pke_pc_advance(me, 1); | |
1255 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
43a6998b FCE |
1256 | } |
1257 | ||
1258 | ||
1259 | void | |
1260 | pke_code_pkemark(struct pke_device* me, unsigned_4 pkecode) | |
1261 | { | |
1262 | int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); | |
1263 | /* copy 16 bits to MARK register */ | |
1264 | PKE_REG_MASK_SET(me, MARK, MARK, BIT_MASK_GET(imm, 0, 15)); | |
1265 | /* set MRK bit in STAT register - CPU2 v2.1 docs incorrect */ | |
1266 | PKE_REG_MASK_SET(me, STAT, MRK, 1); | |
1267 | /* done */ | |
1268 | pke_pc_advance(me, 1); | |
1269 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
1270 | } | |
1271 | ||
1272 | ||
1273 | void | |
1274 | pke_code_flushe(struct pke_device* me, unsigned_4 pkecode) | |
1275 | { | |
1276 | /* compute next PEW bit */ | |
1277 | if(pke_check_stall(me, chk_vu)) | |
1278 | { | |
1279 | /* VU busy */ | |
1280 | PKE_REG_MASK_SET(me, STAT, PEW, 1); | |
1281 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL); | |
1282 | /* try again next cycle */ | |
fba9bfed | 1283 | } |
fba9bfed FCE |
1284 | else |
1285 | { | |
43a6998b FCE |
1286 | /* VU idle */ |
1287 | PKE_REG_MASK_SET(me, STAT, PEW, 0); | |
1288 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
fba9bfed FCE |
1289 | pke_pc_advance(me, 1); |
1290 | } | |
fba9bfed FCE |
1291 | } |
1292 | ||
1293 | ||
43a6998b FCE |
1294 | void |
1295 | pke_code_flush(struct pke_device* me, unsigned_4 pkecode) | |
1296 | { | |
1297 | int something_busy = 0; | |
1298 | ||
1299 | /* compute next PEW, PGW bits */ | |
1300 | if(pke_check_stall(me, chk_vu)) | |
1301 | { | |
1302 | something_busy = 1; | |
1303 | PKE_REG_MASK_SET(me, STAT, PEW, 1); | |
1304 | } | |
1305 | else | |
1306 | PKE_REG_MASK_SET(me, STAT, PEW, 0); | |
fba9bfed FCE |
1307 | |
1308 | ||
43a6998b FCE |
1309 | if(pke_check_stall(me, chk_path1) || |
1310 | pke_check_stall(me, chk_path2)) | |
1311 | { | |
1312 | something_busy = 1; | |
1313 | PKE_REG_MASK_SET(me, STAT, PGW, 1); | |
1314 | } | |
1315 | else | |
1316 | PKE_REG_MASK_SET(me, STAT, PGW, 0); | |
fba9bfed | 1317 | |
43a6998b FCE |
1318 | /* go or no go */ |
1319 | if(something_busy) | |
1320 | { | |
1321 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT); | |
1322 | /* try again next cycle */ | |
1323 | } | |
1324 | else | |
1325 | { | |
1326 | /* all idle */ | |
1327 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
1328 | pke_pc_advance(me, 1); | |
1329 | } | |
1330 | } | |
fba9bfed | 1331 | |
fba9bfed FCE |
1332 | |
1333 | void | |
43a6998b | 1334 | pke_code_flusha(struct pke_device* me, unsigned_4 pkecode) |
fba9bfed | 1335 | { |
43a6998b | 1336 | int something_busy = 0; |
fba9bfed | 1337 | |
43a6998b FCE |
1338 | /* compute next PEW, PGW bits */ |
1339 | if(pke_check_stall(me, chk_vu)) | |
fba9bfed | 1340 | { |
43a6998b FCE |
1341 | something_busy = 1; |
1342 | PKE_REG_MASK_SET(me, STAT, PEW, 1); | |
fba9bfed | 1343 | } |
43a6998b FCE |
1344 | else |
1345 | PKE_REG_MASK_SET(me, STAT, PEW, 0); | |
fba9bfed | 1346 | |
43a6998b FCE |
1347 | |
1348 | if(pke_check_stall(me, chk_path1) || | |
1349 | pke_check_stall(me, chk_path2) || | |
1350 | pke_check_stall(me, chk_path3)) | |
fba9bfed | 1351 | { |
43a6998b FCE |
1352 | something_busy = 1; |
1353 | PKE_REG_MASK_SET(me, STAT, PGW, 1); | |
fba9bfed | 1354 | } |
43a6998b FCE |
1355 | else |
1356 | PKE_REG_MASK_SET(me, STAT, PGW, 0); | |
fba9bfed | 1357 | |
43a6998b FCE |
1358 | if(something_busy) |
1359 | { | |
1360 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT); | |
1361 | /* try again next cycle */ | |
1362 | } | |
1363 | else | |
1364 | { | |
1365 | /* all idle */ | |
1366 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
1367 | pke_pc_advance(me, 1); | |
1368 | } | |
fba9bfed FCE |
1369 | } |
1370 | ||
1371 | ||
43a6998b FCE |
1372 | void |
1373 | pke_code_pkemscal(struct pke_device* me, unsigned_4 pkecode) | |
1374 | { | |
1375 | /* compute next PEW bit */ | |
1376 | if(pke_check_stall(me, chk_vu)) | |
1377 | { | |
1378 | /* VU busy */ | |
1379 | PKE_REG_MASK_SET(me, STAT, PEW, 1); | |
1380 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL); | |
1381 | /* try again next cycle */ | |
1382 | } | |
1383 | else | |
1384 | { | |
1385 | unsigned_4 vu_pc; | |
1386 | int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); | |
1387 | ||
1388 | /* VU idle */ | |
1389 | PKE_REG_MASK_SET(me, STAT, PEW, 0); | |
1390 | ||
1391 | /* flip DBF on PKE1 */ | |
1392 | if(me->pke_number == 1) | |
1393 | pke_flip_dbf(me); | |
1394 | ||
f0bb94cd | 1395 | /* compute new PC for VU (host byte-order) */ |
db6dac32 | 1396 | vu_pc = BIT_MASK_GET(imm, 0, 15); |
f0bb94cd | 1397 | vu_pc = T2H_4(vu_pc); |
534a3d5c | 1398 | |
43a6998b | 1399 | /* write new PC; callback function gets VU running */ |
534a3d5c FCE |
1400 | ASSERT(sizeof(unsigned_4) == 4); |
1401 | PKE_MEM_WRITE(me, (me->pke_number == 0 ? VU0_CIA : VU1_CIA), | |
1402 | & vu_pc, | |
1403 | 4); | |
43a6998b | 1404 | |
d22ea5d0 FCE |
1405 | /* copy ITOPS field to ITOP */ |
1406 | PKE_REG_MASK_SET(me, ITOP, ITOP, PKE_REG_MASK_GET(me, ITOPS, ITOPS)); | |
1407 | ||
43a6998b FCE |
1408 | /* done */ |
1409 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
1410 | pke_pc_advance(me, 1); | |
1411 | } | |
1412 | } | |
fba9bfed | 1413 | |
fba9bfed | 1414 | |
43a6998b FCE |
1415 | |
1416 | void | |
1417 | pke_code_pkemscnt(struct pke_device* me, unsigned_4 pkecode) | |
fba9bfed | 1418 | { |
43a6998b FCE |
1419 | /* compute next PEW bit */ |
1420 | if(pke_check_stall(me, chk_vu)) | |
1421 | { | |
1422 | /* VU busy */ | |
1423 | PKE_REG_MASK_SET(me, STAT, PEW, 1); | |
1424 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL); | |
1425 | /* try again next cycle */ | |
1426 | } | |
1427 | else | |
1428 | { | |
1429 | unsigned_4 vu_pc; | |
fba9bfed | 1430 | |
43a6998b FCE |
1431 | /* VU idle */ |
1432 | PKE_REG_MASK_SET(me, STAT, PEW, 0); | |
fba9bfed | 1433 | |
43a6998b FCE |
1434 | /* flip DBF on PKE1 */ |
1435 | if(me->pke_number == 1) | |
1436 | pke_flip_dbf(me); | |
fba9bfed | 1437 | |
43a6998b | 1438 | /* read old PC */ |
534a3d5c FCE |
1439 | ASSERT(sizeof(unsigned_4) == 4); |
1440 | PKE_MEM_READ(me, (me->pke_number == 0 ? VU0_CIA : VU1_CIA), | |
1441 | & vu_pc, | |
1442 | 4); | |
43a6998b FCE |
1443 | |
1444 | /* rewrite new PC; callback function gets VU running */ | |
534a3d5c FCE |
1445 | ASSERT(sizeof(unsigned_4) == 4); |
1446 | PKE_MEM_WRITE(me, (me->pke_number == 0 ? VU0_CIA : VU1_CIA), | |
1447 | & vu_pc, | |
1448 | 4); | |
43a6998b | 1449 | |
d22ea5d0 FCE |
1450 | /* copy ITOPS field to ITOP */ |
1451 | PKE_REG_MASK_SET(me, ITOP, ITOP, PKE_REG_MASK_GET(me, ITOPS, ITOPS)); | |
1452 | ||
43a6998b FCE |
1453 | /* done */ |
1454 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
1455 | pke_pc_advance(me, 1); | |
1456 | } | |
1457 | } | |
1458 | ||
1459 | ||
1460 | void | |
1461 | pke_code_pkemscalf(struct pke_device* me, unsigned_4 pkecode) | |
1462 | { | |
1463 | int something_busy = 0; | |
1464 | ||
1465 | /* compute next PEW, PGW bits */ | |
1466 | if(pke_check_stall(me, chk_vu)) | |
fba9bfed | 1467 | { |
43a6998b FCE |
1468 | something_busy = 1; |
1469 | PKE_REG_MASK_SET(me, STAT, PEW, 1); | |
fba9bfed | 1470 | } |
43a6998b FCE |
1471 | else |
1472 | PKE_REG_MASK_SET(me, STAT, PEW, 0); | |
fba9bfed | 1473 | |
43a6998b FCE |
1474 | |
1475 | if(pke_check_stall(me, chk_path1) || | |
1476 | pke_check_stall(me, chk_path2) || | |
1477 | pke_check_stall(me, chk_path3)) | |
1478 | { | |
1479 | something_busy = 1; | |
1480 | PKE_REG_MASK_SET(me, STAT, PGW, 1); | |
1481 | } | |
fba9bfed | 1482 | else |
43a6998b | 1483 | PKE_REG_MASK_SET(me, STAT, PGW, 0); |
fba9bfed | 1484 | |
43a6998b FCE |
1485 | /* go or no go */ |
1486 | if(something_busy) | |
1487 | { | |
1488 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT); | |
1489 | /* try again next cycle */ | |
1490 | } | |
1491 | else | |
1492 | { | |
1493 | unsigned_4 vu_pc; | |
1494 | int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); | |
1495 | ||
1496 | /* flip DBF on PKE1 */ | |
1497 | if(me->pke_number == 1) | |
1498 | pke_flip_dbf(me); | |
1499 | ||
f0bb94cd | 1500 | /* compute new PC for VU (host byte-order) */ |
db6dac32 | 1501 | vu_pc = BIT_MASK_GET(imm, 0, 15); |
f0bb94cd | 1502 | vu_pc = T2H_4(vu_pc); |
534a3d5c FCE |
1503 | |
1504 | /* rewrite new PC; callback function gets VU running */ | |
1505 | ASSERT(sizeof(unsigned_4) == 4); | |
1506 | PKE_MEM_WRITE(me, (me->pke_number == 0 ? VU0_CIA : VU1_CIA), | |
1507 | & vu_pc, | |
1508 | 4); | |
43a6998b | 1509 | |
d22ea5d0 FCE |
1510 | /* copy ITOPS field to ITOP */ |
1511 | PKE_REG_MASK_SET(me, ITOP, ITOP, PKE_REG_MASK_GET(me, ITOPS, ITOPS)); | |
1512 | ||
43a6998b FCE |
1513 | /* done */ |
1514 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
1515 | pke_pc_advance(me, 1); | |
1516 | } | |
fba9bfed FCE |
1517 | } |
1518 | ||
1519 | ||
43a6998b FCE |
1520 | void |
1521 | pke_code_stmask(struct pke_device* me, unsigned_4 pkecode) | |
1522 | { | |
43a6998b | 1523 | unsigned_4* mask; |
d22ea5d0 | 1524 | |
d22ea5d0 | 1525 | /* check that FIFO has one more word for STMASK operand */ |
9614fb3c | 1526 | mask = pke_pcrel_operand(me, 1); |
43a6998b FCE |
1527 | if(mask != NULL) |
1528 | { | |
1529 | /* "transferring" operand */ | |
1530 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); | |
db6dac32 FCE |
1531 | |
1532 | /* set NUM */ | |
1533 | PKE_REG_MASK_SET(me, NUM, NUM, 1); | |
1534 | ||
43a6998b FCE |
1535 | /* fill the register */ |
1536 | PKE_REG_MASK_SET(me, MASK, MASK, *mask); | |
db6dac32 FCE |
1537 | |
1538 | /* set NUM */ | |
1539 | PKE_REG_MASK_SET(me, NUM, NUM, 0); | |
1540 | ||
43a6998b FCE |
1541 | /* done */ |
1542 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
e2306992 | 1543 | pke_pc_advance(me, 2); |
43a6998b FCE |
1544 | } |
1545 | else | |
1546 | { | |
1547 | /* need to wait for another word */ | |
1548 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT); | |
1549 | /* try again next cycle */ | |
1550 | } | |
1551 | } | |
fba9bfed | 1552 | |
fba9bfed | 1553 | |
43a6998b FCE |
1554 | void |
1555 | pke_code_strow(struct pke_device* me, unsigned_4 pkecode) | |
fba9bfed | 1556 | { |
43a6998b FCE |
1557 | /* check that FIFO has four more words for STROW operand */ |
1558 | unsigned_4* last_op; | |
1559 | ||
9614fb3c | 1560 | last_op = pke_pcrel_operand(me, 4); |
43a6998b FCE |
1561 | if(last_op != NULL) |
1562 | { | |
1563 | /* "transferring" operand */ | |
1564 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); | |
1565 | ||
db6dac32 FCE |
1566 | /* set NUM */ |
1567 | PKE_REG_MASK_SET(me, NUM, NUM, 1); | |
1568 | ||
43a6998b | 1569 | /* copy ROW registers: must all exist if 4th operand exists */ |
9614fb3c FCE |
1570 | me->regs[PKE_REG_R0][0] = * pke_pcrel_operand(me, 1); |
1571 | me->regs[PKE_REG_R1][0] = * pke_pcrel_operand(me, 2); | |
1572 | me->regs[PKE_REG_R2][0] = * pke_pcrel_operand(me, 3); | |
1573 | me->regs[PKE_REG_R3][0] = * pke_pcrel_operand(me, 4); | |
43a6998b | 1574 | |
db6dac32 FCE |
1575 | /* set NUM */ |
1576 | PKE_REG_MASK_SET(me, NUM, NUM, 0); | |
1577 | ||
43a6998b FCE |
1578 | /* done */ |
1579 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
1580 | pke_pc_advance(me, 5); | |
1581 | } | |
1582 | else | |
1583 | { | |
1584 | /* need to wait for another word */ | |
1585 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT); | |
1586 | /* try again next cycle */ | |
1587 | } | |
1588 | } | |
aea481da | 1589 | |
fba9bfed | 1590 | |
43a6998b FCE |
1591 | void |
1592 | pke_code_stcol(struct pke_device* me, unsigned_4 pkecode) | |
1593 | { | |
1594 | /* check that FIFO has four more words for STCOL operand */ | |
1595 | unsigned_4* last_op; | |
1596 | ||
9614fb3c | 1597 | last_op = pke_pcrel_operand(me, 4); |
43a6998b | 1598 | if(last_op != NULL) |
fba9bfed | 1599 | { |
43a6998b FCE |
1600 | /* "transferring" operand */ |
1601 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); | |
1602 | ||
db6dac32 FCE |
1603 | /* set NUM */ |
1604 | PKE_REG_MASK_SET(me, NUM, NUM, 1); | |
1605 | ||
43a6998b | 1606 | /* copy COL registers: must all exist if 4th operand exists */ |
9614fb3c FCE |
1607 | me->regs[PKE_REG_C0][0] = * pke_pcrel_operand(me, 1); |
1608 | me->regs[PKE_REG_C1][0] = * pke_pcrel_operand(me, 2); | |
1609 | me->regs[PKE_REG_C2][0] = * pke_pcrel_operand(me, 3); | |
1610 | me->regs[PKE_REG_C3][0] = * pke_pcrel_operand(me, 4); | |
43a6998b | 1611 | |
db6dac32 FCE |
1612 | /* set NUM */ |
1613 | PKE_REG_MASK_SET(me, NUM, NUM, 0); | |
1614 | ||
43a6998b FCE |
1615 | /* done */ |
1616 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
1617 | pke_pc_advance(me, 5); | |
fba9bfed | 1618 | } |
fba9bfed | 1619 | else |
43a6998b FCE |
1620 | { |
1621 | /* need to wait for another word */ | |
1622 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT); | |
1623 | /* try again next cycle */ | |
1624 | } | |
1625 | } | |
fba9bfed | 1626 | |
43a6998b FCE |
1627 | |
1628 | void | |
1629 | pke_code_mpg(struct pke_device* me, unsigned_4 pkecode) | |
1630 | { | |
1631 | unsigned_4* last_mpg_word; | |
1632 | int num = BIT_MASK_GET(pkecode, PKE_OPCODE_NUM_B, PKE_OPCODE_NUM_E); | |
1633 | int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); | |
1634 | ||
653c2590 FCE |
1635 | /* assert 64-bit alignment of MPG operand */ |
1636 | if(me->qw_pc != 3 && me->qw_pc != 1) | |
1637 | return pke_code_error(me, pkecode); | |
1638 | ||
43a6998b FCE |
1639 | /* map zero to max+1 */ |
1640 | if(num==0) num=0x100; | |
1641 | ||
1642 | /* check that FIFO has a few more words for MPG operand */ | |
9614fb3c | 1643 | last_mpg_word = pke_pcrel_operand(me, num*2); /* num: number of 64-bit words */ |
43a6998b FCE |
1644 | if(last_mpg_word != NULL) |
1645 | { | |
1646 | /* perform implied FLUSHE */ | |
db6dac32 | 1647 | if(pke_check_stall(me, chk_vu)) |
653c2590 FCE |
1648 | { |
1649 | /* VU busy */ | |
1650 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL); | |
1651 | /* retry this instruction next clock */ | |
1652 | } | |
1653 | else | |
43a6998b FCE |
1654 | { |
1655 | /* VU idle */ | |
1656 | int i; | |
1657 | ||
1658 | /* "transferring" operand */ | |
1659 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); | |
1660 | ||
534a3d5c FCE |
1661 | /* set NUM */ |
1662 | PKE_REG_MASK_SET(me, NUM, NUM, num); | |
1663 | ||
1664 | /* transfer VU instructions, one word-pair per iteration */ | |
1665 | for(i=0; i<num; i++) | |
43a6998b FCE |
1666 | { |
1667 | address_word vu_addr_base, vu_addr; | |
1668 | address_word vutrack_addr_base, vutrack_addr; | |
89154e47 | 1669 | address_word vu_addr_max_size; |
653c2590 | 1670 | unsigned_4 vu_lower_opcode, vu_upper_opcode; |
43a6998b | 1671 | unsigned_4* operand; |
534a3d5c FCE |
1672 | struct fifo_quadword* fq; |
1673 | int next_num; | |
1674 | ||
1675 | /* decrement NUM */ | |
1676 | next_num = PKE_REG_MASK_GET(me, NUM, NUM) - 1; | |
1677 | PKE_REG_MASK_SET(me, NUM, NUM, next_num); | |
43a6998b | 1678 | |
db6dac32 | 1679 | /* imm: in 64-bit units for MPG instruction */ |
43a6998b FCE |
1680 | /* VU*_MEM0 : instruction memory */ |
1681 | vu_addr_base = (me->pke_number == 0) ? | |
733cfc78 | 1682 | VU0_MEM0_WINDOW_START : VU1_MEM0_WINDOW_START; |
89154e47 FCE |
1683 | vu_addr_max_size = (me->pke_number == 0) ? |
1684 | VU0_MEM0_SIZE : VU1_MEM0_SIZE; | |
43a6998b FCE |
1685 | vutrack_addr_base = (me->pke_number == 0) ? |
1686 | VU0_MEM0_SRCADDR_START : VU1_MEM0_SRCADDR_START; | |
89154e47 FCE |
1687 | |
1688 | /* compute VU address for this word-pair */ | |
1689 | vu_addr = vu_addr_base + (imm + i) * 8; | |
1690 | /* check for vu_addr overflow */ | |
1691 | while(vu_addr >= vu_addr_base + vu_addr_max_size) | |
1692 | vu_addr -= vu_addr_max_size; | |
1693 | ||
1694 | /* compute VU tracking address */ | |
1695 | vutrack_addr = vutrack_addr_base + ((signed_8)vu_addr - (signed_8)vu_addr_base) / 2; | |
534a3d5c | 1696 | |
653c2590 | 1697 | /* Fetch operand words; assume they are already little-endian for VU imem */ |
9614fb3c | 1698 | fq = pke_pcrel_fifo(me, i*2 + 1, & operand); |
653c2590 | 1699 | vu_lower_opcode = *operand; |
9614fb3c | 1700 | vu_upper_opcode = *pke_pcrel_operand(me, i*2 + 2); |
43a6998b FCE |
1701 | |
1702 | /* write data into VU memory */ | |
f0bb94cd | 1703 | /* lower (scalar) opcode comes in first word ; macro performs H2T! */ |
534a3d5c | 1704 | PKE_MEM_WRITE(me, vu_addr, |
b4d2f483 | 1705 | & vu_lower_opcode, |
653c2590 | 1706 | 4); |
f0bb94cd | 1707 | /* upper (vector) opcode comes in second word ; H2T */ |
b4d2f483 | 1708 | ASSERT(sizeof(unsigned_4) == 4); |
653c2590 | 1709 | PKE_MEM_WRITE(me, vu_addr + 4, |
b4d2f483 | 1710 | & vu_upper_opcode, |
653c2590 | 1711 | 4); |
43a6998b | 1712 | |
653c2590 | 1713 | /* write tracking address in target byte-order */ |
653c2590 | 1714 | ASSERT(sizeof(unsigned_4) == 4); |
534a3d5c | 1715 | PKE_MEM_WRITE(me, vutrack_addr, |
f0bb94cd | 1716 | & fq->source_address, |
534a3d5c | 1717 | 4); |
43a6998b | 1718 | } /* VU xfer loop */ |
db6dac32 FCE |
1719 | |
1720 | /* check NUM */ | |
1721 | ASSERT(PKE_REG_MASK_GET(me, NUM, NUM) == 0); | |
43a6998b FCE |
1722 | |
1723 | /* done */ | |
1724 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
1725 | pke_pc_advance(me, 1 + num*2); | |
1726 | } | |
43a6998b FCE |
1727 | } /* if FIFO full enough */ |
1728 | else | |
1729 | { | |
1730 | /* need to wait for another word */ | |
1731 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT); | |
1732 | /* retry this instruction next clock */ | |
1733 | } | |
aea481da DE |
1734 | } |
1735 | ||
1736 | ||
43a6998b FCE |
1737 | void |
1738 | pke_code_direct(struct pke_device* me, unsigned_4 pkecode) | |
1739 | { | |
1740 | /* check that FIFO has a few more words for DIRECT operand */ | |
1741 | unsigned_4* last_direct_word; | |
1742 | int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); | |
43a6998b | 1743 | |
653c2590 FCE |
1744 | /* assert 128-bit alignment of DIRECT operand */ |
1745 | if(me->qw_pc != 3) | |
1746 | return pke_code_error(me, pkecode); | |
1747 | ||
43a6998b FCE |
1748 | /* map zero to max+1 */ |
1749 | if(imm==0) imm=0x10000; | |
1750 | ||
9614fb3c | 1751 | last_direct_word = pke_pcrel_operand(me, imm*4); /* imm: number of 128-bit words */ |
43a6998b FCE |
1752 | if(last_direct_word != NULL) |
1753 | { | |
1754 | /* VU idle */ | |
1755 | int i; | |
f0bb94cd | 1756 | unsigned_16 fifo_data; |
43a6998b FCE |
1757 | |
1758 | /* "transferring" operand */ | |
1759 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); | |
1760 | ||
1761 | /* transfer GPUIF quadwords, one word per iteration */ | |
1762 | for(i=0; i<imm*4; i++) | |
1763 | { | |
9614fb3c | 1764 | unsigned_4* operand = pke_pcrel_operand(me, 1+i); |
43a6998b FCE |
1765 | |
1766 | /* collect word into quadword */ | |
f0bb94cd | 1767 | *A4_16(&fifo_data, 3 - (i % 4)) = *operand; |
0b9843e5 | 1768 | |
534a3d5c FCE |
1769 | /* write to GPUIF FIFO only with full quadword */ |
1770 | if(i % 4 == 3) | |
43a6998b | 1771 | { |
534a3d5c FCE |
1772 | ASSERT(sizeof(fifo_data) == 16); |
1773 | PKE_MEM_WRITE(me, GIF_PATH2_FIFO_ADDR, | |
f0bb94cd | 1774 | & fifo_data, |
534a3d5c | 1775 | 16); |
43a6998b | 1776 | } /* write collected quadword */ |
43a6998b FCE |
1777 | } /* GPUIF xfer loop */ |
1778 | ||
1779 | /* done */ | |
1780 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
1781 | pke_pc_advance(me, 1 + imm*4); | |
1782 | } /* if FIFO full enough */ | |
1783 | else | |
1784 | { | |
1785 | /* need to wait for another word */ | |
1786 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT); | |
1787 | /* retry this instruction next clock */ | |
1788 | } | |
1789 | } | |
fba9bfed | 1790 | |
43a6998b FCE |
1791 | |
1792 | void | |
1793 | pke_code_directhl(struct pke_device* me, unsigned_4 pkecode) | |
fba9bfed | 1794 | { |
43a6998b FCE |
1795 | /* treat the same as DIRECTH */ |
1796 | pke_code_direct(me, pkecode); | |
1797 | } | |
fba9bfed | 1798 | |
43a6998b FCE |
1799 | |
1800 | void | |
1801 | pke_code_unpack(struct pke_device* me, unsigned_4 pkecode) | |
1802 | { | |
1803 | int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); | |
1804 | int cmd = BIT_MASK_GET(pkecode, PKE_OPCODE_CMD_B, PKE_OPCODE_CMD_E); | |
1805 | int num = BIT_MASK_GET(pkecode, PKE_OPCODE_NUM_B, PKE_OPCODE_NUM_E); | |
9614fb3c | 1806 | int nummx = (num == 0) ? 0x0100 : num; |
db6dac32 | 1807 | short vn = BIT_MASK_GET(cmd, 2, 3); /* unpack shape controls */ |
43a6998b | 1808 | short vl = BIT_MASK_GET(cmd, 0, 1); |
43a6998b | 1809 | int m = BIT_MASK_GET(cmd, 4, 4); |
db6dac32 | 1810 | short cl = PKE_REG_MASK_GET(me, CYCLE, CL); /* cycle controls */ |
43a6998b | 1811 | short wl = PKE_REG_MASK_GET(me, CYCLE, WL); |
9614fb3c | 1812 | short addrwl = (wl == 0) ? 0x0100 : wl; |
db6dac32 | 1813 | int r = BIT_MASK_GET(imm, 15, 15); /* indicator bits in imm value */ |
653c2590 | 1814 | int usn = BIT_MASK_GET(imm, 14, 14); |
db6dac32 | 1815 | |
43a6998b | 1816 | int n, num_operands; |
534a3d5c | 1817 | unsigned_4* last_operand_word = NULL; |
9614fb3c FCE |
1818 | |
1819 | /* catch all illegal UNPACK variants */ | |
1820 | if(vl == 3 && vn < 3) | |
1821 | { | |
1822 | pke_code_error(me, pkecode); | |
1823 | return; | |
1824 | } | |
fba9bfed | 1825 | |
43a6998b | 1826 | /* compute PKEcode length, as given in CPU2 spec, v2.1 pg. 11 */ |
9614fb3c | 1827 | if(cl >= addrwl) |
43a6998b FCE |
1828 | n = num; |
1829 | else | |
9614fb3c | 1830 | n = cl * (nummx / addrwl) + PKE_LIMIT(nummx % addrwl, cl); |
fd909089 | 1831 | num_operands = (31 + (32 >> vl) * (vn+1) * n)/32; /* round up to next word */ |
43a6998b FCE |
1832 | |
1833 | /* confirm that FIFO has enough words in it */ | |
534a3d5c | 1834 | if(num_operands > 0) |
9614fb3c | 1835 | last_operand_word = pke_pcrel_operand(me, num_operands); |
534a3d5c | 1836 | if(last_operand_word != NULL || num_operands == 0) |
43a6998b | 1837 | { |
534a3d5c FCE |
1838 | address_word vu_addr_base, vutrack_addr_base; |
1839 | address_word vu_addr_max_size; | |
1840 | int vector_num_out, vector_num_in; | |
43a6998b FCE |
1841 | |
1842 | /* "transferring" operand */ | |
1843 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); | |
1844 | ||
1845 | /* don't check whether VU is idle */ | |
db6dac32 FCE |
1846 | |
1847 | /* compute VU address base */ | |
43a6998b | 1848 | if(me->pke_number == 0) |
534a3d5c | 1849 | { |
d22ea5d0 | 1850 | vu_addr_base = VU0_MEM1_WINDOW_START; |
534a3d5c | 1851 | vu_addr_max_size = VU0_MEM1_SIZE; |
d22ea5d0 FCE |
1852 | vutrack_addr_base = VU0_MEM1_SRCADDR_START; |
1853 | r = 0; | |
534a3d5c | 1854 | } |
43a6998b FCE |
1855 | else |
1856 | { | |
d22ea5d0 | 1857 | vu_addr_base = VU1_MEM1_WINDOW_START; |
534a3d5c | 1858 | vu_addr_max_size = VU1_MEM1_SIZE; |
d22ea5d0 | 1859 | vutrack_addr_base = VU1_MEM1_SRCADDR_START; |
43a6998b | 1860 | } |
db6dac32 FCE |
1861 | |
1862 | /* set NUM */ | |
9614fb3c | 1863 | PKE_REG_MASK_SET(me, NUM, NUM, nummx); |
db6dac32 | 1864 | |
43a6998b | 1865 | /* transfer given number of vectors */ |
534a3d5c FCE |
1866 | vector_num_out = 0; /* output vector number being processed */ |
1867 | vector_num_in = 0; /* argument vector number being processed */ | |
db6dac32 | 1868 | do |
43a6998b FCE |
1869 | { |
1870 | quadword vu_old_data; | |
1871 | quadword vu_new_data; | |
1872 | quadword unpacked_data; | |
1873 | address_word vu_addr; | |
534a3d5c | 1874 | address_word vutrack_addr; |
db6dac32 | 1875 | unsigned_4 source_addr = 0; |
43a6998b | 1876 | int i; |
534a3d5c FCE |
1877 | int next_num; |
1878 | ||
db6dac32 | 1879 | /* decrement NUM */ |
534a3d5c FCE |
1880 | next_num = PKE_REG_MASK_GET(me, NUM, NUM) - 1; |
1881 | PKE_REG_MASK_SET(me, NUM, NUM, next_num); | |
1882 | ||
43a6998b FCE |
1883 | /* compute VU destination address, as bytes in R5900 memory */ |
1884 | if(cl >= wl) | |
1885 | { | |
1886 | /* map zero to max+1 */ | |
d22ea5d0 | 1887 | vu_addr = vu_addr_base + 16 * (BIT_MASK_GET(imm, 0, 9) + |
fd909089 FCE |
1888 | (vector_num_out / addrwl) * cl + |
1889 | (vector_num_out % addrwl)); | |
43a6998b FCE |
1890 | } |
1891 | else | |
d22ea5d0 | 1892 | vu_addr = vu_addr_base + 16 * (BIT_MASK_GET(imm, 0, 9) + |
d22ea5d0 | 1893 | vector_num_out); |
fd909089 FCE |
1894 | |
1895 | /* handle "R" double-buffering bit */ | |
1896 | if(r) | |
1897 | vu_addr += 16 * PKE_REG_MASK_GET(me, TOPS, TOPS); | |
534a3d5c FCE |
1898 | |
1899 | /* check for vu_addr overflow */ | |
1900 | while(vu_addr >= vu_addr_base + vu_addr_max_size) | |
1901 | vu_addr -= vu_addr_max_size; | |
1902 | ||
1903 | /* compute address of tracking table entry */ | |
1904 | vutrack_addr = vutrack_addr_base + ((signed_8)vu_addr - (signed_8)vu_addr_base) / 4; | |
db6dac32 | 1905 | |
f0bb94cd FCE |
1906 | /* read old VU data word at address; reverse words if needed */ |
1907 | { | |
1908 | unsigned_16 vu_old_badwords; | |
1909 | ASSERT(sizeof(vu_old_badwords) == 16); | |
1910 | PKE_MEM_READ(me, vu_addr, | |
1911 | &vu_old_badwords, 16); | |
1912 | vu_old_data[0] = * A4_16(& vu_old_badwords, 3); | |
1913 | vu_old_data[1] = * A4_16(& vu_old_badwords, 2); | |
1914 | vu_old_data[2] = * A4_16(& vu_old_badwords, 1); | |
1915 | vu_old_data[3] = * A4_16(& vu_old_badwords, 0); | |
1916 | } | |
653c2590 | 1917 | |
43a6998b FCE |
1918 | /* For cyclic unpack, next operand quadword may come from instruction stream |
1919 | or be zero. */ | |
9614fb3c FCE |
1920 | if((cl < addrwl) && |
1921 | (vector_num_out % addrwl) >= cl) | |
43a6998b FCE |
1922 | { |
1923 | /* clear operand - used only in a "indeterminate" state */ | |
1924 | for(i = 0; i < 4; i++) | |
1925 | unpacked_data[i] = 0; | |
1926 | } | |
1927 | else | |
1928 | { | |
db6dac32 | 1929 | /* compute packed vector dimensions */ |
9614fb3c | 1930 | int vectorbits = 0, unitbits = 0; |
db6dac32 FCE |
1931 | |
1932 | if(vl < 3) /* PKE_UNPACK_*_{32,16,8} */ | |
43a6998b | 1933 | { |
db6dac32 FCE |
1934 | unitbits = (32 >> vl); |
1935 | vectorbits = unitbits * (vn+1); | |
1936 | } | |
1937 | else if(vl == 3 && vn == 3) /* PKE_UNPACK_V4_5 */ | |
1938 | { | |
1939 | unitbits = 5; | |
1940 | vectorbits = 16; | |
1941 | } | |
1942 | else /* illegal unpack variant */ | |
1943 | { | |
9614fb3c FCE |
1944 | /* should have been caught at top of function */ |
1945 | ASSERT(0); | |
db6dac32 FCE |
1946 | } |
1947 | ||
1948 | /* loop over columns */ | |
1949 | for(i=0; i<=vn; i++) | |
1950 | { | |
1951 | unsigned_4 operand; | |
fba9bfed | 1952 | |
db6dac32 FCE |
1953 | /* offset in bits in current operand word */ |
1954 | int bitoffset = | |
534a3d5c | 1955 | (vector_num_in * vectorbits) + (i * unitbits); /* # of bits from PKEcode */ |
43a6998b | 1956 | |
db6dac32 FCE |
1957 | /* last unit of V4_5 is only one bit wide */ |
1958 | if(vl == 3 && vn == 3 && i == 3) /* PKE_UNPACK_V4_5 */ | |
1959 | unitbits = 1; | |
1960 | ||
9614fb3c FCE |
1961 | /* confirm we're not reading more than we said we needed */ |
1962 | if(vector_num_in * vectorbits >= num_operands * 32) | |
1963 | { | |
1964 | /* this condition may be triggered by illegal | |
1965 | PKEcode / CYCLE combinations. */ | |
1966 | pke_code_error(me, pkecode); | |
1967 | /* XXX: this case needs to be better understood, | |
1968 | and detected at a better time. */ | |
1969 | return; | |
1970 | } | |
1971 | ||
db6dac32 | 1972 | /* fetch bitfield operand */ |
9614fb3c | 1973 | operand = pke_pcrel_operand_bits(me, bitoffset, unitbits, & source_addr); |
db6dac32 FCE |
1974 | |
1975 | /* selectively sign-extend; not for V4_5 1-bit value */ | |
653c2590 | 1976 | if(usn || unitbits == 1) |
db6dac32 | 1977 | unpacked_data[i] = operand; |
653c2590 FCE |
1978 | else |
1979 | unpacked_data[i] = SEXT32(operand, unitbits-1); | |
43a6998b | 1980 | } |
534a3d5c | 1981 | |
9614fb3c FCE |
1982 | /* set remaining top words in vector */ |
1983 | for(i=vn+1; i<4; i++) | |
1984 | { | |
1985 | if(vn == 0) /* S_{32,16,8}: copy lowest element */ | |
1986 | unpacked_data[i] = unpacked_data[0]; | |
1987 | else | |
1988 | unpacked_data[i] = 0; | |
1989 | } | |
fd909089 | 1990 | |
534a3d5c FCE |
1991 | /* consumed a vector from the PKE instruction stream */ |
1992 | vector_num_in ++; | |
db6dac32 | 1993 | } /* unpack word from instruction operand */ |
43a6998b | 1994 | |
9614fb3c FCE |
1995 | /* process STMOD register for accumulation operations */ |
1996 | switch(PKE_REG_MASK_GET(me, MODE, MDE)) | |
1997 | { | |
1998 | case PKE_MODE_ADDROW: /* add row registers to output data */ | |
b59e0b68 | 1999 | case PKE_MODE_ACCROW: /* same .. later conditionally accumulate */ |
9614fb3c FCE |
2000 | for(i=0; i<4; i++) |
2001 | /* exploit R0..R3 contiguity */ | |
2002 | unpacked_data[i] += me->regs[PKE_REG_R0 + i][0]; | |
2003 | break; | |
2004 | ||
9614fb3c FCE |
2005 | case PKE_MODE_INPUT: /* pass data through */ |
2006 | default: /* specified as undefined */ | |
2007 | ; | |
2008 | } | |
2009 | ||
db6dac32 | 2010 | /* compute replacement word */ |
43a6998b FCE |
2011 | if(m) /* use mask register? */ |
2012 | { | |
2013 | /* compute index into mask register for this word */ | |
b4d2f483 | 2014 | int mask_index = PKE_LIMIT(vector_num_out % addrwl, 3); |
43a6998b | 2015 | |
534a3d5c | 2016 | for(i=0; i<4; i++) /* loop over columns */ |
43a6998b FCE |
2017 | { |
2018 | int mask_op = PKE_MASKREG_GET(me, mask_index, i); | |
2019 | unsigned_4* masked_value = NULL; | |
43a6998b FCE |
2020 | |
2021 | switch(mask_op) | |
2022 | { | |
2023 | case PKE_MASKREG_INPUT: | |
9614fb3c | 2024 | masked_value = & unpacked_data[i]; |
b59e0b68 FCE |
2025 | |
2026 | /* conditionally accumulate */ | |
2027 | if(PKE_REG_MASK_GET(me, MODE, MDE) == PKE_MODE_ACCROW) | |
2028 | me->regs[PKE_REG_R0 + i][0] = unpacked_data[i]; | |
2029 | ||
43a6998b FCE |
2030 | break; |
2031 | ||
2032 | case PKE_MASKREG_ROW: /* exploit R0..R3 contiguity */ | |
2033 | masked_value = & me->regs[PKE_REG_R0 + i][0]; | |
2034 | break; | |
2035 | ||
2036 | case PKE_MASKREG_COLUMN: /* exploit C0..C3 contiguity */ | |
534a3d5c | 2037 | masked_value = & me->regs[PKE_REG_C0 + mask_index][0]; |
43a6998b FCE |
2038 | break; |
2039 | ||
2040 | case PKE_MASKREG_NOTHING: | |
2041 | /* "write inhibit" by re-copying old data */ | |
2042 | masked_value = & vu_old_data[i]; | |
2043 | break; | |
2044 | ||
2045 | default: | |
2046 | ASSERT(0); | |
2047 | /* no other cases possible */ | |
2048 | } | |
2049 | ||
2050 | /* copy masked value for column */ | |
db6dac32 | 2051 | vu_new_data[i] = *masked_value; |
43a6998b | 2052 | } /* loop over columns */ |
db6dac32 | 2053 | } /* mask */ |
43a6998b FCE |
2054 | else |
2055 | { | |
2056 | /* no mask - just copy over entire unpacked quadword */ | |
2057 | memcpy(vu_new_data, unpacked_data, sizeof(unpacked_data)); | |
b59e0b68 FCE |
2058 | |
2059 | /* conditionally store accumulated row results */ | |
2060 | if(PKE_REG_MASK_GET(me, MODE, MDE) == PKE_MODE_ACCROW) | |
2061 | for(i=0; i<4; i++) | |
2062 | me->regs[PKE_REG_R0 + i][0] = unpacked_data[i]; | |
43a6998b | 2063 | } |
43a6998b | 2064 | |
f0bb94cd FCE |
2065 | /* write new VU data word at address; reverse words if needed */ |
2066 | { | |
2067 | unsigned_16 vu_new_badwords; | |
2068 | * A4_16(& vu_new_badwords, 3) = vu_new_data[0]; | |
2069 | * A4_16(& vu_new_badwords, 2) = vu_new_data[1]; | |
2070 | * A4_16(& vu_new_badwords, 1) = vu_new_data[2]; | |
2071 | * A4_16(& vu_new_badwords, 0) = vu_new_data[3]; | |
2072 | ASSERT(sizeof(vu_new_badwords) == 16); | |
2073 | PKE_MEM_WRITE(me, vu_addr, | |
2074 | &vu_new_badwords, 16); | |
2075 | } | |
2076 | ||
2077 | /* write tracking address */ | |
534a3d5c FCE |
2078 | ASSERT(sizeof(unsigned_4) == 4); |
2079 | PKE_MEM_WRITE(me, vutrack_addr, | |
2080 | & source_addr, | |
2081 | 4); | |
43a6998b FCE |
2082 | |
2083 | /* next vector please */ | |
534a3d5c | 2084 | vector_num_out ++; |
43a6998b | 2085 | } /* vector transfer loop */ |
db6dac32 | 2086 | while(PKE_REG_MASK_GET(me, NUM, NUM) > 0); |
43a6998b | 2087 | |
9614fb3c FCE |
2088 | /* confirm we've written as many vectors as told */ |
2089 | ASSERT(nummx == vector_num_out); | |
2090 | ||
43a6998b FCE |
2091 | /* done */ |
2092 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
e2306992 | 2093 | pke_pc_advance(me, 1 + num_operands); |
43a6998b FCE |
2094 | } /* PKE FIFO full enough */ |
2095 | else | |
2096 | { | |
2097 | /* need to wait for another word */ | |
2098 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT); | |
2099 | /* retry this instruction next clock */ | |
2100 | } | |
2101 | } | |
2102 | ||
2103 | ||
2104 | void | |
2105 | pke_code_error(struct pke_device* me, unsigned_4 pkecode) | |
2106 | { | |
9614fb3c FCE |
2107 | /* set ER1 flag in STAT register */ |
2108 | PKE_REG_MASK_SET(me, STAT, ER1, 1); | |
2109 | ||
fd909089 FCE |
2110 | if(! PKE_REG_MASK_GET(me, ERR, ME1)) |
2111 | { | |
9614fb3c | 2112 | pke_begin_interrupt_stall(me); |
fd909089 FCE |
2113 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL); |
2114 | } | |
2115 | else | |
2116 | { | |
2117 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
2118 | } | |
2119 | ||
43a6998b | 2120 | /* advance over faulty word */ |
43a6998b | 2121 | pke_pc_advance(me, 1); |
fba9bfed | 2122 | } |