* emultempl/spu_ovl.S: Add debug printf code.
[deliverable/binutils-gdb.git] / ld / emultempl / spu_ovl.S
1 /* Overlay manager for SPU.
2
3 Copyright 2006, 2007 Free Software Foundation, Inc.
4
5 This file is part of the GNU Binutils.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
20 MA 02110-1301, USA. */
21
22 /**
23 * MFC DMA definitions: constants that __ovly_load writes to the MFC channels.
24 */
25 #define MFC_GET_CMD 0x40 /* command value written to $MFC_Cmd for each transfer */
26 #define MFC_MAX_DMA_SIZE 0x4000 /* largest size requested in a single transfer */
27 #define MFC_TAG_UPDATE_ALL 2 /* value written to $MFC_WrTagUpdate before reading the tag status */
28 #define MFC_TAG_ID 0 /* tag written to $MFC_TagId; also selects the bit used in the tag mask */
29
30
31 /**
32 * Temporary register allocations.  Several names share one register; each name is only meaningful in the stretch of code that uses it.
33 * $6, $7 and $8 (plus $9 under OVLY_IRQ_SAVE) are saved/restored by __ovly_load; no save of $75-$78 appears in this file.
34 */
35 #define tab $75 /* base address of the table being indexed */
36 #define cgbits $75 /* carry bits produced by cg */
37 #define add64 $75 /* result of the 64-bit effective address add */
38 #define ealo $75 /* value written to $MFC_EAL */
39 #define newmask $75 /* tag mask used while waiting for the overlay DMA */
40 #define tagstat $75 /* tag update request, then the tag status read back */
41 #define bchn $75 /* back chain pointer while walking stack frames */
42 #define rv1 $75 /* shuffle pattern / select mask while building $lr */
43
44 #define off $76 /* byte offset into the table being indexed */
45 #define off64 $76 /* addend for the 64-bit effective address add */
46 #define maxsize $76 /* holds MFC_MAX_DMA_SIZE */
47 #define oldmask $76 /* tag mask read from $MFC_RdTagMask, restored after the wait */
48 #define sz $76 /* value written to $MFC_Size */
49 #define lnkr $76 /* link register value loaded from a stack frame */
50 #define rv2 $76 /* new $lr value under construction */
51
52 #define cur $77 /* entry read from _ovly_buf_table */
53 #define cmp $77 /* comparison result */
54 #define buf $77 /* buf field of the _ovly_table entry */
55 #define genwi $77 /* word-insert control generated by cwx */
56 #define tagid $77 /* value written to $MFC_TagId */
57 #define cmd $77 /* value written to $MFC_Cmd */
58 #define rv3 $77 /* second shuffle result while building $lr */
59
60 #define cgshuf $78 /* copy of __cg_pattern; note $78 is also the partition number input */
61
62 #define vma $6 /* _ovly_table entry as loaded; written to $MFC_LSA */
63
64 #define map $7 /* quadword loaded from _ovly_buf_table */
65 #define osize $7 /* size still to be transferred */
66 #define cmp2 $7 /* second comparison result */
67
68 #define ea64 $8 /* 64-bit effective address; written to $MFC_EAH */
69 #define retval $8 /* address of __ovly_return */
70
71 #ifdef OVLY_IRQ_SAVE
72 #define irqtmp $8 /* branch target for the bid/binze instructions */
73 #define irq_stat $9 /* value read from $SPU_RdMachStat on entry */
74 #endif
75
76 # Stack quadword minus N: SQWMn expands to the byte offset -16*n, used as a displacement from $sp
77 #define SQWM1 -16*1
78 #define SQWM2 -16*2
79 #define SQWM3 -16*3
80 #define SQWM4 -16*4 /* SQWM1-SQWM4 equal the literal -16..-64 offsets where __ovly_load stores $6-$9 */
81 #define SQWM5 -16*5
82 #define SQWM6 -16*6
83 #define SQWM7 -16*7
84 #define SQWM8 -16*8
85 #define SQWM9 -16*9
86 #define SQWM10 -16*10
87 #define SQWM11 -16*11
88 #define SQWM12 -16*12
89 #define SQWM13 -16*13
90 #define SQWM14 -16*14
91 #define SQWM15 -16*15
92 #define SQWM16 -16*16
93
94 .extern _ovly_table /* one 16-byte entry per overlay: vma, size, file_offset, buf */
95 .extern _ovly_buf_table /* one 4-byte entry per buffer: the overlay currently mapped there */
96
97 #ifdef OVLY_PRINTFS
98 #define SPE_C99_VPRINTF 37 /* opcode placed in the top byte of the message word that follows each stop */
99 __entry_event_format:
100 .string "In entry_event_hook segment=0x%08x entry-address=0x%08x\n"
101 __debug_event_format:
102 .string "In debug_event_hook link-register=0x%08x %08x %08x %08x\n"
103 __dma_event_format:
104 .string "In dma_event_hook vma=0x%08x ea=%08x%08x sz=%08x\n"
105 __ovly_buf_table_format:
106 .string "_ovly_buf_table[%08x]=%08x\n"
107 #endif
108
109 .text
110 .align 4 /* layout matters: __ovly_load reaches both patterns with lqd offsets taken relative to __ovly_return */
111 .type __rv_pattern, @object
112 .size __rv_pattern, 16
113 __rv_pattern: /* shufb control used by __ovly_load to assemble the new $lr */
114 .word 0x00010203, 0x1c1d1e1f, 0x00010203, 0x10111213 /* result words: A.word0, B.word3, A.word0, B.word0 */
115 .type __cg_pattern, @object
116 .size __cg_pattern, 16
117 __cg_pattern: /* shufb control applied to the cg carry bits before the addx on ea64 */
118 .word 0x04050607, 0x80808080, 0x80808080, 0x80808080 /* result word 0 = source word 1, remaining words zero */
119
120 /**
121 * __ovly_return - stub for returning from overlay functions.
122 * Unpacks the four-word $lr built by __ovly_load.  If $78 is non-zero the branch is not taken and execution continues into the code that follows this stub (__ovly_load).
123 * inputs:
124 * $lr link register
125 *
126 * outputs:
127 * $78 old partition number, to be reloaded
128 * $79 return address in old partition number
129 */
130 .global __ovly_return
131 .type __ovly_return, @function
132
133 .word 0 /* NOTE(review): this pad word looks tied to the "+4" in the lqd offsets used by __ovly_load -- confirm before changing the layout */
134 __ovly_return:
135 shlqbyi $78, $lr, 4 /* $78 = word 1 of $lr (previous overlay index) */
136 shlqbyi $79, $lr, 8 /* $79 = word 2 of $lr (caller return address) */
137 biz $78, $79 /* $78 == 0: nothing to reload, branch straight to the return address */
138 .size __ovly_return, . - __ovly_return
139
140 /**
141 * __ovly_load - copy an overlay partition to local store.
142 *
143 * inputs:
144 * $78 partition number to be loaded.
145 * $79 branch target in new partition.
146 * $lr link register, containing return addr.
147 *
148 * outputs:
149 * $lr new link register, returning through __ovly_return.
150 *
151 * Copy a new overlay partition into local store, or return
152 * immediately if the partition is already resident.
153 */
154 .global __ovly_load
155 .type __ovly_load, @function
156
157 __ovly_load:
158 /* Save temporary registers to stack.  They are stored below $sp; $sp itself is not adjusted. */
159 stqd $6, -16($sp)
160 stqd $7, -32($sp)
161 stqd $8, -48($sp)
162
163 #ifdef OVLY_IRQ_SAVE
164 /* Save irq state, then disable interrupts. */
165 stqd $9, -64($sp)
166 ila irqtmp, __ovly_irq_save
167 rdch irq_stat, $SPU_RdMachStat
168 bid irqtmp /* indirect branch to the very next label; the bid form disables interrupts */
169 __ovly_irq_save:
170 #endif
171
172 #ifdef OVLY_PRINTFS
173 //==============================================
174 // In entry_event_hook segment=0x%08x entry-address=0x%08x
175 //==============================================
176 # save registers
177 stqd $10, SQWM5($sp)
178 stqd $11, SQWM6($sp)
179 stqd $12, SQWM7($sp)
180 # Place input parameters onto the stack to form the
181 # local storage memory image.
182 ila $10, __entry_event_format
183 stqd $10, SQWM12($sp) /* quadword 0: format string address */
184 ai $10, $sp, SQWM9
185 stqd $10, SQWM11($sp) /* quadword 1: address of the first argument slot */
186 stqd $sp, SQWM10($sp) /* quadword 2: current $sp */
187 stqd $78, SQWM9($sp) /* first argument: partition number */
188 stqd $79, SQWM8($sp) /* second argument: branch target */
189 # Construct a message consisting of the 8-bit opcode
190 # and 24-bit local store pointer to the input
191 # parameters and place it following the stop and signal
192 ila $10, 0x3ffff # address mask
193 ilhu $11, SPE_C99_VPRINTF << 8
194 ai $12, $sp, SQWM12 # parameter pointer
195 selb $11, $11, $12, $10 # combine command & address ptr
196 brsl $10, next1a /* $10 = address of next1a, used to locate message1a */
197 next1a:
198 .type next1a, @function
199 lqr $12, message1a
200 cwd $10, message1a-next1a($10)
201 shufb $11, $11, $12, $10 # insert msg into inst word
202 stqr $11, message1a # store cmd/ptr into msg word
203 dsync
204 # Notify the PPE to perform the assisted call request
205 # by issuing a stop and signal with a signal code
206 # of 0x2100 (C99 class)
207 stop 0x2100
208 message1a:
209 .word 0 /* overwritten at run time with the opcode/pointer word built above */
210
211 # save registers
212 stqd $13, SQWM8($sp)
213 stqd $14, SQWM9($sp)
214 stqd $15, SQWM10($sp)
215 stqd $16, SQWM11($sp)
216
217 # initialize loop
218 il $13, 1 /* $13 = index printed for the current entry, starting at 1 */
219 ila $14, _ovly_buf_table /* $14 = address of the current entry */
220 ila $15, _ovly_buf_table_end /* $15 = loop bound */
221
222 loop_start1:
223 # Place input parameters onto the stack to form the
224 # local storage memory image.
225 ila $10, __ovly_buf_table_format
226 stqd $10, SQWM16($sp)
227 ai $10, $sp, SQWM13
228 stqd $10, SQWM15($sp)
229 stqd $sp, SQWM14($sp)
230 stqd $13, SQWM13($sp)
231 lqd $16, 0($14)
232 rotqby $16, $16, $14 /* rotate the addressed word into the preferred slot */
233 stqd $16, SQWM12($sp)
234 # Construct a message consisting of the 8-bit opcode
235 # and 24-bit local store pointer to the input
236 # parameters and place it following the stop and signal
237 ila $10, 0x3ffff # address mask
238 ilhu $11, SPE_C99_VPRINTF << 8
239 ai $12, $sp, SQWM16 # parameter pointer
240 selb $11, $11, $12, $10 # combine command & address ptr
241 brsl $10, next1b
242 next1b:
243 .type next1b, @function
244 lqr $12, message1b
245 cwd $10, message1b-next1b($10)
246 shufb $11, $11, $12, $10 # insert msg into inst word
247 stqr $11, message1b # store cmd/ptr into msg word
248 dsync
249 # Notify the PPE to perform the assisted call request
250 # by issuing a stop and signal with a signal code
251 # of 0x2100 (C99 class)
252 stop 0x2100
253 message1b:
254 .word 0
255
256 # move to next entry
257 ai $13, $13, 1
258 ai $14, $14, 4
259 clgt $16, $15, $14 /* continue while the end address is still above $14 (unsigned compare) */
260 brnz $16, loop_start1
261
262 # restore registers
263 lqd $16, SQWM11($sp)
264 lqd $15, SQWM10($sp)
265 lqd $14, SQWM9($sp)
266 lqd $13, SQWM8($sp)
267 lqd $12, SQWM7($sp)
268 lqd $11, SQWM6($sp)
269 lqd $10, SQWM5($sp)
270 //==============================================
271 #endif
272
273 /* Set branch hint to overlay target. */
274 hbr __ovly_load_ret, $79
275
276 /* Get caller's overlay index by back chaining through stack frames.
277 * Loop until end of stack (back chain all-zeros) or
278 * encountered a link register we set here. */
279 lqd bchn, 0($sp)
280 ila retval, __ovly_return
281
282 __ovly_backchain_loop:
283 lqd lnkr, 16(bchn) /* link register value stored in this frame */
284 lqd bchn, 0(bchn) /* step to the previous frame */
285 ceq cmp, lnkr, retval
286 ceqi cmp2, bchn, 0
287 or cmp, cmp, cmp2
288 brz cmp, __ovly_backchain_loop
289
290 /* If we reached the zero back-chain, then lnkr is bogus. Clear the
291 * part of lnkr that we use later (slot 3). */
292 rotqbyi cmp2, cmp2, 4
293 andc lnkr, lnkr, cmp2
294
295 /* Set lr = {__ovly_return, prev ovl ndx, caller return adr, callee ovl ndx}. */
296 lqd rv1, (__rv_pattern-__ovly_return+4)(retval) /* retval holds the address of __ovly_return, so this addresses __rv_pattern */
297 shufb rv2, retval, lnkr, rv1 /* words 0-1 of rv2 = retval word 0, lnkr word 3 */
298 shufb rv3, $lr, $78, rv1 /* words 2-3 of rv3 = $lr word 0, $78 word 0 */
299 fsmbi rv1, 0xff /* select mask covering bytes 8-15 */
300 selb rv2, rv2, rv3, rv1 /* words 0-1 from rv2, words 2-3 from rv3 */
301 /* If we have a tail call from one overlay function to another overlay,
302 then lr is already set up. Don't change it. */
303 ceq rv1, $lr, retval
304 fsmb rv1, rv1 /* widen the word 0 compare result into a whole-register mask */
305 selb $lr, rv2, $lr, rv1 /* keep $lr if it already starts with __ovly_return, else install rv2 */
306
307 /* Branch to $79 if non-overlay */
308 brz $78, __ovly_load_restore
309
310 /* Load values from _ovly_table[$78].
311 * extern struct {
312 * u32 vma;
313 * u32 size;
314 * u32 file_offset;
315 * u32 buf;
316 * } _ovly_table[];
317 */
318 shli off, $78, 4 /* 16 bytes per _ovly_table entry */
319 ila tab, _ovly_table - 16 /* partition numbers start at 1; 0 was handled above as non-overlay */
320 lqx vma, tab, off /* whole entry: vma, size, file_offset, buf */
321 rotqbyi buf, vma, 12 /* bring the buf field into the preferred slot */
322
323 /* Load values from _ovly_buf_table[buf].
324 * extern struct {
325 * u32 mapped;
326 * } _ovly_buf_table[];
327 */
328 ila tab, _ovly_buf_table
329 ai off, buf, -1 /* the index is buf - 1 */
330 shli off, off, 2 /* 4 bytes per _ovly_buf_table entry */
331 lqx map, tab, off
332 rotqby cur, map, off /* NOTE(review): rotates by off alone, so presumably relies on _ovly_buf_table being quadword aligned -- confirm */
333
334 /* Branch to $79 now if overlay is already mapped. */
335 ceq cmp, $78, cur
336 brnz cmp, __ovly_load_restore
337
338 /* Marker for profiling code. If we get here, we are about to load
339 * a new overlay.
340 */
341 .global __ovly_load_event
342 .type __ovly_load_event, @function
343 __ovly_load_event:
344
345 /* Set _ovly_buf_table[buf].mapped = $78. */
346 cwx genwi, tab, off /* insert control for the word at tab + off */
347 shufb map, $78, map, genwi
348 stqx map, tab, off
349
350 /* A new partition needs to be loaded. Prepare for DMA loop.
351 * _EAR_ is the 64b base EA, filled in at run time by the
352 * loader, and indicating the value for SPU executable image start.
353 */
354 lqd cgshuf, (__cg_pattern-__ovly_return+4)(retval) /* must precede the lqa below: retval and ea64 share $8; cgshuf overwrites $78 */
355 rotqbyi osize, vma, 4 /* osize = size field of the table entry */
356 rotqbyi sz, vma, 8 /* sz = file_offset field; it is the addend on the first pass of the loop */
357 lqa ea64, _EAR_
358
359 __ovly_xfer_loop:
360 /* 64b add to compute next ea64.  The addend is sz: file_offset on the first pass, the previous transfer size afterwards. */
361 rotqmbyi off64, sz, -4 /* shift the addend into word 1, the low half of the 64-bit value */
362 cg cgbits, ea64, off64
363 shufb add64, cgbits, cgbits, cgshuf /* move the carry out of word 1 into word 0 */
364 addx add64, ea64, off64
365 ori ea64, add64, 0 /* copy the sum back to ea64 */
366
367 /* Setup DMA parameters, then issue DMA request. */
368 rotqbyi ealo, add64, 4 /* low word of the address into the preferred slot */
369 ila maxsize, MFC_MAX_DMA_SIZE
370 cgt cmp, osize, maxsize
371 selb sz, osize, maxsize, cmp /* sz = the smaller of osize and maxsize */
372 ila tagid, MFC_TAG_ID
373 wrch $MFC_LSA, vma
374 wrch $MFC_EAH, ea64
375 wrch $MFC_EAL, ealo
376 wrch $MFC_Size, sz
377 wrch $MFC_TagId, tagid
378 ila cmd, MFC_GET_CMD
379 wrch $MFC_Cmd, cmd
380
381 #ifdef OVLY_PRINTFS
382 //==============================================
383 // In dma_event_hook vma=0x%08x ea=%08x%08x sz=%08x
384 //==============================================
385 # save registers
386 stqd $10, SQWM5($sp)
387 stqd $11, SQWM6($sp)
388 stqd $12, SQWM7($sp)
389 # Place input parameters onto the stack to form the
390 # local storage memory image.
391 ila $10, __dma_event_format
392 stqd $10, SQWM14($sp)
393 ai $10, $sp, SQWM11
394 stqd $10, SQWM13($sp)
395 stqd $sp, SQWM12($sp)
396 stqd vma, SQWM11($sp)
397 stqd ea64, SQWM10($sp)
398 stqd ealo, SQWM9($sp)
399 stqd sz, SQWM8($sp)
400 # Construct a message consisting of the 8-bit opcode
401 # and 24-bit local store pointer to the input
402 # parameters and place it following the stop and signal
403 ila $10, 0x3ffff # address mask
404 ilhu $11, SPE_C99_VPRINTF << 8
405 ai $12, $sp, SQWM14 # parameter pointer
406 selb $11, $11, $12, $10 # combine command & address ptr
407 brsl $10, next3a
408 next3a:
409 .type next3a, @function
410 lqr $12, message3a
411 cwd $10, message3a-next3a($10)
412 shufb $11, $11, $12, $10 # insert msg into inst word
413 stqr $11, message3a # store cmd/ptr into msg word
414 dsync
415 # Notify the PPE to perform the assisted call request
416 # by issuing a stop and signal with a signal code
417 # of 0x2100 (C99 class)
418 stop 0x2100
419 message3a:
420 .word 0
421
422 # restore registers
423 lqd $12, SQWM7($sp)
424 lqd $11, SQWM6($sp)
425 lqd $10, SQWM5($sp)
426 //==============================================
427 #endif
428
429 /* Increment vma, decrement size, branch back as needed. */
430 a vma, vma, sz
431 sf osize, sz, osize /* osize = osize - sz */
432 brnz osize, __ovly_xfer_loop
433
434 /* Save app's tagmask, wait for DMA complete, restore mask.  The mask must hold only the MFC_TAG_ID bit: ila loads it zero-extended, whereas ilh would repeat it in the upper halfword and also wait on tag MFC_TAG_ID + 16. */
435 rdch oldmask, $MFC_RdTagMask
436 #if MFC_TAG_ID < 16
437 ila newmask, 1 << MFC_TAG_ID
438 #else
439 ilhu newmask, 1 << (MFC_TAG_ID - 16)
440 #endif
441 wrch $MFC_WrTagMask, newmask
442 ila tagstat, MFC_TAG_UPDATE_ALL
443 wrch $MFC_WrTagUpdate, tagstat
444 rdch tagstat, $MFC_RdTagStat
445 sync
446 wrch $MFC_WrTagMask, oldmask
447
448 #ifdef OVLY_PRINTFS
449 //==============================================
450 // In debug_event_hook link-register=0x%08x %08x %08x %08x
451 //==============================================
452 # save registers
453 stqd $10, SQWM5($sp)
454 stqd $11, SQWM6($sp)
455 stqd $12, SQWM7($sp)
456 # Place input parameters onto the stack to form the
457 # local storage memory image.
458 ila $10, __debug_event_format
459 stqd $10, SQWM14($sp)
460 ai $10, $sp, SQWM11
461 stqd $10, SQWM13($sp)
462 stqd $sp, SQWM12($sp)
463 stqd $lr, SQWM11($sp) /* word 0 of $lr */
464 rotqbyi $10, $lr, 4
465 stqd $10, SQWM10($sp) /* word 1 of $lr */
466 rotqbyi $10, $10, 4
467 stqd $10, SQWM9($sp) /* word 2 of $lr */
468 rotqbyi $10, $10, 4
469 stqd $10, SQWM8($sp) /* word 3 of $lr */
470 # Construct a message consisting of the 8-bit opcode
471 # and 24-bit local store pointer to the input
472 # parameters and place it following the stop and signal
473 ila $10, 0x3ffff # address mask
474 ilhu $11, SPE_C99_VPRINTF << 8
475 ai $12, $sp, SQWM14 # parameter pointer
476 selb $11, $11, $12, $10 # combine command & address ptr
477 brsl $10, next2a
478 next2a:
479 .type next2a, @function
480 lqr $12, message2a
481 cwd $10, message2a-next2a($10)
482 shufb $11, $11, $12, $10 # insert msg into inst word
483 stqr $11, message2a # store cmd/ptr into msg word
484 dsync
485 # Notify the PPE to perform the assisted call request
486 # by issuing a stop and signal with a signal code
487 # of 0x2100 (C99 class)
488 stop 0x2100
489 message2a:
490 .word 0
491
492 # save registers
493 stqd $13, SQWM8($sp)
494 stqd $14, SQWM9($sp)
495 stqd $15, SQWM10($sp)
496 stqd $16, SQWM11($sp)
497
498 # initialize loop
499 il $13, 1
500 ila $14, _ovly_buf_table
501 ila $15, _ovly_buf_table_end
502
503 loop_start2:
504 # Place input parameters onto the stack to form the
505 # local storage memory image.
506 ila $10, __ovly_buf_table_format
507 stqd $10, SQWM16($sp)
508 ai $10, $sp, SQWM13
509 stqd $10, SQWM15($sp)
510 stqd $sp, SQWM14($sp)
511 stqd $13, SQWM13($sp)
512 lqd $16, 0($14)
513 rotqby $16, $16, $14 /* rotate the addressed word into the preferred slot */
514 stqd $16, SQWM12($sp)
515 # Construct a message consisting of the 8-bit opcode
516 # and 24-bit local store pointer to the input
517 # parameters and place it following the stop and signal
518 ila $10, 0x3ffff # address mask
519 ilhu $11, SPE_C99_VPRINTF << 8
520 ai $12, $sp, SQWM16 # parameter pointer
521 selb $11, $11, $12, $10 # combine command & address ptr
522 brsl $10, next2b
523 next2b:
524 .type next2b, @function
525 lqr $12, message2b
526 cwd $10, message2b-next2b($10)
527 shufb $11, $11, $12, $10 # insert msg into inst word
528 stqr $11, message2b # store cmd/ptr into msg word
529 dsync
530 # Notify the PPE to perform the assisted call request
531 # by issuing a stop and signal with a signal code
532 # of 0x2100 (C99 class)
533 stop 0x2100
534 message2b:
535 .word 0
536
537 # move to next entry
538 ai $13, $13, 1
539 ai $14, $14, 4
540 clgt $16, $15, $14
541 brnz $16, loop_start2
542
543 # restore registers
544 lqd $16, SQWM11($sp)
545 lqd $15, SQWM10($sp)
546 lqd $14, SQWM9($sp)
547 lqd $13, SQWM8($sp)
548 lqd $12, SQWM7($sp)
549 lqd $11, SQWM6($sp)
550 lqd $10, SQWM5($sp)
551 //==============================================
552 #endif
553
554 .global _ovly_debug_event
555 .type _ovly_debug_event, @function
556 _ovly_debug_event:
557 /* GDB inserts debugger trap here.  Only reached after a new overlay was loaded; the other paths branch directly to __ovly_load_restore. */
558 nop
559
560 __ovly_load_restore:
561 #ifdef OVLY_IRQ_SAVE
562 /* Conditionally re-enable interrupts. */
563 andi irq_stat, irq_stat, 1 /* keep only bit 0 of the machine status read on entry */
564 ila irqtmp, __ovly_irq_restore
565 binze irq_stat, irqtmp /* taken only when that bit was set; the binze form enables interrupts */
566 __ovly_irq_restore:
567 lqd $9, -64($sp)
568 #endif
569
570 /* Restore saved registers. */
571 lqd $8, -48($sp)
572 lqd $7, -32($sp)
573 lqd $6, -16($sp)
574
575 __ovly_load_ret:
576 /* Branch to target address.  This is the instruction named by the hbr hint in __ovly_load. */
577 bi $79
578
579 .size __ovly_load, . - __ovly_load
This page took 0.045486 seconds and 5 git commands to generate.