* bsd-uthread.c (bsd_uthread_wait): Don't try to fetch thread IDs
[deliverable/binutils-gdb.git] / ld / emultempl / spu_ovl.S
CommitLineData
e9f53129
AM
1/* Overlay manager for SPU.
2
45d3b878 3 Copyright 2006, 2007 Free Software Foundation, Inc.
e9f53129
AM
4
5 This file is part of GLD, the Gnu Linker.
6
7 GLD is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GLD is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GLD; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
20 02110-1301, USA. */
21
22/**
23 * MFC DMA definitions used by __ovly_load:
 *   MFC_GET_CMD        - value written to $MFC_Cmd to queue each transfer.
 *   MFC_MAX_DMA_SIZE   - upper bound on the size of a single transfer;
 *                        a larger overlay is fetched in several requests.
 *   MFC_TAG_UPDATE_ALL - value written to $MFC_WrTagUpdate before waiting
 *                        on $MFC_RdTagStat.
 *   MFC_TAG_ID         - tag group written to $MFC_TagId, also used to
 *                        build the tag mask that is waited on.
24 */
25#define MFC_GET_CMD 0x40
26#define MFC_MAX_DMA_SIZE 0x4000
27#define MFC_TAG_UPDATE_ALL 2
28#define MFC_TAG_ID 0
29
30
31/**
32 * Temporary register allocations.
 * Each group of names below aliases ONE physical register; a name simply
 * records the role that register plays at a given point in __ovly_load.
 * Names that share a register must therefore never be live at the same
 * time.
33 * $6, $7 and $8 (plus $9 when OVLY_IRQ_SAVE is defined) are saved to the
 * stack on entry to __ovly_load and restored before it branches away.
 * $75-$77 are used without being saved.  cgshuf reuses $78, which is also
 * the partition-number input of __ovly_load; it is loaded only after the
 * last read of that input.
34 */
35#define tab $75
36#define cgbits $75
37#define add64 $75
38#define ealo $75
39#define newmask $75
40#define tagstat $75
41#define bchn $75
42#define rv1 $75
43
44#define off $76
45#define off64 $76
46#define maxsize $76
47#define oldmask $76
48#define sz $76
49#define lnkr $76
50#define rv2 $76
51
52#define cur $77
53#define cmp $77
54#define buf $77
55#define genwi $77
56#define tagid $77
57#define cmd $77
58#define rv3 $77
59
60#define cgshuf $78
61
62#define vma $6
63
64#define map $7
b1e37473 65#define osize $7
e9f53129
AM
66#define cmp2 $7
67
68#define ea64 $8
69#define retval $8
70
71#ifdef OVLY_IRQ_SAVE
72#define irqtmp $8
73#define irq_stat $9
74#endif
75
/* Overlay tables referenced by __ovly_load; they are defined elsewhere. */
76 .extern _ovly_table
77 .extern _ovly_buf_table
78
79 .text
80 .align 4
a7e4b3fe
AM
/* shufb control word used by __ovly_load to assemble the new $lr.
 * Control bytes 0x00-0x0f select bytes of the first source operand and
 * 0x10-0x1f select bytes of the second, so the shuffled result is
 * {first.word0, second.word3, first.word0, second.word0}. */
81 .type __rv_pattern, @object
82 .size __rv_pattern, 16
e9f53129
AM
83__rv_pattern:
84 .word 0x00010203, 0x1c1d1e1f, 0x00010203, 0x10111213
a7e4b3fe
AM
/* shufb control word used by the 64-bit effective-address add in
 * __ovly_load: it moves word 1 of the source (the carry out of the low
 * 32 bits) into word 0 and zeroes the other three words (a 0x80 control
 * byte produces 0x00). */
85 .type __cg_pattern, @object
86 .size __cg_pattern, 16
e9f53129
AM
87__cg_pattern:
88 .word 0x04050607, 0x80808080, 0x80808080, 0x80808080
89
90/**
91 * __ovly_return - stub for returning from overlay functions.
92 *
93 * inputs:
94 * $lr link register, in the layout built by __ovly_load:
 *     {__ovly_return, prev ovl ndx, caller return adr, callee ovl ndx}
95 *
96 * outputs:
97 * $78 old partition number, to be reloaded
98 * $79 return address in old partition
 *
 * If the old partition number is zero the stub branches straight to $79.
 * Otherwise execution falls through into __ovly_load, which is placed
 * immediately after this stub and takes $78/$79 as its inputs.
99 */
100 .global __ovly_return
101 .type __ovly_return, @function
102
/* One word of padding: it places __ovly_return 36 bytes after
 * __rv_pattern (two 16-byte patterns plus this word).  __ovly_load
 * addresses the patterns as (pattern - __ovly_return + 4), i.e. with
 * displacements of -32 and -16.  Presumably the pad exists to keep those
 * displacements multiples of 16, assuming ".align 4" above yields 16-byte
 * alignment -- TODO confirm. */
103 .word 0
104__ovly_return:
/* Shift words 1 and 2 of $lr into the preferred slot (word 0). */
105 shlqbyi $78, $lr, 4
106 shlqbyi $79, $lr, 8
/* Branch to $79 when $78 is zero; otherwise fall through. */
107 biz $78, $79
b1e37473 108 .size __ovly_return, . - __ovly_return
e9f53129
AM
109
110/**
111 * __ovly_load - copy an overlay partition to local store.
112 *
113 * inputs:
114 * $78 partition number to be loaded.
115 * $79 branch target in new partition.
116 * $lr link register, containing return addr.
117 *
118 * outputs:
119 * $lr new link register, returning through __ovly_return.
120 *
121 * Copy a new overlay partition into local store, or return
122 * immediately if the partition is already resident.
 *
 * $6-$8 (and $9 with OVLY_IRQ_SAVE) are preserved.  $75-$77 are always
 * overwritten, and $78 is overwritten when a load is actually performed.
 * Control leaves through "bi $79"; this routine never returns to $lr.
123 */
124 .global __ovly_load
125 .type __ovly_load, @function
126
127__ovly_load:
128/* Save temporary registers below the stack pointer; $sp is not adjusted. */
129 stqd $6, -16($sp)
130 stqd $7, -32($sp)
131 stqd $8, -48($sp)
132
133#ifdef OVLY_IRQ_SAVE
134/* Save irq state, then disable interrupts. */
135 stqd $9, -64($sp)
136 ila irqtmp, __ovly_irq_save
137 rdch irq_stat, $SPU_RdMachStat
138 bid irqtmp
139__ovly_irq_save:
140#endif
141
142/* Set branch hint to overlay target. */
143 hbr __ovly_load_ret, $79
144
145/* Get caller's overlay index by back chaining through stack frames.
146 * Loop until end of stack (back chain all-zeros) or until we
147 * encounter a link register we set here. */
148 lqd bchn, 0($sp)
149 ila retval, __ovly_return
150
151__ovly_backchain_loop:
/* lnkr = quadword at offset 16 of the current frame, bchn = next frame. */
152 lqd lnkr, 16(bchn)
153 lqd bchn, 0(bchn)
154 ceq cmp, lnkr, retval
155 ceqi cmp2, bchn, 0
156 or cmp, cmp, cmp2
157 brz cmp, __ovly_backchain_loop
158
159/* If we reached the zero back-chain, then lnkr is bogus. Clear the
160 * part of lnkr that we use later (slot 3).  The rotate moves the
 * "back chain == 0" result from slot 0 into slot 3 before masking. */
161 rotqbyi cmp2, cmp2, 4
162 andc lnkr, lnkr, cmp2
163
164/* Set lr = {__ovly_return, prev ovl ndx, caller return adr, callee ovl ndx}. */
/* The displacement (__rv_pattern - __ovly_return + 4) is -32, given the
 * layout of the two 16-byte patterns and the pad word before
 * __ovly_return.  rv2 supplies words 0-1 (from retval and lnkr) and rv3
 * supplies words 2-3 (from $lr and $78); the 0xff byte mask selects the
 * upper eight bytes from rv3. */
165 lqd rv1, (__rv_pattern-__ovly_return+4)(retval)
166 shufb rv2, retval, lnkr, rv1
167 shufb rv3, $lr, $78, rv1
168 fsmbi rv1, 0xff
107eb3fc
AM
169 selb rv2, rv2, rv3, rv1
170/* If we have a tail call from one overlay function to another overlay,
171 then lr is already set up. Don't change it. */
172 ceq rv1, $lr, retval
173 fsmb rv1, rv1
174 selb $lr, rv2, $lr, rv1
e9f53129
AM
175
176/* Partition 0 means the target is not in an overlay: skip the load and
 * go to the restore path, which ends by branching to $79. */
177 brz $78, __ovly_load_restore
178
179/* Load values from _ovly_table[$78].
180 * extern struct {
181 * u32 vma;
182 * u32 size;
183 * u32 file_offset;
184 * u32 buf;
185 * } _ovly_table[];
 * Partition numbers are 1-based, hence the -16 bias on the table address.
 * The whole 16-byte entry lands in the "vma" register; the rotate brings
 * the buf field (word 3) into the preferred slot.
186 */
187 shli off, $78, 4
188 ila tab, _ovly_table - 16
189 lqx vma, tab, off
190 rotqbyi buf, vma, 12
191
192/* Load values from _ovly_buf_table[buf].
193 * extern struct {
194 * u32 mapped;
195 * } _ovly_buf_table[];
 * buf is 1-based: the byte offset used is (buf - 1) * 4.  lqx fetches the
 * enclosing quadword and rotqby brings the addressed word into the
 * preferred slot.
196 */
197 ila tab, _ovly_buf_table
198 ai off, buf, -1
199 shli off, off, 2
200 lqx map, tab, off
201 rotqby cur, map, off
202
203/* Branch to $79 now if overlay is already mapped. */
204 ceq cmp, $78, cur
205 brnz cmp, __ovly_load_restore
206
b1e37473
AM
207/* Marker for profiling code. If we get here, we are about to load
208 * a new overlay.
209 */
210 .global __ovly_load_event
211 .type __ovly_load_event, @function
212__ovly_load_event:
213
e9f53129
AM
214/* Set _ovly_buf_table[buf].mapped = $78 (word insert into the quadword
 * loaded above, then store the quadword back). */
215 cwx genwi, tab, off
216 shufb map, $78, map, genwi
217 stqx map, tab, off
218
219/* A new partition needs to be loaded. Prepare for DMA loop.
220 * _EAR_ is the 64b base EA, filled in at run time by the
221 * loader, and indicating the value for SPU executable image start.
 * From here on $78 holds cgshuf, not the partition number.  osize is the
 * number of bytes still to transfer; sz starts out as file_offset.
222 */
223 lqd cgshuf, (__cg_pattern-__ovly_return+4)(retval)
b1e37473 224 rotqbyi osize, vma, 4
e9f53129
AM
225 rotqbyi sz, vma, 8
226 lqa ea64, _EAR_
227
228__ovly_xfer_loop:
229/* 64b add to compute next ea64.  The addend comes from sz: file_offset
 * on the first pass, the size of the previous chunk afterwards.  It is
 * shifted into word 1 so that it lines up with the low half of the EA;
 * cg/shufb/addx propagate the carry from word 1 into word 0. */
230 rotqmbyi off64, sz, -4
231 cg cgbits, ea64, off64
232 shufb add64, cgbits, cgbits, cgshuf
233 addx add64, ea64, off64
234 ori ea64, add64, 0
235
236/* Setup DMA parameters, then issue DMA request.
 * sz = min(osize, MFC_MAX_DMA_SIZE). */
237 rotqbyi ealo, add64, 4
238 ila maxsize, MFC_MAX_DMA_SIZE
b1e37473
AM
239 cgt cmp, osize, maxsize
240 selb sz, osize, maxsize, cmp
e9f53129
AM
241 ila tagid, MFC_TAG_ID
242 wrch $MFC_LSA, vma
243 wrch $MFC_EAH, ea64
244 wrch $MFC_EAL, ealo
245 wrch $MFC_Size, sz
246 wrch $MFC_TagId, tagid
247 ila cmd, MFC_GET_CMD
248 wrch $MFC_Cmd, cmd
249
250/* Increment vma, decrement size, branch back as needed. */
251 a vma, vma, sz
b1e37473
AM
252 sf osize, sz, osize
253 brnz osize, __ovly_xfer_loop
e9f53129
AM
254
255/* Save app's tagmask, wait for DMA complete, restore mask. */
/* NOTE(review): ilh places the immediate in every halfword, so for
 * MFC_TAG_ID < 16 the mask appears to have bit (MFC_TAG_ID + 16) set as
 * well, making the wait below also cover that tag group -- confirm
 * whether this is intended. */
256 rdch oldmask, $MFC_RdTagMask
257#if MFC_TAG_ID < 16
258 ilh newmask, 1 << MFC_TAG_ID
259#else
260 ilhu newmask, 1 << (MFC_TAG_ID - 16)
261#endif
262 wrch $MFC_WrTagMask, newmask
263 ila tagstat, MFC_TAG_UPDATE_ALL
264 wrch $MFC_WrTagUpdate, tagstat
265 rdch tagstat, $MFC_RdTagStat
266 sync
267 wrch $MFC_WrTagMask, oldmask
268
269 .global _ovly_debug_event
270 .type _ovly_debug_event, @function
271_ovly_debug_event:
272/* GDB inserts debugger trap here. */
273 nop
274
275__ovly_load_restore:
276#ifdef OVLY_IRQ_SAVE
277/* Conditionally re-enable interrupts: bit 0 of the machine status read
 * on entry decides whether the branch to the next label enables them. */
278 andi irq_stat, irq_stat, 1
279 ila irqtmp, __ovly_irq_restore
280 binze irq_stat, irqtmp
281__ovly_irq_restore:
282 lqd $9, -64($sp)
283#endif
284
285/* Restore saved registers. */
286 lqd $8, -48($sp)
287 lqd $7, -32($sp)
288 lqd $6, -16($sp)
289
290__ovly_load_ret:
291/* Branch to target address. */
292 bi $79
b1e37473
AM
293
294 .size __ovly_load, . - __ovly_load
This page took 0.078002 seconds and 4 git commands to generate.