bfd/
[deliverable/binutils-gdb.git] / ld / emultempl / spu_ovl.S
1 /* Overlay manager for SPU.
2
3 Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
4
5 This file is part of the GNU Binutils.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
20 MA 02110-1301, USA. */
21
22 /* MFC DMA definitions used by the overlay transfer loop and tag wait. */
/* Command code written to $MFC_Cmd for every transfer issued by the loader. */
23 #define MFC_GET_CMD 0x40
/* Upper bound on one transfer; the loader splits larger overlays into
   chunks of at most this many bytes. */
24 #define MFC_MAX_DMA_SIZE 0x4000
/* Value written to $MFC_WrTagUpdate before the loader reads $MFC_RdTagStat. */
25 #define MFC_TAG_UPDATE_ALL 2
/* Tag id written to $MFC_TagId; 1 << MFC_TAG_ID is also the tag mask the
   loader installs while it waits for its transfers. */
26 #define MFC_TAG_ID 0
27
28 /* Register usage.
   Eight hardware registers ($72-$79) are shared by all the symbolic names
   below.  Names defined on the same register alias each other, so only one
   of them holds a meaningful value at any point in the code; the order of
   instructions in this file is what keeps their live ranges apart.

   reserved1-reserved5 ($75-$79) are used without being saved.
   save1-save3 ($72-$74) are stored below $sp (at -16, -32 and -48) on entry
   and reloaded from there before control reaches the target.  */
/* $75: stub parameter (parm) and table bases / entries / tag mask. */
29 #define reserved1 $75
30 #define parm $75
31 #define tab1 reserved1
32 #define tab2 reserved1
33 #define vma reserved1
34 #define oldvma reserved1
35 #define newmask reserved1
36 #define map reserved1
37
/* $76: table offsets, present-bit tests, DMA chunk size and carry temps. */
38 #define reserved2 $76
39 #define off1 reserved2
40 #define off2 reserved2
41 #define present1 reserved2
42 #define present2 reserved2
43 #define sz reserved2
44 #define cmp reserved2
45 #define add64 reserved2
46 #define cgbits reserved2
47 #define off3 reserved2
48 #define off4 reserved2
49 #define addr4 reserved2
50 #define off5 reserved2
51 #define tagstat reserved2
52
/* $77: rotated .size word, shuffle pattern, DMA address/command, table bases. */
53 #define reserved3 $77
54 #define size1 reserved3
55 #define size2 reserved3
56 #define rv3 reserved3
57 #define ealo reserved3
58 #define cmd reserved3
59 #define off64 reserved3
60 #define tab3 reserved3
61 #define tab4 reserved3
62 #define tab5 reserved3
63
/* $78: overlay index being switched to, plus short-lived temps. */
64 #define reserved4 $78
65 #define ovl reserved4
66 #define rv2 reserved4
67 #define rv5 reserved4
68 #define cgshuf reserved4
69 #define newovl reserved4
70
/* $79: address branched to once the overlay is resident. */
71 #define reserved5 $79
72 #define target reserved5
73
/* $72: first spilled register (stack slot -16($sp)). */
74 #define save1 $72
75 #define rv4 save1
76 #define rv7 save1
77 #define tagid save1
78 #define maxsize save1
79 #define pbyte save1
80 #define pbit save1
81
/* $73: second spilled register (stack slot -32($sp)). */
82 #define save2 $73
83 #define cur save2
84 #define rv6 save2
85 #define osize save2
86 #define zovl save2
87 #define oldovl save2
88 #define newvma save2
89
/* $74: third spilled register (stack slot -48($sp)). */
90 #define save3 $74
91 #define rv1 save3
92 #define ea64 save3
93 #define buf3 save3
94 #define genwi save3
95 #define newmap save3
96 #define oldmask save3
97
98
99 .text
100 .align 4
/* The three 16-byte objects below live in .text next to the code that uses
   them with lqr/stqr.  Each is exactly one quadword, so the alignment set up
   by the .align above carries through all three. */
/* shufb control used by __ovly_load when it builds a new $lr: bytes
   0x00-0x03 pick word 0 of the first source (the __ovly_return address),
   bytes 0x10-0x13 pick word 0 of the second source (the current overlay
   index), and the 0x80 bytes produce zeros in the last two words. */
101 .type __rv_pattern, @object
102 .size __rv_pattern, 16
103 __rv_pattern:
104 .word 0x00010203, 0x10111213, 0x80808080, 0x80808080
105
/* shufb control used by the DMA loop: copies word 1 of the source into
   word 0 and zeroes everything else, which moves the carry out of the low
   address word into the position addx consumes for the high word. */
106 .type __cg_pattern, @object
107 .size __cg_pattern, 16
108 __cg_pattern:
109 .word 0x04050607, 0x80808080, 0x80808080, 0x80808080
110
/* One quadword of state: written with "stqr ovl" and read back with lqr,
   so word 0 holds the index of the overlay most recently switched to. */
111 .type __ovly_current, @object
112 .size __ovly_current, 16
113 __ovly_current:
114 .space 16
115
116 /*
117 * __ovly_return - stub for returning from overlay functions.
118 *
119 * On entry the four slots of $lr are:
120 * __ovly_return, prev ovl index, caller return addr, undefined.
121 *
122 * Load the previous overlay and jump to the caller return address.
123 * Updates __ovly_current.
 *
 * save1-save3 are stored below $sp before the presence test.  This routine
 * does not modify them itself; only the do_load path does, and that path
 * reloads them from the same stack slots before branching to target.
 *
 * NOTE(review): the trailing "# p,l c" annotations appear to give issue
 * pipeline, latency and scheduled cycle, and the '#'-prefixed nop/lnop
 * lines appear to mark idle issue slots - confirm against the SPU
 * pipeline documentation.
124 */
125 .align 4
126 .global __ovly_return
127 .type __ovly_return, @function
128 __ovly_return:
/* Table base is biased by one 16-byte entry, so (index << 4) addresses
   _ovly_table[index - 1]. */
129 ila tab1, _ovly_table - 16 # 0,2 0
/* Shift slot 1 of $lr (previous overlay index) into slot 0. */
130 shlqbyi ovl, $lr, 4 # 1,4 0
131 #nop
/* Shift slot 2 of $lr (caller return address) into slot 0. */
132 shlqbyi target, $lr, 8 # 1,4 1
133 #nop; lnop
134 #nop; lnop
135 shli off1, ovl, 4 # 0,4 4
136 #lnop
137 #nop
/* Hint the "bi target" at ovly_ret9. */
138 hbr ovly_ret9, target # 1,15 5
139 #nop; lnop
140 #nop; lnop
141 #nop
/* vma = the whole 16-byte table entry for the overlay being returned to. */
142 lqx vma, tab1, off1 # 1,6 8
143 #nop; lnop
144 #nop; lnop
145 #nop; lnop
146 #nop; lnop
147 #nop; lnop
148 #nop
/* Bring the second word of the entry (.size) into slot 0. */
149 rotqbyi size1, vma, 4 # 1,4 14
150 #nop
151 stqd save3, -48($sp) # 1,6 15
152 #nop
153 stqd save2, -32($sp) # 1,6 16
154 #nop
155 stqd save1, -16($sp) # 1,6 17
/* The low bit of .size is the "overlay is loaded" flag. */
156 andi present1, size1, 1 # 0,2 18
157 stqr ovl, __ovly_current # 1,6 18
158 #nop; lnop
159 #nop
/* Flag clear: the overlay is not resident, go and load it. */
160 brz present1, do_load # 1,4 20
161 ovly_ret9:
162 #nop
163 bi target # 1,4 21
164
165 /*
166 * __ovly_load - copy an overlay partition to local store.
167 *
168 * On entry $75 points to a word consisting of the overlay index in
169 * the top 14 bits, and the target address in the bottom 18 bits.
 * (This describes the OVL_STUB_SIZE == 8 path.  The other path reads
 * ovl ($78) and target ($79) without setting them, so they must already
 * hold the overlay index and the target address on entry.)
170 *
171 * Sets up $lr to return via __ovly_return.
172 * Updates __ovly_current.
 *
 * If slot 0 of $lr already equals __ovly_return, $lr is left unchanged.
 * Otherwise the new $lr is
 *   { __ovly_return, previous __ovly_current, old $lr word 0, old $lr word 1 }.
 * save1-save3 are spilled below $sp, used as temporaries, and reloaded
 * before the final branch; the stack copies remain for do_load to reload.
173 */
174 .align 3
175 .global __ovly_load
176 .type __ovly_load, @function
177 __ovly_load:
178 #if OVL_STUB_SIZE == 8
179 ########
180 #nop
/* Fetch the quadword containing the stub's parameter word ... */
181 lqd target, 0(parm) # 1,6 -11
182 #nop; lnop
183 #nop; lnop
184 #nop; lnop
185 #nop; lnop
186 #nop; lnop
187 #nop
/* ... and rotate that word into slot 0. */
188 rotqby target, target, parm # 1,4 -5
/* Table base biased by one entry: (index << 4) addresses entry index - 1. */
189 ila tab2, _ovly_table - 16 # 0,2 -4
190 stqd save3, -48($sp) # 1,6 -4
191 #nop
192 stqd save2, -32($sp) # 1,6 -3
193 #nop
194 stqd save1, -16($sp) # 1,6 -2
/* Overlay index = top 14 bits of the parameter word. */
195 rotmi ovl, target, -18 # 0,4 -1
196 hbr ovly_load9, target # 1,15 -1
197 ila rv1, __ovly_return # 0,2 0
198 #lnop
199 #nop; lnop
200 #nop
/* Read the outgoing overlay index before overwriting __ovly_current. */
201 lqr cur, __ovly_current # 1,6 2
202 shli off2, ovl, 4 # 0,4 3
203 stqr ovl, __ovly_current # 1,6 3
/* rv2 word 0 is all ones when $lr already points at __ovly_return. */
204 ceq rv2, $lr, rv1 # 0,2 4
205 lqr rv3, __rv_pattern # 1,6 4
206 #nop; lnop
207 #nop; lnop
208 #nop
/* vma = the whole 16-byte table entry for the requested overlay. */
209 lqx vma, tab2, off2 # 1,6 7
210 ########
211 #else /* OVL_STUB_SIZE == 16 */
212 ########
/* Same work as the branch above, except that ovl and target are taken as
   already set on entry. */
213 ila tab2, _ovly_table - 16 # 0,2 0
214 stqd save3, -48($sp) # 1,6 0
215 ila rv1, __ovly_return # 0,2 1
216 stqd save2, -32($sp) # 1,6 1
217 shli off2, ovl, 4 # 0,4 2
218 lqr cur, __ovly_current # 1,6 2
219 nop
220 stqr ovl, __ovly_current # 1,6 3
221 ceq rv2, $lr, rv1 # 0,2 4
222 lqr rv3, __rv_pattern # 1,6 4
223 #nop
224 hbr ovly_load9, target # 1,15 5
225 #nop
226 lqx vma, tab2, off2 # 1,6 6
227 #nop
228 stqd save1, -16($sp) # 1,6 7
229 ########
230 #endif
231
232 #nop; lnop
233 #nop; lnop
234 #nop
/* rv4 = { __ovly_return, cur overlay index, 0, 0 }. */
235 shufb rv4, rv1, cur, rv3 # 1,4 10
236 #nop
/* Expand the compare result into a full 128-bit select mask. */
237 fsmb rv5, rv2 # 1,4 11
238 #nop
/* rv6 = { 0, 0, old $lr word 0, old $lr word 1 }. */
239 rotqmbyi rv6, $lr, -8 # 1,4 12
240 #nop
/* Bring the second word of the entry (.size) into slot 0. */
241 rotqbyi size2, vma, 4 # 1,4 13
242 #nop
/* Start restoring the spilled registers as their temporaries die. */
243 lqd save3, -48($sp) # 1,6 14
244 #nop; lnop
245 or rv7, rv4, rv6 # 0,2 16
246 lqd save2, -32($sp) # 1,6 16
/* The low bit of .size is the "overlay is loaded" flag. */
247 andi present2, size2, 1 # 0,2 17
248 lnop # 1,0 17
/* Keep $lr where the mask is set, otherwise install rv7. */
249 selb $lr, rv7, $lr, rv5 # 0,2 18
250 lqd save1, -16($sp) # 1,6 18
251 #nop
/* Flag clear: the overlay is not resident, go and load it. */
252 brz present2, do_load # 1,4 19
253 ovly_load9:
254 #nop
255 bi target # 1,4 20
256
257 /* If we get here, we are about to load a new overlay.
258 * "vma" contains the relevant entry from _ovly_table[].
259 * extern struct {
260 * u32 vma;
261 * u32 size;
262 * u32 file_offset;
263 * u32 buf;
264 * } _ovly_table[];
 *
 * Also expected on entry: "target" holds the address to branch to at the
 * end, __ovly_current holds the index of the overlay being loaded, and
 * save1-save3 have been stored at -16/-32/-48($sp); they are reloaded
 * from there before the final branch.
265 */
266 .align 3
267 .global __ovly_load_event
268 .type __ovly_load_event, @function
/* __ovly_load_event is a global alias for the local entry point do_load. */
269 __ovly_load_event:
270 do_load:
271 #nop
/* sz starts as .file_offset so the first loop pass adds it to the base
   effective address. */
272 rotqbyi sz, vma, 8 # 1,4 0
273 #nop
/* osize = .size, the number of bytes still to transfer. */
274 rotqbyi osize, vma, 4 # 1,4 1
275 #nop
/* ea64 = quadword at _EAR_; words 0 and 1 are used as the high and low
   halves of the effective address. */
276 lqa ea64, _EAR_ # 1,6 2
277 #nop
278 lqr cgshuf, __cg_pattern # 1,6 3
279
280 /* We could predict the branch at the end of this loop by adding a few
281 instructions, and there are plenty of free cycles to do so without
282 impacting loop execution time. However, it doesn't make a great
283 deal of sense since we need to wait for the dma to complete anyway. */
/* Each pass advances ea64 by sz (the file offset on the first pass, the
   previous chunk size afterwards), then queues one transfer of
   min(osize, MFC_MAX_DMA_SIZE) bytes to local-store address vma. */
284 __ovly_xfer_loop:
285 #nop
/* off64 = { 0, sz, ... }: sz widened to a 64-bit addend. */
286 rotqmbyi off64, sz, -4 # 1,4 4
287 #nop; lnop
288 #nop; lnop
289 #nop; lnop
/* 64-bit add: generate per-word carries, move the low-word carry up to
   word 0, then add with carry. */
290 cg cgbits, ea64, off64 # 0,2 8
291 #lnop
292 #nop; lnop
293 #nop
294 shufb add64, cgbits, cgbits, cgshuf # 1,4 10
295 #nop; lnop
296 #nop; lnop
297 #nop; lnop
298 addx add64, ea64, off64 # 0,2 14
299 #lnop
300 ila maxsize, MFC_MAX_DMA_SIZE # 0,2 15
301 lnop
/* Keep the sum in ea64 for the next pass; ealo = its low word in slot 0. */
302 ori ea64, add64, 0 # 0,2 16
303 rotqbyi ealo, add64, 4 # 1,4 16
304 cgt cmp, osize, maxsize # 0,2 17
305 wrch $MFC_LSA, vma # 1,6 17
306 #nop; lnop
/* sz = maxsize where osize > maxsize, otherwise osize. */
307 selb sz, osize, maxsize, cmp # 0,2 19
308 wrch $MFC_EAH, ea64 # 1,6 19
309 ila tagid, MFC_TAG_ID # 0,2 20
310 wrch $MFC_EAL, ealo # 1,6 20
311 ila cmd, MFC_GET_CMD # 0,2 21
312 wrch $MFC_Size, sz # 1,6 21
/* osize -= sz (bytes remaining). */
313 sf osize, sz, osize # 0,2 22
314 wrch $MFC_TagId, tagid # 1,6 22
/* Advance the local-store destination by the chunk just queued. */
315 a vma, vma, sz # 0,2 23
316 wrch $MFC_Cmd, cmd # 1,6 23
317 #nop
318 brnz osize, __ovly_xfer_loop # 1,4 24
319
320 /* Now update our data structures while waiting for DMA to complete.
321 Low bit of .size needs to be cleared on the _ovly_table entry
322 corresponding to the evicted overlay, and set on the entry for the
323 newly loaded overlay. Note that no overlay may in fact be evicted
324 as _ovly_buf_table[] starts with all zeros. Don't zap .size entry
325 for zero index! Also of course update the _ovly_buf_table entry. */
326 #nop
327 lqr newovl, __ovly_current # 1,6 25
328 #nop; lnop
329 #nop; lnop
330 #nop; lnop
331 #nop; lnop
332 #nop; lnop
333 shli off3, newovl, 4 # 0,4 31
334 #lnop
335 ila tab3, _ovly_table - 16 # 0,2 32
336 #lnop
337 #nop
/* pbyte/pbit: build a quadword whose only set bit is the low bit of
   word 1, i.e. the "loaded" flag position within a table entry. */
338 fsmbi pbyte, 0x100 # 1,4 33
339 #nop; lnop
340 #nop
/* Reload the table entry; vma was advanced by the transfer loop. */
341 lqx vma, tab3, off3 # 1,6 35
342 #nop; lnop
343 andi pbit, pbyte, 1 # 0,2 37
344 lnop
345 #nop; lnop
346 #nop; lnop
347 #nop; lnop
/* Mark the new overlay as loaded and pick up its .buf word. */
348 or newvma, vma, pbit # 0,2 41
349 rotqbyi buf3, vma, 12 # 1,4 41
350 #nop; lnop
351 #nop
352 stqx newvma, tab3, off3 # 1,6 43
353 #nop; lnop
/* _ovly_buf_table is indexed by .buf with 4-byte entries; the base is
   biased by one entry in the same way as the _ovly_table base. */
354 shli off4, buf3, 2 # 0,4 45
355 #lnop
356 ila tab4, _ovly_buf_table - 4 # 0,2 46
357 #lnop
358 #nop; lnop
359 #nop; lnop
360 #nop
/* map = quadword holding this buffer's entry; genwi = shufb control that
   inserts a word at that entry's position. */
361 lqx map, tab4, off4 # 1,6 49
362 #nop
363 cwx genwi, tab4, off4 # 1,4 50
364 a addr4, tab4, off4 # 0,2 51
365 #lnop
366 #nop; lnop
367 #nop; lnop
368 #nop; lnop
369 #nop
/* oldovl = index previously recorded for this buffer (0 if none). */
370 rotqby oldovl, map, addr4 # 1,4 55
371 #nop
/* newmap = map with this buffer's entry replaced by newovl. */
372 shufb newmap, newovl, map, genwi # 1,4 56
/* newmask = tag mask with only the MFC_TAG_ID bit set; ilhu covers tag
   ids whose bit lies in the upper halfword. */
373 #if MFC_TAG_ID < 16
374 ila newmask, 1 << MFC_TAG_ID # 0,2 57
375 #else
376 ilhu newmask, 1 << (MFC_TAG_ID - 16) # 0,2 57
377 #endif
378 #lnop
379 #nop; lnop
380 #nop; lnop
381 stqd newmap, 0(addr4) # 1,6 60
382
383 /* Save app's tagmask, wait for DMA complete, restore mask. */
384 ila tagstat, MFC_TAG_UPDATE_ALL # 0,2 61
385 rdch oldmask, $MFC_RdTagMask # 1,6 61
386 #nop
387 wrch $MFC_WrTagMask, newmask # 1,6 62
388 #nop
389 wrch $MFC_WrTagUpdate, tagstat # 1,6 63
390 #nop
/* The read of the tag status is the wait; its value is not used afterwards. */
391 rdch tagstat, $MFC_RdTagStat # 1,6 64
392 #nop
393 sync # 1,4 65
394 /* Any hint prior to the sync is lost. A hint here allows the branch
395 to complete 15 cycles after the hint. With no hint the branch will
396 take 18 or 19 cycles. */
397 ila tab5, _ovly_table - 16 # 0,2 66
398 hbr do_load99, target # 1,15 66
/* off5 must be computed before zovl overwrites oldovl (same register). */
399 shli off5, oldovl, 4 # 0,4 67
400 wrch $MFC_WrTagMask, oldmask # 1,6 67
/* zovl = all ones when no overlay was evicted (oldovl == 0). */
401 ceqi zovl, oldovl, 0 # 0,2 68
402 #lnop
403 #nop; lnop
404 #nop
405 fsm zovl, zovl # 1,4 70
406 #nop
407 lqx oldvma, tab5, off5 # 1,6 71
408 #nop
/* Restore the spilled registers as their last uses complete. */
409 lqd save3, -48($sp) # 1,6 72
410 #nop; lnop
/* Zero pbit when oldovl == 0, so the store below writes the quadword
   back unchanged instead of clearing a flag for index zero. */
411 andc pbit, pbit, zovl # 0,2 74
412 lqd save2, -32($sp) # 1,6 74
413 #nop; lnop
414 #nop; lnop
/* Clear the "loaded" flag of the evicted overlay's entry. */
415 andc oldvma, oldvma, pbit # 0,2 77
416 lqd save1, -16($sp) # 1,6 77
417 #nop; lnop
418 nop
419 stqx oldvma, tab5, off5 # 1,6 79
420 #nop; lnop
421
/* Global label on the nop just before the final branch.
   NOTE(review): presumably a breakpoint hook for debuggers once the
   overlay is in place - confirm against the debugger side. */
422 .global _ovly_debug_event
423 .type _ovly_debug_event, @function
424 _ovly_debug_event:
425 nop
426 /* Branch to target address. */
427 do_load99:
428 bi target # 1,4 81
429
/* The recorded size of __ovly_load runs from its label to here, so it
   includes the do_load code above. */
430 .size __ovly_load, . - __ovly_load
This page took 0.041468 seconds and 5 git commands to generate.