*** empty log message ***
[deliverable/binutils-gdb.git] / ld / emultempl / spu_ovl.S
CommitLineData
e9f53129
AM
1/* Overlay manager for SPU.
2
45d3b878 3 Copyright 2006, 2007 Free Software Foundation, Inc.
e9f53129 4
f96b4a7b 5 This file is part of the GNU Binutils.
e9f53129 6
f96b4a7b 7 This program is free software; you can redistribute it and/or modify
e9f53129 8 it under the terms of the GNU General Public License as published by
f96b4a7b
NC
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
e9f53129 11
f96b4a7b 12 This program is distributed in the hope that it will be useful,
e9f53129
AM
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
f96b4a7b
NC
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
20 MA 02110-1301, USA. */
e9f53129 21
47f6dab9 22/* MFC DMA definitions, all consumed by __ovly_load_event:
   MFC_GET_CMD        - value written to the $MFC_Cmd channel to queue
                        each transfer.
   MFC_MAX_DMA_SIZE   - largest size written to $MFC_Size for a single
                        transfer; bigger overlays are moved in several
                        transfers.
   MFC_TAG_UPDATE_ALL - value written to $MFC_WrTagUpdate before the
                        tag status is read.
   MFC_TAG_ID         - value written to $MFC_TagId, and the bit number
                        set in the mask written to $MFC_WrTagMask.  */
e9f53129
AM
23#define MFC_GET_CMD 0x40
24#define MFC_MAX_DMA_SIZE 0x4000
25#define MFC_TAG_UPDATE_ALL 2
26#define MFC_TAG_ID 0
27
47f6dab9
AM
28/* Register usage.

   Every symbolic name below is an alias for one of eight registers,
   $72 to $79.  Names that share a register are different uses of the
   same storage at different points in the code, so two aliases of one
   register must never be live at the same time.

   reserved1..reserved5 ($75..$79) are used without being saved.
   save1..save3 ($72..$74) are stored to -16($sp), -32($sp) and -48($sp)
   by __ovly_return and __ovly_load before being used as scratch, and
   are reloaded from those slots by __ovly_load and __ovly_load_event.  */
29#define reserved1 $75
30#define parm $75
31#define tab1 reserved1
32#define tab2 reserved1
33#define vma reserved1
34#define oldvma reserved1
35#define newmask reserved1
36#define map reserved1
37
38#define reserved2 $76
39#define off1 reserved2
40#define off2 reserved2
41#define present1 reserved2
42#define present2 reserved2
43#define sz reserved2
44#define cmp reserved2
45#define add64 reserved2
46#define cgbits reserved2
47#define off3 reserved2
48#define off4 reserved2
49#define off5 reserved2
50#define tagstat reserved2
51
52#define reserved3 $77
53#define buf1 reserved3
54#define buf2 reserved3
55#define rv3 reserved3
56#define ealo reserved3
57#define cmd reserved3
58#define off64 reserved3
59#define tab3 reserved3
60#define tab4 reserved3
61#define tab5 reserved3
62
63#define reserved4 $78
64#define ovl reserved4
65#define rv2 reserved4
66#define rv5 reserved4
67#define cgshuf reserved4
68#define newovl reserved4
69
70#define reserved5 $79
71#define target reserved5
72
73#define save1 $72
74#define rv4 save1
75#define rv7 save1
76#define tagid save1
77#define maxsize save1
78#define pbyte save1
79#define pbit save1
80
81#define save2 $73
82#define cur save2
83#define rv6 save2
84#define osize save2
85#define zovl save2
86#define oldovl save2
87#define newvma save2
88
89#define save3 $74
90#define rv1 save3
91#define ea64 save3
92#define buf3 save3
93#define genwi save3
94#define newmap save3
95#define oldmask save3
e9f53129 96
c828a49f 97
/* The three 16-byte objects below are placed in .text next to the code;
   the routines reach them with displacements relative to
   __ovly_return.  */
e9f53129 98 .text
47f6dab9
AM
99 .align 4
/* __rv_pattern: shufb control quadword used by __ovly_load when building
   a new $lr.  Per the SPU shufb encoding, control bytes 0x00-0x03 select
   the first word of the first source, 0x10-0x13 the first word of the
   second source, and 0x80 yields a zero byte, so the result is
   { first.word0, second.word0, 0, 0 }.  */
100 .type __rv_pattern, @object
101 .size __rv_pattern, 16
e9f53129 102__rv_pattern:
47f6dab9
AM
103 .word 0x00010203, 0x10111213, 0x80808080, 0x80808080
104
/* __cg_pattern: shufb control quadword used by __ovly_load_event on the
   output of cg.  It copies the second word of the source into the first
   word and zeroes the rest, i.e. it moves the carry out of the low
   32 bits to where addx expects the carry in for the high 32 bits.  */
105 .type __cg_pattern, @object
106 .size __cg_pattern, 16
e9f53129 107__cg_pattern:
47f6dab9
AM
108 .word 0x04050607, 0x80808080, 0x80808080, 0x80808080
109
/* __ovly_current: one quadword, initially zero.  __ovly_return and
   __ovly_load store the ovl register here; __ovly_load and
   __ovly_load_event read it back and use the preferred slot as an
   overlay index.  */
110 .type __ovly_current, @object
111 .size __ovly_current, 16
112__ovly_current:
113 .space 16
e9f53129 114
47f6dab9 115/*
e9f53129
AM
116 * __ovly_return - stub for returning from overlay functions.
117 *
47f6dab9
AM
118 * On entry the four slots of $lr are:
119 * __ovly_return, prev ovl index, caller return addr, undefined.
e9f53129 120 *
47f6dab9
AM
121 * Load the previous overlay and jump to the caller return address.
122 * Updates __ovly_current.
e9f53129 123 */
/* Outline:
   - ovl and target are taken from slots 1 and 2 of $lr.
   - The table base is biased by -16 (one 16-byte entry), so overlay
     index N addresses _ovly_table[N - 1] and index 0 addresses the
     quadword just before _ovly_table.
   - save1..save3 are stored below $sp here and are not modified by this
     routine; __ovly_load_event uses them as scratch and reloads them
     before it branches to target.
   - The low bit of the fourth word of the entry (.buf) is tested.  When
     it is clear control passes to __ovly_load_event, otherwise the
     routine branches straight to target.
   The trailing "# a,b c" notes on the instructions look like scheduling
   annotations (presumably pipeline, latency and issue cycle); confirm
   against the SPU pipeline documentation before relying on them.  The
   commented-out nop/lnop lines mark cycles in which nothing issues.  */
47f6dab9
AM
124 .align 4
125 .global __ovly_return
126 .type __ovly_return, @function
e9f53129 127__ovly_return:
47f6dab9
AM
128 ila tab1, _ovly_table - 16 # 0,2 0
/* Shift slot 1 (previous overlay index) and then slot 2 (caller return
   address) of $lr into the preferred slot.  */
129 shlqbyi ovl, $lr, 4 # 1,4 0
130#nop
131 shlqbyi target, $lr, 8 # 1,4 1
132#nop; lnop
133#nop; lnop
/* off1 = ovl * 16, the byte offset of the table entry.  */
134 shli off1, ovl, 4 # 0,4 4
135#lnop
136#nop
/* Hint the indirect branch at ovly_ret9.  */
137 hbr ovly_ret9, target # 1,15 5
138#nop; lnop
139#nop; lnop
140#nop
141 lqx vma, tab1, off1 # 1,6 8
142#nop; lnop
143#nop; lnop
144#nop; lnop
145#nop; lnop
146#nop; lnop
147#nop
/* Rotate the fourth word of the entry (.buf) into the preferred slot.  */
148 rotqbyi buf1, vma, 12 # 1,4 14
149#nop
150 stqd save3, -48($sp) # 1,6 15
151#nop
152 stqd save2, -32($sp) # 1,6 16
153#nop
154 stqd save1, -16($sp) # 1,6 17
155 andi present1, buf1, 1 # 0,2 18
/* Slot 0 of $lr is the address of __ovly_return, so this displacement
   addresses __ovly_current.  */
156 stqd ovl, (__ovly_current - __ovly_return)($lr) # 1,6 18
157#nop; lnop
158#nop
159 brz present1, __ovly_load_event # 1,4 20
160ovly_ret9:
161#nop
162 bi target # 1,4 21
163
164/*
e9f53129
AM
165 * __ovly_load - copy an overlay partition to local store.
166 *
47f6dab9
AM
167 * On entry $75 points to a word consisting of the overlay index in
168 * the top 14 bits, and the target address in the bottom 18 bits.
e9f53129 169 *
47f6dab9
AM
170 * Sets up $lr to return via __ovly_return.
171 * Updates __ovly_current.
e9f53129 172 */
/* The $75 convention above is what the OVL_STUB_SIZE == 8 path decodes.
   The other path never reads $75: it uses ovl ($78) and target ($79)
   without setting them, so they are presumably loaded by the 16-byte
   stub before it branches here; confirm against the stub generator.

   Both paths leave the same state for the common tail: rv1 is the
   address of __ovly_return, cur is the old contents of __ovly_current,
   rv2 is the word-wise compare of $lr with rv1, rv3 is __rv_pattern,
   vma is the _ovly_table entry for ovl, and save1..save3 have been
   stored below $sp.  __ovly_current has been overwritten with ovl.  */
47f6dab9
AM
173 .align 3
174 .global __ovly_load
175 .type __ovly_load, @function
e9f53129 176__ovly_load:
47f6dab9
AM
177#if OVL_STUB_SIZE == 8
178########
179#nop
/* Load the quadword containing the word parm points at, then rotate by
   the address so that word lands in the preferred slot.  */
180 lqd target, 0(parm) # 1,6 -11
181#nop; lnop
182#nop; lnop
183#nop; lnop
184#nop; lnop
185#nop; lnop
186#nop
187 rotqby target, target, parm # 1,4 -5
/* Table base biased by one 16-byte entry: index N -> _ovly_table[N - 1].
   tab2 shares $75 with parm, which is no longer needed.  */
188 ila tab2, _ovly_table - 16 # 0,2 -4
189 stqd save3, -48($sp) # 1,6 -4
190#nop
191 stqd save2, -32($sp) # 1,6 -3
192#nop
193 stqd save1, -16($sp) # 1,6 -2
/* ovl = target >> 18 (logical), i.e. the top 14 bits of the word.  */
194 rotmi ovl, target, -18 # 0,4 -1
195 hbr ovly_load9, target # 1,15 -1
196 ila rv1, __ovly_return # 0,2 0
197#lnop
198#nop; lnop
199#nop
200 lqd cur, (__ovly_current - __ovly_return)(rv1) # 1,6 2
201 shli off2, ovl, 4 # 0,4 3
202 stqd ovl, (__ovly_current - __ovly_return)(rv1) # 1,6 3
203 ceq rv2, $lr, rv1 # 0,2 4
204 lqd rv3, (__rv_pattern - __ovly_return)(rv1) # 1,6 4
205#nop; lnop
206#nop; lnop
207#nop
208 lqx vma, tab2, off2 # 1,6 7
209########
210#else /* OVL_STUB_SIZE == 16 */
211########
212 ila tab2, _ovly_table - 16 # 0,2 0
213 stqd save3, -48($sp) # 1,6 0
214 ila rv1, __ovly_return # 0,2 1
215 stqd save2, -32($sp) # 1,6 1
216 shli off2, ovl, 4 # 0,4 2
217 lqa cur, __ovly_current # 1,6 2
218 nop
219 stqa ovl, __ovly_current # 1,6 3
220 ceq rv2, $lr, rv1 # 0,2 4
221 lqd rv3, (__rv_pattern - __ovly_return)(rv1) # 1,6 4
222#nop
223 hbr ovly_load9, target # 1,15 5
224#nop
225 lqx vma, tab2, off2 # 1,6 6
226#nop
227 stqd save1, -16($sp) # 1,6 7
228########
c828a49f
AM
229#endif
230
47f6dab9
AM
231#nop; lnop
232#nop; lnop
233#nop
/* Common tail.  Build the candidate link register
     rv7 = { __ovly_return, old __ovly_current, old $lr slot 0, old $lr slot 1 }
   from rv4 = { rv1.word0, cur.word0, 0, 0 } and rv6 = $lr shifted right
   by eight bytes.  rv5 is a byte mask that is all ones when slot 0 of
   $lr already equals __ovly_return and all zeros otherwise, so the selb
   keeps $lr unchanged in the first case and installs rv7 in the second.
   The reloads of save3, save2 and save1 are interleaved with this
   because rv1, rv6 and rv7 live in those same registers; each reload
   follows the last use of the alias it overwrites.  */
234 shufb rv4, rv1, cur, rv3 # 1,4 10
235#nop
236 fsmb rv5, rv2 # 1,4 11
237#nop
238 rotqmbyi rv6, $lr, -8 # 1,4 12
239#nop
/* Rotate the fourth word of the entry (.buf) into the preferred slot.  */
240 rotqbyi buf2, vma, 12 # 1,4 13
241#nop
242 lqd save3, -48($sp) # 1,6 14
243#nop; lnop
244 or rv7, rv4, rv6 # 0,2 16
245 lqd save2, -32($sp) # 1,6 16
246 andi present2, buf2, 1 # 0,2 17
247 lnop # 1,0 17
248 selb $lr, rv7, $lr, rv5 # 0,2 18
249 lqd save1, -16($sp) # 1,6 18
250#nop
/* Low bit of .buf clear: go and load the overlay.  The stack slots still
   hold save1..save3, which __ovly_load_event relies on.  */
251 brz present2, __ovly_load_event # 1,4 19
252ovly_load9:
253#nop
254 bi target # 1,4 20
255
256/* If we get here, we are about to load a new overlay.
257 * "vma" contains the relevant entry from _ovly_table[].
e9f53129
AM
258 * extern struct {
259 * u32 vma;
260 * u32 size;
261 * u32 file_offset;
262 * u32 buf;
263 * } _ovly_table[];
264 */
/* Entry state, as set up by the two callers in this file: vma holds the
   table entry, target holds the final branch destination, slot 0 of $lr
   is the address of __ovly_return (used below as the base for reaching
   __cg_pattern and __ovly_current), __ovly_current already holds the
   index of the overlay being loaded, and the caller's save1..save3 are
   in the stack slots at -16/-32/-48($sp).  This routine uses
   save1..save3 as scratch and reloads them from those slots at the
   end.  */
47f6dab9
AM
265 .align 3
266 .global __ovly_load_event
267 .type __ovly_load_event, @function
b1e37473 268__ovly_load_event:
47f6dab9
AM
269#nop
/* Rotate the third word (file_offset) into sz and the second word (size)
   into osize.  sz starts out as the file offset so that the first pass
   of the loop adds it to the base address; on later passes sz is the
   size of the transfer just queued.  */
270 rotqbyi sz, vma, 8 # 1,4 0
271#nop
272 rotqbyi osize, vma, 4 # 1,4 1
273#nop
/* ea64 = quadword at _EAR_; its first two words are used below as the
   high and low halves of the effective address.  */
274 lqa ea64, _EAR_ # 1,6 2
275#nop
276 lqd cgshuf, (__cg_pattern - __ovly_return)($lr) # 1,6 3
277
278/* We could predict the branch at the end of this loop by adding a few
279 instructions, and there are plenty of free cycles to do so without
280 impacting loop execution time. However, it doesn't make a great
281 deal of sense since we need to wait for the dma to complete anyway. */
e9f53129 282__ovly_xfer_loop:
47f6dab9
AM
283#nop
/* ea64 += sz as a 64-bit add over words 0:1.  off64 is sz moved down one
   word slot with a zero high word, cg produces the per-word carries,
   shufb with __cg_pattern moves the low-word carry up to the high word,
   and addx adds with that carry in.  */
284 rotqmbyi off64, sz, -4 # 1,4 4
285#nop; lnop
286#nop; lnop
287#nop; lnop
288 cg cgbits, ea64, off64 # 0,2 8
289#lnop
290#nop; lnop
291#nop
292 shufb add64, cgbits, cgbits, cgshuf # 1,4 10
293#nop; lnop
294#nop; lnop
295#nop; lnop
296 addx add64, ea64, off64 # 0,2 14
297#lnop
/* Queue one transfer: sz = osize > MFC_MAX_DMA_SIZE ? MFC_MAX_DMA_SIZE
   : osize, local-store address vma, effective address ea64 (high word)
   and ealo (low word).  Then advance vma by sz and reduce osize by sz;
   loop while osize is non-zero.  */
298 ila maxsize, MFC_MAX_DMA_SIZE # 0,2 15
299 lnop
300 ori ea64, add64, 0 # 0,2 16
301 rotqbyi ealo, add64, 4 # 1,4 16
302 cgt cmp, osize, maxsize # 0,2 17
303 wrch $MFC_LSA, vma # 1,6 17
304#nop; lnop
305 selb sz, osize, maxsize, cmp # 0,2 19
306 wrch $MFC_EAH, ea64 # 1,6 19
307 ila tagid, MFC_TAG_ID # 0,2 20
308 wrch $MFC_EAL, ealo # 1,6 20
309 ila cmd, MFC_GET_CMD # 0,2 21
310 wrch $MFC_Size, sz # 1,6 21
311 sf osize, sz, osize # 0,2 22
312 wrch $MFC_TagId, tagid # 1,6 22
313 a vma, vma, sz # 0,2 23
314 wrch $MFC_Cmd, cmd # 1,6 23
315#nop
316 brnz osize, __ovly_xfer_loop # 1,4 24
317
318/* Now update our data structures while waiting for DMA to complete.
319 Low bit of .buf needs to be cleared on the _ovly_table entry
320 corresponding to the evicted overlay, and set on the entry for the
321 newly loaded overlay. Note that no overlay may in fact be evicted
322 as _ovly_buf_table[] starts with all zeros. Don't zap .buf entry
323 for zero index! Also of course update the _ovly_buf_table entry. */
324#nop
/* Re-read the new overlay index and its table entry; vma was advanced by
   the loop above and no longer holds the entry.  */
325 lqd newovl, (__ovly_current - __ovly_return)($lr) # 1,6 25
326#nop; lnop
327#nop; lnop
328#nop; lnop
329#nop; lnop
330#nop; lnop
331 shli off3, newovl, 4 # 0,4 31
332#lnop
333 ila tab3, _ovly_table - 16 # 0,2 32
334#lnop
335#nop
/* pbyte is a quadword with only its last byte set to 0xff; pbit = pbyte
   & 1 is therefore a quadword whose fourth word is 1, i.e. the low bit
   of .buf.  */
336 fsmbi pbyte, 1 # 1,4 33
337#nop; lnop
338#nop
339 lqx vma, tab3, off3 # 1,6 35
340#nop; lnop
341 andi pbit, pbyte, 1 # 0,2 37
342 lnop
343#nop; lnop
344#nop; lnop
345#nop; lnop
/* Set the low bit of .buf on the new overlay's entry and store it back.
   buf3 is .buf taken from the entry as loaded, before that bit is set.  */
346 or newvma, vma, pbit # 0,2 41
347 rotqbyi buf3, vma, 12 # 1,4 41
348#nop; lnop
349#nop
350 stqx newvma, tab3, off3 # 1,6 43
351#nop; lnop
/* off4 = buf3 * 4 is the byte offset of this buffer's word in
   _ovly_buf_table.  */
352 shli off4, buf3, 2 # 1,4 45
353#lnop
354 ila tab4, _ovly_buf_table # 0,2 46
355#lnop
356#nop; lnop
357#nop; lnop
358#nop
/* Read-modify-write of one word: map is the quadword containing it,
   oldovl is that word rotated into the preferred slot (the overlay index
   previously recorded for this buffer), and cwx/shufb build newmap with
   newovl inserted in its place.  The rotate amount comes from off4
   alone, which presumably relies on _ovly_buf_table being 16-byte
   aligned; confirm against the linker-generated table.  */
359 lqx map, tab4, off4 # 1,6 49
360#nop
361 cwx genwi, tab4, off4 # 1,4 50
362#nop; lnop
363#nop; lnop
364#nop; lnop
365#nop; lnop
366#nop
367 rotqby oldovl, map, off4 # 1,4 55
368 nop
369 shufb newmap, newovl, map, genwi # 0,4 56
e9f53129 370#if MFC_TAG_ID < 16
47f6dab9 371 ila newmask, 1 << MFC_TAG_ID # 0,2 57
e9f53129 372#else
47f6dab9 373 ilhu newmask, 1 << (MFC_TAG_ID - 16) # 0,2 57
c828a49f 374#endif
47f6dab9
AM
375#lnop
376#nop; lnop
377#nop; lnop
378 stqx newmap, tab4, off4 # 1,6 60
379
380/* Save app's tagmask, wait for DMA complete, restore mask. */
381 ila tagstat, MFC_TAG_UPDATE_ALL # 0,2 61
382 rdch oldmask, $MFC_RdTagMask # 1,6 61
383#nop
384 wrch $MFC_WrTagMask, newmask # 1,6 62
385#nop
386 wrch $MFC_WrTagUpdate, tagstat # 1,6 63
387#nop
388 rdch tagstat, $MFC_RdTagStat # 1,6 64
389#nop
390 sync # 1,4 65
391/* Any hint prior to the sync is lost. A hint here allows the branch
392 to complete 15 cycles after the hint. With no hint the branch will
393 take 18 or 19 cycles. */
394 ila tab5, _ovly_table - 16 # 0,2 66
395 hbr do_load99, target # 1,15 66
396 shli off5, oldovl, 4 # 0,4 67
397 wrch $MFC_WrTagMask, oldmask # 1,6 67
/* Clear the low bit of .buf on the evicted overlay's entry.  zovl becomes
   an all-ones mask when oldovl is zero; that clears pbit, so for index
   zero the quadword just before _ovly_table is read and written back
   unchanged.  */
398 ceqi zovl, oldovl, 0 # 0,2 68
399#lnop
400#nop; lnop
401#nop
402 fsm zovl, zovl # 1,4 70
403#nop
404 lqx oldvma, tab5, off5 # 1,6 71
405#nop
/* The reloads of save3, save2 and save1 are interleaved with the update;
   each one follows the last use of the alias it overwrites.  */
406 lqd save3, -48($sp) # 1,6 72
407#nop; lnop
408 andc pbit, pbit, zovl # 0,2 74
409 lqd save2, -32($sp) # 1,6 74
410#nop; lnop
411#nop; lnop
412 andc oldvma, oldvma, pbit # 0,2 77
413 lqd save1, -16($sp) # 1,6 77
414#nop; lnop
415 nop
416 stqx oldvma, tab5, off5 # 1,6 79
417#nop; lnop
c828a49f 418
47f6dab9
AM
/* _ovly_debug_event is a global label on a single nop.  It is reached by
   falling through from the end of __ovly_load_event, after the overlay
   tables have been updated and save1..save3 reloaded.  Presumably it
   exists as a fixed address for a debugger to break on; confirm with the
   debugger side before depending on that.  */
419 .global _ovly_debug_event
420 .type _ovly_debug_event, @function
e9f53129 421_ovly_debug_event:
e9f53129 422 nop
e9f53129 423/* Branch to target address. */
47f6dab9
AM
424do_load99:
425 bi target # 1,4 81
b1e37473 426
/* The size recorded for __ovly_load runs from its label to this point,
   so it also covers __ovly_load_event and _ovly_debug_event.  */
47f6dab9 427 .size __ovly_load, . - __ovly_load
This page took 0.130279 seconds and 4 git commands to generate.