sh: sys_sh consolidation for arch_get_unmapped_area().
[deliverable/linux.git] / arch / sh / mm / copy_page.S
CommitLineData
510c72ad 1/*
1da177e4
LT
2 * copy_page, __copy_user_page, __copy_user implementation of SuperH
3 *
4 * Copyright (C) 2001 Niibe Yutaka & Kaz Kojima
5 * Copyright (C) 2002 Toshinobu Sugioka
510c72ad 6 * Copyright (C) 2006 Paul Mundt
1da177e4
LT
7 */
8#include <linux/linkage.h>
510c72ad 9#include <asm/page.h>
1da177e4
LT
10
11/*
12 * copy_page_slow
13 * @to: P1 address
14 * @from: P1 address
15 *
16 * void copy_page_slow(void *to, void *from)
17 */
18
19/*
20 * r0, r1, r2, r3, r4, r5, r6, r7 --- scratch
510c72ad 21 * r8 --- from + PAGE_SIZE
1da177e4
LT
22 * r9 --- not used
23 * r10 --- to
24 * r11 --- from
25 */
26ENTRY(copy_page_slow)
27 mov.l r8,@-r15
28 mov.l r10,@-r15
29 mov.l r11,@-r15
30 mov r4,r10
31 mov r5,r11
32 mov r5,r8
510c72ad 33 mov.l .Lpsz,r0
1da177e4
LT
34 add r0,r8
35 !
361: mov.l @r11+,r0
37 mov.l @r11+,r1
38 mov.l @r11+,r2
39 mov.l @r11+,r3
40 mov.l @r11+,r4
41 mov.l @r11+,r5
42 mov.l @r11+,r6
43 mov.l @r11+,r7
44#if defined(CONFIG_CPU_SH3)
45 mov.l r0,@r10
46#elif defined(CONFIG_CPU_SH4)
47 movca.l r0,@r10
48 mov r10,r0
49#endif
50 add #32,r10
51 mov.l r7,@-r10
52 mov.l r6,@-r10
53 mov.l r5,@-r10
54 mov.l r4,@-r10
55 mov.l r3,@-r10
56 mov.l r2,@-r10
57 mov.l r1,@-r10
58#if defined(CONFIG_CPU_SH4)
59 ocbwb @r0
60#endif
61 cmp/eq r11,r8
62 bf/s 1b
63 add #28,r10
64 !
65 mov.l @r15+,r11
66 mov.l @r15+,r10
67 mov.l @r15+,r8
68 rts
69 nop
70
0d08b5fb 71 .align 2
510c72ad 72.Lpsz: .long PAGE_SIZE
1da177e4
LT
73/*
74 * __kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n);
75 * Return the number of bytes NOT copied
76 */
77#define EX(...) \
78 9999: __VA_ARGS__ ; \
79 .section __ex_table, "a"; \
80 .long 9999b, 6000f ; \
81 .previous
82ENTRY(__copy_user)
023ef184
SM
83 ! Check if small number of bytes
84 mov #11,r0
1da177e4 85 mov r4,r3
023ef184
SM
86 cmp/gt r0,r6 ! r6 (len) > r0 (11)
87 bf/s .L_cleanup_loop_no_pop
88 add r6,r3 ! last destination address
89
90 ! Calculate bytes needed to align to src
91 mov.l r11,@-r15
92 neg r5,r0
93 mov.l r10,@-r15
1da177e4 94 add #4,r0
023ef184 95 mov.l r9,@-r15
1da177e4 96 and #3,r0
023ef184 97 mov.l r8,@-r15
1da177e4 98 tst r0,r0
023ef184 99 bt 2f
1da177e4 100
023ef184
SM
1011:
102 ! Copy bytes to long word align src
103EX( mov.b @r5+,r1 )
104 dt r0
1da177e4 105 add #-1,r6
023ef184
SM
106EX( mov.b r1,@r4 )
107 bf/s 1b
1da177e4
LT
108 add #1,r4
109
023ef184
SM
110 ! Jump to appropriate routine depending on dest
1112: mov #3,r1
112 mov r6, r2
113 and r4,r1
1da177e4 114 shlr2 r2
1da177e4
LT
115 shll2 r1
116 mova .L_jump_tbl,r0
117 mov.l @(r0,r1),r1
118 jmp @r1
119 nop
120
121 .align 2
122.L_jump_tbl:
123 .long .L_dest00
124 .long .L_dest01
125 .long .L_dest10
126 .long .L_dest11
127
023ef184
SM
128/*
129 * Come here if there are less than 12 bytes to copy
130 *
131 * Keep the branch target close, so the bf/s callee doesn't overflow
132 * and result in a more expensive branch being inserted. This is the
133 * fast-path for small copies, the jump via the jump table will hit the
134 * default slow-path cleanup. -PFM.
135 */
136.L_cleanup_loop_no_pop:
137 tst r6,r6 ! Check explicitly for zero
138 bt 1f
139
1402:
141EX( mov.b @r5+,r0 )
142 dt r6
143EX( mov.b r0,@r4 )
144 bf/s 2b
145 add #1,r4
146
1471: mov #0,r0 ! normal return
1485000:
149
150# Exception handler:
151.section .fixup, "ax"
1526000:
153 mov.l 8000f,r1
154 mov r3,r0
155 jmp @r1
156 sub r4,r0
157 .align 2
1588000: .long 5000b
159
160.previous
161 rts
162 nop
163
1da177e4
LT
164! Destination = 00
165
166.L_dest00:
023ef184
SM
167 ! Skip the large copy for small transfers
168 mov #(32+32-4), r0
169 cmp/gt r6, r0 ! r0 (60) > r6 (len)
170 bt 1f
171
172 ! Align dest to a 32 byte boundary
173 neg r4,r0
174 add #0x20, r0
175 and #0x1f, r0
176 tst r0, r0
177 bt 2f
178
179 sub r0, r6
180 shlr2 r0
1813:
182EX( mov.l @r5+,r1 )
183 dt r0
184EX( mov.l r1,@r4 )
185 bf/s 3b
186 add #4,r4
187
1da177e4
LT
1882:
189EX( mov.l @r5+,r0 )
023ef184
SM
190EX( mov.l @r5+,r1 )
191EX( mov.l @r5+,r2 )
192EX( mov.l @r5+,r7 )
1da177e4
LT
193EX( mov.l @r5+,r8 )
194EX( mov.l @r5+,r9 )
195EX( mov.l @r5+,r10 )
023ef184 196EX( mov.l @r5+,r11 )
0e670685 197#ifdef CONFIG_CPU_SH4
023ef184 198EX( movca.l r0,@r4 )
0e670685
SM
199#else
200EX( mov.l r0,@r4 )
201#endif
023ef184
SM
202 add #-32, r6
203EX( mov.l r1,@(4,r4) )
204 mov #32, r0
205EX( mov.l r2,@(8,r4) )
206 cmp/gt r6, r0 ! r0 (32) > r6 (len)
207EX( mov.l r7,@(12,r4) )
208EX( mov.l r8,@(16,r4) )
209EX( mov.l r9,@(20,r4) )
210EX( mov.l r10,@(24,r4) )
211EX( mov.l r11,@(28,r4) )
1da177e4
LT
212 bf/s 2b
213 add #32,r4
023ef184
SM
214
2151: mov r6, r0
216 shlr2 r0
217 tst r0, r0
1da177e4
LT
218 bt .L_cleanup
2191:
023ef184
SM
220EX( mov.l @r5+,r1 )
221 dt r0
222EX( mov.l r1,@r4 )
1da177e4
LT
223 bf/s 1b
224 add #4,r4
225
226 bra .L_cleanup
227 nop
228
229! Destination = 10
230
231.L_dest10:
232 mov r2,r7
233 shlr2 r7
234 shlr r7
235 tst r7,r7
236 mov #7,r0
237 bt/s 1f
238 and r0,r2
2392:
240 dt r7
023ef184 241#ifdef CONFIG_CPU_LITTLE_ENDIAN
1da177e4
LT
242EX( mov.l @r5+,r0 )
243EX( mov.l @r5+,r1 )
244EX( mov.l @r5+,r8 )
245EX( mov.l @r5+,r9 )
246EX( mov.l @r5+,r10 )
247EX( mov.w r0,@r4 )
248 add #2,r4
249 xtrct r1,r0
250 xtrct r8,r1
251 xtrct r9,r8
252 xtrct r10,r9
253
254EX( mov.l r0,@r4 )
255EX( mov.l r1,@(4,r4) )
256EX( mov.l r8,@(8,r4) )
257EX( mov.l r9,@(12,r4) )
258
259EX( mov.l @r5+,r1 )
260EX( mov.l @r5+,r8 )
261EX( mov.l @r5+,r0 )
262 xtrct r1,r10
263 xtrct r8,r1
264 xtrct r0,r8
265 shlr16 r0
266EX( mov.l r10,@(16,r4) )
267EX( mov.l r1,@(20,r4) )
268EX( mov.l r8,@(24,r4) )
269EX( mov.w r0,@(28,r4) )
270 bf/s 2b
271 add #30,r4
272#else
273EX( mov.l @(28,r5),r0 )
274EX( mov.l @(24,r5),r8 )
275EX( mov.l @(20,r5),r9 )
276EX( mov.l @(16,r5),r10 )
277EX( mov.w r0,@(30,r4) )
278 add #-2,r4
279 xtrct r8,r0
280 xtrct r9,r8
281 xtrct r10,r9
282EX( mov.l r0,@(28,r4) )
283EX( mov.l r8,@(24,r4) )
284EX( mov.l r9,@(20,r4) )
285
286EX( mov.l @(12,r5),r0 )
287EX( mov.l @(8,r5),r8 )
288 xtrct r0,r10
289EX( mov.l @(4,r5),r9 )
290 mov.l r10,@(16,r4)
291EX( mov.l @r5,r10 )
292 xtrct r8,r0
293 xtrct r9,r8
294 xtrct r10,r9
295EX( mov.l r0,@(12,r4) )
296EX( mov.l r8,@(8,r4) )
297 swap.w r10,r0
298EX( mov.l r9,@(4,r4) )
299EX( mov.w r0,@(2,r4) )
300
301 add #32,r5
302 bf/s 2b
303 add #34,r4
304#endif
305 tst r2,r2
306 bt .L_cleanup
307
3081: ! Read longword, write two words per iteration
309EX( mov.l @r5+,r0 )
310 dt r2
023ef184 311#ifdef CONFIG_CPU_LITTLE_ENDIAN
1da177e4
LT
312EX( mov.w r0,@r4 )
313 shlr16 r0
314EX( mov.w r0,@(2,r4) )
315#else
316EX( mov.w r0,@(2,r4) )
317 shlr16 r0
318EX( mov.w r0,@r4 )
319#endif
320 bf/s 1b
321 add #4,r4
322
323 bra .L_cleanup
324 nop
325
326! Destination = 01 or 11
327
328.L_dest01:
329.L_dest11:
330 ! Read longword, write byte, word, byte per iteration
331EX( mov.l @r5+,r0 )
332 dt r2
023ef184 333#ifdef CONFIG_CPU_LITTLE_ENDIAN
1da177e4
LT
334EX( mov.b r0,@r4 )
335 shlr8 r0
336 add #1,r4
337EX( mov.w r0,@r4 )
338 shlr16 r0
339EX( mov.b r0,@(2,r4) )
340 bf/s .L_dest01
341 add #3,r4
342#else
343EX( mov.b r0,@(3,r4) )
344 shlr8 r0
345 swap.w r0,r7
346EX( mov.b r7,@r4 )
347 add #1,r4
348EX( mov.w r0,@r4 )
349 bf/s .L_dest01
350 add #3,r4
351#endif
352
353! Cleanup last few bytes
354.L_cleanup:
355 mov r6,r0
356 and #3,r0
357 tst r0,r0
358 bt .L_exit
359 mov r0,r6
360
361.L_cleanup_loop:
362EX( mov.b @r5+,r0 )
363 dt r6
364EX( mov.b r0,@r4 )
365 bf/s .L_cleanup_loop
366 add #1,r4
367
368.L_exit:
369 mov #0,r0 ! normal return
023ef184 370
1da177e4
LT
3715000:
372
373# Exception handler:
374.section .fixup, "ax"
3756000:
376 mov.l 8000f,r1
377 mov r3,r0
378 jmp @r1
379 sub r4,r0
380 .align 2
3818000: .long 5000b
382
383.previous
384 mov.l @r15+,r8
385 mov.l @r15+,r9
023ef184 386 mov.l @r15+,r10
1da177e4 387 rts
023ef184 388 mov.l @r15+,r11
This page took 0.369383 seconds and 5 git commands to generate.