/*
 * File: arch/blackfin/mach-bf561/atomic.S
 * Author: Philippe Gerum <rpm@xenomai.org>
 *
 * Copyright 2007 Analog Devices Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see the file COPYING, or write
 * to the Free Software Foundation, Inc.,
 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <linux/linkage.h>
#include <asm/blackfin.h>
#include <asm/cache.h>
#include <asm/asm-offsets.h>
#include <asm/rwlock.h>
#include <asm/cplb.h>

.text

.macro coreslot_loadaddr reg:req
	\reg\().l = _corelock;
	\reg\().h = _corelock;
.endm

/*
 * r0 = address of atomic data to flush and invalidate (32bit).
 *
 * Clear interrupts and return the old mask.
 * We assume that no atomic data can span cachelines.
 *
 * Clobbers: r2:0, p0
 */
ENTRY(_get_core_lock)
	r1 = -L1_CACHE_BYTES;
	r1 = r0 & r1;
	cli r0;
	coreslot_loadaddr p0;
.Lretry_corelock:
	testset (p0);
	if cc jump .Ldone_corelock;
	SSYNC(r2);
	jump .Lretry_corelock
.Ldone_corelock:
	p0 = r1;
	CSYNC(r2);
	flushinv[p0];
	SSYNC(r2);
	rts;
ENDPROC(_get_core_lock)

/*
 * r0 = address of atomic data in uncacheable memory region (32bit).
 *
 * Clear interrupts and return the old mask.
 *
 * Clobbers: r0, p0
 */
ENTRY(_get_core_lock_noflush)
	cli r0;
	coreslot_loadaddr p0;
.Lretry_corelock_noflush:
	testset (p0);
	if cc jump .Ldone_corelock_noflush;
	SSYNC(r2);
	jump .Lretry_corelock_noflush
.Ldone_corelock_noflush:
	rts;
ENDPROC(_get_core_lock_noflush)

/*
 * r0 = interrupt mask to restore.
 * r1 = address of atomic data to flush and invalidate (32bit).
 *
 * Interrupts are masked on entry (see _get_core_lock).
 * Clobbers: r2:0, p0
 */
ENTRY(_put_core_lock)
	/* Write-through cache assumed, so no flush needed here. */
	coreslot_loadaddr p0;
	r1 = 0;
	[p0] = r1;
	SSYNC(r2);
	sti r0;
	rts;
ENDPROC(_put_core_lock)
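
/*
 * For reference, the _get_core_lock/_put_core_lock pair above behaves
 * roughly like the C sketch below. This is only an illustration derived
 * from the assembly: irq_save(), irq_restore(), testset32() and
 * flushinv_line() are made-up stand-ins for the cli/sti, testset and
 * flushinv instructions, not real kernel helpers.
 *
 *	extern unsigned int corelock;
 *
 *	static unsigned long core_lock_get(void *atomic_data)
 *	{
 *		void *line = (void *)((unsigned long)atomic_data &
 *				      ~(L1_CACHE_BYTES - 1));
 *		unsigned long flags = irq_save();	// cli r0
 *
 *		while (!testset32(&corelock))		// spin on the core slot
 *			;
 *		flushinv_line(line);			// discard the stale line
 *		return flags;				// old interrupt mask
 *	}
 *
 *	static void core_lock_put(unsigned long flags, void *atomic_data)
 *	{
 *		corelock = 0;				// release the slot
 *		irq_restore(flags);			// sti r0
 *	}
 */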

#ifdef __ARCH_SYNC_CORE_DCACHE

ENTRY(___raw_smp_mark_barrier_asm)
	[--sp] = rets;
	[--sp] = ( r7:5 );
	[--sp] = r0;
	[--sp] = p1;
	[--sp] = p0;
	call _get_core_lock_noflush;

	/*
	 * Calculate current core mask
	 */
	GET_CPUID(p1, r7);
	r6 = 1;
	r6 <<= r7;

	/*
	 * Set the bits of the other cores in the barrier mask; don't change
	 * the current core's bit.
	 */
	p1.l = _barrier_mask;
	p1.h = _barrier_mask;
	r7 = [p1];
	r5 = r7 & r6;
	r7 = ~r6;
	cc = r5 == 0;
	if cc jump 1f;
	r7 = r7 | r6;
1:
	[p1] = r7;
	SSYNC(r2);

	call _put_core_lock;
	p0 = [sp++];
	p1 = [sp++];
	r0 = [sp++];
	( r7:5 ) = [sp++];
	rets = [sp++];
	rts;
ENDPROC(___raw_smp_mark_barrier_asm)

ENTRY(___raw_smp_check_barrier_asm)
	[--sp] = rets;
	[--sp] = ( r7:5 );
	[--sp] = r0;
	[--sp] = p1;
	[--sp] = p0;
	call _get_core_lock_noflush;

	/*
	 * Calculate current core mask
	 */
	GET_CPUID(p1, r7);
	r6 = 1;
	r6 <<= r7;

	/*
	 * Clear the current core's bit in the barrier mask if it is set.
	 */
	p1.l = _barrier_mask;
	p1.h = _barrier_mask;
	r7 = [p1];
	r5 = r7 & r6;
	cc = r5 == 0;
	if cc jump 1f;
	r6 = ~r6;
	r7 = r7 & r6;
	[p1] = r7;
	SSYNC(r2);

	call _put_core_lock;

	/*
	 * Invalidate the entire D-cache of the current core.
	 */
	sp += -12;
	call _resync_core_dcache
	sp += 12;
	jump 2f;
1:
	call _put_core_lock;
2:
	p0 = [sp++];
	p1 = [sp++];
	r0 = [sp++];
	( r7:5 ) = [sp++];
	rets = [sp++];
	rts;
ENDPROC(___raw_smp_check_barrier_asm)
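
/*
 * A hedged C sketch of the barrier bookkeeping above (illustrative only;
 * barrier_mask is the variable referenced by the assembly, while
 * core_lock_get_noflush()/core_lock_put() and resync_core_dcache() stand
 * in for the routines called above):
 *
 *	extern unsigned long barrier_mask;
 *
 *	void smp_mark_barrier(void)
 *	{
 *		unsigned long flags = core_lock_get_noflush();
 *		unsigned long me = 1UL << smp_processor_id();
 *
 *		// flag every core except ourselves as possibly stale
 *		barrier_mask = (barrier_mask & me) | ~me;
 *		core_lock_put(flags, &barrier_mask);
 *	}
 *
 *	void smp_check_barrier(void)
 *	{
 *		unsigned long flags = core_lock_get_noflush();
 *		unsigned long me = 1UL << smp_processor_id();
 *
 *		if (barrier_mask & me) {
 *			barrier_mask &= ~me;	// consume our stale flag
 *			core_lock_put(flags, &barrier_mask);
 *			resync_core_dcache();	// invalidate the local D-cache
 *		} else {
 *			core_lock_put(flags, &barrier_mask);
 *		}
 *	}
 */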

/*
 * r0 = irqflags
 * r1 = address of atomic data
 *
 * Clobbers: r2:0, p1:0
 */
_start_lock_coherent:

	[--sp] = rets;
	[--sp] = ( r7:6 );
	r7 = r0;
	p1 = r1;

	/*
	 * Determine whether the atomic data was previously
	 * owned by another CPU (=r6).
	 */
	GET_CPUID(p0, r2);
	r1 = 1;
	r1 <<= r2;
	r2 = ~r1;

	r1 = [p1];
	r1 >>= 28; /* CPU fingerprints are stored in the high nibble. */
	r6 = r1 & r2;
	r1 = [p1];
	r1 <<= 4;
	r1 >>= 4;
	[p1] = r1;

	/*
	 * Release the core lock now, but keep IRQs disabled while we are
	 * performing the remaining housekeeping chores for the current CPU.
	 */
	coreslot_loadaddr p0;
	r1 = 0;
	[p0] = r1;

	/*
	 * If another CPU has owned the same atomic section before us,
	 * then our D-cached copy of the shared data protected by the
	 * current spin/write_lock may be obsolete.
	 */
	cc = r6 == 0;
	if cc jump .Lcache_synced

	/*
	 * Invalidate the entire D-cache of the current core.
	 */
	sp += -12;
	call _resync_core_dcache
	sp += 12;

.Lcache_synced:
	SSYNC(r2);
	sti r7;
	( r7:6 ) = [sp++];
	rets = [sp++];
	rts

/*
 * r0 = irqflags
 * r1 = address of atomic data
 *
 * Clobbers: r2:0, p1:0
 */
_end_lock_coherent:

	p1 = r1;
	GET_CPUID(p0, r2);
	r2 += 28;
	r1 = 1;
	r1 <<= r2;
	r2 = [p1];
	r2 = r1 | r2;
	[p1] = r2;
	r1 = p1;
	jump _put_core_lock;

#endif /* __ARCH_SYNC_CORE_DCACHE */
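
/*
 * Sketch of the CPU-fingerprint scheme implemented by _start_lock_coherent
 * and _end_lock_coherent above (an illustration only; the helper names are
 * invented, not real kernel functions):
 *
 *	#define OWNER_SHIFT	28	// owner bits live in the top nibble
 *
 *	void start_lock_coherent(unsigned long flags, unsigned long *lock)
 *	{
 *		unsigned long me = 1UL << smp_processor_id();
 *		unsigned long owners = *lock >> OWNER_SHIFT;
 *
 *		*lock &= ~(0xfUL << OWNER_SHIFT); // strip the fingerprint nibble
 *		corelock = 0;			// drop the core lock, IRQs still off
 *
 *		if (owners & ~me)
 *			// the last owner was another core, so our cached copy
 *			// of the data guarded by this lock may be stale
 *			resync_core_dcache();
 *		irq_restore(flags);
 *	}
 *
 *	void end_lock_coherent(unsigned long flags, unsigned long *lock)
 *	{
 *		*lock |= 1UL << (smp_processor_id() + OWNER_SHIFT);
 *		core_lock_put(flags, lock);
 *	}
 */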

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_is_locked_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r3 = [p1];
	cc = bittst( r3, 0 );
	r3 = cc;
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	r0 = r3;
	rts;
ENDPROC(___raw_spin_is_locked_asm)

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_lock_asm)
	p1 = r0;
	[--sp] = rets;
.Lretry_spinlock:
	call _get_core_lock;
	r1 = p1;
	r2 = [p1];
	cc = bittst( r2, 0 );
	if cc jump .Lbusy_spinlock
#ifdef __ARCH_SYNC_CORE_DCACHE
	r3 = p1;
	bitset ( r2, 0 ); /* Raise the lock bit. */
	[p1] = r2;
	call _start_lock_coherent
#else
	r2 = 1;
	[p1] = r2;
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;

.Lbusy_spinlock:
	/* We don't touch the atomic area if busy, so that the flush
	   will behave like a nop in _put_core_lock. */
	call _put_core_lock;
	SSYNC(r2);
	r0 = p1;
	jump .Lretry_spinlock
ENDPROC(___raw_spin_lock_asm)
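
/*
 * The spinlock fast and busy paths above, as a hedged C sketch (helper
 * names stand in for the routines defined earlier in this file):
 *
 *	void raw_spin_lock(unsigned long *lock)
 *	{
 *		unsigned long flags;
 *
 *		for (;;) {
 *			flags = core_lock_get(lock);
 *			if (!(*lock & 1)) {
 *				*lock |= 1;	// take the lock bit
 *	#ifdef __ARCH_SYNC_CORE_DCACHE
 *				start_lock_coherent(flags, lock);
 *	#else
 *				core_lock_put(flags, lock);
 *	#endif
 *				return;
 *			}
 *			// busy: back off without touching the atomic area
 *			core_lock_put(flags, lock);
 *		}
 *	}
 */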

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_trylock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = p1;
	r3 = [p1];
	cc = bittst( r3, 0 );
	if cc jump .Lfailed_trylock
#ifdef __ARCH_SYNC_CORE_DCACHE
	bitset ( r3, 0 ); /* Raise the lock bit. */
	[p1] = r3;
	call _start_lock_coherent
#else
	r2 = 1;
	[p1] = r2;
	call _put_core_lock;
#endif
	r0 = 1;
	rets = [sp++];
	rts;
.Lfailed_trylock:
	call _put_core_lock;
	r0 = 0;
	rets = [sp++];
	rts;
ENDPROC(___raw_spin_trylock_asm)

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_spin_unlock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	bitclr ( r2, 0 );
	[p1] = r2;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _end_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;
ENDPROC(___raw_spin_unlock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_read_lock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
.Lrdlock_try:
	r1 = [p1];
	r1 += -1;
	[p1] = r1;
	cc = r1 < 0;
	if cc jump .Lrdlock_failed
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;

.Lrdlock_failed:
	r1 += 1;
	[p1] = r1;
.Lrdlock_wait:
	r1 = p1;
	call _put_core_lock;
	SSYNC(r2);
	r0 = p1;
	call _get_core_lock;
	r1 = [p1];
	cc = r1 < 2;
	if cc jump .Lrdlock_wait;
	jump .Lrdlock_try
ENDPROC(___raw_read_lock_asm)
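
/*
 * Reader side of the rwlock above, in C form (a sketch only; the lock
 * word starts at RW_LOCK_BIAS and writers subtract the whole bias, so a
 * negative count after the decrement means a writer is in):
 *
 *	void raw_read_lock(long *lock)
 *	{
 *		unsigned long flags = core_lock_get(lock);
 *
 *		while (--(*lock) < 0) {
 *			(*lock)++;		// undo: a writer holds the lock
 *			do {
 *				core_lock_put(flags, lock);
 *				flags = core_lock_get(lock);
 *			} while (*lock < 2);	// wait before retrying
 *		}
 *		// release the core lock (or _start_lock_coherent when
 *		// __ARCH_SYNC_CORE_DCACHE is defined)
 *		core_lock_put(flags, lock);
 *	}
 */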

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_read_trylock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	cc = r1 <= 0;
	if cc jump .Lfailed_tryrdlock;
	r1 += -1;
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	r0 = 1;
	rts;
.Lfailed_tryrdlock:
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	r0 = 0;
	rts;
ENDPROC(___raw_read_trylock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Note: Processing controlled by a reader lock should not have
 * any side-effect on cache issues with the other core, so we
 * just release the core lock and exit (no _end_lock_coherent).
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_read_unlock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	r1 += 1;
	[p1] = r1;
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	rts;
ENDPROC(___raw_read_unlock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_lock_asm)
	p1 = r0;
	r3.l = lo(RW_LOCK_BIAS);
	r3.h = hi(RW_LOCK_BIAS);
	[--sp] = rets;
	call _get_core_lock;
.Lwrlock_try:
	r1 = [p1];
	r1 = r1 - r3;
#ifdef __ARCH_SYNC_CORE_DCACHE
	r2 = r1;
	r2 <<= 4;
	r2 >>= 4;
	cc = r2 == 0;
#else
	cc = r1 == 0;
#endif
	if !cc jump .Lwrlock_wait
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;

.Lwrlock_wait:
	r1 = p1;
	call _put_core_lock;
	SSYNC(r2);
	r0 = p1;
	call _get_core_lock;
	r1 = [p1];
#ifdef __ARCH_SYNC_CORE_DCACHE
	r1 <<= 4;
	r1 >>= 4;
#endif
	cc = r1 == r3;
	if !cc jump .Lwrlock_wait;
	jump .Lwrlock_try
ENDPROC(___raw_write_lock_asm)
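
/*
 * Writer side of the rwlock, sketched in C (illustrative; in the
 * __ARCH_SYNC_CORE_DCACHE case the owner nibble in the top four bits is
 * masked out before the comparisons, which is what the shifts by 4 do):
 *
 *	void raw_write_lock(unsigned long *lock)
 *	{
 *		unsigned long flags = core_lock_get(lock);
 *
 *		// a writer may only enter when the count (ignoring the owner
 *		// nibble) still equals RW_LOCK_BIAS, i.e. no reader or writer
 *		while (((*lock << 4) >> 4) != RW_LOCK_BIAS) {
 *			core_lock_put(flags, lock);
 *			flags = core_lock_get(lock);
 *		}
 *		*lock -= RW_LOCK_BIAS;		// count drops to zero
 *		core_lock_put(flags, lock);	// or start_lock_coherent()
 *	}
 */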

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_trylock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	r2.l = lo(RW_LOCK_BIAS);
	r2.h = hi(RW_LOCK_BIAS);
	cc = r1 == r2;
	if !cc jump .Lfailed_trywrlock;
#ifdef __ARCH_SYNC_CORE_DCACHE
	r1 >>= 28;
	r1 <<= 28;
#else
	r1 = 0;
#endif
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	r0 = 1;
	rts;

.Lfailed_trywrlock:
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	r0 = 0;
	rts;
ENDPROC(___raw_write_trylock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_unlock_asm)
	p1 = r0;
	r3.l = lo(RW_LOCK_BIAS);
	r3.h = hi(RW_LOCK_BIAS);
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	r1 = r1 + r3;
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _end_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;
ENDPROC(___raw_write_unlock_asm)

/*
 * r0 = ptr
 * r1 = value
 *
 * Add a signed value to a 32bit word and return the new value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_update_asm)
	p1 = r0;
	r3 = r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	r3 = r3 + r2;
	[p1] = r3;
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_update_asm)
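
/*
 * The ___raw_atomic_*_asm helpers that follow all use the same pattern as
 * ___raw_atomic_update_asm above: take the core lock (which also flushes
 * and invalidates the cache line holding the data), apply the operation,
 * then release the lock. Roughly, in C (stand-in helper names; note that
 * update returns the new value while clear/set/xor return the old one):
 *
 *	int raw_atomic_update(int *ptr, int value)
 *	{
 *		unsigned long flags = core_lock_get(ptr);
 *		int result = *ptr + value;
 *
 *		*ptr = result;
 *		core_lock_put(flags, ptr);
 *		return result;
 *	}
 */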

/*
 * r0 = ptr
 * r1 = mask
 *
 * Clear the mask bits from a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_clear_asm)
	p1 = r0;
	r3 = ~r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	r3 = r2 & r3;
	[p1] = r3;
	r3 = r2;
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_clear_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * Set the mask bits into a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_set_asm)
	p1 = r0;
	r3 = r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	r3 = r2 | r3;
	[p1] = r3;
	r3 = r2;
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_set_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * XOR the mask bits with a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_xor_asm)
	p1 = r0;
	r3 = r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	r3 = r2 ^ r3;
	[p1] = r3;
	r3 = r2;
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_xor_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * Perform a logical AND between the mask bits and a 32bit word, and
 * return the masked value. We need this on this architecture in
 * order to invalidate the local cache before testing.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_test_asm)
	p1 = r0;
	r3 = r1;
	r1 = -L1_CACHE_BYTES;
	r1 = r0 & r1;
	p0 = r1;
	flushinv[p0];
	SSYNC(r2);
	r0 = [p1];
	r0 = r0 & r3;
	rts;
ENDPROC(___raw_atomic_test_asm)

/*
 * r0 = ptr
 * r1 = value
 *
 * Swap *ptr with value and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
#define __do_xchg(src, dst)		\
	p1 = r0;			\
	r3 = r1;			\
	[--sp] = rets;			\
	call _get_core_lock;		\
	r2 = src;			\
	dst = r3;			\
	r3 = r2;			\
	r1 = p1;			\
	call _put_core_lock;		\
	r0 = r3;			\
	rets = [sp++];			\
	rts;

ENTRY(___raw_xchg_1_asm)
	__do_xchg(b[p1] (z), b[p1])
ENDPROC(___raw_xchg_1_asm)

ENTRY(___raw_xchg_2_asm)
	__do_xchg(w[p1] (z), w[p1])
ENDPROC(___raw_xchg_2_asm)

ENTRY(___raw_xchg_4_asm)
	__do_xchg([p1], [p1])
ENDPROC(___raw_xchg_4_asm)
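
/*
 * Expanded for the 32-bit case, __do_xchg amounts to the following C
 * sketch (stand-in helper names, illustration only):
 *
 *	unsigned int raw_xchg_4(unsigned int *ptr, unsigned int value)
 *	{
 *		unsigned long flags = core_lock_get(ptr);
 *		unsigned int old = *ptr;
 *
 *		*ptr = value;
 *		core_lock_put(flags, ptr);
 *		return old;
 *	}
 */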

/*
 * r0 = ptr
 * r1 = new
 * r2 = old
 *
 * Swap *ptr with new if *ptr == old and return the previous *ptr
 * value atomically.
 *
 * Clobbers: r3:0, p1:0
 */
#define __do_cmpxchg(src, dst)		\
	[--sp] = rets;			\
	[--sp] = r4;			\
	p1 = r0;			\
	r3 = r1;			\
	r4 = r2;			\
	call _get_core_lock;		\
	r2 = src;			\
	cc = r2 == r4;			\
	if !cc jump 1f;			\
	dst = r3;			\
1:	r3 = r2;			\
	r1 = p1;			\
	call _put_core_lock;		\
	r0 = r3;			\
	r4 = [sp++];			\
	rets = [sp++];			\
	rts;

ENTRY(___raw_cmpxchg_1_asm)
	__do_cmpxchg(b[p1] (z), b[p1])
ENDPROC(___raw_cmpxchg_1_asm)

ENTRY(___raw_cmpxchg_2_asm)
	__do_cmpxchg(w[p1] (z), w[p1])
ENDPROC(___raw_cmpxchg_2_asm)

ENTRY(___raw_cmpxchg_4_asm)
	__do_cmpxchg([p1], [p1])
ENDPROC(___raw_cmpxchg_4_asm)
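
/*
 * Likewise, __do_cmpxchg for the 32-bit case corresponds roughly to
 * (sketch, stand-in helpers):
 *
 *	unsigned int raw_cmpxchg_4(unsigned int *ptr, unsigned int new,
 *				   unsigned int old)
 *	{
 *		unsigned long flags = core_lock_get(ptr);
 *		unsigned int prev = *ptr;
 *
 *		if (prev == old)
 *			*ptr = new;
 *		core_lock_put(flags, ptr);
 *		return prev;
 *	}
 */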

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Set a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_set_asm)
	r2 = r1;
	r1 = 1;
	r1 <<= r2;
	jump ___raw_atomic_set_asm
ENDPROC(___raw_bit_set_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Clear a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_clear_asm)
	r2 = r1;
	r1 = 1;
	r1 <<= r2;
	jump ___raw_atomic_clear_asm
ENDPROC(___raw_bit_clear_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Toggle a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_toggle_asm)
	r2 = r1;
	r1 = 1;
	r1 <<= r2;
	jump ___raw_atomic_xor_asm
ENDPROC(___raw_bit_toggle_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-set a bit in a 32bit word and return the old bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_set_asm)
	[--sp] = rets;
	[--sp] = r1;
	call ___raw_bit_set_asm
	r1 = [sp++];
	r2 = 1;
	r2 <<= r1;
	r0 = r0 & r2;
	cc = r0 == 0;
	if cc jump 1f
	r0 = 1;
1:
	rets = [sp++];
	rts;
ENDPROC(___raw_bit_test_set_asm)
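
/*
 * ___raw_bit_test_{set,clear,toggle}_asm reuse the plain bit operations
 * above and then extract the tested bit from the old word they return,
 * i.e. roughly (C sketch, stand-in names):
 *
 *	int raw_bit_test_set(unsigned int *ptr, int bitnr)
 *	{
 *		unsigned int old = raw_atomic_set(ptr, 1U << bitnr);
 *
 *		return (old >> bitnr) & 1;
 *	}
 */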

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-clear a bit in a 32bit word and return the old bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_clear_asm)
	[--sp] = rets;
	[--sp] = r1;
	call ___raw_bit_clear_asm
	r1 = [sp++];
	r2 = 1;
	r2 <<= r1;
	r0 = r0 & r2;
	cc = r0 == 0;
	if cc jump 1f
	r0 = 1;
1:
	rets = [sp++];
	rts;
ENDPROC(___raw_bit_test_clear_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-toggle a bit in a 32bit word,
 * and return the old bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_toggle_asm)
	[--sp] = rets;
	[--sp] = r1;
	call ___raw_bit_toggle_asm
	r1 = [sp++];
	r2 = 1;
	r2 <<= r1;
	r0 = r0 & r2;
	cc = r0 == 0;
	if cc jump 1f
	r0 = 1;
1:
	rets = [sp++];
	rts;
ENDPROC(___raw_bit_test_toggle_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test a bit in a 32bit word and return its value.
 * We need this on this architecture in order to invalidate
 * the local cache before testing.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_asm)
	r2 = r1;
	r1 = 1;
	r1 <<= r2;
	jump ___raw_atomic_test_asm
ENDPROC(___raw_bit_test_asm)

/*
 * r0 = ptr
 *
 * Fetch and return an uncached 32bit value.
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_uncached_fetch_asm)
	p1 = r0;
	r1 = -L1_CACHE_BYTES;
	r1 = r0 & r1;
	p0 = r1;
	flushinv[p0];
	SSYNC(r2);
	r0 = [p1];
	rts;
ENDPROC(___raw_uncached_fetch_asm)
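
/*
 * ___raw_atomic_test_asm and ___raw_uncached_fetch_asm only have to make
 * sure the local D-cache line is discarded before reading, e.g. (sketch;
 * flushinv_line() stands in for the flushinv instruction):
 *
 *	unsigned int raw_uncached_fetch(unsigned int *ptr)
 *	{
 *		flushinv_line((unsigned long)ptr & ~(L1_CACHE_BYTES - 1));
 *		return *ptr;
 *	}
 */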