/*
 * Copyright 2007-2008 Analog Devices Inc.
 *              Philippe Gerum <rpm@xenomai.org>
 *
 * Licensed under the GPL-2 or later.
 */

#include <linux/linkage.h>
#include <asm/blackfin.h>
#include <asm/cache.h>
#include <asm/asm-offsets.h>
#include <asm/rwlock.h>
#include <asm/cplb.h>

.text

.macro coreslot_loadaddr reg:req
	\reg\().l = _corelock;
	\reg\().h = _corelock;
.endm

.macro safe_testset addr:req, scratch:req
#if ANOMALY_05000477
	cli \scratch;
	testset (\addr);
	sti \scratch;
#else
	testset (\addr);
#endif
.endm

/*
 * r0 = address of atomic data to flush and invalidate (32bit).
 *
 * Clear interrupts and return the old mask.
 * We assume that no atomic data can span cachelines.
 *
 * Clobbers: r2:0, p0
 */
ENTRY(_get_core_lock)
	r1 = -L1_CACHE_BYTES;
	r1 = r0 & r1;
	cli r0;
	coreslot_loadaddr p0;
.Lretry_corelock:
	safe_testset p0, r2;
	if cc jump .Ldone_corelock;
	SSYNC(r2);
	jump .Lretry_corelock
.Ldone_corelock:
	p0 = r1;
	/* flush core internal write buffer before invalidate dcache */
	CSYNC(r2);
	flushinv[p0];
	SSYNC(r2);
	rts;
ENDPROC(_get_core_lock)
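
/*
 * Illustrative sketch (not assembled): in rough C terms, and assuming
 * hypothetical cli()/testset()/flushinv() helpers that mirror the
 * instructions used above, _get_core_lock behaves approximately like:
 *
 *	unsigned long get_core_lock(void *atomic_word)
 *	{
 *		void *line = (void *)((unsigned long)atomic_word & ~(L1_CACHE_BYTES - 1));
 *		unsigned long flags = cli();      mask IRQs, remember old mask
 *		while (!testset(&corelock))       spin on the shared core lock slot
 *			ssync();
 *		csync();                          drain the store buffer
 *		flushinv(line);                   discard our stale D-cache line
 *		ssync();
 *		return flags;
 *	}
 */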

/*
 * r0 = address of atomic data in uncacheable memory region (32bit).
 *
 * Clear interrupts and return the old mask.
 *
 * Clobbers: r0, p0
 */
ENTRY(_get_core_lock_noflush)
	cli r0;
	coreslot_loadaddr p0;
.Lretry_corelock_noflush:
	safe_testset p0, r2;
	if cc jump .Ldone_corelock_noflush;
	SSYNC(r2);
	jump .Lretry_corelock_noflush
.Ldone_corelock_noflush:
	/*
	 * SMP kgdb runs into a dead loop without a NOP here, when one core
	 * single-steps over get_core_lock_noflush and the other executes
	 * get_core_lock as a slave node.
	 */
	nop;
	CSYNC(r2);
	rts;
ENDPROC(_get_core_lock_noflush)

/*
 * r0 = interrupt mask to restore.
 * r1 = address of atomic data to flush and invalidate (32bit).
 *
 * Interrupts are masked on entry (see _get_core_lock).
 * Clobbers: r2:0, p0
 */
ENTRY(_put_core_lock)
	/* Write-through cache assumed, so no flush needed here. */
	coreslot_loadaddr p0;
	r1 = 0;
	[p0] = r1;
	SSYNC(r2);
	sti r0;
	rts;
ENDPROC(_put_core_lock)
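
/*
 * Illustrative sketch (not assembled): every atomic helper below brackets
 * its critical section with the pair above.  In rough C:
 *
 *	flags = get_core_lock(ptr);        IRQs off, corelock held, cache line fresh
 *	... read-modify-write *ptr ...
 *	put_core_lock(flags, ptr);         corelock released, IRQs restored
 */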

#ifdef __ARCH_SYNC_CORE_DCACHE

ENTRY(___raw_smp_mark_barrier_asm)
	[--sp] = rets;
	[--sp] = ( r7:5 );
	[--sp] = r0;
	[--sp] = p1;
	[--sp] = p0;
	call _get_core_lock_noflush;

	/*
	 * Calculate current core mask
	 */
	GET_CPUID(p1, r7);
	r6 = 1;
	r6 <<= r7;

	/*
	 * Set the bits of the other cores in the barrier mask.
	 * Don't change the current core bit.
	 */
	p1.l = _barrier_mask;
	p1.h = _barrier_mask;
	r7 = [p1];
	r5 = r7 & r6;
	r7 = ~r6;
	cc = r5 == 0;
	if cc jump 1f;
	r7 = r7 | r6;
1:
	[p1] = r7;
	SSYNC(r2);

	call _put_core_lock;
	p0 = [sp++];
	p1 = [sp++];
	r0 = [sp++];
	( r7:5 ) = [sp++];
	rets = [sp++];
	rts;
ENDPROC(___raw_smp_mark_barrier_asm)

ENTRY(___raw_smp_check_barrier_asm)
	[--sp] = rets;
	[--sp] = ( r7:5 );
	[--sp] = r0;
	[--sp] = p1;
	[--sp] = p0;
	call _get_core_lock_noflush;

	/*
	 * Calculate current core mask
	 */
	GET_CPUID(p1, r7);
	r6 = 1;
	r6 <<= r7;

	/*
	 * Clear the current core bit in the barrier mask if it is set.
	 */
	p1.l = _barrier_mask;
	p1.h = _barrier_mask;
	r7 = [p1];
	r5 = r7 & r6;
	cc = r5 == 0;
	if cc jump 1f;
	r6 = ~r6;
	r7 = r7 & r6;
	[p1] = r7;
	SSYNC(r2);

	call _put_core_lock;

	/*
	 * Invalidate the entire D-cache of the current core.
	 */
	sp += -12;
	call _resync_core_dcache
	sp += 12;
	jump 2f;
1:
	call _put_core_lock;
2:
	p0 = [sp++];
	p1 = [sp++];
	r0 = [sp++];
	( r7:5 ) = [sp++];
	rets = [sp++];
	rts;
ENDPROC(___raw_smp_check_barrier_asm)
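
/*
 * Illustrative sketch (not assembled): _barrier_mask tracks which cores
 * still need to resync their D-cache.  In rough C, assuming a hypothetical
 * this_core_bit() helper for the GET_CPUID-derived mask:
 *
 *	mark:	barrier_mask |= ~this_core_bit();      flag all other cores
 *	check:	if (barrier_mask & this_core_bit()) {
 *			barrier_mask &= ~this_core_bit();
 *			resync_core_dcache();          invalidate our stale D-cache
 *		}
 */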

/*
 * r0 = irqflags
 * r1 = address of atomic data
 *
 * Clobbers: r2:0, p1:0
 */
_start_lock_coherent:

	[--sp] = rets;
	[--sp] = ( r7:6 );
	r7 = r0;
	p1 = r1;

	/*
	 * Determine whether the atomic data was previously
	 * owned by another CPU (=r6).
	 */
	GET_CPUID(p0, r2);
	r1 = 1;
	r1 <<= r2;
	r2 = ~r1;

	r1 = [p1];
	r1 >>= 28;	/* CPU fingerprints are stored in the high nibble. */
	r6 = r1 & r2;
	r1 = [p1];
	r1 <<= 4;
	r1 >>= 4;
	[p1] = r1;

	/*
	 * Release the core lock now, but keep IRQs disabled while we are
	 * performing the remaining housekeeping chores for the current CPU.
	 */
	coreslot_loadaddr p0;
	r1 = 0;
	[p0] = r1;

	/*
	 * If another CPU has owned the same atomic section before us,
	 * then our D-cached copy of the shared data protected by the
	 * current spin/write_lock may be obsolete.
	 */
	cc = r6 == 0;
	if cc jump .Lcache_synced

	/*
	 * Invalidate the entire D-cache of the current core.
	 */
	sp += -12;
	call _resync_core_dcache
	sp += 12;

.Lcache_synced:
	SSYNC(r2);
	sti r7;
	( r7:6 ) = [sp++];
	rets = [sp++];
	rts

/*
 * r0 = irqflags
 * r1 = address of atomic data
 *
 * Clobbers: r2:0, p1:0
 */
_end_lock_coherent:

	p1 = r1;
	GET_CPUID(p0, r2);
	r2 += 28;
	r1 = 1;
	r1 <<= r2;
	r2 = [p1];
	r2 = r1 | r2;
	[p1] = r2;
	r1 = p1;
	jump _put_core_lock;

#endif /* __ARCH_SYNC_CORE_DCACHE */
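
/*
 * Illustrative sketch (not assembled): with __ARCH_SYNC_CORE_DCACHE the
 * top nibble of the lock word records which cores last owned it.  In rough
 * C, assuming the hypothetical this_core_bit() helper as above:
 *
 *	start:	owners = (*lock >> 28) & ~this_core_bit();
 *		*lock &= 0x0fffffff;               take over, clear fingerprints
 *		release corelock, keep IRQs off;
 *		if (owners)                        someone else held it before us
 *			resync_core_dcache();
 *	end:	*lock |= this_core_bit() << 28;    leave our fingerprint
 *		put_core_lock(flags, lock);
 */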

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_is_locked_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r3 = [p1];
	cc = bittst( r3, 0 );
	r3 = cc;
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	r0 = r3;
	rts;
ENDPROC(___raw_spin_is_locked_asm)

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_lock_asm)
	p1 = r0;
	[--sp] = rets;
.Lretry_spinlock:
	call _get_core_lock;
	r1 = p1;
	r2 = [p1];
	cc = bittst( r2, 0 );
	if cc jump .Lbusy_spinlock
#ifdef __ARCH_SYNC_CORE_DCACHE
	r3 = p1;
	bitset ( r2, 0 );	/* Raise the lock bit. */
	[p1] = r2;
	call _start_lock_coherent
#else
	r2 = 1;
	[p1] = r2;
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;

.Lbusy_spinlock:
	/* We don't touch the atomic area if busy, so that the flush
	   will behave like a nop in _put_core_lock. */
	call _put_core_lock;
	SSYNC(r2);
	r0 = p1;
	jump .Lretry_spinlock
ENDPROC(___raw_spin_lock_asm)
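
/*
 * Illustrative sketch (not assembled): the spinlock fast path in rough C,
 * for the non-__ARCH_SYNC_CORE_DCACHE variant, all under the corelock:
 *
 *	again:	flags = get_core_lock(&lock->lock);
 *		if (lock->lock & 1) {              already held elsewhere
 *			put_core_lock(flags, &lock->lock);
 *			goto again;
 *		}
 *		lock->lock = 1;
 *		put_core_lock(flags, &lock->lock);
 */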

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_trylock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = p1;
	r3 = [p1];
	cc = bittst( r3, 0 );
	if cc jump .Lfailed_trylock
#ifdef __ARCH_SYNC_CORE_DCACHE
	bitset ( r3, 0 );	/* Raise the lock bit. */
	[p1] = r3;
	call _start_lock_coherent
#else
	r2 = 1;
	[p1] = r2;
	call _put_core_lock;
#endif
	r0 = 1;
	rets = [sp++];
	rts;
.Lfailed_trylock:
	call _put_core_lock;
	r0 = 0;
	rets = [sp++];
	rts;
ENDPROC(___raw_spin_trylock_asm)

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_spin_unlock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	bitclr ( r2, 0 );
	[p1] = r2;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _end_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;
ENDPROC(___raw_spin_unlock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_read_lock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
.Lrdlock_try:
	r1 = [p1];
	r1 += -1;
	[p1] = r1;
	cc = r1 < 0;
	if cc jump .Lrdlock_failed
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;

.Lrdlock_failed:
	r1 += 1;
	[p1] = r1;
.Lrdlock_wait:
	r1 = p1;
	call _put_core_lock;
	SSYNC(r2);
	r0 = p1;
	call _get_core_lock;
	r1 = [p1];
	cc = r1 < 2;
	if cc jump .Lrdlock_wait;
	jump .Lrdlock_try
ENDPROC(___raw_read_lock_asm)
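
/*
 * Illustrative sketch (not assembled): the rwlock word starts at
 * RW_LOCK_BIAS (a large positive bias).  Each reader takes one count, a
 * writer takes the whole bias, so the word goes negative only when a
 * writer is in.  In rough C, the read-lock path above is:
 *
 *	if (--lock->lock < 0) {            a writer holds the lock
 *		lock->lock++;              undo our decrement
 *		while (lock->lock < 2)     wait until readers may enter again
 *			relax under put/get_core_lock;
 *		retry;
 *	}
 */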

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_read_trylock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	cc = r1 <= 0;
	if cc jump .Lfailed_tryrdlock;
	r1 += -1;
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	r0 = 1;
	rts;
.Lfailed_tryrdlock:
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	r0 = 0;
	rts;
ENDPROC(___raw_read_trylock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Note: Processing controlled by a reader lock should not have
 * any side-effect on cache issues with the other core, so we
 * just release the core lock and exit (no _end_lock_coherent).
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_read_unlock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	r1 += 1;
	[p1] = r1;
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	rts;
ENDPROC(___raw_read_unlock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_lock_asm)
	p1 = r0;
	r3.l = lo(RW_LOCK_BIAS);
	r3.h = hi(RW_LOCK_BIAS);
	[--sp] = rets;
	call _get_core_lock;
.Lwrlock_try:
	r1 = [p1];
	r1 = r1 - r3;
#ifdef __ARCH_SYNC_CORE_DCACHE
	r2 = r1;
	r2 <<= 4;
	r2 >>= 4;
	cc = r2 == 0;
#else
	cc = r1 == 0;
#endif
	if !cc jump .Lwrlock_wait
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;

.Lwrlock_wait:
	r1 = p1;
	call _put_core_lock;
	SSYNC(r2);
	r0 = p1;
	call _get_core_lock;
	r1 = [p1];
#ifdef __ARCH_SYNC_CORE_DCACHE
	r1 <<= 4;
	r1 >>= 4;
#endif
	cc = r1 == r3;
	if !cc jump .Lwrlock_wait;
	jump .Lwrlock_try
ENDPROC(___raw_write_lock_asm)
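
/*
 * Illustrative sketch (not assembled): a writer claims the whole
 * RW_LOCK_BIAS.  Ignoring the fingerprint nibble handled by the
 * __ARCH_SYNC_CORE_DCACHE variant, the loop above is roughly:
 *
 *	again:	if (lock->lock - RW_LOCK_BIAS == 0) {    no readers, no writer
 *			lock->lock -= RW_LOCK_BIAS;
 *		} else {
 *			while (lock->lock != RW_LOCK_BIAS)
 *				relax under put/get_core_lock;
 *			goto again;
 *		}
 */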

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_trylock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	r2.l = lo(RW_LOCK_BIAS);
	r2.h = hi(RW_LOCK_BIAS);
	cc = r1 == r2;
	if !cc jump .Lfailed_trywrlock;
#ifdef __ARCH_SYNC_CORE_DCACHE
	r1 >>= 28;
	r1 <<= 28;
#else
	r1 = 0;
#endif
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	r0 = 1;
	rts;

.Lfailed_trywrlock:
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	r0 = 0;
	rts;
ENDPROC(___raw_write_trylock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_unlock_asm)
	p1 = r0;
	r3.l = lo(RW_LOCK_BIAS);
	r3.h = hi(RW_LOCK_BIAS);
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	r1 = r1 + r3;
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _end_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;
ENDPROC(___raw_write_unlock_asm)

/*
 * r0 = ptr
 * r1 = value
 *
 * Add a signed value to a 32bit word and return the new value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_update_asm)
	p1 = r0;
	r3 = r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	r3 = r3 + r2;
	[p1] = r3;
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_update_asm)
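
/*
 * Illustrative sketch (not assembled): the ___raw_atomic_*_asm helpers
 * all share this shape, only the operator changes.  In rough C, for the
 * update (add) case:
 *
 *	int raw_atomic_update(int *ptr, int value)
 *	{
 *		unsigned long flags = get_core_lock(ptr);
 *		int new = *ptr + value;
 *		*ptr = new;
 *		put_core_lock(flags, ptr);
 *		return new;         clear/set/xor below return the old value instead
 *	}
 */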

/*
 * r0 = ptr
 * r1 = mask
 *
 * Clear the mask bits from a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_clear_asm)
	p1 = r0;
	r3 = ~r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	r3 = r2 & r3;
	[p1] = r3;
	r3 = r2;
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_clear_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * Set the mask bits into a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_set_asm)
	p1 = r0;
	r3 = r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	r3 = r2 | r3;
	[p1] = r3;
	r3 = r2;
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_set_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * XOR the mask bits with a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_xor_asm)
	p1 = r0;
	r3 = r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	r3 = r2 ^ r3;
	[p1] = r3;
	r3 = r2;
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_xor_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * Perform a logical AND between the mask bits and a 32bit word, and
 * return the masked value. We need this on this architecture in
 * order to invalidate the local cache before testing.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_test_asm)
	p1 = r0;
	r3 = r1;
	r1 = -L1_CACHE_BYTES;
	r1 = r0 & r1;
	p0 = r1;
	/* flush core internal write buffer before invalidate dcache */
	CSYNC(r2);
	flushinv[p0];
	SSYNC(r2);
	r0 = [p1];
	r0 = r0 & r3;
	rts;
ENDPROC(___raw_atomic_test_asm)

/*
 * r0 = ptr
 * r1 = value
 *
 * Swap *ptr with value and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
#define __do_xchg(src, dst) \
	p1 = r0; \
	r3 = r1; \
	[--sp] = rets; \
	call _get_core_lock; \
	r2 = src; \
	dst = r3; \
	r3 = r2; \
	r1 = p1; \
	call _put_core_lock; \
	r0 = r3; \
	rets = [sp++]; \
	rts;

ENTRY(___raw_xchg_1_asm)
	__do_xchg(b[p1] (z), b[p1])
ENDPROC(___raw_xchg_1_asm)

ENTRY(___raw_xchg_2_asm)
	__do_xchg(w[p1] (z), w[p1])
ENDPROC(___raw_xchg_2_asm)

ENTRY(___raw_xchg_4_asm)
	__do_xchg([p1], [p1])
ENDPROC(___raw_xchg_4_asm)
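
/*
 * Illustrative sketch (not assembled): the three xchg entry points differ
 * only in the load/store width handed to __do_xchg.  In rough C:
 *
 *	old = raw_xchg(ptr, new);          under get/put_core_lock:
 *					   old = *ptr; *ptr = new;
 */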

/*
 * r0 = ptr
 * r1 = new
 * r2 = old
 *
 * Swap *ptr with new if *ptr == old and return the previous *ptr
 * value atomically.
 *
 * Clobbers: r3:0, p1:0
 */
#define __do_cmpxchg(src, dst) \
	[--sp] = rets; \
	[--sp] = r4; \
	p1 = r0; \
	r3 = r1; \
	r4 = r2; \
	call _get_core_lock; \
	r2 = src; \
	cc = r2 == r4; \
	if !cc jump 1f; \
	dst = r3; \
1:	r3 = r2; \
	r1 = p1; \
	call _put_core_lock; \
	r0 = r3; \
	r4 = [sp++]; \
	rets = [sp++]; \
	rts;

ENTRY(___raw_cmpxchg_1_asm)
	__do_cmpxchg(b[p1] (z), b[p1])
ENDPROC(___raw_cmpxchg_1_asm)

ENTRY(___raw_cmpxchg_2_asm)
	__do_cmpxchg(w[p1] (z), w[p1])
ENDPROC(___raw_cmpxchg_2_asm)

ENTRY(___raw_cmpxchg_4_asm)
	__do_cmpxchg([p1], [p1])
ENDPROC(___raw_cmpxchg_4_asm)
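
/*
 * Illustrative sketch (not assembled): __do_cmpxchg stores only when the
 * current value matches the expected one.  In rough C, for the 32bit case:
 *
 *	unsigned long raw_cmpxchg_4(volatile void *ptr,
 *				    unsigned long new, unsigned long old)
 *	{
 *		unsigned long flags = get_core_lock((void *)ptr);
 *		unsigned long cur = *(volatile unsigned long *)ptr;
 *		if (cur == old)
 *			*(volatile unsigned long *)ptr = new;
 *		put_core_lock(flags, (void *)ptr);
 *		return cur;
 *	}
 */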

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Set a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_set_asm)
	r2 = r1;
	r1 = 1;
	r1 <<= r2;
	jump ___raw_atomic_set_asm
ENDPROC(___raw_bit_set_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Clear a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_clear_asm)
	r2 = r1;
	r1 = 1;
	r1 <<= r2;
	jump ___raw_atomic_clear_asm
ENDPROC(___raw_bit_clear_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Toggle a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_toggle_asm)
	r2 = r1;
	r1 = 1;
	r1 <<= r2;
	jump ___raw_atomic_xor_asm
ENDPROC(___raw_bit_toggle_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-set a bit in a 32bit word and return the old bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_set_asm)
	[--sp] = rets;
	[--sp] = r1;
	call ___raw_bit_set_asm
	r1 = [sp++];
	r2 = 1;
	r2 <<= r1;
	r0 = r0 & r2;
	cc = r0 == 0;
	if cc jump 1f
	r0 = 1;
1:
	rets = [sp++];
	rts;
ENDPROC(___raw_bit_test_set_asm)
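
/*
 * Illustrative sketch (not assembled): the bit_test_* helpers reuse the
 * word-wide primitives and then extract one bit from the returned old
 * value.  In rough C, for test-and-set:
 *
 *	int raw_bit_test_set(int *ptr, int nr)
 *	{
 *		unsigned long old = raw_bit_set(ptr, nr);    old word value
 *		return (old >> nr) & 1;
 *	}
 */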

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-clear a bit in a 32bit word and return the old bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_clear_asm)
	[--sp] = rets;
	[--sp] = r1;
	call ___raw_bit_clear_asm
	r1 = [sp++];
	r2 = 1;
	r2 <<= r1;
	r0 = r0 & r2;
	cc = r0 == 0;
	if cc jump 1f
	r0 = 1;
1:
	rets = [sp++];
	rts;
ENDPROC(___raw_bit_test_clear_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-toggle a bit in a 32bit word,
 * and return the old bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_toggle_asm)
	[--sp] = rets;
	[--sp] = r1;
	call ___raw_bit_toggle_asm
	r1 = [sp++];
	r2 = 1;
	r2 <<= r1;
	r0 = r0 & r2;
	cc = r0 == 0;
	if cc jump 1f
	r0 = 1;
1:
	rets = [sp++];
	rts;
ENDPROC(___raw_bit_test_toggle_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test a bit in a 32bit word and return its value.
 * We need this on this architecture in order to invalidate
 * the local cache before testing.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_asm)
	r2 = r1;
	r1 = 1;
	r1 <<= r2;
	jump ___raw_atomic_test_asm
ENDPROC(___raw_bit_test_asm)

/*
 * r0 = ptr
 *
 * Fetch and return an uncached 32bit value.
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_uncached_fetch_asm)
	p1 = r0;
	r1 = -L1_CACHE_BYTES;
	r1 = r0 & r1;
	p0 = r1;
	/* flush core internal write buffer before invalidate dcache */
	CSYNC(r2);
	flushinv[p0];
	SSYNC(r2);
	r0 = [p1];
	rts;
ENDPROC(___raw_uncached_fetch_asm)