[PATCH] optimize hweight64 for x86_64
[deliverable/linux.git] / include / asm-x86_64 / bitops.h
1 #ifndef _X86_64_BITOPS_H
2 #define _X86_64_BITOPS_H
3
4 /*
5 * Copyright 1992, Linus Torvalds.
6 */
7
8 #include <asm/alternative.h>
9
10 #define ADDR (*(volatile long *) addr)
11
12 /**
13 * set_bit - Atomically set a bit in memory
14 * @nr: the bit to set
15 * @addr: the address to start counting from
16 *
17 * This function is atomic and may not be reordered. See __set_bit()
18 * if you do not require the atomic guarantees.
19 * Note that @nr may be almost arbitrarily large; this function is not
20 * restricted to acting on a single-word quantity.
21 */
22 static __inline__ void set_bit(int nr, volatile void * addr)
23 {
24 __asm__ __volatile__( LOCK_PREFIX
25 "btsl %1,%0"
26 :"+m" (ADDR)
27 :"dIr" (nr) : "memory");
28 }
29
30 /**
31 * __set_bit - Set a bit in memory
32 * @nr: the bit to set
33 * @addr: the address to start counting from
34 *
35 * Unlike set_bit(), this function is non-atomic and may be reordered.
36 * If it's called on the same region of memory simultaneously, the effect
37 * may be that only one operation succeeds.
38 */
39 static __inline__ void __set_bit(int nr, volatile void * addr)
40 {
41 __asm__ volatile(
42 "btsl %1,%0"
43 :"+m" (ADDR)
44 :"dIr" (nr) : "memory");
45 }
46
47 /**
48 * clear_bit - Clears a bit in memory
49 * @nr: Bit to clear
50 * @addr: Address to start counting from
51 *
52 * clear_bit() is atomic and may not be reordered. However, it does
53 * not contain a memory barrier, so if it is used for locking purposes,
54 * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
55 * in order to ensure changes are visible on other processors.
56 */
57 static __inline__ void clear_bit(int nr, volatile void * addr)
58 {
59 __asm__ __volatile__( LOCK_PREFIX
60 "btrl %1,%0"
61 :"+m" (ADDR)
62 :"dIr" (nr));
63 }
64
65 static __inline__ void __clear_bit(int nr, volatile void * addr)
66 {
67 __asm__ __volatile__(
68 "btrl %1,%0"
69 :"+m" (ADDR)
70 :"dIr" (nr));
71 }
72
/*
 * Ordering hooks to place around clear_bit().  Both expand to barrier().
 * NOTE(review): presumably a compiler-level barrier is sufficient here
 * because the LOCK_PREFIX'ed instruction already orders memory on this
 * architecture - confirm against the barrier() definition.
 */
73 #define smp_mb__before_clear_bit() barrier()
74 #define smp_mb__after_clear_bit() barrier()
75
76 /**
77 * __change_bit - Toggle a bit in memory
78 * @nr: the bit to change
79 * @addr: the address to start counting from
80 *
81 * Unlike change_bit(), this function is non-atomic and may be reordered.
82 * If it's called on the same region of memory simultaneously, the effect
83 * may be that only one operation succeeds.
84 */
85 static __inline__ void __change_bit(int nr, volatile void * addr)
86 {
87 __asm__ __volatile__(
88 "btcl %1,%0"
89 :"+m" (ADDR)
90 :"dIr" (nr));
91 }
92
93 /**
94 * change_bit - Toggle a bit in memory
95 * @nr: Bit to change
96 * @addr: Address to start counting from
97 *
98 * change_bit() is atomic and may not be reordered.
99 * Note that @nr may be almost arbitrarily large; this function is not
100 * restricted to acting on a single-word quantity.
101 */
102 static __inline__ void change_bit(int nr, volatile void * addr)
103 {
104 __asm__ __volatile__( LOCK_PREFIX
105 "btcl %1,%0"
106 :"+m" (ADDR)
107 :"dIr" (nr));
108 }
109
110 /**
111 * test_and_set_bit - Set a bit and return its old value
112 * @nr: Bit to set
113 * @addr: Address to count from
114 *
115 * This operation is atomic and cannot be reordered.
116 * It also implies a memory barrier.
117 */
118 static __inline__ int test_and_set_bit(int nr, volatile void * addr)
119 {
120 int oldbit;
121
122 __asm__ __volatile__( LOCK_PREFIX
123 "btsl %2,%1\n\tsbbl %0,%0"
124 :"=r" (oldbit),"+m" (ADDR)
125 :"dIr" (nr) : "memory");
126 return oldbit;
127 }
128
129 /**
130 * __test_and_set_bit - Set a bit and return its old value
131 * @nr: Bit to set
132 * @addr: Address to count from
133 *
134 * This operation is non-atomic and can be reordered.
135 * If two examples of this operation race, one can appear to succeed
136 * but actually fail. You must protect multiple accesses with a lock.
137 */
138 static __inline__ int __test_and_set_bit(int nr, volatile void * addr)
139 {
140 int oldbit;
141
142 __asm__(
143 "btsl %2,%1\n\tsbbl %0,%0"
144 :"=r" (oldbit),"+m" (ADDR)
145 :"dIr" (nr));
146 return oldbit;
147 }
148
149 /**
150 * test_and_clear_bit - Clear a bit and return its old value
151 * @nr: Bit to clear
152 * @addr: Address to count from
153 *
154 * This operation is atomic and cannot be reordered.
155 * It also implies a memory barrier.
156 */
157 static __inline__ int test_and_clear_bit(int nr, volatile void * addr)
158 {
159 int oldbit;
160
161 __asm__ __volatile__( LOCK_PREFIX
162 "btrl %2,%1\n\tsbbl %0,%0"
163 :"=r" (oldbit),"+m" (ADDR)
164 :"dIr" (nr) : "memory");
165 return oldbit;
166 }
167
168 /**
169 * __test_and_clear_bit - Clear a bit and return its old value
170 * @nr: Bit to clear
171 * @addr: Address to count from
172 *
173 * This operation is non-atomic and can be reordered.
174 * If two examples of this operation race, one can appear to succeed
175 * but actually fail. You must protect multiple accesses with a lock.
176 */
177 static __inline__ int __test_and_clear_bit(int nr, volatile void * addr)
178 {
179 int oldbit;
180
181 __asm__(
182 "btrl %2,%1\n\tsbbl %0,%0"
183 :"=r" (oldbit),"+m" (ADDR)
184 :"dIr" (nr));
185 return oldbit;
186 }
187
188 /* WARNING: non atomic and it can be reordered! */
189 static __inline__ int __test_and_change_bit(int nr, volatile void * addr)
190 {
191 int oldbit;
192
193 __asm__ __volatile__(
194 "btcl %2,%1\n\tsbbl %0,%0"
195 :"=r" (oldbit),"+m" (ADDR)
196 :"dIr" (nr) : "memory");
197 return oldbit;
198 }
199
200 /**
201 * test_and_change_bit - Change a bit and return its old value
202 * @nr: Bit to change
203 * @addr: Address to count from
204 *
205 * This operation is atomic and cannot be reordered.
206 * It also implies a memory barrier.
207 */
208 static __inline__ int test_and_change_bit(int nr, volatile void * addr)
209 {
210 int oldbit;
211
212 __asm__ __volatile__( LOCK_PREFIX
213 "btcl %2,%1\n\tsbbl %0,%0"
214 :"=r" (oldbit),"+m" (ADDR)
215 :"dIr" (nr) : "memory");
216 return oldbit;
217 }
218
219 #if 0 /* Fool kernel-doc since it doesn't do macros yet */
220 /**
221 * test_bit - Determine whether a bit is set
222 * @nr: bit number to test
223 * @addr: Address to start counting from
224 */
225 static int test_bit(int nr, const volatile void * addr);
226 #endif
227
/*
 * constant_test_bit - test a bit in plain C (no inline asm)
 *
 * Treats @addr as an array of 32-bit words: word nr >> 5, bit nr & 31.
 * Returns 1 if the bit is set, 0 otherwise.  test_bit() selects this
 * variant when @nr is a compile-time constant.
 */
static __inline__ int constant_test_bit(int nr, const volatile void * addr)
{
	const volatile unsigned int *words = (const volatile unsigned int *) addr;
	unsigned long mask = 1UL << (nr & 31);

	return (mask & words[nr >> 5]) != 0;
}
232
233 static __inline__ int variable_test_bit(int nr, volatile const void * addr)
234 {
235 int oldbit;
236
237 __asm__ __volatile__(
238 "btl %2,%1\n\tsbbl %0,%0"
239 :"=r" (oldbit)
240 :"m" (ADDR),"dIr" (nr));
241 return oldbit;
242 }
243
/*
 * test_bit - pick the cheapest bit test for the call site:
 * constant_test_bit() when nr is a compile-time constant,
 * variable_test_bit() otherwise.
 */
#define test_bit(nr,addr)				\
	(__builtin_constant_p(nr)			\
		? constant_test_bit((nr),(addr))	\
		: variable_test_bit((nr),(addr)))
248
249 #undef ADDR
250
251 extern long find_first_zero_bit(const unsigned long * addr, unsigned long size);
252 extern long find_next_zero_bit (const unsigned long * addr, long size, long offset);
253 extern long find_first_bit(const unsigned long * addr, unsigned long size);
254 extern long find_next_bit(const unsigned long * addr, long size, long offset);
255
/*
 * Return the index of the first bit set in val, or max when no bit is set.
 *
 * "bsfq" sets ZF when its source is zero; the "cmovz" then substitutes
 * max.  The output is early-clobber ("=&r") because it is written by
 * bsfq before max is read by cmovz.
 */
static inline unsigned long __scanbit(unsigned long val, unsigned long max)
{
	unsigned long idx;

	__asm__("bsfq %[src],%[dst] ; cmovz %[dflt],%[dst]"
		: [dst] "=&r" (idx)
		: [src] "r" (val), [dflt] "r" (max));
	return idx;
}
262
/*
 * find_first_bit - index of the first set bit in a bitmap
 *
 * When size is a compile-time constant no larger than BITS_PER_LONG the
 * bitmap is a single word, so it is scanned inline with __scanbit()
 * (which yields size when no bit is set).  Otherwise this falls through
 * to the out-of-line find_first_bit() function; the name is not
 * re-expanded inside its own macro.
 *
 * addr is parenthesized under the cast so that pointer expressions such
 * as "base + 1" are cast as a whole rather than only their first operand.
 */
#define find_first_bit(addr,size) \
	((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \
	  (__scanbit(*(unsigned long *)(addr),(size))) : \
	  find_first_bit(addr,size)))
267
/*
 * find_next_bit - index of the first set bit at or after off
 *
 * For a compile-time constant size no larger than BITS_PER_LONG the
 * single word is shifted right by off and scanned inline with
 * __scanbit(); the result is rebased by adding off back, giving size
 * when no further bit is set.  Otherwise this falls through to the
 * out-of-line find_next_bit() function.
 *
 * addr is parenthesized under the cast so that pointer expressions such
 * as "base + 1" are cast as a whole rather than only their first operand.
 */
#define find_next_bit(addr,size,off) \
	((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \
	  ((off) + (__scanbit((*(unsigned long *)(addr)) >> (off),(size)-(off)))) : \
	  find_next_bit(addr,size,off)))
272
/*
 * find_first_zero_bit - index of the first clear bit in a bitmap
 *
 * For a compile-time constant size no larger than BITS_PER_LONG the
 * single word is inverted and scanned inline with __scanbit() (which
 * yields size when there is no clear bit).  Otherwise this falls
 * through to the out-of-line find_first_zero_bit() function.
 *
 * addr is parenthesized under the cast so that pointer expressions such
 * as "base + 1" are cast as a whole rather than only their first operand.
 */
#define find_first_zero_bit(addr,size) \
	((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \
	  (__scanbit(~*(unsigned long *)(addr),(size))) : \
	  find_first_zero_bit(addr,size)))
277
/*
 * find_next_zero_bit - index of the first clear bit at or after off
 *
 * For a compile-time constant size no larger than BITS_PER_LONG the
 * single word is shifted right by off, inverted and scanned inline with
 * __scanbit(); the result is rebased by adding off back.  Otherwise
 * this falls through to the out-of-line find_next_zero_bit() function.
 *
 * addr is parenthesized under the cast so that pointer expressions such
 * as "base + 1" are cast as a whole rather than only their first operand.
 */
#define find_next_zero_bit(addr,size,off) \
	((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \
	  ((off)+(__scanbit(~(((*(unsigned long *)(addr))) >> (off)),(size)-(off)))) : \
	  find_next_zero_bit(addr,size,off)))
282
283 /*
284 * Find string of zero bits in a bitmap. -1 when not found.
285 */
286 extern unsigned long
287 find_next_zero_string(unsigned long *bitmap, long start, long nbits, int len);
288
/*
 * set_bit_string - set len consecutive bits of bitmap, starting at bit i
 *
 * Uses the non-atomic __set_bit() for every bit in [i, i + len).
 */
static inline void set_bit_string(unsigned long *bitmap, unsigned long i,
				  int len)
{
	const unsigned long stop = i + len;
	unsigned long pos;

	for (pos = i; pos < stop; pos++)
		__set_bit(pos, bitmap);
}
298
/*
 * __clear_bit_string - clear len consecutive bits of bitmap, starting at bit i
 *
 * Uses __clear_bit() for every bit in [i, i + len).
 */
static inline void __clear_bit_string(unsigned long *bitmap, unsigned long i,
				      int len)
{
	const unsigned long stop = i + len;
	unsigned long pos;

	for (pos = i; pos < stop; pos++)
		__clear_bit(pos, bitmap);
}
308
/**
 * ffz - find the first (lowest) zero bit in a word
 * @word: the word to search
 *
 * Runs "bsfq" on the complement of @word.  The result is undefined when
 * @word has no zero bit, so callers should check against ~0UL first.
 */
static __inline__ unsigned long ffz(unsigned long word)
{
	unsigned long inverted = ~word;
	unsigned long idx;

	__asm__("bsfq %[src],%[dst]"
		: [dst] "=r" (idx)
		: [src] "r" (inverted));
	return idx;
}
322
/**
 * __ffs - find the first (lowest) set bit in a word
 * @word: the word to search
 *
 * Returns the zero-based index produced by "bsfq".  The result is
 * undefined when @word is 0, so callers should check against 0 first.
 */
static __inline__ unsigned long __ffs(unsigned long word)
{
	unsigned long idx;

	__asm__("bsfq %[src],%[dst]"
		: [dst] "=r" (idx)
		: [src] "rm" (word));
	return idx;
}
336
/*
 * __fls: find the last (highest) set bit in a word.
 * @word: The word to search
 *
 * Returns the zero-based index produced by "bsrq".  The result is
 * undefined when @word is 0, so callers should check against 0 first.
 */
static __inline__ unsigned long __fls(unsigned long word)
{
	unsigned long idx;

	__asm__("bsrq %[src],%[dst]"
		: [dst] "=r" (idx)
		: [src] "rm" (word));
	return idx;
}
350
351 #ifdef __KERNEL__
352
353 #include <asm-generic/bitops/sched.h>
354
/**
 * ffs - find first bit set
 * @x: the word to search
 *
 * Defined the same way as the libc and compiler builtin ffs routines
 * (see man ffs), and therefore differs in spirit from ffz above: bit
 * positions are 1-based and ffs(0) returns 0.
 */
static __inline__ int ffs(int x)
{
	int r;

	/*
	 * "bsfl" sets ZF when @x is 0, in which case "cmovzl" loads -1 so
	 * that r + 1 is 0.  The output must be early-clobber ("=&r"): bsfl
	 * writes %0 before cmovzl reads %2, so the two must never share a
	 * register.  This matches the constraint used by fls().
	 */
	__asm__("bsfl %1,%0\n\t"
		"cmovzl %2,%0"
		: "=&r" (r) : "rm" (x), "r" (-1));
	return r+1;
}
372
373 /**
374 * fls64 - find last bit set in 64 bit word
375 * @x: the word to search
376 *
377 * This is defined the same way as fls.
378 */
379 static __inline__ int fls64(__u64 x)
380 {
381 if (x == 0)
382 return 0;
383 return __fls(x) + 1;
384 }
385
/**
 * fls - find last bit set
 * @x: the word to search
 *
 * Uses the same convention as ffs(): the result is the 1-based position
 * of the highest set bit, and 0 when @x is 0.  "bsrl" sets ZF for a
 * zero source, and "cmovzl" then substitutes -1 so that the final + 1
 * yields 0.
 */
static __inline__ int fls(int x)
{
	int pos;

	__asm__("bsrl %[src],%[dst]\n\t"
		"cmovzl %[dflt],%[dst]"
		: [dst] "=&r" (pos)
		: [src] "rm" (x), [dflt] "rm" (-1));
	return pos + 1;
}
401
402 #define ARCH_HAS_FAST_MULTIPLIER 1
403
404 #include <asm-generic/bitops/hweight.h>
405
406 #endif /* __KERNEL__ */
407
408 #ifdef __KERNEL__
409
410 #include <asm-generic/bitops/ext2-non-atomic.h>
411
/*
 * Atomic ext2 bitmap helpers, mapped onto the LOCK_PREFIX'ed
 * test_and_set_bit()/test_and_clear_bit(); the lock argument is not
 * used by the expansion.
 *
 * addr is parenthesized under the cast so that pointer expressions such
 * as "base + off" are cast as a whole rather than only their first operand.
 */
#define ext2_set_bit_atomic(lock,nr,addr) \
	test_and_set_bit((nr),(unsigned long*)(addr))
#define ext2_clear_bit_atomic(lock,nr,addr) \
	test_and_clear_bit((nr),(unsigned long*)(addr))
416
417 #include <asm-generic/bitops/minix.h>
418
419 #endif /* __KERNEL__ */
420
421 #endif /* _X86_64_BITOPS_H */
This page took 0.039694 seconds and 6 git commands to generate.