#ifndef _ASM_X86_BITOPS_H
#define _ASM_X86_BITOPS_H

/*
 * Copyright 1992, Linus Torvalds.
 *
 * Note: inlines with more than a single statement should be marked
 * __always_inline to avoid problems with older gcc's inlining heuristics.
 */

#ifndef _LINUX_BITOPS_H
#error only <linux/bitops.h> can be included directly
#endif

#include <linux/compiler.h>
#include <asm/alternative.h>

/*
 * These have to be done with inline assembly: that way the bit-setting
 * is guaranteed to be atomic. All bit operations return 0 if the bit
 * was cleared before the operation and != 0 if it was not.
 *
 * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
 */
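
/*
 * Worked example of the numbering above (editorial illustration): bit nr
 * lives in word nr / BITS_PER_LONG at position nr % BITS_PER_LONG, so with
 * 32-bit longs bit 35 is bit 3 of addr[1].  This is exactly how
 * constant_test_bit() below indexes the array.
 */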

#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1)
/* Technically wrong, but this avoids compilation errors on some gcc
   versions. */
#define BITOP_ADDR(x) "=m" (*(volatile long *) (x))
#else
#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
#endif

#define ADDR				BITOP_ADDR(addr)

/*
 * We do the locked ops that don't return the old value as
 * a mask operation on a byte.
 */
#define IS_IMMEDIATE(nr)		(__builtin_constant_p(nr))
#define CONST_MASK_ADDR(nr, addr)	BITOP_ADDR((void *)(addr) + ((nr)>>3))
#define CONST_MASK(nr)			(1 << ((nr) & 7))
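
/*
 * Editorial illustration of the byte-mask path above: for a constant nr,
 * CONST_MASK_ADDR(nr, addr) names the byte that holds the bit ((nr) >> 3
 * bytes past addr) and CONST_MASK(nr) is the bit's mask within that byte.
 * E.g. nr == 12 gives byte offset 1 and mask 1 << 4 == 0x10, so
 * set_bit(12, addr) turns into a "lock orb $0x10" on the byte at addr + 1.
 */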

/**
 * set_bit - Atomically set a bit in memory
 * @nr: the bit to set
 * @addr: the address to start counting from
 *
 * This function is atomic and may not be reordered.  See __set_bit()
 * if you do not require the atomic guarantees.
 *
 * Note: there are no guarantees that this function will not be reordered
 * on non x86 architectures, so if you are writing portable code,
 * make sure not to rely on its reordering guarantees.
 *
 * Note that @nr may be almost arbitrarily large; this function is not
 * restricted to acting on a single-word quantity.
 */
static __always_inline void
set_bit(unsigned int nr, volatile unsigned long *addr)
{
	if (IS_IMMEDIATE(nr)) {
		asm volatile(LOCK_PREFIX "orb %1,%0"
			: CONST_MASK_ADDR(nr, addr)
			: "iq" ((u8)CONST_MASK(nr))
			: "memory");
	} else {
		asm volatile(LOCK_PREFIX "bts %1,%0"
			: BITOP_ADDR(addr) : "Ir" (nr) : "memory");
	}
}
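
/*
 * Illustrative usage (editorial addition; "my_flags" is a made-up name):
 *
 *	static unsigned long my_flags;
 *	...
 *	set_bit(3, &my_flags);		atomically sets bit 3 of my_flags
 */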

/**
 * __set_bit - Set a bit in memory
 * @nr: the bit to set
 * @addr: the address to start counting from
 *
 * Unlike set_bit(), this function is non-atomic and may be reordered.
 * If it's called on the same region of memory simultaneously, the effect
 * may be that only one operation succeeds.
 */
static inline void __set_bit(int nr, volatile unsigned long *addr)
{
	asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory");
}

/**
 * clear_bit - Clears a bit in memory
 * @nr: Bit to clear
 * @addr: Address to start counting from
 *
 * clear_bit() is atomic and may not be reordered.  However, it does
 * not contain a memory barrier, so if it is used for locking purposes,
 * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
 * in order to ensure changes are visible on other processors.
 */
static __always_inline void
clear_bit(int nr, volatile unsigned long *addr)
{
	if (IS_IMMEDIATE(nr)) {
		asm volatile(LOCK_PREFIX "andb %1,%0"
			: CONST_MASK_ADDR(nr, addr)
			: "iq" ((u8)~CONST_MASK(nr)));
	} else {
		asm volatile(LOCK_PREFIX "btr %1,%0"
			: BITOP_ADDR(addr)
			: "Ir" (nr));
	}
}

/*
 * clear_bit_unlock - Clears a bit in memory
 * @nr: Bit to clear
 * @addr: Address to start counting from
 *
 * clear_bit_unlock() is atomic and implies release semantics before the
 * memory operation. It can be used for an unlock.
 */
static inline void clear_bit_unlock(unsigned nr, volatile unsigned long *addr)
{
	barrier();
	clear_bit(nr, addr);
}

static inline void __clear_bit(int nr, volatile unsigned long *addr)
{
	asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
}

/*
 * __clear_bit_unlock - Clears a bit in memory
 * @nr: Bit to clear
 * @addr: Address to start counting from
 *
 * __clear_bit_unlock() is non-atomic and implies release semantics before
 * the memory operation. It can be used for an unlock if no other CPUs can
 * concurrently modify other bits in the word.
 *
 * No memory barrier is required here, because x86 cannot reorder stores past
 * older loads. Same principle as spin_unlock.
 */
static inline void __clear_bit_unlock(unsigned nr, volatile unsigned long *addr)
{
	barrier();
	__clear_bit(nr, addr);
}

#define smp_mb__before_clear_bit()	barrier()
#define smp_mb__after_clear_bit()	barrier()
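
/*
 * Illustrative sketch of the barrier pairing described in the clear_bit()
 * comment above (editorial addition; my_data, my_flags and MY_LOCK_BIT are
 * made-up names):
 *
 *	my_data = new_value;
 *	smp_mb__before_clear_bit();	order the data store before the release
 *	clear_bit(MY_LOCK_BIT, &my_flags);
 */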

/**
 * __change_bit - Toggle a bit in memory
 * @nr: the bit to change
 * @addr: the address to start counting from
 *
 * Unlike change_bit(), this function is non-atomic and may be reordered.
 * If it's called on the same region of memory simultaneously, the effect
 * may be that only one operation succeeds.
 */
static inline void __change_bit(int nr, volatile unsigned long *addr)
{
	asm volatile("btc %1,%0" : ADDR : "Ir" (nr));
}

/**
 * change_bit - Toggle a bit in memory
 * @nr: Bit to change
 * @addr: Address to start counting from
 *
 * change_bit() is atomic and may not be reordered.
 * Note that @nr may be almost arbitrarily large; this function is not
 * restricted to acting on a single-word quantity.
 */
static inline void change_bit(int nr, volatile unsigned long *addr)
{
	if (IS_IMMEDIATE(nr)) {
		asm volatile(LOCK_PREFIX "xorb %1,%0"
			: CONST_MASK_ADDR(nr, addr)
			: "iq" ((u8)CONST_MASK(nr)));
	} else {
		asm volatile(LOCK_PREFIX "btc %1,%0"
			: BITOP_ADDR(addr)
			: "Ir" (nr));
	}
}

/**
 * test_and_set_bit - Set a bit and return its old value
 * @nr: Bit to set
 * @addr: Address to count from
 *
 * This operation is atomic and cannot be reordered.
 * It also implies a memory barrier.
 */
static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
{
	int oldbit;

	asm volatile(LOCK_PREFIX "bts %2,%1\n\t"
		     "sbb %0,%0" : "=r" (oldbit), ADDR : "Ir" (nr) : "memory");

	return oldbit;
}
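
/*
 * Illustrative usage (editorial addition; MY_BUSY_BIT and my_state are
 * made-up names): a bit used as a simple "already claimed?" flag.
 *
 *	if (test_and_set_bit(MY_BUSY_BIT, &my_state))
 *		return -EBUSY;		somebody else got there first
 *	...do the work...
 *	clear_bit_unlock(MY_BUSY_BIT, &my_state);
 */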

/**
 * test_and_set_bit_lock - Set a bit and return its old value for lock
 * @nr: Bit to set
 * @addr: Address to count from
 *
 * This is the same as test_and_set_bit on x86.
 */
static __always_inline int
test_and_set_bit_lock(int nr, volatile unsigned long *addr)
{
	return test_and_set_bit(nr, addr);
}

/**
 * __test_and_set_bit - Set a bit and return its old value
 * @nr: Bit to set
 * @addr: Address to count from
 *
 * This operation is non-atomic and can be reordered.
 * If two instances of this operation race, one can appear to succeed
 * but actually fail.  You must protect multiple accesses with a lock.
 */
static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
{
	int oldbit;

	asm("bts %2,%1\n\t"
	    "sbb %0,%0"
	    : "=r" (oldbit), ADDR
	    : "Ir" (nr));
	return oldbit;
}

/**
 * test_and_clear_bit - Clear a bit and return its old value
 * @nr: Bit to clear
 * @addr: Address to count from
 *
 * This operation is atomic and cannot be reordered.
 * It also implies a memory barrier.
 */
static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
{
	int oldbit;

	asm volatile(LOCK_PREFIX "btr %2,%1\n\t"
		     "sbb %0,%0"
		     : "=r" (oldbit), ADDR : "Ir" (nr) : "memory");

	return oldbit;
}

/**
 * __test_and_clear_bit - Clear a bit and return its old value
 * @nr: Bit to clear
 * @addr: Address to count from
 *
 * This operation is non-atomic and can be reordered.
 * If two instances of this operation race, one can appear to succeed
 * but actually fail.  You must protect multiple accesses with a lock.
 */
static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
{
	int oldbit;

	asm volatile("btr %2,%1\n\t"
		     "sbb %0,%0"
		     : "=r" (oldbit), ADDR
		     : "Ir" (nr));
	return oldbit;
}

/* WARNING: non-atomic and it can be reordered! */
static inline int __test_and_change_bit(int nr, volatile unsigned long *addr)
{
	int oldbit;

	asm volatile("btc %2,%1\n\t"
		     "sbb %0,%0"
		     : "=r" (oldbit), ADDR
		     : "Ir" (nr) : "memory");

	return oldbit;
}

/**
 * test_and_change_bit - Change a bit and return its old value
 * @nr: Bit to change
 * @addr: Address to count from
 *
 * This operation is atomic and cannot be reordered.
 * It also implies a memory barrier.
 */
static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
{
	int oldbit;

	asm volatile(LOCK_PREFIX "btc %2,%1\n\t"
		     "sbb %0,%0"
		     : "=r" (oldbit), ADDR : "Ir" (nr) : "memory");

	return oldbit;
}

static __always_inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr)
{
	return ((1UL << (nr % BITS_PER_LONG)) &
		(addr[nr / BITS_PER_LONG])) != 0;
}

static inline int variable_test_bit(int nr, volatile const unsigned long *addr)
{
	int oldbit;

	asm volatile("bt %2,%1\n\t"
		     "sbb %0,%0"
		     : "=r" (oldbit)
		     : "m" (*(unsigned long *)addr), "Ir" (nr));

	return oldbit;
}

#if 0 /* Fool kernel-doc since it doesn't do macros yet */
/**
 * test_bit - Determine whether a bit is set
 * @nr: bit number to test
 * @addr: Address to start counting from
 */
static int test_bit(int nr, const volatile unsigned long *addr);
#endif

#define test_bit(nr, addr)			\
	(__builtin_constant_p((nr))		\
	 ? constant_test_bit((nr), (addr))	\
	 : variable_test_bit((nr), (addr)))
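
/*
 * Editorial note: test_bit() expands to constant_test_bit() when @nr is a
 * compile-time constant and to the "bt"-based variable_test_bit() otherwise.
 * Illustrative use (DEV_READY, dev_flags and start_io() are made-up names):
 *
 *	if (test_bit(DEV_READY, &dev_flags))
 *		start_io();
 */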

/**
 * __ffs - find first set bit in word
 * @word: The word to search
 *
 * Undefined if no bit exists, so code should check against 0 first.
 */
static inline unsigned long __ffs(unsigned long word)
{
	asm("bsf %1,%0"
		: "=r" (word)
		: "rm" (word));
	return word;
}

/**
 * ffz - find first zero bit in word
 * @word: The word to search
 *
 * Undefined if no zero exists, so code should check against ~0UL first.
 */
static inline unsigned long ffz(unsigned long word)
{
	asm("bsf %1,%0"
		: "=r" (word)
		: "r" (~word));
	return word;
}
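
/*
 * Editorial examples for the two helpers above: __ffs(0x18) == 3 (bit 3 is
 * the lowest set bit) and ffz(0x07) == 3 (bit 3 is the lowest clear bit);
 * ffz(word) is simply __ffs(~word), which is how it is coded above.  Neither
 * result is defined for an all-zero (resp. all-ones) argument.
 */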

/*
 * __fls: find last set bit in word
 * @word: The word to search
 *
 * Undefined if no set bit exists, so code should check against 0 first.
 */
static inline unsigned long __fls(unsigned long word)
{
	asm("bsr %1,%0"
	    : "=r" (word)
	    : "rm" (word));
	return word;
}

#undef ADDR

#ifdef __KERNEL__
/**
 * ffs - find first set bit in word
 * @x: the word to search
 *
 * This is defined the same way as the libc and compiler builtin ffs
 * routines, therefore differs in spirit from the other bitops.
 *
 * ffs(value) returns 0 if value is 0 or the position of the first
 * set bit if value is nonzero. The first (least significant) bit
 * is at position 1.
 */
static inline int ffs(int x)
{
	int r;

#ifdef CONFIG_X86_64
	/*
	 * AMD64 says BSFL won't clobber the dest reg if x==0; Intel64 says the
	 * dest reg is undefined if x==0, but their CPU architect says its
	 * value is written to set it to the same as before, except that the
	 * top 32 bits will be cleared.
	 *
	 * We cannot do this on 32 bits because at the very least some
	 * 486 CPUs did not behave this way.
	 */
	long tmp = -1;
	asm("bsfl %1,%0"
	    : "=r" (r)
	    : "rm" (x), "0" (tmp));
#elif defined(CONFIG_X86_CMOV)
	asm("bsfl %1,%0\n\t"
	    "cmovzl %2,%0"
	    : "=&r" (r) : "rm" (x), "r" (-1));
#else
	asm("bsfl %1,%0\n\t"
	    "jnz 1f\n\t"
	    "movl $-1,%0\n"
	    "1:" : "=r" (r) : "rm" (x));
#endif
	return r + 1;
}
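
/*
 * Editorial examples: ffs(0) == 0, ffs(1) == 1, ffs(0x08) == 4.  For a
 * nonzero argument, ffs(x) == __ffs(x) + 1; unlike __ffs(), ffs() has a
 * defined result (0) for a zero argument.
 */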

/**
 * fls - find last set bit in word
 * @x: the word to search
 *
 * This is defined in a similar way as the libc and compiler builtin
 * ffs, but returns the position of the most significant set bit.
 *
 * fls(value) returns 0 if value is 0 or the position of the last
 * set bit if value is nonzero. The last (most significant) bit is
 * at position 32.
 */
static inline int fls(int x)
{
	int r;

#ifdef CONFIG_X86_64
	/*
	 * AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says the
	 * dest reg is undefined if x==0, but their CPU architect says its
	 * value is written to set it to the same as before, except that the
	 * top 32 bits will be cleared.
	 *
	 * We cannot do this on 32 bits because at the very least some
	 * 486 CPUs did not behave this way.
	 */
	long tmp = -1;
	asm("bsrl %1,%0"
	    : "=r" (r)
	    : "rm" (x), "0" (tmp));
#elif defined(CONFIG_X86_CMOV)
	asm("bsrl %1,%0\n\t"
	    "cmovzl %2,%0"
	    : "=&r" (r) : "rm" (x), "rm" (-1));
#else
	asm("bsrl %1,%0\n\t"
	    "jnz 1f\n\t"
	    "movl $-1,%0\n"
	    "1:" : "=r" (r) : "rm" (x));
#endif
	return r + 1;
}
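
/*
 * Editorial examples: fls(0) == 0, fls(1) == 1, fls(0x40) == 7, and
 * fls(-1) == 32 (the sign bit, bit 31, is reported as position 32).
 */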

/**
 * fls64 - find last set bit in a 64-bit word
 * @x: the word to search
 *
 * This is defined in a similar way as the libc and compiler builtin
 * ffsll, but returns the position of the most significant set bit.
 *
 * fls64(value) returns 0 if value is 0 or the position of the last
 * set bit if value is nonzero. The last (most significant) bit is
 * at position 64.
 */
#ifdef CONFIG_X86_64
static __always_inline int fls64(__u64 x)
{
	long bitpos = -1;
	/*
	 * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the
	 * dest reg is undefined if x==0, but their CPU architect says its
	 * value is written to set it to the same as before.
	 */
	asm("bsrq %1,%0"
	    : "+r" (bitpos)
	    : "rm" (x));
	return bitpos + 1;
}
#else
#include <asm-generic/bitops/fls64.h>
#endif
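
/*
 * Editorial examples: fls64(0) == 0, fls64(1) == 1 and
 * fls64(1ULL << 63) == 64, matching the 32-bit fls() convention above.
 */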

#include <asm-generic/bitops/find.h>

#include <asm-generic/bitops/sched.h>

#define ARCH_HAS_FAST_MULTIPLIER 1

#include <asm/arch_hweight.h>

#include <asm-generic/bitops/const_hweight.h>

#include <asm-generic/bitops/le.h>

#include <asm-generic/bitops/ext2-atomic-setbit.h>

#endif /* __KERNEL__ */
#endif /* _ASM_X86_BITOPS_H */