#ifndef _ASM_X86_BITOPS_H
#define _ASM_X86_BITOPS_H

/*
 * Copyright 1992, Linus Torvalds.
 *
 * Note: inlines with more than a single statement should be marked
 * __always_inline to avoid problems with older gcc's inlining heuristics.
 */

#ifndef _LINUX_BITOPS_H
#error only <linux/bitops.h> can be included directly
#endif

#include <linux/compiler.h>
#include <asm/alternative.h>

#if BITS_PER_LONG == 32
# define _BITOPS_LONG_SHIFT 5
#elif BITS_PER_LONG == 64
# define _BITOPS_LONG_SHIFT 6
#else
# error "Unexpected BITS_PER_LONG"
#endif

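/*
 * Illustrative sketch, not part of this header: how a bit number splits
 * into a word index and a bit offset via _BITOPS_LONG_SHIFT.  The helper
 * name decompose_bitnr() is made up for illustration.
 *
 *	static inline void decompose_bitnr(long nr, long *word, long *bit)
 *	{
 *		*word = nr >> _BITOPS_LONG_SHIFT;	// which unsigned long
 *		*bit  = nr & (BITS_PER_LONG - 1);	// which bit within it
 *	}
 *
 * e.g. on 64-bit, nr == 70 lands in word 1, bit 6.
 */
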
#define BIT_64(n)			(U64_C(1) << (n))

/*
 * These have to be done with inline assembly: that way the bit-setting
 * is guaranteed to be atomic. All bit operations return 0 if the bit
 * was cleared before the operation and != 0 if it was not.
 *
 * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1) on 32-bit
 * (bit 64 on 64-bit, since addr points to unsigned long).
 */

#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1)
/* Technically wrong, but this avoids compilation errors on some gcc
   versions. */
#define BITOP_ADDR(x) "=m" (*(volatile long *) (x))
#else
#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
#endif

#define ADDR				BITOP_ADDR(addr)

/*
 * We do the locked ops that don't return the old value as
 * a mask operation on a byte.
 */
#define IS_IMMEDIATE(nr)		(__builtin_constant_p(nr))
#define CONST_MASK_ADDR(nr, addr)	BITOP_ADDR((void *)(addr) + ((nr)>>3))
#define CONST_MASK(nr)			(1 << ((nr) & 7))

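/*
 * Illustrative sketch, not part of this header: what the constant-@nr
 * path boils down to.  For a compile-time nr, CONST_MASK_ADDR() selects
 * the byte that holds the bit and CONST_MASK() builds a one-byte mask:
 *
 *	nr   = 12;
 *	byte = (void *)addr + (nr >> 3);	// addr + 1
 *	mask = 1 << (nr & 7);			// 0x10
 *
 * so set_bit(12, addr) can use a locked byte-wide "orb" with an immediate
 * mask instead of a bts on the whole word (the exact codegen is an
 * assumption, not something this header guarantees).
 */
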
/**
 * set_bit - Atomically set a bit in memory
 * @nr: the bit to set
 * @addr: the address to start counting from
 *
 * This function is atomic and may not be reordered.  See __set_bit()
 * if you do not require the atomic guarantees.
 *
 * Note: there are no guarantees that this function will not be reordered
 * on non-x86 architectures, so if you are writing portable code,
 * make sure not to rely on its reordering guarantees.
 *
 * Note that @nr may be almost arbitrarily large; this function is not
 * restricted to acting on a single-word quantity.
 */
static __always_inline void
set_bit(long nr, volatile unsigned long *addr)
{
	if (IS_IMMEDIATE(nr)) {
		asm volatile(LOCK_PREFIX "orb %1,%0"
			: CONST_MASK_ADDR(nr, addr)
			: "iq" ((u8)CONST_MASK(nr))
			: "memory");
	} else {
		asm volatile(LOCK_PREFIX "bts %1,%0"
			: BITOP_ADDR(addr) : "Ir" (nr) : "memory");
	}
}

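/*
 * Illustrative usage sketch, not part of this header.  The bitmap and the
 * bit numbers are made up; DECLARE_BITMAP() is from <linux/types.h>.
 *
 *	DECLARE_BITMAP(flags, 128);
 *
 *	set_bit(0, flags);	// atomic RMW, safe against other CPUs
 *	set_bit(100, flags);	// @nr may reach past the first word
 */
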
/**
 * __set_bit - Set a bit in memory
 * @nr: the bit to set
 * @addr: the address to start counting from
 *
 * Unlike set_bit(), this function is non-atomic and may be reordered.
 * If it's called on the same region of memory simultaneously, the effect
 * may be that only one operation succeeds.
 */
static inline void __set_bit(long nr, volatile unsigned long *addr)
{
	asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory");
}

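/*
 * Illustrative sketch, not part of this header: the non-atomic variant is
 * appropriate while the bitmap is still private to one context, e.g.
 * during initialisation, before it is published to other CPUs.
 *
 *	unsigned long map[2] = { 0 };
 *
 *	__set_bit(3, map);	// no lock prefix; caller guarantees exclusivity
 */
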
/**
 * clear_bit - Clears a bit in memory
 * @nr: Bit to clear
 * @addr: Address to start counting from
 *
 * clear_bit() is atomic and may not be reordered.  However, it does
 * not contain a memory barrier, so if it is used for locking purposes,
 * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
 * in order to ensure changes are visible on other processors.
 */
static __always_inline void
clear_bit(long nr, volatile unsigned long *addr)
{
	if (IS_IMMEDIATE(nr)) {
		asm volatile(LOCK_PREFIX "andb %1,%0"
			: CONST_MASK_ADDR(nr, addr)
			: "iq" ((u8)~CONST_MASK(nr)));
	} else {
		asm volatile(LOCK_PREFIX "btr %1,%0"
			: BITOP_ADDR(addr)
			: "Ir" (nr));
	}
}

/*
 * clear_bit_unlock - Clears a bit in memory
 * @nr: Bit to clear
 * @addr: Address to start counting from
 *
 * clear_bit_unlock() is atomic and implies release semantics before the
 * memory operation. It can be used for an unlock.
 */
static inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
{
	barrier();
	clear_bit(nr, addr);
}

static inline void __clear_bit(long nr, volatile unsigned long *addr)
{
	asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
}

/*
 * __clear_bit_unlock - Clears a bit in memory
 * @nr: Bit to clear
 * @addr: Address to start counting from
 *
 * __clear_bit_unlock() is non-atomic and implies release semantics before
 * the memory operation. It can be used for an unlock if no other CPUs can
 * concurrently modify other bits in the word.
 *
 * No memory barrier is required here, because x86 cannot reorder stores past
 * older loads. Same principle as spin_unlock.
 */
static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
{
	barrier();
	__clear_bit(nr, addr);
}

#define smp_mb__before_clear_bit()	barrier()
#define smp_mb__after_clear_bit()	barrier()

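/*
 * Illustrative sketch, not part of this header: when plain clear_bit() is
 * used to release a flag that other code tests, the caller supplies the
 * ordering explicitly (the bit and word names below are made up):
 *
 *	smp_mb__before_clear_bit();
 *	clear_bit(MY_BUSY_BIT, &my_flags);
 */
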
/**
 * __change_bit - Toggle a bit in memory
 * @nr: the bit to change
 * @addr: the address to start counting from
 *
 * Unlike change_bit(), this function is non-atomic and may be reordered.
 * If it's called on the same region of memory simultaneously, the effect
 * may be that only one operation succeeds.
 */
static inline void __change_bit(long nr, volatile unsigned long *addr)
{
	asm volatile("btc %1,%0" : ADDR : "Ir" (nr));
}

/**
 * change_bit - Toggle a bit in memory
 * @nr: Bit to change
 * @addr: Address to start counting from
 *
 * change_bit() is atomic and may not be reordered.
 * Note that @nr may be almost arbitrarily large; this function is not
 * restricted to acting on a single-word quantity.
 */
static inline void change_bit(long nr, volatile unsigned long *addr)
{
	if (IS_IMMEDIATE(nr)) {
		asm volatile(LOCK_PREFIX "xorb %1,%0"
			: CONST_MASK_ADDR(nr, addr)
			: "iq" ((u8)CONST_MASK(nr)));
	} else {
		asm volatile(LOCK_PREFIX "btc %1,%0"
			: BITOP_ADDR(addr)
			: "Ir" (nr));
	}
}

/**
 * test_and_set_bit - Set a bit and return its old value
 * @nr: Bit to set
 * @addr: Address to count from
 *
 * This operation is atomic and cannot be reordered.
 * It also implies a memory barrier.
 */
static inline int test_and_set_bit(long nr, volatile unsigned long *addr)
{
	int oldbit;

	asm volatile(LOCK_PREFIX "bts %2,%1\n\t"
		     "sbb %0,%0" : "=r" (oldbit), ADDR : "Ir" (nr) : "memory");

	return oldbit;
}

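/*
 * Illustrative sketch, not part of this header: test_and_set_bit() as a
 * "claim" operation.  The bitmap, bit number and helpers are made up.
 *
 *	if (!test_and_set_bit(slot, table->in_use))
 *		use_slot(slot);		// bit was clear: we claimed it
 *	else
 *		pick_another_slot();	// already claimed by someone else
 */
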
/**
 * test_and_set_bit_lock - Set a bit and return its old value for lock
 * @nr: Bit to set
 * @addr: Address to count from
 *
 * This is the same as test_and_set_bit on x86.
 */
static __always_inline int
test_and_set_bit_lock(long nr, volatile unsigned long *addr)
{
	return test_and_set_bit(nr, addr);
}

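/*
 * Illustrative sketch, not part of this header: the usual pairing of
 * test_and_set_bit_lock() with clear_bit_unlock() as a simple bit lock
 * (the bit number and word are made up):
 *
 *	while (test_and_set_bit_lock(MY_LOCK_BIT, &my_word))
 *		cpu_relax();		// spin until the bit is acquired
 *	// ... critical section ...
 *	clear_bit_unlock(MY_LOCK_BIT, &my_word);
 */
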
/**
 * __test_and_set_bit - Set a bit and return its old value
 * @nr: Bit to set
 * @addr: Address to count from
 *
 * This operation is non-atomic and can be reordered.
 * If two examples of this operation race, one can appear to succeed
 * but actually fail.  You must protect multiple accesses with a lock.
 */
static inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
{
	int oldbit;

	asm("bts %2,%1\n\t"
	    "sbb %0,%0"
	    : "=r" (oldbit), ADDR
	    : "Ir" (nr));
	return oldbit;
}

/**
 * test_and_clear_bit - Clear a bit and return its old value
 * @nr: Bit to clear
 * @addr: Address to count from
 *
 * This operation is atomic and cannot be reordered.
 * It also implies a memory barrier.
 */
static inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
{
	int oldbit;

	asm volatile(LOCK_PREFIX "btr %2,%1\n\t"
		     "sbb %0,%0"
		     : "=r" (oldbit), ADDR : "Ir" (nr) : "memory");

	return oldbit;
}

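/*
 * Illustrative sketch, not part of this header: test_and_clear_bit() as
 * "consume a pending flag exactly once" (the names are made up):
 *
 *	if (test_and_clear_bit(WORK_PENDING, &state))
 *		do_work();	// only one consumer sees the bit as set
 */
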
/**
 * __test_and_clear_bit - Clear a bit and return its old value
 * @nr: Bit to clear
 * @addr: Address to count from
 *
 * This operation is non-atomic and can be reordered.
 * If two examples of this operation race, one can appear to succeed
 * but actually fail.  You must protect multiple accesses with a lock.
 *
 * Note: the operation is performed atomically with respect to
 * the local CPU, but not other CPUs. Portable code should not
 * rely on this behaviour.
 * KVM relies on this behaviour on x86 for modifying memory that is also
 * accessed from a hypervisor on the same CPU if running in a VM: don't change
 * this without also updating arch/x86/kernel/kvm.c
 */
static inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
{
	int oldbit;

	asm volatile("btr %2,%1\n\t"
		     "sbb %0,%0"
		     : "=r" (oldbit), ADDR
		     : "Ir" (nr));
	return oldbit;
}

/* WARNING: non-atomic and it can be reordered! */
static inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
{
	int oldbit;

	asm volatile("btc %2,%1\n\t"
		     "sbb %0,%0"
		     : "=r" (oldbit), ADDR
		     : "Ir" (nr) : "memory");

	return oldbit;
}

/**
 * test_and_change_bit - Change a bit and return its old value
 * @nr: Bit to change
 * @addr: Address to count from
 *
 * This operation is atomic and cannot be reordered.
 * It also implies a memory barrier.
 */
static inline int test_and_change_bit(long nr, volatile unsigned long *addr)
{
	int oldbit;

	asm volatile(LOCK_PREFIX "btc %2,%1\n\t"
		     "sbb %0,%0"
		     : "=r" (oldbit), ADDR : "Ir" (nr) : "memory");

	return oldbit;
}

static __always_inline int constant_test_bit(long nr, const volatile unsigned long *addr)
{
	return ((1UL << (nr & (BITS_PER_LONG-1))) &
		(addr[nr >> _BITOPS_LONG_SHIFT])) != 0;
}

static inline int variable_test_bit(long nr, volatile const unsigned long *addr)
{
	int oldbit;

	asm volatile("bt %2,%1\n\t"
		     "sbb %0,%0"
		     : "=r" (oldbit)
		     : "m" (*(unsigned long *)addr), "Ir" (nr));

	return oldbit;
}

#if 0 /* Fool kernel-doc since it doesn't do macros yet */
/**
 * test_bit - Determine whether a bit is set
 * @nr: bit number to test
 * @addr: Address to start counting from
 */
static int test_bit(int nr, const volatile unsigned long *addr);
#endif

#define test_bit(nr, addr)			\
	(__builtin_constant_p((nr))		\
	 ? constant_test_bit((nr), (addr))	\
	 : variable_test_bit((nr), (addr)))

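/*
 * Illustrative sketch, not part of this header: test_bit() dispatches to
 * constant_test_bit() for a compile-time @nr and to variable_test_bit()
 * otherwise.  The bitmap and handlers below are made up.
 *
 *	DECLARE_BITMAP(flags, 64);
 *
 *	if (test_bit(5, flags))		// constant nr: plain C load + mask
 *		handle_flag_five();
 *	if (test_bit(i, flags))		// variable nr: bt instruction
 *		handle_flag(i);
 */
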
/**
 * __ffs - find first set bit in word
 * @word: The word to search
 *
 * Undefined if no bit exists, so code should check against 0 first.
 */
static inline unsigned long __ffs(unsigned long word)
{
	asm("rep; bsf %1,%0"
		: "=r" (word)
		: "rm" (word));
	return word;
}

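/*
 * Illustrative sketch, not part of this header:
 *
 *	__ffs(0x18UL) == 3;	// 0b11000: lowest set bit is bit 3
 *	// __ffs(0) is undefined; callers must check for zero first.
 */
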
/**
 * ffz - find first zero bit in word
 * @word: The word to search
 *
 * Undefined if no zero exists, so code should check against ~0UL first.
 */
static inline unsigned long ffz(unsigned long word)
{
	asm("rep; bsf %1,%0"
		: "=r" (word)
		: "r" (~word));
	return word;
}

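/*
 * Illustrative sketch, not part of this header:
 *
 *	ffz(0xffUL) == 8;	// bits 0-7 set, so the lowest clear bit is 8
 *	// ffz(~0UL) is undefined; callers must check first.
 */
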
/*
 * __fls: find last set bit in word
 * @word: The word to search
 *
 * Undefined if no set bit exists, so code should check against 0 first.
 */
static inline unsigned long __fls(unsigned long word)
{
	asm("bsr %1,%0"
	    : "=r" (word)
	    : "rm" (word));
	return word;
}

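/*
 * Illustrative sketch, not part of this header:
 *
 *	__fls(0x18UL) == 4;	// 0b11000: highest set bit is bit 4
 *	// __fls(0) is undefined; callers must check for zero first.
 */
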
#undef ADDR

#ifdef __KERNEL__
/**
 * ffs - find first set bit in word
 * @x: the word to search
 *
 * This is defined the same way as the libc and compiler builtin ffs
 * routines, therefore differs in spirit from the other bitops.
 *
 * ffs(value) returns 0 if value is 0 or the position of the first
 * set bit if value is nonzero. The first (least significant) bit
 * is at position 1.
 */
static inline int ffs(int x)
{
	int r;

#ifdef CONFIG_X86_64
	/*
	 * AMD64 says BSFL won't clobber the dest reg if x==0; Intel64 says the
	 * dest reg is undefined if x==0, but Intel's CPU architects have said
	 * that in practice it is left unchanged, except that the top 32 bits
	 * are cleared.
	 *
	 * We cannot do this on 32 bits because at the very least some
	 * 486 CPUs did not behave this way.
	 */
	asm("bsfl %1,%0"
	    : "=r" (r)
	    : "rm" (x), "0" (-1));
#elif defined(CONFIG_X86_CMOV)
	asm("bsfl %1,%0\n\t"
	    "cmovzl %2,%0"
	    : "=&r" (r) : "rm" (x), "r" (-1));
#else
	asm("bsfl %1,%0\n\t"
	    "jnz 1f\n\t"
	    "movl $-1,%0\n"
	    "1:" : "=r" (r) : "rm" (x));
#endif
	return r + 1;
}

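/*
 * Illustrative sketch, not part of this header:
 *
 *	ffs(0)    == 0;		// no bit set
 *	ffs(0x08) == 4;		// bit 3 is the first set bit -> position 4
 */
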
/**
 * fls - find last set bit in word
 * @x: the word to search
 *
 * This is defined in a similar way as the libc and compiler builtin
 * ffs, but returns the position of the most significant set bit.
 *
 * fls(value) returns 0 if value is 0 or the position of the last
 * set bit if value is nonzero. The last (most significant) bit is
 * at position 32.
 */
static inline int fls(int x)
{
	int r;

#ifdef CONFIG_X86_64
	/*
	 * AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says the
	 * dest reg is undefined if x==0, but Intel's CPU architects have said
	 * that in practice it is left unchanged, except that the top 32 bits
	 * are cleared.
	 *
	 * We cannot do this on 32 bits because at the very least some
	 * 486 CPUs did not behave this way.
	 */
	asm("bsrl %1,%0"
	    : "=r" (r)
	    : "rm" (x), "0" (-1));
#elif defined(CONFIG_X86_CMOV)
	asm("bsrl %1,%0\n\t"
	    "cmovzl %2,%0"
	    : "=&r" (r) : "rm" (x), "rm" (-1));
#else
	asm("bsrl %1,%0\n\t"
	    "jnz 1f\n\t"
	    "movl $-1,%0\n"
	    "1:" : "=r" (r) : "rm" (x));
#endif
	return r + 1;
}

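/*
 * Illustrative sketch, not part of this header:
 *
 *	fls(0)    == 0;		// no bit set
 *	fls(0x08) == 4;		// highest set bit is bit 3 -> position 4
 *	fls(-1)   == 32;	// the sign bit is the last set bit
 */
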
/**
 * fls64 - find last set bit in a 64-bit word
 * @x: the word to search
 *
 * This is defined in a similar way as the libc and compiler builtin
 * ffsll, but returns the position of the most significant set bit.
 *
 * fls64(value) returns 0 if value is 0 or the position of the last
 * set bit if value is nonzero. The last (most significant) bit is
 * at position 64.
 */
#ifdef CONFIG_X86_64
static __always_inline int fls64(__u64 x)
{
	int bitpos = -1;
	/*
	 * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the
	 * dest reg is undefined if x==0, but Intel's CPU architects have said
	 * that in practice it is left unchanged.
	 */
	asm("bsrq %1,%q0"
	    : "+r" (bitpos)
	    : "rm" (x));
	return bitpos + 1;
}
#else
#include <asm-generic/bitops/fls64.h>
#endif

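/*
 * Illustrative sketch, not part of this header:
 *
 *	fls64(0)                     == 0;
 *	fls64(0x8000000000000000ULL) == 64;	// only the top bit set
 */
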
#include <asm-generic/bitops/find.h>

#include <asm-generic/bitops/sched.h>

#define ARCH_HAS_FAST_MULTIPLIER 1

#include <asm/arch_hweight.h>

#include <asm-generic/bitops/const_hweight.h>

#include <asm-generic/bitops/le.h>

#include <asm-generic/bitops/ext2-atomic-setbit.h>

#endif /* __KERNEL__ */
#endif /* _ASM_X86_BITOPS_H */