#ifndef _ASM_X86_BITOPS_H
#define _ASM_X86_BITOPS_H

/*
 * Copyright 1992, Linus Torvalds.
 *
 * Note: inlines with more than a single statement should be marked
 * __always_inline to avoid problems with older gcc's inlining heuristics.
 */

#ifndef _LINUX_BITOPS_H
#error only <linux/bitops.h> can be included directly
#endif

#include <linux/compiler.h>
#include <asm/alternative.h>

#define BIT_64(n)			(U64_C(1) << (n))

/*
 * These have to be done with inline assembly: that way the bit-setting
 * is guaranteed to be atomic. All bit operations return 0 if the bit
 * was cleared before the operation and != 0 if it was not.
 *
 * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1) on 32-bit
 * machines.
 */

#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1)
/* Technically wrong, but this avoids compilation errors on some gcc
   versions. */
#define BITOP_ADDR(x) "=m" (*(volatile long *) (x))
#else
#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
#endif

#define ADDR				BITOP_ADDR(addr)

/*
 * We do the locked ops that don't return the old value as
 * a mask operation on a byte.
 */
#define IS_IMMEDIATE(nr)		(__builtin_constant_p(nr))
#define CONST_MASK_ADDR(nr, addr)	BITOP_ADDR((void *)(addr) + ((nr)>>3))
#define CONST_MASK(nr)			(1 << ((nr) & 7))

/**
 * set_bit - Atomically set a bit in memory
 * @nr: the bit to set
 * @addr: the address to start counting from
 *
 * This function is atomic and may not be reordered.  See __set_bit()
 * if you do not require the atomic guarantees.
 *
 * Note: there are no guarantees that this function will not be reordered
 * on non-x86 architectures, so if you are writing portable code,
 * make sure not to rely on its reordering guarantees.
 *
 * Note that @nr may be almost arbitrarily large; this function is not
 * restricted to acting on a single-word quantity.
 */
static __always_inline void
set_bit(unsigned int nr, volatile unsigned long *addr)
{
	if (IS_IMMEDIATE(nr)) {
		asm volatile(LOCK_PREFIX "orb %1,%0"
			: CONST_MASK_ADDR(nr, addr)
			: "iq" ((u8)CONST_MASK(nr))
			: "memory");
	} else {
		asm volatile(LOCK_PREFIX "bts %1,%0"
			: BITOP_ADDR(addr) : "Ir" (nr) : "memory");
	}
}

/**
 * __set_bit - Set a bit in memory
 * @nr: the bit to set
 * @addr: the address to start counting from
 *
 * Unlike set_bit(), this function is non-atomic and may be reordered.
 * If it's called on the same region of memory simultaneously, the effect
 * may be that only one operation succeeds.
 */
static inline void __set_bit(int nr, volatile unsigned long *addr)
{
	asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory");
}

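/*
 * Worked example (illustrative only, not generated code): with a
 * compile-time constant bit number, set_bit() above takes the
 * IS_IMMEDIATE() path and operates on a single byte.  For set_bit(12, addr):
 *
 *	CONST_MASK_ADDR(12, addr) = (void *)addr + (12 >> 3) = byte 1 of addr
 *	CONST_MASK(12)            = 1 << (12 & 7)            = 0x10
 *
 * so the call assembles to roughly "lock orb $0x10,1(<addr>)" rather than
 * a lock bts on the whole word.
 */
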
/**
 * clear_bit - Clears a bit in memory
 * @nr: Bit to clear
 * @addr: Address to start counting from
 *
 * clear_bit() is atomic and may not be reordered.  However, it does
 * not contain a memory barrier, so if it is used for locking purposes,
 * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
 * in order to ensure changes are visible on other processors.
 */
static __always_inline void
clear_bit(int nr, volatile unsigned long *addr)
{
	if (IS_IMMEDIATE(nr)) {
		asm volatile(LOCK_PREFIX "andb %1,%0"
			: CONST_MASK_ADDR(nr, addr)
			: "iq" ((u8)~CONST_MASK(nr)));
	} else {
		asm volatile(LOCK_PREFIX "btr %1,%0"
			: BITOP_ADDR(addr)
			: "Ir" (nr));
	}
}

/**
 * clear_bit_unlock - Clears a bit in memory
 * @nr: Bit to clear
 * @addr: Address to start counting from
 *
 * clear_bit_unlock() is atomic and implies release semantics before the
 * memory operation. It can be used for an unlock.
 */
static inline void clear_bit_unlock(unsigned nr, volatile unsigned long *addr)
{
	barrier();
	clear_bit(nr, addr);
}

static inline void __clear_bit(int nr, volatile unsigned long *addr)
{
	asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
}

/**
 * __clear_bit_unlock - Clears a bit in memory
 * @nr: Bit to clear
 * @addr: Address to start counting from
 *
 * __clear_bit_unlock() is non-atomic and implies release semantics before
 * the memory operation. It can be used for an unlock if no other CPUs can
 * concurrently modify other bits in the word.
 *
 * No memory barrier is required here, because x86 cannot reorder stores past
 * older loads. Same principle as spin_unlock.
 */
static inline void __clear_bit_unlock(unsigned nr, volatile unsigned long *addr)
{
	barrier();
	__clear_bit(nr, addr);
}

#define smp_mb__before_clear_bit()	barrier()
#define smp_mb__after_clear_bit()	barrier()

/**
 * __change_bit - Toggle a bit in memory
 * @nr: the bit to change
 * @addr: the address to start counting from
 *
 * Unlike change_bit(), this function is non-atomic and may be reordered.
 * If it's called on the same region of memory simultaneously, the effect
 * may be that only one operation succeeds.
 */
static inline void __change_bit(int nr, volatile unsigned long *addr)
{
	asm volatile("btc %1,%0" : ADDR : "Ir" (nr));
}

/**
 * change_bit - Toggle a bit in memory
 * @nr: Bit to change
 * @addr: Address to start counting from
 *
 * change_bit() is atomic and may not be reordered.
 * Note that @nr may be almost arbitrarily large; this function is not
 * restricted to acting on a single-word quantity.
 */
static inline void change_bit(int nr, volatile unsigned long *addr)
{
	if (IS_IMMEDIATE(nr)) {
		asm volatile(LOCK_PREFIX "xorb %1,%0"
			: CONST_MASK_ADDR(nr, addr)
			: "iq" ((u8)CONST_MASK(nr)));
	} else {
		asm volatile(LOCK_PREFIX "btc %1,%0"
			: BITOP_ADDR(addr)
			: "Ir" (nr));
	}
}

/**
 * test_and_set_bit - Set a bit and return its old value
 * @nr: Bit to set
 * @addr: Address to count from
 *
 * This operation is atomic and cannot be reordered.
 * It also implies a memory barrier.
 */
static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
{
	int oldbit;

	asm volatile(LOCK_PREFIX "bts %2,%1\n\t"
		     "sbb %0,%0" : "=r" (oldbit), ADDR : "Ir" (nr) : "memory");

	return oldbit;
}

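/*
 * Note on the "sbb %0,%0" idiom used by test_and_set_bit() above and the
 * other test_and_*() helpers below: bt/bts/btr/btc copy the old value of
 * the selected bit into the carry flag, and "sbb reg,reg" then computes
 * reg = reg - reg - CF = -CF.  The result is therefore 0 if the bit was
 * clear and -1 (i.e. nonzero) if it was set, which matches the
 * "0 / != 0" return convention described at the top of this file.
 */
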
/**
 * test_and_set_bit_lock - Set a bit and return its old value for lock
 * @nr: Bit to set
 * @addr: Address to count from
 *
 * This is the same as test_and_set_bit on x86.
 */
static __always_inline int
test_and_set_bit_lock(int nr, volatile unsigned long *addr)
{
	return test_and_set_bit(nr, addr);
}

/**
 * __test_and_set_bit - Set a bit and return its old value
 * @nr: Bit to set
 * @addr: Address to count from
 *
 * This operation is non-atomic and can be reordered.
 * If two instances of this operation race, one can appear to succeed
 * but actually fail.  You must protect multiple accesses with a lock.
 */
static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
{
	int oldbit;

	asm("bts %2,%1\n\t"
	    "sbb %0,%0"
	    : "=r" (oldbit), ADDR
	    : "Ir" (nr));
	return oldbit;
}

/**
 * test_and_clear_bit - Clear a bit and return its old value
 * @nr: Bit to clear
 * @addr: Address to count from
 *
 * This operation is atomic and cannot be reordered.
 * It also implies a memory barrier.
 */
static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
{
	int oldbit;

	asm volatile(LOCK_PREFIX "btr %2,%1\n\t"
		     "sbb %0,%0"
		     : "=r" (oldbit), ADDR : "Ir" (nr) : "memory");

	return oldbit;
}

/**
 * __test_and_clear_bit - Clear a bit and return its old value
 * @nr: Bit to clear
 * @addr: Address to count from
 *
 * This operation is non-atomic and can be reordered.
 * If two instances of this operation race, one can appear to succeed
 * but actually fail.  You must protect multiple accesses with a lock.
 *
 * Note: the operation is performed atomically with respect to
 * the local CPU, but not other CPUs.  Portable code should not
 * rely on this behaviour.
 * KVM relies on this behaviour on x86 for modifying memory that is also
 * accessed from a hypervisor on the same CPU if running in a VM: don't change
 * this without also updating arch/x86/kernel/kvm.c
 */
static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
{
	int oldbit;

	asm volatile("btr %2,%1\n\t"
		     "sbb %0,%0"
		     : "=r" (oldbit), ADDR
		     : "Ir" (nr));
	return oldbit;
}

/* WARNING: non-atomic and can be reordered! */
static inline int __test_and_change_bit(int nr, volatile unsigned long *addr)
{
	int oldbit;

	asm volatile("btc %2,%1\n\t"
		     "sbb %0,%0"
		     : "=r" (oldbit), ADDR
		     : "Ir" (nr) : "memory");

	return oldbit;
}

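/*
 * Illustrative locking sketch (assumed usage, not an API defined here):
 * a single bit of a flags word can act as a simple lock by spinning on
 * test_and_set_bit_lock() until it returns 0 and releasing the bit with
 * clear_bit_unlock().  MY_LOCK_BIT and my_flags are hypothetical names
 * used only for this example:
 *
 *	while (test_and_set_bit_lock(MY_LOCK_BIT, &my_flags))
 *		cpu_relax();
 *	... critical section ...
 *	clear_bit_unlock(MY_LOCK_BIT, &my_flags);
 */
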
/**
 * test_and_change_bit - Change a bit and return its old value
 * @nr: Bit to change
 * @addr: Address to count from
 *
 * This operation is atomic and cannot be reordered.
 * It also implies a memory barrier.
 */
static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
{
	int oldbit;

	asm volatile(LOCK_PREFIX "btc %2,%1\n\t"
		     "sbb %0,%0" : "=r" (oldbit), ADDR : "Ir" (nr) : "memory");

	return oldbit;
}

static __always_inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr)
{
	return ((1UL << (nr % BITS_PER_LONG)) &
		(addr[nr / BITS_PER_LONG])) != 0;
}

static inline int variable_test_bit(int nr, volatile const unsigned long *addr)
{
	int oldbit;

	asm volatile("bt %2,%1\n\t"
		     "sbb %0,%0"
		     : "=r" (oldbit)
		     : "m" (*(unsigned long *)addr), "Ir" (nr));

	return oldbit;
}

#if 0 /* Fool kernel-doc since it doesn't do macros yet */
/**
 * test_bit - Determine whether a bit is set
 * @nr: bit number to test
 * @addr: Address to start counting from
 */
static int test_bit(int nr, const volatile unsigned long *addr);
#endif

#define test_bit(nr, addr)			\
	(__builtin_constant_p((nr))		\
	 ? constant_test_bit((nr), (addr))	\
	 : variable_test_bit((nr), (addr)))

/**
 * __ffs - find first set bit in word
 * @word: The word to search
 *
 * Undefined if no bit exists, so code should check against 0 first.
 */
static inline unsigned long __ffs(unsigned long word)
{
	asm("rep; bsf %1,%0"
		: "=r" (word)
		: "rm" (word));
	return word;
}

/**
 * ffz - find first zero bit in word
 * @word: The word to search
 *
 * Undefined if no zero exists, so code should check against ~0UL first.
 */
static inline unsigned long ffz(unsigned long word)
{
	asm("rep; bsf %1,%0"
		: "=r" (word)
		: "r" (~word));
	return word;
}

/**
 * __fls - find last set bit in word
 * @word: The word to search
 *
 * Undefined if no set bit exists, so code should check against 0 first.
 */
static inline unsigned long __fls(unsigned long word)
{
	asm("bsr %1,%0"
	    : "=r" (word)
	    : "rm" (word));
	return word;
}

#undef ADDR

#ifdef __KERNEL__
/**
 * ffs - find first set bit in word
 * @x: the word to search
 *
 * This is defined the same way as the libc and compiler builtin ffs
 * routines, therefore differs in spirit from the other bitops.
 *
 * ffs(value) returns 0 if value is 0 or the position of the first
 * set bit if value is nonzero. The first (least significant) bit
 * is at position 1.
 */
static inline int ffs(int x)
{
	int r;

#ifdef CONFIG_X86_64
	/*
	 * AMD64 says BSFL won't clobber the dest reg if x==0; Intel64 says the
	 * dest reg is undefined if x==0, but their CPU architect says the
	 * register is in practice rewritten with the value it had before,
	 * except that the top 32 bits will be cleared.
	 *
	 * We cannot do this on 32 bits because at the very least some
	 * 486 CPUs did not behave this way.
	 */
	asm("bsfl %1,%0"
	    : "=r" (r)
	    : "rm" (x), "0" (-1));
#elif defined(CONFIG_X86_CMOV)
	asm("bsfl %1,%0\n\t"
	    "cmovzl %2,%0"
	    : "=&r" (r) : "rm" (x), "r" (-1));
#else
	asm("bsfl %1,%0\n\t"
	    "jnz 1f\n\t"
	    "movl $-1,%0\n"
	    "1:" : "=r" (r) : "rm" (x));
#endif
	return r + 1;
}

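/*
 * Return-value examples for the bit-search helpers (illustrative):
 * __ffs(0x08) == 3 and __fls(0x08) == 3 use 0-based bit positions, while
 * ffs(0x08) == 4 above and fls()/fls64() below follow the 1-based libc
 * convention and reserve 0 for "no bit set".  The "rep; bsf" encoding in
 * __ffs() and ffz() is the same byte sequence as TZCNT on CPUs with BMI1;
 * CPUs without BMI1 ignore the REP prefix and execute a plain BSF, and
 * both give the same answer for the nonzero inputs these helpers require.
 */
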
/**
 * fls - find last set bit in word
 * @x: the word to search
 *
 * This is defined in a similar way as the libc and compiler builtin
 * ffs, but returns the position of the most significant set bit.
 *
 * fls(value) returns 0 if value is 0 or the position of the last
 * set bit if value is nonzero. The last (most significant) bit is
 * at position 32.
 */
static inline int fls(int x)
{
	int r;

#ifdef CONFIG_X86_64
	/*
	 * AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says the
	 * dest reg is undefined if x==0, but their CPU architect says the
	 * register is in practice rewritten with the value it had before,
	 * except that the top 32 bits will be cleared.
	 *
	 * We cannot do this on 32 bits because at the very least some
	 * 486 CPUs did not behave this way.
	 */
	asm("bsrl %1,%0"
	    : "=r" (r)
	    : "rm" (x), "0" (-1));
#elif defined(CONFIG_X86_CMOV)
	asm("bsrl %1,%0\n\t"
	    "cmovzl %2,%0"
	    : "=&r" (r) : "rm" (x), "rm" (-1));
#else
	asm("bsrl %1,%0\n\t"
	    "jnz 1f\n\t"
	    "movl $-1,%0\n"
	    "1:" : "=r" (r) : "rm" (x));
#endif
	return r + 1;
}

/**
 * fls64 - find last set bit in a 64-bit word
 * @x: the word to search
 *
 * This is defined in a similar way as the libc and compiler builtin
 * ffsll, but returns the position of the most significant set bit.
 *
 * fls64(value) returns 0 if value is 0 or the position of the last
 * set bit if value is nonzero. The last (most significant) bit is
 * at position 64.
 */
#ifdef CONFIG_X86_64
static __always_inline int fls64(__u64 x)
{
	int bitpos = -1;
	/*
	 * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the
	 * dest reg is undefined if x==0, but their CPU architect says the
	 * register is in practice rewritten with the value it had before.
	 */
	asm("bsrq %1,%q0"
	    : "+r" (bitpos)
	    : "rm" (x));
	return bitpos + 1;
}
#else
#include <asm-generic/bitops/fls64.h>
#endif

#include <asm-generic/bitops/find.h>

#include <asm-generic/bitops/sched.h>

#define ARCH_HAS_FAST_MULTIPLIER 1

#include <asm/arch_hweight.h>

#include <asm-generic/bitops/const_hweight.h>

#include <asm-generic/bitops/le.h>

#include <asm-generic/bitops/ext2-atomic-setbit.h>

#endif /* __KERNEL__ */
#endif /* _ASM_X86_BITOPS_H */