#ifndef _ASM_X86_SPINLOCK_H
#define _ASM_X86_SPINLOCK_H

#include <linux/atomic.h>
#include <asm/page.h>
#include <asm/processor.h>
#include <linux/compiler.h>
#include <asm/paravirt.h>
/*
 * Your basic SMP spinlocks, allowing only a single CPU anywhere
 *
 * Simple spin lock operations.  There are two variants, one clears IRQs
 * on the local processor, one does not.
 *
 * These are fair FIFO ticket locks, which are currently limited to 256
 * CPUs.
 *
 * (the type definitions are in asm/spinlock_types.h)
 */

#ifdef CONFIG_X86_32
# define LOCK_PTR_REG "a"
# define REG_PTR_MODE "k"
#else
# define LOCK_PTR_REG "D"
# define REG_PTR_MODE "q"
#endif

#if defined(CONFIG_X86_32) && \
	(defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE))
/*
 * On PPro SMP or if we are using OOSTORE, we use a locked operation to unlock
 * (PPro errata 66, 92)
 */
# define UNLOCK_LOCK_PREFIX LOCK_PREFIX
#else
# define UNLOCK_LOCK_PREFIX
#endif

/*
 * Ticket locks are conceptually two parts, one indicating the current head of
 * the queue, and the other indicating the current tail.  The lock is acquired
 * by atomically noting the tail and incrementing it by one (thus adding
 * ourselves to the queue and noting our position), then waiting until the
 * head becomes equal to the initial value of the tail.
 *
 * We use an xadd covering *both* parts of the lock, to increment the tail and
 * also load the position of the head, which takes care of memory ordering
 * issues and should be optimal for the uncontended case.  Note the tail must
 * be in the high part, because a wide xadd increment of the low part would
 * carry up and contaminate the high part.
 *
 * With fewer than 2^8 possible CPUs, we can use x86's partial registers to
 * save some instructions and make the code more elegant.  There really isn't
 * much between the two variants in performance though, especially as locks
 * are out of line.
 */
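/*
 * For orientation only, the fast path is conceptually (pseudo-code, not the
 * implementation below, which folds the ticket grab and the head load into a
 * single locked xadd; the field names follow struct __raw_tickets):
 *
 *	lock:	my_ticket = lock.tickets.tail;		// atomically, as
 *		lock.tickets.tail += 1;			// one xadd
 *		while (lock.tickets.head != my_ticket)
 *			cpu_relax();			// wait for our turn
 *
 *	unlock:	lock.tickets.head += 1;			// admit next waiter
 */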
#if (NR_CPUS < 256)
static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
{
	unsigned short inc = 1 << TICKET_SHIFT;

	asm volatile (LOCK_PREFIX "xaddw %w0, %1\n"
		      "1:\t"
		      "cmpb %h0, %b0\n\t"
		      "je 2f\n\t"
		      "rep ; nop\n\t"
		      "movb %1, %b0\n\t"
		      /* don't need lfence here, because loads are in-order */
		      "jmp 1b\n"
		      "2:"
		      : "+Q" (inc), "+m" (lock->slock)
		      :
		      : "memory", "cc");
}

/*
 * Grab a snapshot of the lock; if head (%b0) and tail (%h0) match, the lock
 * is free, so try to take it by bumping the tail with a locked cmpxchg
 * against that snapshot.  Returns 1 on success, 0 otherwise.
 */
static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
{
	unsigned int tmp, new;

	asm volatile("movzwl %2, %0\n\t"
		     "cmpb %h0,%b0\n\t"
		     "leal 0x100(%" REG_PTR_MODE "0), %1\n\t"
		     "jne 1f\n\t"
		     LOCK_PREFIX "cmpxchgw %w1,%2\n\t"
		     "1:"
		     "sete %b1\n\t"
		     "movzbl %b1,%0\n\t"
		     : "=&a" (tmp), "=&q" (new), "+m" (lock->slock)
		     :
		     : "memory", "cc");

	return tmp;
}

static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
{
	asm volatile(UNLOCK_LOCK_PREFIX "incb %0"
		     : "+m" (lock->slock)
		     :
		     : "memory", "cc");
}
#else
static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
{
	unsigned inc = 1 << TICKET_SHIFT;
	unsigned tmp;

	asm volatile(LOCK_PREFIX "xaddl %0, %1\n"
		     "movzwl %w0, %2\n\t"
		     "shrl $16, %0\n\t"
		     "1:\t"
		     "cmpl %0, %2\n\t"
		     "je 2f\n\t"
		     "rep ; nop\n\t"
		     "movzwl %1, %2\n\t"
		     /* don't need lfence here, because loads are in-order */
		     "jmp 1b\n"
		     "2:"
		     : "+r" (inc), "+m" (lock->slock), "=&r" (tmp)
		     :
		     : "memory", "cc");
}

/*
 * Same idea with 16-bit ticket halves: the snapshot is rotated so head and
 * tail swap places; when the lock is free the two halves are equal, so the
 * rotated copy still matches memory and can serve as the cmpxchg comparand,
 * while %1 holds the snapshot with the tail bumped by one.
 */
static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
{
	unsigned tmp;
	unsigned new;

	asm volatile("movl %2,%0\n\t"
		     "movl %0,%1\n\t"
		     "roll $16, %0\n\t"
		     "cmpl %0,%1\n\t"
		     "leal 0x00010000(%" REG_PTR_MODE "0), %1\n\t"
		     "jne 1f\n\t"
		     LOCK_PREFIX "cmpxchgl %1,%2\n\t"
		     "1:"
		     "sete %b1\n\t"
		     "movzbl %b1,%0\n\t"
		     : "=&a" (tmp), "=&q" (new), "+m" (lock->slock)
		     :
		     : "memory", "cc");

	return tmp;
}

static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
{
	asm volatile(UNLOCK_LOCK_PREFIX "incw %0"
		     : "+m" (lock->slock)
		     :
		     : "memory", "cc");
}
#endif

static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
{
	struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);

	return !!(tmp.tail ^ tmp.head);
}

static inline int __ticket_spin_is_contended(arch_spinlock_t *lock)
{
	struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);

	return ((tmp.tail - tmp.head) & TICKET_MASK) > 1;
}

#ifndef CONFIG_PARAVIRT_SPINLOCKS

static inline int arch_spin_is_locked(arch_spinlock_t *lock)
{
	return __ticket_spin_is_locked(lock);
}

static inline int arch_spin_is_contended(arch_spinlock_t *lock)
{
	return __ticket_spin_is_contended(lock);
}
#define arch_spin_is_contended	arch_spin_is_contended

static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
{
	__ticket_spin_lock(lock);
}

static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
{
	return __ticket_spin_trylock(lock);
}

static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
{
	__ticket_spin_unlock(lock);
}

static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
						 unsigned long flags)
{
	arch_spin_lock(lock);
}

#endif	/* CONFIG_PARAVIRT_SPINLOCKS */

static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
{
	while (arch_spin_is_locked(lock))
		cpu_relax();
}

/*
 * Read-write spinlocks, allowing multiple readers
 * but only one writer.
 *
 * NOTE! it is quite common to have readers in interrupts
 * but no interrupt writers.  For those circumstances we
 * can "mix" irq-safe locks - any writer needs to get an
 * irq-safe write-lock, but readers can get non-irqsafe
 * read-locks.
 *
 * On x86, we implement read-write locks as a 32-bit counter
 * with the high bit (sign) being the "contended" bit.
 */
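/*
 * Rough sketch of the counter scheme, for orientation only (the real fast
 * paths are the asm below; the contended cases are handled out of line by
 * __read_lock_failed/__write_lock_failed):
 *
 *	read_lock:	atomically decrement lock->lock; if the result is
 *			still non-negative we hold a read lock, otherwise
 *			undo the decrement and wait.
 *	write_lock:	atomically subtract WRITE_LOCK_CMP from lock->write;
 *			if the result is zero there were no readers and no
 *			writer and we own the lock, otherwise add the bias
 *			back and wait.
 *
 * The trylock helpers further down spell out the same steps in C.
 */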
/**
 * read_can_lock - would read_trylock() succeed?
 * @lock: the rwlock in question.
 */
static inline int arch_read_can_lock(arch_rwlock_t *lock)
{
	return lock->lock > 0;
}

/**
 * write_can_lock - would write_trylock() succeed?
 * @lock: the rwlock in question.
 */
static inline int arch_write_can_lock(arch_rwlock_t *lock)
{
	return lock->write == WRITE_LOCK_CMP;
}

static inline void arch_read_lock(arch_rwlock_t *rw)
{
	asm volatile(LOCK_PREFIX READ_LOCK_SIZE(dec) " (%0)\n\t"
		     "jns 1f\n"
		     "call __read_lock_failed\n\t"
		     "1:\n"
		     ::LOCK_PTR_REG (rw) : "memory");
}

static inline void arch_write_lock(arch_rwlock_t *rw)
{
	asm volatile(LOCK_PREFIX WRITE_LOCK_SUB(%1) "(%0)\n\t"
		     "jz 1f\n"
		     "call __write_lock_failed\n\t"
		     "1:\n"
		     ::LOCK_PTR_REG (&rw->write), "i" (RW_LOCK_BIAS)
		     : "memory");
}

static inline int arch_read_trylock(arch_rwlock_t *lock)
{
	READ_LOCK_ATOMIC(t) *count = (READ_LOCK_ATOMIC(t) *)lock;

	if (READ_LOCK_ATOMIC(dec_return)(count) >= 0)
		return 1;
	READ_LOCK_ATOMIC(inc)(count);
	return 0;
}

static inline int arch_write_trylock(arch_rwlock_t *lock)
{
	atomic_t *count = (atomic_t *)&lock->write;

	if (atomic_sub_and_test(WRITE_LOCK_CMP, count))
		return 1;
	atomic_add(WRITE_LOCK_CMP, count);
	return 0;
}

static inline void arch_read_unlock(arch_rwlock_t *rw)
{
	asm volatile(LOCK_PREFIX READ_LOCK_SIZE(inc) " %0"
		     : "+m" (rw->lock) : : "memory");
}

static inline void arch_write_unlock(arch_rwlock_t *rw)
{
	asm volatile(LOCK_PREFIX WRITE_LOCK_ADD(%1) "%0"
		     : "+m" (rw->write) : "i" (RW_LOCK_BIAS) : "memory");
}

#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)

#undef READ_LOCK_SIZE
#undef READ_LOCK_ATOMIC
#undef WRITE_LOCK_ADD
#undef WRITE_LOCK_SUB
#undef WRITE_LOCK_CMP

#define arch_spin_relax(lock)	cpu_relax()
#define arch_read_relax(lock)	cpu_relax()
#define arch_write_relax(lock)	cpu_relax()

/* The {read|write|spin}_lock() on x86 are full memory barriers. */
static inline void smp_mb__after_lock(void) { }
#define ARCH_HAS_SMP_MB_AFTER_LOCK

#endif /* _ASM_X86_SPINLOCK_H */