/*
 * Split spinlock implementation out into its own file, so it can be
 * compiled in a FTRACE-compatible way.
 */
#include <linux/kernel_stat.h>
#include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/log2.h>

#include <asm/paravirt.h>

#include <xen/interface/xen.h>
#include <xen/events.h>

#include "xen-ops.h"
#include "debugfs.h"

#ifdef CONFIG_XEN_DEBUG_FS
static struct xen_spinlock_stats
{
	u64 taken;
	u32 taken_slow;
	u32 taken_slow_nested;
	u32 taken_slow_pickup;
	u32 taken_slow_spurious;
	u32 taken_slow_irqenable;

	u64 released;
	u32 released_slow;
	u32 released_slow_kicked;

#define HISTO_BUCKETS	30
	u32 histo_spin_total[HISTO_BUCKETS+1];
	u32 histo_spin_spinning[HISTO_BUCKETS+1];
	u32 histo_spin_blocked[HISTO_BUCKETS+1];

	u64 time_total;
	u64 time_spinning;
	u64 time_blocked;
} spinlock_stats;

static u8 zero_stats;

static unsigned lock_timeout = 1 << 10;
#define TIMEOUT lock_timeout

static inline void check_zero(void)
{
	if (unlikely(zero_stats)) {
		memset(&spinlock_stats, 0, sizeof(spinlock_stats));
		zero_stats = 0;
	}
}

#define ADD_STATS(elem, val)			\
	do { check_zero(); spinlock_stats.elem += (val); } while(0)

static inline u64 spin_time_start(void)
{
	return xen_clocksource_read();
}

static void __spin_time_accum(u64 delta, u32 *array)
{
	unsigned index = ilog2(delta);

	check_zero();

	if (index < HISTO_BUCKETS)
		array[index]++;
	else
		array[HISTO_BUCKETS]++;
}

static inline void spin_time_accum_spinning(u64 start)
{
	u32 delta = xen_clocksource_read() - start;

	__spin_time_accum(delta, spinlock_stats.histo_spin_spinning);
	spinlock_stats.time_spinning += delta;
}

static inline void spin_time_accum_total(u64 start)
{
	u32 delta = xen_clocksource_read() - start;

	__spin_time_accum(delta, spinlock_stats.histo_spin_total);
	spinlock_stats.time_total += delta;
}

static inline void spin_time_accum_blocked(u64 start)
{
	u32 delta = xen_clocksource_read() - start;

	__spin_time_accum(delta, spinlock_stats.histo_spin_blocked);
	spinlock_stats.time_blocked += delta;
}
#else  /* !CONFIG_XEN_DEBUG_FS */
#define TIMEOUT			(1 << 10)
#define ADD_STATS(elem, val)	do { (void)(val); } while(0)

static inline u64 spin_time_start(void)
{
	return 0;
}

static inline void spin_time_accum_total(u64 start)
{
}
static inline void spin_time_accum_spinning(u64 start)
{
}
static inline void spin_time_accum_blocked(u64 start)
{
}
#endif  /* CONFIG_XEN_DEBUG_FS */

struct xen_spinlock {
	unsigned char lock;		/* 0 -> free; 1 -> locked */
	unsigned short spinners;	/* count of waiting cpus */
};

static int xen_spin_is_locked(struct arch_spinlock *lock)
{
	struct xen_spinlock *xl = (struct xen_spinlock *)lock;

	return xl->lock != 0;
}

static int xen_spin_is_contended(struct arch_spinlock *lock)
{
	struct xen_spinlock *xl = (struct xen_spinlock *)lock;

	/* Not strictly true; this is only the count of contended
	   lock-takers entering the slow path. */
	return xl->spinners != 0;
}

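/*
 * Try to take the lock with a single atomic exchange of the lock byte:
 * if the old value was 0 the lock was free and is now ours, otherwise
 * somebody else already holds it.
 */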
static int xen_spin_trylock(struct arch_spinlock *lock)
{
	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
	u8 old = 1;

	asm("xchgb %b0,%1"
	    : "+q" (old), "+m" (xl->lock) : : "memory");

	return old == 0;
}

static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners);

/*
 * Mark a cpu as interested in a lock.  Returns the CPU's previous
 * lock of interest, in case we got preempted by an interrupt.
 */
static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl)
{
	struct xen_spinlock *prev;

	prev = __get_cpu_var(lock_spinners);
	__get_cpu_var(lock_spinners) = xl;

	wmb();			/* set lock of interest before count */

	asm(LOCK_PREFIX " incw %0"
	    : "+m" (xl->spinners) : : "memory");

	return prev;
}

/*
 * Mark a cpu as no longer interested in a lock.  Restores previous
 * lock of interest (NULL for none).
 */
static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock *prev)
{
	asm(LOCK_PREFIX " decw %0"
	    : "+m" (xl->spinners) : : "memory");
	wmb();			/* decrement count before restoring lock */
	__get_cpu_var(lock_spinners) = prev;
}

static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enable)
{
	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
	struct xen_spinlock *prev;
	int irq = __get_cpu_var(lock_kicker_irq);
	int ret;
	u64 start;

	/* If kicker interrupts not initialized yet, just spin */
	if (irq == -1)
		return 0;

	start = spin_time_start();

	/* announce we're spinning */
	prev = spinning_lock(xl);

	ADD_STATS(taken_slow, 1);
	ADD_STATS(taken_slow_nested, prev != NULL);

	do {
		unsigned long flags;

		/* clear pending */
		xen_clear_irq_pending(irq);

		/* check again to make sure it didn't become free while
		   we weren't looking */
		ret = xen_spin_trylock(lock);
		if (ret) {
			ADD_STATS(taken_slow_pickup, 1);

			/*
			 * If we interrupted another spinlock while it
			 * was blocking, make sure it doesn't block
			 * without rechecking the lock.
			 */
			if (prev != NULL)
				xen_set_irq_pending(irq);
			goto out;
		}

		flags = __raw_local_save_flags();
		if (irq_enable) {
			ADD_STATS(taken_slow_irqenable, 1);
			raw_local_irq_enable();
		}

		/*
		 * Block until irq becomes pending.  If we're
		 * interrupted at this point (after the trylock but
		 * before entering the block), then the nested lock
		 * handler guarantees that the irq will be left
		 * pending if there's any chance the lock became free;
		 * xen_poll_irq() returns immediately if the irq is
		 * pending.
		 */
		xen_poll_irq(irq);

		raw_local_irq_restore(flags);

		ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq));
	} while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */

	kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));

out:
	unspinning_lock(xl, prev);
	spin_time_accum_blocked(start);

	return ret;
}

static inline void __xen_spin_lock(struct arch_spinlock *lock, bool irq_enable)
{
	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
	unsigned timeout;
	u8 oldval;
	u64 start_spin;

	ADD_STATS(taken, 1);

	start_spin = spin_time_start();

	do {
		u64 start_spin_fast = spin_time_start();

		timeout = TIMEOUT;

		/*
		 * Fast path: try to grab the lock byte with xchg.  If it
		 * is already held, spin with 'rep;nop' (pause) watching
		 * the lock byte and retry the xchg whenever it looks
		 * free, until the timeout counter runs out; on timeout
		 * oldval is left non-zero and we fall into the slow path.
		 */
		asm("1: xchgb %1,%0\n"
		    "   testb %1,%1\n"
		    "   jz 3f\n"
		    "2: rep;nop\n"
		    "   cmpb $0,%0\n"
		    "   je 1b\n"
		    "   dec %2\n"
		    "   jnz 2b\n"
		    "3:\n"
		    : "+m" (xl->lock), "=q" (oldval), "+r" (timeout)
		    : "1" (1)
		    : "memory");

		spin_time_accum_spinning(start_spin_fast);

	} while (unlikely(oldval != 0 &&
			  (TIMEOUT == ~0 || !xen_spin_lock_slow(lock, irq_enable))));

	spin_time_accum_total(start_spin);
}

static void xen_spin_lock(struct arch_spinlock *lock)
{
	__xen_spin_lock(lock, false);
}

static void xen_spin_lock_flags(struct arch_spinlock *lock, unsigned long flags)
{
	__xen_spin_lock(lock, !raw_irqs_disabled_flags(flags));
}

static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
{
	int cpu;

	ADD_STATS(released_slow, 1);

	for_each_online_cpu(cpu) {
		/* XXX should mix up next cpu selection */
		if (per_cpu(lock_spinners, cpu) == xl) {
			ADD_STATS(released_slow_kicked, 1);
			xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
			break;
		}
	}
}

static void xen_spin_unlock(struct arch_spinlock *lock)
{
	struct xen_spinlock *xl = (struct xen_spinlock *)lock;

	ADD_STATS(released, 1);

	smp_wmb();		/* make sure no writes get moved after unlock */
	xl->lock = 0;		/* release lock */

	/*
	 * Make sure unlock happens before checking for waiting
	 * spinners.  We need a strong barrier to enforce the
	 * write-read ordering to different memory locations, as the
	 * CPU makes no implied guarantees about their ordering.
	 */
	mb();

	if (unlikely(xl->spinners))
		xen_spin_unlock_slow(xl);
}

/*
 * The kicker event is only ever waited for with xen_poll_irq() and its
 * irq stays disabled, so this handler should never actually be called.
 */
static irqreturn_t dummy_handler(int irq, void *dev_id)
{
	BUG();
	return IRQ_HANDLED;
}

void __cpuinit xen_init_lock_cpu(int cpu)
{
	int irq;
	const char *name;

	name = kasprintf(GFP_KERNEL, "spinlock%d", cpu);
	irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR,
				     cpu,
				     dummy_handler,
				     IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
				     name,
				     NULL);

	if (irq >= 0) {
		disable_irq(irq); /* make sure it's never delivered */
		per_cpu(lock_kicker_irq, cpu) = irq;
	}

	printk(KERN_DEBUG "cpu %d spinlock event irq %d\n", cpu, irq);
}

void xen_uninit_lock_cpu(int cpu)
{
	unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL);
}

void __init xen_init_spinlocks(void)
{
	pv_lock_ops.spin_is_locked = xen_spin_is_locked;
	pv_lock_ops.spin_is_contended = xen_spin_is_contended;
	pv_lock_ops.spin_lock = xen_spin_lock;
	pv_lock_ops.spin_lock_flags = xen_spin_lock_flags;
	pv_lock_ops.spin_trylock = xen_spin_trylock;
	pv_lock_ops.spin_unlock = xen_spin_unlock;
}

#ifdef CONFIG_XEN_DEBUG_FS

static struct dentry *d_spin_debug;

static int __init xen_spinlock_debugfs(void)
{
	struct dentry *d_xen = xen_init_debugfs();

	if (d_xen == NULL)
		return -ENOMEM;

	d_spin_debug = debugfs_create_dir("spinlocks", d_xen);

	debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);

	debugfs_create_u32("timeout", 0644, d_spin_debug, &lock_timeout);

	debugfs_create_u64("taken", 0444, d_spin_debug, &spinlock_stats.taken);
	debugfs_create_u32("taken_slow", 0444, d_spin_debug,
			   &spinlock_stats.taken_slow);
	debugfs_create_u32("taken_slow_nested", 0444, d_spin_debug,
			   &spinlock_stats.taken_slow_nested);
	debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
			   &spinlock_stats.taken_slow_pickup);
	debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug,
			   &spinlock_stats.taken_slow_spurious);
	debugfs_create_u32("taken_slow_irqenable", 0444, d_spin_debug,
			   &spinlock_stats.taken_slow_irqenable);

	debugfs_create_u64("released", 0444, d_spin_debug, &spinlock_stats.released);
	debugfs_create_u32("released_slow", 0444, d_spin_debug,
			   &spinlock_stats.released_slow);
	debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
			   &spinlock_stats.released_slow_kicked);

	debugfs_create_u64("time_spinning", 0444, d_spin_debug,
			   &spinlock_stats.time_spinning);
	debugfs_create_u64("time_blocked", 0444, d_spin_debug,
			   &spinlock_stats.time_blocked);
	debugfs_create_u64("time_total", 0444, d_spin_debug,
			   &spinlock_stats.time_total);

	xen_debugfs_create_u32_array("histo_total", 0444, d_spin_debug,
				     spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1);
	xen_debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug,
				     spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1);
	xen_debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
				     spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);

	return 0;
}
fs_initcall(xen_spinlock_debugfs);

#endif	/* CONFIG_XEN_DEBUG_FS */