1 /* 2 * Split spinlock implementation out into its own file, so it can be 3 * compiled in a FTRACE-compatible way. 4 */ 5 #include <linux/kernel_stat.h> 6 #include <linux/spinlock.h> 7 #include <linux/debugfs.h> 8 #include <linux/log2.h> 9 #include <linux/gfp.h> 10 #include <linux/slab.h> 11 12 #include <asm/paravirt.h> 13 14 #include <xen/interface/xen.h> 15 #include <xen/events.h> 16 17 #include "xen-ops.h" 18 #include "debugfs.h" 19 20 static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; 21 static DEFINE_PER_CPU(char *, irq_name); 22 static bool xen_pvspin = true; 23 24 #ifdef CONFIG_QUEUED_SPINLOCKS 25 26 #include <asm/qspinlock.h> 27 28 static void xen_qlock_kick(int cpu) 29 { 30 int irq = per_cpu(lock_kicker_irq, cpu); 31 32 /* Don't kick if the target's kicker interrupt is not initialized. */ 33 if (irq == -1) 34 return; 35 36 xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); 37 } 38 39 /* 40 * Halt the current CPU & release it back to the host 41 */ 42 static void xen_qlock_wait(u8 *byte, u8 val) 43 { 44 int irq = __this_cpu_read(lock_kicker_irq); 45 46 /* If kicker interrupts not initialized yet, just spin */ 47 if (irq == -1) 48 return; 49 50 /* clear pending */ 51 xen_clear_irq_pending(irq); 52 barrier(); 53 54 /* 55 * We check the byte value after clearing pending IRQ to make sure 56 * that we won't miss a wakeup event because of the clearing. 57 * 58 * The sync_clear_bit() call in xen_clear_irq_pending() is atomic. 59 * So it is effectively a memory barrier for x86. 60 */ 61 if (READ_ONCE(*byte) != val) 62 return; 63 64 /* 65 * If an interrupt happens here, it will leave the wakeup irq 66 * pending, which will cause xen_poll_irq() to return 67 * immediately. 68 */ 69 70 /* Block until irq becomes pending (or perhaps a spurious wakeup) */ 71 xen_poll_irq(irq); 72 } 73 74 #else /* CONFIG_QUEUED_SPINLOCKS */ 75 76 enum xen_contention_stat { 77 TAKEN_SLOW, 78 TAKEN_SLOW_PICKUP, 79 TAKEN_SLOW_SPURIOUS, 80 RELEASED_SLOW, 81 RELEASED_SLOW_KICKED, 82 NR_CONTENTION_STATS 83 }; 84 85 86 #ifdef CONFIG_XEN_DEBUG_FS 87 #define HISTO_BUCKETS 30 88 static struct xen_spinlock_stats 89 { 90 u32 contention_stats[NR_CONTENTION_STATS]; 91 u32 histo_spin_blocked[HISTO_BUCKETS+1]; 92 u64 time_blocked; 93 } spinlock_stats; 94 95 static u8 zero_stats; 96 97 static inline void check_zero(void) 98 { 99 u8 ret; 100 u8 old = READ_ONCE(zero_stats); 101 if (unlikely(old)) { 102 ret = cmpxchg(&zero_stats, old, 0); 103 /* This ensures only one fellow resets the stat */ 104 if (ret == old) 105 memset(&spinlock_stats, 0, sizeof(spinlock_stats)); 106 } 107 } 108 109 static inline void add_stats(enum xen_contention_stat var, u32 val) 110 { 111 check_zero(); 112 spinlock_stats.contention_stats[var] += val; 113 } 114 115 static inline u64 spin_time_start(void) 116 { 117 return xen_clocksource_read(); 118 } 119 120 static void __spin_time_accum(u64 delta, u32 *array) 121 { 122 unsigned index = ilog2(delta); 123 124 check_zero(); 125 126 if (index < HISTO_BUCKETS) 127 array[index]++; 128 else 129 array[HISTO_BUCKETS]++; 130 } 131 132 static inline void spin_time_accum_blocked(u64 start) 133 { 134 u32 delta = xen_clocksource_read() - start; 135 136 __spin_time_accum(delta, spinlock_stats.histo_spin_blocked); 137 spinlock_stats.time_blocked += delta; 138 } 139 #else /* !CONFIG_XEN_DEBUG_FS */ 140 static inline void add_stats(enum xen_contention_stat var, u32 val) 141 { 142 } 143 144 static inline u64 spin_time_start(void) 145 { 146 return 0; 147 } 148 149 static inline void spin_time_accum_blocked(u64 start) 150 { 151 } 152 #endif /* CONFIG_XEN_DEBUG_FS */ 153 154 struct xen_lock_waiting { 155 struct arch_spinlock *lock; 156 __ticket_t want; 157 }; 158 159 static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting); 160 static cpumask_t waiting_cpus; 161 162 __visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want) 163 { 164 int irq = __this_cpu_read(lock_kicker_irq); 165 struct xen_lock_waiting *w = this_cpu_ptr(&lock_waiting); 166 int cpu = smp_processor_id(); 167 u64 start; 168 __ticket_t head; 169 unsigned long flags; 170 171 /* If kicker interrupts not initialized yet, just spin */ 172 if (irq == -1) 173 return; 174 175 start = spin_time_start(); 176 177 /* 178 * Make sure an interrupt handler can't upset things in a 179 * partially setup state. 180 */ 181 local_irq_save(flags); 182 /* 183 * We don't really care if we're overwriting some other 184 * (lock,want) pair, as that would mean that we're currently 185 * in an interrupt context, and the outer context had 186 * interrupts enabled. That has already kicked the VCPU out 187 * of xen_poll_irq(), so it will just return spuriously and 188 * retry with newly setup (lock,want). 189 * 190 * The ordering protocol on this is that the "lock" pointer 191 * may only be set non-NULL if the "want" ticket is correct. 192 * If we're updating "want", we must first clear "lock". 193 */ 194 w->lock = NULL; 195 smp_wmb(); 196 w->want = want; 197 smp_wmb(); 198 w->lock = lock; 199 200 /* This uses set_bit, which atomic and therefore a barrier */ 201 cpumask_set_cpu(cpu, &waiting_cpus); 202 add_stats(TAKEN_SLOW, 1); 203 204 /* clear pending */ 205 xen_clear_irq_pending(irq); 206 207 /* Only check lock once pending cleared */ 208 barrier(); 209 210 /* 211 * Mark entry to slowpath before doing the pickup test to make 212 * sure we don't deadlock with an unlocker. 213 */ 214 __ticket_enter_slowpath(lock); 215 216 /* make sure enter_slowpath, which is atomic does not cross the read */ 217 smp_mb__after_atomic(); 218 219 /* 220 * check again make sure it didn't become free while 221 * we weren't looking 222 */ 223 head = READ_ONCE(lock->tickets.head); 224 if (__tickets_equal(head, want)) { 225 add_stats(TAKEN_SLOW_PICKUP, 1); 226 goto out; 227 } 228 229 /* Allow interrupts while blocked */ 230 local_irq_restore(flags); 231 232 /* 233 * If an interrupt happens here, it will leave the wakeup irq 234 * pending, which will cause xen_poll_irq() to return 235 * immediately. 236 */ 237 238 /* Block until irq becomes pending (or perhaps a spurious wakeup) */ 239 xen_poll_irq(irq); 240 add_stats(TAKEN_SLOW_SPURIOUS, !xen_test_irq_pending(irq)); 241 242 local_irq_save(flags); 243 244 kstat_incr_irq_this_cpu(irq); 245 out: 246 cpumask_clear_cpu(cpu, &waiting_cpus); 247 w->lock = NULL; 248 249 local_irq_restore(flags); 250 251 spin_time_accum_blocked(start); 252 } 253 PV_CALLEE_SAVE_REGS_THUNK(xen_lock_spinning); 254 255 static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next) 256 { 257 int cpu; 258 259 add_stats(RELEASED_SLOW, 1); 260 261 for_each_cpu(cpu, &waiting_cpus) { 262 const struct xen_lock_waiting *w = &per_cpu(lock_waiting, cpu); 263 264 /* Make sure we read lock before want */ 265 if (READ_ONCE(w->lock) == lock && 266 READ_ONCE(w->want) == next) { 267 add_stats(RELEASED_SLOW_KICKED, 1); 268 xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); 269 break; 270 } 271 } 272 } 273 #endif /* CONFIG_QUEUED_SPINLOCKS */ 274 275 static irqreturn_t dummy_handler(int irq, void *dev_id) 276 { 277 BUG(); 278 return IRQ_HANDLED; 279 } 280 281 void xen_init_lock_cpu(int cpu) 282 { 283 int irq; 284 char *name; 285 286 if (!xen_pvspin) 287 return; 288 289 WARN(per_cpu(lock_kicker_irq, cpu) >= 0, "spinlock on CPU%d exists on IRQ%d!\n", 290 cpu, per_cpu(lock_kicker_irq, cpu)); 291 292 name = kasprintf(GFP_KERNEL, "spinlock%d", cpu); 293 irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR, 294 cpu, 295 dummy_handler, 296 IRQF_PERCPU|IRQF_NOBALANCING, 297 name, 298 NULL); 299 300 if (irq >= 0) { 301 disable_irq(irq); /* make sure it's never delivered */ 302 per_cpu(lock_kicker_irq, cpu) = irq; 303 per_cpu(irq_name, cpu) = name; 304 } 305 306 printk("cpu %d spinlock event irq %d\n", cpu, irq); 307 } 308 309 void xen_uninit_lock_cpu(int cpu) 310 { 311 if (!xen_pvspin) 312 return; 313 314 unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL); 315 per_cpu(lock_kicker_irq, cpu) = -1; 316 kfree(per_cpu(irq_name, cpu)); 317 per_cpu(irq_name, cpu) = NULL; 318 } 319 320 321 /* 322 * Our init of PV spinlocks is split in two init functions due to us 323 * using paravirt patching and jump labels patching and having to do 324 * all of this before SMP code is invoked. 325 * 326 * The paravirt patching needs to be done _before_ the alternative asm code 327 * is started, otherwise we would not patch the core kernel code. 328 */ 329 void __init xen_init_spinlocks(void) 330 { 331 332 if (!xen_pvspin) { 333 printk(KERN_DEBUG "xen: PV spinlocks disabled\n"); 334 return; 335 } 336 printk(KERN_DEBUG "xen: PV spinlocks enabled\n"); 337 #ifdef CONFIG_QUEUED_SPINLOCKS 338 __pv_init_lock_hash(); 339 pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath; 340 pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock); 341 pv_lock_ops.wait = xen_qlock_wait; 342 pv_lock_ops.kick = xen_qlock_kick; 343 #else 344 pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning); 345 pv_lock_ops.unlock_kick = xen_unlock_kick; 346 #endif 347 } 348 349 /* 350 * While the jump_label init code needs to happend _after_ the jump labels are 351 * enabled and before SMP is started. Hence we use pre-SMP initcall level 352 * init. We cannot do it in xen_init_spinlocks as that is done before 353 * jump labels are activated. 354 */ 355 static __init int xen_init_spinlocks_jump(void) 356 { 357 if (!xen_pvspin) 358 return 0; 359 360 if (!xen_domain()) 361 return 0; 362 363 static_key_slow_inc(¶virt_ticketlocks_enabled); 364 return 0; 365 } 366 early_initcall(xen_init_spinlocks_jump); 367 368 static __init int xen_parse_nopvspin(char *arg) 369 { 370 xen_pvspin = false; 371 return 0; 372 } 373 early_param("xen_nopvspin", xen_parse_nopvspin); 374 375 #if defined(CONFIG_XEN_DEBUG_FS) && !defined(CONFIG_QUEUED_SPINLOCKS) 376 377 static struct dentry *d_spin_debug; 378 379 static int __init xen_spinlock_debugfs(void) 380 { 381 struct dentry *d_xen = xen_init_debugfs(); 382 383 if (d_xen == NULL) 384 return -ENOMEM; 385 386 if (!xen_pvspin) 387 return 0; 388 389 d_spin_debug = debugfs_create_dir("spinlocks", d_xen); 390 391 debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats); 392 393 debugfs_create_u32("taken_slow", 0444, d_spin_debug, 394 &spinlock_stats.contention_stats[TAKEN_SLOW]); 395 debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug, 396 &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]); 397 debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug, 398 &spinlock_stats.contention_stats[TAKEN_SLOW_SPURIOUS]); 399 400 debugfs_create_u32("released_slow", 0444, d_spin_debug, 401 &spinlock_stats.contention_stats[RELEASED_SLOW]); 402 debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug, 403 &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]); 404 405 debugfs_create_u64("time_blocked", 0444, d_spin_debug, 406 &spinlock_stats.time_blocked); 407 408 debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug, 409 spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1); 410 411 return 0; 412 } 413 fs_initcall(xen_spinlock_debugfs); 414 415 #endif /* CONFIG_XEN_DEBUG_FS */ 416