/*
 * Split spinlock implementation out into its own file, so it can be
 * compiled in a FTRACE-compatible way.
 */
#include <linux/kernel_stat.h>
#include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/log2.h>
#include <linux/gfp.h>
#include <linux/slab.h>

#include <asm/paravirt.h>

#include <xen/interface/xen.h>
#include <xen/events.h>

#include "xen-ops.h"
#include "debugfs.h"

enum xen_contention_stat {
	TAKEN_SLOW,
	TAKEN_SLOW_PICKUP,
	TAKEN_SLOW_SPURIOUS,
	RELEASED_SLOW,
	RELEASED_SLOW_KICKED,
	NR_CONTENTION_STATS
};

#ifdef CONFIG_XEN_DEBUG_FS
#define HISTO_BUCKETS 30
static struct xen_spinlock_stats
{
	u32 contention_stats[NR_CONTENTION_STATS];
	u32 histo_spin_blocked[HISTO_BUCKETS+1];
	u64 time_blocked;
} spinlock_stats;

static u8 zero_stats;

static inline void check_zero(void)
{
	u8 ret;
	u8 old = READ_ONCE(zero_stats);

	if (unlikely(old)) {
		ret = cmpxchg(&zero_stats, old, 0);
		/* This ensures only one CPU resets the stats */
		if (ret == old)
			memset(&spinlock_stats, 0, sizeof(spinlock_stats));
	}
}

static inline void add_stats(enum xen_contention_stat var, u32 val)
{
	check_zero();
	spinlock_stats.contention_stats[var] += val;
}

static inline u64 spin_time_start(void)
{
	return xen_clocksource_read();
}

static void __spin_time_accum(u64 delta, u32 *array)
{
	unsigned index = ilog2(delta);

	check_zero();

	if (index < HISTO_BUCKETS)
		array[index]++;
	else
		array[HISTO_BUCKETS]++;
}

static inline void spin_time_accum_blocked(u64 start)
{
	u32 delta = xen_clocksource_read() - start;

	__spin_time_accum(delta, spinlock_stats.histo_spin_blocked);
	spinlock_stats.time_blocked += delta;
}
#else  /* !CONFIG_XEN_DEBUG_FS */
static inline void add_stats(enum xen_contention_stat var, u32 val)
{
}

static inline u64 spin_time_start(void)
{
	return 0;
}

static inline void spin_time_accum_blocked(u64 start)
{
}
#endif  /* CONFIG_XEN_DEBUG_FS */

struct xen_lock_waiting {
	struct arch_spinlock *lock;
	__ticket_t want;
};

static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
static DEFINE_PER_CPU(char *, irq_name);
static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting);
static cpumask_t waiting_cpus;

static bool xen_pvspin = true;

__visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
{
	int irq = __this_cpu_read(lock_kicker_irq);
	struct xen_lock_waiting *w = this_cpu_ptr(&lock_waiting);
	int cpu = smp_processor_id();
	u64 start;
	__ticket_t head;
	unsigned long flags;

	/* If the kicker interrupt is not initialized yet, just spin */
	if (irq == -1)
		return;

	start = spin_time_start();

	/*
	 * Make sure an interrupt handler can't upset things in a
	 * partially setup state.
	 */
	local_irq_save(flags);
	/*
	 * We don't really care if we're overwriting some other
	 * (lock,want) pair, as that would mean that we're currently
	 * in an interrupt context, and the outer context had
	 * interrupts enabled.  That has already kicked the VCPU out
	 * of xen_poll_irq(), so it will just return spuriously and
	 * retry with the newly setup (lock,want).
	 *
	 * The ordering protocol on this is that the "lock" pointer
	 * may only be set non-NULL if the "want" ticket is correct.
	 * If we're updating "want", we must first clear "lock".
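	 *
	 * The reader side, xen_unlock_kick(), checks "lock" before "want",
	 * so publishing in the reverse order here (clear "lock", store
	 * "want", then store "lock", with write barriers in between) means
	 * the kicker can never see the new "lock" paired with a stale
	 * "want".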
	 */
	w->lock = NULL;
	smp_wmb();
	w->want = want;
	smp_wmb();
	w->lock = lock;

	/* This uses set_bit, which is atomic and therefore a barrier */
	cpumask_set_cpu(cpu, &waiting_cpus);
	add_stats(TAKEN_SLOW, 1);

	/* clear pending */
	xen_clear_irq_pending(irq);

	/* Only check lock once pending cleared */
	barrier();

	/*
	 * Mark entry to slowpath before doing the pickup test to make
	 * sure we don't deadlock with an unlocker.
	 */
	__ticket_enter_slowpath(lock);

	/* make sure enter_slowpath, which is atomic, does not cross the read */
	smp_mb__after_atomic();

	/*
	 * Check again to make sure the lock didn't become free while we
	 * weren't looking.
	 */
	head = READ_ONCE(lock->tickets.head);
	if (__tickets_equal(head, want)) {
		add_stats(TAKEN_SLOW_PICKUP, 1);
		goto out;
	}

	/* Allow interrupts while blocked */
	local_irq_restore(flags);

	/*
	 * If an interrupt happens here, it will leave the wakeup irq
	 * pending, which will cause xen_poll_irq() to return
	 * immediately.
	 */

	/* Block until irq becomes pending (or perhaps a spurious wakeup) */
	xen_poll_irq(irq);
	add_stats(TAKEN_SLOW_SPURIOUS, !xen_test_irq_pending(irq));

	local_irq_save(flags);

	kstat_incr_irq_this_cpu(irq);
out:
	cpumask_clear_cpu(cpu, &waiting_cpus);
	w->lock = NULL;

	local_irq_restore(flags);

	spin_time_accum_blocked(start);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_lock_spinning);

static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next)
{
	int cpu;

	add_stats(RELEASED_SLOW, 1);

	for_each_cpu(cpu, &waiting_cpus) {
		const struct xen_lock_waiting *w = &per_cpu(lock_waiting, cpu);

		/* Make sure we read lock before want */
		if (READ_ONCE(w->lock) == lock &&
		    READ_ONCE(w->want) == next) {
			add_stats(RELEASED_SLOW_KICKED, 1);
			xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
			break;
		}
	}
}

static irqreturn_t dummy_handler(int irq, void *dev_id)
{
	BUG();
	return IRQ_HANDLED;
}

void xen_init_lock_cpu(int cpu)
{
	int irq;
	char *name;

	if (!xen_pvspin)
		return;

	WARN(per_cpu(lock_kicker_irq, cpu) >= 0, "spinlock on CPU%d exists on IRQ%d!\n",
	     cpu, per_cpu(lock_kicker_irq, cpu));

	name = kasprintf(GFP_KERNEL, "spinlock%d", cpu);
	irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR,
				     cpu,
				     dummy_handler,
				     IRQF_PERCPU|IRQF_NOBALANCING,
				     name,
				     NULL);

	if (irq >= 0) {
		disable_irq(irq); /* make sure it's never delivered */
		per_cpu(lock_kicker_irq, cpu) = irq;
		per_cpu(irq_name, cpu) = name;
	}

	printk(KERN_DEBUG "cpu %d spinlock event irq %d\n", cpu, irq);
}

void xen_uninit_lock_cpu(int cpu)
{
	if (!xen_pvspin)
		return;

	unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL);
	per_cpu(lock_kicker_irq, cpu) = -1;
	kfree(per_cpu(irq_name, cpu));
	per_cpu(irq_name, cpu) = NULL;
}

/*
 * Our init of PV spinlocks is split into two init functions because we
 * rely on both paravirt patching and jump label patching, and all of
 * this has to be done before the SMP code is invoked.
 *
 * The paravirt patching needs to be done _before_ the alternative asm code
 * is started, otherwise we would not patch the core kernel code.
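 *
 * xen_init_spinlocks() below only wires xen_lock_spinning() and
 * xen_unlock_kick() into pv_lock_ops; the paravirt_ticketlocks_enabled
 * static key is flipped separately, from xen_init_spinlocks_jump(),
 * since that has to wait until jump labels can be patched.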
 */
void __init xen_init_spinlocks(void)
{
	if (!xen_pvspin) {
		printk(KERN_DEBUG "xen: PV spinlocks disabled\n");
		return;
	}
	printk(KERN_DEBUG "xen: PV spinlocks enabled\n");
	pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning);
	pv_lock_ops.unlock_kick = xen_unlock_kick;
}

/*
 * The jump_label init code, on the other hand, needs to happen _after_
 * the jump labels are enabled and before SMP is started.  Hence we use a
 * pre-SMP initcall level for the init.  We cannot do it in
 * xen_init_spinlocks(), as that is done before jump labels are activated.
 */
static __init int xen_init_spinlocks_jump(void)
{
	if (!xen_pvspin)
		return 0;

	if (!xen_domain())
		return 0;

	static_key_slow_inc(&paravirt_ticketlocks_enabled);
	return 0;
}
early_initcall(xen_init_spinlocks_jump);

static __init int xen_parse_nopvspin(char *arg)
{
	xen_pvspin = false;
	return 0;
}
early_param("xen_nopvspin", xen_parse_nopvspin);

#ifdef CONFIG_XEN_DEBUG_FS

static struct dentry *d_spin_debug;

static int __init xen_spinlock_debugfs(void)
{
	struct dentry *d_xen = xen_init_debugfs();

	if (d_xen == NULL)
		return -ENOMEM;

	if (!xen_pvspin)
		return 0;

	d_spin_debug = debugfs_create_dir("spinlocks", d_xen);

	debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);

	debugfs_create_u32("taken_slow", 0444, d_spin_debug,
			   &spinlock_stats.contention_stats[TAKEN_SLOW]);
	debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
			   &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]);
	debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug,
			   &spinlock_stats.contention_stats[TAKEN_SLOW_SPURIOUS]);

	debugfs_create_u32("released_slow", 0444, d_spin_debug,
			   &spinlock_stats.contention_stats[RELEASED_SLOW]);
	debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
			   &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]);

	debugfs_create_u64("time_blocked", 0444, d_spin_debug,
			   &spinlock_stats.time_blocked);

	debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
				 spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);

	return 0;
}
fs_initcall(xen_spinlock_debugfs);

#endif	/* CONFIG_XEN_DEBUG_FS */