1 /* 2 * Split spinlock implementation out into its own file, so it can be 3 * compiled in a FTRACE-compatible way. 4 */ 5 #include <linux/kernel_stat.h> 6 #include <linux/spinlock.h> 7 #include <linux/debugfs.h> 8 #include <linux/log2.h> 9 #include <linux/gfp.h> 10 #include <linux/slab.h> 11 12 #include <asm/paravirt.h> 13 14 #include <xen/interface/xen.h> 15 #include <xen/events.h> 16 17 #include "xen-ops.h" 18 #include "debugfs.h" 19 20 enum xen_contention_stat { 21 TAKEN_SLOW, 22 TAKEN_SLOW_PICKUP, 23 TAKEN_SLOW_SPURIOUS, 24 RELEASED_SLOW, 25 RELEASED_SLOW_KICKED, 26 NR_CONTENTION_STATS 27 }; 28 29 30 #ifdef CONFIG_XEN_DEBUG_FS 31 #define HISTO_BUCKETS 30 32 static struct xen_spinlock_stats 33 { 34 u32 contention_stats[NR_CONTENTION_STATS]; 35 u32 histo_spin_blocked[HISTO_BUCKETS+1]; 36 u64 time_blocked; 37 } spinlock_stats; 38 39 static u8 zero_stats; 40 41 static inline void check_zero(void) 42 { 43 u8 ret; 44 u8 old = ACCESS_ONCE(zero_stats); 45 if (unlikely(old)) { 46 ret = cmpxchg(&zero_stats, old, 0); 47 /* This ensures only one fellow resets the stat */ 48 if (ret == old) 49 memset(&spinlock_stats, 0, sizeof(spinlock_stats)); 50 } 51 } 52 53 static inline void add_stats(enum xen_contention_stat var, u32 val) 54 { 55 check_zero(); 56 spinlock_stats.contention_stats[var] += val; 57 } 58 59 static inline u64 spin_time_start(void) 60 { 61 return xen_clocksource_read(); 62 } 63 64 static void __spin_time_accum(u64 delta, u32 *array) 65 { 66 unsigned index = ilog2(delta); 67 68 check_zero(); 69 70 if (index < HISTO_BUCKETS) 71 array[index]++; 72 else 73 array[HISTO_BUCKETS]++; 74 } 75 76 static inline void spin_time_accum_blocked(u64 start) 77 { 78 u32 delta = xen_clocksource_read() - start; 79 80 __spin_time_accum(delta, spinlock_stats.histo_spin_blocked); 81 spinlock_stats.time_blocked += delta; 82 } 83 #else /* !CONFIG_XEN_DEBUG_FS */ 84 static inline void add_stats(enum xen_contention_stat var, u32 val) 85 { 86 } 87 88 static inline u64 spin_time_start(void) 89 { 90 return 0; 91 } 92 93 static inline void spin_time_accum_blocked(u64 start) 94 { 95 } 96 #endif /* CONFIG_XEN_DEBUG_FS */ 97 98 struct xen_lock_waiting { 99 struct arch_spinlock *lock; 100 __ticket_t want; 101 }; 102 103 static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; 104 static DEFINE_PER_CPU(char *, irq_name); 105 static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting); 106 static cpumask_t waiting_cpus; 107 108 static bool xen_pvspin = true; 109 static void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want) 110 { 111 int irq = __this_cpu_read(lock_kicker_irq); 112 struct xen_lock_waiting *w = &__get_cpu_var(lock_waiting); 113 int cpu = smp_processor_id(); 114 u64 start; 115 unsigned long flags; 116 117 /* If kicker interrupts not initialized yet, just spin */ 118 if (irq == -1) 119 return; 120 121 start = spin_time_start(); 122 123 /* 124 * Make sure an interrupt handler can't upset things in a 125 * partially setup state. 126 */ 127 local_irq_save(flags); 128 /* 129 * We don't really care if we're overwriting some other 130 * (lock,want) pair, as that would mean that we're currently 131 * in an interrupt context, and the outer context had 132 * interrupts enabled. That has already kicked the VCPU out 133 * of xen_poll_irq(), so it will just return spuriously and 134 * retry with newly setup (lock,want). 135 * 136 * The ordering protocol on this is that the "lock" pointer 137 * may only be set non-NULL if the "want" ticket is correct. 138 * If we're updating "want", we must first clear "lock". 139 */ 140 w->lock = NULL; 141 smp_wmb(); 142 w->want = want; 143 smp_wmb(); 144 w->lock = lock; 145 146 /* This uses set_bit, which atomic and therefore a barrier */ 147 cpumask_set_cpu(cpu, &waiting_cpus); 148 add_stats(TAKEN_SLOW, 1); 149 150 /* clear pending */ 151 xen_clear_irq_pending(irq); 152 153 /* Only check lock once pending cleared */ 154 barrier(); 155 156 /* 157 * Mark entry to slowpath before doing the pickup test to make 158 * sure we don't deadlock with an unlocker. 159 */ 160 __ticket_enter_slowpath(lock); 161 162 /* 163 * check again make sure it didn't become free while 164 * we weren't looking 165 */ 166 if (ACCESS_ONCE(lock->tickets.head) == want) { 167 add_stats(TAKEN_SLOW_PICKUP, 1); 168 goto out; 169 } 170 171 /* Allow interrupts while blocked */ 172 local_irq_restore(flags); 173 174 /* 175 * If an interrupt happens here, it will leave the wakeup irq 176 * pending, which will cause xen_poll_irq() to return 177 * immediately. 178 */ 179 180 /* Block until irq becomes pending (or perhaps a spurious wakeup) */ 181 xen_poll_irq(irq); 182 add_stats(TAKEN_SLOW_SPURIOUS, !xen_test_irq_pending(irq)); 183 184 local_irq_save(flags); 185 186 kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); 187 out: 188 cpumask_clear_cpu(cpu, &waiting_cpus); 189 w->lock = NULL; 190 191 local_irq_restore(flags); 192 193 spin_time_accum_blocked(start); 194 } 195 PV_CALLEE_SAVE_REGS_THUNK(xen_lock_spinning); 196 197 static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next) 198 { 199 int cpu; 200 201 add_stats(RELEASED_SLOW, 1); 202 203 for_each_cpu(cpu, &waiting_cpus) { 204 const struct xen_lock_waiting *w = &per_cpu(lock_waiting, cpu); 205 206 /* Make sure we read lock before want */ 207 if (ACCESS_ONCE(w->lock) == lock && 208 ACCESS_ONCE(w->want) == next) { 209 add_stats(RELEASED_SLOW_KICKED, 1); 210 xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); 211 break; 212 } 213 } 214 } 215 216 static irqreturn_t dummy_handler(int irq, void *dev_id) 217 { 218 BUG(); 219 return IRQ_HANDLED; 220 } 221 222 void xen_init_lock_cpu(int cpu) 223 { 224 int irq; 225 char *name; 226 227 if (!xen_pvspin) 228 return; 229 230 WARN(per_cpu(lock_kicker_irq, cpu) >= 0, "spinlock on CPU%d exists on IRQ%d!\n", 231 cpu, per_cpu(lock_kicker_irq, cpu)); 232 233 name = kasprintf(GFP_KERNEL, "spinlock%d", cpu); 234 irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR, 235 cpu, 236 dummy_handler, 237 IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, 238 name, 239 NULL); 240 241 if (irq >= 0) { 242 disable_irq(irq); /* make sure it's never delivered */ 243 per_cpu(lock_kicker_irq, cpu) = irq; 244 per_cpu(irq_name, cpu) = name; 245 } 246 247 printk("cpu %d spinlock event irq %d\n", cpu, irq); 248 } 249 250 void xen_uninit_lock_cpu(int cpu) 251 { 252 if (!xen_pvspin) 253 return; 254 255 unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL); 256 per_cpu(lock_kicker_irq, cpu) = -1; 257 kfree(per_cpu(irq_name, cpu)); 258 per_cpu(irq_name, cpu) = NULL; 259 } 260 261 262 /* 263 * Our init of PV spinlocks is split in two init functions due to us 264 * using paravirt patching and jump labels patching and having to do 265 * all of this before SMP code is invoked. 266 * 267 * The paravirt patching needs to be done _before_ the alternative asm code 268 * is started, otherwise we would not patch the core kernel code. 269 */ 270 void __init xen_init_spinlocks(void) 271 { 272 273 if (!xen_pvspin) { 274 printk(KERN_DEBUG "xen: PV spinlocks disabled\n"); 275 return; 276 } 277 278 pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning); 279 pv_lock_ops.unlock_kick = xen_unlock_kick; 280 } 281 282 /* 283 * While the jump_label init code needs to happend _after_ the jump labels are 284 * enabled and before SMP is started. Hence we use pre-SMP initcall level 285 * init. We cannot do it in xen_init_spinlocks as that is done before 286 * jump labels are activated. 287 */ 288 static __init int xen_init_spinlocks_jump(void) 289 { 290 if (!xen_pvspin) 291 return 0; 292 293 static_key_slow_inc(¶virt_ticketlocks_enabled); 294 return 0; 295 } 296 early_initcall(xen_init_spinlocks_jump); 297 298 static __init int xen_parse_nopvspin(char *arg) 299 { 300 xen_pvspin = false; 301 return 0; 302 } 303 early_param("xen_nopvspin", xen_parse_nopvspin); 304 305 #ifdef CONFIG_XEN_DEBUG_FS 306 307 static struct dentry *d_spin_debug; 308 309 static int __init xen_spinlock_debugfs(void) 310 { 311 struct dentry *d_xen = xen_init_debugfs(); 312 313 if (d_xen == NULL) 314 return -ENOMEM; 315 316 if (!xen_pvspin) 317 return 0; 318 319 d_spin_debug = debugfs_create_dir("spinlocks", d_xen); 320 321 debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats); 322 323 debugfs_create_u32("taken_slow", 0444, d_spin_debug, 324 &spinlock_stats.contention_stats[TAKEN_SLOW]); 325 debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug, 326 &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]); 327 debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug, 328 &spinlock_stats.contention_stats[TAKEN_SLOW_SPURIOUS]); 329 330 debugfs_create_u32("released_slow", 0444, d_spin_debug, 331 &spinlock_stats.contention_stats[RELEASED_SLOW]); 332 debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug, 333 &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]); 334 335 debugfs_create_u64("time_blocked", 0444, d_spin_debug, 336 &spinlock_stats.time_blocked); 337 338 debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug, 339 spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1); 340 341 return 0; 342 } 343 fs_initcall(xen_spinlock_debugfs); 344 345 #endif /* CONFIG_XEN_DEBUG_FS */ 346