xref: /openbmc/linux/arch/powerpc/lib/qspinlock.c (revision 8a649e33f48e08be20c51541d9184645892ec370)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 #include <linux/bug.h>
3 #include <linux/compiler.h>
4 #include <linux/export.h>
5 #include <linux/percpu.h>
6 #include <linux/processor.h>
7 #include <linux/smp.h>
8 #include <linux/topology.h>
9 #include <linux/sched/clock.h>
10 #include <asm/qspinlock.h>
11 #include <asm/paravirt.h>
12 
/*
 * Number of queue-node slots available on each CPU (size of qnodes.nodes[]).
 * The MCS slow path claims one slot per queued acquisition on a CPU and
 * falls back to spinning on trylock when all slots are already in use.
 */
#define MAX_NODES	4
14 
/*
 * One waiter's entry in the MCS-style wait queue. Entries live in the
 * per-CPU qnodes array and are initialised in queued_spin_lock_mcs_queue().
 */
struct qnode {
	struct qnode	*next;		/* next waiter; NULL until that waiter links itself in */
	struct qspinlock *lock;		/* lock this node is queued on; matched by get_tail_qnode() */
	int		cpu;		/* CPU that queued this node */
	int		yield_cpu;	/* CPU to yield to, propagated from the queue head; -1 if none */
	u8		locked;		/* set to 1 by the previous queue head to pass on the head position */
};
22 
/* Per-CPU pool of queue nodes used by the lock slow path. */
struct qnodes {
	int		count;			/* number of nodes[] slots currently claimed */
	struct qnode nodes[MAX_NODES];		/* slots, claimed and released in stack order */
};
27 
/* Tuning parameters */

/* Iteration limit for try_to_steal_lock(); 0 disables stealing altogether. */
static int steal_spins __read_mostly = (1 << 5);
/* Lower iteration limit used by steal_break() when the owner is on another NUMA node. */
static int remote_steal_spins __read_mostly = (1 << 2);
/*
 * Whether lock stealers may exist. Constant when _Q_SPIN_TRY_LOCK_STEAL is 1,
 * otherwise toggled by steal_spins_set().
 */
#if _Q_SPIN_TRY_LOCK_STEAL == 1
static const bool maybe_stealers = true;
#else
static bool maybe_stealers __read_mostly = true;
#endif
/* Iterations the queue head spins before setting _Q_MUST_Q_VAL on the lock. */
static int head_spins __read_mostly = (1 << 8);

/* Yield to the lock owner's vCPU when it appears preempted. */
static bool pv_yield_owner __read_mostly = true;
/* Queue head drops _Q_MUST_Q_VAL around its yield to the owner. */
static bool pv_yield_allow_steal __read_mostly = false;
/* Do not count spin iterations in which the owner was seen preempted. */
static bool pv_spin_on_preempted_owner __read_mostly = false;
/* Enable the "sleepy lock" heuristics (_Q_SLEEPY_VAL, scaled spin limits). */
static bool pv_sleepy_lock __read_mostly = true;
/* After seeing a preempted owner, keep re-marking the lock sleepy. */
static bool pv_sleepy_lock_sticky __read_mostly = false;
/* How long a CPU keeps treating locks as sleepy after seeing one; 0 disables. */
static u64 pv_sleepy_lock_interval_ns __read_mostly = 0;
/* Multiplier applied to the spin limits when the lock is considered sleepy. */
static int pv_sleepy_lock_factor __read_mostly = 256;
/* Queued waiters yield to the previous waiter's vCPU when it appears preempted. */
static bool pv_yield_prev __read_mostly = true;
/* Queue head passes the preempted owner's CPU down the queue via yield_cpu. */
static bool pv_yield_propagate_owner __read_mostly = true;
/* Prod the next waiter's vCPU after handing it the queue head, if it is preempted. */
static bool pv_prod_head __read_mostly = false;

/* Per-CPU queue nodes and the sched_clock() time a sleepy lock was last seen. */
static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
static DEFINE_PER_CPU_ALIGNED(u64, sleepy_lock_seen_clock);
51 
/*
 * spec_barrier() emits "ori 31,31,0" with a compiler memory clobber when
 * _Q_SPIN_SPEC_BARRIER is 1, and expands to nothing otherwise.
 * NOTE(review): presumably a speculation barrier on the CPUs that need it --
 * confirm against the definition of _Q_SPIN_SPEC_BARRIER.
 */
#if _Q_SPIN_SPEC_BARRIER == 1
#define spec_barrier() do { asm volatile("ori 31,31,0" ::: "memory"); } while (0)
#else
#define spec_barrier() do { } while (0)
#endif
57 
58 static __always_inline bool recently_sleepy(void)
59 {
60 	/* pv_sleepy_lock is true when this is called */
61 	if (pv_sleepy_lock_interval_ns) {
62 		u64 seen = this_cpu_read(sleepy_lock_seen_clock);
63 
64 		if (seen) {
65 			u64 delta = sched_clock() - seen;
66 			if (delta < pv_sleepy_lock_interval_ns)
67 				return true;
68 			this_cpu_write(sleepy_lock_seen_clock, 0);
69 		}
70 	}
71 
72 	return false;
73 }
74 
75 static __always_inline int get_steal_spins(bool paravirt, bool sleepy)
76 {
77 	if (paravirt && sleepy)
78 		return steal_spins * pv_sleepy_lock_factor;
79 	else
80 		return steal_spins;
81 }
82 
83 static __always_inline int get_remote_steal_spins(bool paravirt, bool sleepy)
84 {
85 	if (paravirt && sleepy)
86 		return remote_steal_spins * pv_sleepy_lock_factor;
87 	else
88 		return remote_steal_spins;
89 }
90 
91 static __always_inline int get_head_spins(bool paravirt, bool sleepy)
92 {
93 	if (paravirt && sleepy)
94 		return head_spins * pv_sleepy_lock_factor;
95 	else
96 		return head_spins;
97 }
98 
99 static inline u32 encode_tail_cpu(int cpu)
100 {
101 	return (cpu + 1) << _Q_TAIL_CPU_OFFSET;
102 }
103 
104 static inline int decode_tail_cpu(u32 val)
105 {
106 	return (val >> _Q_TAIL_CPU_OFFSET) - 1;
107 }
108 
109 static inline int get_owner_cpu(u32 val)
110 {
111 	return (val & _Q_OWNER_CPU_MASK) >> _Q_OWNER_CPU_OFFSET;
112 }
113 
/*
 * Try to acquire the lock if it was not already locked. If the tail matches
 * mytail then clear it, otherwise leave it unchanged. Return previous value.
 *
 * This is used by the head of the queue to acquire the lock and clean up
 * its tail if it was the last one queued.
 *
 * @lock: lock to acquire.
 * @tail: the caller's own encoded tail value.
 *
 * If the returned value has _Q_LOCKED_VAL set, nothing was stored and the
 * lock was not taken. Otherwise the word stored is the new locked value
 * merged with only the tail bits of the old word (cleared when they equal
 * @tail), so any other bits of the old word are dropped; the store is
 * followed by PPC_ACQUIRE_BARRIER.
 */
static __always_inline u32 trylock_clean_tail(struct qspinlock *lock, u32 tail)
{
	u32 newval = queued_spin_encode_locked_val();
	u32 prev, tmp;

	/*
	 * Operands: %0 prev, %1 tmp, %2 &lock->val, %3 tail, %4 newval,
	 * %5 _Q_LOCKED_VAL, %6 _Q_TAIL_CPU_MASK, %7 _Q_SPIN_EH_HINT.
	 */
	asm volatile(
"1:	lwarx	%0,0,%2,%7	# trylock_clean_tail			\n"
	/* This test is necessary if there could be stealers */
"	andi.	%1,%0,%5						\n"
"	bne	3f							\n"
	/* Test whether the lock tail == mytail */
"	and	%1,%0,%6						\n"
"	cmpw	0,%1,%3							\n"
	/* Merge the new locked value */
"	or	%1,%1,%4						\n"
"	bne	2f							\n"
	/* If the lock tail matched, then clear it, otherwise leave it. */
"	andc	%1,%1,%6						\n"
"2:	stwcx.	%1,0,%2							\n"
"	bne-	1b							\n"
"\t"	PPC_ACQUIRE_BARRIER "						\n"
"3:									\n"
	: "=&r" (prev), "=&r" (tmp)
	: "r" (&lock->val), "r"(tail), "r" (newval),
	  "i" (_Q_LOCKED_VAL),
	  "r" (_Q_TAIL_CPU_MASK),
	  "i" (_Q_SPIN_EH_HINT)
	: "cr0", "memory");

	return prev;
}
152 
/*
 * Publish our tail, replacing previous tail. Return previous value.
 *
 * This provides a release barrier for publishing node, this pairs with the
 * acquire barrier in get_tail_qnode() when the next CPU finds this tail
 * value.
 *
 * @lock: lock being queued on.
 * @tail: the caller's encoded tail value (see encode_tail_cpu()).
 *
 * Only the bits under _Q_TAIL_CPU_MASK are replaced; the rest of the lock
 * word is preserved. The returned word is the value loaded before the
 * update, so its tail field identifies the previous last waiter, if any.
 */
static __always_inline u32 publish_tail_cpu(struct qspinlock *lock, u32 tail)
{
	u32 prev, tmp;

	/* Operands: %0 prev, %1 tmp, %2 &lock->val, %3 tail, %4 _Q_TAIL_CPU_MASK. */
	asm volatile(
"\t"	PPC_RELEASE_BARRIER "						\n"
"1:	lwarx	%0,0,%2		# publish_tail_cpu			\n"
"	andc	%1,%0,%4						\n"
"	or	%1,%1,%3						\n"
"	stwcx.	%1,0,%2							\n"
"	bne-	1b							\n"
	: "=&r" (prev), "=&r"(tmp)
	: "r" (&lock->val), "r" (tail), "r"(_Q_TAIL_CPU_MASK)
	: "cr0", "memory");

	return prev;
}
177 
/*
 * Atomically set _Q_MUST_Q_VAL in the lock word. try_to_steal_lock() stops
 * spinning as soon as it sees this bit, so setting it keeps new stealers out.
 *
 * No hardware barrier is issued; the asm only carries a compiler "memory"
 * clobber.
 *
 * Note that the loaded word is modified in place before being stored, so the
 * value returned is the updated word, not the one that was there before.
 * Callers in this file ignore the return value.
 */
static __always_inline u32 set_mustq(struct qspinlock *lock)
{
	u32 prev;

	asm volatile(
"1:	lwarx	%0,0,%1		# set_mustq				\n"
"	or	%0,%0,%2						\n"
"	stwcx.	%0,0,%1							\n"
"	bne-	1b							\n"
	: "=&r" (prev)
	: "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
	: "cr0", "memory");

	return prev;
}
193 
/*
 * Atomically clear _Q_MUST_Q_VAL in the lock word, the inverse of
 * set_mustq().
 *
 * No hardware barrier is issued; the asm only carries a compiler "memory"
 * clobber.
 *
 * Note that the loaded word is modified in place before being stored, so the
 * value returned is the updated word, not the one that was there before.
 * Callers in this file ignore the return value.
 */
static __always_inline u32 clear_mustq(struct qspinlock *lock)
{
	u32 prev;

	asm volatile(
"1:	lwarx	%0,0,%1		# clear_mustq				\n"
"	andc	%0,%0,%2						\n"
"	stwcx.	%0,0,%1							\n"
"	bne-	1b							\n"
	: "=&r" (prev)
	: "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
	: "cr0", "memory");

	return prev;
}
209 
/*
 * Compare-and-swap the lock word from @old to @old | _Q_SLEEPY_VAL.
 *
 * @lock: lock to mark sleepy.
 * @old:  lock word the caller last observed. It must have _Q_LOCKED_VAL set
 *        and _Q_SLEEPY_VAL clear, otherwise this BUG()s.
 *
 * Returns true if the lock word still equalled @old and was updated, false
 * if it had changed (nothing is stored in that case). No hardware barrier is
 * issued.
 */
static __always_inline bool try_set_sleepy(struct qspinlock *lock, u32 old)
{
	u32 prev;
	u32 new = old | _Q_SLEEPY_VAL;

	BUG_ON(!(old & _Q_LOCKED_VAL));
	BUG_ON(old & _Q_SLEEPY_VAL);

	/* Operands: %0 prev, %1 &lock->val, %2 old, %3 new. */
	asm volatile(
"1:	lwarx	%0,0,%1		# try_set_sleepy			\n"
"	cmpw	0,%0,%2							\n"
"	bne-	2f							\n"
"	stwcx.	%3,0,%1							\n"
"	bne-	1b							\n"
"2:									\n"
	: "=&r" (prev)
	: "r" (&lock->val), "r"(old), "r" (new)
	: "cr0", "memory");

	return likely(prev == old);
}
231 
232 static __always_inline void seen_sleepy_owner(struct qspinlock *lock, u32 val)
233 {
234 	if (pv_sleepy_lock) {
235 		if (pv_sleepy_lock_interval_ns)
236 			this_cpu_write(sleepy_lock_seen_clock, sched_clock());
237 		if (!(val & _Q_SLEEPY_VAL))
238 			try_set_sleepy(lock, val);
239 	}
240 }
241 
242 static __always_inline void seen_sleepy_lock(void)
243 {
244 	if (pv_sleepy_lock && pv_sleepy_lock_interval_ns)
245 		this_cpu_write(sleepy_lock_seen_clock, sched_clock());
246 }
247 
248 static __always_inline void seen_sleepy_node(struct qspinlock *lock, u32 val)
249 {
250 	if (pv_sleepy_lock) {
251 		if (pv_sleepy_lock_interval_ns)
252 			this_cpu_write(sleepy_lock_seen_clock, sched_clock());
253 		if (val & _Q_LOCKED_VAL) {
254 			if (!(val & _Q_SLEEPY_VAL))
255 				try_set_sleepy(lock, val);
256 		}
257 	}
258 }
259 
260 static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val)
261 {
262 	int cpu = decode_tail_cpu(val);
263 	struct qnodes *qnodesp = per_cpu_ptr(&qnodes, cpu);
264 	int idx;
265 
266 	/*
267 	 * After publishing the new tail and finding a previous tail in the
268 	 * previous val (which is the control dependency), this barrier
269 	 * orders the release barrier in publish_tail_cpu performed by the
270 	 * last CPU, with subsequently looking at its qnode structures
271 	 * after the barrier.
272 	 */
273 	smp_acquire__after_ctrl_dep();
274 
275 	for (idx = 0; idx < MAX_NODES; idx++) {
276 		struct qnode *qnode = &qnodesp->nodes[idx];
277 		if (qnode->lock == lock)
278 			return qnode;
279 	}
280 
281 	BUG();
282 }
283 
/*
 * Called inside spin_begin(). Returns whether or not the vCPU was preempted.
 *
 * One spin iteration against a locked lock: relax the CPU, or, when running
 * paravirt with pv_yield_owner enabled and the owner's vCPU looks preempted,
 * yield to it.
 *
 * @lock:     lock being spun on.
 * @val:      lock word the caller just read; must have _Q_LOCKED_VAL set.
 * @paravirt: whether paravirt behaviour applies.
 * @mustq:    caller is the queue head and wants _Q_MUST_Q_VAL dropped for
 *            the duration of the yield and set again afterwards.
 *
 * Returns true whenever the owner was seen preempted, whether or not the
 * yield was actually performed.
 */
static __always_inline bool __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool mustq)
{
	int owner;
	u32 yield_count;
	bool preempted = false;

	BUG_ON(!(val & _Q_LOCKED_VAL));

	if (!paravirt)
		goto relax;

	if (!pv_yield_owner)
		goto relax;

	owner = get_owner_cpu(val);
	yield_count = yield_count_of(owner);

	if ((yield_count & 1) == 0)
		goto relax; /* owner vcpu is running */

	spin_end();

	/* Note the preemption for the sleepy-lock heuristics. */
	seen_sleepy_owner(lock, val);
	preempted = true;

	/*
	 * Read the lock word after sampling the yield count. On the other side
	 * there may a wmb because the yield count update is done by the
	 * hypervisor preemption and the value update by the OS, however this
	 * ordering might reduce the chance of out of order accesses and
	 * improve the heuristic.
	 */
	smp_rmb();

	/* Only yield if the lock word is still exactly what was sampled. */
	if (READ_ONCE(lock->val) == val) {
		if (mustq)
			clear_mustq(lock);
		yield_to_preempted(owner, yield_count);
		if (mustq)
			set_mustq(lock);
		spin_begin();

		/* Don't relax if we yielded. Maybe we should? */
		return preempted;
	}
	spin_begin();
relax:
	spin_cpu_relax();

	return preempted;
}
336 
337 /* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
338 static __always_inline bool yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
339 {
340 	return __yield_to_locked_owner(lock, val, paravirt, false);
341 }
342 
343 /* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
344 static __always_inline bool yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
345 {
346 	bool mustq = false;
347 
348 	if ((val & _Q_MUST_Q_VAL) && pv_yield_allow_steal)
349 		mustq = true;
350 
351 	return __yield_to_locked_owner(lock, val, paravirt, mustq);
352 }
353 
354 static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int *set_yield_cpu, bool paravirt)
355 {
356 	struct qnode *next;
357 	int owner;
358 
359 	if (!paravirt)
360 		return;
361 	if (!pv_yield_propagate_owner)
362 		return;
363 
364 	owner = get_owner_cpu(val);
365 	if (*set_yield_cpu == owner)
366 		return;
367 
368 	next = READ_ONCE(node->next);
369 	if (!next)
370 		return;
371 
372 	if (vcpu_is_preempted(owner)) {
373 		next->yield_cpu = owner;
374 		*set_yield_cpu = owner;
375 	} else if (*set_yield_cpu != -1) {
376 		next->yield_cpu = owner;
377 		*set_yield_cpu = owner;
378 	}
379 }
380 
381 /* Called inside spin_begin() */
382 static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt)
383 {
384 	int prev_cpu = decode_tail_cpu(val);
385 	u32 yield_count;
386 	int yield_cpu;
387 	bool preempted = false;
388 
389 	if (!paravirt)
390 		goto relax;
391 
392 	if (!pv_yield_propagate_owner)
393 		goto yield_prev;
394 
395 	yield_cpu = READ_ONCE(node->yield_cpu);
396 	if (yield_cpu == -1) {
397 		/* Propagate back the -1 CPU */
398 		if (node->next && node->next->yield_cpu != -1)
399 			node->next->yield_cpu = yield_cpu;
400 		goto yield_prev;
401 	}
402 
403 	yield_count = yield_count_of(yield_cpu);
404 	if ((yield_count & 1) == 0)
405 		goto yield_prev; /* owner vcpu is running */
406 
407 	spin_end();
408 
409 	preempted = true;
410 	seen_sleepy_node(lock, val);
411 
412 	smp_rmb();
413 
414 	if (yield_cpu == node->yield_cpu) {
415 		if (node->next && node->next->yield_cpu != yield_cpu)
416 			node->next->yield_cpu = yield_cpu;
417 		yield_to_preempted(yield_cpu, yield_count);
418 		spin_begin();
419 		return preempted;
420 	}
421 	spin_begin();
422 
423 yield_prev:
424 	if (!pv_yield_prev)
425 		goto relax;
426 
427 	yield_count = yield_count_of(prev_cpu);
428 	if ((yield_count & 1) == 0)
429 		goto relax; /* owner vcpu is running */
430 
431 	spin_end();
432 
433 	preempted = true;
434 	seen_sleepy_node(lock, val);
435 
436 	smp_rmb(); /* See __yield_to_locked_owner comment */
437 
438 	if (!node->locked) {
439 		yield_to_preempted(prev_cpu, yield_count);
440 		spin_begin();
441 		return preempted;
442 	}
443 	spin_begin();
444 
445 relax:
446 	spin_cpu_relax();
447 
448 	return preempted;
449 }
450 
451 static __always_inline bool steal_break(u32 val, int iters, bool paravirt, bool sleepy)
452 {
453 	if (iters >= get_steal_spins(paravirt, sleepy))
454 		return true;
455 
456 	if (IS_ENABLED(CONFIG_NUMA) &&
457 	    (iters >= get_remote_steal_spins(paravirt, sleepy))) {
458 		int cpu = get_owner_cpu(val);
459 		if (numa_node_id() != cpu_to_node(cpu))
460 			return true;
461 	}
462 	return false;
463 }
464 
465 static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt)
466 {
467 	bool seen_preempted = false;
468 	bool sleepy = false;
469 	int iters = 0;
470 	u32 val;
471 
472 	if (!steal_spins) {
473 		/* XXX: should spin_on_preempted_owner do anything here? */
474 		return false;
475 	}
476 
477 	/* Attempt to steal the lock */
478 	spin_begin();
479 	do {
480 		bool preempted = false;
481 
482 		val = READ_ONCE(lock->val);
483 		if (val & _Q_MUST_Q_VAL)
484 			break;
485 		spec_barrier();
486 
487 		if (unlikely(!(val & _Q_LOCKED_VAL))) {
488 			spin_end();
489 			if (__queued_spin_trylock_steal(lock))
490 				return true;
491 			spin_begin();
492 		} else {
493 			preempted = yield_to_locked_owner(lock, val, paravirt);
494 		}
495 
496 		if (paravirt && pv_sleepy_lock) {
497 			if (!sleepy) {
498 				if (val & _Q_SLEEPY_VAL) {
499 					seen_sleepy_lock();
500 					sleepy = true;
501 				} else if (recently_sleepy()) {
502 					sleepy = true;
503 				}
504 			}
505 			if (pv_sleepy_lock_sticky && seen_preempted &&
506 			    !(val & _Q_SLEEPY_VAL)) {
507 				if (try_set_sleepy(lock, val))
508 					val |= _Q_SLEEPY_VAL;
509 			}
510 		}
511 
512 		if (preempted) {
513 			seen_preempted = true;
514 			sleepy = true;
515 			if (!pv_spin_on_preempted_owner)
516 				iters++;
517 			/*
518 			 * pv_spin_on_preempted_owner don't increase iters
519 			 * while the owner is preempted -- we won't interfere
520 			 * with it by definition. This could introduce some
521 			 * latency issue if we continually observe preempted
522 			 * owners, but hopefully that's a rare corner case of
523 			 * a badly oversubscribed system.
524 			 */
525 		} else {
526 			iters++;
527 		}
528 	} while (!steal_break(val, iters, paravirt, sleepy));
529 
530 	spin_end();
531 
532 	return false;
533 }
534 
535 static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool paravirt)
536 {
537 	struct qnodes *qnodesp;
538 	struct qnode *next, *node;
539 	u32 val, old, tail;
540 	bool seen_preempted = false;
541 	bool sleepy = false;
542 	bool mustq = false;
543 	int idx;
544 	int set_yield_cpu = -1;
545 	int iters = 0;
546 
547 	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
548 
549 	qnodesp = this_cpu_ptr(&qnodes);
550 	if (unlikely(qnodesp->count >= MAX_NODES)) {
551 		spec_barrier();
552 		while (!queued_spin_trylock(lock))
553 			cpu_relax();
554 		return;
555 	}
556 
557 	idx = qnodesp->count++;
558 	/*
559 	 * Ensure that we increment the head node->count before initialising
560 	 * the actual node. If the compiler is kind enough to reorder these
561 	 * stores, then an IRQ could overwrite our assignments.
562 	 */
563 	barrier();
564 	node = &qnodesp->nodes[idx];
565 	node->next = NULL;
566 	node->lock = lock;
567 	node->cpu = smp_processor_id();
568 	node->yield_cpu = -1;
569 	node->locked = 0;
570 
571 	tail = encode_tail_cpu(node->cpu);
572 
573 	old = publish_tail_cpu(lock, tail);
574 
575 	/*
576 	 * If there was a previous node; link it and wait until reaching the
577 	 * head of the waitqueue.
578 	 */
579 	if (old & _Q_TAIL_CPU_MASK) {
580 		struct qnode *prev = get_tail_qnode(lock, old);
581 
582 		/* Link @node into the waitqueue. */
583 		WRITE_ONCE(prev->next, node);
584 
585 		/* Wait for mcs node lock to be released */
586 		spin_begin();
587 		while (!node->locked) {
588 			spec_barrier();
589 
590 			if (yield_to_prev(lock, node, old, paravirt))
591 				seen_preempted = true;
592 		}
593 		spec_barrier();
594 		spin_end();
595 
596 		/* Clear out stale propagated yield_cpu */
597 		if (paravirt && pv_yield_propagate_owner && node->yield_cpu != -1)
598 			node->yield_cpu = -1;
599 
600 		smp_rmb(); /* acquire barrier for the mcs lock */
601 
602 		/*
603 		 * Generic qspinlocks have this prefetch here, but it seems
604 		 * like it could cause additional line transitions because
605 		 * the waiter will keep loading from it.
606 		 */
607 		if (_Q_SPIN_PREFETCH_NEXT) {
608 			next = READ_ONCE(node->next);
609 			if (next)
610 				prefetchw(next);
611 		}
612 	}
613 
614 	/* We're at the head of the waitqueue, wait for the lock. */
615 again:
616 	spin_begin();
617 	for (;;) {
618 		bool preempted;
619 
620 		val = READ_ONCE(lock->val);
621 		if (!(val & _Q_LOCKED_VAL))
622 			break;
623 		spec_barrier();
624 
625 		if (paravirt && pv_sleepy_lock && maybe_stealers) {
626 			if (!sleepy) {
627 				if (val & _Q_SLEEPY_VAL) {
628 					seen_sleepy_lock();
629 					sleepy = true;
630 				} else if (recently_sleepy()) {
631 					sleepy = true;
632 				}
633 			}
634 			if (pv_sleepy_lock_sticky && seen_preempted &&
635 			    !(val & _Q_SLEEPY_VAL)) {
636 				if (try_set_sleepy(lock, val))
637 					val |= _Q_SLEEPY_VAL;
638 			}
639 		}
640 
641 		propagate_yield_cpu(node, val, &set_yield_cpu, paravirt);
642 		preempted = yield_head_to_locked_owner(lock, val, paravirt);
643 		if (!maybe_stealers)
644 			continue;
645 
646 		if (preempted)
647 			seen_preempted = true;
648 
649 		if (paravirt && preempted) {
650 			sleepy = true;
651 
652 			if (!pv_spin_on_preempted_owner)
653 				iters++;
654 		} else {
655 			iters++;
656 		}
657 
658 		if (!mustq && iters >= get_head_spins(paravirt, sleepy)) {
659 			mustq = true;
660 			set_mustq(lock);
661 			val |= _Q_MUST_Q_VAL;
662 		}
663 	}
664 	spec_barrier();
665 	spin_end();
666 
667 	/* If we're the last queued, must clean up the tail. */
668 	old = trylock_clean_tail(lock, tail);
669 	if (unlikely(old & _Q_LOCKED_VAL)) {
670 		BUG_ON(!maybe_stealers);
671 		goto again; /* Can only be true if maybe_stealers. */
672 	}
673 
674 	if ((old & _Q_TAIL_CPU_MASK) == tail)
675 		goto release; /* We were the tail, no next. */
676 
677 	/* There is a next, must wait for node->next != NULL (MCS protocol) */
678 	next = READ_ONCE(node->next);
679 	if (!next) {
680 		spin_begin();
681 		while (!(next = READ_ONCE(node->next)))
682 			cpu_relax();
683 		spin_end();
684 	}
685 	spec_barrier();
686 
687 	/*
688 	 * Unlock the next mcs waiter node. Release barrier is not required
689 	 * here because the acquirer is only accessing the lock word, and
690 	 * the acquire barrier we took the lock with orders that update vs
691 	 * this store to locked. The corresponding barrier is the smp_rmb()
692 	 * acquire barrier for mcs lock, above.
693 	 */
694 	if (paravirt && pv_prod_head) {
695 		int next_cpu = next->cpu;
696 		WRITE_ONCE(next->locked, 1);
697 		if (_Q_SPIN_MISO)
698 			asm volatile("miso" ::: "memory");
699 		if (vcpu_is_preempted(next_cpu))
700 			prod_cpu(next_cpu);
701 	} else {
702 		WRITE_ONCE(next->locked, 1);
703 		if (_Q_SPIN_MISO)
704 			asm volatile("miso" ::: "memory");
705 	}
706 
707 release:
708 	qnodesp->count--; /* release the node */
709 }
710 
711 void queued_spin_lock_slowpath(struct qspinlock *lock)
712 {
713 	/*
714 	 * This looks funny, but it induces the compiler to inline both
715 	 * sides of the branch rather than share code as when the condition
716 	 * is passed as the paravirt argument to the functions.
717 	 */
718 	if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) {
719 		if (try_to_steal_lock(lock, true)) {
720 			spec_barrier();
721 			return;
722 		}
723 		queued_spin_lock_mcs_queue(lock, true);
724 	} else {
725 		if (try_to_steal_lock(lock, false)) {
726 			spec_barrier();
727 			return;
728 		}
729 		queued_spin_lock_mcs_queue(lock, false);
730 	}
731 }
732 EXPORT_SYMBOL(queued_spin_lock_slowpath);
733 
#ifdef CONFIG_PARAVIRT_SPINLOCKS
/*
 * Paravirt spinlock initialisation hook. Intentionally empty: nothing in
 * this file needs run-time setup.
 */
void pv_spinlocks_init(void)
{
}
#endif
739 
740 #include <linux/debugfs.h>
/*
 * debugfs write handler for "qspl_steal_spins".
 *
 * @data: unused.
 * @val:  new steal_spins value; 0 disables lock stealing.
 *
 * When maybe_stealers is not a compile-time constant, switching stealing on
 * or off also flips maybe_stealers, with a synchronize_rcu() between the two
 * stores so that the two modes are kept apart (see the comment in the body).
 * NOTE(review): this relies on lock slow paths running in a context that
 * synchronize_rcu() waits for -- confirm.
 *
 * Always returns 0.
 */
static int steal_spins_set(void *data, u64 val)
{
#if _Q_SPIN_TRY_LOCK_STEAL == 1
	/* MAYBE_STEAL remains true */
	steal_spins = val;
#else
	/* Serialises concurrent writers of this debugfs file. */
	static DEFINE_MUTEX(lock);

	/*
	 * The lock slow path has a !maybe_stealers case that can assume
	 * the head of queue will not see concurrent waiters. That waiter
	 * is unsafe in the presence of stealers, so must keep them away
	 * from one another.
	 */

	mutex_lock(&lock);
	if (val && !steal_spins) {
		/* Enabling: announce stealers first, then allow them. */
		maybe_stealers = true;
		/* wait for queue head waiter to go away */
		synchronize_rcu();
		steal_spins = val;
	} else if (!val && steal_spins) {
		/* Disabling: stop new stealers first, then drop the flag. */
		steal_spins = val;
		/* wait for all possible stealers to go away */
		synchronize_rcu();
		maybe_stealers = false;
	} else {
		/* No on/off transition, just a new limit. */
		steal_spins = val;
	}
	mutex_unlock(&lock);
#endif

	return 0;
}
775 
776 static int steal_spins_get(void *data, u64 *val)
777 {
778 	*val = steal_spins;
779 
780 	return 0;
781 }
782 
783 DEFINE_SIMPLE_ATTRIBUTE(fops_steal_spins, steal_spins_get, steal_spins_set, "%llu\n");
784 
785 static int remote_steal_spins_set(void *data, u64 val)
786 {
787 	remote_steal_spins = val;
788 
789 	return 0;
790 }
791 
792 static int remote_steal_spins_get(void *data, u64 *val)
793 {
794 	*val = remote_steal_spins;
795 
796 	return 0;
797 }
798 
799 DEFINE_SIMPLE_ATTRIBUTE(fops_remote_steal_spins, remote_steal_spins_get, remote_steal_spins_set, "%llu\n");
800 
801 static int head_spins_set(void *data, u64 val)
802 {
803 	head_spins = val;
804 
805 	return 0;
806 }
807 
808 static int head_spins_get(void *data, u64 *val)
809 {
810 	*val = head_spins;
811 
812 	return 0;
813 }
814 
815 DEFINE_SIMPLE_ATTRIBUTE(fops_head_spins, head_spins_get, head_spins_set, "%llu\n");
816 
817 static int pv_yield_owner_set(void *data, u64 val)
818 {
819 	pv_yield_owner = !!val;
820 
821 	return 0;
822 }
823 
824 static int pv_yield_owner_get(void *data, u64 *val)
825 {
826 	*val = pv_yield_owner;
827 
828 	return 0;
829 }
830 
831 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n");
832 
833 static int pv_yield_allow_steal_set(void *data, u64 val)
834 {
835 	pv_yield_allow_steal = !!val;
836 
837 	return 0;
838 }
839 
840 static int pv_yield_allow_steal_get(void *data, u64 *val)
841 {
842 	*val = pv_yield_allow_steal;
843 
844 	return 0;
845 }
846 
847 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_allow_steal, pv_yield_allow_steal_get, pv_yield_allow_steal_set, "%llu\n");
848 
849 static int pv_spin_on_preempted_owner_set(void *data, u64 val)
850 {
851 	pv_spin_on_preempted_owner = !!val;
852 
853 	return 0;
854 }
855 
856 static int pv_spin_on_preempted_owner_get(void *data, u64 *val)
857 {
858 	*val = pv_spin_on_preempted_owner;
859 
860 	return 0;
861 }
862 
863 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_spin_on_preempted_owner, pv_spin_on_preempted_owner_get, pv_spin_on_preempted_owner_set, "%llu\n");
864 
865 static int pv_sleepy_lock_set(void *data, u64 val)
866 {
867 	pv_sleepy_lock = !!val;
868 
869 	return 0;
870 }
871 
872 static int pv_sleepy_lock_get(void *data, u64 *val)
873 {
874 	*val = pv_sleepy_lock;
875 
876 	return 0;
877 }
878 
879 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock, pv_sleepy_lock_get, pv_sleepy_lock_set, "%llu\n");
880 
881 static int pv_sleepy_lock_sticky_set(void *data, u64 val)
882 {
883 	pv_sleepy_lock_sticky = !!val;
884 
885 	return 0;
886 }
887 
888 static int pv_sleepy_lock_sticky_get(void *data, u64 *val)
889 {
890 	*val = pv_sleepy_lock_sticky;
891 
892 	return 0;
893 }
894 
895 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_sticky, pv_sleepy_lock_sticky_get, pv_sleepy_lock_sticky_set, "%llu\n");
896 
897 static int pv_sleepy_lock_interval_ns_set(void *data, u64 val)
898 {
899 	pv_sleepy_lock_interval_ns = val;
900 
901 	return 0;
902 }
903 
904 static int pv_sleepy_lock_interval_ns_get(void *data, u64 *val)
905 {
906 	*val = pv_sleepy_lock_interval_ns;
907 
908 	return 0;
909 }
910 
911 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_interval_ns, pv_sleepy_lock_interval_ns_get, pv_sleepy_lock_interval_ns_set, "%llu\n");
912 
913 static int pv_sleepy_lock_factor_set(void *data, u64 val)
914 {
915 	pv_sleepy_lock_factor = val;
916 
917 	return 0;
918 }
919 
920 static int pv_sleepy_lock_factor_get(void *data, u64 *val)
921 {
922 	*val = pv_sleepy_lock_factor;
923 
924 	return 0;
925 }
926 
927 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_factor, pv_sleepy_lock_factor_get, pv_sleepy_lock_factor_set, "%llu\n");
928 
929 static int pv_yield_prev_set(void *data, u64 val)
930 {
931 	pv_yield_prev = !!val;
932 
933 	return 0;
934 }
935 
936 static int pv_yield_prev_get(void *data, u64 *val)
937 {
938 	*val = pv_yield_prev;
939 
940 	return 0;
941 }
942 
943 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n");
944 
945 static int pv_yield_propagate_owner_set(void *data, u64 val)
946 {
947 	pv_yield_propagate_owner = !!val;
948 
949 	return 0;
950 }
951 
952 static int pv_yield_propagate_owner_get(void *data, u64 *val)
953 {
954 	*val = pv_yield_propagate_owner;
955 
956 	return 0;
957 }
958 
959 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_propagate_owner, pv_yield_propagate_owner_get, pv_yield_propagate_owner_set, "%llu\n");
960 
961 static int pv_prod_head_set(void *data, u64 val)
962 {
963 	pv_prod_head = !!val;
964 
965 	return 0;
966 }
967 
968 static int pv_prod_head_get(void *data, u64 *val)
969 {
970 	*val = pv_prod_head;
971 
972 	return 0;
973 }
974 
975 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_prod_head, pv_prod_head_get, pv_prod_head_set, "%llu\n");
976 
977 static __init int spinlock_debugfs_init(void)
978 {
979 	debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins);
980 	debugfs_create_file("qspl_remote_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_remote_steal_spins);
981 	debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins);
982 	if (is_shared_processor()) {
983 		debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner);
984 		debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal);
985 		debugfs_create_file("qspl_pv_spin_on_preempted_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_spin_on_preempted_owner);
986 		debugfs_create_file("qspl_pv_sleepy_lock", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock);
987 		debugfs_create_file("qspl_pv_sleepy_lock_sticky", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_sticky);
988 		debugfs_create_file("qspl_pv_sleepy_lock_interval_ns", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_interval_ns);
989 		debugfs_create_file("qspl_pv_sleepy_lock_factor", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_factor);
990 		debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
991 		debugfs_create_file("qspl_pv_yield_propagate_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_propagate_owner);
992 		debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head);
993 	}
994 
995 	return 0;
996 }
997 device_initcall(spinlock_debugfs_init);
998