// SPDX-License-Identifier: GPL-2.0-or-later
#include <linux/bug.h>
#include <linux/compiler.h>
#include <linux/export.h>
#include <linux/percpu.h>
#include <linux/processor.h>
#include <linux/smp.h>
#include <linux/topology.h>
#include <linux/sched/clock.h>
#include <asm/qspinlock.h>
#include <asm/paravirt.h>

#define MAX_NODES	4
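
/*
 * Note: MAX_NODES bounds lock nesting per CPU; four levels presumably cover
 * task, softirq, hardirq and NMI context, matching the generic qspinlock.
 * If the limit is ever exceeded, the slow path falls back to spinning on
 * queued_spin_trylock() without queueing (see queued_spin_lock_mcs_queue()).
 */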

struct qnode {
	struct qnode *next;
	struct qspinlock *lock;
	int cpu;
	int yield_cpu;
	u8 locked; /* 1 if lock acquired */
};

struct qnodes {
	int count;
	struct qnode nodes[MAX_NODES];
};

/* Tuning parameters */
static int steal_spins __read_mostly = (1 << 5);
static int remote_steal_spins __read_mostly = (1 << 2);
#if _Q_SPIN_TRY_LOCK_STEAL == 1
static const bool maybe_stealers = true;
#else
static bool maybe_stealers __read_mostly = true;
#endif
static int head_spins __read_mostly = (1 << 8);

static bool pv_yield_owner __read_mostly = true;
static bool pv_yield_allow_steal __read_mostly = false;
static bool pv_spin_on_preempted_owner __read_mostly = false;
static bool pv_sleepy_lock __read_mostly = true;
static bool pv_sleepy_lock_sticky __read_mostly = false;
static u64 pv_sleepy_lock_interval_ns __read_mostly = 0;
static int pv_sleepy_lock_factor __read_mostly = 256;
static bool pv_yield_prev __read_mostly = true;
static bool pv_yield_propagate_owner __read_mostly = true;
static bool pv_prod_head __read_mostly = false;
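
/*
 * The tuning knobs above are runtime-adjustable through the qspl_* debugfs
 * files created in spinlock_debugfs_init() at the bottom of this file.
 */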

static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
static DEFINE_PER_CPU_ALIGNED(u64, sleepy_lock_seen_clock);

#if _Q_SPIN_SPEC_BARRIER == 1
#define spec_barrier() do { asm volatile("ori 31,31,0" ::: "memory"); } while (0)
#else
#define spec_barrier() do { } while (0)
#endif

static __always_inline bool recently_sleepy(void)
{
	/* pv_sleepy_lock is true when this is called */
	if (pv_sleepy_lock_interval_ns) {
		u64 seen = this_cpu_read(sleepy_lock_seen_clock);

		if (seen) {
			u64 delta = sched_clock() - seen;
			if (delta < pv_sleepy_lock_interval_ns)
				return true;
			this_cpu_write(sleepy_lock_seen_clock, 0);
		}
	}

	return false;
}

static __always_inline int get_steal_spins(bool paravirt, bool sleepy)
{
	if (paravirt && sleepy)
		return steal_spins * pv_sleepy_lock_factor;
	else
		return steal_spins;
}

static __always_inline int get_remote_steal_spins(bool paravirt, bool sleepy)
{
	if (paravirt && sleepy)
		return remote_steal_spins * pv_sleepy_lock_factor;
	else
		return remote_steal_spins;
}

static __always_inline int get_head_spins(bool paravirt, bool sleepy)
{
	if (paravirt && sleepy)
		return head_spins * pv_sleepy_lock_factor;
	else
		return head_spins;
}

static inline u32 encode_tail_cpu(int cpu)
{
	return (cpu + 1) << _Q_TAIL_CPU_OFFSET;
}

static inline int decode_tail_cpu(u32 val)
{
	return (val >> _Q_TAIL_CPU_OFFSET) - 1;
}

static inline int get_owner_cpu(u32 val)
{
	return (val & _Q_OWNER_CPU_MASK) >> _Q_OWNER_CPU_OFFSET;
}
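
/*
 * Worked example of the tail encoding above: the CPU number is stored
 * biased by one, so CPU 0 encodes to (1 << _Q_TAIL_CPU_OFFSET) and a tail
 * field of 0 unambiguously means "no queued waiters". decode_tail_cpu()
 * undoes the bias.
 */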

/*
 * Try to acquire the lock if it was not already locked. If the tail matches
 * mytail then clear it, otherwise leave it unchanged. Return previous value.
 *
 * This is used by the head of the queue to acquire the lock and clean up
 * its tail if it was the last one queued.
 */
static __always_inline u32 trylock_clean_tail(struct qspinlock *lock, u32 tail)
{
	u32 newval = queued_spin_encode_locked_val();
	u32 prev, tmp;

	asm volatile(
"1:	lwarx	%0,0,%2,%7	# trylock_clean_tail		\n"
	/* This test is necessary if there could be stealers */
"	andi.	%1,%0,%5					\n"
"	bne	3f						\n"
	/* Test whether the lock tail == mytail */
"	and	%1,%0,%6					\n"
"	cmpw	0,%1,%3						\n"
	/* Merge the new locked value */
"	or	%1,%1,%4					\n"
"	bne	2f						\n"
	/* If the lock tail matched, then clear it, otherwise leave it. */
"	andc	%1,%1,%6					\n"
"2:	stwcx.	%1,0,%2						\n"
"	bne-	1b						\n"
"\t" PPC_ACQUIRE_BARRIER "					\n"
"3:								\n"
	: "=&r" (prev), "=&r" (tmp)
	: "r" (&lock->val), "r"(tail), "r" (newval),
	  "i" (_Q_LOCKED_VAL),
	  "r" (_Q_TAIL_CPU_MASK),
	  "i" (_Q_SPIN_EH_HINT)
	: "cr0", "memory");

	return prev;
}
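
/*
 * Roughly, as a non-atomic C sketch (the real thing is a single
 * larx/stcx. sequence with acquire semantics on success):
 *
 *	prev = lock->val;
 *	if (prev & _Q_LOCKED_VAL)
 *		return prev;				// owned, maybe by a stealer
 *	new = (prev & _Q_TAIL_CPU_MASK) | newval;	// take the lock, drop other flag bits
 *	if ((prev & _Q_TAIL_CPU_MASK) == tail)
 *		new &= ~_Q_TAIL_CPU_MASK;		// we were the last queued
 *	lock->val = new;
 *	return prev;
 */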

/*
 * Publish our tail, replacing previous tail. Return previous value.
 *
 * This provides a release barrier for publishing node, this pairs with the
 * acquire barrier in get_tail_qnode() when the next CPU finds this tail
 * value.
 */
static __always_inline u32 publish_tail_cpu(struct qspinlock *lock, u32 tail)
{
	u32 prev, tmp;

	kcsan_release();

	asm volatile(
"\t" PPC_RELEASE_BARRIER "				\n"
"1:	lwarx	%0,0,%2		# publish_tail_cpu	\n"
"	andc	%1,%0,%4				\n"
"	or	%1,%1,%3				\n"
"	stwcx.	%1,0,%2					\n"
"	bne-	1b					\n"
	: "=&r" (prev), "=&r"(tmp)
	: "r" (&lock->val), "r" (tail), "r"(_Q_TAIL_CPU_MASK)
	: "cr0", "memory");

	return prev;
}
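
/*
 * In effect (sketch), this atomically performs:
 *	prev = lock->val;
 *	lock->val = (prev & ~_Q_TAIL_CPU_MASK) | tail;
 *	return prev;
 * with release ordering, so the initialised qnode is visible before the
 * tail value that names it.
 */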

static __always_inline u32 set_mustq(struct qspinlock *lock)
{
	u32 prev;

	asm volatile(
"1:	lwarx	%0,0,%1		# set_mustq	\n"
"	or	%0,%0,%2			\n"
"	stwcx.	%0,0,%1				\n"
"	bne-	1b				\n"
	: "=&r" (prev)
	: "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
	: "cr0", "memory");

	return prev;
}

static __always_inline u32 clear_mustq(struct qspinlock *lock)
{
	u32 prev;

	asm volatile(
"1:	lwarx	%0,0,%1		# clear_mustq	\n"
"	andc	%0,%0,%2			\n"
"	stwcx.	%0,0,%1				\n"
"	bne-	1b				\n"
	: "=&r" (prev)
	: "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
	: "cr0", "memory");

	return prev;
}

static __always_inline bool try_set_sleepy(struct qspinlock *lock, u32 old)
{
	u32 prev;
	u32 new = old | _Q_SLEEPY_VAL;

	BUG_ON(!(old & _Q_LOCKED_VAL));
	BUG_ON(old & _Q_SLEEPY_VAL);

	asm volatile(
"1:	lwarx	%0,0,%1		# try_set_sleepy	\n"
"	cmpw	0,%0,%2					\n"
"	bne-	2f					\n"
"	stwcx.	%3,0,%1					\n"
"	bne-	1b					\n"
"2:							\n"
	: "=&r" (prev)
	: "r" (&lock->val), "r"(old), "r" (new)
	: "cr0", "memory");

	return likely(prev == old);
}

static __always_inline void seen_sleepy_owner(struct qspinlock *lock, u32 val)
{
	if (pv_sleepy_lock) {
		if (pv_sleepy_lock_interval_ns)
			this_cpu_write(sleepy_lock_seen_clock, sched_clock());
		if (!(val & _Q_SLEEPY_VAL))
			try_set_sleepy(lock, val);
	}
}

static __always_inline void seen_sleepy_lock(void)
{
	if (pv_sleepy_lock && pv_sleepy_lock_interval_ns)
		this_cpu_write(sleepy_lock_seen_clock, sched_clock());
}

static __always_inline void seen_sleepy_node(struct qspinlock *lock, u32 val)
{
	if (pv_sleepy_lock) {
		if (pv_sleepy_lock_interval_ns)
			this_cpu_write(sleepy_lock_seen_clock, sched_clock());
		if (val & _Q_LOCKED_VAL) {
			if (!(val & _Q_SLEEPY_VAL))
				try_set_sleepy(lock, val);
		}
	}
}

static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val)
{
	int cpu = decode_tail_cpu(val);
	struct qnodes *qnodesp = per_cpu_ptr(&qnodes, cpu);
	int idx;

	/*
	 * After publishing the new tail and finding a previous tail in the
	 * previous val (which is the control dependency), this barrier
	 * orders the release barrier in publish_tail_cpu performed by the
	 * last CPU, with subsequently looking at its qnode structures
	 * after the barrier.
	 */
	smp_acquire__after_ctrl_dep();

	for (idx = 0; idx < MAX_NODES; idx++) {
		struct qnode *qnode = &qnodesp->nodes[idx];
		if (qnode->lock == lock)
			return qnode;
	}

	BUG();
}
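
/*
 * The tail encodes only the CPU number, not the node index, so the previous
 * waiter is found by scanning that CPU's small nodes[] array for the entry
 * whose ->lock matches. A miss would indicate corruption, hence the BUG()
 * above.
 */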

/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
static __always_inline bool __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool mustq)
{
	int owner;
	u32 yield_count;
	bool preempted = false;

	BUG_ON(!(val & _Q_LOCKED_VAL));

	if (!paravirt)
		goto relax;

	if (!pv_yield_owner)
		goto relax;

	owner = get_owner_cpu(val);
	yield_count = yield_count_of(owner);

	if ((yield_count & 1) == 0)
		goto relax; /* owner vcpu is running */

	spin_end();

	seen_sleepy_owner(lock, val);
	preempted = true;

	/*
	 * Read the lock word after sampling the yield count. On the other side
	 * there may be a wmb because the yield count update is done by the
	 * hypervisor preemption and the value update by the OS, however this
	 * ordering might reduce the chance of out of order accesses and
	 * improve the heuristic.
	 */
	smp_rmb();

	if (READ_ONCE(lock->val) == val) {
		if (mustq)
			clear_mustq(lock);
		yield_to_preempted(owner, yield_count);
		if (mustq)
			set_mustq(lock);
		spin_begin();

		/* Don't relax if we yielded. Maybe we should? */
		return preempted;
	}
	spin_begin();
relax:
	spin_cpu_relax();

	return preempted;
}
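
/*
 * Note on the heuristic above: an odd yield count means the owner vCPU is
 * currently preempted by the hypervisor, so rather than burning cycles we
 * donate them with yield_to_preempted(). The lock word is re-checked first
 * so we do not yield to a CPU that has already released the lock.
 */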

/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
static __always_inline bool yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
{
	return __yield_to_locked_owner(lock, val, paravirt, false);
}

/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
static __always_inline bool yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
{
	bool mustq = false;

	if ((val & _Q_MUST_Q_VAL) && pv_yield_allow_steal)
		mustq = true;

	return __yield_to_locked_owner(lock, val, paravirt, mustq);
}

static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int *set_yield_cpu, bool paravirt)
{
	struct qnode *next;
	int owner;

	if (!paravirt)
		return;
	if (!pv_yield_propagate_owner)
		return;

	owner = get_owner_cpu(val);
	if (*set_yield_cpu == owner)
		return;

	next = READ_ONCE(node->next);
	if (!next)
		return;

	if (vcpu_is_preempted(owner)) {
		next->yield_cpu = owner;
		*set_yield_cpu = owner;
	} else if (*set_yield_cpu != -1) {
		next->yield_cpu = owner;
		*set_yield_cpu = owner;
	}
}

/* Called inside spin_begin() */
static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt)
{
	int prev_cpu = decode_tail_cpu(val);
	u32 yield_count;
	int yield_cpu;
	bool preempted = false;

	if (!paravirt)
		goto relax;

	if (!pv_yield_propagate_owner)
		goto yield_prev;

	yield_cpu = READ_ONCE(node->yield_cpu);
	if (yield_cpu == -1) {
		/* Propagate back the -1 CPU */
		if (node->next && node->next->yield_cpu != -1)
			node->next->yield_cpu = yield_cpu;
		goto yield_prev;
	}

	yield_count = yield_count_of(yield_cpu);
	if ((yield_count & 1) == 0)
		goto yield_prev; /* owner vcpu is running */

	if (get_owner_cpu(READ_ONCE(lock->val)) != yield_cpu)
		goto yield_prev; /* re-sample lock owner */

	spin_end();

	preempted = true;
	seen_sleepy_node(lock, val);

	smp_rmb();

	if (yield_cpu == node->yield_cpu) {
		if (node->next && node->next->yield_cpu != yield_cpu)
			node->next->yield_cpu = yield_cpu;
		yield_to_preempted(yield_cpu, yield_count);
		spin_begin();
		return preempted;
	}
	spin_begin();

yield_prev:
	if (!pv_yield_prev)
		goto relax;

	yield_count = yield_count_of(prev_cpu);
	if ((yield_count & 1) == 0)
		goto relax; /* owner vcpu is running */

	spin_end();

	preempted = true;
	seen_sleepy_node(lock, val);

	smp_rmb(); /* See __yield_to_locked_owner comment */

	if (!READ_ONCE(node->locked)) {
		yield_to_preempted(prev_cpu, yield_count);
		spin_begin();
		return preempted;
	}
	spin_begin();

relax:
	spin_cpu_relax();

	return preempted;
}

static __always_inline bool steal_break(u32 val, int iters, bool paravirt, bool sleepy)
{
	if (iters >= get_steal_spins(paravirt, sleepy))
		return true;

	if (IS_ENABLED(CONFIG_NUMA) &&
	    (iters >= get_remote_steal_spins(paravirt, sleepy))) {
		int cpu = get_owner_cpu(val);
		if (numa_node_id() != cpu_to_node(cpu))
			return true;
	}
	return false;
}
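
/*
 * For example, with the defaults above (steal_spins = 1 << 5,
 * remote_steal_spins = 1 << 2), a would-be stealer on the owner's NUMA node
 * keeps trying for up to 32 iterations, while one on a remote node gives up
 * and queues after 4.
 */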

static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt)
{
	bool seen_preempted = false;
	bool sleepy = false;
	int iters = 0;
	u32 val;

	if (!steal_spins) {
		/* XXX: should spin_on_preempted_owner do anything here? */
		return false;
	}

	/* Attempt to steal the lock */
	spin_begin();
	do {
		bool preempted = false;

		val = READ_ONCE(lock->val);
		if (val & _Q_MUST_Q_VAL)
			break;
		spec_barrier();

		if (unlikely(!(val & _Q_LOCKED_VAL))) {
			spin_end();
			if (__queued_spin_trylock_steal(lock))
				return true;
			spin_begin();
		} else {
			preempted = yield_to_locked_owner(lock, val, paravirt);
		}

		if (paravirt && pv_sleepy_lock) {
			if (!sleepy) {
				if (val & _Q_SLEEPY_VAL) {
					seen_sleepy_lock();
					sleepy = true;
				} else if (recently_sleepy()) {
					sleepy = true;
				}
			}
			if (pv_sleepy_lock_sticky && seen_preempted &&
			    !(val & _Q_SLEEPY_VAL)) {
				if (try_set_sleepy(lock, val))
					val |= _Q_SLEEPY_VAL;
			}
		}

		if (preempted) {
			seen_preempted = true;
			sleepy = true;
			if (!pv_spin_on_preempted_owner)
				iters++;
			/*
			 * pv_spin_on_preempted_owner doesn't increase iters
			 * while the owner is preempted -- we won't interfere
			 * with it by definition. This could introduce some
			 * latency issue if we continually observe preempted
			 * owners, but hopefully that's a rare corner case of
			 * a badly oversubscribed system.
			 */
		} else {
			iters++;
		}
	} while (!steal_break(val, iters, paravirt, sleepy));

	spin_end();

	return false;
}

static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool paravirt)
{
	struct qnodes *qnodesp;
	struct qnode *next, *node;
	u32 val, old, tail;
	bool seen_preempted = false;
	bool sleepy = false;
	bool mustq = false;
	int idx;
	int set_yield_cpu = -1;
	int iters = 0;

	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));

	qnodesp = this_cpu_ptr(&qnodes);
	if (unlikely(qnodesp->count >= MAX_NODES)) {
		spec_barrier();
		while (!queued_spin_trylock(lock))
			cpu_relax();
		return;
	}

	idx = qnodesp->count++;
	/*
	 * Ensure that we increment the head node->count before initialising
	 * the actual node. If the compiler is kind enough to reorder these
	 * stores, then an IRQ could overwrite our assignments.
	 */
	barrier();
	node = &qnodesp->nodes[idx];
	node->next = NULL;
	node->lock = lock;
	node->cpu = smp_processor_id();
	node->yield_cpu = -1;
	node->locked = 0;

	tail = encode_tail_cpu(node->cpu);

	/*
	 * Assign all attributes of a node before it can be published.
	 * Issues an lwsync, serving as a release barrier, as well as a
	 * compiler barrier.
	 */
	old = publish_tail_cpu(lock, tail);

	/*
	 * If there was a previous node; link it and wait until reaching the
	 * head of the waitqueue.
	 */
	if (old & _Q_TAIL_CPU_MASK) {
		struct qnode *prev = get_tail_qnode(lock, old);

		/* Link @node into the waitqueue. */
		WRITE_ONCE(prev->next, node);

		/* Wait for mcs node lock to be released */
		spin_begin();
		while (!READ_ONCE(node->locked)) {
			spec_barrier();

			if (yield_to_prev(lock, node, old, paravirt))
				seen_preempted = true;
		}
		spec_barrier();
		spin_end();

		/* Clear out stale propagated yield_cpu */
		if (paravirt && pv_yield_propagate_owner && node->yield_cpu != -1)
			node->yield_cpu = -1;

		smp_rmb(); /* acquire barrier for the mcs lock */

		/*
		 * Generic qspinlocks have this prefetch here, but it seems
		 * like it could cause additional line transitions because
		 * the waiter will keep loading from it.
		 */
		if (_Q_SPIN_PREFETCH_NEXT) {
			next = READ_ONCE(node->next);
			if (next)
				prefetchw(next);
		}
	}

	/* We're at the head of the waitqueue, wait for the lock. */
again:
	spin_begin();
	for (;;) {
		bool preempted;

		val = READ_ONCE(lock->val);
		if (!(val & _Q_LOCKED_VAL))
			break;
		spec_barrier();

		if (paravirt && pv_sleepy_lock && maybe_stealers) {
			if (!sleepy) {
				if (val & _Q_SLEEPY_VAL) {
					seen_sleepy_lock();
					sleepy = true;
				} else if (recently_sleepy()) {
					sleepy = true;
				}
			}
			if (pv_sleepy_lock_sticky && seen_preempted &&
			    !(val & _Q_SLEEPY_VAL)) {
				if (try_set_sleepy(lock, val))
					val |= _Q_SLEEPY_VAL;
			}
		}

		propagate_yield_cpu(node, val, &set_yield_cpu, paravirt);
		preempted = yield_head_to_locked_owner(lock, val, paravirt);
		if (!maybe_stealers)
			continue;

		if (preempted)
			seen_preempted = true;

		if (paravirt && preempted) {
			sleepy = true;

			if (!pv_spin_on_preempted_owner)
				iters++;
		} else {
			iters++;
		}

		if (!mustq && iters >= get_head_spins(paravirt, sleepy)) {
			mustq = true;
			set_mustq(lock);
			val |= _Q_MUST_Q_VAL;
		}
	}
	spec_barrier();
	spin_end();

	/* If we're the last queued, must clean up the tail. */
	old = trylock_clean_tail(lock, tail);
	if (unlikely(old & _Q_LOCKED_VAL)) {
		BUG_ON(!maybe_stealers);
		goto again; /* Can only be true if maybe_stealers. */
	}

	if ((old & _Q_TAIL_CPU_MASK) == tail)
		goto release; /* We were the tail, no next. */

	/* There is a next, must wait for node->next != NULL (MCS protocol) */
	next = READ_ONCE(node->next);
	if (!next) {
		spin_begin();
		while (!(next = READ_ONCE(node->next)))
			cpu_relax();
		spin_end();
	}
	spec_barrier();

	/*
	 * Unlock the next mcs waiter node. Release barrier is not required
	 * here because the acquirer is only accessing the lock word, and
	 * the acquire barrier we took the lock with orders that update vs
	 * this store to locked. The corresponding barrier is the smp_rmb()
	 * acquire barrier for mcs lock, above.
	 */
	if (paravirt && pv_prod_head) {
		int next_cpu = next->cpu;

		WRITE_ONCE(next->locked, 1);
		if (_Q_SPIN_MISO)
			asm volatile("miso" ::: "memory");
		if (vcpu_is_preempted(next_cpu))
			prod_cpu(next_cpu);
	} else {
		WRITE_ONCE(next->locked, 1);
		if (_Q_SPIN_MISO)
			asm volatile("miso" ::: "memory");
	}

release:
	/*
	 * Clear the lock before releasing the node, as another CPU might see stale
	 * values if an interrupt occurs after we increment qnodesp->count
	 * but before node->lock is initialized. The barrier ensures that
	 * there are no further stores to the node after it has been released.
	 */
	node->lock = NULL;
	barrier();
	qnodesp->count--;
}
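
/*
 * Entry point for the slow path. The inline queued_spin_lock() fast path
 * (in asm/qspinlock.h) is expected to have tried and failed to acquire an
 * uncontended lock before landing here; we first optionally try to steal
 * the lock without queueing, then fall back to the MCS queue above.
 */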

void queued_spin_lock_slowpath(struct qspinlock *lock)
{
	/*
	 * This looks funny, but it induces the compiler to inline both
	 * sides of the branch rather than share code as when the condition
	 * is passed as the paravirt argument to the functions.
	 */
	if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) {
		if (try_to_steal_lock(lock, true)) {
			spec_barrier();
			return;
		}
		queued_spin_lock_mcs_queue(lock, true);
	} else {
		if (try_to_steal_lock(lock, false)) {
			spec_barrier();
			return;
		}
		queued_spin_lock_mcs_queue(lock, false);
	}
}
EXPORT_SYMBOL(queued_spin_lock_slowpath);

#ifdef CONFIG_PARAVIRT_SPINLOCKS
void pv_spinlocks_init(void)
{
}
#endif

#include <linux/debugfs.h>

static int steal_spins_set(void *data, u64 val)
{
#if _Q_SPIN_TRY_LOCK_STEAL == 1
	/* MAYBE_STEAL remains true */
	steal_spins = val;
#else
	static DEFINE_MUTEX(lock);

	/*
	 * The lock slow path has a !maybe_stealers case that can assume
	 * the head of queue will not see concurrent waiters. That waiter
	 * is unsafe in the presence of stealers, so stealers and that
	 * waiter must be kept away from one another.
	 */

	mutex_lock(&lock);
	if (val && !steal_spins) {
		maybe_stealers = true;
		/* wait for queue head waiter to go away */
		synchronize_rcu();
		steal_spins = val;
	} else if (!val && steal_spins) {
		steal_spins = val;
		/* wait for all possible stealers to go away */
		synchronize_rcu();
		maybe_stealers = false;
	} else {
		steal_spins = val;
	}
	mutex_unlock(&lock);
#endif

	return 0;
}

static int steal_spins_get(void *data, u64 *val)
{
	*val = steal_spins;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_steal_spins, steal_spins_get, steal_spins_set, "%llu\n");
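
/*
 * Example usage (assuming debugfs is mounted at /sys/kernel/debug; the arch
 * debugfs directory for powerpc is typically /sys/kernel/debug/powerpc):
 *
 *	# cat /sys/kernel/debug/powerpc/qspl_steal_spins
 *	# echo 0 > /sys/kernel/debug/powerpc/qspl_steal_spins	(disable stealing)
 *
 * The same pattern applies to the other qspl_* files registered below.
 */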

static int remote_steal_spins_set(void *data, u64 val)
{
	remote_steal_spins = val;

	return 0;
}

static int remote_steal_spins_get(void *data, u64 *val)
{
	*val = remote_steal_spins;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_remote_steal_spins, remote_steal_spins_get, remote_steal_spins_set, "%llu\n");

static int head_spins_set(void *data, u64 val)
{
	head_spins = val;

	return 0;
}

static int head_spins_get(void *data, u64 *val)
{
	*val = head_spins;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_head_spins, head_spins_get, head_spins_set, "%llu\n");

static int pv_yield_owner_set(void *data, u64 val)
{
	pv_yield_owner = !!val;

	return 0;
}

static int pv_yield_owner_get(void *data, u64 *val)
{
	*val = pv_yield_owner;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n");

static int pv_yield_allow_steal_set(void *data, u64 val)
{
	pv_yield_allow_steal = !!val;

	return 0;
}

static int pv_yield_allow_steal_get(void *data, u64 *val)
{
	*val = pv_yield_allow_steal;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_allow_steal, pv_yield_allow_steal_get, pv_yield_allow_steal_set, "%llu\n");

static int pv_spin_on_preempted_owner_set(void *data, u64 val)
{
	pv_spin_on_preempted_owner = !!val;

	return 0;
}

static int pv_spin_on_preempted_owner_get(void *data, u64 *val)
{
	*val = pv_spin_on_preempted_owner;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_spin_on_preempted_owner, pv_spin_on_preempted_owner_get, pv_spin_on_preempted_owner_set, "%llu\n");

static int pv_sleepy_lock_set(void *data, u64 val)
{
	pv_sleepy_lock = !!val;

	return 0;
}

static int pv_sleepy_lock_get(void *data, u64 *val)
{
	*val = pv_sleepy_lock;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock, pv_sleepy_lock_get, pv_sleepy_lock_set, "%llu\n");

static int pv_sleepy_lock_sticky_set(void *data, u64 val)
{
	pv_sleepy_lock_sticky = !!val;

	return 0;
}

static int pv_sleepy_lock_sticky_get(void *data, u64 *val)
{
	*val = pv_sleepy_lock_sticky;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_sticky, pv_sleepy_lock_sticky_get, pv_sleepy_lock_sticky_set, "%llu\n");

static int pv_sleepy_lock_interval_ns_set(void *data, u64 val)
{
	pv_sleepy_lock_interval_ns = val;

	return 0;
}

static int pv_sleepy_lock_interval_ns_get(void *data, u64 *val)
{
	*val = pv_sleepy_lock_interval_ns;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_interval_ns, pv_sleepy_lock_interval_ns_get, pv_sleepy_lock_interval_ns_set, "%llu\n");

static int pv_sleepy_lock_factor_set(void *data, u64 val)
{
	pv_sleepy_lock_factor = val;

	return 0;
}

static int pv_sleepy_lock_factor_get(void *data, u64 *val)
{
	*val = pv_sleepy_lock_factor;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_factor, pv_sleepy_lock_factor_get, pv_sleepy_lock_factor_set, "%llu\n");

static int pv_yield_prev_set(void *data, u64 val)
{
	pv_yield_prev = !!val;

	return 0;
}

static int pv_yield_prev_get(void *data, u64 *val)
{
	*val = pv_yield_prev;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n");

static int pv_yield_propagate_owner_set(void *data, u64 val)
{
	pv_yield_propagate_owner = !!val;

	return 0;
}

static int pv_yield_propagate_owner_get(void *data, u64 *val)
{
	*val = pv_yield_propagate_owner;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_propagate_owner, pv_yield_propagate_owner_get, pv_yield_propagate_owner_set, "%llu\n");

static int pv_prod_head_set(void *data, u64 val)
{
	pv_prod_head = !!val;

	return 0;
}

static int pv_prod_head_get(void *data, u64 *val)
{
	*val = pv_prod_head;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_prod_head, pv_prod_head_get, pv_prod_head_set, "%llu\n");

static __init int spinlock_debugfs_init(void)
{
	debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins);
	debugfs_create_file("qspl_remote_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_remote_steal_spins);
	debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins);
	if (is_shared_processor()) {
		debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner);
		debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal);
		debugfs_create_file("qspl_pv_spin_on_preempted_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_spin_on_preempted_owner);
		debugfs_create_file("qspl_pv_sleepy_lock", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock);
		debugfs_create_file("qspl_pv_sleepy_lock_sticky", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_sticky);
		debugfs_create_file("qspl_pv_sleepy_lock_interval_ns", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_interval_ns);
		debugfs_create_file("qspl_pv_sleepy_lock_factor", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_factor);
		debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
		debugfs_create_file("qspl_pv_yield_propagate_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_propagate_owner);
		debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head);
	}

	return 0;
}
device_initcall(spinlock_debugfs_init);