/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_QSPINLOCK_H
#define _ASM_POWERPC_QSPINLOCK_H

#include <linux/compiler.h>
#include <asm/qspinlock_types.h>
#include <asm/paravirt.h>

#ifdef CONFIG_PPC64
/*
 * Use the EH=1 hint for accesses that result in the lock being acquired.
 * The hardware is supposed to optimise this pattern by holding the lock
 * cacheline longer, and releasing when a store to the same memory (the
 * unlock) is performed.
 */
#define _Q_SPIN_EH_HINT 1
#else
#define _Q_SPIN_EH_HINT 0
#endif

/*
 * The trylock itself may steal. This makes trylocks slightly stronger and
 * makes locks slightly more efficient when stealing.
 *
 * This is a compile-time option, so if it is true there may always be
 * stealers and the nosteal paths become unused (see queued_spin_trylock()
 * below).
 */
#define _Q_SPIN_TRY_LOCK_STEAL 1

/*
 * Put a speculation barrier after testing the lock/node and finding it
 * busy. Try to prevent pointless speculation in slow paths.
 *
 * Slows down the lockstorm microbenchmark with no stealing, where locking
 * is purely FIFO through the queue. May have more benefit in real workloads
 * where speculating into the wrong place could have a greater cost.
 */
#define _Q_SPIN_SPEC_BARRIER 0

#ifdef CONFIG_PPC64
/*
 * Execute a miso instruction after passing the MCS lock ownership to the
 * queue head. Miso is intended to make stores visible to other CPUs sooner.
 *
 * This seems to make the lockstorm microbenchmark nospin test go slightly
 * faster on POWER10, but it is disabled for now.
 */
#define _Q_SPIN_MISO 0
#else
#define _Q_SPIN_MISO 0
#endif

#ifdef CONFIG_PPC64
/*
 * This executes miso after an unlock of the lock word, so that ownership
 * passes to the next CPU sooner. This will slow the uncontended path to
 * some degree. There is no evidence it helps yet.
 */
#define _Q_SPIN_MISO_UNLOCK 0
#else
#define _Q_SPIN_MISO_UNLOCK 0
#endif

/*
 * Prefetching the next queue node seems to slow down the lockstorm
 * microbenchmark; the suspicion is that the queue node just has to become
 * shared again right afterwards when its waiter spins on the lock field.
 */
#define _Q_SPIN_PREFETCH_NEXT 0

static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
{
	return READ_ONCE(lock->val);
}

static __always_inline int queued_spin_value_unlocked(struct qspinlock lock)
{
	return !lock.val;
}

static __always_inline int queued_spin_is_contended(struct qspinlock *lock)
{
	return !!(READ_ONCE(lock->val) & _Q_TAIL_CPU_MASK);
}

static __always_inline u32 queued_spin_encode_locked_val(void)
{
	/* XXX: make this use lock value in paca like simple spinlocks? */
	return _Q_LOCKED_VAL | (smp_processor_id() << _Q_OWNER_CPU_OFFSET);
}
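
/*
 * Illustration only (a hedged sketch, not used by the kernel): recover the
 * owner CPU from a value produced by queued_spin_encode_locked_val(). It
 * assumes the _Q_OWNER_CPU_MASK/_Q_OWNER_CPU_OFFSET definitions from
 * asm/qspinlock_types.h describe the owner field.
 */
static __always_inline int __example_spin_owner_cpu(u32 val)
{
	return (val & _Q_OWNER_CPU_MASK) >> _Q_OWNER_CPU_OFFSET;
}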

static __always_inline int __queued_spin_trylock_nosteal(struct qspinlock *lock)
{
	u32 new = queued_spin_encode_locked_val();
	u32 prev;

	/* Trylock succeeds only when unlocked and no queued nodes */
	asm volatile(
"1:	lwarx	%0,0,%1,%3	# __queued_spin_trylock_nosteal		\n"
"	cmpwi	0,%0,0							\n"
"	bne-	2f							\n"
"	stwcx.	%2,0,%1							\n"
"	bne-	1b							\n"
"\t"	PPC_ACQUIRE_BARRIER "						\n"
"2:									\n"
	: "=&r" (prev)
	: "r" (&lock->val), "r" (new),
	  "i" (_Q_SPIN_EH_HINT)
	: "cr0", "memory");

	return likely(prev == 0);
}
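
/*
 * Illustration only, a rough plain-C sketch of the operation above (it
 * assumes cmpxchg_acquire() is usable on the lock word at this point, which
 * this header does not guarantee): acquire iff the whole word, lock and
 * tail, is zero. The real asm additionally passes the EH hint to lwarx,
 * which a generic cmpxchg cannot express.
 */
static __always_inline int __example_trylock_nosteal_cmpxchg(struct qspinlock *lock)
{
	u32 new = queued_spin_encode_locked_val();

	return cmpxchg_acquire(&lock->val, 0, new) == 0;
}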

static __always_inline int __queued_spin_trylock_steal(struct qspinlock *lock)
{
	u32 new = queued_spin_encode_locked_val();
	u32 prev, tmp;

	/* Trylock may get ahead of queued nodes if it finds unlocked */
	asm volatile(
"1:	lwarx	%0,0,%2,%5	# __queued_spin_trylock_steal		\n"
"	andc.	%1,%0,%4						\n"
"	bne-	2f							\n"
"	and	%1,%0,%4						\n"
"	or	%1,%1,%3						\n"
"	stwcx.	%1,0,%2							\n"
"	bne-	1b							\n"
"\t"	PPC_ACQUIRE_BARRIER "						\n"
"2:									\n"
	: "=&r" (prev), "=&r" (tmp)
	: "r" (&lock->val), "r" (new), "r" (_Q_TAIL_CPU_MASK),
	  "i" (_Q_SPIN_EH_HINT)
	: "cr0", "memory");

	return likely(!(prev & ~_Q_TAIL_CPU_MASK));
}
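
/*
 * Illustration only, a rough plain-C sketch of the steal variant above
 * (again assuming cmpxchg_acquire() is usable here): retry while only tail
 * bits are set, preserving the tail and ORing in the locked/owner value.
 * The andc./and/or sequence in the asm does the same mask-and-merge.
 */
static __always_inline int __example_trylock_steal_cmpxchg(struct qspinlock *lock)
{
	u32 new = queued_spin_encode_locked_val();
	u32 val = READ_ONCE(lock->val);
	u32 old;

	while (!(val & ~_Q_TAIL_CPU_MASK)) {
		old = cmpxchg_acquire(&lock->val, val,
				      (val & _Q_TAIL_CPU_MASK) | new);
		if (old == val)
			return 1;
		val = old;
	}

	return 0;
}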

static __always_inline int queued_spin_trylock(struct qspinlock *lock)
{
	if (!_Q_SPIN_TRY_LOCK_STEAL)
		return __queued_spin_trylock_nosteal(lock);
	else
		return __queued_spin_trylock_steal(lock);
}

void queued_spin_lock_slowpath(struct qspinlock *lock);

static __always_inline void queued_spin_lock(struct qspinlock *lock)
{
	if (!queued_spin_trylock(lock))
		queued_spin_lock_slowpath(lock);
}

static inline void queued_spin_unlock(struct qspinlock *lock)
{
	/* Clear only the locked halfword; tail bits are preserved for waiters */
	smp_store_release(&lock->locked, 0);
	if (_Q_SPIN_MISO_UNLOCK)
		asm volatile("miso" ::: "memory");
}

#define arch_spin_is_locked(l)		queued_spin_is_locked(l)
#define arch_spin_is_contended(l)	queued_spin_is_contended(l)
#define arch_spin_value_unlocked(l)	queued_spin_value_unlocked(l)
#define arch_spin_lock(l)		queued_spin_lock(l)
#define arch_spin_trylock(l)		queued_spin_trylock(l)
#define arch_spin_unlock(l)		queued_spin_unlock(l)
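
/*
 * Usage sketch only (hypothetical caller, not part of this header's API):
 * the arch_spin_* wrappers above are what the generic spinlock layer
 * invokes. Assumes __ARCH_SPIN_LOCK_UNLOCKED is provided by
 * asm/qspinlock_types.h.
 */
static inline void __example_arch_spin_usage(void)
{
	arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED;

	arch_spin_lock(&lock);
	/* critical section */
	arch_spin_unlock(&lock);

	if (arch_spin_trylock(&lock))	/* uncontended, should succeed */
		arch_spin_unlock(&lock);
}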

#ifdef CONFIG_PARAVIRT_SPINLOCKS
void pv_spinlocks_init(void);
#else
static inline void pv_spinlocks_init(void) { }
#endif

#endif /* _ASM_POWERPC_QSPINLOCK_H */