/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __MIPS_ASM_SYNC_H__
#define __MIPS_ASM_SYNC_H__

/*
 * sync types are defined by the MIPS64 Instruction Set documentation in Volume
 * II-A of the MIPS Architecture Reference Manual, which can be found here:
 *
 *   https://www.mips.com/?do-download=the-mips64-instruction-set-v6-06
 *
 * Two types of barrier are provided:
 *
 * 1) Completion barriers, which ensure that a memory operation has actually
 *    completed & often involve stalling the CPU pipeline to do so.
 *
 * 2) Ordering barriers, which only ensure that affected memory operations
 *    won't be reordered in the CPU pipeline in a manner that violates the
 *    restrictions imposed by the barrier.
 *
 * Ordering barriers can be more efficient than completion barriers, since:
 *
 * a) Ordering barriers only require memory access instructions which precede
 *    them in program order (older instructions) to reach a point in the
 *    load/store datapath beyond which reordering is not possible before
 *    allowing memory access instructions which follow them (younger
 *    instructions) to be performed. That is, older instructions don't
 *    actually need to complete - they just need to get far enough that all
 *    other coherent CPUs will observe their completion before they observe
 *    the effects of younger instructions.
 *
 * b) Multiple variants of ordering barrier are provided which allow the
 *    effects to be restricted to different combinations of older or younger
 *    loads or stores. By way of example, if we only care that stores older
 *    than a barrier are observed prior to stores that are younger than a
 *    barrier & don't care about the ordering of loads then the 'wmb'
 *    ordering barrier can be used. Limiting the barrier's effects to stores
 *    allows loads to continue unaffected & potentially allows the CPU to
 *    make progress faster than if younger loads had to wait for older stores
 *    to complete.
 */

/*
 * No sync instruction at all; used to allow code to nullify the effect of the
 * __SYNC() macro without needing lots of #ifdefery.
 */
#define __SYNC_none	-1

/*
 * A full completion barrier; all memory accesses appearing prior to this sync
 * instruction in program order must complete before any memory accesses
 * appearing after this sync instruction in program order.
 */
#define __SYNC_full	0x00

/*
 * For now we use a full completion barrier to implement all sync types, until
 * we're satisfied that lightweight ordering barriers defined by MIPSr6 are
 * sufficient to uphold our desired memory model.
 */
#define __SYNC_aq	__SYNC_full
#define __SYNC_rl	__SYNC_full
#define __SYNC_mb	__SYNC_full

/*
 * ...except on Cavium Octeon CPUs, which have been using the 'wmb' ordering
 * barrier since 2010 & omit 'rmb' barriers because the CPUs don't perform
 * speculative reads.
 */
#ifdef CONFIG_CPU_CAVIUM_OCTEON
# define __SYNC_rmb	__SYNC_none
# define __SYNC_wmb	0x04
#else
# define __SYNC_rmb	__SYNC_full
# define __SYNC_wmb	__SYNC_full
#endif
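
/*
 * Illustrative sketch only, not part of this header's API: the values above
 * are the stype immediates encoded into the sync instruction, so hand-rolled
 * barriers would look roughly like
 *
 *	asm volatile("sync 0x4" ::: "memory");	// Octeon-style 'wmb' ordering barrier
 *	asm volatile("sync 0"   ::: "memory");	// full completion barrier
 *
 * Kernel code shouldn't hand-code stypes like this; it should use the
 * __SYNC() macro defined below, which picks the correct stype & repetition
 * count for the configured CPU.
 */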
/*
 * A GINV sync is a little different; it doesn't relate directly to loads or
 * stores, but instead causes synchronization of an icache or TLB global
 * invalidation operation triggered by the ginvi or ginvt instructions
 * respectively. In cases where we need to know that a ginvi or ginvt operation
 * has been performed by all coherent CPUs, we must issue a sync instruction of
 * this type. Once this instruction graduates all coherent CPUs will have
 * observed the invalidation.
 */
#define __SYNC_ginv	0x14

/* Trivial; indicate that we always need this sync instruction. */
#define __SYNC_always	(1 << 0)

/*
 * Indicate that we need this sync instruction only on systems with weakly
 * ordered memory access. In general this is most MIPS systems, but there are
 * exceptions which provide strongly ordered memory.
 */
#ifdef CONFIG_WEAK_ORDERING
# define __SYNC_weak_ordering	(1 << 1)
#else
# define __SYNC_weak_ordering	0
#endif

/*
 * Indicate that we need this sync instruction only on systems where LL/SC
 * don't implicitly provide a memory barrier. In general this is most MIPS
 * systems.
 */
#ifdef CONFIG_WEAK_REORDERING_BEYOND_LLSC
# define __SYNC_weak_llsc	(1 << 2)
#else
# define __SYNC_weak_llsc	0
#endif

/*
 * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load,
 * store or prefetch) in between an LL & SC can cause the SC instruction to
 * erroneously succeed, breaking atomicity. Whilst it's unusual to write code
 * containing such sequences, this bug bites harder than we might otherwise
 * expect due to reordering & speculation:
 *
 * 1) A memory access appearing prior to the LL in program order may actually
 *    be executed after the LL - this is the reordering case.
 *
 *    In order to avoid this we need to place a memory barrier (ie. a SYNC
 *    instruction) prior to every LL instruction, in between it and any
 *    earlier memory access instructions.
 *
 *    This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later.
 *
 * 2) If a conditional branch exists between an LL & SC with a target outside
 *    of the LL-SC loop, for example an exit upon value mismatch in cmpxchg()
 *    or similar, then misprediction of the branch may allow speculative
 *    execution of memory accesses from outside of the LL-SC loop.
 *
 *    In order to avoid this we need a memory barrier (ie. a SYNC instruction)
 *    at each affected branch target.
 *
 *    This case affects all current Loongson 3 CPUs.
 *
 * Both cases above trigger an error in the cache coherence protocol: the
 * Invalidate of a competing LL-SC goes 'missing', so the SC erroneously
 * observes that its core still holds the line in the Exclusive state and
 * allows the SC to proceed.
 *
 * Therefore the error only occurs on SMP systems.
 */
#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS
# define __SYNC_loongson3_war	(1 << 31)
#else
# define __SYNC_loongson3_war	0
#endif

/*
 * Some Cavium Octeon CPUs suffer from a bug that causes a single wmb ordering
 * barrier to be ineffective, requiring the use of 2 in sequence to provide an
 * effective barrier as noted by commit 6b07d38aaa52 ("MIPS: Octeon: Use
 * optimized memory barrier primitives."). Here we specify that the affected
 * sync instructions should be emitted twice.
 */
#ifdef CONFIG_CPU_CAVIUM_OCTEON
# define __SYNC_rpt(type)	(1 + (type == __SYNC_wmb))
#else
# define __SYNC_rpt(type)	1
#endif
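
/*
 * Worked example, illustrative only: with CONFIG_CPU_CAVIUM_OCTEON=y,
 * __SYNC_rpt(__SYNC_wmb) evaluates to 1 + (0x04 == 0x04), ie. 2, so the
 * .rept in ____SYNC() below emits the affected wmb sync twice. For every
 * other sync type the comparison is 0 & a single sync instruction is
 * emitted, as it is for all types on non-Octeon configurations.
 */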
/*
 * The main event. Here we actually emit a sync instruction of a given type, if
 * reason is non-zero.
 *
 * In future we have the option of emitting entries in a fixups-style table
 * here that would allow us to opportunistically remove some sync instructions
 * when we detect at runtime that we're running on a CPU that doesn't need
 * them.
 */
#ifdef CONFIG_CPU_HAS_SYNC
# define ____SYNC(_type, _reason, _else)			\
	.if	(( _type ) != -1) && ( _reason );		\
	.set	push;						\
	.set	MIPS_ISA_LEVEL_RAW;				\
	.rept	__SYNC_rpt(_type);				\
	sync	_type;						\
	.endr;							\
	.set	pop;						\
	.else;							\
	_else;							\
	.endif
#else
# define ____SYNC(_type, _reason, _else)
#endif

/*
 * Preprocessor magic to expand macros used as arguments before we insert them
 * into assembly code.
 */
#ifdef __ASSEMBLY__
# define ___SYNC(type, reason, else)				\
	____SYNC(type, reason, else)
#else
# define ___SYNC(type, reason, else)				\
	__stringify(____SYNC(type, reason, else))
#endif

#define __SYNC(type, reason)					\
	___SYNC(__SYNC_##type, __SYNC_##reason, )
#define __SYNC_ELSE(type, reason, else)				\
	___SYNC(__SYNC_##type, __SYNC_##reason, else)

#endif /* __MIPS_ASM_SYNC_H__ */
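
/*
 * Illustrative usage sketch; the wrapper below is hypothetical and not part
 * of this header. From C code the stringified expansion is pasted into
 * inline assembly, e.g.
 *
 *	static inline void example_wmb(void)
 *	{
 *		asm volatile(__SYNC(wmb, always) ::: "memory");
 *	}
 *
 * which emits nothing beyond the compiler barrier when CONFIG_CPU_HAS_SYNC
 * is disabled, & otherwise 'sync 0x4' on Octeon (repeated per __SYNC_rpt())
 * or a full 'sync 0' elsewhere. In assembly files the same macro can be used
 * directly as a pseudo-op, since ___SYNC() skips the stringification there.
 */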