#ifndef _ASM_X86_BARRIER_H
#define _ASM_X86_BARRIER_H

#include <asm/alternative.h>
#include <asm/nops.h>

/*
 * Force strict CPU ordering.
 * And yes, this is required on UP too when we're talking
 * to devices.
 */

#ifdef CONFIG_X86_32
/*
 * Some non-Intel clones support out of order store. wmb() ceases to be a
 * nop for these.
 */
#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
#else
#define mb() asm volatile("mfence":::"memory")
#define rmb() asm volatile("lfence":::"memory")
#define wmb() asm volatile("sfence" ::: "memory")
#endif

/**
 * read_barrier_depends - Flush all pending reads that subsequent reads
 * depend on.
 *
 * No data-dependent reads from memory-like regions are ever reordered
 * over this barrier.  All reads preceding this primitive are guaranteed
 * to access memory (but not necessarily other CPUs' caches) before any
 * reads following this primitive that depend on the data returned by
 * any of the preceding reads.  This primitive is much lighter weight than
 * rmb() on most CPUs, and is never heavier weight than rmb().
 *
 * These ordering constraints are respected by both the local CPU
 * and the compiler.
 *
 * Ordering is not guaranteed by anything other than these primitives,
 * not even by data dependencies.  See the documentation for
 * memory_barrier() for examples and URLs to more information.
 *
 * For example, the following code would force ordering (the initial
 * value of "a" is zero, "b" is one, and "p" is "&a"):
 *
 * <programlisting>
 *	CPU 0				CPU 1
 *
 *	b = 2;
 *	memory_barrier();
 *	p = &b;				q = p;
 *					read_barrier_depends();
 *					d = *q;
 * </programlisting>
 *
 * because the read of "*q" depends on the read of "p" and these
 * two reads are separated by a read_barrier_depends().  However,
 * the following code, with the same initial values for "a" and "b":
 *
 * <programlisting>
 *	CPU 0				CPU 1
 *
 *	a = 2;
 *	memory_barrier();
 *	b = 3;				y = b;
 *					read_barrier_depends();
 *					x = a;
 * </programlisting>
 *
 * does not enforce ordering, since there is no data dependency between
 * the read of "a" and the read of "b".  Therefore, on some CPUs, such
 * as Alpha, "y" could be set to 3 and "x" to 0.  Use rmb()
 * in cases like this where there are no data dependencies.
 **/

#define read_barrier_depends()	do { } while (0)

#ifdef CONFIG_SMP
#define smp_mb()	mb()
#ifdef CONFIG_X86_PPRO_FENCE
# define smp_rmb()	rmb()
#else
# define smp_rmb()	barrier()
#endif
#define smp_wmb()	barrier()
#define smp_read_barrier_depends()	read_barrier_depends()
#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
#else /* !SMP */
#define smp_mb()	barrier()
#define smp_rmb()	barrier()
#define smp_wmb()	barrier()
#define smp_read_barrier_depends()	do { } while (0)
#define set_mb(var, value) do { var = value; barrier(); } while (0)
#endif /* SMP */

#if defined(CONFIG_X86_PPRO_FENCE)

/*
 * For this option x86 doesn't have a strong TSO memory
 * model and we should fall back to full barriers.
 */

#define smp_store_release(p, v)						\
do {									\
	compiletime_assert_atomic_type(*p);				\
	smp_mb();							\
	ACCESS_ONCE(*p) = (v);						\
} while (0)

#define smp_load_acquire(p)						\
({									\
	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
	compiletime_assert_atomic_type(*p);				\
	smp_mb();							\
	___p1;								\
})

#else /* regular x86 TSO memory ordering */

#define smp_store_release(p, v)						\
do {									\
	compiletime_assert_atomic_type(*p);				\
	barrier();							\
	ACCESS_ONCE(*p) = (v);						\
} while (0)

#define smp_load_acquire(p)						\
({									\
	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
	compiletime_assert_atomic_type(*p);				\
	barrier();							\
	___p1;								\
})

#endif
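
/*
 * Illustrative sketch, not part of this header's API: the acquire/release
 * pair above is typically used for one-way message passing between CPUs.
 * The variables "msg" and "msg_ready" below are hypothetical and exist
 * only to show the intended pairing (both initially zero):
 *
 * <programlisting>
 *	CPU 0					CPU 1
 *
 *	msg = 42;
 *	smp_store_release(&msg_ready, 1);
 *						if (smp_load_acquire(&msg_ready))
 *							BUG_ON(msg != 42);
 * </programlisting>
 *
 * The release store orders the write to "msg" before the write to
 * "msg_ready"; the acquire load orders the read of "msg_ready" before
 * the read of "msg".  On regular TSO x86 both expand to a compiler
 * barrier() around a plain access; with CONFIG_X86_PPRO_FENCE they
 * fall back to smp_mb().
 */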

/* Atomic operations are already serializing on x86 */
#define smp_mb__before_atomic()	barrier()
#define smp_mb__after_atomic()	barrier()

/*
 * Stop RDTSC speculation. This is needed when you need to use RDTSC
 * (or get_cycles or vread that possibly accesses the TSC) in a defined
 * code region.
 *
 * (Could use a three-way alternative for this if one existed.)
 */
static __always_inline void rdtsc_barrier(void)
{
	alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
	alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
}

#endif /* _ASM_X86_BARRIER_H */