#include <linux/init.h>

#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/module.h>

#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/cache.h>
#include <asm/apic.h>
#include <asm/uv/uv.h>

DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
                        = { &init_mm, 0, };

/*
 * Smarter SMP flushing.
 *      c/o Linus Torvalds.
 *
 * These mean you can really definitely utterly forget about
 * writing to user space from interrupts. (It's not allowed anyway).
 *
 * Optimizations Manfred Spraul <manfred@colorfullife.com>
 *
 * More scalable flush, from Andi Kleen
 *
 * To avoid global state, use 8 different call vectors.
 * Each CPU uses a specific vector to trigger flushes on other
 * CPUs. Depending on the received vector the target CPUs look into
 * the right array slot for the flush data.
 *
 * With more than 8 CPUs they are hashed to the 8 available
 * vectors. The limited global vector space forces us to this right now.
 * In the future, when interrupts are split into per-CPU domains,
 * this could be fixed, at the cost of triggering multiple IPIs
 * in some cases.
 */

union smp_flush_state {
        struct {
                struct mm_struct *flush_mm;
                unsigned long flush_va;
                spinlock_t tlbstate_lock;
                DECLARE_BITMAP(flush_cpumask, NR_CPUS);
        };
        char pad[INTERNODE_CACHE_BYTES];
} ____cacheline_internodealigned_in_smp;

/*
 * Each flush_state entry is padded to a full internode cache line
 * because other CPUs can access it and we don't want false sharing
 * between the per-vector slots.
 */
static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS];

/*
 * We cannot call mmdrop() because we are in interrupt context;
 * instead update mm->cpu_vm_mask.
 */
void leave_mm(int cpu)
{
        if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
                BUG();
        cpumask_clear_cpu(cpu,
                          mm_cpumask(percpu_read(cpu_tlbstate.active_mm)));
        load_cr3(swapper_pg_dir);
}
EXPORT_SYMBOL_GPL(leave_mm);

/*
 * The flush IPI assumes that a thread switch happens in this order:
 * [cpu0: the cpu that switches]
 * 1) switch_mm() either 1a) or 1b)
 * 1a) thread switch to a different mm
 * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
 *      Stop ipi delivery for the old mm. This is not synchronized with
 *      the other cpus, but smp_invalidate_interrupt ignores flush ipis
 *      for the wrong mm, and in the worst case we perform a superfluous
 *      tlb flush.
 * 1a2) set cpu mmu_state to TLBSTATE_OK
 *      Now the smp_invalidate_interrupt won't call leave_mm if cpu0
 *      was in lazy tlb mode.
 * 1a3) update cpu active_mm
 *      Now cpu0 accepts tlb flushes for the new mm.
 * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
 *      Now the other cpus will send tlb flush ipis.
 * 1a5) change cr3.
 * 1b) thread switch without mm change
 *      cpu active_mm is correct, cpu0 already handles
 *      flush ipis.
 * 1b1) set cpu mmu_state to TLBSTATE_OK
 * 1b2) test_and_set the cpu bit in cpu_vm_mask.
 *      Atomically set the bit [other cpus will start sending flush ipis],
 *      and test the bit.
 * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
 * 2) switch %esp, i.e. current
 *
 * The interrupt must handle 2 special cases:
 * - cr3 is changed before %esp, i.e. it cannot use current->{active_,}mm.
 * - the cpu performs speculative tlb reads, i.e. even if the cpu only
 *   runs in kernel space, the cpu could load tlb entries for user space
 *   pages.
 *
 * The good news is that cpu mmu_state is local to each cpu, so there
 * are no write/read ordering problems.
 */

/*
 * TLB flush IPI:
 *
 * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
 * 2) Leave the mm if we are in lazy tlb mode.
 *
 * Interrupts are disabled.
 */

/*
 * FIXME: use of asmlinkage is not consistent. On x86_64 it's a noop,
 * kept only for documentation purposes, but the usage is slightly
 * inconsistent. On x86_32, asmlinkage is regparm(0) but interrupt
 * entry calls in with the first parameter in %eax. Maybe define
 * intrlinkage?
 */
#ifdef CONFIG_X86_64
asmlinkage
#endif
void smp_invalidate_interrupt(struct pt_regs *regs)
{
        unsigned int cpu;
        unsigned int sender;
        union smp_flush_state *f;

        cpu = smp_processor_id();
        /*
         * orig_ax contains the negated interrupt vector.
         * Use that to determine where the sender put the data.
         */
        sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
        f = &flush_state[sender];

        if (!cpumask_test_cpu(cpu, to_cpumask(f->flush_cpumask)))
                goto out;
        /*
         * This was a BUG() but until someone can quote me the
         * line from the Intel manual that guarantees an IPI to
         * multiple CPUs is retried _only_ on the erroring CPUs
         * it's staying as a return.
         *
         * BUG();
         */

        if (f->flush_mm == percpu_read(cpu_tlbstate.active_mm)) {
                if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
                        if (f->flush_va == TLB_FLUSH_ALL)
                                local_flush_tlb();
                        else
                                __flush_tlb_one(f->flush_va);
                } else
                        leave_mm(cpu);
        }
out:
        ack_APIC_irq();
        smp_mb__before_clear_bit();
        cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask));
        smp_mb__after_clear_bit();
        inc_irq_stat(irq_tlb_count);
}
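
/*
 * Flush TLBs on the CPUs in @cpumask other than the sender: pick this
 * CPU's vector slot, publish @mm, @va and the target mask under the
 * slot's lock, send the IPI, then spin until every target CPU has
 * acknowledged by clearing its bit in flush_cpumask.
 */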
static void flush_tlb_others_ipi(const struct cpumask *cpumask,
                                 struct mm_struct *mm, unsigned long va)
{
        unsigned int sender;
        union smp_flush_state *f;

        /* Caller has disabled preemption */
        sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
        f = &flush_state[sender];

        /*
         * Could avoid this lock when
         * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
         * probably not worth checking this for a cache-hot lock.
         */
        spin_lock(&f->tlbstate_lock);

        f->flush_mm = mm;
        f->flush_va = va;
        if (cpumask_andnot(to_cpumask(f->flush_cpumask), cpumask,
                           cpumask_of(smp_processor_id()))) {
                /*
                 * We have to send the IPI only to
                 * CPUs affected.
                 */
                apic->send_IPI_mask(to_cpumask(f->flush_cpumask),
                                    INVALIDATE_TLB_VECTOR_START + sender);

                while (!cpumask_empty(to_cpumask(f->flush_cpumask)))
                        cpu_relax();
        }

        f->flush_mm = NULL;
        f->flush_va = 0;
        spin_unlock(&f->tlbstate_lock);
}

void native_flush_tlb_others(const struct cpumask *cpumask,
                             struct mm_struct *mm, unsigned long va)
{
        if (is_uv_system()) {
                unsigned int cpu;

                cpu = get_cpu();
                cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu);
                if (cpumask)
                        flush_tlb_others_ipi(cpumask, mm, va);
                put_cpu();
                return;
        }
        flush_tlb_others_ipi(cpumask, mm, va);
}

static int __cpuinit init_smp_flush(void)
{
        int i;

        for (i = 0; i < ARRAY_SIZE(flush_state); i++)
                spin_lock_init(&flush_state[i].tlbstate_lock);

        return 0;
}
core_initcall(init_smp_flush);
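
/*
 * Flush the current task's mm: flush the local TLB, then IPI every
 * other CPU that has this mm's bit set in mm_cpumask().
 */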
void flush_tlb_current_task(void)
{
        struct mm_struct *mm = current->mm;

        preempt_disable();

        local_flush_tlb();
        if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
                flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL);
        preempt_enable();
}

/*
 * Flush all TLB entries for @mm. If this CPU is only lazily borrowing
 * the mm, drop it via leave_mm() instead of flushing.
 */
void flush_tlb_mm(struct mm_struct *mm)
{
        preempt_disable();

        if (current->active_mm == mm) {
                if (current->mm)
                        local_flush_tlb();
                else
                        leave_mm(smp_processor_id());
        }
        if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
                flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL);

        preempt_enable();
}

/*
 * Flush the single page at @va for @vma's mm, locally and on any other
 * CPU that is using the mm.
 */
void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
{
        struct mm_struct *mm = vma->vm_mm;

        preempt_disable();

        if (current->active_mm == mm) {
                if (current->mm)
                        __flush_tlb_one(va);
                else
                        leave_mm(smp_processor_id());
        }

        if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
                flush_tlb_others(mm_cpumask(mm), mm, va);

        preempt_enable();
}

static void do_flush_tlb_all(void *info)
{
        unsigned long cpu = smp_processor_id();

        __flush_tlb_all();
        if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
                leave_mm(cpu);
}

/* Flush the TLB on every online CPU. */
void flush_tlb_all(void)
{
        on_each_cpu(do_flush_tlb_all, NULL, 1);
}