#include <linux/init.h>

#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/cpu.h>

#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/cache.h>
#include <asm/apic.h>
#include <asm/uv/uv.h>
#include <linux/debugfs.h>

DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
                        = { &init_mm, 0, };

/*
 *      Smarter SMP flushing macros.
 *              c/o Linus Torvalds.
 *
 *      These mean you can really definitely utterly forget about
 *      writing to user space from interrupts. (It's not allowed anyway.)
 *
 *      Optimizations Manfred Spraul <manfred@colorfullife.com>
 *
 *      More scalable flush, from Andi Kleen
 *
 *      Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
 */

struct flush_tlb_info {
        struct mm_struct *flush_mm;
        unsigned long flush_start;
        unsigned long flush_end;
};

/*
 * We cannot call mmdrop() because we are in interrupt context;
 * instead, update mm->cpu_vm_mask.
 */
void leave_mm(int cpu)
{
        struct mm_struct *active_mm = this_cpu_read(cpu_tlbstate.active_mm);

        if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
                BUG();
        if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) {
                cpumask_clear_cpu(cpu, mm_cpumask(active_mm));
                load_cr3(swapper_pg_dir);
                trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
        }
}
EXPORT_SYMBOL_GPL(leave_mm);

/*
 * The flush IPI assumes that a thread switch happens in this order:
 * [cpu0: the cpu that switches]
 * 1) switch_mm() either 1a) or 1b)
 * 1a) thread switch to a different mm
 * 1a1) set cpu_tlbstate to TLBSTATE_OK
 *      Now the tlb flush IPI handler flush_tlb_func won't call leave_mm
 *      if cpu0 was in lazy tlb mode.
 * 1a2) update cpu active_mm
 *      Now cpu0 accepts tlb flushes for the new mm.
 * 1a3) cpu_set(cpu, new_mm->cpu_vm_mask);
 *      Now the other cpus will send tlb flush ipis.
 * 1a4) change cr3.
 * 1a5) cpu_clear(cpu, old_mm->cpu_vm_mask);
 *      Stop ipi delivery for the old mm. This is not synchronized with
 *      the other cpus, but flush_tlb_func ignores flush ipis for the wrong
 *      mm, and in the worst case we perform a superfluous tlb flush.
 * 1b) thread switch without mm change
 *      cpu active_mm is correct, cpu0 already handles flush ipis.
 * 1b1) set cpu_tlbstate to TLBSTATE_OK
 * 1b2) test_and_set the cpu bit in cpu_vm_mask.
 *      Atomically set the bit [other cpus will start sending flush ipis],
 *      and test the bit.
 * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
 * 2) switch %%esp, i.e. current
 *
 * The interrupt must handle 2 special cases:
 * - cr3 is changed before %%esp, i.e. it cannot use current->{active_,}mm.
 * - the cpu performs speculative tlb reads, i.e. even if the cpu only
 *   runs in kernel space, the cpu could load tlb entries for user space
 *   pages.
 *
 * The good news is that cpu_tlbstate is local to each cpu; there are no
 * write/read ordering problems.
 */

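/*
 * Note: "lazy tlb mode" above refers to a cpu that is currently running
 * a kernel thread and is only borrowing the previous task's mm (its
 * active_mm) for its page tables.  Such a cpu has no user mappings of
 * its own to keep coherent, so rather than flushing, flush_tlb_func()
 * below calls leave_mm(), which switches to swapper_pg_dir and clears
 * the cpu from mm_cpumask() so that no further flush IPIs are sent to
 * it for that mm.
 */
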
/*
 * TLB flush function:
 * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
 * 2) Leave the mm if we are in the lazy tlb mode.
 */
static void flush_tlb_func(void *info)
{
        struct flush_tlb_info *f = info;

        inc_irq_stat(irq_tlb_count);

        if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm))
                return;
        if (!f->flush_end)
                f->flush_end = f->flush_start + PAGE_SIZE;

        count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
        if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
                if (f->flush_end == TLB_FLUSH_ALL) {
                        local_flush_tlb();
                        trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, TLB_FLUSH_ALL);
                } else {
                        unsigned long addr;
                        unsigned long nr_pages =
                                (f->flush_end - f->flush_start) / PAGE_SIZE;

                        addr = f->flush_start;
                        while (addr < f->flush_end) {
                                __flush_tlb_single(addr);
                                addr += PAGE_SIZE;
                        }
                        trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, nr_pages);
                }
        } else
                leave_mm(smp_processor_id());
}

void native_flush_tlb_others(const struct cpumask *cpumask,
                             struct mm_struct *mm, unsigned long start,
                             unsigned long end)
{
        struct flush_tlb_info info;

        info.flush_mm = mm;
        info.flush_start = start;
        info.flush_end = end;

        count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
        if (is_uv_system()) {
                unsigned int cpu;

                cpu = smp_processor_id();
                cpumask = uv_flush_tlb_others(cpumask, mm, start, end, cpu);
                if (cpumask)
                        smp_call_function_many(cpumask, flush_tlb_func,
                                               &info, 1);
                return;
        }
        smp_call_function_many(cpumask, flush_tlb_func, &info, 1);
}

void flush_tlb_current_task(void)
{
        struct mm_struct *mm = current->mm;

        preempt_disable();

        count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
        local_flush_tlb();
        trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL);
        if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
                flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
        preempt_enable();
}

/*
 * See Documentation/x86/tlb.txt for details.  We choose 33
 * because it is large enough to cover the vast majority (at
 * least 95%) of allocations, and is small enough that we are
 * confident it will not cause too much overhead.  Each single
 * flush is about 100 ns, so this caps the maximum overhead at
 * _about_ 3,000 ns.
 *
 * This is in units of pages.
 */
unsigned long tlb_single_page_flush_ceiling = 33;

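/*
 * Illustrative example of how the function below is reached: the
 * flush_tlb_range() helper in <asm/tlbflush.h> funnels into it roughly as
 *
 *      flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags);
 *
 * i.e. it is called after the page table entries for [start, end) have
 * been modified.  Passing the VMA's flags lets VM_HUGETLB ranges skip the
 * per-page 'invlpg' loop and take a full flush instead.
 */
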
void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
                        unsigned long end, unsigned long vmflag)
{
        unsigned long addr;
        /* do a global flush by default */
        unsigned long base_pages_to_flush = TLB_FLUSH_ALL;

        preempt_disable();
        if (current->active_mm != mm)
                goto out;

        if (!current->mm) {
                leave_mm(smp_processor_id());
                goto out;
        }

        if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB))
                base_pages_to_flush = (end - start) >> PAGE_SHIFT;

        if (base_pages_to_flush > tlb_single_page_flush_ceiling) {
                base_pages_to_flush = TLB_FLUSH_ALL;
                count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
                local_flush_tlb();
        } else {
                /* flush the range one page at a time with 'invlpg' */
                for (addr = start; addr < end; addr += PAGE_SIZE) {
                        count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
                        __flush_tlb_single(addr);
                }
        }
        trace_tlb_flush(TLB_LOCAL_MM_SHOOTDOWN, base_pages_to_flush);
out:
        if (base_pages_to_flush == TLB_FLUSH_ALL) {
                start = 0UL;
                end = TLB_FLUSH_ALL;
        }
        if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
                flush_tlb_others(mm_cpumask(mm), mm, start, end);
        preempt_enable();
}

void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
{
        struct mm_struct *mm = vma->vm_mm;

        preempt_disable();

        if (current->active_mm == mm) {
                if (current->mm)
                        __flush_tlb_one(start);
                else
                        leave_mm(smp_processor_id());
        }

        if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
                flush_tlb_others(mm_cpumask(mm), mm, start, 0UL);

        preempt_enable();
}

static void do_flush_tlb_all(void *info)
{
        count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
        __flush_tlb_all();
        if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
                leave_mm(smp_processor_id());
}

void flush_tlb_all(void)
{
        count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
        on_each_cpu(do_flush_tlb_all, NULL, 1);
}

static void do_kernel_range_flush(void *info)
{
        struct flush_tlb_info *f = info;
        unsigned long addr;

        /* flush the range one page at a time with 'invlpg' */
        for (addr = f->flush_start; addr < f->flush_end; addr += PAGE_SIZE)
                __flush_tlb_single(addr);
}

void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
        /* Balance against a user space task's flush; a bit conservative. */
        if (end == TLB_FLUSH_ALL ||
            (end - start) > tlb_single_page_flush_ceiling * PAGE_SIZE) {
                on_each_cpu(do_flush_tlb_all, NULL, 1);
        } else {
                struct flush_tlb_info info;

                info.flush_start = start;
                info.flush_end = end;
                on_each_cpu(do_kernel_range_flush, &info, 1);
        }
}

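/*
 * The flush ceiling is tunable at run time through the debugfs file
 * created below.  Assuming debugfs is mounted at its usual location,
 * /sys/kernel/debug, it can be read and changed with, for example:
 *
 *      cat /sys/kernel/debug/x86/tlb_single_page_flush_ceiling
 *      echo 50 > /sys/kernel/debug/x86/tlb_single_page_flush_ceiling
 *
 * (50 is just an arbitrary example value; see Documentation/x86/tlb.txt
 * for guidance on tuning it.)
 */
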
static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf,
                                  size_t count, loff_t *ppos)
{
        char buf[32];
        unsigned int len;

        len = sprintf(buf, "%lu\n", tlb_single_page_flush_ceiling);
        return simple_read_from_buffer(user_buf, count, ppos, buf, len);
}

static ssize_t tlbflush_write_file(struct file *file,
                const char __user *user_buf, size_t count, loff_t *ppos)
{
        char buf[32];
        ssize_t len;
        int ceiling;

        len = min(count, sizeof(buf) - 1);
        if (copy_from_user(buf, user_buf, len))
                return -EFAULT;

        buf[len] = '\0';
        if (kstrtoint(buf, 0, &ceiling))
                return -EINVAL;

        if (ceiling < 0)
                return -EINVAL;

        tlb_single_page_flush_ceiling = ceiling;
        return count;
}

static const struct file_operations fops_tlbflush = {
        .read = tlbflush_read_file,
        .write = tlbflush_write_file,
        .llseek = default_llseek,
};

static int __init create_tlb_single_page_flush_ceiling(void)
{
        debugfs_create_file("tlb_single_page_flush_ceiling", S_IRUSR | S_IWUSR,
                            arch_debugfs_dir, NULL, &fops_tlbflush);
        return 0;
}
late_initcall(create_tlb_single_page_flush_ceiling);