/*
 * This file contains the routines for handling the MMU on those
 * PowerPC implementations where the MMU is not using the hash
 * table, such as the 8xx, 4xx, BookE, etc.
 *
 * Copyright 2008 Ben Herrenschmidt <benh@kernel.crashing.org>
 *                IBM Corp.
 *
 * Derived from previous arch/powerpc/mm/mmu_context.c
 * and arch/powerpc/include/asm/mmu_context.h
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * TODO:
 *
 *   - The global context lock will not scale very well
 *   - The maps should be dynamically allocated to allow for processors
 *     that support more PID bits at runtime
 *   - Implement flush_tlb_mm() by making the context stale and picking
 *     a new one
 *   - More aggressively clear stale map bits and maybe find some way to
 *     also clear mm->cpu_vm_mask bits when processes are migrated
 */

//#define DEBUG_MAP_CONSISTENCY
//#define DEBUG_CLAMP_LAST_CONTEXT   31
//#define DEBUG_HARDER

/* We don't use DEBUG because it tends to be always compiled in nowadays
 * and this would generate way too much output
 */
#ifdef DEBUG_HARDER
#define pr_hard(args...)        printk(KERN_DEBUG args)
#define pr_hardcont(args...)    printk(KERN_CONT args)
#else
#define pr_hard(args...)        do { } while(0)
#define pr_hardcont(args...)    do { } while(0)
#endif

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/memblock.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/slab.h>

#include <asm/mmu_context.h>
#include <asm/tlbflush.h>

#include <mm/mmu_decl.h>

/*
 * The MPC8xx has only 16 contexts. We rotate through them on each task switch.
 * A better way would be to keep track of tasks that own contexts, and implement
 * LRU replacement. That way very active tasks don't always have to pay the TLB
 * reload overhead. The kernel pages are mapped shared, so the kernel can run on
 * behalf of any task that makes a kernel entry. Shared does not mean they are
 * not protected, just that the ASID comparison is not performed. -- Dan
 *
 * The IBM4xx has 256 contexts, so we can just rotate through these as a way of
 * "switching" contexts. If the TID of the TLB is zero, the PID/TID comparison
 * is disabled, so we can use a TID of zero to represent all kernel pages as
 * shared among all contexts. -- Dan
 *
 * The IBM 47x core supports 16-bit PIDs, thus 65535 contexts. We should
 * normally never have to steal though the facility is present if needed.
 * -- BenH
 */
#define FIRST_CONTEXT 1
#ifdef DEBUG_CLAMP_LAST_CONTEXT
#define LAST_CONTEXT DEBUG_CLAMP_LAST_CONTEXT
#elif defined(CONFIG_PPC_8xx)
#define LAST_CONTEXT 16
#elif defined(CONFIG_PPC_47x)
#define LAST_CONTEXT 65535
#else
#define LAST_CONTEXT 255
#endif

static unsigned int next_context, nr_free_contexts;
static unsigned long *context_map;
#ifdef CONFIG_SMP
static unsigned long *stale_map[NR_CPUS];
#endif
static struct mm_struct **context_mm;
static DEFINE_RAW_SPINLOCK(context_lock);
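
/* Size of each context bitmap, in bytes: one bit per context id from 0 to
 * LAST_CONTEXT, rounded up to a whole number of longs. As an illustration,
 * with LAST_CONTEXT == 255 this works out to 32 bytes whether longs are
 * 32 or 64 bits wide.
 */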
#define CTX_MAP_SIZE    \
        (sizeof(unsigned long) * (LAST_CONTEXT / BITS_PER_LONG + 1))


/* Steal a context from a task that has one at the moment.
 *
 * This is used when we are running out of available PID numbers
 * on the processors.
 *
 * This isn't an LRU system, it just frees up each context in
 * turn (sort-of pseudo-random replacement :). This would be the
 * place to implement an LRU scheme if anyone was motivated to do it.
 *  -- paulus
 *
 * For context stealing, we use a slightly different approach for
 * SMP and UP. Basically, the UP one is simpler and doesn't use
 * the stale map as we can just flush the local CPU
 *  -- benh
 */
#ifdef CONFIG_SMP
static unsigned int steal_context_smp(unsigned int id)
{
        struct mm_struct *mm;
        unsigned int cpu, max, i;

        max = LAST_CONTEXT - FIRST_CONTEXT;

        /* Attempt to free next_context first and then loop until we manage */
        while (max--) {
                /* Pick up the victim mm */
                mm = context_mm[id];

                /* We have a candidate victim, check if it's active, on SMP
                 * we cannot steal active contexts
                 */
                if (mm->context.active) {
                        id++;
                        if (id > LAST_CONTEXT)
                                id = FIRST_CONTEXT;
                        continue;
                }
                pr_hardcont(" | steal %d from 0x%p", id, mm);

                /* Mark this mm as having no context anymore */
                mm->context.id = MMU_NO_CONTEXT;

                /* Mark it stale on all CPUs that used this mm. For threaded
                 * implementations, we set it on all threads on each core
                 * represented in the mask. A future implementation will use
                 * a core map instead but this will do for now.
                 */
                for_each_cpu(cpu, mm_cpumask(mm)) {
                        for (i = cpu_first_thread_sibling(cpu);
                             i <= cpu_last_thread_sibling(cpu); i++) {
                                if (stale_map[i])
                                        __set_bit(id, stale_map[i]);
                        }
                        cpu = i - 1;
                }
                return id;
        }

        /* This will happen if you have more CPUs than available contexts,
         * all we can do here is wait a bit and try again
         */
        raw_spin_unlock(&context_lock);
        cpu_relax();
        raw_spin_lock(&context_lock);

        /* This will cause the caller to try again */
        return MMU_NO_CONTEXT;
}
#endif  /* CONFIG_SMP */
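
/* Steal all contexts at once.
 *
 * Rather than picking a single victim, mark every mm as having no context,
 * free every id except FIRST_CONTEXT (which is handed back to the caller)
 * and flush the whole TLB. The flush is not broadcast, so the caller only
 * uses this on 8xx when a single CPU is online.
 */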
static unsigned int steal_all_contexts(void)
{
        struct mm_struct *mm;
#ifdef CONFIG_SMP
        int cpu = smp_processor_id();
#endif
        unsigned int id;

        for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) {
                /* Pick up the victim mm */
                mm = context_mm[id];

                pr_hardcont(" | steal %d from 0x%p", id, mm);

                /* Mark this mm as having no context anymore */
                mm->context.id = MMU_NO_CONTEXT;
                if (id != FIRST_CONTEXT) {
                        context_mm[id] = NULL;
                        __clear_bit(id, context_map);
#ifdef DEBUG_MAP_CONSISTENCY
                        mm->context.active = 0;
#endif
                }
#ifdef CONFIG_SMP
                __clear_bit(id, stale_map[cpu]);
#endif
        }

        /* Flush the TLB for all contexts (not to be used on SMP) */
        _tlbil_all();

        nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT;

        return FIRST_CONTEXT;
}

/* Note that this will also be called on SMP if all other CPUs are
 * offlined, which means that it may be called for cpu != 0. For
 * this to work, we somewhat assume that CPUs that are onlined
 * come up with a fully clean TLB (or are cleaned when offlined)
 */
static unsigned int steal_context_up(unsigned int id)
{
        struct mm_struct *mm;
#ifdef CONFIG_SMP
        int cpu = smp_processor_id();
#endif

        /* Pick up the victim mm */
        mm = context_mm[id];

        pr_hardcont(" | steal %d from 0x%p", id, mm);

        /* Flush the TLB for that context */
        local_flush_tlb_mm(mm);

        /* Mark this mm as having no context anymore */
        mm->context.id = MMU_NO_CONTEXT;

        /* XXX This clear should ultimately be part of local_flush_tlb_mm */
#ifdef CONFIG_SMP
        __clear_bit(id, stale_map[cpu]);
#endif

        return id;
}

#ifdef DEBUG_MAP_CONSISTENCY
static void context_check_map(void)
{
        unsigned int id, nrf, nact;

        nrf = nact = 0;
        for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) {
                int used = test_bit(id, context_map);
                if (!used)
                        nrf++;
                if (used != (context_mm[id] != NULL))
                        pr_err("MMU: Context %d is %s and MM is %p !\n",
                               id, used ? "used" : "free", context_mm[id]);
                if (context_mm[id] != NULL)
                        nact += context_mm[id]->context.active;
        }
        if (nrf != nr_free_contexts) {
                pr_err("MMU: Free context count out of sync ! (%d vs %d)\n",
                       nr_free_contexts, nrf);
                nr_free_contexts = nrf;
        }
        if (nact > num_online_cpus())
                pr_err("MMU: More active contexts than CPUs ! (%d vs %d)\n",
                       nact, num_online_cpus());
        if (FIRST_CONTEXT > 0 && !test_bit(0, context_map))
                pr_err("MMU: Context 0 has been freed !!!\n");
}
#else
static void context_check_map(void) { }
#endif
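
/* Switch the MMU context for a task: under context_lock, reuse the mm's
 * existing id if it has one, otherwise allocate a free id (stealing one
 * when none are left), then flush the local TLB if that id was marked
 * stale on this CPU before programming it into the MMU via set_context().
 */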
void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
                        struct task_struct *tsk)
{
        unsigned int id;
#ifdef CONFIG_SMP
        unsigned int i, cpu = smp_processor_id();
#endif
        unsigned long *map;

        /* No lockless fast path .. yet */
        raw_spin_lock(&context_lock);

        pr_hard("[%d] activating context for mm @%p, active=%d, id=%d",
                cpu, next, next->context.active, next->context.id);

#ifdef CONFIG_SMP
        /* Mark us active and the previous one not anymore */
        next->context.active++;
        if (prev) {
                pr_hardcont(" (old=0x%p a=%d)", prev, prev->context.active);
                WARN_ON(prev->context.active < 1);
                prev->context.active--;
        }

 again:
#endif /* CONFIG_SMP */

        /* If we already have a valid assigned context, skip all that */
        id = next->context.id;
        if (likely(id != MMU_NO_CONTEXT)) {
#ifdef DEBUG_MAP_CONSISTENCY
                if (context_mm[id] != next)
                        pr_err("MMU: mm 0x%p has id %d but context_mm[%d] says 0x%p\n",
                               next, id, id, context_mm[id]);
#endif
                goto ctxt_ok;
        }

        /* We really don't have a context, let's try to acquire one */
        id = next_context;
        if (id > LAST_CONTEXT)
                id = FIRST_CONTEXT;
        map = context_map;

        /* No more free contexts, let's try to steal one */
        if (nr_free_contexts == 0) {
#ifdef CONFIG_SMP
                if (num_online_cpus() > 1) {
                        id = steal_context_smp(id);
                        if (id == MMU_NO_CONTEXT)
                                goto again;
                        goto stolen;
                }
#endif /* CONFIG_SMP */
                if (IS_ENABLED(CONFIG_PPC_8xx))
                        id = steal_all_contexts();
                else
                        id = steal_context_up(id);
                goto stolen;
        }
        nr_free_contexts--;

        /* We know there's at least one free context, try to find it */
        while (__test_and_set_bit(id, map)) {
                id = find_next_zero_bit(map, LAST_CONTEXT+1, id);
                if (id > LAST_CONTEXT)
                        id = FIRST_CONTEXT;
        }
 stolen:
        next_context = id + 1;
        context_mm[id] = next;
        next->context.id = id;
        pr_hardcont(" | new id=%d,nrf=%d", id, nr_free_contexts);

        context_check_map();
 ctxt_ok:

        /* If that context got marked stale on this CPU, then flush the
         * local TLB for it and unmark it before we use it
         */
#ifdef CONFIG_SMP
        if (test_bit(id, stale_map[cpu])) {
                pr_hardcont(" | stale flush %d [%d..%d]",
                            id, cpu_first_thread_sibling(cpu),
                            cpu_last_thread_sibling(cpu));

                local_flush_tlb_mm(next);

                /* XXX This clear should ultimately be part of local_flush_tlb_mm */
                for (i = cpu_first_thread_sibling(cpu);
                     i <= cpu_last_thread_sibling(cpu); i++) {
                        if (stale_map[i])
                                __clear_bit(id, stale_map[i]);
                }
        }
#endif

        /* Flick the MMU and release lock */
        pr_hardcont(" -> %d\n", id);
        set_context(id, next->pgd);
        raw_spin_unlock(&context_lock);
}

/*
 * Set up the context for a new address space.
 */
int init_new_context(struct task_struct *t, struct mm_struct *mm)
{
        pr_hard("initing context for mm @%p\n", mm);

        /*
         * We have MMU_NO_CONTEXT set to be ~0. Hence check
         * explicitly against context.id == 0. This ensures that we properly
         * initialize context slice details for newly allocated mm's (which will
         * have id == 0) and don't alter context slice inherited via fork (which
         * will have id != 0).
         */
        if (mm->context.id == 0)
                slice_init_new_context_exec(mm);
        mm->context.id = MMU_NO_CONTEXT;
        mm->context.active = 0;
        pte_frag_set(&mm->context, NULL);
        return 0;
}

/*
 * We're finished using the context for an address space.
 */
void destroy_context(struct mm_struct *mm)
{
        unsigned long flags;
        unsigned int id;

        if (mm->context.id == MMU_NO_CONTEXT)
                return;

        WARN_ON(mm->context.active != 0);

        raw_spin_lock_irqsave(&context_lock, flags);
        id = mm->context.id;
        if (id != MMU_NO_CONTEXT) {
                __clear_bit(id, context_map);
                mm->context.id = MMU_NO_CONTEXT;
#ifdef DEBUG_MAP_CONSISTENCY
                mm->context.active = 0;
#endif
                context_mm[id] = NULL;
                nr_free_contexts++;
        }
        raw_spin_unlock_irqrestore(&context_lock, flags);
}
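
/* CPU hotplug callbacks: secondary CPUs get their stale context map
 * allocated when they are prepared and freed when they go away. The boot
 * CPU's map is allocated once in mmu_context_init() and never freed.
 */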
#ifdef CONFIG_SMP
static int mmu_ctx_cpu_prepare(unsigned int cpu)
{
        /* We don't touch the boot CPU's map, it's allocated at boot and
         * kept around forever
         */
        if (cpu == boot_cpuid)
                return 0;

        pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu);
        stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL);
        return 0;
}

static int mmu_ctx_cpu_dead(unsigned int cpu)
{
#ifdef CONFIG_HOTPLUG_CPU
        if (cpu == boot_cpuid)
                return 0;

        pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu);
        kfree(stale_map[cpu]);
        stale_map[cpu] = NULL;

        /* We also clear the cpu_vm_mask bits of CPUs going away */
        clear_tasks_mm_cpumask(cpu);
#endif
        return 0;
}

#endif /* CONFIG_SMP */

/*
 * Initialize the context management stuff.
 */
void __init mmu_context_init(void)
{
        /* Mark init_mm as being active on all possible CPUs since
         * we'll get called with prev == init_mm the first time
         * we schedule on a given CPU
         */
        init_mm.context.active = NR_CPUS;

        /*
         * Allocate the maps used by context management
         */
        context_map = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES);
        if (!context_map)
                panic("%s: Failed to allocate %zu bytes\n", __func__,
                      CTX_MAP_SIZE);
        context_mm = memblock_alloc(sizeof(void *) * (LAST_CONTEXT + 1),
                                    SMP_CACHE_BYTES);
        if (!context_mm)
                panic("%s: Failed to allocate %zu bytes\n", __func__,
                      sizeof(void *) * (LAST_CONTEXT + 1));
#ifdef CONFIG_SMP
        stale_map[boot_cpuid] = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES);
        if (!stale_map[boot_cpuid])
                panic("%s: Failed to allocate %zu bytes\n", __func__,
                      CTX_MAP_SIZE);

        cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE,
                                  "powerpc/mmu/ctx:prepare",
                                  mmu_ctx_cpu_prepare, mmu_ctx_cpu_dead);
#endif

        printk(KERN_INFO
               "MMU: Allocated %zu bytes of context maps for %d contexts\n",
               2 * CTX_MAP_SIZE + (sizeof(void *) * (LAST_CONTEXT + 1)),
               LAST_CONTEXT - FIRST_CONTEXT + 1);

        /*
         * Some processors have too few contexts to reserve one for
         * init_mm, and require using context 0 for a normal task.
         * Other processors reserve the use of context zero for the kernel.
         * This code assumes FIRST_CONTEXT < 32 (with FIRST_CONTEXT == 1 it
         * just marks context 0 as in use).
         */
        context_map[0] = (1 << FIRST_CONTEXT) - 1;
        next_context = FIRST_CONTEXT;
        nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT + 1;
}