// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * This file contains the routines for handling the MMU on those
 * PowerPC implementations where the MMU is not using the hash
 * table, such as 8xx, 4xx, BookE's etc...
 *
 * Copyright 2008 Ben Herrenschmidt <benh@kernel.crashing.org>
 *                IBM Corp.
 *
 *  Derived from previous arch/powerpc/mm/mmu_context.c
 *  and arch/powerpc/include/asm/mmu_context.h
 *
 * TODO:
 *
 *   - The global context lock will not scale very well
 *   - The maps should be dynamically allocated to allow for processors
 *     that support more PID bits at runtime
 *   - Implement flush_tlb_mm() by making the context stale and picking
 *     a new one
 *   - More aggressively clear stale map bits and maybe find some way to
 *     also clear mm->cpu_vm_mask bits when processes are migrated
 */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/memblock.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/slab.h>

#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
#include <asm/smp.h>
#include <asm/kup.h>

#include <mm/mmu_decl.h>

/*
 * Room for two PTE table pointers, usually the kernel and current user
 * pointer to their respective root page table (pgdir).
 *
 * Slot 1 is refreshed with next->pgd by switch_mmu_context() when
 * CONFIG_BDI_SWITCH is enabled; slot 0 is not written in this file.
 */
void *abatron_pteptrs[2];

/*
 * The MPC8xx has only 16 contexts. We rotate through them on each task switch.
 * A better way would be to keep track of tasks that own contexts, and implement
 * an LRU usage. That way very active tasks don't always have to pay the TLB
 * reload overhead. The kernel pages are mapped shared, so the kernel can run on
 * behalf of any task that makes a kernel entry. Shared does not mean they are
 * not protected, just that the ASID comparison is not performed. -- Dan
 *
 * The IBM4xx has 256 contexts, so we can just rotate through these as a way of
 * "switching" contexts. If the TID of the TLB is zero, the PID/TID comparison
 * is disabled, so we can use a TID of zero to represent all kernel pages as
 * shared among all contexts. -- Dan
 *
 * The IBM 47x core supports 16-bit PIDs, thus 65535 contexts. We should
 * normally never have to steal though the facility is present if needed.
 * -- BenH
 */
/* Context ids handed out to user mms lie in [FIRST_CONTEXT, LAST_CONTEXT] */
#define FIRST_CONTEXT 1
#if defined(CONFIG_PPC_8xx)
#define LAST_CONTEXT 16
#elif defined(CONFIG_PPC_47x)
#define LAST_CONTEXT 65535
#else
#define LAST_CONTEXT 255
#endif

/*
 * Allocator bookkeeping, updated with context_lock held in
 * switch_mmu_context() and destroy_context():
 *
 *  next_context     - id at which the next search for a context starts;
 *                     set to "id + 1" each time an id is handed out
 *  nr_free_contexts - count of ids currently not assigned to any mm
 *  context_map      - bitmap indexed by context id, bit set while in use
 *  stale_map[cpu]   - per-CPU bitmap indexed by context id; a set bit makes
 *                     switch_mmu_context() flush the local TLB for that
 *                     mm before using the id on that CPU
 *  context_mm       - table indexed by context id giving the owning mm
 */
static unsigned int next_context, nr_free_contexts;
static unsigned long *context_map;
static unsigned long *stale_map[NR_CPUS];
static struct mm_struct **context_mm;
static DEFINE_RAW_SPINLOCK(context_lock);

/* Bytes needed for a bitmap holding one bit per id in 0..LAST_CONTEXT */
#define CTX_MAP_SIZE	\
	(sizeof(unsigned long) * (LAST_CONTEXT / BITS_PER_LONG + 1))


/* Steal a context from a task that has one at the moment.
 *
 * This is used when we are running out of available PID numbers
 * on the processors.
 *
 * This isn't an LRU system, it just frees up each context in
 * turn (sort-of pseudo-random replacement :).  This would be the
 * place to implement an LRU scheme if anyone was motivated to do it.
 *  -- paulus
 *
 * For context stealing, we use a slightly different approach for
 * SMP and UP.
Basically, the UP one is simpler and doesn't use 94 * the stale map as we can just flush the local CPU 95 * -- benh 96 */ 97 static unsigned int steal_context_smp(unsigned int id) 98 { 99 struct mm_struct *mm; 100 unsigned int cpu, max, i; 101 102 max = LAST_CONTEXT - FIRST_CONTEXT; 103 104 /* Attempt to free next_context first and then loop until we manage */ 105 while (max--) { 106 /* Pick up the victim mm */ 107 mm = context_mm[id]; 108 109 /* We have a candidate victim, check if it's active, on SMP 110 * we cannot steal active contexts 111 */ 112 if (mm->context.active) { 113 id++; 114 if (id > LAST_CONTEXT) 115 id = FIRST_CONTEXT; 116 continue; 117 } 118 119 /* Mark this mm has having no context anymore */ 120 mm->context.id = MMU_NO_CONTEXT; 121 122 /* Mark it stale on all CPUs that used this mm. For threaded 123 * implementations, we set it on all threads on each core 124 * represented in the mask. A future implementation will use 125 * a core map instead but this will do for now. 
126 */ 127 for_each_cpu(cpu, mm_cpumask(mm)) { 128 for (i = cpu_first_thread_sibling(cpu); 129 i <= cpu_last_thread_sibling(cpu); i++) { 130 if (stale_map[i]) 131 __set_bit(id, stale_map[i]); 132 } 133 cpu = i - 1; 134 } 135 return id; 136 } 137 138 /* This will happen if you have more CPUs than available contexts, 139 * all we can do here is wait a bit and try again 140 */ 141 raw_spin_unlock(&context_lock); 142 cpu_relax(); 143 raw_spin_lock(&context_lock); 144 145 /* This will cause the caller to try again */ 146 return MMU_NO_CONTEXT; 147 } 148 149 static unsigned int steal_all_contexts(void) 150 { 151 struct mm_struct *mm; 152 int cpu = smp_processor_id(); 153 unsigned int id; 154 155 for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) { 156 /* Pick up the victim mm */ 157 mm = context_mm[id]; 158 159 /* Mark this mm as having no context anymore */ 160 mm->context.id = MMU_NO_CONTEXT; 161 if (id != FIRST_CONTEXT) { 162 context_mm[id] = NULL; 163 __clear_bit(id, context_map); 164 } 165 if (IS_ENABLED(CONFIG_SMP)) 166 __clear_bit(id, stale_map[cpu]); 167 } 168 169 /* Flush the TLB for all contexts (not to be used on SMP) */ 170 _tlbil_all(); 171 172 nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT; 173 174 return FIRST_CONTEXT; 175 } 176 177 /* Note that this will also be called on SMP if all other CPUs are 178 * offlined, which means that it may be called for cpu != 0. 
For 179 * this to work, we somewhat assume that CPUs that are onlined 180 * come up with a fully clean TLB (or are cleaned when offlined) 181 */ 182 static unsigned int steal_context_up(unsigned int id) 183 { 184 struct mm_struct *mm; 185 int cpu = smp_processor_id(); 186 187 /* Pick up the victim mm */ 188 mm = context_mm[id]; 189 190 /* Flush the TLB for that context */ 191 local_flush_tlb_mm(mm); 192 193 /* Mark this mm has having no context anymore */ 194 mm->context.id = MMU_NO_CONTEXT; 195 196 /* XXX This clear should ultimately be part of local_flush_tlb_mm */ 197 if (IS_ENABLED(CONFIG_SMP)) 198 __clear_bit(id, stale_map[cpu]); 199 200 return id; 201 } 202 203 static void set_context(unsigned long id, pgd_t *pgd) 204 { 205 if (IS_ENABLED(CONFIG_PPC_8xx)) { 206 s16 offset = (s16)(__pa(swapper_pg_dir)); 207 208 /* 209 * Register M_TWB will contain base address of level 1 table minus the 210 * lower part of the kernel PGDIR base address, so that all accesses to 211 * level 1 table are done relative to lower part of kernel PGDIR base 212 * address. 213 */ 214 mtspr(SPRN_M_TWB, __pa(pgd) - offset); 215 216 /* Update context */ 217 mtspr(SPRN_M_CASID, id - 1); 218 219 /* sync */ 220 mb(); 221 } else if (kuap_is_disabled()) { 222 if (IS_ENABLED(CONFIG_40x)) 223 mb(); /* sync */ 224 225 mtspr(SPRN_PID, id); 226 isync(); 227 } 228 } 229 230 void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, 231 struct task_struct *tsk) 232 { 233 unsigned int id; 234 unsigned int i, cpu = smp_processor_id(); 235 unsigned long *map; 236 237 /* No lockless fast path .. 
yet */ 238 raw_spin_lock(&context_lock); 239 240 if (IS_ENABLED(CONFIG_SMP)) { 241 /* Mark us active and the previous one not anymore */ 242 next->context.active++; 243 if (prev) { 244 WARN_ON(prev->context.active < 1); 245 prev->context.active--; 246 } 247 } 248 249 again: 250 251 /* If we already have a valid assigned context, skip all that */ 252 id = next->context.id; 253 if (likely(id != MMU_NO_CONTEXT)) 254 goto ctxt_ok; 255 256 /* We really don't have a context, let's try to acquire one */ 257 id = next_context; 258 if (id > LAST_CONTEXT) 259 id = FIRST_CONTEXT; 260 map = context_map; 261 262 /* No more free contexts, let's try to steal one */ 263 if (nr_free_contexts == 0) { 264 if (num_online_cpus() > 1) { 265 id = steal_context_smp(id); 266 if (id == MMU_NO_CONTEXT) 267 goto again; 268 goto stolen; 269 } 270 if (IS_ENABLED(CONFIG_PPC_8xx)) 271 id = steal_all_contexts(); 272 else 273 id = steal_context_up(id); 274 goto stolen; 275 } 276 nr_free_contexts--; 277 278 /* We know there's at least one free context, try to find it */ 279 while (__test_and_set_bit(id, map)) { 280 id = find_next_zero_bit(map, LAST_CONTEXT+1, id); 281 if (id > LAST_CONTEXT) 282 id = FIRST_CONTEXT; 283 } 284 stolen: 285 next_context = id + 1; 286 context_mm[id] = next; 287 next->context.id = id; 288 289 ctxt_ok: 290 291 /* If that context got marked stale on this CPU, then flush the 292 * local TLB for it and unmark it before we use it 293 */ 294 if (IS_ENABLED(CONFIG_SMP) && test_bit(id, stale_map[cpu])) { 295 local_flush_tlb_mm(next); 296 297 /* XXX This clear should ultimately be part of local_flush_tlb_mm */ 298 for (i = cpu_first_thread_sibling(cpu); 299 i <= cpu_last_thread_sibling(cpu); i++) { 300 if (stale_map[i]) 301 __clear_bit(id, stale_map[i]); 302 } 303 } 304 305 /* Flick the MMU and release lock */ 306 if (IS_ENABLED(CONFIG_BDI_SWITCH)) 307 abatron_pteptrs[1] = next->pgd; 308 set_context(id, next->pgd); 309 #if defined(CONFIG_BOOKE_OR_40x) && defined(CONFIG_PPC_KUAP) 
310 tsk->thread.pid = id; 311 #endif 312 raw_spin_unlock(&context_lock); 313 } 314 315 /* 316 * Set up the context for a new address space. 317 */ 318 int init_new_context(struct task_struct *t, struct mm_struct *mm) 319 { 320 /* 321 * We have MMU_NO_CONTEXT set to be ~0. Hence check 322 * explicitly against context.id == 0. This ensures that we properly 323 * initialize context slice details for newly allocated mm's (which will 324 * have id == 0) and don't alter context slice inherited via fork (which 325 * will have id != 0). 326 */ 327 if (mm->context.id == 0) 328 slice_init_new_context_exec(mm); 329 mm->context.id = MMU_NO_CONTEXT; 330 mm->context.active = 0; 331 pte_frag_set(&mm->context, NULL); 332 return 0; 333 } 334 335 /* 336 * We're finished using the context for an address space. 337 */ 338 void destroy_context(struct mm_struct *mm) 339 { 340 unsigned long flags; 341 unsigned int id; 342 343 if (mm->context.id == MMU_NO_CONTEXT) 344 return; 345 346 WARN_ON(mm->context.active != 0); 347 348 raw_spin_lock_irqsave(&context_lock, flags); 349 id = mm->context.id; 350 if (id != MMU_NO_CONTEXT) { 351 __clear_bit(id, context_map); 352 mm->context.id = MMU_NO_CONTEXT; 353 context_mm[id] = NULL; 354 nr_free_contexts++; 355 } 356 raw_spin_unlock_irqrestore(&context_lock, flags); 357 } 358 359 static int mmu_ctx_cpu_prepare(unsigned int cpu) 360 { 361 /* We don't touch CPU 0 map, it's allocated at aboot and kept 362 * around forever 363 */ 364 if (cpu == boot_cpuid) 365 return 0; 366 367 stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL); 368 return 0; 369 } 370 371 static int mmu_ctx_cpu_dead(unsigned int cpu) 372 { 373 #ifdef CONFIG_HOTPLUG_CPU 374 if (cpu == boot_cpuid) 375 return 0; 376 377 kfree(stale_map[cpu]); 378 stale_map[cpu] = NULL; 379 380 /* We also clear the cpu_vm_mask bits of CPUs going away */ 381 clear_tasks_mm_cpumask(cpu); 382 #endif 383 return 0; 384 } 385 386 /* 387 * Initialize the context management stuff. 
388 */ 389 void __init mmu_context_init(void) 390 { 391 /* Mark init_mm as being active on all possible CPUs since 392 * we'll get called with prev == init_mm the first time 393 * we schedule on a given CPU 394 */ 395 init_mm.context.active = NR_CPUS; 396 397 /* 398 * Allocate the maps used by context management 399 */ 400 context_map = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES); 401 if (!context_map) 402 panic("%s: Failed to allocate %zu bytes\n", __func__, 403 CTX_MAP_SIZE); 404 context_mm = memblock_alloc(sizeof(void *) * (LAST_CONTEXT + 1), 405 SMP_CACHE_BYTES); 406 if (!context_mm) 407 panic("%s: Failed to allocate %zu bytes\n", __func__, 408 sizeof(void *) * (LAST_CONTEXT + 1)); 409 if (IS_ENABLED(CONFIG_SMP)) { 410 stale_map[boot_cpuid] = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES); 411 if (!stale_map[boot_cpuid]) 412 panic("%s: Failed to allocate %zu bytes\n", __func__, 413 CTX_MAP_SIZE); 414 415 cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE, 416 "powerpc/mmu/ctx:prepare", 417 mmu_ctx_cpu_prepare, mmu_ctx_cpu_dead); 418 } 419 420 printk(KERN_INFO 421 "MMU: Allocated %zu bytes of context maps for %d contexts\n", 422 2 * CTX_MAP_SIZE + (sizeof(void *) * (LAST_CONTEXT + 1)), 423 LAST_CONTEXT - FIRST_CONTEXT + 1); 424 425 /* 426 * Some processors have too few contexts to reserve one for 427 * init_mm, and require using context 0 for a normal task. 428 * Other processors reserve the use of context zero for the kernel. 429 * This code assumes FIRST_CONTEXT < 32. 430 */ 431 context_map[0] = (1 << FIRST_CONTEXT) - 1; 432 next_context = FIRST_CONTEXT; 433 nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT + 1; 434 } 435