12874c5fdSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later 227e23b5fSChristophe Leroy /* 327e23b5fSChristophe Leroy * This file contains the routines for handling the MMU on those 427e23b5fSChristophe Leroy * PowerPC implementations where the MMU is not using the hash 527e23b5fSChristophe Leroy * table, such as 8xx, 4xx, BookE's etc... 627e23b5fSChristophe Leroy * 727e23b5fSChristophe Leroy * Copyright 2008 Ben Herrenschmidt <benh@kernel.crashing.org> 827e23b5fSChristophe Leroy * IBM Corp. 927e23b5fSChristophe Leroy * 1027e23b5fSChristophe Leroy * Derived from previous arch/powerpc/mm/mmu_context.c 1127e23b5fSChristophe Leroy * and arch/powerpc/include/asm/mmu_context.h 1227e23b5fSChristophe Leroy * 1327e23b5fSChristophe Leroy * TODO: 1427e23b5fSChristophe Leroy * 1527e23b5fSChristophe Leroy * - The global context lock will not scale very well 1627e23b5fSChristophe Leroy * - The maps should be dynamically allocated to allow for processors 1727e23b5fSChristophe Leroy * that support more PID bits at runtime 1827e23b5fSChristophe Leroy * - Implement flush_tlb_mm() by making the context stale and picking 1927e23b5fSChristophe Leroy * a new one 2027e23b5fSChristophe Leroy * - More aggressively clear stale map bits and maybe find some way to 2127e23b5fSChristophe Leroy * also clear mm->cpu_vm_mask bits when processes are migrated 2227e23b5fSChristophe Leroy */ 2327e23b5fSChristophe Leroy 2427e23b5fSChristophe Leroy //#define DEBUG_MAP_CONSISTENCY 2527e23b5fSChristophe Leroy //#define DEBUG_CLAMP_LAST_CONTEXT 31 2627e23b5fSChristophe Leroy //#define DEBUG_HARDER 2727e23b5fSChristophe Leroy 2827e23b5fSChristophe Leroy /* We don't use DEBUG because it tends to be compiled in always nowadays 2927e23b5fSChristophe Leroy * and this would generate way too much output 3027e23b5fSChristophe Leroy */ 3127e23b5fSChristophe Leroy #ifdef DEBUG_HARDER 3227e23b5fSChristophe Leroy #define pr_hard(args...) 
printk(KERN_DEBUG args) 3327e23b5fSChristophe Leroy #define pr_hardcont(args...) printk(KERN_CONT args) 3427e23b5fSChristophe Leroy #else 3527e23b5fSChristophe Leroy #define pr_hard(args...) do { } while(0) 3627e23b5fSChristophe Leroy #define pr_hardcont(args...) do { } while(0) 3727e23b5fSChristophe Leroy #endif 3827e23b5fSChristophe Leroy 3927e23b5fSChristophe Leroy #include <linux/kernel.h> 4027e23b5fSChristophe Leroy #include <linux/mm.h> 4127e23b5fSChristophe Leroy #include <linux/init.h> 4227e23b5fSChristophe Leroy #include <linux/spinlock.h> 4327e23b5fSChristophe Leroy #include <linux/memblock.h> 4427e23b5fSChristophe Leroy #include <linux/notifier.h> 4527e23b5fSChristophe Leroy #include <linux/cpu.h> 4627e23b5fSChristophe Leroy #include <linux/slab.h> 4727e23b5fSChristophe Leroy 4827e23b5fSChristophe Leroy #include <asm/mmu_context.h> 4927e23b5fSChristophe Leroy #include <asm/tlbflush.h> 5027e23b5fSChristophe Leroy 5127e23b5fSChristophe Leroy #include <mm/mmu_decl.h> 5227e23b5fSChristophe Leroy 5327e23b5fSChristophe Leroy /* 5425910260SChristophe Leroy * Room for two PTE table pointers, usually the kernel and current user 5525910260SChristophe Leroy * pointer to their respective root page table (pgdir). 5625910260SChristophe Leroy */ 5725910260SChristophe Leroy void *abatron_pteptrs[2]; 5825910260SChristophe Leroy 5925910260SChristophe Leroy /* 6027e23b5fSChristophe Leroy * The MPC8xx has only 16 contexts. We rotate through them on each task switch. 6127e23b5fSChristophe Leroy * A better way would be to keep track of tasks that own contexts, and implement 6227e23b5fSChristophe Leroy * an LRU usage. That way very active tasks don't always have to pay the TLB 6327e23b5fSChristophe Leroy * reload overhead. The kernel pages are mapped shared, so the kernel can run on 6427e23b5fSChristophe Leroy * behalf of any task that makes a kernel entry. 
Shared does not mean they are 6527e23b5fSChristophe Leroy * not protected, just that the ASID comparison is not performed. -- Dan 6627e23b5fSChristophe Leroy * 6727e23b5fSChristophe Leroy * The IBM4xx has 256 contexts, so we can just rotate through these as a way of 6827e23b5fSChristophe Leroy * "switching" contexts. If the TID of the TLB is zero, the PID/TID comparison 6927e23b5fSChristophe Leroy * is disabled, so we can use a TID of zero to represent all kernel pages as 7027e23b5fSChristophe Leroy * shared among all contexts. -- Dan 7127e23b5fSChristophe Leroy * 7227e23b5fSChristophe Leroy * The IBM 47x core supports 16-bit PIDs, thus 65535 contexts. We should 7327e23b5fSChristophe Leroy * normally never have to steal though the facility is present if needed. 7427e23b5fSChristophe Leroy * -- BenH 7527e23b5fSChristophe Leroy */ 7627e23b5fSChristophe Leroy #define FIRST_CONTEXT 1 7727e23b5fSChristophe Leroy #ifdef DEBUG_CLAMP_LAST_CONTEXT 7827e23b5fSChristophe Leroy #define LAST_CONTEXT DEBUG_CLAMP_LAST_CONTEXT 7927e23b5fSChristophe Leroy #elif defined(CONFIG_PPC_8xx) 8027e23b5fSChristophe Leroy #define LAST_CONTEXT 16 8127e23b5fSChristophe Leroy #elif defined(CONFIG_PPC_47x) 8227e23b5fSChristophe Leroy #define LAST_CONTEXT 65535 8327e23b5fSChristophe Leroy #else 8427e23b5fSChristophe Leroy #define LAST_CONTEXT 255 8527e23b5fSChristophe Leroy #endif 8627e23b5fSChristophe Leroy 8727e23b5fSChristophe Leroy static unsigned int next_context, nr_free_contexts; 8827e23b5fSChristophe Leroy static unsigned long *context_map; 8927e23b5fSChristophe Leroy #ifdef CONFIG_SMP 9027e23b5fSChristophe Leroy static unsigned long *stale_map[NR_CPUS]; 9127e23b5fSChristophe Leroy #endif 9227e23b5fSChristophe Leroy static struct mm_struct **context_mm; 9327e23b5fSChristophe Leroy static DEFINE_RAW_SPINLOCK(context_lock); 9427e23b5fSChristophe Leroy 9527e23b5fSChristophe Leroy #define CTX_MAP_SIZE \ 9627e23b5fSChristophe Leroy (sizeof(unsigned long) * (LAST_CONTEXT / BITS_PER_LONG + 1)) 


/* Steal a context from a task that has one at the moment.
 *
 * This is used when we are running out of available PID numbers
 * on the processors.
 *
 * This isn't an LRU system, it just frees up each context in
 * turn (sort-of pseudo-random replacement :). This would be the
 * place to implement an LRU scheme if anyone was motivated to do it.
 *  -- paulus
 *
 * For context stealing, we use a slightly different approach for
 * SMP and UP. Basically, the UP one is simpler and doesn't use
 * the stale map as we can just flush the local CPU
 *  -- benh
 */
#ifdef CONFIG_SMP
/* Try to steal a context starting from candidate @id.
 *
 * Must be called with context_lock held; note that the lock is
 * temporarily dropped (around cpu_relax()) when no inactive context
 * can be found, so callers must re-validate their state and retry
 * when MMU_NO_CONTEXT is returned.
 *
 * Returns the stolen context id, or MMU_NO_CONTEXT if every context
 * was active (more CPUs than contexts) and the caller should retry.
 */
static unsigned int steal_context_smp(unsigned int id)
{
	struct mm_struct *mm;
	unsigned int cpu, max, i;

	/* Upper bound on candidates: scan the whole id space at most once */
	max = LAST_CONTEXT - FIRST_CONTEXT;

	/* Attempt to free next_context first and then loop until we manage */
	while (max--) {
		/* Pick up the victim mm */
		mm = context_mm[id];

		/* We have a candidate victim, check if it's active, on SMP
		 * we cannot steal active contexts
		 */
		if (mm->context.active) {
			id++;
			if (id > LAST_CONTEXT)
				id = FIRST_CONTEXT;
			continue;
		}
		pr_hardcont(" | steal %d from 0x%p", id, mm);

		/* Mark this mm as having no context anymore */
		mm->context.id = MMU_NO_CONTEXT;

		/* Mark it stale on all CPUs that used this mm. For threaded
		 * implementations, we set it on all threads on each core
		 * represented in the mask. A future implementation will use
		 * a core map instead but this will do for now.
		 */
		for_each_cpu(cpu, mm_cpumask(mm)) {
			for (i = cpu_first_thread_sibling(cpu);
			     i <= cpu_last_thread_sibling(cpu); i++) {
				if (stale_map[i])
					__set_bit(id, stale_map[i]);
			}
			/* Skip for_each_cpu ahead past this core's siblings */
			cpu = i - 1;
		}
		return id;
	}

	/* This will happen if you have more CPUs than available contexts,
	 * all we can do here is wait a bit and try again
	 */
	raw_spin_unlock(&context_lock);
	cpu_relax();
	raw_spin_lock(&context_lock);

	/* This will cause the caller to try again */
	return MMU_NO_CONTEXT;
}
#endif  /* CONFIG_SMP */


/* Steal every context at once and hand FIRST_CONTEXT to the caller.
 *
 * Used on 8xx (see switch_mmu_context) where the context space is tiny:
 * instead of stealing one id at a time, every mm is evicted and the whole
 * TLB is flushed in one go. Not to be used on SMP (_tlbil_all() only
 * flushes the local TLB). Called with context_lock held.
 */
static unsigned int steal_all_contexts(void)
{
	struct mm_struct *mm;
#ifdef CONFIG_SMP
	int cpu = smp_processor_id();
#endif
	unsigned int id;

	for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) {
		/* Pick up the victim mm */
		mm = context_mm[id];

		pr_hardcont(" | steal %d from 0x%p", id, mm);

		/* Mark this mm as having no context anymore */
		mm->context.id = MMU_NO_CONTEXT;
		/* FIRST_CONTEXT is kept: it is handed back to the caller */
		if (id != FIRST_CONTEXT) {
			context_mm[id] = NULL;
			__clear_bit(id, context_map);
#ifdef DEBUG_MAP_CONSISTENCY
			mm->context.active = 0;
#endif
		}
#ifdef CONFIG_SMP
		__clear_bit(id, stale_map[cpu]);
#endif
	}

	/* Flush the TLB for all contexts (not to be used on SMP) */
	_tlbil_all();

	/* All contexts freed except FIRST_CONTEXT, which we return */
	nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT;

	return FIRST_CONTEXT;
}

/* Note that this will also be called on SMP if all other CPUs are
 * offlined, which means that it may be called for cpu != 0. For
 * this to work, we somewhat assume that CPUs that are onlined
 * come up with a fully clean TLB (or are cleaned when offlined)
 *
 * Steal context @id from its current owner: flush the owner's TLB
 * entries on this CPU and disassociate the mm. Called with
 * context_lock held; always succeeds (returns @id).
 */
static unsigned int steal_context_up(unsigned int id)
{
	struct mm_struct *mm;
#ifdef CONFIG_SMP
	int cpu = smp_processor_id();
#endif

	/* Pick up the victim mm */
	mm = context_mm[id];

	pr_hardcont(" | steal %d from 0x%p", id, mm);

	/* Flush the TLB for that context */
	local_flush_tlb_mm(mm);

	/* Mark this mm as having no context anymore */
	mm->context.id = MMU_NO_CONTEXT;

	/* XXX This clear should ultimately be part of local_flush_tlb_mm */
#ifdef CONFIG_SMP
	__clear_bit(id, stale_map[cpu]);
#endif

	return id;
}

#ifdef DEBUG_MAP_CONSISTENCY
/* Debug-only sanity check: verify context_map, context_mm[] and
 * nr_free_contexts agree with each other. Called with context_lock held
 * (from switch_mmu_context).
 */
static void context_check_map(void)
{
	unsigned int id, nrf, nact;

	nrf = nact = 0;
	for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) {
		int used = test_bit(id, context_map);
		if (!used)
			nrf++;
		if (used != (context_mm[id] != NULL))
			pr_err("MMU: Context %d is %s and MM is %p !\n",
			       id, used ? "used" : "free", context_mm[id]);
		if (context_mm[id] != NULL)
			nact += context_mm[id]->context.active;
	}
	if (nrf != nr_free_contexts) {
		pr_err("MMU: Free context count out of sync ! (%d vs %d)\n",
		       nr_free_contexts, nrf);
		nr_free_contexts = nrf;
	}
	if (nact > num_online_cpus())
		pr_err("MMU: More active contexts than CPUs ! (%d vs %d)\n",
		       nact, num_online_cpus());
	if (FIRST_CONTEXT > 0 && !test_bit(0, context_map))
		pr_err("MMU: Context 0 has been freed !!!\n");
}
#else
static void context_check_map(void) { }
#endif

/* Program the MMU with the new context id and root page table. */
static void set_context(unsigned long id, pgd_t *pgd)
{
	if (IS_ENABLED(CONFIG_PPC_8xx)) {
		s16 offset = (s16)(__pa(swapper_pg_dir));

		/*
		 * Register M_TWB will contain base address of level 1 table minus the
		 * lower part of the kernel PGDIR base address, so that all accesses to
		 * level 1 table are done relative to lower part of kernel PGDIR base
		 * address.
		 */
		mtspr(SPRN_M_TWB, __pa(pgd) - offset);

		/* Update context: software ids start at FIRST_CONTEXT (1 on
		 * 8xx, see the defines above) while the hardware CASID starts
		 * at 0, hence the "- 1".
		 */
		mtspr(SPRN_M_CASID, id - 1);

		/* sync */
		mb();
	} else {
		if (IS_ENABLED(CONFIG_40x))
			mb();	/* sync */

		mtspr(SPRN_PID, id);
		isync();
	}
}

/* Switch the MMU to @next's address space, allocating (or stealing) a
 * context id for it if it does not already have one. Flushes any TLB
 * entries marked stale for the chosen id on this CPU before use, and
 * finally programs the MMU via set_context(). Serialized by
 * context_lock. @prev/@tsk follow the generic switch_mm() contract
 * (tsk is unused here).
 */
void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
			struct task_struct *tsk)
{
	unsigned int id;
#ifdef CONFIG_SMP
	unsigned int i, cpu = smp_processor_id();
#endif
	unsigned long *map;

	/* No lockless fast path .. yet */
	raw_spin_lock(&context_lock);

	pr_hard("[%d] activating context for mm @%p, active=%d, id=%d",
		cpu, next, next->context.active, next->context.id);

#ifdef CONFIG_SMP
	/* Mark us active and the previous one not anymore */
	next->context.active++;
	if (prev) {
		pr_hardcont(" (old=0x%p a=%d)", prev, prev->context.active);
		WARN_ON(prev->context.active < 1);
		prev->context.active--;
	}

again:
#endif /* CONFIG_SMP */

	/* If we already have a valid assigned context, skip all that */
	id = next->context.id;
	if (likely(id != MMU_NO_CONTEXT)) {
#ifdef DEBUG_MAP_CONSISTENCY
		if (context_mm[id] != next)
			pr_err("MMU: mm 0x%p has id %d but context_mm[%d] says 0x%p\n",
			       next, id, id, context_mm[id]);
#endif
		goto ctxt_ok;
	}

	/* We really don't have a context, let's try to acquire one */
	id = next_context;
	if (id > LAST_CONTEXT)
		id = FIRST_CONTEXT;
	map = context_map;

	/* No more free contexts, let's try to steal one */
	if (nr_free_contexts == 0) {
#ifdef CONFIG_SMP
		if (num_online_cpus() > 1) {
			/* steal_context_smp() may drop and re-take the lock,
			 * so on failure we must re-check everything */
			id = steal_context_smp(id);
			if (id == MMU_NO_CONTEXT)
				goto again;
			goto stolen;
		}
#endif /* CONFIG_SMP */
		if (IS_ENABLED(CONFIG_PPC_8xx))
			id = steal_all_contexts();
		else
			id = steal_context_up(id);
		goto stolen;
	}
	nr_free_contexts--;

	/* We know there's at least one free context, try to find it */
	while (__test_and_set_bit(id, map)) {
		id = find_next_zero_bit(map, LAST_CONTEXT+1, id);
		if (id > LAST_CONTEXT)
			id = FIRST_CONTEXT;
	}
stolen:
	next_context = id + 1;
	context_mm[id] = next;
	next->context.id = id;
	pr_hardcont(" | new id=%d,nrf=%d", id, nr_free_contexts);

	context_check_map();
ctxt_ok:

	/* If that context got marked stale on this CPU, then flush the
	 * local TLB for it and unmark it before we use it
	 */
#ifdef CONFIG_SMP
	if (test_bit(id, stale_map[cpu])) {
		pr_hardcont(" | stale flush %d [%d..%d]",
			    id, cpu_first_thread_sibling(cpu),
			    cpu_last_thread_sibling(cpu));

		local_flush_tlb_mm(next);

		/* XXX This clear should ultimately be part of local_flush_tlb_mm */
		for (i = cpu_first_thread_sibling(cpu);
		     i <= cpu_last_thread_sibling(cpu); i++) {
			if (stale_map[i])
				__clear_bit(id, stale_map[i]);
		}
	}
#endif

	/* Flick the MMU and release lock */
	pr_hardcont(" -> %d\n", id);
	/* Keep the Abatron BDI2000 debugger's view of the user pgdir in sync */
	if (IS_ENABLED(CONFIG_BDI_SWITCH))
		abatron_pteptrs[1] = next->pgd;
	set_context(id, next->pgd);
	raw_spin_unlock(&context_lock);
}

/*
 * Set up the context for a new address space.
 */
int init_new_context(struct task_struct *t, struct mm_struct *mm)
{
	pr_hard("initing context for mm @%p\n", mm);

	/*
	 * We have MMU_NO_CONTEXT set to be ~0. Hence check
	 * explicitly against context.id == 0. This ensures that we properly
	 * initialize context slice details for newly allocated mm's (which will
	 * have id == 0) and don't alter context slice inherited via fork (which
	 * will have id != 0).
	 */
	if (mm->context.id == 0)
		slice_init_new_context_exec(mm);
	/* Context is assigned lazily on first switch_mmu_context() */
	mm->context.id = MMU_NO_CONTEXT;
	mm->context.active = 0;
	pte_frag_set(&mm->context, NULL);
	return 0;
}

/*
 * We're finished using the context for an address space.
 * Releases the mm's context id (if any) back to the free pool.
 */
void destroy_context(struct mm_struct *mm)
{
	unsigned long flags;
	unsigned int id;

	if (mm->context.id == MMU_NO_CONTEXT)
		return;

	WARN_ON(mm->context.active != 0);

	raw_spin_lock_irqsave(&context_lock, flags);
	/* Re-read under the lock: it may have been stolen meanwhile */
	id = mm->context.id;
	if (id != MMU_NO_CONTEXT) {
		__clear_bit(id, context_map);
		mm->context.id = MMU_NO_CONTEXT;
#ifdef DEBUG_MAP_CONSISTENCY
		mm->context.active = 0;
#endif
		context_mm[id] = NULL;
		nr_free_contexts++;
	}
	raw_spin_unlock_irqrestore(&context_lock, flags);
}

#ifdef CONFIG_SMP
/* CPU hotplug "prepare" callback: allocate the stale-context map for a
 * CPU about to come online.
 */
static int mmu_ctx_cpu_prepare(unsigned int cpu)
{
	/* We don't touch CPU 0 map, it's allocated at boot and kept
	 * around forever
	 */
	if (cpu == boot_cpuid)
		return 0;

	pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu);
	stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL);
	return 0;
}

/* CPU hotplug "dead" callback: free the stale-context map of an
 * offlined CPU and drop it from all tasks' cpu_vm_mask.
 */
static int mmu_ctx_cpu_dead(unsigned int cpu)
{
#ifdef CONFIG_HOTPLUG_CPU
	if (cpu == boot_cpuid)
		return 0;

	pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu);
	kfree(stale_map[cpu]);
	stale_map[cpu] = NULL;

	/* We also clear the cpu_vm_mask bits of CPUs going away */
	clear_tasks_mm_cpumask(cpu);
#endif
	return 0;
}

#endif /* CONFIG_SMP */

/*
 * Initialize the context management stuff.
 * Allocates the context bitmap and owner array from memblock (panics on
 * failure, as this runs at early boot) and registers the CPU hotplug
 * callbacks on SMP.
 */
void __init mmu_context_init(void)
{
	/* Mark init_mm as being active on all possible CPUs since
	 * we'll get called with prev == init_mm the first time
	 * we schedule on a given CPU
	 */
	init_mm.context.active = NR_CPUS;

	/*
	 * Allocate the maps used by context management
	 */
	context_map = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES);
	if (!context_map)
		panic("%s: Failed to allocate %zu bytes\n", __func__,
		      CTX_MAP_SIZE);
	context_mm = memblock_alloc(sizeof(void *) * (LAST_CONTEXT + 1),
				    SMP_CACHE_BYTES);
	if (!context_mm)
		panic("%s: Failed to allocate %zu bytes\n", __func__,
		      sizeof(void *) * (LAST_CONTEXT + 1));
#ifdef CONFIG_SMP
	/* Boot CPU's stale map is permanent; secondaries get theirs from
	 * the hotplug prepare callback below */
	stale_map[boot_cpuid] = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES);
	if (!stale_map[boot_cpuid])
		panic("%s: Failed to allocate %zu bytes\n", __func__,
		      CTX_MAP_SIZE);

	cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE,
				  "powerpc/mmu/ctx:prepare",
				  mmu_ctx_cpu_prepare, mmu_ctx_cpu_dead);
#endif

	printk(KERN_INFO
	       "MMU: Allocated %zu bytes of context maps for %d contexts\n",
	       2 * CTX_MAP_SIZE + (sizeof(void *) * (LAST_CONTEXT + 1)),
	       LAST_CONTEXT - FIRST_CONTEXT + 1);

	/*
	 * Some processors have too few contexts to reserve one for
	 * init_mm, and require using context 0 for a normal task.
	 * Other processors reserve the use of context zero for the kernel.
	 * This code assumes FIRST_CONTEXT < 32.
	 */
	context_map[0] = (1 << FIRST_CONTEXT) - 1;
	next_context = FIRST_CONTEXT;
	nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT + 1;
}