// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * MMU context allocation for 64-bit kernels.
 *
 * Copyright (C) 2004 Anton Blanchard, IBM Corp. <anton@samba.org>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/pkeys.h>
#include <linux/spinlock.h>
#include <linux/idr.h>
#include <linux/export.h>
#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/cpu.h>

#include <asm/mmu_context.h>
#include <asm/pgalloc.h>

#include "internal.h"

static DEFINE_IDA(mmu_context_ida);

static int alloc_context_id(int min_id, int max_id)
{
        return ida_alloc_range(&mmu_context_ida, min_id, max_id, GFP_KERNEL);
}

void hash__reserve_context_id(int id)
{
        int result = ida_alloc_range(&mmu_context_ida, id, id, GFP_KERNEL);

        WARN(result != id, "mmu: Failed to reserve context id %d (rc %d)\n", id, result);
}

int hash__alloc_context_id(void)
{
        unsigned long max;

        if (mmu_has_feature(MMU_FTR_68_BIT_VA))
                max = MAX_USER_CONTEXT;
        else
                max = MAX_USER_CONTEXT_65BIT_VA;

        return alloc_context_id(MIN_USER_CONTEXT, max);
}
EXPORT_SYMBOL_GPL(hash__alloc_context_id);

static int realloc_context_ids(mm_context_t *ctx)
{
        int i, id;

        /*
         * id 0 (aka. ctx->id) is special, we always allocate a new one, even if
         * there wasn't one allocated previously (which happens in the exec
         * case where ctx is newly allocated).
         *
         * We have to be a bit careful here. We must keep the existing ids in
         * the array, so that we can test if they're non-zero to decide if we
         * need to allocate a new one. However in case of error we must free the
         * ids we've allocated but *not* any of the existing ones (or risk a
         * UAF). That's why we decrement i at the start of the error handling
         * loop, to skip the id that we just tested but couldn't reallocate.
         */
        for (i = 0; i < ARRAY_SIZE(ctx->extended_id); i++) {
                if (i == 0 || ctx->extended_id[i]) {
                        id = hash__alloc_context_id();
                        if (id < 0)
                                goto error;

                        ctx->extended_id[i] = id;
                }
        }

        /* The caller expects us to return id */
        return ctx->id;

error:
        for (i--; i >= 0; i--) {
                if (ctx->extended_id[i])
                        ida_free(&mmu_context_ida, ctx->extended_id[i]);
        }

        return id;
}

static int hash__init_new_context(struct mm_struct *mm)
{
        int index;

        mm->context.hash_context = kmalloc(sizeof(struct hash_mm_context),
                                           GFP_KERNEL);
        if (!mm->context.hash_context)
                return -ENOMEM;

        /*
         * The old code would re-promote on fork, we don't do that when using
         * slices as it could cause problems promoting slices that have been
         * forced down to 4K.
         *
         * For book3s we have MMU_NO_CONTEXT set to be ~0. Hence check
         * explicitly against context.id == 0. This ensures that we properly
         * initialize context slice details for newly allocated mm's (which will
         * have id == 0) and don't alter context slice details inherited via
         * fork (which will have id != 0).
         *
         * We should not be calling init_new_context() on init_mm. Hence a
         * check against 0 is OK.
         */
        if (mm->context.id == 0) {
                memset(mm->context.hash_context, 0, sizeof(struct hash_mm_context));
                slice_init_new_context_exec(mm);
        } else {
                /* This is fork. Copy hash_context details from current->mm */
                memcpy(mm->context.hash_context, current->mm->context.hash_context,
                       sizeof(struct hash_mm_context));
#ifdef CONFIG_PPC_SUBPAGE_PROT
                /* inherit subpage prot details if we have one. */
                if (current->mm->context.hash_context->spt) {
                        mm->context.hash_context->spt = kmalloc(sizeof(struct subpage_prot_table),
                                                                GFP_KERNEL);
                        if (!mm->context.hash_context->spt) {
                                kfree(mm->context.hash_context);
                                return -ENOMEM;
                        }
                }
#endif
        }

        index = realloc_context_ids(&mm->context);
        if (index < 0) {
#ifdef CONFIG_PPC_SUBPAGE_PROT
                kfree(mm->context.hash_context->spt);
#endif
                kfree(mm->context.hash_context);
                return index;
        }

        pkey_mm_init(mm);
        return index;
}

void hash__setup_new_exec(void)
{
        slice_setup_new_exec();

        slb_setup_new_exec();
}

static int radix__init_new_context(struct mm_struct *mm)
{
        unsigned long rts_field;
        int index, max_id;

        max_id = (1 << mmu_pid_bits) - 1;
        index = alloc_context_id(mmu_base_pid, max_id);
        if (index < 0)
                return index;

        /*
         * Set up the process table entry.
         */
        rts_field = radix__get_tree_size();
        process_tb[index].prtb0 = cpu_to_be64(rts_field | __pa(mm->pgd) |
                                              RADIX_PGD_INDEX_SIZE);

        /*
         * Order the above store with subsequent update of the PID
         * register (at which point HW can start loading/caching
         * the entry) and the corresponding load by the MMU from
         * the L2 cache.
         */
        asm volatile("ptesync;isync" : : : "memory");

        mm->context.hash_context = NULL;

        return index;
}

int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
        int index;

        if (radix_enabled())
                index = radix__init_new_context(mm);
        else
                index = hash__init_new_context(mm);

        if (index < 0)
                return index;

        mm->context.id = index;

        mm->context.pte_frag = NULL;
        mm->context.pmd_frag = NULL;
#ifdef CONFIG_SPAPR_TCE_IOMMU
        mm_iommu_init(mm);
#endif
        atomic_set(&mm->context.active_cpus, 0);
        atomic_set(&mm->context.copros, 0);

        return 0;
}

void __destroy_context(int context_id)
{
        ida_free(&mmu_context_ida, context_id);
}
EXPORT_SYMBOL_GPL(__destroy_context);

static void destroy_contexts(mm_context_t *ctx)
{
        int index, context_id;

        for (index = 0; index < ARRAY_SIZE(ctx->extended_id); index++) {
                context_id = ctx->extended_id[index];
                if (context_id)
                        ida_free(&mmu_context_ida, context_id);
        }
        kfree(ctx->hash_context);
}

static void pmd_frag_destroy(void *pmd_frag)
{
        int count;
        struct page *page;

        page = virt_to_page(pmd_frag);
        /* drop all the pending references */
        count = ((unsigned long)pmd_frag & ~PAGE_MASK) >> PMD_FRAG_SIZE_SHIFT;
        /* We allow PMD_FRAG_NR fragments from a PMD page */
        if (atomic_sub_and_test(PMD_FRAG_NR - count, &page->pt_frag_refcount)) {
                pgtable_pmd_page_dtor(page);
                __free_page(page);
        }
}

static void destroy_pagetable_cache(struct mm_struct *mm)
{
        void *frag;

        frag = mm->context.pte_frag;
        if (frag)
                pte_frag_destroy(frag);

        frag = mm->context.pmd_frag;
        if (frag)
                pmd_frag_destroy(frag);
        return;
}

void destroy_context(struct mm_struct *mm)
{
#ifdef CONFIG_SPAPR_TCE_IOMMU
        WARN_ON_ONCE(!list_empty(&mm->context.iommu_group_mem_list));
#endif
        /*
         * For tasks which were successfully initialized we end up calling
         * arch_exit_mmap() which clears the process table entry. And
         * arch_exit_mmap() is called before the required fullmm TLB flush
         * which does a RIC=2 flush. Hence for an initialized task, we do clear
         * any cached process table entries.
         *
         * The condition below handles the error case during task init. We have
         * set the process table entry early and if we fail a task
         * initialization, we need to ensure the process table entry is zeroed.
         * We need not worry about process table entry caches because the task
         * never ran with the PID value.
         */
        if (radix_enabled())
                process_tb[mm->context.id].prtb0 = 0;
        else
                subpage_prot_free(mm);
        destroy_contexts(&mm->context);
        mm->context.id = MMU_NO_CONTEXT;
}

void arch_exit_mmap(struct mm_struct *mm)
{
        destroy_pagetable_cache(mm);

        if (radix_enabled()) {
                /*
                 * Radix doesn't have a valid bit in the process table
                 * entries. However we know that at least P9 implementation
                 * will avoid caching an entry with an invalid RTS field,
                 * and 0 is invalid. So this will do.
                 *
                 * This runs before the "fullmm" tlb flush in exit_mmap,
                 * which does a RIC=2 tlbie to clear the process table
                 * entry. See the "fullmm" comments in tlb-radix.c.
                 *
                 * No barrier required here after the store because
                 * this process will do the invalidate, which starts with
                 * ptesync.
                 */
                process_tb[mm->context.id].prtb0 = 0;
        }
}

#ifdef CONFIG_PPC_RADIX_MMU
void radix__switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
{
        mtspr(SPRN_PID, next->context.id);
        isync();
}
#endif

/**
 * cleanup_cpu_mmu_context - Clean up MMU details for this CPU (newly offlined)
 *
 * This clears the CPU from mm_cpumask for all processes, and then flushes the
 * local TLB to ensure TLB coherency in case the CPU is onlined again.
 *
 * KVM guest translations are not necessarily flushed here. If KVM started
 * using mm_cpumask or the Linux APIs which do, this would have to be resolved.
 */
#ifdef CONFIG_HOTPLUG_CPU
void cleanup_cpu_mmu_context(void)
{
        int cpu = smp_processor_id();

        clear_tasks_mm_cpumask(cpu);
        tlbiel_all();
}
#endif