// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * MMU context allocation for 64-bit kernels.
 *
 * Copyright (C) 2004 Anton Blanchard, IBM Corp. <anton@samba.org>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/pkeys.h>
#include <linux/spinlock.h>
#include <linux/idr.h>
#include <linux/export.h>
#include <linux/gfp.h>
#include <linux/slab.h>

#include <asm/mmu_context.h>
#include <asm/pgalloc.h>

static DEFINE_IDA(mmu_context_ida);

static int alloc_context_id(int min_id, int max_id)
{
	return ida_alloc_range(&mmu_context_ida, min_id, max_id, GFP_KERNEL);
}

void hash__reserve_context_id(int id)
{
	int result = ida_alloc_range(&mmu_context_ida, id, id, GFP_KERNEL);

	WARN(result != id, "mmu: Failed to reserve context id %d (rc %d)\n", id, result);
}

int hash__alloc_context_id(void)
{
	unsigned long max;

	if (mmu_has_feature(MMU_FTR_68_BIT_VA))
		max = MAX_USER_CONTEXT;
	else
		max = MAX_USER_CONTEXT_65BIT_VA;

	return alloc_context_id(MIN_USER_CONTEXT, max);
}
EXPORT_SYMBOL_GPL(hash__alloc_context_id);

void slb_setup_new_exec(void);

static int realloc_context_ids(mm_context_t *ctx)
{
	int i, id;

	/*
	 * id 0 (aka. ctx->id) is special, we always allocate a new one, even if
	 * there wasn't one allocated previously (which happens in the exec
	 * case where ctx is newly allocated).
	 *
	 * We have to be a bit careful here. We must keep the existing ids in
	 * the array, so that we can test if they're non-zero to decide if we
	 * need to allocate a new one. However in case of error we must free the
	 * ids we've allocated but *not* any of the existing ones (or risk a
	 * UAF). That's why we decrement i at the start of the error handling
	 * loop, to skip the id that we just tested but couldn't reallocate.
	 */
	for (i = 0; i < ARRAY_SIZE(ctx->extended_id); i++) {
		if (i == 0 || ctx->extended_id[i]) {
			id = hash__alloc_context_id();
			if (id < 0)
				goto error;

			ctx->extended_id[i] = id;
		}
	}

	/* The caller expects us to return id */
	return ctx->id;

error:
	for (i--; i >= 0; i--) {
		if (ctx->extended_id[i])
			ida_free(&mmu_context_ida, ctx->extended_id[i]);
	}

	return id;
}

static int hash__init_new_context(struct mm_struct *mm)
{
	int index;

	mm->context.hash_context = kmalloc(sizeof(struct hash_mm_context),
					   GFP_KERNEL);
	if (!mm->context.hash_context)
		return -ENOMEM;

	/*
	 * The old code would re-promote on fork, we don't do that when using
	 * slices as it could cause problems promoting slices that have been
	 * forced down to 4K.
	 *
	 * For book3s we have MMU_NO_CONTEXT set to be ~0. Hence check
	 * explicitly against context.id == 0. This ensures that we properly
	 * initialize context slice details for newly allocated mm's (which will
	 * have id == 0) and don't alter context slice inherited via fork (which
	 * will have id != 0).
	 *
	 * We should not be calling init_new_context() on init_mm. Hence a
	 * check against 0 is OK.
	 */
	if (mm->context.id == 0) {
		memset(mm->context.hash_context, 0, sizeof(struct hash_mm_context));
		slice_init_new_context_exec(mm);
	} else {
		/* This is fork. Copy hash_context details from current->mm */
		memcpy(mm->context.hash_context, current->mm->context.hash_context,
		       sizeof(struct hash_mm_context));
#ifdef CONFIG_PPC_SUBPAGE_PROT
		/* Inherit subpage prot details if we have them. */
		if (current->mm->context.hash_context->spt) {
			mm->context.hash_context->spt = kmalloc(sizeof(struct subpage_prot_table),
								GFP_KERNEL);
			if (!mm->context.hash_context->spt) {
				kfree(mm->context.hash_context);
				return -ENOMEM;
			}
		}
#endif
	}

	index = realloc_context_ids(&mm->context);
	if (index < 0) {
#ifdef CONFIG_PPC_SUBPAGE_PROT
		kfree(mm->context.hash_context->spt);
#endif
		kfree(mm->context.hash_context);
		return index;
	}

	pkey_mm_init(mm);
	return index;
}

void hash__setup_new_exec(void)
{
	slice_setup_new_exec();

	slb_setup_new_exec();
}

static int radix__init_new_context(struct mm_struct *mm)
{
	unsigned long rts_field;
	int index, max_id;

	max_id = (1 << mmu_pid_bits) - 1;
	index = alloc_context_id(mmu_base_pid, max_id);
	if (index < 0)
		return index;

	/*
	 * Set the process table entry.
	 */
	rts_field = radix__get_tree_size();
	process_tb[index].prtb0 = cpu_to_be64(rts_field | __pa(mm->pgd) | RADIX_PGD_INDEX_SIZE);

	/*
	 * Order the above store with subsequent update of the PID
	 * register (at which point HW can start loading/caching
	 * the entry) and the corresponding load by the MMU from
	 * the L2 cache.
	 */
	asm volatile("ptesync;isync" : : : "memory");

	mm->context.hash_context = NULL;

	return index;
}

int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
	int index;

	if (radix_enabled())
		index = radix__init_new_context(mm);
	else
		index = hash__init_new_context(mm);

	if (index < 0)
		return index;

	mm->context.id = index;

	mm->context.pte_frag = NULL;
	mm->context.pmd_frag = NULL;
#ifdef CONFIG_SPAPR_TCE_IOMMU
	mm_iommu_init(mm);
#endif
	atomic_set(&mm->context.active_cpus, 0);
	atomic_set(&mm->context.copros, 0);

	return 0;
}

void __destroy_context(int context_id)
{
	ida_free(&mmu_context_ida, context_id);
}
EXPORT_SYMBOL_GPL(__destroy_context);

static void destroy_contexts(mm_context_t *ctx)
{
	int index, context_id;

	for (index = 0; index < ARRAY_SIZE(ctx->extended_id); index++) {
		context_id = ctx->extended_id[index];
		if (context_id)
			ida_free(&mmu_context_ida, context_id);
	}
	kfree(ctx->hash_context);
}

static void pmd_frag_destroy(void *pmd_frag)
{
	int count;
	struct page *page;

	page = virt_to_page(pmd_frag);
	/* drop all the pending references */
	count = ((unsigned long)pmd_frag & ~PAGE_MASK) >> PMD_FRAG_SIZE_SHIFT;
	/* We allow PMD_FRAG_NR fragments from a PMD page */
	if (atomic_sub_and_test(PMD_FRAG_NR - count, &page->pt_frag_refcount)) {
		pgtable_pmd_page_dtor(page);
		__free_page(page);
	}
}

static void destroy_pagetable_cache(struct mm_struct *mm)
{
	void *frag;

	frag = mm->context.pte_frag;
	if (frag)
		pte_frag_destroy(frag);

	frag = mm->context.pmd_frag;
	if (frag)
		pmd_frag_destroy(frag);
}

void destroy_context(struct mm_struct *mm)
{
#ifdef CONFIG_SPAPR_TCE_IOMMU
	WARN_ON_ONCE(!list_empty(&mm->context.iommu_group_mem_list));
#endif
	/*
	 * For tasks which were successfully initialized we end up calling
	 * arch_exit_mmap() which clears the process table entry. And
	 * arch_exit_mmap() is called before the required fullmm TLB flush
	 * which does a RIC=2 flush. Hence for an initialized task, we do clear
	 * any cached process table entries.
	 *
	 * The condition below handles the error case during task init. We have
	 * set the process table entry early and if we fail a task
	 * initialization, we need to ensure the process table entry is zeroed.
	 * We need not worry about process table entry caches because the task
	 * never ran with the PID value.
	 */
	if (radix_enabled())
		process_tb[mm->context.id].prtb0 = 0;
	else
		subpage_prot_free(mm);
	destroy_contexts(&mm->context);
	mm->context.id = MMU_NO_CONTEXT;
}

void arch_exit_mmap(struct mm_struct *mm)
{
	destroy_pagetable_cache(mm);

	if (radix_enabled()) {
		/*
		 * Radix doesn't have a valid bit in the process table
		 * entries. However we know that at least the P9 implementation
		 * will avoid caching an entry with an invalid RTS field,
		 * and 0 is invalid. So this will do.
		 *
		 * This runs before the "fullmm" tlb flush in exit_mmap,
		 * which does a RIC=2 tlbie to clear the process table
		 * entry. See the "fullmm" comments in tlb-radix.c.
		 *
		 * No barrier required here after the store because
		 * this process will do the invalidate, which starts with
		 * ptesync.
		 */
		process_tb[mm->context.id].prtb0 = 0;
	}
}

#ifdef CONFIG_PPC_RADIX_MMU
void radix__switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
{
	mtspr(SPRN_PID, next->context.id);
	isync();
}
#endif