1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright IBM Corp. 2011 4 * Author(s): Jan Glauber <jang@linux.vnet.ibm.com> 5 */ 6 #include <linux/hugetlb.h> 7 #include <linux/proc_fs.h> 8 #include <linux/vmalloc.h> 9 #include <linux/mm.h> 10 #include <asm/cacheflush.h> 11 #include <asm/facility.h> 12 #include <asm/pgalloc.h> 13 #include <asm/kfence.h> 14 #include <asm/page.h> 15 #include <asm/set_memory.h> 16 17 static inline unsigned long sske_frame(unsigned long addr, unsigned char skey) 18 { 19 asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],1,0" 20 : [addr] "+a" (addr) : [skey] "d" (skey)); 21 return addr; 22 } 23 24 void __storage_key_init_range(unsigned long start, unsigned long end) 25 { 26 unsigned long boundary, size; 27 28 while (start < end) { 29 if (MACHINE_HAS_EDAT1) { 30 /* set storage keys for a 1MB frame */ 31 size = 1UL << 20; 32 boundary = (start + size) & ~(size - 1); 33 if (boundary <= end) { 34 do { 35 start = sske_frame(start, PAGE_DEFAULT_KEY); 36 } while (start < boundary); 37 continue; 38 } 39 } 40 page_set_storage_key(start, PAGE_DEFAULT_KEY, 1); 41 start += PAGE_SIZE; 42 } 43 } 44 45 #ifdef CONFIG_PROC_FS 46 atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]); 47 48 void arch_report_meminfo(struct seq_file *m) 49 { 50 seq_printf(m, "DirectMap4k: %8lu kB\n", 51 atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_4K]) << 2); 52 seq_printf(m, "DirectMap1M: %8lu kB\n", 53 atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_1M]) << 10); 54 seq_printf(m, "DirectMap2G: %8lu kB\n", 55 atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_2G]) << 21); 56 } 57 #endif /* CONFIG_PROC_FS */ 58 59 static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr, 60 unsigned long dtt) 61 { 62 unsigned long *table, mask; 63 64 mask = 0; 65 if (MACHINE_HAS_EDAT2) { 66 switch (dtt) { 67 case CRDTE_DTT_REGION3: 68 mask = ~(PTRS_PER_PUD * sizeof(pud_t) - 1); 69 break; 70 case CRDTE_DTT_SEGMENT: 71 mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1); 72 break; 73 case CRDTE_DTT_PAGE: 74 mask = ~(PTRS_PER_PTE * sizeof(pte_t) - 1); 75 break; 76 } 77 table = (unsigned long *)((unsigned long)old & mask); 78 crdte(*old, new, table, dtt, addr, S390_lowcore.kernel_asce); 79 } else if (MACHINE_HAS_IDTE) { 80 cspg(old, *old, new); 81 } else { 82 csp((unsigned int *)old + 1, *old, new); 83 } 84 } 85 86 static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end, 87 unsigned long flags) 88 { 89 pte_t *ptep, new; 90 91 if (flags == SET_MEMORY_4K) 92 return 0; 93 ptep = pte_offset_kernel(pmdp, addr); 94 do { 95 new = *ptep; 96 if (pte_none(new)) 97 return -EINVAL; 98 if (flags & SET_MEMORY_RO) 99 new = pte_wrprotect(new); 100 else if (flags & SET_MEMORY_RW) 101 new = pte_mkwrite_novma(pte_mkdirty(new)); 102 if (flags & SET_MEMORY_NX) 103 new = set_pte_bit(new, __pgprot(_PAGE_NOEXEC)); 104 else if (flags & SET_MEMORY_X) 105 new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC)); 106 if (flags & SET_MEMORY_INV) { 107 new = set_pte_bit(new, __pgprot(_PAGE_INVALID)); 108 } else if (flags & SET_MEMORY_DEF) { 109 new = __pte(pte_val(new) & PAGE_MASK); 110 new = set_pte_bit(new, PAGE_KERNEL); 111 if (!MACHINE_HAS_NX) 112 new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC)); 113 } 114 pgt_set((unsigned long *)ptep, pte_val(new), addr, CRDTE_DTT_PAGE); 115 ptep++; 116 addr += PAGE_SIZE; 117 cond_resched(); 118 } while (addr < end); 119 return 0; 120 } 121 122 static int split_pmd_page(pmd_t *pmdp, unsigned long addr) 123 { 124 unsigned long pte_addr, prot; 125 pte_t *pt_dir, *ptep; 126 pmd_t new; 127 int i, ro, nx; 128 129 pt_dir = vmem_pte_alloc(); 130 if (!pt_dir) 131 return -ENOMEM; 132 pte_addr = pmd_pfn(*pmdp) << PAGE_SHIFT; 133 ro = !!(pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT); 134 nx = !!(pmd_val(*pmdp) & _SEGMENT_ENTRY_NOEXEC); 135 prot = pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL); 136 if (!nx) 137 prot &= ~_PAGE_NOEXEC; 138 ptep = pt_dir; 139 for (i = 0; i < PTRS_PER_PTE; i++) { 140 set_pte(ptep, __pte(pte_addr | prot)); 141 pte_addr += PAGE_SIZE; 142 ptep++; 143 } 144 new = __pmd(__pa(pt_dir) | _SEGMENT_ENTRY); 145 pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT); 146 update_page_count(PG_DIRECT_MAP_4K, PTRS_PER_PTE); 147 update_page_count(PG_DIRECT_MAP_1M, -1); 148 return 0; 149 } 150 151 static void modify_pmd_page(pmd_t *pmdp, unsigned long addr, 152 unsigned long flags) 153 { 154 pmd_t new = *pmdp; 155 156 if (flags & SET_MEMORY_RO) 157 new = pmd_wrprotect(new); 158 else if (flags & SET_MEMORY_RW) 159 new = pmd_mkwrite_novma(pmd_mkdirty(new)); 160 if (flags & SET_MEMORY_NX) 161 new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC)); 162 else if (flags & SET_MEMORY_X) 163 new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC)); 164 if (flags & SET_MEMORY_INV) { 165 new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID)); 166 } else if (flags & SET_MEMORY_DEF) { 167 new = __pmd(pmd_val(new) & PMD_MASK); 168 new = set_pmd_bit(new, SEGMENT_KERNEL); 169 if (!MACHINE_HAS_NX) 170 new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC)); 171 } 172 pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT); 173 } 174 175 static int walk_pmd_level(pud_t *pudp, unsigned long addr, unsigned long end, 176 unsigned long flags) 177 { 178 unsigned long next; 179 int need_split; 180 pmd_t *pmdp; 181 int rc = 0; 182 183 pmdp = pmd_offset(pudp, addr); 184 do { 185 if (pmd_none(*pmdp)) 186 return -EINVAL; 187 next = pmd_addr_end(addr, end); 188 if (pmd_large(*pmdp)) { 189 need_split = !!(flags & SET_MEMORY_4K); 190 need_split |= !!(addr & ~PMD_MASK); 191 need_split |= !!(addr + PMD_SIZE > next); 192 if (need_split) { 193 rc = split_pmd_page(pmdp, addr); 194 if (rc) 195 return rc; 196 continue; 197 } 198 modify_pmd_page(pmdp, addr, flags); 199 } else { 200 rc = walk_pte_level(pmdp, addr, next, flags); 201 if (rc) 202 return rc; 203 } 204 pmdp++; 205 addr = next; 206 cond_resched(); 207 } while (addr < end); 208 return rc; 209 } 210 211 static int split_pud_page(pud_t *pudp, unsigned long addr) 212 { 213 unsigned long pmd_addr, prot; 214 pmd_t *pm_dir, *pmdp; 215 pud_t new; 216 int i, ro, nx; 217 218 pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY); 219 if (!pm_dir) 220 return -ENOMEM; 221 pmd_addr = pud_pfn(*pudp) << PAGE_SHIFT; 222 ro = !!(pud_val(*pudp) & _REGION_ENTRY_PROTECT); 223 nx = !!(pud_val(*pudp) & _REGION_ENTRY_NOEXEC); 224 prot = pgprot_val(ro ? SEGMENT_KERNEL_RO : SEGMENT_KERNEL); 225 if (!nx) 226 prot &= ~_SEGMENT_ENTRY_NOEXEC; 227 pmdp = pm_dir; 228 for (i = 0; i < PTRS_PER_PMD; i++) { 229 set_pmd(pmdp, __pmd(pmd_addr | prot)); 230 pmd_addr += PMD_SIZE; 231 pmdp++; 232 } 233 new = __pud(__pa(pm_dir) | _REGION3_ENTRY); 234 pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3); 235 update_page_count(PG_DIRECT_MAP_1M, PTRS_PER_PMD); 236 update_page_count(PG_DIRECT_MAP_2G, -1); 237 return 0; 238 } 239 240 static void modify_pud_page(pud_t *pudp, unsigned long addr, 241 unsigned long flags) 242 { 243 pud_t new = *pudp; 244 245 if (flags & SET_MEMORY_RO) 246 new = pud_wrprotect(new); 247 else if (flags & SET_MEMORY_RW) 248 new = pud_mkwrite(pud_mkdirty(new)); 249 if (flags & SET_MEMORY_NX) 250 new = set_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC)); 251 else if (flags & SET_MEMORY_X) 252 new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC)); 253 if (flags & SET_MEMORY_INV) { 254 new = set_pud_bit(new, __pgprot(_REGION_ENTRY_INVALID)); 255 } else if (flags & SET_MEMORY_DEF) { 256 new = __pud(pud_val(new) & PUD_MASK); 257 new = set_pud_bit(new, REGION3_KERNEL); 258 if (!MACHINE_HAS_NX) 259 new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC)); 260 } 261 pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3); 262 } 263 264 static int walk_pud_level(p4d_t *p4d, unsigned long addr, unsigned long end, 265 unsigned long flags) 266 { 267 unsigned long next; 268 int need_split; 269 pud_t *pudp; 270 int rc = 0; 271 272 pudp = pud_offset(p4d, addr); 273 do { 274 if (pud_none(*pudp)) 275 return -EINVAL; 276 next = pud_addr_end(addr, end); 277 if (pud_leaf(*pudp)) { 278 need_split = !!(flags & SET_MEMORY_4K); 279 need_split |= !!(addr & ~PUD_MASK); 280 need_split |= !!(addr + PUD_SIZE > next); 281 if (need_split) { 282 rc = split_pud_page(pudp, addr); 283 if (rc) 284 break; 285 continue; 286 } 287 modify_pud_page(pudp, addr, flags); 288 } else { 289 rc = walk_pmd_level(pudp, addr, next, flags); 290 } 291 pudp++; 292 addr = next; 293 cond_resched(); 294 } while (addr < end && !rc); 295 return rc; 296 } 297 298 static int walk_p4d_level(pgd_t *pgd, unsigned long addr, unsigned long end, 299 unsigned long flags) 300 { 301 unsigned long next; 302 p4d_t *p4dp; 303 int rc = 0; 304 305 p4dp = p4d_offset(pgd, addr); 306 do { 307 if (p4d_none(*p4dp)) 308 return -EINVAL; 309 next = p4d_addr_end(addr, end); 310 rc = walk_pud_level(p4dp, addr, next, flags); 311 p4dp++; 312 addr = next; 313 cond_resched(); 314 } while (addr < end && !rc); 315 return rc; 316 } 317 318 DEFINE_MUTEX(cpa_mutex); 319 320 static int change_page_attr(unsigned long addr, unsigned long end, 321 unsigned long flags) 322 { 323 unsigned long next; 324 int rc = -EINVAL; 325 pgd_t *pgdp; 326 327 pgdp = pgd_offset_k(addr); 328 do { 329 if (pgd_none(*pgdp)) 330 break; 331 next = pgd_addr_end(addr, end); 332 rc = walk_p4d_level(pgdp, addr, next, flags); 333 if (rc) 334 break; 335 cond_resched(); 336 } while (pgdp++, addr = next, addr < end && !rc); 337 return rc; 338 } 339 340 static int change_page_attr_alias(unsigned long addr, unsigned long end, 341 unsigned long flags) 342 { 343 unsigned long alias, offset, va_start, va_end; 344 struct vm_struct *area; 345 int rc = 0; 346 347 /* 348 * Changes to read-only permissions on kernel VA mappings are also 349 * applied to the kernel direct mapping. Execute permissions are 350 * intentionally not transferred to keep all allocated pages within 351 * the direct mapping non-executable. 352 */ 353 flags &= SET_MEMORY_RO | SET_MEMORY_RW; 354 if (!flags) 355 return 0; 356 area = NULL; 357 while (addr < end) { 358 if (!area) 359 area = find_vm_area((void *)addr); 360 if (!area || !(area->flags & VM_ALLOC)) 361 return 0; 362 va_start = (unsigned long)area->addr; 363 va_end = va_start + area->nr_pages * PAGE_SIZE; 364 offset = (addr - va_start) >> PAGE_SHIFT; 365 alias = (unsigned long)page_address(area->pages[offset]); 366 rc = change_page_attr(alias, alias + PAGE_SIZE, flags); 367 if (rc) 368 break; 369 addr += PAGE_SIZE; 370 if (addr >= va_end) 371 area = NULL; 372 } 373 return rc; 374 } 375 376 int __set_memory(unsigned long addr, unsigned long numpages, unsigned long flags) 377 { 378 unsigned long end; 379 int rc; 380 381 if (!MACHINE_HAS_NX) 382 flags &= ~(SET_MEMORY_NX | SET_MEMORY_X); 383 if (!flags) 384 return 0; 385 if (!numpages) 386 return 0; 387 addr &= PAGE_MASK; 388 end = addr + numpages * PAGE_SIZE; 389 mutex_lock(&cpa_mutex); 390 rc = change_page_attr(addr, end, flags); 391 if (rc) 392 goto out; 393 rc = change_page_attr_alias(addr, end, flags); 394 out: 395 mutex_unlock(&cpa_mutex); 396 return rc; 397 } 398 399 int set_direct_map_invalid_noflush(struct page *page) 400 { 401 return __set_memory((unsigned long)page_to_virt(page), 1, SET_MEMORY_INV); 402 } 403 404 int set_direct_map_default_noflush(struct page *page) 405 { 406 return __set_memory((unsigned long)page_to_virt(page), 1, SET_MEMORY_DEF); 407 } 408 409 #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) 410 411 static void ipte_range(pte_t *pte, unsigned long address, int nr) 412 { 413 int i; 414 415 if (test_facility(13)) { 416 __ptep_ipte_range(address, nr - 1, pte, IPTE_GLOBAL); 417 return; 418 } 419 for (i = 0; i < nr; i++) { 420 __ptep_ipte(address, pte, 0, 0, IPTE_GLOBAL); 421 address += PAGE_SIZE; 422 pte++; 423 } 424 } 425 426 void __kernel_map_pages(struct page *page, int numpages, int enable) 427 { 428 unsigned long address; 429 pte_t *ptep, pte; 430 int nr, i, j; 431 432 for (i = 0; i < numpages;) { 433 address = (unsigned long)page_to_virt(page + i); 434 ptep = virt_to_kpte(address); 435 nr = (unsigned long)ptep >> ilog2(sizeof(long)); 436 nr = PTRS_PER_PTE - (nr & (PTRS_PER_PTE - 1)); 437 nr = min(numpages - i, nr); 438 if (enable) { 439 for (j = 0; j < nr; j++) { 440 pte = clear_pte_bit(*ptep, __pgprot(_PAGE_INVALID)); 441 set_pte(ptep, pte); 442 address += PAGE_SIZE; 443 ptep++; 444 } 445 } else { 446 ipte_range(ptep, address, nr); 447 } 448 i += nr; 449 } 450 } 451 452 #endif /* CONFIG_DEBUG_PAGEALLOC */ 453