1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright IBM Corp. 2011 4 * Author(s): Jan Glauber <jang@linux.vnet.ibm.com> 5 */ 6 #include <linux/hugetlb.h> 7 #include <linux/vmalloc.h> 8 #include <linux/mm.h> 9 #include <asm/cacheflush.h> 10 #include <asm/facility.h> 11 #include <asm/pgalloc.h> 12 #include <asm/kfence.h> 13 #include <asm/page.h> 14 #include <asm/set_memory.h> 15 16 static inline unsigned long sske_frame(unsigned long addr, unsigned char skey) 17 { 18 asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],1,0" 19 : [addr] "+a" (addr) : [skey] "d" (skey)); 20 return addr; 21 } 22 23 void __storage_key_init_range(unsigned long start, unsigned long end) 24 { 25 unsigned long boundary, size; 26 27 while (start < end) { 28 if (MACHINE_HAS_EDAT1) { 29 /* set storage keys for a 1MB frame */ 30 size = 1UL << 20; 31 boundary = (start + size) & ~(size - 1); 32 if (boundary <= end) { 33 do { 34 start = sske_frame(start, PAGE_DEFAULT_KEY); 35 } while (start < boundary); 36 continue; 37 } 38 } 39 page_set_storage_key(start, PAGE_DEFAULT_KEY, 1); 40 start += PAGE_SIZE; 41 } 42 } 43 44 #ifdef CONFIG_PROC_FS 45 atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]); 46 47 void arch_report_meminfo(struct seq_file *m) 48 { 49 seq_printf(m, "DirectMap4k: %8lu kB\n", 50 atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_4K]) << 2); 51 seq_printf(m, "DirectMap1M: %8lu kB\n", 52 atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_1M]) << 10); 53 seq_printf(m, "DirectMap2G: %8lu kB\n", 54 atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_2G]) << 21); 55 } 56 #endif /* CONFIG_PROC_FS */ 57 58 static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr, 59 unsigned long dtt) 60 { 61 unsigned long *table, mask; 62 63 mask = 0; 64 if (MACHINE_HAS_EDAT2) { 65 switch (dtt) { 66 case CRDTE_DTT_REGION3: 67 mask = ~(PTRS_PER_PUD * sizeof(pud_t) - 1); 68 break; 69 case CRDTE_DTT_SEGMENT: 70 mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1); 71 break; 72 case CRDTE_DTT_PAGE: 73 mask = ~(PTRS_PER_PTE * sizeof(pte_t) - 1); 74 break; 75 } 76 table = (unsigned long *)((unsigned long)old & mask); 77 crdte(*old, new, table, dtt, addr, S390_lowcore.kernel_asce); 78 } else if (MACHINE_HAS_IDTE) { 79 cspg(old, *old, new); 80 } else { 81 csp((unsigned int *)old + 1, *old, new); 82 } 83 } 84 85 static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end, 86 unsigned long flags) 87 { 88 pte_t *ptep, new; 89 90 if (flags == SET_MEMORY_4K) 91 return 0; 92 ptep = pte_offset_kernel(pmdp, addr); 93 do { 94 new = *ptep; 95 if (pte_none(new)) 96 return -EINVAL; 97 if (flags & SET_MEMORY_RO) 98 new = pte_wrprotect(new); 99 else if (flags & SET_MEMORY_RW) 100 new = pte_mkwrite(pte_mkdirty(new)); 101 if (flags & SET_MEMORY_NX) 102 new = set_pte_bit(new, __pgprot(_PAGE_NOEXEC)); 103 else if (flags & SET_MEMORY_X) 104 new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC)); 105 if (flags & SET_MEMORY_INV) { 106 new = set_pte_bit(new, __pgprot(_PAGE_INVALID)); 107 } else if (flags & SET_MEMORY_DEF) { 108 new = __pte(pte_val(new) & PAGE_MASK); 109 new = set_pte_bit(new, PAGE_KERNEL); 110 if (!MACHINE_HAS_NX) 111 new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC)); 112 } 113 pgt_set((unsigned long *)ptep, pte_val(new), addr, CRDTE_DTT_PAGE); 114 ptep++; 115 addr += PAGE_SIZE; 116 cond_resched(); 117 } while (addr < end); 118 return 0; 119 } 120 121 static int split_pmd_page(pmd_t *pmdp, unsigned long addr) 122 { 123 unsigned long pte_addr, prot; 124 pte_t *pt_dir, *ptep; 125 pmd_t new; 126 int i, ro, nx; 127 128 pt_dir = vmem_pte_alloc(); 129 if (!pt_dir) 130 return -ENOMEM; 131 pte_addr = pmd_pfn(*pmdp) << PAGE_SHIFT; 132 ro = !!(pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT); 133 nx = !!(pmd_val(*pmdp) & _SEGMENT_ENTRY_NOEXEC); 134 prot = pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL); 135 if (!nx) 136 prot &= ~_PAGE_NOEXEC; 137 ptep = pt_dir; 138 for (i = 0; i < PTRS_PER_PTE; i++) { 139 set_pte(ptep, __pte(pte_addr | prot)); 140 pte_addr += PAGE_SIZE; 141 ptep++; 142 } 143 new = __pmd(__pa(pt_dir) | _SEGMENT_ENTRY); 144 pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT); 145 update_page_count(PG_DIRECT_MAP_4K, PTRS_PER_PTE); 146 update_page_count(PG_DIRECT_MAP_1M, -1); 147 return 0; 148 } 149 150 static void modify_pmd_page(pmd_t *pmdp, unsigned long addr, 151 unsigned long flags) 152 { 153 pmd_t new = *pmdp; 154 155 if (flags & SET_MEMORY_RO) 156 new = pmd_wrprotect(new); 157 else if (flags & SET_MEMORY_RW) 158 new = pmd_mkwrite(pmd_mkdirty(new)); 159 if (flags & SET_MEMORY_NX) 160 new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC)); 161 else if (flags & SET_MEMORY_X) 162 new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC)); 163 if (flags & SET_MEMORY_INV) { 164 new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID)); 165 } else if (flags & SET_MEMORY_DEF) { 166 new = __pmd(pmd_val(new) & PMD_MASK); 167 new = set_pmd_bit(new, SEGMENT_KERNEL); 168 if (!MACHINE_HAS_NX) 169 new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC)); 170 } 171 pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT); 172 } 173 174 static int walk_pmd_level(pud_t *pudp, unsigned long addr, unsigned long end, 175 unsigned long flags) 176 { 177 unsigned long next; 178 int need_split; 179 pmd_t *pmdp; 180 int rc = 0; 181 182 pmdp = pmd_offset(pudp, addr); 183 do { 184 if (pmd_none(*pmdp)) 185 return -EINVAL; 186 next = pmd_addr_end(addr, end); 187 if (pmd_large(*pmdp)) { 188 need_split = !!(flags & SET_MEMORY_4K); 189 need_split |= !!(addr & ~PMD_MASK); 190 need_split |= !!(addr + PMD_SIZE > next); 191 if (need_split) { 192 rc = split_pmd_page(pmdp, addr); 193 if (rc) 194 return rc; 195 continue; 196 } 197 modify_pmd_page(pmdp, addr, flags); 198 } else { 199 rc = walk_pte_level(pmdp, addr, next, flags); 200 if (rc) 201 return rc; 202 } 203 pmdp++; 204 addr = next; 205 cond_resched(); 206 } while (addr < end); 207 return rc; 208 } 209 210 static int split_pud_page(pud_t *pudp, unsigned long addr) 211 { 212 unsigned long pmd_addr, prot; 213 pmd_t *pm_dir, *pmdp; 214 pud_t new; 215 int i, ro, nx; 216 217 pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY); 218 if (!pm_dir) 219 return -ENOMEM; 220 pmd_addr = pud_pfn(*pudp) << PAGE_SHIFT; 221 ro = !!(pud_val(*pudp) & _REGION_ENTRY_PROTECT); 222 nx = !!(pud_val(*pudp) & _REGION_ENTRY_NOEXEC); 223 prot = pgprot_val(ro ? SEGMENT_KERNEL_RO : SEGMENT_KERNEL); 224 if (!nx) 225 prot &= ~_SEGMENT_ENTRY_NOEXEC; 226 pmdp = pm_dir; 227 for (i = 0; i < PTRS_PER_PMD; i++) { 228 set_pmd(pmdp, __pmd(pmd_addr | prot)); 229 pmd_addr += PMD_SIZE; 230 pmdp++; 231 } 232 new = __pud(__pa(pm_dir) | _REGION3_ENTRY); 233 pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3); 234 update_page_count(PG_DIRECT_MAP_1M, PTRS_PER_PMD); 235 update_page_count(PG_DIRECT_MAP_2G, -1); 236 return 0; 237 } 238 239 static void modify_pud_page(pud_t *pudp, unsigned long addr, 240 unsigned long flags) 241 { 242 pud_t new = *pudp; 243 244 if (flags & SET_MEMORY_RO) 245 new = pud_wrprotect(new); 246 else if (flags & SET_MEMORY_RW) 247 new = pud_mkwrite(pud_mkdirty(new)); 248 if (flags & SET_MEMORY_NX) 249 new = set_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC)); 250 else if (flags & SET_MEMORY_X) 251 new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC)); 252 if (flags & SET_MEMORY_INV) { 253 new = set_pud_bit(new, __pgprot(_REGION_ENTRY_INVALID)); 254 } else if (flags & SET_MEMORY_DEF) { 255 new = __pud(pud_val(new) & PUD_MASK); 256 new = set_pud_bit(new, REGION3_KERNEL); 257 if (!MACHINE_HAS_NX) 258 new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC)); 259 } 260 pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3); 261 } 262 263 static int walk_pud_level(p4d_t *p4d, unsigned long addr, unsigned long end, 264 unsigned long flags) 265 { 266 unsigned long next; 267 int need_split; 268 pud_t *pudp; 269 int rc = 0; 270 271 pudp = pud_offset(p4d, addr); 272 do { 273 if (pud_none(*pudp)) 274 return -EINVAL; 275 next = pud_addr_end(addr, end); 276 if (pud_large(*pudp)) { 277 need_split = !!(flags & SET_MEMORY_4K); 278 need_split |= !!(addr & ~PUD_MASK); 279 need_split |= !!(addr + PUD_SIZE > next); 280 if (need_split) { 281 rc = split_pud_page(pudp, addr); 282 if (rc) 283 break; 284 continue; 285 } 286 modify_pud_page(pudp, addr, flags); 287 } else { 288 rc = walk_pmd_level(pudp, addr, next, flags); 289 } 290 pudp++; 291 addr = next; 292 cond_resched(); 293 } while (addr < end && !rc); 294 return rc; 295 } 296 297 static int walk_p4d_level(pgd_t *pgd, unsigned long addr, unsigned long end, 298 unsigned long flags) 299 { 300 unsigned long next; 301 p4d_t *p4dp; 302 int rc = 0; 303 304 p4dp = p4d_offset(pgd, addr); 305 do { 306 if (p4d_none(*p4dp)) 307 return -EINVAL; 308 next = p4d_addr_end(addr, end); 309 rc = walk_pud_level(p4dp, addr, next, flags); 310 p4dp++; 311 addr = next; 312 cond_resched(); 313 } while (addr < end && !rc); 314 return rc; 315 } 316 317 DEFINE_MUTEX(cpa_mutex); 318 319 static int change_page_attr(unsigned long addr, unsigned long end, 320 unsigned long flags) 321 { 322 unsigned long next; 323 int rc = -EINVAL; 324 pgd_t *pgdp; 325 326 pgdp = pgd_offset_k(addr); 327 do { 328 if (pgd_none(*pgdp)) 329 break; 330 next = pgd_addr_end(addr, end); 331 rc = walk_p4d_level(pgdp, addr, next, flags); 332 if (rc) 333 break; 334 cond_resched(); 335 } while (pgdp++, addr = next, addr < end && !rc); 336 return rc; 337 } 338 339 static int change_page_attr_alias(unsigned long addr, unsigned long end, 340 unsigned long flags) 341 { 342 unsigned long alias, offset, va_start, va_end; 343 struct vm_struct *area; 344 int rc = 0; 345 346 /* 347 * Changes to read-only permissions on kernel VA mappings are also 348 * applied to the kernel direct mapping. Execute permissions are 349 * intentionally not transferred to keep all allocated pages within 350 * the direct mapping non-executable. 351 */ 352 flags &= SET_MEMORY_RO | SET_MEMORY_RW; 353 if (!flags) 354 return 0; 355 area = NULL; 356 while (addr < end) { 357 if (!area) 358 area = find_vm_area((void *)addr); 359 if (!area || !(area->flags & VM_ALLOC)) 360 return 0; 361 va_start = (unsigned long)area->addr; 362 va_end = va_start + area->nr_pages * PAGE_SIZE; 363 offset = (addr - va_start) >> PAGE_SHIFT; 364 alias = (unsigned long)page_address(area->pages[offset]); 365 rc = change_page_attr(alias, alias + PAGE_SIZE, flags); 366 if (rc) 367 break; 368 addr += PAGE_SIZE; 369 if (addr >= va_end) 370 area = NULL; 371 } 372 return rc; 373 } 374 375 int __set_memory(unsigned long addr, int numpages, unsigned long flags) 376 { 377 unsigned long end; 378 int rc; 379 380 if (!MACHINE_HAS_NX) 381 flags &= ~(SET_MEMORY_NX | SET_MEMORY_X); 382 if (!flags) 383 return 0; 384 if (!numpages) 385 return 0; 386 addr &= PAGE_MASK; 387 end = addr + numpages * PAGE_SIZE; 388 mutex_lock(&cpa_mutex); 389 rc = change_page_attr(addr, end, flags); 390 if (rc) 391 goto out; 392 rc = change_page_attr_alias(addr, end, flags); 393 out: 394 mutex_unlock(&cpa_mutex); 395 return rc; 396 } 397 398 int set_direct_map_invalid_noflush(struct page *page) 399 { 400 return __set_memory((unsigned long)page_to_virt(page), 1, SET_MEMORY_INV); 401 } 402 403 int set_direct_map_default_noflush(struct page *page) 404 { 405 return __set_memory((unsigned long)page_to_virt(page), 1, SET_MEMORY_DEF); 406 } 407 408 #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) 409 410 static void ipte_range(pte_t *pte, unsigned long address, int nr) 411 { 412 int i; 413 414 if (test_facility(13)) { 415 __ptep_ipte_range(address, nr - 1, pte, IPTE_GLOBAL); 416 return; 417 } 418 for (i = 0; i < nr; i++) { 419 __ptep_ipte(address, pte, 0, 0, IPTE_GLOBAL); 420 address += PAGE_SIZE; 421 pte++; 422 } 423 } 424 425 void __kernel_map_pages(struct page *page, int numpages, int enable) 426 { 427 unsigned long address; 428 pte_t *ptep, pte; 429 int nr, i, j; 430 431 for (i = 0; i < numpages;) { 432 address = (unsigned long)page_to_virt(page + i); 433 ptep = virt_to_kpte(address); 434 nr = (unsigned long)ptep >> ilog2(sizeof(long)); 435 nr = PTRS_PER_PTE - (nr & (PTRS_PER_PTE - 1)); 436 nr = min(numpages - i, nr); 437 if (enable) { 438 for (j = 0; j < nr; j++) { 439 pte = clear_pte_bit(*ptep, __pgprot(_PAGE_INVALID)); 440 set_pte(ptep, pte); 441 address += PAGE_SIZE; 442 ptep++; 443 } 444 } else { 445 ipte_range(ptep, address, nr); 446 } 447 i += nr; 448 } 449 } 450 451 #endif /* CONFIG_DEBUG_PAGEALLOC */ 452