// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 Regents of the University of California
 * Copyright (C) 2019 Western Digital Corporation or its affiliates.
 * Copyright (C) 2020 FORTH-ICS/CARV
 *  Nick Kossifidis <mick@ics.forth.gr>
 */

#include <linux/init.h>
#include <linux/mm.h>
#include <linux/memblock.h>
#include <linux/initrd.h>
#include <linux/swap.h>
#include <linux/swiotlb.h>
#include <linux/sizes.h>
#include <linux/of_fdt.h>
#include <linux/of_reserved_mem.h>
#include <linux/libfdt.h>
#include <linux/set_memory.h>
#include <linux/dma-map-ops.h>
#include <linux/crash_dump.h>
#include <linux/hugetlb.h>
#ifdef CONFIG_RELOCATABLE
#include <linux/elf.h>
#endif
#include <linux/kfence.h>

#include <asm/fixmap.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/soc.h>
#include <asm/io.h>
#include <asm/ptdump.h>
#include <asm/numa.h>

#include "../kernel/head.h"

struct kernel_mapping kernel_map __ro_after_init;
EXPORT_SYMBOL(kernel_map);
#ifdef CONFIG_XIP_KERNEL
#define kernel_map	(*(struct kernel_mapping *)XIP_FIXUP(&kernel_map))
#endif

#ifdef CONFIG_64BIT
u64 satp_mode __ro_after_init = !IS_ENABLED(CONFIG_XIP_KERNEL) ? SATP_MODE_57 : SATP_MODE_39;
#else
u64 satp_mode __ro_after_init = SATP_MODE_32;
#endif
EXPORT_SYMBOL(satp_mode);

bool pgtable_l4_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL);
bool pgtable_l5_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL);
EXPORT_SYMBOL(pgtable_l4_enabled);
EXPORT_SYMBOL(pgtable_l5_enabled);

phys_addr_t phys_ram_base __ro_after_init;
EXPORT_SYMBOL(phys_ram_base);

unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
							__page_aligned_bss;
EXPORT_SYMBOL(empty_zero_page);

extern char _start[];
void *_dtb_early_va __initdata;
uintptr_t _dtb_early_pa __initdata;

static phys_addr_t dma32_phys_limit __initdata;

static void __init zone_sizes_init(void)
{
	unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0, };

#ifdef CONFIG_ZONE_DMA32
	max_zone_pfns[ZONE_DMA32] = PFN_DOWN(dma32_phys_limit);
#endif
	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;

	free_area_init(max_zone_pfns);
}

#if defined(CONFIG_MMU) && defined(CONFIG_DEBUG_VM)

#define LOG2_SZ_1K	ilog2(SZ_1K)
#define LOG2_SZ_1M	ilog2(SZ_1M)
#define LOG2_SZ_1G	ilog2(SZ_1G)
#define LOG2_SZ_1T	ilog2(SZ_1T)

static inline void print_mlk(char *name, unsigned long b, unsigned long t)
{
	pr_notice("%12s : 0x%08lx - 0x%08lx   (%4ld kB)\n", name, b, t,
		  (((t) - (b)) >> LOG2_SZ_1K));
}

static inline void print_mlm(char *name, unsigned long b, unsigned long t)
{
	pr_notice("%12s : 0x%08lx - 0x%08lx   (%4ld MB)\n", name, b, t,
		  (((t) - (b)) >> LOG2_SZ_1M));
}

static inline void print_mlg(char *name, unsigned long b, unsigned long t)
{
	pr_notice("%12s : 0x%08lx - 0x%08lx   (%4ld GB)\n", name, b, t,
		  (((t) - (b)) >> LOG2_SZ_1G));
}

#ifdef CONFIG_64BIT
static inline void print_mlt(char *name, unsigned long b, unsigned long t)
{
	pr_notice("%12s : 0x%08lx - 0x%08lx   (%4ld TB)\n", name, b, t,
		  (((t) - (b)) >> LOG2_SZ_1T));
}
#else
#define print_mlt(n, b, t) do {} while (0)
#endif

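/*
 * Note: print_ml() below picks the largest unit in which the range spans at
 * least ten units, e.g. a 64 MB vmalloc area is reported in MB while a
 * few-page fixmap window is reported in kB.
 */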
static inline void print_ml(char *name, unsigned long b, unsigned long t)
{
	unsigned long diff = t - b;

	if (IS_ENABLED(CONFIG_64BIT) && (diff >> LOG2_SZ_1T) >= 10)
		print_mlt(name, b, t);
	else if ((diff >> LOG2_SZ_1G) >= 10)
		print_mlg(name, b, t);
	else if ((diff >> LOG2_SZ_1M) >= 10)
		print_mlm(name, b, t);
	else
		print_mlk(name, b, t);
}

static void __init print_vm_layout(void)
{
	pr_notice("Virtual kernel memory layout:\n");
	print_ml("fixmap", (unsigned long)FIXADDR_START,
		 (unsigned long)FIXADDR_TOP);
	print_ml("pci io", (unsigned long)PCI_IO_START,
		 (unsigned long)PCI_IO_END);
	print_ml("vmemmap", (unsigned long)VMEMMAP_START,
		 (unsigned long)VMEMMAP_END);
	print_ml("vmalloc", (unsigned long)VMALLOC_START,
		 (unsigned long)VMALLOC_END);
#ifdef CONFIG_64BIT
	print_ml("modules", (unsigned long)MODULES_VADDR,
		 (unsigned long)MODULES_END);
#endif
	print_ml("lowmem", (unsigned long)PAGE_OFFSET,
		 (unsigned long)high_memory);
	if (IS_ENABLED(CONFIG_64BIT)) {
#ifdef CONFIG_KASAN
		print_ml("kasan", KASAN_SHADOW_START, KASAN_SHADOW_END);
#endif

		print_ml("kernel", (unsigned long)kernel_map.virt_addr,
			 (unsigned long)ADDRESS_SPACE_END);
	}
}
#else
static void print_vm_layout(void) { }
#endif /* CONFIG_DEBUG_VM */

void __init mem_init(void)
{
#ifdef CONFIG_FLATMEM
	BUG_ON(!mem_map);
#endif /* CONFIG_FLATMEM */

	swiotlb_init(max_pfn > PFN_DOWN(dma32_phys_limit), SWIOTLB_VERBOSE);
	memblock_free_all();

	print_vm_layout();
}

/* Limit the memory size via mem. */
static phys_addr_t memory_limit;

static int __init early_mem(char *p)
{
	u64 size;

	if (!p)
		return 1;

	size = memparse(p, &p) & PAGE_MASK;
	memory_limit = min_t(u64, size, memory_limit);

	pr_notice("Memory limited to %lldMB\n", (u64)memory_limit >> 20);

	return 0;
}
early_param("mem", early_mem);

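/*
 * Example: booting with "mem=512M" caps the usable physical memory at
 * 512 MB. The value is rounded down to a page boundary here and enforced
 * later in setup_bootmem() through memblock_enforce_memory_limit().
 */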
static void __init setup_bootmem(void)
{
	phys_addr_t vmlinux_end = __pa_symbol(&_end);
	phys_addr_t max_mapped_addr;
	phys_addr_t phys_ram_end, vmlinux_start;

	if (IS_ENABLED(CONFIG_XIP_KERNEL))
		vmlinux_start = __pa_symbol(&_sdata);
	else
		vmlinux_start = __pa_symbol(&_start);

	memblock_enforce_memory_limit(memory_limit);

	/*
	 * Make sure we align the reservation on PMD_SIZE since we will
	 * map the kernel in the linear mapping as read-only: we do not want
	 * any allocation to happen between _end and the next pmd aligned page.
	 */
	if (IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
		vmlinux_end = (vmlinux_end + PMD_SIZE - 1) & PMD_MASK;
	/*
	 * Reserve from the start of the kernel to the end of the kernel
	 */
	memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);

	phys_ram_end = memblock_end_of_DRAM();
	if (!IS_ENABLED(CONFIG_XIP_KERNEL))
		phys_ram_base = memblock_start_of_DRAM();

	/*
	 * In 64-bit, any use of __va/__pa before this point is wrong as we
	 * did not know the start of DRAM before.
	 */
	if (IS_ENABLED(CONFIG_64BIT))
		kernel_map.va_pa_offset = PAGE_OFFSET - phys_ram_base;

	/*
	 * The memblock allocator is not aware of the fact that the last 4K
	 * bytes of the addressable memory cannot be mapped because of the
	 * IS_ERR_VALUE macro. Make sure that the last 4K bytes are not usable
	 * by memblock if the end of DRAM is equal to the maximum addressable
	 * memory. For the 64-bit kernel, this problem cannot happen here since
	 * the end of the virtual address space is occupied by the kernel
	 * mapping; the equivalent check is instead done as soon as the kernel
	 * mapping base address is determined.
	 */
	if (!IS_ENABLED(CONFIG_64BIT)) {
		max_mapped_addr = __pa(~(ulong)0);
		if (max_mapped_addr == (phys_ram_end - 1))
			memblock_set_current_limit(max_mapped_addr - 4096);
	}

	min_low_pfn = PFN_UP(phys_ram_base);
	max_low_pfn = max_pfn = PFN_DOWN(phys_ram_end);
	high_memory = (void *)(__va(PFN_PHYS(max_low_pfn)));

	dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn));
	set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET);

	reserve_initrd_mem();

	/*
	 * No allocation should be done before reserving the memory as defined
	 * in the device tree, otherwise the allocation could end up in a
	 * reserved region.
	 */
	early_init_fdt_scan_reserved_mem();

	/*
	 * If the DTB is built in, there is no need to reserve its memblock.
	 * Otherwise, do reserve it but avoid using
	 * early_init_fdt_reserve_self() since __pa() does
	 * not work for DTB pointers that are fixmap addresses.
	 */
	if (!IS_ENABLED(CONFIG_BUILTIN_DTB))
		memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va));

	dma_contiguous_reserve(dma32_phys_limit);
	if (IS_ENABLED(CONFIG_64BIT))
		hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
}

#ifdef CONFIG_MMU
struct pt_alloc_ops pt_ops __initdata;

pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;

pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);

#ifdef CONFIG_XIP_KERNEL
#define pt_ops			(*(struct pt_alloc_ops *)XIP_FIXUP(&pt_ops))
#define trampoline_pg_dir	((pgd_t *)XIP_FIXUP(trampoline_pg_dir))
#define fixmap_pte		((pte_t *)XIP_FIXUP(fixmap_pte))
#define early_pg_dir		((pgd_t *)XIP_FIXUP(early_pg_dir))
#endif /* CONFIG_XIP_KERNEL */

static const pgprot_t protection_map[16] = {
	[VM_NONE]					= PAGE_NONE,
	[VM_READ]					= PAGE_READ,
	[VM_WRITE]					= PAGE_COPY,
	[VM_WRITE | VM_READ]				= PAGE_COPY,
	[VM_EXEC]					= PAGE_EXEC,
	[VM_EXEC | VM_READ]				= PAGE_READ_EXEC,
	[VM_EXEC | VM_WRITE]				= PAGE_COPY_EXEC,
	[VM_EXEC | VM_WRITE | VM_READ]			= PAGE_COPY_EXEC,
	[VM_SHARED]					= PAGE_NONE,
	[VM_SHARED | VM_READ]				= PAGE_READ,
	[VM_SHARED | VM_WRITE]				= PAGE_SHARED,
	[VM_SHARED | VM_WRITE | VM_READ]		= PAGE_SHARED,
	[VM_SHARED | VM_EXEC]				= PAGE_EXEC,
	[VM_SHARED | VM_EXEC | VM_READ]			= PAGE_READ_EXEC,
	[VM_SHARED | VM_EXEC | VM_WRITE]		= PAGE_SHARED_EXEC,
	[VM_SHARED | VM_EXEC | VM_WRITE | VM_READ]	= PAGE_SHARED_EXEC
};
DECLARE_VM_GET_PAGE_PROT

void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
{
	unsigned long addr = __fix_to_virt(idx);
	pte_t *ptep;

	BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);

	ptep = &fixmap_pte[pte_index(addr)];

	if (pgprot_val(prot))
		set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, prot));
	else
		pte_clear(&init_mm, addr, ptep);
	local_flush_tlb_page(addr);
}

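/*
 * Page table construction goes through three phases, reflected in the
 * alloc/get helpers below and selected via pt_ops_set_early(),
 * pt_ops_set_fixmap() and pt_ops_set_late():
 *  - early:  MMU off, tables are statically allocated and addressed by
 *            their physical address;
 *  - fixmap: MMU on but no linear mapping yet, freshly allocated tables
 *            are temporarily mapped through dedicated fixmap slots;
 *  - late:   linear mapping available, tables come from the normal page
 *            allocator and are reached through __va().
 */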
static inline pte_t *__init get_pte_virt_early(phys_addr_t pa)
{
	return (pte_t *)((uintptr_t)pa);
}

static inline pte_t *__init get_pte_virt_fixmap(phys_addr_t pa)
{
	clear_fixmap(FIX_PTE);
	return (pte_t *)set_fixmap_offset(FIX_PTE, pa);
}

static inline pte_t *__init get_pte_virt_late(phys_addr_t pa)
{
	return (pte_t *) __va(pa);
}

static inline phys_addr_t __init alloc_pte_early(uintptr_t va)
{
	/*
	 * With the MMU disabled we only create PMD or PGD level mappings, so
	 * we should never need to allocate a PTE table here.
	 */
	BUG();
}

static inline phys_addr_t __init alloc_pte_fixmap(uintptr_t va)
{
	return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
}

static phys_addr_t __init alloc_pte_late(uintptr_t va)
{
	struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, 0);

	BUG_ON(!ptdesc || !pagetable_pte_ctor(ptdesc));
	return __pa((pte_t *)ptdesc_address(ptdesc));
}

static void __init create_pte_mapping(pte_t *ptep,
				      uintptr_t va, phys_addr_t pa,
				      phys_addr_t sz, pgprot_t prot)
{
	uintptr_t pte_idx = pte_index(va);

	BUG_ON(sz != PAGE_SIZE);

	if (pte_none(ptep[pte_idx]))
		ptep[pte_idx] = pfn_pte(PFN_DOWN(pa), prot);
}

#ifndef __PAGETABLE_PMD_FOLDED

static pmd_t trampoline_pmd[PTRS_PER_PMD] __page_aligned_bss;
static pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss;
static pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);

#ifdef CONFIG_XIP_KERNEL
#define trampoline_pmd	((pmd_t *)XIP_FIXUP(trampoline_pmd))
#define fixmap_pmd	((pmd_t *)XIP_FIXUP(fixmap_pmd))
#define early_pmd	((pmd_t *)XIP_FIXUP(early_pmd))
#endif /* CONFIG_XIP_KERNEL */

static p4d_t trampoline_p4d[PTRS_PER_P4D] __page_aligned_bss;
static p4d_t fixmap_p4d[PTRS_PER_P4D] __page_aligned_bss;
static p4d_t early_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);

#ifdef CONFIG_XIP_KERNEL
#define trampoline_p4d	((p4d_t *)XIP_FIXUP(trampoline_p4d))
#define fixmap_p4d	((p4d_t *)XIP_FIXUP(fixmap_p4d))
#define early_p4d	((p4d_t *)XIP_FIXUP(early_p4d))
#endif /* CONFIG_XIP_KERNEL */

static pud_t trampoline_pud[PTRS_PER_PUD] __page_aligned_bss;
static pud_t fixmap_pud[PTRS_PER_PUD] __page_aligned_bss;
static pud_t early_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);

#ifdef CONFIG_XIP_KERNEL
#define trampoline_pud	((pud_t *)XIP_FIXUP(trampoline_pud))
#define fixmap_pud	((pud_t *)XIP_FIXUP(fixmap_pud))
#define early_pud	((pud_t *)XIP_FIXUP(early_pud))
#endif /* CONFIG_XIP_KERNEL */

static pmd_t *__init get_pmd_virt_early(phys_addr_t pa)
{
	/* Before MMU is enabled */
	return (pmd_t *)((uintptr_t)pa);
}

static pmd_t *__init get_pmd_virt_fixmap(phys_addr_t pa)
{
	clear_fixmap(FIX_PMD);
	return (pmd_t *)set_fixmap_offset(FIX_PMD, pa);
}

static pmd_t *__init get_pmd_virt_late(phys_addr_t pa)
{
	return (pmd_t *) __va(pa);
}

static phys_addr_t __init alloc_pmd_early(uintptr_t va)
{
	BUG_ON((va - kernel_map.virt_addr) >> PUD_SHIFT);

	return (uintptr_t)early_pmd;
}

static phys_addr_t __init alloc_pmd_fixmap(uintptr_t va)
{
	return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
}

static phys_addr_t __init alloc_pmd_late(uintptr_t va)
{
	struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, 0);

	BUG_ON(!ptdesc || !pagetable_pmd_ctor(ptdesc));
	return __pa((pmd_t *)ptdesc_address(ptdesc));
}

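/*
 * The create_pmd_mapping()/create_pud_mapping()/create_p4d_mapping() helpers
 * below all follow the same pattern: if the requested size matches this
 * level's block size, install a leaf entry directly; otherwise allocate (or
 * look up) the next-level table through pt_ops and recurse one level down.
 */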
static void __init create_pmd_mapping(pmd_t *pmdp,
				      uintptr_t va, phys_addr_t pa,
				      phys_addr_t sz, pgprot_t prot)
{
	pte_t *ptep;
	phys_addr_t pte_phys;
	uintptr_t pmd_idx = pmd_index(va);

	if (sz == PMD_SIZE) {
		if (pmd_none(pmdp[pmd_idx]))
			pmdp[pmd_idx] = pfn_pmd(PFN_DOWN(pa), prot);
		return;
	}

	if (pmd_none(pmdp[pmd_idx])) {
		pte_phys = pt_ops.alloc_pte(va);
		pmdp[pmd_idx] = pfn_pmd(PFN_DOWN(pte_phys), PAGE_TABLE);
		ptep = pt_ops.get_pte_virt(pte_phys);
		memset(ptep, 0, PAGE_SIZE);
	} else {
		pte_phys = PFN_PHYS(_pmd_pfn(pmdp[pmd_idx]));
		ptep = pt_ops.get_pte_virt(pte_phys);
	}

	create_pte_mapping(ptep, va, pa, sz, prot);
}

static pud_t *__init get_pud_virt_early(phys_addr_t pa)
{
	return (pud_t *)((uintptr_t)pa);
}

static pud_t *__init get_pud_virt_fixmap(phys_addr_t pa)
{
	clear_fixmap(FIX_PUD);
	return (pud_t *)set_fixmap_offset(FIX_PUD, pa);
}

static pud_t *__init get_pud_virt_late(phys_addr_t pa)
{
	return (pud_t *)__va(pa);
}

static phys_addr_t __init alloc_pud_early(uintptr_t va)
{
	/* Only one PUD is available for early mapping */
	BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT);

	return (uintptr_t)early_pud;
}

static phys_addr_t __init alloc_pud_fixmap(uintptr_t va)
{
	return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
}

static phys_addr_t alloc_pud_late(uintptr_t va)
{
	unsigned long vaddr;

	vaddr = __get_free_page(GFP_KERNEL);
	BUG_ON(!vaddr);
	return __pa(vaddr);
}

static p4d_t *__init get_p4d_virt_early(phys_addr_t pa)
{
	return (p4d_t *)((uintptr_t)pa);
}

static p4d_t *__init get_p4d_virt_fixmap(phys_addr_t pa)
{
	clear_fixmap(FIX_P4D);
	return (p4d_t *)set_fixmap_offset(FIX_P4D, pa);
}

static p4d_t *__init get_p4d_virt_late(phys_addr_t pa)
{
	return (p4d_t *)__va(pa);
}

static phys_addr_t __init alloc_p4d_early(uintptr_t va)
{
	/* Only one P4D is available for early mapping */
	BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT);

	return (uintptr_t)early_p4d;
}

static phys_addr_t __init alloc_p4d_fixmap(uintptr_t va)
{
	return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
}

static phys_addr_t alloc_p4d_late(uintptr_t va)
{
	unsigned long vaddr;

	vaddr = __get_free_page(GFP_KERNEL);
	BUG_ON(!vaddr);
	return __pa(vaddr);
}

static void __init create_pud_mapping(pud_t *pudp,
				      uintptr_t va, phys_addr_t pa,
				      phys_addr_t sz, pgprot_t prot)
{
	pmd_t *nextp;
	phys_addr_t next_phys;
	uintptr_t pud_index = pud_index(va);

	if (sz == PUD_SIZE) {
		if (pud_val(pudp[pud_index]) == 0)
			pudp[pud_index] = pfn_pud(PFN_DOWN(pa), prot);
		return;
	}

	if (pud_val(pudp[pud_index]) == 0) {
		next_phys = pt_ops.alloc_pmd(va);
		pudp[pud_index] = pfn_pud(PFN_DOWN(next_phys), PAGE_TABLE);
		nextp = pt_ops.get_pmd_virt(next_phys);
		memset(nextp, 0, PAGE_SIZE);
	} else {
		next_phys = PFN_PHYS(_pud_pfn(pudp[pud_index]));
		nextp = pt_ops.get_pmd_virt(next_phys);
	}

	create_pmd_mapping(nextp, va, pa, sz, prot);
}

static void __init create_p4d_mapping(p4d_t *p4dp,
				      uintptr_t va, phys_addr_t pa,
				      phys_addr_t sz, pgprot_t prot)
{
	pud_t *nextp;
	phys_addr_t next_phys;
	uintptr_t p4d_index = p4d_index(va);

	if (sz == P4D_SIZE) {
		if (p4d_val(p4dp[p4d_index]) == 0)
			p4dp[p4d_index] = pfn_p4d(PFN_DOWN(pa), prot);
		return;
	}

	if (p4d_val(p4dp[p4d_index]) == 0) {
		next_phys = pt_ops.alloc_pud(va);
		p4dp[p4d_index] = pfn_p4d(PFN_DOWN(next_phys), PAGE_TABLE);
		nextp = pt_ops.get_pud_virt(next_phys);
		memset(nextp, 0, PAGE_SIZE);
	} else {
		next_phys = PFN_PHYS(_p4d_pfn(p4dp[p4d_index]));
		nextp = pt_ops.get_pud_virt(next_phys);
	}

	create_pud_mapping(nextp, va, pa, sz, prot);
}

#define pgd_next_t		p4d_t
#define alloc_pgd_next(__va)	(pgtable_l5_enabled ?			\
		pt_ops.alloc_p4d(__va) : (pgtable_l4_enabled ?		\
		pt_ops.alloc_pud(__va) : pt_ops.alloc_pmd(__va)))
#define get_pgd_next_virt(__pa)	(pgtable_l5_enabled ?			\
		pt_ops.get_p4d_virt(__pa) : (pgd_next_t *)(pgtable_l4_enabled ?	\
		pt_ops.get_pud_virt(__pa) : (pud_t *)pt_ops.get_pmd_virt(__pa)))
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot)	\
				(pgtable_l5_enabled ?			\
		create_p4d_mapping(__nextp, __va, __pa, __sz, __prot) :	\
				(pgtable_l4_enabled ?			\
		create_pud_mapping((pud_t *)__nextp, __va, __pa, __sz, __prot) :	\
		create_pmd_mapping((pmd_t *)__nextp, __va, __pa, __sz, __prot)))
#define fixmap_pgd_next		(pgtable_l5_enabled ?			\
		(uintptr_t)fixmap_p4d : (pgtable_l4_enabled ?		\
		(uintptr_t)fixmap_pud : (uintptr_t)fixmap_pmd))
#define trampoline_pgd_next	(pgtable_l5_enabled ?			\
		(uintptr_t)trampoline_p4d : (pgtable_l4_enabled ?	\
		(uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd))
#else
#define pgd_next_t		pte_t
#define alloc_pgd_next(__va)	pt_ops.alloc_pte(__va)
#define get_pgd_next_virt(__pa)	pt_ops.get_pte_virt(__pa)
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot)	\
	create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
#define fixmap_pgd_next		((uintptr_t)fixmap_pte)
#define create_p4d_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while (0)
#define create_pud_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while (0)
#define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while (0)
#endif /* __PAGETABLE_PMD_FOLDED */

void __init create_pgd_mapping(pgd_t *pgdp,
			       uintptr_t va, phys_addr_t pa,
			       phys_addr_t sz, pgprot_t prot)
{
	pgd_next_t *nextp;
	phys_addr_t next_phys;
	uintptr_t pgd_idx = pgd_index(va);

	if (sz == PGDIR_SIZE) {
		if (pgd_val(pgdp[pgd_idx]) == 0)
			pgdp[pgd_idx] = pfn_pgd(PFN_DOWN(pa), prot);
		return;
	}

	if (pgd_val(pgdp[pgd_idx]) == 0) {
		next_phys = alloc_pgd_next(va);
		pgdp[pgd_idx] = pfn_pgd(PFN_DOWN(next_phys), PAGE_TABLE);
		nextp = get_pgd_next_virt(next_phys);
		memset(nextp, 0, PAGE_SIZE);
	} else {
		next_phys = PFN_PHYS(_pgd_pfn(pgdp[pgd_idx]));
		nextp = get_pgd_next_virt(next_phys);
	}

	create_pgd_next_mapping(nextp, va, pa, sz, prot);
}

static uintptr_t __init best_map_size(phys_addr_t pa, uintptr_t va,
				      phys_addr_t size)
{
	if (!(pa & (PGDIR_SIZE - 1)) && !(va & (PGDIR_SIZE - 1)) && size >= PGDIR_SIZE)
		return PGDIR_SIZE;

	if (!(pa & (P4D_SIZE - 1)) && !(va & (P4D_SIZE - 1)) && size >= P4D_SIZE)
		return P4D_SIZE;

	if (!(pa & (PUD_SIZE - 1)) && !(va & (PUD_SIZE - 1)) && size >= PUD_SIZE)
		return PUD_SIZE;

	if (!(pa & (PMD_SIZE - 1)) && !(va & (PMD_SIZE - 1)) && size >= PMD_SIZE)
		return PMD_SIZE;

	return PAGE_SIZE;
}

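/*
 * Example (rv64, where PMD_SIZE is 2 MiB and PUD_SIZE is 1 GiB): for a large
 * region whose physical and virtual addresses are only 2 MiB aligned,
 * best_map_size() returns PMD_SIZE, so create_linear_mapping_range() below
 * advances in 2 MiB leaf entries; once both addresses become 1 GiB aligned
 * and at least PUD_SIZE bytes remain, it switches to 1 GiB entries.
 */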
#ifdef CONFIG_XIP_KERNEL
#define phys_ram_base	(*(phys_addr_t *)XIP_FIXUP(&phys_ram_base))
extern char _xiprom[], _exiprom[], __data_loc;

/* called from head.S with MMU off */
asmlinkage void __init __copy_data(void)
{
	void *from = (void *)(&__data_loc);
	void *to = (void *)CONFIG_PHYS_RAM_BASE;
	size_t sz = (size_t)((uintptr_t)(&_end) - (uintptr_t)(&_sdata));

	memcpy(to, from, sz);
}
#endif

#ifdef CONFIG_STRICT_KERNEL_RWX
static __init pgprot_t pgprot_from_va(uintptr_t va)
{
	if (is_va_kernel_text(va))
		return PAGE_KERNEL_READ_EXEC;

	/*
	 * In 64-bit kernel, the kernel mapping is outside the linear mapping so
	 * we must protect its linear mapping alias from being executed and
	 * written. The rodata section is marked read-only in mark_rodata_ro().
	 */
	if (IS_ENABLED(CONFIG_64BIT) && is_va_kernel_lm_alias_text(va))
		return PAGE_KERNEL_READ;

	return PAGE_KERNEL;
}

void mark_rodata_ro(void)
{
	set_kernel_memory(__start_rodata, _data, set_memory_ro);
	if (IS_ENABLED(CONFIG_64BIT))
		set_kernel_memory(lm_alias(__start_rodata), lm_alias(_data),
				  set_memory_ro);

	debug_checkwx();
}
#else
static __init pgprot_t pgprot_from_va(uintptr_t va)
{
	if (IS_ENABLED(CONFIG_64BIT) && !is_kernel_mapping(va))
		return PAGE_KERNEL;

	return PAGE_KERNEL_EXEC;
}
#endif /* CONFIG_STRICT_KERNEL_RWX */

#if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL)
u64 __pi_set_satp_mode_from_cmdline(uintptr_t dtb_pa);

static void __init disable_pgtable_l5(void)
{
	pgtable_l5_enabled = false;
	kernel_map.page_offset = PAGE_OFFSET_L4;
	satp_mode = SATP_MODE_48;
}

static void __init disable_pgtable_l4(void)
{
	pgtable_l4_enabled = false;
	kernel_map.page_offset = PAGE_OFFSET_L3;
	satp_mode = SATP_MODE_39;
}

static int __init print_no4lvl(char *p)
{
	pr_info("Disabled 4-level and 5-level paging");
	return 0;
}
early_param("no4lvl", print_no4lvl);

static int __init print_no5lvl(char *p)
{
	pr_info("Disabled 5-level paging");
	return 0;
}
early_param("no5lvl", print_no5lvl);

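/*
 * Note: "no4lvl" and "no5lvl" are acted upon earlier, in
 * __pi_set_satp_mode_from_cmdline() while the MMU is still off; the two
 * early_param() handlers above exist to report the choice when the early
 * parameters are parsed.
 */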
/*
 * There is a simple way to determine if 4-level is supported by the
 * underlying hardware: establish a 1:1 mapping in 4-level page table mode,
 * then read SATP to see if the configuration was taken into account,
 * meaning sv48 is supported.
 */
static __init void set_satp_mode(uintptr_t dtb_pa)
{
	u64 identity_satp, hw_satp;
	uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
	u64 satp_mode_cmdline = __pi_set_satp_mode_from_cmdline(dtb_pa);

	if (satp_mode_cmdline == SATP_MODE_57) {
		disable_pgtable_l5();
	} else if (satp_mode_cmdline == SATP_MODE_48) {
		disable_pgtable_l5();
		disable_pgtable_l4();
		return;
	}

	create_p4d_mapping(early_p4d,
			   set_satp_mode_pmd, (uintptr_t)early_pud,
			   P4D_SIZE, PAGE_TABLE);
	create_pud_mapping(early_pud,
			   set_satp_mode_pmd, (uintptr_t)early_pmd,
			   PUD_SIZE, PAGE_TABLE);
	/* Handle the case where set_satp_mode straddles 2 PMDs */
	create_pmd_mapping(early_pmd,
			   set_satp_mode_pmd, set_satp_mode_pmd,
			   PMD_SIZE, PAGE_KERNEL_EXEC);
	create_pmd_mapping(early_pmd,
			   set_satp_mode_pmd + PMD_SIZE,
			   set_satp_mode_pmd + PMD_SIZE,
			   PMD_SIZE, PAGE_KERNEL_EXEC);
retry:
	create_pgd_mapping(early_pg_dir,
			   set_satp_mode_pmd,
			   pgtable_l5_enabled ?
				(uintptr_t)early_p4d : (uintptr_t)early_pud,
			   PGDIR_SIZE, PAGE_TABLE);

	identity_satp = PFN_DOWN((uintptr_t)&early_pg_dir) | satp_mode;

	local_flush_tlb_all();
	csr_write(CSR_SATP, identity_satp);
	hw_satp = csr_swap(CSR_SATP, 0ULL);
	local_flush_tlb_all();

	if (hw_satp != identity_satp) {
		if (pgtable_l5_enabled) {
			disable_pgtable_l5();
			memset(early_pg_dir, 0, PAGE_SIZE);
			goto retry;
		}
		disable_pgtable_l4();
	}

	memset(early_pg_dir, 0, PAGE_SIZE);
	memset(early_p4d, 0, PAGE_SIZE);
	memset(early_pud, 0, PAGE_SIZE);
	memset(early_pmd, 0, PAGE_SIZE);
}
#endif

/*
 * setup_vm() is called from head.S with MMU-off.
 *
 * The following requirements should be honoured for setup_vm() to work
 * correctly:
 * 1) It should use PC-relative addressing for accessing kernel symbols.
 *    To achieve this we always use GCC cmodel=medany.
 * 2) The compiler instrumentation for FTRACE will not work for setup_vm()
 *    so disable compiler instrumentation when FTRACE is enabled.
 *
 * Currently, the above requirements are honoured by using custom CFLAGS
 * for init.o in mm/Makefile.
 */

#ifndef __riscv_cmodel_medany
#error "setup_vm() is called from head.S before relocate so it should not use absolute addressing."
#endif

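/*
 * For the R_RISCV_RELATIVE entries handled by relocate_kernel() below, the
 * value written is essentially:
 *
 *	new value = r_addend + (runtime kernel virtual base - KERNEL_LINK_ADDR)
 *
 * and the slot to patch is found by converting r_offset (a link-time virtual
 * address) to its physical location, since the MMU is still off at this point.
 */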
#ifdef CONFIG_RELOCATABLE
extern unsigned long __rela_dyn_start, __rela_dyn_end;

static void __init relocate_kernel(void)
{
	Elf64_Rela *rela = (Elf64_Rela *)&__rela_dyn_start;
	/*
	 * This holds the offset between the linked virtual address and the
	 * relocated virtual address.
	 */
	uintptr_t reloc_offset = kernel_map.virt_addr - KERNEL_LINK_ADDR;
	/*
	 * This holds the offset between kernel linked virtual address and
	 * physical address.
	 */
	uintptr_t va_kernel_link_pa_offset = KERNEL_LINK_ADDR - kernel_map.phys_addr;

	for ( ; rela < (Elf64_Rela *)&__rela_dyn_end; rela++) {
		Elf64_Addr addr = (rela->r_offset - va_kernel_link_pa_offset);
		Elf64_Addr relocated_addr = rela->r_addend;

		if (rela->r_info != R_RISCV_RELATIVE)
			continue;

		/*
		 * Make sure to not relocate vdso symbols like rt_sigreturn
		 * which are linked from the address 0 in vmlinux since
		 * vdso symbol addresses are actually used as an offset from
		 * mm->context.vdso in VDSO_OFFSET macro.
		 */
		if (relocated_addr >= KERNEL_LINK_ADDR)
			relocated_addr += reloc_offset;

		*(Elf64_Addr *)addr = relocated_addr;
	}
}
#endif /* CONFIG_RELOCATABLE */

#ifdef CONFIG_XIP_KERNEL
static void __init create_kernel_page_table(pgd_t *pgdir,
					    __always_unused bool early)
{
	uintptr_t va, end_va;

	/* Map the flash resident part */
	end_va = kernel_map.virt_addr + kernel_map.xiprom_sz;
	for (va = kernel_map.virt_addr; va < end_va; va += PMD_SIZE)
		create_pgd_mapping(pgdir, va,
				   kernel_map.xiprom + (va - kernel_map.virt_addr),
				   PMD_SIZE, PAGE_KERNEL_EXEC);

	/* Map the data in RAM */
	end_va = kernel_map.virt_addr + XIP_OFFSET + kernel_map.size;
	for (va = kernel_map.virt_addr + XIP_OFFSET; va < end_va; va += PMD_SIZE)
		create_pgd_mapping(pgdir, va,
				   kernel_map.phys_addr + (va - (kernel_map.virt_addr + XIP_OFFSET)),
				   PMD_SIZE, PAGE_KERNEL);
}
#else
static void __init create_kernel_page_table(pgd_t *pgdir, bool early)
{
	uintptr_t va, end_va;

	end_va = kernel_map.virt_addr + kernel_map.size;
	for (va = kernel_map.virt_addr; va < end_va; va += PMD_SIZE)
		create_pgd_mapping(pgdir, va,
				   kernel_map.phys_addr + (va - kernel_map.virt_addr),
				   PMD_SIZE,
				   early ?
					PAGE_KERNEL_EXEC : pgprot_from_va(va));
}
#endif

/*
 * Setup a 4MB mapping that encompasses the device tree: for 64-bit kernel,
 * this means 2 PMD entries whereas for 32-bit kernel, this is only 1 PGDIR
 * entry.
 */
static void __init create_fdt_early_page_table(uintptr_t fix_fdt_va,
					       uintptr_t dtb_pa)
{
#ifndef CONFIG_BUILTIN_DTB
	uintptr_t pa = dtb_pa & ~(PMD_SIZE - 1);

	/* Make sure the fdt fixmap address is always aligned on PMD size */
	BUILD_BUG_ON(FIX_FDT % (PMD_SIZE / PAGE_SIZE));

	/* In 32-bit only, the fdt lies in its own PGD */
	if (!IS_ENABLED(CONFIG_64BIT)) {
		create_pgd_mapping(early_pg_dir, fix_fdt_va,
				   pa, MAX_FDT_SIZE, PAGE_KERNEL);
	} else {
		create_pmd_mapping(fixmap_pmd, fix_fdt_va,
				   pa, PMD_SIZE, PAGE_KERNEL);
		create_pmd_mapping(fixmap_pmd, fix_fdt_va + PMD_SIZE,
				   pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL);
	}

	dtb_early_va = (void *)fix_fdt_va + (dtb_pa & (PMD_SIZE - 1));
#else
	/*
	 * For 64-bit kernel, __va can't be used since it would return a linear
	 * mapping address whereas dtb_early_va will be used before
	 * setup_vm_final installs the linear mapping. For 32-bit kernel, as the
	 * kernel is mapped in the linear mapping, that makes no difference.
	 */
	dtb_early_va = kernel_mapping_pa_to_va(XIP_FIXUP(dtb_pa));
#endif

	dtb_early_pa = dtb_pa;
}

/*
 * MMU is not enabled, the page tables are allocated directly using
 * early_pmd/pud/p4d and the address returned is the physical one.
 */
static void __init pt_ops_set_early(void)
{
	pt_ops.alloc_pte = alloc_pte_early;
	pt_ops.get_pte_virt = get_pte_virt_early;
#ifndef __PAGETABLE_PMD_FOLDED
	pt_ops.alloc_pmd = alloc_pmd_early;
	pt_ops.get_pmd_virt = get_pmd_virt_early;
	pt_ops.alloc_pud = alloc_pud_early;
	pt_ops.get_pud_virt = get_pud_virt_early;
	pt_ops.alloc_p4d = alloc_p4d_early;
	pt_ops.get_p4d_virt = get_p4d_virt_early;
#endif
}

/*
 * MMU is enabled but page table setup is not complete yet.
 * The fixmap page table alloc functions must be used to temporarily map the
 * allocated physical pages, since the linear mapping does not exist yet.
 *
 * Note that this is called with MMU disabled, hence kernel_mapping_pa_to_va,
 * but the resulting operations are only used once the MMU is on, as
 * described above.
 */
static void __init pt_ops_set_fixmap(void)
{
	pt_ops.alloc_pte = kernel_mapping_pa_to_va(alloc_pte_fixmap);
	pt_ops.get_pte_virt = kernel_mapping_pa_to_va(get_pte_virt_fixmap);
#ifndef __PAGETABLE_PMD_FOLDED
	pt_ops.alloc_pmd = kernel_mapping_pa_to_va(alloc_pmd_fixmap);
	pt_ops.get_pmd_virt = kernel_mapping_pa_to_va(get_pmd_virt_fixmap);
	pt_ops.alloc_pud = kernel_mapping_pa_to_va(alloc_pud_fixmap);
	pt_ops.get_pud_virt = kernel_mapping_pa_to_va(get_pud_virt_fixmap);
	pt_ops.alloc_p4d = kernel_mapping_pa_to_va(alloc_p4d_fixmap);
	pt_ops.get_p4d_virt = kernel_mapping_pa_to_va(get_p4d_virt_fixmap);
#endif
}

/*
 * MMU is enabled and page table setup is complete, so from now on we can
 * use the generic page allocation functions to set up the page tables.
 */
static void __init pt_ops_set_late(void)
{
	pt_ops.alloc_pte = alloc_pte_late;
	pt_ops.get_pte_virt = get_pte_virt_late;
#ifndef __PAGETABLE_PMD_FOLDED
	pt_ops.alloc_pmd = alloc_pmd_late;
	pt_ops.get_pmd_virt = get_pmd_virt_late;
	pt_ops.alloc_pud = alloc_pud_late;
	pt_ops.get_pud_virt = get_pud_virt_late;
	pt_ops.alloc_p4d = alloc_p4d_late;
	pt_ops.get_p4d_virt = get_p4d_virt_late;
#endif
}

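/*
 * Rough outline of setup_vm() below (all with the MMU still off):
 *  1. describe the kernel image in kernel_map and pick the paging mode
 *     (set_satp_mode() on rv64);
 *  2. apply relocations if the kernel is relocatable;
 *  3. build the early page tables: fixmap hierarchy, trampoline mapping of
 *     the kernel, the full early kernel mapping and the FDT fixmap mapping;
 *  4. switch pt_ops to the fixmap helpers for the next phase
 *     (setup_vm_final()).
 */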
asmlinkage void __init setup_vm(uintptr_t dtb_pa)
{
	pmd_t __maybe_unused fix_bmap_spmd, fix_bmap_epmd;

	kernel_map.virt_addr = KERNEL_LINK_ADDR;
	kernel_map.page_offset = _AC(CONFIG_PAGE_OFFSET, UL);

#ifdef CONFIG_XIP_KERNEL
	kernel_map.xiprom = (uintptr_t)CONFIG_XIP_PHYS_ADDR;
	kernel_map.xiprom_sz = (uintptr_t)(&_exiprom) - (uintptr_t)(&_xiprom);

	phys_ram_base = CONFIG_PHYS_RAM_BASE;
	kernel_map.phys_addr = (uintptr_t)CONFIG_PHYS_RAM_BASE;
	kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_sdata);

	kernel_map.va_kernel_xip_pa_offset = kernel_map.virt_addr - kernel_map.xiprom;
#else
	kernel_map.phys_addr = (uintptr_t)(&_start);
	kernel_map.size = (uintptr_t)(&_end) - kernel_map.phys_addr;
#endif

#if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL)
	set_satp_mode(dtb_pa);
#endif

	/*
	 * In 64-bit, we defer the setup of va_pa_offset to setup_bootmem,
	 * where we have the system memory layout: this allows us to align
	 * the physical and virtual mappings and then make use of PUD/P4D/PGD
	 * for the linear mapping. This is only possible because the kernel
	 * mapping lies outside the linear mapping.
	 * In 32-bit however, as the kernel resides in the linear mapping,
	 * setup_vm_final cannot change the mapping established here,
	 * otherwise the same kernel addresses would get mapped to different
	 * physical addresses (if the start of DRAM is different from the
	 * kernel physical address start).
	 */
	kernel_map.va_pa_offset = IS_ENABLED(CONFIG_64BIT) ?
				0UL : PAGE_OFFSET - kernel_map.phys_addr;
	kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr;

	/*
	 * The default maximal physical memory size is KERN_VIRT_SIZE for 32-bit
	 * kernel, whereas for 64-bit kernel, the end of the virtual address
	 * space is occupied by the modules/BPF/kernel mappings which reduces
	 * the available size of the linear mapping.
	 */
	memory_limit = KERN_VIRT_SIZE - (IS_ENABLED(CONFIG_64BIT) ? SZ_4G : 0);

	/* Sanity check alignment and size */
	BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0);
	BUG_ON((kernel_map.phys_addr % PMD_SIZE) != 0);

#ifdef CONFIG_64BIT
	/*
	 * The last 4K bytes of the addressable memory cannot be mapped because
	 * of the IS_ERR_VALUE macro.
	 */
	BUG_ON((kernel_map.virt_addr + kernel_map.size) > ADDRESS_SPACE_END - SZ_4K);
#endif

#ifdef CONFIG_RELOCATABLE
	/*
	 * Early page table uses only one PUD, which makes it possible
	 * to map PUD_SIZE aligned on PUD_SIZE: if the relocation offset
	 * makes the kernel cross over a PUD_SIZE boundary, raise a bug
	 * since a part of the kernel would not get mapped.
	 */
	BUG_ON(PUD_SIZE - (kernel_map.virt_addr & (PUD_SIZE - 1)) < kernel_map.size);
	relocate_kernel();
#endif

	apply_early_boot_alternatives();
	pt_ops_set_early();

	/* Setup early PGD for fixmap */
	create_pgd_mapping(early_pg_dir, FIXADDR_START,
			   fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);

#ifndef __PAGETABLE_PMD_FOLDED
	/* Setup fixmap P4D and PUD */
	if (pgtable_l5_enabled)
		create_p4d_mapping(fixmap_p4d, FIXADDR_START,
				   (uintptr_t)fixmap_pud, P4D_SIZE, PAGE_TABLE);
	/* Setup fixmap PUD and PMD */
	if (pgtable_l4_enabled)
		create_pud_mapping(fixmap_pud, FIXADDR_START,
				   (uintptr_t)fixmap_pmd, PUD_SIZE, PAGE_TABLE);
	create_pmd_mapping(fixmap_pmd, FIXADDR_START,
			   (uintptr_t)fixmap_pte, PMD_SIZE, PAGE_TABLE);
	/* Setup trampoline PGD and PMD */
	create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr,
			   trampoline_pgd_next, PGDIR_SIZE, PAGE_TABLE);
	if (pgtable_l5_enabled)
		create_p4d_mapping(trampoline_p4d, kernel_map.virt_addr,
				   (uintptr_t)trampoline_pud, P4D_SIZE, PAGE_TABLE);
	if (pgtable_l4_enabled)
		create_pud_mapping(trampoline_pud, kernel_map.virt_addr,
				   (uintptr_t)trampoline_pmd, PUD_SIZE, PAGE_TABLE);
#ifdef CONFIG_XIP_KERNEL
	create_pmd_mapping(trampoline_pmd, kernel_map.virt_addr,
			   kernel_map.xiprom, PMD_SIZE, PAGE_KERNEL_EXEC);
#else
	create_pmd_mapping(trampoline_pmd, kernel_map.virt_addr,
			   kernel_map.phys_addr, PMD_SIZE, PAGE_KERNEL_EXEC);
#endif
#else
	/* Setup trampoline PGD */
	create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr,
			   kernel_map.phys_addr, PGDIR_SIZE, PAGE_KERNEL_EXEC);
#endif

	/*
	 * Setup early PGD covering entire kernel which will allow
	 * us to reach paging_init(). We map all memory banks later
	 * in setup_vm_final() below.
	 */
	create_kernel_page_table(early_pg_dir, true);

	/* Setup early mapping for FDT early scan */
	create_fdt_early_page_table(__fix_to_virt(FIX_FDT), dtb_pa);

	/*
	 * The boot-time fixmap can only handle PMD_SIZE mappings. Thus, the
	 * boot-ioremap range cannot span multiple PMDs.
	 */
	BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
	       != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));

#ifndef __PAGETABLE_PMD_FOLDED
	/*
	 * Early ioremap fixmap is already created as it lies within first 2MB
	 * of fixmap region. We always map PMD_SIZE. Thus, both FIX_BTMAP_END
	 * and FIX_BTMAP_BEGIN should lie in the same pmd. Verify that and warn
	 * the user if not.
	 */
	fix_bmap_spmd = fixmap_pmd[pmd_index(__fix_to_virt(FIX_BTMAP_BEGIN))];
	fix_bmap_epmd = fixmap_pmd[pmd_index(__fix_to_virt(FIX_BTMAP_END))];
	if (pmd_val(fix_bmap_spmd) != pmd_val(fix_bmap_epmd)) {
		WARN_ON(1);
		pr_warn("fixmap btmap start [%08lx] != end [%08lx]\n",
			pmd_val(fix_bmap_spmd), pmd_val(fix_bmap_epmd));
		pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
			fix_to_virt(FIX_BTMAP_BEGIN));
		pr_warn("fix_to_virt(FIX_BTMAP_END):   %08lx\n",
			fix_to_virt(FIX_BTMAP_END));

		pr_warn("FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
		pr_warn("FIX_BTMAP_BEGIN:     %d\n", FIX_BTMAP_BEGIN);
	}
#endif

	pt_ops_set_fixmap();
}

static void __init create_linear_mapping_range(phys_addr_t start,
					       phys_addr_t end,
					       uintptr_t fixed_map_size)
{
	phys_addr_t pa;
	uintptr_t va, map_size;

	for (pa = start; pa < end; pa += map_size) {
		va = (uintptr_t)__va(pa);
		map_size = fixed_map_size ? fixed_map_size :
					    best_map_size(pa, va, end - pa);

		create_pgd_mapping(swapper_pg_dir, va, pa, map_size,
				   pgprot_from_va(va));
	}
}

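/*
 * In create_linear_mapping_page_table() below, kernel text/rodata (and the
 * KFENCE pool, if enabled) are first marked nomap so that the generic loop
 * over memblock skips them and cannot cover them with a huge leaf entry;
 * they are then mapped separately with a suitable granularity and the nomap
 * flag is cleared again so the ranges are treated as regular memory.
 */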
static void __init create_linear_mapping_page_table(void)
{
	phys_addr_t start, end;
	phys_addr_t kfence_pool __maybe_unused;
	u64 i;

#ifdef CONFIG_STRICT_KERNEL_RWX
	phys_addr_t ktext_start = __pa_symbol(_start);
	phys_addr_t ktext_size = __init_data_begin - _start;
	phys_addr_t krodata_start = __pa_symbol(__start_rodata);
	phys_addr_t krodata_size = _data - __start_rodata;

	/* Isolate kernel text and rodata so they don't get mapped with a PUD */
	memblock_mark_nomap(ktext_start, ktext_size);
	memblock_mark_nomap(krodata_start, krodata_size);
#endif

#ifdef CONFIG_KFENCE
	/*
	 * The kfence pool must be backed by PAGE_SIZE mappings, so allocate it
	 * before we set up the linear mapping so that we avoid using hugepages
	 * for this region.
	 */
	kfence_pool = memblock_phys_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
	BUG_ON(!kfence_pool);

	memblock_mark_nomap(kfence_pool, KFENCE_POOL_SIZE);
	__kfence_pool = __va(kfence_pool);
#endif

	/* Map all memory banks in the linear mapping */
	for_each_mem_range(i, &start, &end) {
		if (start >= end)
			break;
		if (start <= __pa(PAGE_OFFSET) &&
		    __pa(PAGE_OFFSET) < end)
			start = __pa(PAGE_OFFSET);
		if (end >= __pa(PAGE_OFFSET) + memory_limit)
			end = __pa(PAGE_OFFSET) + memory_limit;

		create_linear_mapping_range(start, end, 0);
	}

#ifdef CONFIG_STRICT_KERNEL_RWX
	create_linear_mapping_range(ktext_start, ktext_start + ktext_size, 0);
	create_linear_mapping_range(krodata_start,
				    krodata_start + krodata_size, 0);

	memblock_clear_nomap(ktext_start, ktext_size);
	memblock_clear_nomap(krodata_start, krodata_size);
#endif

#ifdef CONFIG_KFENCE
	create_linear_mapping_range(kfence_pool,
				    kfence_pool + KFENCE_POOL_SIZE,
				    PAGE_SIZE);

	memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE);
#endif
}

static void __init setup_vm_final(void)
{
	/* Setup swapper PGD for fixmap */
#if !defined(CONFIG_64BIT)
	/*
	 * In 32-bit, the device tree lies in a pgd entry, so it must be copied
	 * directly in swapper_pg_dir in addition to the pgd entry that points
	 * to fixmap_pte.
	 */
	unsigned long idx = pgd_index(__fix_to_virt(FIX_FDT));

	set_pgd(&swapper_pg_dir[idx], early_pg_dir[idx]);
#endif
	create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
			   __pa_symbol(fixmap_pgd_next),
			   PGDIR_SIZE, PAGE_TABLE);

	/* Map the linear mapping */
	create_linear_mapping_page_table();

	/* Map the kernel */
	if (IS_ENABLED(CONFIG_64BIT))
		create_kernel_page_table(swapper_pg_dir, false);

#ifdef CONFIG_KASAN
	kasan_swapper_init();
#endif

	/* Clear fixmap PTE and PMD mappings */
	clear_fixmap(FIX_PTE);
	clear_fixmap(FIX_PMD);
	clear_fixmap(FIX_PUD);
	clear_fixmap(FIX_P4D);

	/* Move to swapper page table */
	csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | satp_mode);
	local_flush_tlb_all();

	pt_ops_set_late();
}
#else
asmlinkage void __init setup_vm(uintptr_t dtb_pa)
{
	dtb_early_va = (void *)dtb_pa;
	dtb_early_pa = dtb_pa;
}

static inline void setup_vm_final(void)
{
}
#endif /* CONFIG_MMU */

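/*
 * Example: "crashkernel=256M" reserves 256 MB anywhere suitable (32-bit
 * addressable memory is tried first so swiotlb keeps working in the dump
 * kernel), while "crashkernel=256M@0x90000000" (the address being purely
 * illustrative) pins the reservation to the given base.
 */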
/*
 * reserve_crashkernel() - reserves memory for crash kernel
 *
 * This function reserves the memory area given by the "crashkernel=" kernel
 * command line parameter. The reserved memory is used by the dump capture
 * kernel when the primary kernel crashes.
 */
static void __init reserve_crashkernel(void)
{
	unsigned long long crash_base = 0;
	unsigned long long crash_size = 0;
	unsigned long search_start = memblock_start_of_DRAM();
	unsigned long search_end = memblock_end_of_DRAM();

	int ret = 0;

	if (!IS_ENABLED(CONFIG_KEXEC_CORE))
		return;
	/*
	 * Don't reserve a region for a crash kernel on a crash kernel
	 * since it doesn't make much sense and we have limited memory
	 * resources.
	 */
	if (is_kdump_kernel()) {
		pr_info("crashkernel: ignoring reservation request\n");
		return;
	}

	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
				&crash_size, &crash_base);
	if (ret || !crash_size)
		return;

	crash_size = PAGE_ALIGN(crash_size);

	if (crash_base) {
		search_start = crash_base;
		search_end = crash_base + crash_size;
	}

	/*
	 * Current riscv boot protocol requires 2MB alignment for
	 * RV64 and 4MB alignment for RV32 (hugepage size)
	 *
	 * Try to alloc from 32-bit addressable physical memory so that
	 * swiotlb can work on the crash kernel.
	 */
	crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE,
					       search_start,
					       min(search_end, (unsigned long)(SZ_4G - 1)));
	if (crash_base == 0) {
		/* Try again without restricting region to 32-bit addressable memory */
		crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE,
						       search_start, search_end);
		if (crash_base == 0) {
			pr_warn("crashkernel: couldn't allocate %lldKB\n",
				crash_size >> 10);
			return;
		}
	}

	pr_info("crashkernel: reserved 0x%016llx - 0x%016llx (%lld MB)\n",
		crash_base, crash_base + crash_size, crash_size >> 20);

	crashk_res.start = crash_base;
	crashk_res.end = crash_base + crash_size - 1;
}

void __init paging_init(void)
{
	setup_bootmem();
	setup_vm_final();

	/* Allowing memblock resizing depends on the linear mapping being ready */
	memblock_allow_resize();
}

void __init misc_mem_init(void)
{
	early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT);
	arch_numa_init();
	sparse_init();
	zone_sizes_init();
	reserve_crashkernel();
	memblock_dump_all();
}

#ifdef CONFIG_SPARSEMEM_VMEMMAP
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
			       struct vmem_altmap *altmap)
{
	return vmemmap_populate_basepages(start, end, node, NULL);
}
#endif

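/*
 * Note on the pre-allocation below: kernel PGD entries are copied into each
 * process page table when an mm is created, so the top-level entries covering
 * vmalloc and modules/BPF must exist before that copy happens; otherwise
 * later kernel mappings in those areas would be missing from existing
 * process page tables.
 */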
#if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
/*
 * Pre-allocates page-table pages for a specific area in the kernel
 * page-table. Only the level which needs to be synchronized between
 * all page-tables is allocated because the synchronization can be
 * expensive.
 */
static void __init preallocate_pgd_pages_range(unsigned long start, unsigned long end,
					       const char *area)
{
	unsigned long addr;
	const char *lvl;

	for (addr = start; addr < end && addr >= start; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
		pgd_t *pgd = pgd_offset_k(addr);
		p4d_t *p4d;
		pud_t *pud;
		pmd_t *pmd;

		lvl = "p4d";
		p4d = p4d_alloc(&init_mm, pgd, addr);
		if (!p4d)
			goto failed;

		if (pgtable_l5_enabled)
			continue;

		lvl = "pud";
		pud = pud_alloc(&init_mm, p4d, addr);
		if (!pud)
			goto failed;

		if (pgtable_l4_enabled)
			continue;

		lvl = "pmd";
		pmd = pmd_alloc(&init_mm, pud, addr);
		if (!pmd)
			goto failed;
	}
	return;

failed:
	/*
	 * The pages have to be there now or they will be missing in
	 * process page-tables later.
	 */
	panic("Failed to pre-allocate %s pages for %s area\n", lvl, area);
}

void __init pgtable_cache_init(void)
{
	preallocate_pgd_pages_range(VMALLOC_START, VMALLOC_END, "vmalloc");
	if (IS_ENABLED(CONFIG_MODULES))
		preallocate_pgd_pages_range(MODULES_VADDR, MODULES_END, "bpf/modules");
}
#endif