/*
 * Based on arch/arm/mm/mmu.c
 *
 * Copyright (C) 1995-2005 Russell King
 * Copyright (C) 2012 ARM Ltd.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/mman.h>
#include <linux/nodemask.h>
#include <linux/memblock.h>
#include <linux/fs.h>
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/stop_machine.h>

#include <asm/cputype.h>
#include <asm/fixmap.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/sizes.h>
#include <asm/tlb.h>
#include <asm/memblock.h>
#include <asm/mmu_context.h>

#include "mm.h"

/*
 * empty_zero_page is a special page that is used for zero-initialized data
 * and COW.
 */
struct page *empty_zero_page;
EXPORT_SYMBOL(empty_zero_page);

pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
			      unsigned long size, pgprot_t vma_prot)
{
	if (!pfn_valid(pfn))
		return pgprot_noncached(vma_prot);
	else if (file->f_flags & O_SYNC)
		return pgprot_writecombine(vma_prot);
	return vma_prot;
}
EXPORT_SYMBOL(phys_mem_access_prot);

static void __init *early_alloc(unsigned long sz)
{
	void *ptr = __va(memblock_alloc(sz, sz));
	BUG_ON(!ptr);
	memset(ptr, 0, sz);
	return ptr;
}

/*
 * remap a PMD into pages
 */
static void split_pmd(pmd_t *pmd, pte_t *pte)
{
	unsigned long pfn = pmd_pfn(*pmd);
	int i = 0;

	do {
		/*
		 * Need to have the least restrictive permissions available
		 * here; the permissions will be fixed up later.
		 */
		set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
		pfn++;
	} while (pte++, i++, i < PTRS_PER_PTE);
}

static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
			   unsigned long end, unsigned long pfn,
			   pgprot_t prot,
			   void *(*alloc)(unsigned long size))
{
	pte_t *pte;

	if (pmd_none(*pmd) || pmd_sect(*pmd)) {
		pte = alloc(PTRS_PER_PTE * sizeof(pte_t));
		if (pmd_sect(*pmd))
			split_pmd(pmd, pte);
		__pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE);
		flush_tlb_all();
	}
	BUG_ON(pmd_bad(*pmd));

	pte = pte_offset_kernel(pmd, addr);
	do {
		set_pte(pte, pfn_pte(pfn, prot));
		pfn++;
	} while (pte++, addr += PAGE_SIZE, addr != end);
}

/*
 * remap a PUD into PMD-sized block mappings, preserving its attributes
 */
void split_pud(pud_t *old_pud, pmd_t *pmd)
{
	unsigned long addr = pud_pfn(*old_pud) << PAGE_SHIFT;
	pgprot_t prot = __pgprot(pud_val(*old_pud) ^ addr);
	int i = 0;

	do {
		set_pmd(pmd, __pmd(addr | prot));
		addr += PMD_SIZE;
	} while (pmd++, i++, i < PTRS_PER_PMD);
}
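
/*
 * Worked example (illustrative figures, not from any particular platform):
 * with a 4K granule, splitting a 2MB PMD block that maps physical address
 * 0x40200000 produces PTRS_PER_PTE (512) page entries covering pfns
 * 0x40200-0x403ff, and splitting a 1GB PUD block produces PTRS_PER_PMD
 * (512) 2MB entries. The replacement entries deliberately keep coarse
 * permissions (PAGE_KERNEL_EXEC, or the old block attributes); these are
 * tightened later by fixup_executable(), mark_rodata_ro() and fixup_init().
 */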

static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
			   unsigned long addr, unsigned long end,
			   phys_addr_t phys, pgprot_t prot,
			   void *(*alloc)(unsigned long size))
{
	pmd_t *pmd;
	unsigned long next;

	/*
	 * Check for initial section mappings in the pgd/pud and remove them.
	 */
	if (pud_none(*pud) || pud_sect(*pud)) {
		pmd = alloc(PTRS_PER_PMD * sizeof(pmd_t));
		if (pud_sect(*pud)) {
			/*
			 * Need to keep the whole 1G of mappings present
			 * while the block is being split.
			 */
			split_pud(pud, pmd);
		}
		pud_populate(mm, pud, pmd);
		flush_tlb_all();
	}
	BUG_ON(pud_bad(*pud));

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		/* try section mapping first */
		if (((addr | next | phys) & ~SECTION_MASK) == 0) {
			pmd_t old_pmd = *pmd;
			set_pmd(pmd, __pmd(phys |
					   pgprot_val(mk_sect_prot(prot))));
			/*
			 * Check for previous table entries created during
			 * boot (__create_page_tables) and flush them.
			 */
			if (!pmd_none(old_pmd)) {
				flush_tlb_all();
				if (pmd_table(old_pmd)) {
					phys_addr_t table = __pa(pte_offset_map(&old_pmd, 0));
					if (!WARN_ON_ONCE(slab_is_available()))
						memblock_free(table, PAGE_SIZE);
				}
			}
		} else {
			alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys),
				       prot, alloc);
		}
		phys += next - addr;
	} while (pmd++, addr = next, addr != end);
}

static inline bool use_1G_block(unsigned long addr, unsigned long next,
				unsigned long phys)
{
	if (PAGE_SHIFT != 12)
		return false;

	if (((addr | next | phys) & ~PUD_MASK) != 0)
		return false;

	return true;
}

static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
			   unsigned long addr, unsigned long end,
			   phys_addr_t phys, pgprot_t prot,
			   void *(*alloc)(unsigned long size))
{
	pud_t *pud;
	unsigned long next;

	if (pgd_none(*pgd)) {
		pud = alloc(PTRS_PER_PUD * sizeof(pud_t));
		pgd_populate(mm, pgd, pud);
	}
	BUG_ON(pgd_bad(*pgd));

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);

		/*
		 * For the 4K granule only, attempt to put down a 1GB block.
		 */
		if (use_1G_block(addr, next, phys)) {
			pud_t old_pud = *pud;
			set_pud(pud, __pud(phys |
					   pgprot_val(mk_sect_prot(prot))));

			/*
			 * If we have an old value for a pud, it will
			 * be pointing to a pmd table that we no longer
			 * need (from swapper_pg_dir).
			 *
			 * Look up the old pmd table and free it.
			 */
			if (!pud_none(old_pud)) {
				flush_tlb_all();
				if (pud_table(old_pud)) {
					phys_addr_t table = __pa(pmd_offset(&old_pud, 0));
					if (!WARN_ON_ONCE(slab_is_available()))
						memblock_free(table, PAGE_SIZE);
				}
			}
		} else {
			alloc_init_pmd(mm, pud, addr, next, phys, prot, alloc);
		}
		phys += next - addr;
	} while (pud++, addr = next, addr != end);
}
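
/*
 * Summary of the mapping sizes tried above, largest first (sizes assume
 * the standard 4KB and 64KB translation granule configurations):
 *
 *   4K granule:  1GB PUD block -> 2MB PMD section -> 4KB PTE
 *   64K granule: 512MB PMD section -> 64KB PTE (no PUD blocks)
 *
 * For example, a 1GB-aligned request such as 0x80000000-0xc0000000 can be
 * covered by a single PUD block under a 4K granule, whereas a smaller or
 * unaligned region falls back to section and then page mappings.
 */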

/*
 * Create the page directory entries and any necessary page tables for the
 * requested virtual-to-physical mapping.
 */
static void __create_mapping(struct mm_struct *mm, pgd_t *pgd,
			     phys_addr_t phys, unsigned long virt,
			     phys_addr_t size, pgprot_t prot,
			     void *(*alloc)(unsigned long size))
{
	unsigned long addr, length, end, next;

	addr = virt & PAGE_MASK;
	length = PAGE_ALIGN(size + (virt & ~PAGE_MASK));

	end = addr + length;
	do {
		next = pgd_addr_end(addr, end);
		alloc_init_pud(mm, pgd, addr, next, phys, prot, alloc);
		phys += next - addr;
	} while (pgd++, addr = next, addr != end);
}

static void *late_alloc(unsigned long size)
{
	void *ptr;

	BUG_ON(size > PAGE_SIZE);
	ptr = (void *)__get_free_page(PGALLOC_GFP);
	BUG_ON(!ptr);
	return ptr;
}

static void __ref create_mapping(phys_addr_t phys, unsigned long virt,
				 phys_addr_t size, pgprot_t prot)
{
	if (virt < VMALLOC_START) {
		pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
			&phys, virt);
		return;
	}
	__create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), phys, virt,
			 size, prot, early_alloc);
}

void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
			       unsigned long virt, phys_addr_t size,
			       pgprot_t prot)
{
	__create_mapping(mm, pgd_offset(mm, virt), phys, virt, size, prot,
			 late_alloc);
}

static void create_mapping_late(phys_addr_t phys, unsigned long virt,
				phys_addr_t size, pgprot_t prot)
{
	if (virt < VMALLOC_START) {
		pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
			&phys, virt);
		return;
	}

	__create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK),
			 phys, virt, size, prot, late_alloc);
}

#ifdef CONFIG_DEBUG_RODATA
static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
{
	/*
	 * Set up the executable regions using the existing section mappings
	 * for now. This will get more fine grained later once all memory
	 * is mapped.
	 */
	unsigned long kernel_x_start = round_down(__pa(_stext), SECTION_SIZE);
	unsigned long kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);

	if (end < kernel_x_start) {
		create_mapping(start, __phys_to_virt(start),
			       end - start, PAGE_KERNEL);
	} else if (start >= kernel_x_end) {
		create_mapping(start, __phys_to_virt(start),
			       end - start, PAGE_KERNEL);
	} else {
		if (start < kernel_x_start)
			create_mapping(start, __phys_to_virt(start),
				       kernel_x_start - start,
				       PAGE_KERNEL);
		create_mapping(kernel_x_start,
			       __phys_to_virt(kernel_x_start),
			       kernel_x_end - kernel_x_start,
			       PAGE_KERNEL_EXEC);
		if (kernel_x_end < end)
			create_mapping(kernel_x_end,
				       __phys_to_virt(kernel_x_end),
				       end - kernel_x_end,
				       PAGE_KERNEL);
	}
}
#else
static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
{
	create_mapping(start, __phys_to_virt(start), end - start,
		       PAGE_KERNEL_EXEC);
}
#endif

static void __init map_mem(void)
{
	struct memblock_region *reg;
	phys_addr_t limit;

	/*
	 * Temporarily limit the memblock range. We need to do this as
	 * create_mapping requires puds, pmds and ptes to be allocated from
	 * memory addressable from the initial direct kernel mapping.
	 *
	 * The initial direct kernel mapping, located at swapper_pg_dir, gives
	 * us PUD_SIZE (4K pages) or PMD_SIZE (64K pages) memory starting from
	 * PHYS_OFFSET (which must be aligned to 2MB as per
	 * Documentation/arm64/booting.txt).
	 */
	if (IS_ENABLED(CONFIG_ARM64_64K_PAGES))
		limit = PHYS_OFFSET + PMD_SIZE;
	else
		limit = PHYS_OFFSET + PUD_SIZE;
	memblock_set_current_limit(limit);

	/* map all the memory banks */
	for_each_memblock(memory, reg) {
		phys_addr_t start = reg->base;
		phys_addr_t end = start + reg->size;

		if (start >= end)
			break;

#ifndef CONFIG_ARM64_64K_PAGES
		/*
		 * For the first memory bank, align the start address and
		 * current memblock limit to prevent create_mapping() from
		 * allocating pte page tables from unmapped memory.
		 * When 64K pages are enabled, the pte page table for the
		 * first PGDIR_SIZE is already present in swapper_pg_dir.
		 */
		if (start < limit)
			start = ALIGN(start, PMD_SIZE);
		if (end < limit) {
			limit = end & PMD_MASK;
			memblock_set_current_limit(limit);
		}
#endif
		__map_memblock(start, end);
	}

	/* Limit no longer required. */
	memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
}

void __init fixup_executable(void)
{
#ifdef CONFIG_DEBUG_RODATA
	/*
	 * Now that we are actually fully mapped, make the start/end more
	 * fine grained.
	 */
	if (!IS_ALIGNED((unsigned long)_stext, SECTION_SIZE)) {
		unsigned long aligned_start = round_down(__pa(_stext),
							 SECTION_SIZE);

		create_mapping(aligned_start, __phys_to_virt(aligned_start),
			       __pa(_stext) - aligned_start,
			       PAGE_KERNEL);
	}

	if (!IS_ALIGNED((unsigned long)__init_end, SECTION_SIZE)) {
		unsigned long aligned_end = round_up(__pa(__init_end),
						     SECTION_SIZE);
		create_mapping(__pa(__init_end), (unsigned long)__init_end,
			       aligned_end - __pa(__init_end),
			       PAGE_KERNEL);
	}
#endif
}

#ifdef CONFIG_DEBUG_RODATA
void mark_rodata_ro(void)
{
	create_mapping_late(__pa(_stext), (unsigned long)_stext,
			    (unsigned long)_etext - (unsigned long)_stext,
			    PAGE_KERNEL_EXEC | PTE_RDONLY);
}
#endif

void fixup_init(void)
{
	create_mapping_late(__pa(__init_begin), (unsigned long)__init_begin,
			    (unsigned long)__init_end - (unsigned long)__init_begin,
			    PAGE_KERNEL);
}

/*
 * paging_init() sets up the page tables, initialises the zone memory
 * maps and sets up the zero page.
 */
void __init paging_init(void)
{
	void *zero_page;

	map_mem();
	fixup_executable();

	/* allocate the zero page. */
	zero_page = early_alloc(PAGE_SIZE);

	bootmem_init();

	empty_zero_page = virt_to_page(zero_page);

	/*
	 * TTBR0 is only used for the identity mapping at this stage. Make it
	 * point to the zero page to avoid speculatively fetching new entries.
	 */
	cpu_set_reserved_ttbr0();
	flush_tlb_all();
}

/*
 * Enable the identity mapping so that the MMU can be disabled.
 */
void setup_mm_for_reboot(void)
{
	cpu_switch_mm(idmap_pg_dir, &init_mm);
	flush_tlb_all();
}

/*
 * Check whether a kernel address is valid (derived from arch/x86/).
 */
int kern_addr_valid(unsigned long addr)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	if ((((long)addr) >> VA_BITS) != -1UL)
		return 0;

	pgd = pgd_offset_k(addr);
	if (pgd_none(*pgd))
		return 0;

	pud = pud_offset(pgd, addr);
	if (pud_none(*pud))
		return 0;

	if (pud_sect(*pud))
		return pfn_valid(pud_pfn(*pud));

	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd))
		return 0;

	if (pmd_sect(*pmd))
		return pfn_valid(pmd_pfn(*pmd));

	pte = pte_offset_kernel(pmd, addr);
	if (pte_none(*pte))
		return 0;

	return pfn_valid(pte_pfn(*pte));
}

#ifdef CONFIG_SPARSEMEM_VMEMMAP
#ifdef CONFIG_ARM64_64K_PAGES
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
{
	return vmemmap_populate_basepages(start, end, node);
}
#else	/* !CONFIG_ARM64_64K_PAGES */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
{
	unsigned long addr = start;
	unsigned long next;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;

	do {
		next = pmd_addr_end(addr, end);

		pgd = vmemmap_pgd_populate(addr, node);
		if (!pgd)
			return -ENOMEM;

		pud = vmemmap_pud_populate(pgd, addr, node);
		if (!pud)
			return -ENOMEM;

		pmd = pmd_offset(pud, addr);
		if (pmd_none(*pmd)) {
			void *p = NULL;

			p = vmemmap_alloc_block_buf(PMD_SIZE, node);
			if (!p)
				return -ENOMEM;

			set_pmd(pmd, __pmd(__pa(p) | PROT_SECT_NORMAL));
		} else
			vmemmap_verify((pte_t *)pmd, node, addr, next);
	} while (addr = next, addr != end);

	return 0;
}
#endif	/* CONFIG_ARM64_64K_PAGES */

void vmemmap_free(unsigned long start, unsigned long end)
{
}
#endif	/* CONFIG_SPARSEMEM_VMEMMAP */

static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
#if CONFIG_ARM64_PGTABLE_LEVELS > 2
static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;
#endif
#if CONFIG_ARM64_PGTABLE_LEVELS > 3
static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;
#endif

static inline pud_t *fixmap_pud(unsigned long addr)
{
	pgd_t *pgd = pgd_offset_k(addr);

	BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));

	return pud_offset(pgd, addr);
}

static inline pmd_t *fixmap_pmd(unsigned long addr)
{
	pud_t *pud = fixmap_pud(addr);

	BUG_ON(pud_none(*pud) || pud_bad(*pud));

	return pmd_offset(pud, addr);
}

static inline pte_t *fixmap_pte(unsigned long addr)
{
	pmd_t *pmd = fixmap_pmd(addr);

	BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd));

	return pte_offset_kernel(pmd, addr);
}

void __init early_fixmap_init(void)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	unsigned long addr = FIXADDR_START;

	pgd = pgd_offset_k(addr);
	pgd_populate(&init_mm, pgd, bm_pud);
	pud = pud_offset(pgd, addr);
	pud_populate(&init_mm, pud, bm_pmd);
	pmd = pmd_offset(pud, addr);
	pmd_populate_kernel(&init_mm, pmd, bm_pte);

	/*
	 * The boot-ioremap range spans multiple pmds, for which
	 * we are not prepared:
	 */
	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));

	if ((pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)))
	     || pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) {
		WARN_ON(1);
		pr_warn("pmd %p != %p, %p\n",
			pmd, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)),
			fixmap_pmd(fix_to_virt(FIX_BTMAP_END)));
		pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
			fix_to_virt(FIX_BTMAP_BEGIN));
		pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n",
			fix_to_virt(FIX_BTMAP_END));

		pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END);
		pr_warn("FIX_BTMAP_BEGIN: %d\n", FIX_BTMAP_BEGIN);
	}
}

void __set_fixmap(enum fixed_addresses idx,
		  phys_addr_t phys, pgprot_t flags)
{
	unsigned long addr = __fix_to_virt(idx);
	pte_t *pte;

	if (idx >= __end_of_fixed_addresses) {
		BUG();
		return;
	}

	pte = fixmap_pte(addr);

	if (pgprot_val(flags)) {
		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
	} else {
		pte_clear(&init_mm, addr, pte);
		flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
	}
}
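
/*
 * Usage sketch (illustrative only): the FIX_BTMAP_* slots reserved above
 * are meant for the early boot-time ioremap code, which pairs
 * __set_fixmap() with fix_to_virt() along these lines (assuming the
 * FIXMAP_PAGE_IO attribute from asm/fixmap.h; the variable names here are
 * hypothetical):
 *
 *	void __iomem *base;
 *
 *	__set_fixmap(FIX_BTMAP_BEGIN, phys & PAGE_MASK, FIXMAP_PAGE_IO);
 *	base = (void __iomem *)fix_to_virt(FIX_BTMAP_BEGIN);
 *	val = readl(base + (phys & ~PAGE_MASK));
 *	__set_fixmap(FIX_BTMAP_BEGIN, 0, __pgprot(0));
 *
 * Passing an empty pgprot takes the pte_clear() path above and removes
 * the mapping again.
 */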