xref: /openbmc/linux/arch/arm64/mm/mmu.c (revision 4bce6fce)
1 /*
2  * Based on arch/arm/mm/mmu.c
3  *
4  * Copyright (C) 1995-2005 Russell King
5  * Copyright (C) 2012 ARM Ltd.
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include <linux/export.h>
21 #include <linux/kernel.h>
22 #include <linux/errno.h>
23 #include <linux/init.h>
24 #include <linux/mman.h>
25 #include <linux/nodemask.h>
26 #include <linux/memblock.h>
27 #include <linux/fs.h>
28 #include <linux/io.h>
29 #include <linux/slab.h>
30 #include <linux/stop_machine.h>
31 
32 #include <asm/cputype.h>
33 #include <asm/fixmap.h>
34 #include <asm/sections.h>
35 #include <asm/setup.h>
36 #include <asm/sizes.h>
37 #include <asm/tlb.h>
38 #include <asm/memblock.h>
39 #include <asm/mmu_context.h>
40 
41 #include "mm.h"
42 
43 u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
44 
45 /*
46  * Empty_zero_page is a special page that is used for zero-initialized data
47  * and COW.
48  */
49 struct page *empty_zero_page;
50 EXPORT_SYMBOL(empty_zero_page);
51 
52 pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
53 			      unsigned long size, pgprot_t vma_prot)
54 {
55 	if (!pfn_valid(pfn))
56 		return pgprot_noncached(vma_prot);
57 	else if (file->f_flags & O_SYNC)
58 		return pgprot_writecombine(vma_prot);
59 	return vma_prot;
60 }
61 EXPORT_SYMBOL(phys_mem_access_prot);
62 
63 static void __init *early_alloc(unsigned long sz)
64 {
65 	void *ptr = __va(memblock_alloc(sz, sz));
66 	BUG_ON(!ptr);
67 	memset(ptr, 0, sz);
68 	return ptr;
69 }
70 
71 /*
72  * remap a PMD into pages
73  */
74 static void split_pmd(pmd_t *pmd, pte_t *pte)
75 {
76 	unsigned long pfn = pmd_pfn(*pmd);
77 	int i = 0;
78 
79 	do {
80 		/*
81 		 * Need to have the least restrictive permissions available
82 		 * permissions will be fixed up later
83 		 */
84 		set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
85 		pfn++;
86 	} while (pte++, i++, i < PTRS_PER_PTE);
87 }
88 
89 static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
90 				  unsigned long end, unsigned long pfn,
91 				  pgprot_t prot,
92 				  void *(*alloc)(unsigned long size))
93 {
94 	pte_t *pte;
95 
96 	if (pmd_none(*pmd) || pmd_sect(*pmd)) {
97 		pte = alloc(PTRS_PER_PTE * sizeof(pte_t));
98 		if (pmd_sect(*pmd))
99 			split_pmd(pmd, pte);
100 		__pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE);
101 		flush_tlb_all();
102 	}
103 	BUG_ON(pmd_bad(*pmd));
104 
105 	pte = pte_offset_kernel(pmd, addr);
106 	do {
107 		set_pte(pte, pfn_pte(pfn, prot));
108 		pfn++;
109 	} while (pte++, addr += PAGE_SIZE, addr != end);
110 }
111 
112 void split_pud(pud_t *old_pud, pmd_t *pmd)
113 {
114 	unsigned long addr = pud_pfn(*old_pud) << PAGE_SHIFT;
115 	pgprot_t prot = __pgprot(pud_val(*old_pud) ^ addr);
116 	int i = 0;
117 
118 	do {
119 		set_pmd(pmd, __pmd(addr | prot));
120 		addr += PMD_SIZE;
121 	} while (pmd++, i++, i < PTRS_PER_PMD);
122 }
123 
124 static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
125 				  unsigned long addr, unsigned long end,
126 				  phys_addr_t phys, pgprot_t prot,
127 				  void *(*alloc)(unsigned long size))
128 {
129 	pmd_t *pmd;
130 	unsigned long next;
131 
132 	/*
133 	 * Check for initial section mappings in the pgd/pud and remove them.
134 	 */
135 	if (pud_none(*pud) || pud_sect(*pud)) {
136 		pmd = alloc(PTRS_PER_PMD * sizeof(pmd_t));
137 		if (pud_sect(*pud)) {
138 			/*
139 			 * need to have the 1G of mappings continue to be
140 			 * present
141 			 */
142 			split_pud(pud, pmd);
143 		}
144 		pud_populate(mm, pud, pmd);
145 		flush_tlb_all();
146 	}
147 	BUG_ON(pud_bad(*pud));
148 
149 	pmd = pmd_offset(pud, addr);
150 	do {
151 		next = pmd_addr_end(addr, end);
152 		/* try section mapping first */
153 		if (((addr | next | phys) & ~SECTION_MASK) == 0) {
154 			pmd_t old_pmd =*pmd;
155 			set_pmd(pmd, __pmd(phys |
156 					   pgprot_val(mk_sect_prot(prot))));
157 			/*
158 			 * Check for previous table entries created during
159 			 * boot (__create_page_tables) and flush them.
160 			 */
161 			if (!pmd_none(old_pmd)) {
162 				flush_tlb_all();
163 				if (pmd_table(old_pmd)) {
164 					phys_addr_t table = __pa(pte_offset_map(&old_pmd, 0));
165 					if (!WARN_ON_ONCE(slab_is_available()))
166 						memblock_free(table, PAGE_SIZE);
167 				}
168 			}
169 		} else {
170 			alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys),
171 				       prot, alloc);
172 		}
173 		phys += next - addr;
174 	} while (pmd++, addr = next, addr != end);
175 }
176 
177 static inline bool use_1G_block(unsigned long addr, unsigned long next,
178 			unsigned long phys)
179 {
180 	if (PAGE_SHIFT != 12)
181 		return false;
182 
183 	if (((addr | next | phys) & ~PUD_MASK) != 0)
184 		return false;
185 
186 	return true;
187 }
188 
189 static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
190 				  unsigned long addr, unsigned long end,
191 				  phys_addr_t phys, pgprot_t prot,
192 				  void *(*alloc)(unsigned long size))
193 {
194 	pud_t *pud;
195 	unsigned long next;
196 
197 	if (pgd_none(*pgd)) {
198 		pud = alloc(PTRS_PER_PUD * sizeof(pud_t));
199 		pgd_populate(mm, pgd, pud);
200 	}
201 	BUG_ON(pgd_bad(*pgd));
202 
203 	pud = pud_offset(pgd, addr);
204 	do {
205 		next = pud_addr_end(addr, end);
206 
207 		/*
208 		 * For 4K granule only, attempt to put down a 1GB block
209 		 */
210 		if (use_1G_block(addr, next, phys)) {
211 			pud_t old_pud = *pud;
212 			set_pud(pud, __pud(phys |
213 					   pgprot_val(mk_sect_prot(prot))));
214 
215 			/*
216 			 * If we have an old value for a pud, it will
217 			 * be pointing to a pmd table that we no longer
218 			 * need (from swapper_pg_dir).
219 			 *
220 			 * Look up the old pmd table and free it.
221 			 */
222 			if (!pud_none(old_pud)) {
223 				flush_tlb_all();
224 				if (pud_table(old_pud)) {
225 					phys_addr_t table = __pa(pmd_offset(&old_pud, 0));
226 					if (!WARN_ON_ONCE(slab_is_available()))
227 						memblock_free(table, PAGE_SIZE);
228 				}
229 			}
230 		} else {
231 			alloc_init_pmd(mm, pud, addr, next, phys, prot, alloc);
232 		}
233 		phys += next - addr;
234 	} while (pud++, addr = next, addr != end);
235 }
236 
237 /*
238  * Create the page directory entries and any necessary page tables for the
239  * mapping specified by 'md'.
240  */
241 static void  __create_mapping(struct mm_struct *mm, pgd_t *pgd,
242 				    phys_addr_t phys, unsigned long virt,
243 				    phys_addr_t size, pgprot_t prot,
244 				    void *(*alloc)(unsigned long size))
245 {
246 	unsigned long addr, length, end, next;
247 
248 	addr = virt & PAGE_MASK;
249 	length = PAGE_ALIGN(size + (virt & ~PAGE_MASK));
250 
251 	end = addr + length;
252 	do {
253 		next = pgd_addr_end(addr, end);
254 		alloc_init_pud(mm, pgd, addr, next, phys, prot, alloc);
255 		phys += next - addr;
256 	} while (pgd++, addr = next, addr != end);
257 }
258 
259 static void *late_alloc(unsigned long size)
260 {
261 	void *ptr;
262 
263 	BUG_ON(size > PAGE_SIZE);
264 	ptr = (void *)__get_free_page(PGALLOC_GFP);
265 	BUG_ON(!ptr);
266 	return ptr;
267 }
268 
269 static void __ref create_mapping(phys_addr_t phys, unsigned long virt,
270 				  phys_addr_t size, pgprot_t prot)
271 {
272 	if (virt < VMALLOC_START) {
273 		pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
274 			&phys, virt);
275 		return;
276 	}
277 	__create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), phys, virt,
278 			 size, prot, early_alloc);
279 }
280 
281 void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
282 			       unsigned long virt, phys_addr_t size,
283 			       pgprot_t prot)
284 {
285 	__create_mapping(mm, pgd_offset(mm, virt), phys, virt, size, prot,
286 				late_alloc);
287 }
288 
289 static void create_mapping_late(phys_addr_t phys, unsigned long virt,
290 				  phys_addr_t size, pgprot_t prot)
291 {
292 	if (virt < VMALLOC_START) {
293 		pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
294 			&phys, virt);
295 		return;
296 	}
297 
298 	return __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK),
299 				phys, virt, size, prot, late_alloc);
300 }
301 
302 #ifdef CONFIG_DEBUG_RODATA
303 static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
304 {
305 	/*
306 	 * Set up the executable regions using the existing section mappings
307 	 * for now. This will get more fine grained later once all memory
308 	 * is mapped
309 	 */
310 	unsigned long kernel_x_start = round_down(__pa(_stext), SECTION_SIZE);
311 	unsigned long kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);
312 
313 	if (end < kernel_x_start) {
314 		create_mapping(start, __phys_to_virt(start),
315 			end - start, PAGE_KERNEL);
316 	} else if (start >= kernel_x_end) {
317 		create_mapping(start, __phys_to_virt(start),
318 			end - start, PAGE_KERNEL);
319 	} else {
320 		if (start < kernel_x_start)
321 			create_mapping(start, __phys_to_virt(start),
322 				kernel_x_start - start,
323 				PAGE_KERNEL);
324 		create_mapping(kernel_x_start,
325 				__phys_to_virt(kernel_x_start),
326 				kernel_x_end - kernel_x_start,
327 				PAGE_KERNEL_EXEC);
328 		if (kernel_x_end < end)
329 			create_mapping(kernel_x_end,
330 				__phys_to_virt(kernel_x_end),
331 				end - kernel_x_end,
332 				PAGE_KERNEL);
333 	}
334 
335 }
336 #else
337 static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
338 {
339 	create_mapping(start, __phys_to_virt(start), end - start,
340 			PAGE_KERNEL_EXEC);
341 }
342 #endif
343 
344 static void __init map_mem(void)
345 {
346 	struct memblock_region *reg;
347 	phys_addr_t limit;
348 
349 	/*
350 	 * Temporarily limit the memblock range. We need to do this as
351 	 * create_mapping requires puds, pmds and ptes to be allocated from
352 	 * memory addressable from the initial direct kernel mapping.
353 	 *
354 	 * The initial direct kernel mapping, located at swapper_pg_dir, gives
355 	 * us PUD_SIZE (4K pages) or PMD_SIZE (64K pages) memory starting from
356 	 * PHYS_OFFSET (which must be aligned to 2MB as per
357 	 * Documentation/arm64/booting.txt).
358 	 */
359 	if (IS_ENABLED(CONFIG_ARM64_64K_PAGES))
360 		limit = PHYS_OFFSET + PMD_SIZE;
361 	else
362 		limit = PHYS_OFFSET + PUD_SIZE;
363 	memblock_set_current_limit(limit);
364 
365 	/* map all the memory banks */
366 	for_each_memblock(memory, reg) {
367 		phys_addr_t start = reg->base;
368 		phys_addr_t end = start + reg->size;
369 
370 		if (start >= end)
371 			break;
372 
373 #ifndef CONFIG_ARM64_64K_PAGES
374 		/*
375 		 * For the first memory bank align the start address and
376 		 * current memblock limit to prevent create_mapping() from
377 		 * allocating pte page tables from unmapped memory.
378 		 * When 64K pages are enabled, the pte page table for the
379 		 * first PGDIR_SIZE is already present in swapper_pg_dir.
380 		 */
381 		if (start < limit)
382 			start = ALIGN(start, PMD_SIZE);
383 		if (end < limit) {
384 			limit = end & PMD_MASK;
385 			memblock_set_current_limit(limit);
386 		}
387 #endif
388 		__map_memblock(start, end);
389 	}
390 
391 	/* Limit no longer required. */
392 	memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
393 }
394 
395 void __init fixup_executable(void)
396 {
397 #ifdef CONFIG_DEBUG_RODATA
398 	/* now that we are actually fully mapped, make the start/end more fine grained */
399 	if (!IS_ALIGNED((unsigned long)_stext, SECTION_SIZE)) {
400 		unsigned long aligned_start = round_down(__pa(_stext),
401 							SECTION_SIZE);
402 
403 		create_mapping(aligned_start, __phys_to_virt(aligned_start),
404 				__pa(_stext) - aligned_start,
405 				PAGE_KERNEL);
406 	}
407 
408 	if (!IS_ALIGNED((unsigned long)__init_end, SECTION_SIZE)) {
409 		unsigned long aligned_end = round_up(__pa(__init_end),
410 							SECTION_SIZE);
411 		create_mapping(__pa(__init_end), (unsigned long)__init_end,
412 				aligned_end - __pa(__init_end),
413 				PAGE_KERNEL);
414 	}
415 #endif
416 }
417 
#ifdef CONFIG_DEBUG_RODATA
/*
 * Remap [_stext, _etext) with the PAGE_KERNEL_EXEC attributes plus
 * PTE_RDONLY, taking any page tables needed from late_alloc().
 */
void mark_rodata_ro(void)
{
	/*
	 * Combine the raw attribute bits and wrap them back up, rather than
	 * OR-ing a pgprot_t with a pte bit directly.
	 */
	pgprot_t prot = __pgprot(pgprot_val(PAGE_KERNEL_EXEC) | PTE_RDONLY);

	create_mapping_late(__pa(_stext), (unsigned long)_stext,
				(unsigned long)_etext - (unsigned long)_stext,
				prot);
}
#endif
427 
428 void fixup_init(void)
429 {
430 	create_mapping_late(__pa(__init_begin), (unsigned long)__init_begin,
431 			(unsigned long)__init_end - (unsigned long)__init_begin,
432 			PAGE_KERNEL);
433 }
434 
435 /*
436  * paging_init() sets up the page tables, initialises the zone memory
437  * maps and sets up the zero page.
438  */
439 void __init paging_init(void)
440 {
441 	void *zero_page;
442 
443 	map_mem();
444 	fixup_executable();
445 
446 	/* allocate the zero page. */
447 	zero_page = early_alloc(PAGE_SIZE);
448 
449 	bootmem_init();
450 
451 	empty_zero_page = virt_to_page(zero_page);
452 
453 	/*
454 	 * TTBR0 is only used for the identity mapping at this stage. Make it
455 	 * point to zero page to avoid speculatively fetching new entries.
456 	 */
457 	cpu_set_reserved_ttbr0();
458 	flush_tlb_all();
459 	cpu_set_default_tcr_t0sz();
460 }
461 
462 /*
463  * Enable the identity mapping to allow the MMU disabling.
464  */
465 void setup_mm_for_reboot(void)
466 {
467 	cpu_set_reserved_ttbr0();
468 	flush_tlb_all();
469 	cpu_set_idmap_tcr_t0sz();
470 	cpu_switch_mm(idmap_pg_dir, &init_mm);
471 }
472 
473 /*
474  * Check whether a kernel address is valid (derived from arch/x86/).
475  */
476 int kern_addr_valid(unsigned long addr)
477 {
478 	pgd_t *pgd;
479 	pud_t *pud;
480 	pmd_t *pmd;
481 	pte_t *pte;
482 
483 	if ((((long)addr) >> VA_BITS) != -1UL)
484 		return 0;
485 
486 	pgd = pgd_offset_k(addr);
487 	if (pgd_none(*pgd))
488 		return 0;
489 
490 	pud = pud_offset(pgd, addr);
491 	if (pud_none(*pud))
492 		return 0;
493 
494 	if (pud_sect(*pud))
495 		return pfn_valid(pud_pfn(*pud));
496 
497 	pmd = pmd_offset(pud, addr);
498 	if (pmd_none(*pmd))
499 		return 0;
500 
501 	if (pmd_sect(*pmd))
502 		return pfn_valid(pmd_pfn(*pmd));
503 
504 	pte = pte_offset_kernel(pmd, addr);
505 	if (pte_none(*pte))
506 		return 0;
507 
508 	return pfn_valid(pte_pfn(*pte));
509 }
510 #ifdef CONFIG_SPARSEMEM_VMEMMAP
511 #ifdef CONFIG_ARM64_64K_PAGES
512 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
513 {
514 	return vmemmap_populate_basepages(start, end, node);
515 }
516 #else	/* !CONFIG_ARM64_64K_PAGES */
517 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
518 {
519 	unsigned long addr = start;
520 	unsigned long next;
521 	pgd_t *pgd;
522 	pud_t *pud;
523 	pmd_t *pmd;
524 
525 	do {
526 		next = pmd_addr_end(addr, end);
527 
528 		pgd = vmemmap_pgd_populate(addr, node);
529 		if (!pgd)
530 			return -ENOMEM;
531 
532 		pud = vmemmap_pud_populate(pgd, addr, node);
533 		if (!pud)
534 			return -ENOMEM;
535 
536 		pmd = pmd_offset(pud, addr);
537 		if (pmd_none(*pmd)) {
538 			void *p = NULL;
539 
540 			p = vmemmap_alloc_block_buf(PMD_SIZE, node);
541 			if (!p)
542 				return -ENOMEM;
543 
544 			set_pmd(pmd, __pmd(__pa(p) | PROT_SECT_NORMAL));
545 		} else
546 			vmemmap_verify((pte_t *)pmd, node, addr, next);
547 	} while (addr = next, addr != end);
548 
549 	return 0;
550 }
551 #endif	/* CONFIG_ARM64_64K_PAGES */
/* Intentionally empty: nothing is done for [@start, @end) here. */
void vmemmap_free(unsigned long start, unsigned long end)
{
	/* no-op */
}
555 #endif	/* CONFIG_SPARSEMEM_VMEMMAP */
556 
557 static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
558 #if CONFIG_PGTABLE_LEVELS > 2
559 static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;
560 #endif
561 #if CONFIG_PGTABLE_LEVELS > 3
562 static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;
563 #endif
564 
565 static inline pud_t * fixmap_pud(unsigned long addr)
566 {
567 	pgd_t *pgd = pgd_offset_k(addr);
568 
569 	BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
570 
571 	return pud_offset(pgd, addr);
572 }
573 
574 static inline pmd_t * fixmap_pmd(unsigned long addr)
575 {
576 	pud_t *pud = fixmap_pud(addr);
577 
578 	BUG_ON(pud_none(*pud) || pud_bad(*pud));
579 
580 	return pmd_offset(pud, addr);
581 }
582 
583 static inline pte_t * fixmap_pte(unsigned long addr)
584 {
585 	pmd_t *pmd = fixmap_pmd(addr);
586 
587 	BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd));
588 
589 	return pte_offset_kernel(pmd, addr);
590 }
591 
592 void __init early_fixmap_init(void)
593 {
594 	pgd_t *pgd;
595 	pud_t *pud;
596 	pmd_t *pmd;
597 	unsigned long addr = FIXADDR_START;
598 
599 	pgd = pgd_offset_k(addr);
600 	pgd_populate(&init_mm, pgd, bm_pud);
601 	pud = pud_offset(pgd, addr);
602 	pud_populate(&init_mm, pud, bm_pmd);
603 	pmd = pmd_offset(pud, addr);
604 	pmd_populate_kernel(&init_mm, pmd, bm_pte);
605 
606 	/*
607 	 * The boot-ioremap range spans multiple pmds, for which
608 	 * we are not preparted:
609 	 */
610 	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
611 		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
612 
613 	if ((pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)))
614 	     || pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) {
615 		WARN_ON(1);
616 		pr_warn("pmd %p != %p, %p\n",
617 			pmd, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)),
618 			fixmap_pmd(fix_to_virt(FIX_BTMAP_END)));
619 		pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
620 			fix_to_virt(FIX_BTMAP_BEGIN));
621 		pr_warn("fix_to_virt(FIX_BTMAP_END):   %08lx\n",
622 			fix_to_virt(FIX_BTMAP_END));
623 
624 		pr_warn("FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
625 		pr_warn("FIX_BTMAP_BEGIN:     %d\n", FIX_BTMAP_BEGIN);
626 	}
627 }
628 
629 void __set_fixmap(enum fixed_addresses idx,
630 			       phys_addr_t phys, pgprot_t flags)
631 {
632 	unsigned long addr = __fix_to_virt(idx);
633 	pte_t *pte;
634 
635 	BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);
636 
637 	pte = fixmap_pte(addr);
638 
639 	if (pgprot_val(flags)) {
640 		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
641 	} else {
642 		pte_clear(&init_mm, addr, pte);
643 		flush_tlb_kernel_range(addr, addr+PAGE_SIZE);
644 	}
645 }
646