/* xref: /openbmc/linux/arch/x86/mm/pgtable.c (revision 9a29ad52) */
// SPDX-License-Identifier: GPL-2.0
#include <linux/mm.h>
#include <linux/gfp.h>
#include <linux/hugetlb.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>
#include <asm/fixmap.h>
#include <asm/mtrr.h>

#ifdef CONFIG_DYNAMIC_PHYSICAL_MASK
phys_addr_t physical_mask __ro_after_init = (1ULL << __PHYSICAL_MASK_SHIFT) - 1;
EXPORT_SYMBOL(physical_mask);
#endif

#define PGALLOC_GFP (GFP_KERNEL_ACCOUNT | __GFP_ZERO)

#ifdef CONFIG_HIGHPTE
#define PGALLOC_USER_GFP __GFP_HIGHMEM
#else
#define PGALLOC_USER_GFP 0
#endif

gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;

pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
	return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT);
}

pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
	struct page *pte;

	pte = alloc_pages(__userpte_alloc_gfp, 0);
	if (!pte)
		return NULL;
	if (!pgtable_page_ctor(pte)) {
		__free_page(pte);
		return NULL;
	}
	return pte;
}

static int __init setup_userpte(char *arg)
{
	if (!arg)
		return -EINVAL;

	/*
	 * "userpte=nohigh" disables allocation of user pagetables in
	 * high memory.
	 */
	if (strcmp(arg, "nohigh") == 0)
		__userpte_alloc_gfp &= ~__GFP_HIGHMEM;
	else
		return -EINVAL;
	return 0;
}
early_param("userpte", setup_userpte);
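/*
 * Illustrative note (not part of the original file): on a CONFIG_HIGHPTE
 * kernel, booting with "userpte=nohigh" clears __GFP_HIGHMEM above, so
 * user PTE pages are served from lowmem:
 *
 *	__userpte_alloc_gfp == GFP_KERNEL_ACCOUNT | __GFP_ZERO
 *
 * Without the parameter the mask keeps __GFP_HIGHMEM and user PTE pages
 * may be placed in highmem.
 */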

void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
{
	pgtable_page_dtor(pte);
	paravirt_release_pte(page_to_pfn(pte));
	tlb_remove_table(tlb, pte);
}

#if CONFIG_PGTABLE_LEVELS > 2
void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
{
	struct page *page = virt_to_page(pmd);
	paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT);
	/*
	 * NOTE! For PAE, any changes to the top page-directory-pointer-table
	 * entries need a full cr3 reload to flush.
	 */
#ifdef CONFIG_X86_PAE
	tlb->need_flush_all = 1;
#endif
	pgtable_pmd_page_dtor(page);
	tlb_remove_table(tlb, page);
}

#if CONFIG_PGTABLE_LEVELS > 3
void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
{
	paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
	tlb_remove_table(tlb, virt_to_page(pud));
}

#if CONFIG_PGTABLE_LEVELS > 4
void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d)
{
	paravirt_release_p4d(__pa(p4d) >> PAGE_SHIFT);
	tlb_remove_table(tlb, virt_to_page(p4d));
}
#endif	/* CONFIG_PGTABLE_LEVELS > 4 */
#endif	/* CONFIG_PGTABLE_LEVELS > 3 */
#endif	/* CONFIG_PGTABLE_LEVELS > 2 */
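/*
 * Illustrative sketch (hedged; the real wrappers live in
 * arch/x86/include/asm/pgalloc.h, not here): the generic mmu_gather code
 * reaches these helpers through thin per-level wrappers, roughly
 *
 *	static inline void __pte_free_tlb(struct mmu_gather *tlb,
 *					  struct page *pte,
 *					  unsigned long address)
 *	{
 *		___pte_free_tlb(tlb, pte);
 *	}
 *
 * and likewise __pmd_free_tlb(), __pud_free_tlb() and __p4d_free_tlb(),
 * so freeing of page-table pages is deferred to tlb_remove_table().
 */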

static inline void pgd_list_add(pgd_t *pgd)
{
	struct page *page = virt_to_page(pgd);

	list_add(&page->lru, &pgd_list);
}

static inline void pgd_list_del(pgd_t *pgd)
{
	struct page *page = virt_to_page(pgd);

	list_del(&page->lru);
}

#define UNSHARED_PTRS_PER_PGD				\
	(SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)


static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
{
	virt_to_page(pgd)->pt_mm = mm;
}

struct mm_struct *pgd_page_get_mm(struct page *page)
{
	return page->pt_mm;
}

static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
{
	/* If the pgd points to a shared pagetable level (either the
	   ptes in non-PAE, or shared PMD in PAE), then just copy the
	   references from swapper_pg_dir. */
	if (CONFIG_PGTABLE_LEVELS == 2 ||
	    (CONFIG_PGTABLE_LEVELS == 3 && SHARED_KERNEL_PMD) ||
	    CONFIG_PGTABLE_LEVELS >= 4) {
		clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
				swapper_pg_dir + KERNEL_PGD_BOUNDARY,
				KERNEL_PGD_PTRS);
	}

	/* list required to sync kernel mapping updates */
	if (!SHARED_KERNEL_PMD) {
		pgd_set_mm(pgd, mm);
		pgd_list_add(pgd);
	}
}

static void pgd_dtor(pgd_t *pgd)
{
	if (SHARED_KERNEL_PMD)
		return;

	spin_lock(&pgd_lock);
	pgd_list_del(pgd);
	spin_unlock(&pgd_lock);
}

/*
 * List of all pgd's needed for non-PAE so it can invalidate entries
 * in both cached and uncached pgd's; not needed for PAE since the
 * kernel pmd is shared. If PAE were not to share the pmd a similar
 * tactic would be needed. This is essentially codepath-based locking
 * against pageattr.c; it is the unique case in which a valid change
 * of kernel pagetables can't be lazily synchronized by vmalloc faults.
 * vmalloc faults work because attached pagetables are never freed.
 * -- nyc
 */

#ifdef CONFIG_X86_PAE
/*
 * In PAE mode, we need to do a cr3 reload (=tlb flush) when
 * updating the top-level pagetable entries to guarantee the
 * processor notices the update.  Since this is expensive, and
 * all 4 top-level entries are used almost immediately in a
 * new process's life, we just pre-populate them here.
 *
 * Also, if we're in a paravirt environment where the kernel pmd is
 * not shared between pagetables (!SHARED_KERNEL_PMD), we allocate
 * and initialize the kernel pmds here.
 */
#define PREALLOCATED_PMDS	UNSHARED_PTRS_PER_PGD
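/*
 * Illustrative note (an assumption about the usual 32-bit 3G/1G split,
 * not part of the original file): with PAE, PTRS_PER_PGD is 4 and
 * KERNEL_PGD_BOUNDARY is pgd_index(PAGE_OFFSET) == 3, so
 * PREALLOCATED_PMDS evaluates to 3 (user pmds only) when the kernel pmd
 * is shared, and to 4 (every pmd) when it is not, e.g. under Xen PV.
 */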

void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
{
	paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);

	/* Note: almost everything apart from _PAGE_PRESENT is
	   reserved at the pmd (PDPT) level. */
	set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));

	/*
	 * According to Intel App note "TLBs, Paging-Structure Caches,
	 * and Their Invalidation", April 2007, document 317080-001,
	 * section 8.1: in PAE mode we explicitly have to flush the
	 * TLB via cr3 if the top-level pgd is changed...
	 */
	flush_tlb_mm(mm);
}
#else  /* !CONFIG_X86_PAE */

/* No need to prepopulate any pagetable entries in non-PAE modes. */
#define PREALLOCATED_PMDS	0

#endif	/* CONFIG_X86_PAE */

static void free_pmds(struct mm_struct *mm, pmd_t *pmds[])
{
	int i;

	for (i = 0; i < PREALLOCATED_PMDS; i++)
		if (pmds[i]) {
			pgtable_pmd_page_dtor(virt_to_page(pmds[i]));
			free_page((unsigned long)pmds[i]);
			mm_dec_nr_pmds(mm);
		}
}

static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[])
{
	int i;
	bool failed = false;
	gfp_t gfp = PGALLOC_GFP;

	if (mm == &init_mm)
		gfp &= ~__GFP_ACCOUNT;

	for (i = 0; i < PREALLOCATED_PMDS; i++) {
		pmd_t *pmd = (pmd_t *)__get_free_page(gfp);
		if (!pmd)
			failed = true;
		if (pmd && !pgtable_pmd_page_ctor(virt_to_page(pmd))) {
			free_page((unsigned long)pmd);
			pmd = NULL;
			failed = true;
		}
		if (pmd)
			mm_inc_nr_pmds(mm);
		pmds[i] = pmd;
	}

	if (failed) {
		free_pmds(mm, pmds);
		return -ENOMEM;
	}

	return 0;
}

/*
 * Mop up any pmd pages which may still be attached to the pgd.
 * Normally they will be freed by munmap/exit_mmap, but any pmd we
 * preallocate which never got a corresponding vma will need to be
 * freed manually.
 */
static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
{
	int i;

	for (i = 0; i < PREALLOCATED_PMDS; i++) {
		pgd_t pgd = pgdp[i];

		if (pgd_val(pgd) != 0) {
			pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);

			pgdp[i] = native_make_pgd(0);

			paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
			pmd_free(mm, pmd);
			mm_dec_nr_pmds(mm);
		}
	}
}

static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
{
	p4d_t *p4d;
	pud_t *pud;
	int i;

	if (PREALLOCATED_PMDS == 0) /* Work around gcc-3.4.x bug */
		return;

	p4d = p4d_offset(pgd, 0);
	pud = pud_offset(p4d, 0);

	for (i = 0; i < PREALLOCATED_PMDS; i++, pud++) {
		pmd_t *pmd = pmds[i];

		if (i >= KERNEL_PGD_BOUNDARY)
			memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
			       sizeof(pmd_t) * PTRS_PER_PMD);

		pud_populate(mm, pud, pmd);
	}
}

/*
 * Xen paravirt expects the pgd table to occupy a single page, and the
 * 64-bit kernel makes the same assumption.
 *
 * A kernel with PAE paging that is not running as a Xen domain,
 * however, only needs 32 bytes for the pgd instead of one page.
 */
#ifdef CONFIG_X86_PAE

#include <linux/slab.h>

#define PGD_SIZE	(PTRS_PER_PGD * sizeof(pgd_t))
#define PGD_ALIGN	32
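/*
 * Illustrative note (not part of the original file): with PAE,
 * PTRS_PER_PGD is 4 and sizeof(pgd_t) is 8, so PGD_SIZE works out to
 * 32 bytes.  PGD_ALIGN matches the hardware requirement that the PAE
 * page-directory-pointer table be 32-byte aligned (CR3 ignores its low
 * 5 bits in PAE mode), which is what makes the slab-based allocation
 * below safe.
 */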

static struct kmem_cache *pgd_cache;

static int __init pgd_cache_init(void)
{
	/*
	 * When a PAE kernel is running as a Xen domain it does not use
	 * a shared kernel pmd, and that requires a whole page for the pgd.
	 */
	if (!SHARED_KERNEL_PMD)
		return 0;

	/*
	 * When a PAE kernel is not running as a Xen domain it uses a
	 * shared kernel pmd, which does not require a whole page for the
	 * pgd: 32 bytes are enough.  Create a 32-byte slab cache at boot
	 * time for pgd table allocations.
	 */
	pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_ALIGN,
				      SLAB_PANIC, NULL);
	if (!pgd_cache)
		return -ENOMEM;

	return 0;
}
core_initcall(pgd_cache_init);

static inline pgd_t *_pgd_alloc(void)
{
	/*
	 * Without SHARED_KERNEL_PMD the PAE kernel is running as a Xen
	 * domain, so allocate a whole page for the pgd.
	 */
	if (!SHARED_KERNEL_PMD)
		return (pgd_t *)__get_free_page(PGALLOC_GFP);

	/*
	 * Otherwise the PAE kernel is not running as a Xen domain and a
	 * 32-byte slab object is enough for the pgd, saving memory.
	 */
	return kmem_cache_alloc(pgd_cache, PGALLOC_GFP);
}

static inline void _pgd_free(pgd_t *pgd)
{
	if (!SHARED_KERNEL_PMD)
		free_page((unsigned long)pgd);
	else
		kmem_cache_free(pgd_cache, pgd);
}
#else

static inline pgd_t *_pgd_alloc(void)
{
	return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
}

static inline void _pgd_free(pgd_t *pgd)
{
	free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
}
#endif /* CONFIG_X86_PAE */

pgd_t *pgd_alloc(struct mm_struct *mm)
{
	pgd_t *pgd;
	pmd_t *pmds[PREALLOCATED_PMDS];

	pgd = _pgd_alloc();

	if (pgd == NULL)
		goto out;

	mm->pgd = pgd;

	if (preallocate_pmds(mm, pmds) != 0)
		goto out_free_pgd;

	if (paravirt_pgd_alloc(mm) != 0)
		goto out_free_pmds;

	/*
	 * Make sure that pre-populating the pmds is atomic with
	 * respect to anything walking the pgd_list, so that they
	 * never see a partially populated pgd.
	 */
	spin_lock(&pgd_lock);

	pgd_ctor(mm, pgd);
	pgd_prepopulate_pmd(mm, pgd, pmds);

	spin_unlock(&pgd_lock);

	return pgd;

out_free_pmds:
	free_pmds(mm, pmds);
out_free_pgd:
	_pgd_free(pgd);
out:
	return NULL;
}

void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
	pgd_mop_up_pmds(mm, pgd);
	pgd_dtor(pgd);
	paravirt_pgd_free(mm, pgd);
	_pgd_free(pgd);
}
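/*
 * Illustrative sketch (hedged; the real helpers live in kernel/fork.c,
 * not here): pgd_alloc()/pgd_free() bracket the lifetime of an mm's
 * page-table root, roughly
 *
 *	static int mm_alloc_pgd(struct mm_struct *mm)
 *	{
 *		mm->pgd = pgd_alloc(mm);
 *		if (unlikely(!mm->pgd))
 *			return -ENOMEM;
 *		return 0;
 *	}
 *
 *	static void mm_free_pgd(struct mm_struct *mm)
 *	{
 *		pgd_free(mm, mm->pgd);
 *	}
 *
 * called from mm_init() and __mmdrop() respectively.
 */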

/*
 * Used to set accessed or dirty bits in the page table entries
 * on other architectures. On x86, the accessed and dirty bits
 * are tracked by hardware. However, do_wp_page calls this function
 * to also make the pte writeable at the same time the dirty bit is
 * set. In that case we do actually need to write the PTE.
 */
int ptep_set_access_flags(struct vm_area_struct *vma,
			  unsigned long address, pte_t *ptep,
			  pte_t entry, int dirty)
{
	int changed = !pte_same(*ptep, entry);

	if (changed && dirty)
		*ptep = entry;

	return changed;
}
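/*
 * Illustrative sketch (an approximation of the generic write-protect
 * fault path in mm/memory.c, not part of this file): the caller builds
 * a dirty, writable entry and hands it to ptep_set_access_flags(),
 * roughly
 *
 *	entry = pte_mkyoung(orig_pte);
 *	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 *	if (ptep_set_access_flags(vma, addr, ptep, entry, 1))
 *		update_mmu_cache(vma, addr, ptep);
 *
 * which is why the "dirty" case above really has to write the PTE.
 */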

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
int pmdp_set_access_flags(struct vm_area_struct *vma,
			  unsigned long address, pmd_t *pmdp,
			  pmd_t entry, int dirty)
{
	int changed = !pmd_same(*pmdp, entry);

	VM_BUG_ON(address & ~HPAGE_PMD_MASK);

	if (changed && dirty) {
		*pmdp = entry;
		/*
		 * We had a write-protection fault here and changed the pmd
		 * to be more permissive. No need to flush the TLB for that,
		 * #PF is architecturally guaranteed to do that and in the
		 * worst-case we'll generate a spurious fault.
		 */
	}

	return changed;
}

int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
			  pud_t *pudp, pud_t entry, int dirty)
{
	int changed = !pud_same(*pudp, entry);

	VM_BUG_ON(address & ~HPAGE_PUD_MASK);

	if (changed && dirty) {
		*pudp = entry;
		/*
		 * We had a write-protection fault here and changed the pud
		 * to be more permissive. No need to flush the TLB for that,
		 * #PF is architecturally guaranteed to do that and in the
		 * worst-case we'll generate a spurious fault.
		 */
	}

	return changed;
}
#endif

int ptep_test_and_clear_young(struct vm_area_struct *vma,
			      unsigned long addr, pte_t *ptep)
{
	int ret = 0;

	if (pte_young(*ptep))
		ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
					 (unsigned long *) &ptep->pte);

	return ret;
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
int pmdp_test_and_clear_young(struct vm_area_struct *vma,
			      unsigned long addr, pmd_t *pmdp)
{
	int ret = 0;

	if (pmd_young(*pmdp))
		ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
					 (unsigned long *)pmdp);

	return ret;
}
int pudp_test_and_clear_young(struct vm_area_struct *vma,
			      unsigned long addr, pud_t *pudp)
{
	int ret = 0;

	if (pud_young(*pudp))
		ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
					 (unsigned long *)pudp);

	return ret;
}
#endif

int ptep_clear_flush_young(struct vm_area_struct *vma,
			   unsigned long address, pte_t *ptep)
{
	/*
	 * On x86 CPUs, clearing the accessed bit without a TLB flush
	 * doesn't cause data corruption. [ It could cause incorrect
	 * page aging and the (mistaken) reclaim of hot pages, but the
	 * chance of that should be relatively low. ]
	 *
	 * So as a performance optimization don't flush the TLB when
	 * clearing the accessed bit, it will eventually be flushed by
	 * a context switch or a VM operation anyway. [ In the rare
	 * event of it not getting flushed for a long time the delay
	 * shouldn't really matter because there's no real memory
	 * pressure for swapout to react to. ]
	 */
	return ptep_test_and_clear_young(vma, address, ptep);
}
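/*
 * Illustrative sketch (an approximation of the rmap-based caller in
 * mm/rmap.c, not part of this file): page aging consumes the result of
 * the young-bit test roughly like
 *
 *	if (ptep_clear_flush_young_notify(vma, address, pvmw.pte))
 *		referenced++;
 *
 * so skipping the TLB flush here can only mis-age a page (a stale TLB
 * entry may keep the accessed bit from being set again), never corrupt
 * data.
 */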

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
int pmdp_clear_flush_young(struct vm_area_struct *vma,
			   unsigned long address, pmd_t *pmdp)
{
	int young;

	VM_BUG_ON(address & ~HPAGE_PMD_MASK);

	young = pmdp_test_and_clear_young(vma, address, pmdp);
	if (young)
		flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);

	return young;
}
#endif

/**
 * reserve_top_address - reserves a hole in the top of kernel address space
 * @reserve: size of hole to reserve
 *
 * Can be used to relocate the fixmap area and poke a hole in the top
 * of kernel address space to make room for a hypervisor.
 */
void __init reserve_top_address(unsigned long reserve)
{
#ifdef CONFIG_X86_32
	BUG_ON(fixmaps_set > 0);
	__FIXADDR_TOP = round_down(-reserve, 1 << PMD_SHIFT) - PAGE_SIZE;
	printk(KERN_INFO "Reserving virtual address space above 0x%08lx (rounded to 0x%08lx)\n",
	       -reserve, __FIXADDR_TOP + PAGE_SIZE);
#endif
}
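/*
 * Illustrative sketch (hypothetical caller, not part of this file): a
 * 32-bit paravirtualized guest that must keep the hypervisor's address
 * range clear could do, very early in boot and before any fixmap is set:
 *
 *	reserve_top_address(256 * 1024 * 1024);	// keep the top 256MB free
 *
 * which pushes __FIXADDR_TOP down below the reserved region.
 */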

int fixmaps_set;

void __native_set_fixmap(enum fixed_addresses idx, pte_t pte)
{
	unsigned long address = __fix_to_virt(idx);

	if (idx >= __end_of_fixed_addresses) {
		BUG();
		return;
	}
	set_pte_vaddr(address, pte);
	fixmaps_set++;
}

void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys,
		       pgprot_t flags)
{
	/* Sanitize 'prot' against any unsupported bits: */
	pgprot_val(flags) &= __default_kernel_pte_mask;

	__native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags));
}
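/*
 * Illustrative sketch (hedged; the wrappers live in asm/fixmap.h and
 * asm-generic/fixmap.h, not here): callers normally go through the
 * generic helpers, roughly
 *
 *	set_fixmap(idx, phys);		// normal cached mapping
 *	set_fixmap_nocache(idx, phys);	// uncached, e.g. for MMIO
 *
 * which expand to __set_fixmap() and, without CONFIG_PARAVIRT, end up
 * in native_set_fixmap() above.
 */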

#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
#ifdef CONFIG_X86_5LEVEL
/**
 * p4d_set_huge - setup kernel P4D mapping
 *
 * No 512GB pages yet -- always return 0
 */
int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
{
	return 0;
}

/**
 * p4d_clear_huge - clear kernel P4D mapping when it is set
 *
 * No 512GB pages yet -- always return 0
 */
int p4d_clear_huge(p4d_t *p4d)
{
	return 0;
}
#endif

/**
 * pud_set_huge - setup kernel PUD mapping
 *
 * MTRRs can override PAT memory types with 4KiB granularity. Therefore, this
 * function sets up a huge page only if any of the following conditions are met:
 *
 * - MTRRs are disabled, or
 *
 * - MTRRs are enabled and the range is completely covered by a single MTRR, or
 *
 * - MTRRs are enabled and the corresponding MTRR memory type is WB, which
 *   has no effect on the requested PAT memory type.
 *
 * Callers should try to decrease page size (1GB -> 2MB -> 4K) if the bigger
 * page mapping attempt fails.
 *
 * Returns 1 on success and 0 on failure.
 */
int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
{
	u8 mtrr, uniform;

	mtrr = mtrr_type_lookup(addr, addr + PUD_SIZE, &uniform);
	if ((mtrr != MTRR_TYPE_INVALID) && (!uniform) &&
	    (mtrr != MTRR_TYPE_WRBACK))
		return 0;

	/* Bail out if we are on a populated non-leaf entry: */
	if (pud_present(*pud) && !pud_huge(*pud))
		return 0;

	prot = pgprot_4k_2_large(prot);

	set_pte((pte_t *)pud, pfn_pte(
		(u64)addr >> PAGE_SHIFT,
		__pgprot(pgprot_val(prot) | _PAGE_PSE)));

	return 1;
}
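/*
 * Illustrative sketch (an approximation of the generic caller in
 * lib/ioremap.c, not part of this file): ioremap_pud_range() tries the
 * large mapping first and only falls back to PMDs/PTEs if it fails,
 * roughly
 *
 *	if (ioremap_pud_enabled() &&
 *	    ((next - addr) == PUD_SIZE) &&
 *	    IS_ALIGNED(phys_addr + addr, PUD_SIZE) &&
 *	    pud_set_huge(pud, phys_addr + addr, prot))
 *		continue;
 *
 * mirroring the "decrease page size on failure" rule documented above.
 */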

/**
 * pmd_set_huge - setup kernel PMD mapping
 *
 * See text over pud_set_huge() above.
 *
 * Returns 1 on success and 0 on failure.
 */
int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
{
	u8 mtrr, uniform;

	mtrr = mtrr_type_lookup(addr, addr + PMD_SIZE, &uniform);
	if ((mtrr != MTRR_TYPE_INVALID) && (!uniform) &&
	    (mtrr != MTRR_TYPE_WRBACK)) {
		pr_warn_once("%s: Cannot satisfy [mem %#010llx-%#010llx] with a huge-page mapping due to MTRR override.\n",
			     __func__, addr, addr + PMD_SIZE);
		return 0;
	}

	/* Bail out if we are on a populated non-leaf entry: */
	if (pmd_present(*pmd) && !pmd_huge(*pmd))
		return 0;

	prot = pgprot_4k_2_large(prot);

	set_pte((pte_t *)pmd, pfn_pte(
		(u64)addr >> PAGE_SHIFT,
		__pgprot(pgprot_val(prot) | _PAGE_PSE)));

	return 1;
}

/**
 * pud_clear_huge - clear kernel PUD mapping when it is set
 *
 * Returns 1 on success and 0 on failure (no PUD map is found).
 */
int pud_clear_huge(pud_t *pud)
{
	if (pud_large(*pud)) {
		pud_clear(pud);
		return 1;
	}

	return 0;
}

/**
 * pmd_clear_huge - clear kernel PMD mapping when it is set
 *
 * Returns 1 on success and 0 on failure (no PMD map is found).
 */
int pmd_clear_huge(pmd_t *pmd)
{
	if (pmd_large(*pmd)) {
		pmd_clear(pmd);
		return 1;
	}

	return 0;
}

/**
 * pud_free_pmd_page - Clear pud entry and free pmd page.
 * @pud: Pointer to a PUD.
 *
 * Context: The pud range has been unmapped and TLB purged.
 * Return: 1 if clearing the entry succeeded. 0 otherwise.
 */
int pud_free_pmd_page(pud_t *pud)
{
	pmd_t *pmd;
	int i;

	if (pud_none(*pud))
		return 1;

	pmd = (pmd_t *)pud_page_vaddr(*pud);

	for (i = 0; i < PTRS_PER_PMD; i++)
		if (!pmd_free_pte_page(&pmd[i]))
			return 0;

	pud_clear(pud);
	free_page((unsigned long)pmd);

	return 1;
}

/**
 * pmd_free_pte_page - Clear pmd entry and free pte page.
 * @pmd: Pointer to a PMD.
 *
 * Context: The pmd range has been unmapped and TLB purged.
 * Return: 1 if clearing the entry succeeded. 0 otherwise.
 */
int pmd_free_pte_page(pmd_t *pmd)
{
	pte_t *pte;

	if (pmd_none(*pmd))
		return 1;

	pte = (pte_t *)pmd_page_vaddr(*pmd);
	pmd_clear(pmd);
	free_page((unsigned long)pte);

	return 1;
}
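/*
 * Illustrative note (an assumption about the generic caller, not part
 * of this file): these two helpers exist for lib/ioremap.c, which may
 * need to tear down a previously populated lower page-table level
 * before it can install a huge mapping over the same range with
 * pmd_set_huge()/pud_set_huge() above.
 */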
#endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP */