xref: /openbmc/linux/arch/arm64/mm/hugetlbpage.c (revision 8ebc80a25f9d9bf7a8e368b266d5b740c485c362)
11802d0beSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
2084bd298SSteve Capper /*
3084bd298SSteve Capper  * arch/arm64/mm/hugetlbpage.c
4084bd298SSteve Capper  *
5084bd298SSteve Capper  * Copyright (C) 2013 Linaro Ltd.
6084bd298SSteve Capper  *
7084bd298SSteve Capper  * Based on arch/x86/mm/hugetlbpage.c.
8084bd298SSteve Capper  */
9084bd298SSteve Capper 
10084bd298SSteve Capper #include <linux/init.h>
11084bd298SSteve Capper #include <linux/fs.h>
12084bd298SSteve Capper #include <linux/mm.h>
13084bd298SSteve Capper #include <linux/hugetlb.h>
14084bd298SSteve Capper #include <linux/pagemap.h>
15084bd298SSteve Capper #include <linux/err.h>
16084bd298SSteve Capper #include <linux/sysctl.h>
17084bd298SSteve Capper #include <asm/mman.h>
18084bd298SSteve Capper #include <asm/tlb.h>
19084bd298SSteve Capper #include <asm/tlbflush.h>
20084bd298SSteve Capper 
21abb7962aSAnshuman Khandual /*
22abb7962aSAnshuman Khandual  * HugeTLB Support Matrix
23abb7962aSAnshuman Khandual  *
24abb7962aSAnshuman Khandual  * ---------------------------------------------------
25abb7962aSAnshuman Khandual  * | Page Size | CONT PTE |  PMD  | CONT PMD |  PUD  |
26abb7962aSAnshuman Khandual  * ---------------------------------------------------
27abb7962aSAnshuman Khandual  * |     4K    |   64K    |   2M  |    32M   |   1G  |
28abb7962aSAnshuman Khandual  * |    16K    |    2M    |  32M  |     1G   |       |
29abb7962aSAnshuman Khandual  * |    64K    |    2M    | 512M  |    16G   |       |
30abb7962aSAnshuman Khandual  * ---------------------------------------------------
31abb7962aSAnshuman Khandual  */
32abb7962aSAnshuman Khandual 
/*
 * When a hugetlb CMA reservation is requested, size it for the largest
 * gigantic huge page this configuration supports. Smaller gigantic huge
 * pages can still be served from the same area.
 */
#ifdef CONFIG_CMA
void __init arm64_hugetlb_cma_reserve(void)
{
	int order;

	/* PUD-sized pages when PUD sections are supported, else CONT PMD. */
	order = pud_sect_supported() ? PUD_SHIFT - PAGE_SHIFT
				     : CONT_PMD_SHIFT - PAGE_SHIFT;

	/*
	 * The CMA reservation exists for gigantic huge pages, which the
	 * page allocator cannot provide. Warn if the selected order ever
	 * stops being larger than what the page allocator handles.
	 */
	WARN_ON(order <= MAX_ORDER);
	hugetlb_cma_reserve(order);
}
#endif /* CONFIG_CMA */
58abb7962aSAnshuman Khandual 
__hugetlb_valid_size(unsigned long size)59a8a733b2SAnshuman Khandual static bool __hugetlb_valid_size(unsigned long size)
60a8a733b2SAnshuman Khandual {
61a8a733b2SAnshuman Khandual 	switch (size) {
62a8a733b2SAnshuman Khandual #ifndef __PAGETABLE_PMD_FOLDED
63a8a733b2SAnshuman Khandual 	case PUD_SIZE:
64a8a733b2SAnshuman Khandual 		return pud_sect_supported();
65a8a733b2SAnshuman Khandual #endif
66a8a733b2SAnshuman Khandual 	case CONT_PMD_SIZE:
67a8a733b2SAnshuman Khandual 	case PMD_SIZE:
68a8a733b2SAnshuman Khandual 	case CONT_PTE_SIZE:
69a8a733b2SAnshuman Khandual 		return true;
70a8a733b2SAnshuman Khandual 	}
71a8a733b2SAnshuman Khandual 
72a8a733b2SAnshuman Khandual 	return false;
73a8a733b2SAnshuman Khandual }
74a8a733b2SAnshuman Khandual 
#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
/*
 * Tell the caller whether huge pages of hstate @h may be migrated.
 *
 * Returns true for every size accepted by __hugetlb_valid_size(); any
 * other size is reported with a warning and rejected.
 */
bool arch_hugetlb_migration_supported(struct hstate *h)
{
	/*
	 * Keep the size as unsigned long: that is what __hugetlb_valid_size()
	 * takes and what the "%lx" conversion below expects.
	 */
	unsigned long pagesize = huge_page_size(h);

	if (!__hugetlb_valid_size(pagesize)) {
		pr_warn("%s: unrecognized huge page size 0x%lx\n",
			__func__, pagesize);
		return false;
	}
	return true;
}
#endif
885480280dSAnshuman Khandual 
/*
 * Returns 1 when @pmd is non-zero and does not have PMD_TABLE_BIT set,
 * 0 otherwise.
 */
int pmd_huge(pmd_t pmd)
{
	if (!pmd_val(pmd))
		return 0;

	return !(pmd_val(pmd) & PMD_TABLE_BIT);
}
93084bd298SSteve Capper 
/*
 * Returns 1 when @pud is non-zero and does not have PUD_TABLE_BIT set.
 * Always returns 0 when the PMD level is folded.
 */
int pud_huge(pud_t pud)
{
#ifdef __PAGETABLE_PMD_FOLDED
	return 0;
#else
	if (!pud_val(pud))
		return 0;

	return !(pud_val(pud) & PUD_TABLE_BIT);
#endif
}
102084bd298SSteve Capper 
/*
 * Work out how many entries make up the contiguous range that @ptep
 * belongs to, and the size mapped by each of those entries.
 *
 * The page tables of @mm are walked down to the PMD slot for @addr. If
 * that slot is @ptep itself, *@pgsize is set to PMD_SIZE and CONT_PMDS is
 * returned; otherwise *@pgsize is PAGE_SIZE and CONT_PTES is returned.
 */
static int find_num_contig(struct mm_struct *mm, unsigned long addr,
			   pte_t *ptep, size_t *pgsize)
{
	pgd_t *pgd_slot = pgd_offset(mm, addr);
	p4d_t *p4d_slot = p4d_offset(pgd_slot, addr);
	pud_t *pud_slot = pud_offset(p4d_slot, addr);
	pmd_t *pmd_slot = pmd_offset(pud_slot, addr);

	if ((pte_t *)pmd_slot != ptep) {
		*pgsize = PAGE_SIZE;
		return CONT_PTES;
	}

	*pgsize = PMD_SIZE;
	return CONT_PMDS;
}
12166b3923aSDavid Woods 
num_contig_ptes(unsigned long size,size_t * pgsize)122c3e4ed5cSPunit Agrawal static inline int num_contig_ptes(unsigned long size, size_t *pgsize)
123c3e4ed5cSPunit Agrawal {
124*62112e7fSRyan Roberts 	int contig_ptes = 1;
125c3e4ed5cSPunit Agrawal 
126c3e4ed5cSPunit Agrawal 	*pgsize = size;
127c3e4ed5cSPunit Agrawal 
128c3e4ed5cSPunit Agrawal 	switch (size) {
129c3e4ed5cSPunit Agrawal 	case CONT_PMD_SIZE:
130c3e4ed5cSPunit Agrawal 		*pgsize = PMD_SIZE;
131c3e4ed5cSPunit Agrawal 		contig_ptes = CONT_PMDS;
132c3e4ed5cSPunit Agrawal 		break;
133c3e4ed5cSPunit Agrawal 	case CONT_PTE_SIZE:
134c3e4ed5cSPunit Agrawal 		*pgsize = PAGE_SIZE;
135c3e4ed5cSPunit Agrawal 		contig_ptes = CONT_PTES;
136c3e4ed5cSPunit Agrawal 		break;
137*62112e7fSRyan Roberts 	default:
138*62112e7fSRyan Roberts 		WARN_ON(!__hugetlb_valid_size(size));
139c3e4ed5cSPunit Agrawal 	}
140c3e4ed5cSPunit Agrawal 
141c3e4ed5cSPunit Agrawal 	return contig_ptes;
142c3e4ed5cSPunit Agrawal }
143c3e4ed5cSPunit Agrawal 
/*
 * Read the huge pte at @ptep.
 *
 * Entries that are not present or not marked contiguous are returned as
 * read. For a contiguous entry, every entry of the range is read and the
 * returned value is the first entry with the dirty/young state of all of
 * them folded in.
 */
pte_t huge_ptep_get(pte_t *ptep)
{
	pte_t result = ptep_get(ptep);
	size_t pgsize;
	int left;

	if (!pte_present(result) || !pte_cont(result))
		return result;

	/* The range length is derived from the size of the mapped page. */
	left = num_contig_ptes(page_size(pte_page(result)), &pgsize);
	for (; left > 0; left--, ptep++) {
		pte_t entry = ptep_get(ptep);

		if (pte_dirty(entry))
			result = pte_mkdirty(result);

		if (pte_young(entry))
			result = pte_mkyoung(result);
	}

	return result;
}
165bc5dfb4fSBaolin Wang 
166d8bdcff2SSteve Capper /*
167d8bdcff2SSteve Capper  * Changing some bits of contiguous entries requires us to follow a
168d8bdcff2SSteve Capper  * Break-Before-Make approach, breaking the whole contiguous set
169d8bdcff2SSteve Capper  * before we can change any entries. See ARM DDI 0487A.k_iss10775,
170d8bdcff2SSteve Capper  * "Misprogramming of the Contiguous bit", page D4-1762.
171d8bdcff2SSteve Capper  *
172d8bdcff2SSteve Capper  * This helper performs the break step.
173d8bdcff2SSteve Capper  */
get_clear_contig(struct mm_struct * mm,unsigned long addr,pte_t * ptep,unsigned long pgsize,unsigned long ncontig)174fb396bb4SAnshuman Khandual static pte_t get_clear_contig(struct mm_struct *mm,
175d8bdcff2SSteve Capper 			     unsigned long addr,
176d8bdcff2SSteve Capper 			     pte_t *ptep,
177d8bdcff2SSteve Capper 			     unsigned long pgsize,
178d8bdcff2SSteve Capper 			     unsigned long ncontig)
179d8bdcff2SSteve Capper {
180*62112e7fSRyan Roberts 	pte_t pte, tmp_pte;
181*62112e7fSRyan Roberts 	bool present;
182d8bdcff2SSteve Capper 
183*62112e7fSRyan Roberts 	pte = ptep_get_and_clear(mm, addr, ptep);
184*62112e7fSRyan Roberts 	present = pte_present(pte);
185*62112e7fSRyan Roberts 	while (--ncontig) {
186*62112e7fSRyan Roberts 		ptep++;
187*62112e7fSRyan Roberts 		addr += pgsize;
188*62112e7fSRyan Roberts 		tmp_pte = ptep_get_and_clear(mm, addr, ptep);
189*62112e7fSRyan Roberts 		if (present) {
190*62112e7fSRyan Roberts 			if (pte_dirty(tmp_pte))
191*62112e7fSRyan Roberts 				pte = pte_mkdirty(pte);
192*62112e7fSRyan Roberts 			if (pte_young(tmp_pte))
193*62112e7fSRyan Roberts 				pte = pte_mkyoung(pte);
194d8bdcff2SSteve Capper 		}
195*62112e7fSRyan Roberts 	}
196*62112e7fSRyan Roberts 	return pte;
197d8bdcff2SSteve Capper }
198d8bdcff2SSteve Capper 
/*
 * Same as get_clear_contig(), followed by a TLB flush of the whole range
 * [@addr, @addr + @pgsize * @ncontig). Returns the value produced by
 * get_clear_contig().
 */
static pte_t get_clear_contig_flush(struct mm_struct *mm,
				    unsigned long addr,
				    pte_t *ptep,
				    unsigned long pgsize,
				    unsigned long ncontig)
{
	/* Stand-in vma built from @mm so flush_tlb_range() can be used. */
	struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
	unsigned long end = addr + (pgsize * ncontig);
	pte_t old_pte;

	old_pte = get_clear_contig(mm, addr, ptep, pgsize, ncontig);
	flush_tlb_range(&vma, addr, end);

	return old_pte;
}
21141098230SWill Deacon 
212d8bdcff2SSteve Capper /*
213d8bdcff2SSteve Capper  * Changing some bits of contiguous entries requires us to follow a
214d8bdcff2SSteve Capper  * Break-Before-Make approach, breaking the whole contiguous set
215d8bdcff2SSteve Capper  * before we can change any entries. See ARM DDI 0487A.k_iss10775,
216d8bdcff2SSteve Capper  * "Misprogramming of the Contiguous bit", page D4-1762.
217d8bdcff2SSteve Capper  *
218d8bdcff2SSteve Capper  * This helper performs the break step for use cases where the
219d8bdcff2SSteve Capper  * original pte is not needed.
220d8bdcff2SSteve Capper  */
clear_flush(struct mm_struct * mm,unsigned long addr,pte_t * ptep,unsigned long pgsize,unsigned long ncontig)221d8bdcff2SSteve Capper static void clear_flush(struct mm_struct *mm,
222d8bdcff2SSteve Capper 			     unsigned long addr,
223d8bdcff2SSteve Capper 			     pte_t *ptep,
224d8bdcff2SSteve Capper 			     unsigned long pgsize,
225d8bdcff2SSteve Capper 			     unsigned long ncontig)
226d8bdcff2SSteve Capper {
2278b11ec1bSLinus Torvalds 	struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
228d8bdcff2SSteve Capper 	unsigned long i, saddr = addr;
229d8bdcff2SSteve Capper 
230d8bdcff2SSteve Capper 	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
23100de2c9fSQi Zheng 		ptep_clear(mm, addr, ptep);
232d8bdcff2SSteve Capper 
233d8bdcff2SSteve Capper 	flush_tlb_range(&vma, saddr, addr);
234d8bdcff2SSteve Capper }
235d8bdcff2SSteve Capper 
/*
 * Install huge pte @pte of size @sz at @ptep/@addr in @mm.
 *
 * - A non-present @pte is written unchanged into every entry backing @sz.
 * - A present, non-contiguous @pte is written as a single entry.
 * - A present, contiguous @pte is expanded into one entry per element of
 *   the contiguous range (advancing the pfn each time), after the range
 *   has first been cleared and flushed.
 */
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
			    pte_t *ptep, pte_t pte, unsigned long sz)
{
	unsigned long pfn, pfn_step;
	pgprot_t prot;
	size_t pgsize;
	int ncontig, left;

	ncontig = num_contig_ptes(sz, &pgsize);

	if (!pte_present(pte)) {
		for (left = ncontig; left > 0; left--) {
			set_pte_at(mm, addr, ptep, pte);
			ptep++;
			addr += pgsize;
		}
		return;
	}

	if (!pte_cont(pte)) {
		set_pte_at(mm, addr, ptep, pte);
		return;
	}

	pfn = pte_pfn(pte);
	pfn_step = pgsize >> PAGE_SHIFT;
	prot = pte_pgprot(pte);

	/* Break the old contiguous range before making the new one. */
	clear_flush(mm, addr, ptep, pgsize, ncontig);

	for (left = ncontig; left > 0; left--) {
		set_pte_at(mm, addr, ptep, pfn_pte(pfn, prot));
		ptep++;
		addr += pgsize;
		pfn += pfn_step;
	}
}
26766b3923aSDavid Woods 
huge_pte_alloc(struct mm_struct * mm,struct vm_area_struct * vma,unsigned long addr,unsigned long sz)268aec44e0fSPeter Xu pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
26966b3923aSDavid Woods 		      unsigned long addr, unsigned long sz)
27066b3923aSDavid Woods {
27120a004e7SWill Deacon 	pgd_t *pgdp;
272e9f63768SMike Rapoport 	p4d_t *p4dp;
27320a004e7SWill Deacon 	pud_t *pudp;
27420a004e7SWill Deacon 	pmd_t *pmdp;
27520a004e7SWill Deacon 	pte_t *ptep = NULL;
27666b3923aSDavid Woods 
27720a004e7SWill Deacon 	pgdp = pgd_offset(mm, addr);
278e9f63768SMike Rapoport 	p4dp = p4d_offset(pgdp, addr);
279e9f63768SMike Rapoport 	pudp = pud_alloc(mm, p4dp, addr);
28020a004e7SWill Deacon 	if (!pudp)
28166b3923aSDavid Woods 		return NULL;
28266b3923aSDavid Woods 
28366b3923aSDavid Woods 	if (sz == PUD_SIZE) {
28420a004e7SWill Deacon 		ptep = (pte_t *)pudp;
285441a6278SAnshuman Khandual 	} else if (sz == (CONT_PTE_SIZE)) {
28620a004e7SWill Deacon 		pmdp = pmd_alloc(mm, pudp, addr);
287027d0c71SMark Rutland 		if (!pmdp)
288027d0c71SMark Rutland 			return NULL;
28966b3923aSDavid Woods 
29066b3923aSDavid Woods 		WARN_ON(addr & (sz - 1));
291cafcb9caSHugh Dickins 		ptep = pte_alloc_huge(mm, pmdp, addr);
29266b3923aSDavid Woods 	} else if (sz == PMD_SIZE) {
293c1991e07SPeter Xu 		if (want_pmd_share(vma, addr) && pud_none(READ_ONCE(*pudp)))
294aec44e0fSPeter Xu 			ptep = huge_pmd_share(mm, vma, addr, pudp);
29566b3923aSDavid Woods 		else
29620a004e7SWill Deacon 			ptep = (pte_t *)pmd_alloc(mm, pudp, addr);
297441a6278SAnshuman Khandual 	} else if (sz == (CONT_PMD_SIZE)) {
29820a004e7SWill Deacon 		pmdp = pmd_alloc(mm, pudp, addr);
29966b3923aSDavid Woods 		WARN_ON(addr & (sz - 1));
30020a004e7SWill Deacon 		return (pte_t *)pmdp;
30166b3923aSDavid Woods 	}
30266b3923aSDavid Woods 
30320a004e7SWill Deacon 	return ptep;
30466b3923aSDavid Woods }
30566b3923aSDavid Woods 
huge_pte_offset(struct mm_struct * mm,unsigned long addr,unsigned long sz)3067868a208SPunit Agrawal pte_t *huge_pte_offset(struct mm_struct *mm,
3077868a208SPunit Agrawal 		       unsigned long addr, unsigned long sz)
30866b3923aSDavid Woods {
30920a004e7SWill Deacon 	pgd_t *pgdp;
310e9f63768SMike Rapoport 	p4d_t *p4dp;
31120a004e7SWill Deacon 	pud_t *pudp, pud;
31220a004e7SWill Deacon 	pmd_t *pmdp, pmd;
31366b3923aSDavid Woods 
31420a004e7SWill Deacon 	pgdp = pgd_offset(mm, addr);
31520a004e7SWill Deacon 	if (!pgd_present(READ_ONCE(*pgdp)))
31666b3923aSDavid Woods 		return NULL;
317f02ab08aSPunit Agrawal 
318e9f63768SMike Rapoport 	p4dp = p4d_offset(pgdp, addr);
319e9f63768SMike Rapoport 	if (!p4d_present(READ_ONCE(*p4dp)))
320e9f63768SMike Rapoport 		return NULL;
321e9f63768SMike Rapoport 
322e9f63768SMike Rapoport 	pudp = pud_offset(p4dp, addr);
32320a004e7SWill Deacon 	pud = READ_ONCE(*pudp);
32420a004e7SWill Deacon 	if (sz != PUD_SIZE && pud_none(pud))
32566b3923aSDavid Woods 		return NULL;
32630f3ac00SPunit Agrawal 	/* hugepage or swap? */
32720a004e7SWill Deacon 	if (pud_huge(pud) || !pud_present(pud))
32820a004e7SWill Deacon 		return (pte_t *)pudp;
329f02ab08aSPunit Agrawal 	/* table; check the next level */
33066b3923aSDavid Woods 
33130f3ac00SPunit Agrawal 	if (sz == CONT_PMD_SIZE)
33230f3ac00SPunit Agrawal 		addr &= CONT_PMD_MASK;
33330f3ac00SPunit Agrawal 
33420a004e7SWill Deacon 	pmdp = pmd_offset(pudp, addr);
33520a004e7SWill Deacon 	pmd = READ_ONCE(*pmdp);
33630f3ac00SPunit Agrawal 	if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) &&
33720a004e7SWill Deacon 	    pmd_none(pmd))
338f02ab08aSPunit Agrawal 		return NULL;
33920a004e7SWill Deacon 	if (pmd_huge(pmd) || !pmd_present(pmd))
34020a004e7SWill Deacon 		return (pte_t *)pmdp;
341f02ab08aSPunit Agrawal 
34220a004e7SWill Deacon 	if (sz == CONT_PTE_SIZE)
343cafcb9caSHugh Dickins 		return pte_offset_huge(pmdp, (addr & CONT_PTE_MASK));
34430f3ac00SPunit Agrawal 
34566b3923aSDavid Woods 	return NULL;
34666b3923aSDavid Woods }
34766b3923aSDavid Woods 
hugetlb_mask_last_page(struct hstate * h)3481bcdb769SBaolin Wang unsigned long hugetlb_mask_last_page(struct hstate *h)
3491bcdb769SBaolin Wang {
3501bcdb769SBaolin Wang 	unsigned long hp_size = huge_page_size(h);
3511bcdb769SBaolin Wang 
3521bcdb769SBaolin Wang 	switch (hp_size) {
3531bcdb769SBaolin Wang #ifndef __PAGETABLE_PMD_FOLDED
3541bcdb769SBaolin Wang 	case PUD_SIZE:
3551bcdb769SBaolin Wang 		return PGDIR_SIZE - PUD_SIZE;
3561bcdb769SBaolin Wang #endif
3571bcdb769SBaolin Wang 	case CONT_PMD_SIZE:
3581bcdb769SBaolin Wang 		return PUD_SIZE - CONT_PMD_SIZE;
3591bcdb769SBaolin Wang 	case PMD_SIZE:
3601bcdb769SBaolin Wang 		return PUD_SIZE - PMD_SIZE;
3611bcdb769SBaolin Wang 	case CONT_PTE_SIZE:
3621bcdb769SBaolin Wang 		return PMD_SIZE - CONT_PTE_SIZE;
3631bcdb769SBaolin Wang 	default:
3641bcdb769SBaolin Wang 		break;
3651bcdb769SBaolin Wang 	}
3661bcdb769SBaolin Wang 
3671bcdb769SBaolin Wang 	return 0UL;
3681bcdb769SBaolin Wang }
3691bcdb769SBaolin Wang 
/*
 * Turn @entry into a huge pte for a page of 2^@shift bytes.
 *
 * The entry is always marked huge. It is additionally marked contiguous
 * for CONT_PTE_SIZE and CONT_PMD_SIZE pages. PUD_SIZE and PMD_SIZE need
 * nothing more; any other size triggers a warning but the (huge) entry
 * is still returned. @flags is not used here.
 */
pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags)
{
	/* unsigned long matches both the 1UL shift and the "%lx" below. */
	unsigned long pagesize = 1UL << shift;

	entry = pte_mkhuge(entry);
	if (pagesize == CONT_PTE_SIZE) {
		entry = pte_mkcont(entry);
	} else if (pagesize == CONT_PMD_SIZE) {
		entry = pmd_pte(pmd_mkcont(pte_pmd(entry)));
	} else if (pagesize != PUD_SIZE && pagesize != PMD_SIZE) {
		pr_warn("%s: unrecognized huge page size 0x%lx\n",
			__func__, pagesize);
	}
	return entry;
}
38566b3923aSDavid Woods 
/*
 * Clear every page table entry backing the huge page of size @sz that
 * starts at @ptep/@addr. No TLB maintenance is performed here.
 */
void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
		    pte_t *ptep, unsigned long sz)
{
	size_t pgsize;
	int left = num_contig_ptes(sz, &pgsize);

	while (left-- > 0) {
		pte_clear(mm, addr, ptep);
		addr += pgsize;
		ptep++;
	}
}
397c3e4ed5cSPunit Agrawal 
/*
 * Clear the huge pte of size @sz at @ptep/@addr and return its previous
 * value, as computed by get_clear_contig() over all backing entries.
 */
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep, unsigned long sz)
{
	size_t pgsize;
	int ncontig = num_contig_ptes(sz, &pgsize);

	return get_clear_contig(mm, addr, ptep, pgsize, ncontig);
}
40766b3923aSDavid Woods 
408031e6e6bSSteve Capper /*
409031e6e6bSSteve Capper  * huge_ptep_set_access_flags will update access flags (dirty, accesssed)
410031e6e6bSSteve Capper  * and write permission.
411031e6e6bSSteve Capper  *
412031e6e6bSSteve Capper  * For a contiguous huge pte range we need to check whether or not write
413031e6e6bSSteve Capper  * permission has to change only on the first pte in the set. Then for
414031e6e6bSSteve Capper  * all the contiguous ptes we need to check whether or not there is a
415031e6e6bSSteve Capper  * discrepancy between dirty or young.
416031e6e6bSSteve Capper  */
__cont_access_flags_changed(pte_t * ptep,pte_t pte,int ncontig)417031e6e6bSSteve Capper static int __cont_access_flags_changed(pte_t *ptep, pte_t pte, int ncontig)
418031e6e6bSSteve Capper {
419031e6e6bSSteve Capper 	int i;
420031e6e6bSSteve Capper 
421f0d9d79eSBaolin Wang 	if (pte_write(pte) != pte_write(ptep_get(ptep)))
422031e6e6bSSteve Capper 		return 1;
423031e6e6bSSteve Capper 
424031e6e6bSSteve Capper 	for (i = 0; i < ncontig; i++) {
425f0d9d79eSBaolin Wang 		pte_t orig_pte = ptep_get(ptep + i);
426031e6e6bSSteve Capper 
427031e6e6bSSteve Capper 		if (pte_dirty(pte) != pte_dirty(orig_pte))
428031e6e6bSSteve Capper 			return 1;
429031e6e6bSSteve Capper 
430031e6e6bSSteve Capper 		if (pte_young(pte) != pte_young(orig_pte))
431031e6e6bSSteve Capper 			return 1;
432031e6e6bSSteve Capper 	}
433031e6e6bSSteve Capper 
434031e6e6bSSteve Capper 	return 0;
435031e6e6bSSteve Capper }
436031e6e6bSSteve Capper 
/*
 * Update the access flags / write permission of the huge pte at @ptep to
 * those of @pte.
 *
 * Non-contiguous entries are handed to ptep_set_access_flags(). For a
 * contiguous range nothing is done (and 0 returned) when
 * __cont_access_flags_changed() reports no difference; otherwise the
 * range is cleared and flushed, any dirty/young state it held is carried
 * over into @pte, the range is rewritten, and 1 is returned.
 */
int huge_ptep_set_access_flags(struct vm_area_struct *vma,
			       unsigned long addr, pte_t *ptep,
			       pte_t pte, int dirty)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long pfn = pte_pfn(pte);
	unsigned long pfn_step;
	size_t pgsize = 0;
	pgprot_t prot;
	pte_t old_pte;
	int ncontig, left;

	if (!pte_cont(pte))
		return ptep_set_access_flags(vma, addr, ptep, pte, dirty);

	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
	pfn_step = pgsize >> PAGE_SHIFT;

	if (!__cont_access_flags_changed(ptep, pte, ncontig))
		return 0;

	old_pte = get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig);

	/* Make sure we don't lose the dirty or young state */
	if (pte_dirty(old_pte))
		pte = pte_mkdirty(pte);

	if (pte_young(old_pte))
		pte = pte_mkyoung(pte);

	prot = pte_pgprot(pte);
	for (left = ncontig; left > 0; left--) {
		set_pte_at(mm, addr, ptep, pfn_pte(pfn, prot));
		ptep++;
		addr += pgsize;
		pfn += pfn_step;
	}

	return 1;
}
47266b3923aSDavid Woods 
/*
 * Write-protect the huge pte at @ptep/@addr.
 *
 * A non-contiguous entry is handed to ptep_set_wrprotect(). A contiguous
 * range is cleared and flushed first, then rewritten entry by entry from
 * the write-protected version of the value that was read back.
 */
void huge_ptep_set_wrprotect(struct mm_struct *mm,
			     unsigned long addr, pte_t *ptep)
{
	unsigned long pfn, pfn_step;
	pgprot_t prot;
	size_t pgsize;
	pte_t old_pte;
	int ncontig, left;

	if (!pte_cont(READ_ONCE(*ptep))) {
		ptep_set_wrprotect(mm, addr, ptep);
		return;
	}

	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
	pfn_step = pgsize >> PAGE_SHIFT;

	old_pte = get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig);
	old_pte = pte_wrprotect(old_pte);

	prot = pte_pgprot(old_pte);
	pfn = pte_pfn(old_pte);

	for (left = ncontig; left > 0; left--) {
		set_pte_at(mm, addr, ptep, pfn_pte(pfn, prot));
		ptep++;
		addr += pgsize;
		pfn += pfn_step;
	}
}
49966b3923aSDavid Woods 
/*
 * Clear the huge pte at @ptep/@addr, flush the TLB for it and return the
 * old value. Non-contiguous entries go through ptep_clear_flush();
 * contiguous ranges go through get_clear_contig_flush().
 */
pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
			    unsigned long addr, pte_t *ptep)
{
	struct mm_struct *mm = vma->vm_mm;
	size_t pgsize;
	int ncontig;

	if (pte_cont(READ_ONCE(*ptep))) {
		ncontig = find_num_contig(mm, addr, ptep, &pgsize);
		return get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig);
	}

	return ptep_clear_flush(vma, addr, ptep);
}
51366b3923aSDavid Woods 
hugetlbpage_init(void)514a21b0b78SAllen Pais static int __init hugetlbpage_init(void)
515a21b0b78SAllen Pais {
5164227ce0cSAnshuman Khandual 	/*
5174227ce0cSAnshuman Khandual 	 * HugeTLB pages are supported on maximum four page table
5184227ce0cSAnshuman Khandual 	 * levels (PUD, CONT PMD, PMD, CONT PTE) for a given base
5194227ce0cSAnshuman Khandual 	 * page size, corresponding to hugetlb_add_hstate() calls
5204227ce0cSAnshuman Khandual 	 * here.
5214227ce0cSAnshuman Khandual 	 *
5224227ce0cSAnshuman Khandual 	 * HUGE_MAX_HSTATE should at least match maximum supported
5234227ce0cSAnshuman Khandual 	 * HugeTLB page sizes on the platform. Any new addition to
5244227ce0cSAnshuman Khandual 	 * supported HugeTLB page sizes will also require changing
5254227ce0cSAnshuman Khandual 	 * HUGE_MAX_HSTATE as well.
5264227ce0cSAnshuman Khandual 	 */
5274227ce0cSAnshuman Khandual 	BUILD_BUG_ON(HUGE_MAX_HSTATE < 4);
528f8b46c4bSAnshuman Khandual 	if (pud_sect_supported())
52938237830SMike Kravetz 		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
530f8b46c4bSAnshuman Khandual 
531a1634a54SGavin Shan 	hugetlb_add_hstate(CONT_PMD_SHIFT - PAGE_SHIFT);
53238237830SMike Kravetz 	hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
533a1634a54SGavin Shan 	hugetlb_add_hstate(CONT_PTE_SHIFT - PAGE_SHIFT);
534a21b0b78SAllen Pais 
535a21b0b78SAllen Pais 	return 0;
536a21b0b78SAllen Pais }
537a21b0b78SAllen Pais arch_initcall(hugetlbpage_init);
538a21b0b78SAllen Pais 
arch_hugetlb_valid_size(unsigned long size)539ae94da89SMike Kravetz bool __init arch_hugetlb_valid_size(unsigned long size)
540084bd298SSteve Capper {
541a8a733b2SAnshuman Khandual 	return __hugetlb_valid_size(size);
542ae94da89SMike Kravetz }
5435db568e7SAnshuman Khandual 
/*
 * Begin a protection change on the huge pte at @ptep/@addr: clear it and
 * return the old value.
 *
 * Break-before-make (BBM) is required for all user space mappings when
 * the permission changes from executable to non-executable on CPUs
 * affected by erratum #2645198. In that case a user-executable entry is
 * cleared with huge_ptep_clear_flush(); everything else is cleared with
 * huge_ptep_get_and_clear().
 */
pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
{
	unsigned long psize = huge_page_size(hstate_vma(vma));

	if (IS_ENABLED(CONFIG_ARM64_ERRATUM_2645198) &&
	    cpus_have_const_cap(ARM64_WORKAROUND_2645198) &&
	    pte_user_exec(READ_ONCE(*ptep)))
		return huge_ptep_clear_flush(vma, addr, ptep);

	return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, psize);
}
5605db568e7SAnshuman Khandual 
/*
 * Finish a protection change started by huge_ptep_modify_prot_start():
 * install @pte at @ptep/@addr using the huge page size of @vma's hstate.
 * @old_pte is not used here.
 */
void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
				  pte_t old_pte, pte_t pte)
{
	struct hstate *h = hstate_vma(vma);

	set_huge_pte_at(vma->vm_mm, addr, ptep, pte, huge_page_size(h));
}
568