xref: /openbmc/linux/arch/arm64/mm/hugetlbpage.c (revision 0984d159)
1 /*
2  * arch/arm64/mm/hugetlbpage.c
3  *
4  * Copyright (C) 2013 Linaro Ltd.
5  *
6  * Based on arch/x86/mm/hugetlbpage.c.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 as
10  * published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  */
17 
18 #include <linux/init.h>
19 #include <linux/fs.h>
20 #include <linux/mm.h>
21 #include <linux/hugetlb.h>
22 #include <linux/pagemap.h>
23 #include <linux/err.h>
24 #include <linux/sysctl.h>
25 #include <asm/mman.h>
26 #include <asm/tlb.h>
27 #include <asm/tlbflush.h>
28 #include <asm/pgalloc.h>
29 
30 int pmd_huge(pmd_t pmd)
31 {
32 	return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT);
33 }
34 
35 int pud_huge(pud_t pud)
36 {
37 #ifndef __PAGETABLE_PMD_FOLDED
38 	return pud_val(pud) && !(pud_val(pud) & PUD_TABLE_BIT);
39 #else
40 	return 0;
41 #endif
42 }
43 
44 static int find_num_contig(struct mm_struct *mm, unsigned long addr,
45 			   pte_t *ptep, pte_t pte, size_t *pgsize)
46 {
47 	pgd_t *pgd = pgd_offset(mm, addr);
48 	pud_t *pud;
49 	pmd_t *pmd;
50 
51 	*pgsize = PAGE_SIZE;
52 	if (!pte_cont(pte))
53 		return 1;
54 	if (!pgd_present(*pgd)) {
55 		VM_BUG_ON(!pgd_present(*pgd));
56 		return 1;
57 	}
58 	pud = pud_offset(pgd, addr);
59 	if (!pud_present(*pud)) {
60 		VM_BUG_ON(!pud_present(*pud));
61 		return 1;
62 	}
63 	pmd = pmd_offset(pud, addr);
64 	if (!pmd_present(*pmd)) {
65 		VM_BUG_ON(!pmd_present(*pmd));
66 		return 1;
67 	}
68 	if ((pte_t *)pmd == ptep) {
69 		*pgsize = PMD_SIZE;
70 		return CONT_PMDS;
71 	}
72 	return CONT_PTES;
73 }
74 
75 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
76 			    pte_t *ptep, pte_t pte)
77 {
78 	size_t pgsize;
79 	int i;
80 	int ncontig = find_num_contig(mm, addr, ptep, pte, &pgsize);
81 	unsigned long pfn;
82 	pgprot_t hugeprot;
83 
84 	if (ncontig == 1) {
85 		set_pte_at(mm, addr, ptep, pte);
86 		return;
87 	}
88 
89 	pfn = pte_pfn(pte);
90 	hugeprot = __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
91 	for (i = 0; i < ncontig; i++) {
92 		pr_debug("%s: set pte %p to 0x%llx\n", __func__, ptep,
93 			 pte_val(pfn_pte(pfn, hugeprot)));
94 		set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
95 		ptep++;
96 		pfn += pgsize >> PAGE_SHIFT;
97 		addr += pgsize;
98 	}
99 }
100 
101 pte_t *huge_pte_alloc(struct mm_struct *mm,
102 		      unsigned long addr, unsigned long sz)
103 {
104 	pgd_t *pgd;
105 	pud_t *pud;
106 	pte_t *pte = NULL;
107 
108 	pr_debug("%s: addr:0x%lx sz:0x%lx\n", __func__, addr, sz);
109 	pgd = pgd_offset(mm, addr);
110 	pud = pud_alloc(mm, pgd, addr);
111 	if (!pud)
112 		return NULL;
113 
114 	if (sz == PUD_SIZE) {
115 		pte = (pte_t *)pud;
116 	} else if (sz == (PAGE_SIZE * CONT_PTES)) {
117 		pmd_t *pmd = pmd_alloc(mm, pud, addr);
118 
119 		WARN_ON(addr & (sz - 1));
120 		/*
121 		 * Note that if this code were ever ported to the
122 		 * 32-bit arm platform then it will cause trouble in
123 		 * the case where CONFIG_HIGHPTE is set, since there
124 		 * will be no pte_unmap() to correspond with this
125 		 * pte_alloc_map().
126 		 */
127 		pte = pte_alloc_map(mm, pmd, addr);
128 	} else if (sz == PMD_SIZE) {
129 		if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) &&
130 		    pud_none(*pud))
131 			pte = huge_pmd_share(mm, addr, pud);
132 		else
133 			pte = (pte_t *)pmd_alloc(mm, pud, addr);
134 	} else if (sz == (PMD_SIZE * CONT_PMDS)) {
135 		pmd_t *pmd;
136 
137 		pmd = pmd_alloc(mm, pud, addr);
138 		WARN_ON(addr & (sz - 1));
139 		return (pte_t *)pmd;
140 	}
141 
142 	pr_debug("%s: addr:0x%lx sz:0x%lx ret pte=%p/0x%llx\n", __func__, addr,
143 	       sz, pte, pte_val(*pte));
144 	return pte;
145 }
146 
147 pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
148 {
149 	pgd_t *pgd;
150 	pud_t *pud;
151 	pmd_t *pmd = NULL;
152 	pte_t *pte = NULL;
153 
154 	pgd = pgd_offset(mm, addr);
155 	pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd);
156 	if (!pgd_present(*pgd))
157 		return NULL;
158 	pud = pud_offset(pgd, addr);
159 	if (!pud_present(*pud))
160 		return NULL;
161 
162 	if (pud_huge(*pud))
163 		return (pte_t *)pud;
164 	pmd = pmd_offset(pud, addr);
165 	if (!pmd_present(*pmd))
166 		return NULL;
167 
168 	if (pte_cont(pmd_pte(*pmd))) {
169 		pmd = pmd_offset(
170 			pud, (addr & CONT_PMD_MASK));
171 		return (pte_t *)pmd;
172 	}
173 	if (pmd_huge(*pmd))
174 		return (pte_t *)pmd;
175 	pte = pte_offset_kernel(pmd, addr);
176 	if (pte_present(*pte) && pte_cont(*pte)) {
177 		pte = pte_offset_kernel(
178 			pmd, (addr & CONT_PTE_MASK));
179 		return pte;
180 	}
181 	return NULL;
182 }
183 
184 pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
185 			 struct page *page, int writable)
186 {
187 	size_t pagesize = huge_page_size(hstate_vma(vma));
188 
189 	if (pagesize == CONT_PTE_SIZE) {
190 		entry = pte_mkcont(entry);
191 	} else if (pagesize == CONT_PMD_SIZE) {
192 		entry = pmd_pte(pmd_mkcont(pte_pmd(entry)));
193 	} else if (pagesize != PUD_SIZE && pagesize != PMD_SIZE) {
194 		pr_warn("%s: unrecognized huge page size 0x%lx\n",
195 			__func__, pagesize);
196 	}
197 	return entry;
198 }
199 
200 pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
201 			      unsigned long addr, pte_t *ptep)
202 {
203 	pte_t pte;
204 
205 	if (pte_cont(*ptep)) {
206 		int ncontig, i;
207 		size_t pgsize;
208 		pte_t *cpte;
209 		bool is_dirty = false;
210 
211 		cpte = huge_pte_offset(mm, addr);
212 		ncontig = find_num_contig(mm, addr, cpte, *cpte, &pgsize);
213 		/* save the 1st pte to return */
214 		pte = ptep_get_and_clear(mm, addr, cpte);
215 		for (i = 1; i < ncontig; ++i) {
216 			/*
217 			 * If HW_AFDBM is enabled, then the HW could
218 			 * turn on the dirty bit for any of the page
219 			 * in the set, so check them all.
220 			 */
221 			++cpte;
222 			if (pte_dirty(ptep_get_and_clear(mm, addr, cpte)))
223 				is_dirty = true;
224 		}
225 		if (is_dirty)
226 			return pte_mkdirty(pte);
227 		else
228 			return pte;
229 	} else {
230 		return ptep_get_and_clear(mm, addr, ptep);
231 	}
232 }
233 
234 int huge_ptep_set_access_flags(struct vm_area_struct *vma,
235 			       unsigned long addr, pte_t *ptep,
236 			       pte_t pte, int dirty)
237 {
238 	pte_t *cpte;
239 
240 	if (pte_cont(pte)) {
241 		int ncontig, i, changed = 0;
242 		size_t pgsize = 0;
243 		unsigned long pfn = pte_pfn(pte);
244 		/* Select all bits except the pfn */
245 		pgprot_t hugeprot =
246 			__pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^
247 				 pte_val(pte));
248 
249 		cpte = huge_pte_offset(vma->vm_mm, addr);
250 		pfn = pte_pfn(*cpte);
251 		ncontig = find_num_contig(vma->vm_mm, addr, cpte,
252 					  *cpte, &pgsize);
253 		for (i = 0; i < ncontig; ++i, ++cpte) {
254 			changed = ptep_set_access_flags(vma, addr, cpte,
255 							pfn_pte(pfn,
256 								hugeprot),
257 							dirty);
258 			pfn += pgsize >> PAGE_SHIFT;
259 		}
260 		return changed;
261 	} else {
262 		return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
263 	}
264 }
265 
266 void huge_ptep_set_wrprotect(struct mm_struct *mm,
267 			     unsigned long addr, pte_t *ptep)
268 {
269 	if (pte_cont(*ptep)) {
270 		int ncontig, i;
271 		pte_t *cpte;
272 		size_t pgsize = 0;
273 
274 		cpte = huge_pte_offset(mm, addr);
275 		ncontig = find_num_contig(mm, addr, cpte, *cpte, &pgsize);
276 		for (i = 0; i < ncontig; ++i, ++cpte)
277 			ptep_set_wrprotect(mm, addr, cpte);
278 	} else {
279 		ptep_set_wrprotect(mm, addr, ptep);
280 	}
281 }
282 
283 void huge_ptep_clear_flush(struct vm_area_struct *vma,
284 			   unsigned long addr, pte_t *ptep)
285 {
286 	if (pte_cont(*ptep)) {
287 		int ncontig, i;
288 		pte_t *cpte;
289 		size_t pgsize = 0;
290 
291 		cpte = huge_pte_offset(vma->vm_mm, addr);
292 		ncontig = find_num_contig(vma->vm_mm, addr, cpte,
293 					  *cpte, &pgsize);
294 		for (i = 0; i < ncontig; ++i, ++cpte)
295 			ptep_clear_flush(vma, addr, cpte);
296 	} else {
297 		ptep_clear_flush(vma, addr, ptep);
298 	}
299 }
300 
301 static __init int setup_hugepagesz(char *opt)
302 {
303 	unsigned long ps = memparse(opt, &opt);
304 
305 	if (ps == PMD_SIZE) {
306 		hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
307 	} else if (ps == PUD_SIZE) {
308 		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
309 	} else if (ps == (PAGE_SIZE * CONT_PTES)) {
310 		hugetlb_add_hstate(CONT_PTE_SHIFT);
311 	} else if (ps == (PMD_SIZE * CONT_PMDS)) {
312 		hugetlb_add_hstate((PMD_SHIFT + CONT_PMD_SHIFT) - PAGE_SHIFT);
313 	} else {
314 		hugetlb_bad_size();
315 		pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10);
316 		return 0;
317 	}
318 	return 1;
319 }
320 __setup("hugepagesz=", setup_hugepagesz);
321 
#ifdef CONFIG_ARM64_64K_PAGES
/*
 * With 64K base pages, make sure a huge page size of CONT_PTES * PAGE_SIZE
 * is always registered, even when it was not requested on the command
 * line.
 *
 * The hstate is registered with CONT_PTE_SHIFT, the same order that
 * setup_hugepagesz() uses for this size, so that the size looked up and
 * the size added are the same one.
 *
 * Always returns 0.
 */
static __init int add_default_hugepagesz(void)
{
	if (size_to_hstate(CONT_PTES * PAGE_SIZE) == NULL)
		hugetlb_add_hstate(CONT_PTE_SHIFT);
	return 0;
}
arch_initcall(add_default_hugepagesz);
#endif
331