// SPDX-License-Identifier: GPL-2.0-only
/*
 * arch/arm64/mm/hugetlbpage.c
 *
 * Copyright (C) 2013 Linaro Ltd.
 *
 * Based on arch/x86/mm/hugetlbpage.c.
 */

#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>

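/*
 * arm64 hugetlb pages come in four sizes per translation granule: a
 * block mapping at the PUD level (4K granule only), a block mapping at
 * the PMD level, and runs of PMDs or PTEs carrying the contiguous hint
 * bit so they can share a single TLB entry. With a 4K granule these
 * are 1G, 2M, 32M (16 contiguous PMDs) and 64K (16 contiguous PTEs).
 */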
#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
bool arch_hugetlb_migration_supported(struct hstate *h)
{
	size_t pagesize = huge_page_size(h);

	switch (pagesize) {
#ifdef CONFIG_ARM64_4K_PAGES
	case PUD_SIZE:
#endif
	case PMD_SIZE:
	case CONT_PMD_SIZE:
	case CONT_PTE_SIZE:
		return true;
	}
	pr_warn("%s: unrecognized huge page size 0x%lx\n",
		__func__, pagesize);
	return false;
}
#endif

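/*
 * In the VMSAv8-64 descriptor format, levels that allow both table and
 * block entries use bit 1 to distinguish them, so a non-zero entry
 * with the table bit clear is a block (huge) mapping.
 */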
int pmd_huge(pmd_t pmd)
{
	return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT);
}

int pud_huge(pud_t pud)
{
#ifndef __PAGETABLE_PMD_FOLDED
	return pud_val(pud) && !(pud_val(pud) & PUD_TABLE_BIT);
#else
	return 0;
#endif
}

/*
 * Select all bits except the pfn
 */
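/*
 * pfn_pte(pfn, __pgprot(0)) builds a pte whose only set bits are the
 * pfn field, so XORing it with the original pte value strips exactly
 * those bits and leaves the attribute and permission bits behind.
 */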
static inline pgprot_t pte_pgprot(pte_t pte)
{
	unsigned long pfn = pte_pfn(pte);

	return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
}

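/*
 * Work out which level a pte pointer into a contiguous range sits at:
 * if a page table walk for addr lands on the entry that ptep aliases,
 * ptep is really a pmd and the range is CONT_PMDS entries of PMD_SIZE;
 * otherwise it is CONT_PTES entries of PAGE_SIZE. The per-entry size
 * is returned through pgsize.
 */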
static int find_num_contig(struct mm_struct *mm, unsigned long addr,
			   pte_t *ptep, size_t *pgsize)
{
	pgd_t *pgdp = pgd_offset(mm, addr);
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp;

	*pgsize = PAGE_SIZE;
	p4dp = p4d_offset(pgdp, addr);
	pudp = pud_offset(p4dp, addr);
	pmdp = pmd_offset(pudp, addr);
	if ((pte_t *)pmdp == ptep) {
		*pgsize = PMD_SIZE;
		return CONT_PMDS;
	}
	return CONT_PTES;
}

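/*
 * Decompose a hugepage size into the number of page table entries
 * describing it and the size each entry covers: PUD_SIZE and PMD_SIZE
 * are single block entries, while the contiguous sizes expand to
 * CONT_PMDS pmds or CONT_PTES ptes (e.g. with a 4K granule, a 32M
 * CONT_PMD hugepage is 16 entries of PMD_SIZE). Unsupported sizes
 * yield zero entries.
 */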
static inline int num_contig_ptes(unsigned long size, size_t *pgsize)
{
	int contig_ptes = 0;

	*pgsize = size;

	switch (size) {
#ifdef CONFIG_ARM64_4K_PAGES
	case PUD_SIZE:
#endif
	case PMD_SIZE:
		contig_ptes = 1;
		break;
	case CONT_PMD_SIZE:
		*pgsize = PMD_SIZE;
		contig_ptes = CONT_PMDS;
		break;
	case CONT_PTE_SIZE:
		*pgsize = PAGE_SIZE;
		contig_ptes = CONT_PTES;
		break;
	}

	return contig_ptes;
}

/*
 * Changing some bits of contiguous entries requires us to follow a
 * Break-Before-Make approach, breaking the whole contiguous set
 * before we can change any entries. See ARM DDI 0487A.k_iss10775,
 * "Misprogramming of the Contiguous bit", page D4-1762.
 *
 * This helper performs the break step.
 */
static pte_t get_clear_flush(struct mm_struct *mm,
			     unsigned long addr,
			     pte_t *ptep,
			     unsigned long pgsize,
			     unsigned long ncontig)
{
	pte_t orig_pte = huge_ptep_get(ptep);
	bool valid = pte_valid(orig_pte);
	unsigned long i, saddr = addr;

	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) {
		pte_t pte = ptep_get_and_clear(mm, addr, ptep);

		/*
		 * If HW_AFDBM is enabled, then the HW could turn on
		 * the dirty or accessed bit for any page in the set,
		 * so check them all.
		 */
		if (pte_dirty(pte))
			orig_pte = pte_mkdirty(orig_pte);

		if (pte_young(pte))
			orig_pte = pte_mkyoung(orig_pte);
	}

	if (valid) {
		struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);

		flush_tlb_range(&vma, saddr, addr);
	}
	return orig_pte;
}

/*
 * Changing some bits of contiguous entries requires us to follow a
 * Break-Before-Make approach, breaking the whole contiguous set
 * before we can change any entries. See ARM DDI 0487A.k_iss10775,
 * "Misprogramming of the Contiguous bit", page D4-1762.
 *
 * This helper performs the break step for use cases where the
 * original pte is not needed.
 */
static void clear_flush(struct mm_struct *mm,
			unsigned long addr,
			pte_t *ptep,
			unsigned long pgsize,
			unsigned long ncontig)
{
	struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
	unsigned long i, saddr = addr;

	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
		pte_clear(mm, addr, ptep);

	flush_tlb_range(&vma, saddr, addr);
}

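/*
 * Install a huge pte. The non-contiguous (PUD/PMD block) sizes need
 * just one set_pte_at(); the contiguous sizes must first break the
 * whole set with clear_flush() and then rewrite every entry, stepping
 * the pfn forward by one entry's worth of pages each iteration so that
 * all entries in the run describe consecutive output addresses.
 */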
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t pte)
{
	size_t pgsize;
	int i;
	int ncontig;
	unsigned long pfn, dpfn;
	pgprot_t hugeprot;

	/*
	 * Code needs to be expanded to handle huge swap and migration
	 * entries. Needed for HUGETLB and MEMORY_FAILURE.
	 */
	WARN_ON(!pte_present(pte));

	if (!pte_cont(pte)) {
		set_pte_at(mm, addr, ptep, pte);
		return;
	}

	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
	pfn = pte_pfn(pte);
	dpfn = pgsize >> PAGE_SHIFT;
	hugeprot = pte_pgprot(pte);

	clear_flush(mm, addr, ptep, pgsize, ncontig);

	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
		set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
}

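/*
 * Swap and migration entries are never valid to the hardware, so no
 * break-before-make sequence is needed here: the same entry is simply
 * replicated across the contiguous set and there is no pfn to advance.
 */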
void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
			  pte_t *ptep, pte_t pte, unsigned long sz)
{
	int i, ncontig;
	size_t pgsize;

	ncontig = num_contig_ptes(sz, &pgsize);

	for (i = 0; i < ncontig; i++, ptep++)
		set_pte(ptep, pte);
}

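/*
 * Allocate the page table levels needed to map a hugepage of size sz
 * at addr, returning a pointer (cast to pte_t *) to the entry that
 * set_huge_pte_at() will later populate: the pud for PUD_SIZE, a pmd
 * for PMD_SIZE (shared with other mms via huge_pmd_share() where
 * possible) and CONT_PMD_SIZE, or a pte for CONT_PTE_SIZE.
 */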
pte_t *huge_pte_alloc(struct mm_struct *mm,
		      unsigned long addr, unsigned long sz)
{
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep = NULL;

	pgdp = pgd_offset(mm, addr);
	p4dp = p4d_offset(pgdp, addr);
	pudp = pud_alloc(mm, p4dp, addr);
	if (!pudp)
		return NULL;

	if (sz == PUD_SIZE) {
		ptep = (pte_t *)pudp;
	} else if (sz == CONT_PTE_SIZE) {
		pmdp = pmd_alloc(mm, pudp, addr);
		if (!pmdp)
			return NULL;

		WARN_ON(addr & (sz - 1));
		/*
		 * Note that if this code were ever ported to the
		 * 32-bit arm platform then it will cause trouble in
		 * the case where CONFIG_HIGHPTE is set, since there
		 * will be no pte_unmap() to correspond with this
		 * pte_alloc_map().
		 */
		ptep = pte_alloc_map(mm, pmdp, addr);
	} else if (sz == PMD_SIZE) {
		if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) &&
		    pud_none(READ_ONCE(*pudp)))
			ptep = huge_pmd_share(mm, addr, pudp);
		else
			ptep = (pte_t *)pmd_alloc(mm, pudp, addr);
	} else if (sz == CONT_PMD_SIZE) {
		pmdp = pmd_alloc(mm, pudp, addr);
		WARN_ON(addr & (sz - 1));
		return (pte_t *)pmdp;
	}

	return ptep;
}

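/*
 * Look up, without allocating, the entry mapping addr for a hugepage
 * of size sz. Block mappings and swap/migration entries return the
 * pud or pmd itself (cast to pte_t *); the contiguous sizes return the
 * first entry of the set; NULL means nothing suitable is mapped.
 */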
pte_t *huge_pte_offset(struct mm_struct *mm,
		       unsigned long addr, unsigned long sz)
{
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp, pud;
	pmd_t *pmdp, pmd;

	pgdp = pgd_offset(mm, addr);
	if (!pgd_present(READ_ONCE(*pgdp)))
		return NULL;

	p4dp = p4d_offset(pgdp, addr);
	if (!p4d_present(READ_ONCE(*p4dp)))
		return NULL;

	pudp = pud_offset(p4dp, addr);
	pud = READ_ONCE(*pudp);
	if (sz != PUD_SIZE && pud_none(pud))
		return NULL;
	/* hugepage or swap? */
	if (pud_huge(pud) || !pud_present(pud))
		return (pte_t *)pudp;
	/* table; check the next level */

	if (sz == CONT_PMD_SIZE)
		addr &= CONT_PMD_MASK;

	pmdp = pmd_offset(pudp, addr);
	pmd = READ_ONCE(*pmdp);
	if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) &&
	    pmd_none(pmd))
		return NULL;
	if (pmd_huge(pmd) || !pmd_present(pmd))
		return (pte_t *)pmdp;

	if (sz == CONT_PTE_SIZE)
		return pte_offset_kernel(pmdp, (addr & CONT_PTE_MASK));

	return NULL;
}

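/*
 * Fix up the pte the generic hugetlb code built: the two contiguous
 * sizes additionally need the contiguous hint bit set in the entry,
 * while PUD and PMD block mappings are used unmodified.
 */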
pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
			 struct page *page, int writable)
{
	size_t pagesize = huge_page_size(hstate_vma(vma));

	if (pagesize == CONT_PTE_SIZE) {
		entry = pte_mkcont(entry);
	} else if (pagesize == CONT_PMD_SIZE) {
		entry = pmd_pte(pmd_mkcont(pte_pmd(entry)));
	} else if (pagesize != PUD_SIZE && pagesize != PMD_SIZE) {
		pr_warn("%s: unrecognized huge page size 0x%lx\n",
			__func__, pagesize);
	}
	return entry;
}

void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
		    pte_t *ptep, unsigned long sz)
{
	int i, ncontig;
	size_t pgsize;

	ncontig = num_contig_ptes(sz, &pgsize);

	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
		pte_clear(mm, addr, ptep);
}

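/*
 * Clear a huge pte and return its previous value. For a contiguous set
 * this is the break step of break-before-make; get_clear_flush() also
 * folds the hardware-managed dirty and young bits of every entry into
 * the returned pte.
 */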
pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
			      unsigned long addr, pte_t *ptep)
{
	int ncontig;
	size_t pgsize;
	pte_t orig_pte = huge_ptep_get(ptep);

	if (!pte_cont(orig_pte))
		return ptep_get_and_clear(mm, addr, ptep);

	ncontig = find_num_contig(mm, addr, ptep, &pgsize);

	return get_clear_flush(mm, addr, ptep, pgsize, ncontig);
}

/*
 * huge_ptep_set_access_flags will update access flags (dirty, accessed)
 * and write permission.
 *
 * For a contiguous huge pte range, write permission only needs to be
 * checked on the first pte in the set. Then, for each of the
 * contiguous ptes, we need to check whether the dirty or young bits
 * differ from the new pte.
 */
static int __cont_access_flags_changed(pte_t *ptep, pte_t pte, int ncontig)
{
	int i;

	if (pte_write(pte) != pte_write(huge_ptep_get(ptep)))
		return 1;

	for (i = 0; i < ncontig; i++) {
		pte_t orig_pte = huge_ptep_get(ptep + i);

		if (pte_dirty(pte) != pte_dirty(orig_pte))
			return 1;

		if (pte_young(pte) != pte_young(orig_pte))
			return 1;
	}

	return 0;
}

int huge_ptep_set_access_flags(struct vm_area_struct *vma,
			       unsigned long addr, pte_t *ptep,
			       pte_t pte, int dirty)
{
	int ncontig, i;
	size_t pgsize = 0;
	unsigned long pfn = pte_pfn(pte), dpfn;
	pgprot_t hugeprot;
	pte_t orig_pte;

	if (!pte_cont(pte))
		return ptep_set_access_flags(vma, addr, ptep, pte, dirty);

	ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize);
	dpfn = pgsize >> PAGE_SHIFT;

	if (!__cont_access_flags_changed(ptep, pte, ncontig))
		return 0;

	orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);

	/* Make sure we don't lose the dirty or young state */
	if (pte_dirty(orig_pte))
		pte = pte_mkdirty(pte);

	if (pte_young(orig_pte))
		pte = pte_mkyoung(pte);

	hugeprot = pte_pgprot(pte);
	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
		set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot));

	return 1;
}

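/*
 * Write-protect a huge pte. A contiguous set goes through a full
 * break-before-make cycle: clear and flush the whole set, drop write
 * permission, then rewrite every entry, relying on get_clear_flush()
 * so that concurrent hardware dirty/young updates are not lost.
 */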
void huge_ptep_set_wrprotect(struct mm_struct *mm,
			     unsigned long addr, pte_t *ptep)
{
	unsigned long pfn, dpfn;
	pgprot_t hugeprot;
	int ncontig, i;
	size_t pgsize;
	pte_t pte;

	if (!pte_cont(READ_ONCE(*ptep))) {
		ptep_set_wrprotect(mm, addr, ptep);
		return;
	}

	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
	dpfn = pgsize >> PAGE_SHIFT;

	pte = get_clear_flush(mm, addr, ptep, pgsize, ncontig);
	pte = pte_wrprotect(pte);

	hugeprot = pte_pgprot(pte);
	pfn = pte_pfn(pte);

	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
		set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
}

void huge_ptep_clear_flush(struct vm_area_struct *vma,
			   unsigned long addr, pte_t *ptep)
{
	size_t pgsize;
	int ncontig;

	if (!pte_cont(READ_ONCE(*ptep))) {
		ptep_clear_flush(vma, addr, ptep);
		return;
	}

	ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize);
	clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);
}

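/*
 * Register an hstate for each supported size, expressed as an order
 * (log2 of the number of base pages per hugepage). With a 4K granule,
 * for example, these come out as 1G (order 18), 32M (order 13),
 * 2M (order 9) and 64K (order 4). (CONT_PTE_SHIFT + PAGE_SHIFT) -
 * PAGE_SHIFT reduces to CONT_PTE_SHIFT; it is presumably spelled this
 * way to mirror the CONT_PMD line above it.
 */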
static int __init hugetlbpage_init(void)
{
#ifdef CONFIG_ARM64_4K_PAGES
	hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
#endif
	hugetlb_add_hstate((CONT_PMD_SHIFT + PMD_SHIFT) - PAGE_SHIFT);
	hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
	hugetlb_add_hstate((CONT_PTE_SHIFT + PAGE_SHIFT) - PAGE_SHIFT);

	return 0;
}
arch_initcall(hugetlbpage_init);

bool __init arch_hugetlb_valid_size(unsigned long size)
{
	switch (size) {
#ifdef CONFIG_ARM64_4K_PAGES
	case PUD_SIZE:
#endif
	case CONT_PMD_SIZE:
	case PMD_SIZE:
	case CONT_PTE_SIZE:
		return true;
	}

	return false;
}