xref: /openbmc/linux/arch/sparc/mm/hugetlbpage.c (revision 5637bc5048340456176fcd4c8986edc1ac1acbe1)
1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0
227137e52SSam Ravnborg /*
327137e52SSam Ravnborg  * SPARC64 Huge TLB page support.
427137e52SSam Ravnborg  *
527137e52SSam Ravnborg  * Copyright (C) 2002, 2003, 2006 David S. Miller (davem@davemloft.net)
627137e52SSam Ravnborg  */
727137e52SSam Ravnborg 
827137e52SSam Ravnborg #include <linux/fs.h>
927137e52SSam Ravnborg #include <linux/mm.h>
1001042607SIngo Molnar #include <linux/sched/mm.h>
1127137e52SSam Ravnborg #include <linux/hugetlb.h>
1227137e52SSam Ravnborg #include <linux/pagemap.h>
1327137e52SSam Ravnborg #include <linux/sysctl.h>
1427137e52SSam Ravnborg 
1527137e52SSam Ravnborg #include <asm/mman.h>
1627137e52SSam Ravnborg #include <asm/pgalloc.h>
177bc3777cSNitin Gupta #include <asm/pgtable.h>
1827137e52SSam Ravnborg #include <asm/tlb.h>
1927137e52SSam Ravnborg #include <asm/tlbflush.h>
2027137e52SSam Ravnborg #include <asm/cacheflush.h>
2127137e52SSam Ravnborg #include <asm/mmu_context.h>
2227137e52SSam Ravnborg 
2327137e52SSam Ravnborg /* Slightly simplified from the non-hugepage variant because by
2427137e52SSam Ravnborg  * definition we don't have to worry about any page coloring stuff
2527137e52SSam Ravnborg  */
2627137e52SSam Ravnborg 
2727137e52SSam Ravnborg static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp,
2827137e52SSam Ravnborg 							unsigned long addr,
2927137e52SSam Ravnborg 							unsigned long len,
3027137e52SSam Ravnborg 							unsigned long pgoff,
3127137e52SSam Ravnborg 							unsigned long flags)
3227137e52SSam Ravnborg {
33c7d9f77dSNitin Gupta 	struct hstate *h = hstate_file(filp);
3427137e52SSam Ravnborg 	unsigned long task_size = TASK_SIZE;
352aea28b9SMichel Lespinasse 	struct vm_unmapped_area_info info;
3627137e52SSam Ravnborg 
3727137e52SSam Ravnborg 	if (test_thread_flag(TIF_32BIT))
3827137e52SSam Ravnborg 		task_size = STACK_TOP32;
3927137e52SSam Ravnborg 
402aea28b9SMichel Lespinasse 	info.flags = 0;
412aea28b9SMichel Lespinasse 	info.length = len;
422aea28b9SMichel Lespinasse 	info.low_limit = TASK_UNMAPPED_BASE;
432aea28b9SMichel Lespinasse 	info.high_limit = min(task_size, VA_EXCLUDE_START);
44c7d9f77dSNitin Gupta 	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
452aea28b9SMichel Lespinasse 	info.align_offset = 0;
462aea28b9SMichel Lespinasse 	addr = vm_unmapped_area(&info);
472aea28b9SMichel Lespinasse 
482aea28b9SMichel Lespinasse 	if ((addr & ~PAGE_MASK) && task_size > VA_EXCLUDE_END) {
492aea28b9SMichel Lespinasse 		VM_BUG_ON(addr != -ENOMEM);
502aea28b9SMichel Lespinasse 		info.low_limit = VA_EXCLUDE_END;
512aea28b9SMichel Lespinasse 		info.high_limit = task_size;
522aea28b9SMichel Lespinasse 		addr = vm_unmapped_area(&info);
5327137e52SSam Ravnborg 	}
5427137e52SSam Ravnborg 
5527137e52SSam Ravnborg 	return addr;
5627137e52SSam Ravnborg }
5727137e52SSam Ravnborg 
5827137e52SSam Ravnborg static unsigned long
5927137e52SSam Ravnborg hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
6027137e52SSam Ravnborg 				  const unsigned long len,
6127137e52SSam Ravnborg 				  const unsigned long pgoff,
6227137e52SSam Ravnborg 				  const unsigned long flags)
6327137e52SSam Ravnborg {
64c7d9f77dSNitin Gupta 	struct hstate *h = hstate_file(filp);
6527137e52SSam Ravnborg 	struct mm_struct *mm = current->mm;
6627137e52SSam Ravnborg 	unsigned long addr = addr0;
672aea28b9SMichel Lespinasse 	struct vm_unmapped_area_info info;
6827137e52SSam Ravnborg 
6927137e52SSam Ravnborg 	/* This should only ever run for 32-bit processes.  */
7027137e52SSam Ravnborg 	BUG_ON(!test_thread_flag(TIF_32BIT));
7127137e52SSam Ravnborg 
722aea28b9SMichel Lespinasse 	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
732aea28b9SMichel Lespinasse 	info.length = len;
742aea28b9SMichel Lespinasse 	info.low_limit = PAGE_SIZE;
752aea28b9SMichel Lespinasse 	info.high_limit = mm->mmap_base;
76c7d9f77dSNitin Gupta 	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
772aea28b9SMichel Lespinasse 	info.align_offset = 0;
782aea28b9SMichel Lespinasse 	addr = vm_unmapped_area(&info);
7927137e52SSam Ravnborg 
8027137e52SSam Ravnborg 	/*
8127137e52SSam Ravnborg 	 * A failed mmap() very likely causes application failure,
8227137e52SSam Ravnborg 	 * so fall back to the bottom-up function here. This scenario
8327137e52SSam Ravnborg 	 * can happen with large stack limits and large mmap()
8427137e52SSam Ravnborg 	 * allocations.
8527137e52SSam Ravnborg 	 */
862aea28b9SMichel Lespinasse 	if (addr & ~PAGE_MASK) {
872aea28b9SMichel Lespinasse 		VM_BUG_ON(addr != -ENOMEM);
882aea28b9SMichel Lespinasse 		info.flags = 0;
892aea28b9SMichel Lespinasse 		info.low_limit = TASK_UNMAPPED_BASE;
902aea28b9SMichel Lespinasse 		info.high_limit = STACK_TOP32;
912aea28b9SMichel Lespinasse 		addr = vm_unmapped_area(&info);
922aea28b9SMichel Lespinasse 	}
9327137e52SSam Ravnborg 
9427137e52SSam Ravnborg 	return addr;
9527137e52SSam Ravnborg }
9627137e52SSam Ravnborg 
9727137e52SSam Ravnborg unsigned long
9827137e52SSam Ravnborg hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
9927137e52SSam Ravnborg 		unsigned long len, unsigned long pgoff, unsigned long flags)
10027137e52SSam Ravnborg {
101c7d9f77dSNitin Gupta 	struct hstate *h = hstate_file(file);
10227137e52SSam Ravnborg 	struct mm_struct *mm = current->mm;
10327137e52SSam Ravnborg 	struct vm_area_struct *vma;
10427137e52SSam Ravnborg 	unsigned long task_size = TASK_SIZE;
10527137e52SSam Ravnborg 
10627137e52SSam Ravnborg 	if (test_thread_flag(TIF_32BIT))
10727137e52SSam Ravnborg 		task_size = STACK_TOP32;
10827137e52SSam Ravnborg 
109c7d9f77dSNitin Gupta 	if (len & ~huge_page_mask(h))
11027137e52SSam Ravnborg 		return -EINVAL;
11127137e52SSam Ravnborg 	if (len > task_size)
11227137e52SSam Ravnborg 		return -ENOMEM;
11327137e52SSam Ravnborg 
11427137e52SSam Ravnborg 	if (flags & MAP_FIXED) {
11527137e52SSam Ravnborg 		if (prepare_hugepage_range(file, addr, len))
11627137e52SSam Ravnborg 			return -EINVAL;
11727137e52SSam Ravnborg 		return addr;
11827137e52SSam Ravnborg 	}
11927137e52SSam Ravnborg 
12027137e52SSam Ravnborg 	if (addr) {
121c7d9f77dSNitin Gupta 		addr = ALIGN(addr, huge_page_size(h));
12227137e52SSam Ravnborg 		vma = find_vma(mm, addr);
12327137e52SSam Ravnborg 		if (task_size - len >= addr &&
1241be7107fSHugh Dickins 		    (!vma || addr + len <= vm_start_gap(vma)))
12527137e52SSam Ravnborg 			return addr;
12627137e52SSam Ravnborg 	}
12727137e52SSam Ravnborg 	if (mm->get_unmapped_area == arch_get_unmapped_area)
12827137e52SSam Ravnborg 		return hugetlb_get_unmapped_area_bottomup(file, addr, len,
12927137e52SSam Ravnborg 				pgoff, flags);
13027137e52SSam Ravnborg 	else
13127137e52SSam Ravnborg 		return hugetlb_get_unmapped_area_topdown(file, addr, len,
13227137e52SSam Ravnborg 				pgoff, flags);
13327137e52SSam Ravnborg }
13427137e52SSam Ravnborg 
/* sun4u counterpart of sun4v_hugepage_shift_to_tte().  Intentionally a
 * no-op: the TTE is returned unchanged.  Presumably the sun4u format
 * needs no per-size adjustment here -- this stub just keeps the
 * dispatch in hugepage_shift_to_tte() uniform.
 */
static pte_t sun4u_hugepage_shift_to_tte(pte_t entry, unsigned int shift)
{
	return entry;
}
139c7d9f77dSNitin Gupta 
140c7d9f77dSNitin Gupta static pte_t sun4v_hugepage_shift_to_tte(pte_t entry, unsigned int shift)
141c7d9f77dSNitin Gupta {
142c7d9f77dSNitin Gupta 	unsigned long hugepage_size = _PAGE_SZ4MB_4V;
143c7d9f77dSNitin Gupta 
144c7d9f77dSNitin Gupta 	pte_val(entry) = pte_val(entry) & ~_PAGE_SZALL_4V;
145c7d9f77dSNitin Gupta 
146c7d9f77dSNitin Gupta 	switch (shift) {
147df7b2155SNitin Gupta 	case HPAGE_16GB_SHIFT:
148df7b2155SNitin Gupta 		hugepage_size = _PAGE_SZ16GB_4V;
149df7b2155SNitin Gupta 		pte_val(entry) |= _PAGE_PUD_HUGE;
150df7b2155SNitin Gupta 		break;
15185b1da7cSNitin Gupta 	case HPAGE_2GB_SHIFT:
15285b1da7cSNitin Gupta 		hugepage_size = _PAGE_SZ2GB_4V;
15385b1da7cSNitin Gupta 		pte_val(entry) |= _PAGE_PMD_HUGE;
15485b1da7cSNitin Gupta 		break;
155c7d9f77dSNitin Gupta 	case HPAGE_256MB_SHIFT:
156c7d9f77dSNitin Gupta 		hugepage_size = _PAGE_SZ256MB_4V;
157c7d9f77dSNitin Gupta 		pte_val(entry) |= _PAGE_PMD_HUGE;
158c7d9f77dSNitin Gupta 		break;
159c7d9f77dSNitin Gupta 	case HPAGE_SHIFT:
160c7d9f77dSNitin Gupta 		pte_val(entry) |= _PAGE_PMD_HUGE;
161c7d9f77dSNitin Gupta 		break;
162dcd1912dSNitin Gupta 	case HPAGE_64K_SHIFT:
163dcd1912dSNitin Gupta 		hugepage_size = _PAGE_SZ64K_4V;
164dcd1912dSNitin Gupta 		break;
165c7d9f77dSNitin Gupta 	default:
166c7d9f77dSNitin Gupta 		WARN_ONCE(1, "unsupported hugepage shift=%u\n", shift);
167c7d9f77dSNitin Gupta 	}
168c7d9f77dSNitin Gupta 
169c7d9f77dSNitin Gupta 	pte_val(entry) = pte_val(entry) | hugepage_size;
170c7d9f77dSNitin Gupta 	return entry;
171c7d9f77dSNitin Gupta }
172c7d9f77dSNitin Gupta 
173c7d9f77dSNitin Gupta static pte_t hugepage_shift_to_tte(pte_t entry, unsigned int shift)
174c7d9f77dSNitin Gupta {
175c7d9f77dSNitin Gupta 	if (tlb_type == hypervisor)
176c7d9f77dSNitin Gupta 		return sun4v_hugepage_shift_to_tte(entry, shift);
177c7d9f77dSNitin Gupta 	else
178c7d9f77dSNitin Gupta 		return sun4u_hugepage_shift_to_tte(entry, shift);
179c7d9f77dSNitin Gupta }
180c7d9f77dSNitin Gupta 
181c7d9f77dSNitin Gupta pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
182c7d9f77dSNitin Gupta 			 struct page *page, int writeable)
183c7d9f77dSNitin Gupta {
184c7d9f77dSNitin Gupta 	unsigned int shift = huge_page_shift(hstate_vma(vma));
18574a04967SKhalid Aziz 	pte_t pte;
186c7d9f77dSNitin Gupta 
18774a04967SKhalid Aziz 	pte = hugepage_shift_to_tte(entry, shift);
18874a04967SKhalid Aziz 
18974a04967SKhalid Aziz #ifdef CONFIG_SPARC64
19074a04967SKhalid Aziz 	/* If this vma has ADI enabled on it, turn on TTE.mcd
19174a04967SKhalid Aziz 	 */
19274a04967SKhalid Aziz 	if (vma->vm_flags & VM_SPARC_ADI)
19374a04967SKhalid Aziz 		return pte_mkmcd(pte);
19474a04967SKhalid Aziz 	else
19574a04967SKhalid Aziz 		return pte_mknotmcd(pte);
19674a04967SKhalid Aziz #else
19774a04967SKhalid Aziz 	return pte;
19874a04967SKhalid Aziz #endif
199c7d9f77dSNitin Gupta }
200c7d9f77dSNitin Gupta 
201c7d9f77dSNitin Gupta static unsigned int sun4v_huge_tte_to_shift(pte_t entry)
202c7d9f77dSNitin Gupta {
203c7d9f77dSNitin Gupta 	unsigned long tte_szbits = pte_val(entry) & _PAGE_SZALL_4V;
204c7d9f77dSNitin Gupta 	unsigned int shift;
205c7d9f77dSNitin Gupta 
206c7d9f77dSNitin Gupta 	switch (tte_szbits) {
207df7b2155SNitin Gupta 	case _PAGE_SZ16GB_4V:
208df7b2155SNitin Gupta 		shift = HPAGE_16GB_SHIFT;
209df7b2155SNitin Gupta 		break;
21085b1da7cSNitin Gupta 	case _PAGE_SZ2GB_4V:
21185b1da7cSNitin Gupta 		shift = HPAGE_2GB_SHIFT;
21285b1da7cSNitin Gupta 		break;
213c7d9f77dSNitin Gupta 	case _PAGE_SZ256MB_4V:
214c7d9f77dSNitin Gupta 		shift = HPAGE_256MB_SHIFT;
215c7d9f77dSNitin Gupta 		break;
216c7d9f77dSNitin Gupta 	case _PAGE_SZ4MB_4V:
217c7d9f77dSNitin Gupta 		shift = REAL_HPAGE_SHIFT;
218c7d9f77dSNitin Gupta 		break;
219dcd1912dSNitin Gupta 	case _PAGE_SZ64K_4V:
220dcd1912dSNitin Gupta 		shift = HPAGE_64K_SHIFT;
221dcd1912dSNitin Gupta 		break;
222c7d9f77dSNitin Gupta 	default:
223c7d9f77dSNitin Gupta 		shift = PAGE_SHIFT;
224c7d9f77dSNitin Gupta 		break;
225c7d9f77dSNitin Gupta 	}
226c7d9f77dSNitin Gupta 	return shift;
227c7d9f77dSNitin Gupta }
228c7d9f77dSNitin Gupta 
229c7d9f77dSNitin Gupta static unsigned int sun4u_huge_tte_to_shift(pte_t entry)
230c7d9f77dSNitin Gupta {
231c7d9f77dSNitin Gupta 	unsigned long tte_szbits = pte_val(entry) & _PAGE_SZALL_4U;
232c7d9f77dSNitin Gupta 	unsigned int shift;
233c7d9f77dSNitin Gupta 
234c7d9f77dSNitin Gupta 	switch (tte_szbits) {
235c7d9f77dSNitin Gupta 	case _PAGE_SZ256MB_4U:
236c7d9f77dSNitin Gupta 		shift = HPAGE_256MB_SHIFT;
237c7d9f77dSNitin Gupta 		break;
238c7d9f77dSNitin Gupta 	case _PAGE_SZ4MB_4U:
239c7d9f77dSNitin Gupta 		shift = REAL_HPAGE_SHIFT;
240c7d9f77dSNitin Gupta 		break;
241dcd1912dSNitin Gupta 	case _PAGE_SZ64K_4U:
242dcd1912dSNitin Gupta 		shift = HPAGE_64K_SHIFT;
243dcd1912dSNitin Gupta 		break;
244c7d9f77dSNitin Gupta 	default:
245c7d9f77dSNitin Gupta 		shift = PAGE_SHIFT;
246c7d9f77dSNitin Gupta 		break;
247c7d9f77dSNitin Gupta 	}
248c7d9f77dSNitin Gupta 	return shift;
249c7d9f77dSNitin Gupta }
250c7d9f77dSNitin Gupta 
251c7d9f77dSNitin Gupta static unsigned int huge_tte_to_shift(pte_t entry)
252c7d9f77dSNitin Gupta {
253c7d9f77dSNitin Gupta 	unsigned long shift;
254c7d9f77dSNitin Gupta 
255c7d9f77dSNitin Gupta 	if (tlb_type == hypervisor)
256c7d9f77dSNitin Gupta 		shift = sun4v_huge_tte_to_shift(entry);
257c7d9f77dSNitin Gupta 	else
258c7d9f77dSNitin Gupta 		shift = sun4u_huge_tte_to_shift(entry);
259c7d9f77dSNitin Gupta 
260c7d9f77dSNitin Gupta 	if (shift == PAGE_SHIFT)
261c7d9f77dSNitin Gupta 		WARN_ONCE(1, "tto_to_shift: invalid hugepage tte=0x%lx\n",
262c7d9f77dSNitin Gupta 			  pte_val(entry));
263c7d9f77dSNitin Gupta 
264c7d9f77dSNitin Gupta 	return shift;
265c7d9f77dSNitin Gupta }
266c7d9f77dSNitin Gupta 
267c7d9f77dSNitin Gupta static unsigned long huge_tte_to_size(pte_t pte)
268c7d9f77dSNitin Gupta {
269c7d9f77dSNitin Gupta 	unsigned long size = 1UL << huge_tte_to_shift(pte);
270c7d9f77dSNitin Gupta 
271c7d9f77dSNitin Gupta 	if (size == REAL_HPAGE_SIZE)
272c7d9f77dSNitin Gupta 		size = HPAGE_SIZE;
273c7d9f77dSNitin Gupta 	return size;
274c7d9f77dSNitin Gupta }
275c7d9f77dSNitin Gupta 
/* Allocate (or locate) the page-table slot that will hold the huge
 * PTE for @addr.  Depending on the huge page size @sz, the returned
 * pte_t pointer actually aims at a PUD entry, a PMD entry, or a
 * regular PTE slot.  Returns NULL on allocation failure.
 */
pte_t *huge_pte_alloc(struct mm_struct *mm,
			unsigned long addr, unsigned long sz)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	pgd = pgd_offset(mm, addr);
	p4d = p4d_offset(pgd, addr);
	pud = pud_alloc(mm, p4d, addr);
	if (!pud)
		return NULL;
	/* PUD-sized huge pages live directly in the PUD entry. */
	if (sz >= PUD_SIZE)
		return (pte_t *)pud;
	pmd = pmd_alloc(mm, pud, addr);
	if (!pmd)
		return NULL;
	/* PMD-sized huge pages live directly in the PMD entry. */
	if (sz >= PMD_SIZE)
		return (pte_t *)pmd;
	/* Smaller huge pages (e.g. 64K) use a normal PTE slot. */
	return pte_alloc_map(mm, pmd, addr);
}
29827137e52SSam Ravnborg 
/* Walk the page tables for @addr and return the slot holding its
 * huge PTE, or NULL if nothing is mapped at some level.  As with
 * huge_pte_alloc(), the result may actually point at a PUD, a PMD,
 * or a regular PTE slot, depending on the mapping's size.
 */
pte_t *huge_pte_offset(struct mm_struct *mm,
		       unsigned long addr, unsigned long sz)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	pgd = pgd_offset(mm, addr);
	if (pgd_none(*pgd))
		return NULL;
	p4d = p4d_offset(pgd, addr);
	if (p4d_none(*p4d))
		return NULL;
	pud = pud_offset(p4d, addr);
	if (pud_none(*pud))
		return NULL;
	/* A huge PUD terminates the walk at this level. */
	if (is_hugetlb_pud(*pud))
		return (pte_t *)pud;
	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd))
		return NULL;
	/* Likewise for a huge PMD. */
	if (is_hugetlb_pmd(*pmd))
		return (pte_t *)pmd;
	return pte_offset_map(pmd, addr);
}
32527137e52SSam Ravnborg 
32627137e52SSam Ravnborg void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
32727137e52SSam Ravnborg 		     pte_t *ptep, pte_t entry)
32827137e52SSam Ravnborg {
329df7b2155SNitin Gupta 	unsigned int nptes, orig_shift, shift;
330df7b2155SNitin Gupta 	unsigned long i, size;
3317bc3777cSNitin Gupta 	pte_t orig;
33227137e52SSam Ravnborg 
333c7d9f77dSNitin Gupta 	size = huge_tte_to_size(entry);
334df7b2155SNitin Gupta 
335df7b2155SNitin Gupta 	shift = PAGE_SHIFT;
336df7b2155SNitin Gupta 	if (size >= PUD_SIZE)
337df7b2155SNitin Gupta 		shift = PUD_SHIFT;
338df7b2155SNitin Gupta 	else if (size >= PMD_SIZE)
339df7b2155SNitin Gupta 		shift = PMD_SHIFT;
340df7b2155SNitin Gupta 	else
341df7b2155SNitin Gupta 		shift = PAGE_SHIFT;
342df7b2155SNitin Gupta 
343dcd1912dSNitin Gupta 	nptes = size >> shift;
344c7d9f77dSNitin Gupta 
34527137e52SSam Ravnborg 	if (!pte_present(*ptep) && pte_present(entry))
346c7d9f77dSNitin Gupta 		mm->context.hugetlb_pte_count += nptes;
34727137e52SSam Ravnborg 
348c7d9f77dSNitin Gupta 	addr &= ~(size - 1);
3497bc3777cSNitin Gupta 	orig = *ptep;
350ac65e282SNitin Gupta 	orig_shift = pte_none(orig) ? PAGE_SHIFT : huge_tte_to_shift(orig);
35124e49ee3SNitin Gupta 
352c7d9f77dSNitin Gupta 	for (i = 0; i < nptes; i++)
353dcd1912dSNitin Gupta 		ptep[i] = __pte(pte_val(entry) + (i << shift));
354c7d9f77dSNitin Gupta 
355dcd1912dSNitin Gupta 	maybe_tlb_batch_add(mm, addr, ptep, orig, 0, orig_shift);
356c7d9f77dSNitin Gupta 	/* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */
357c7d9f77dSNitin Gupta 	if (size == HPAGE_SIZE)
358c7d9f77dSNitin Gupta 		maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, orig, 0,
359dcd1912dSNitin Gupta 				    orig_shift);
36027137e52SSam Ravnborg }
36127137e52SSam Ravnborg 
36227137e52SSam Ravnborg pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
36327137e52SSam Ravnborg 			      pte_t *ptep)
36427137e52SSam Ravnborg {
365df7b2155SNitin Gupta 	unsigned int i, nptes, orig_shift, shift;
366c7d9f77dSNitin Gupta 	unsigned long size;
36727137e52SSam Ravnborg 	pte_t entry;
36827137e52SSam Ravnborg 
36927137e52SSam Ravnborg 	entry = *ptep;
370c7d9f77dSNitin Gupta 	size = huge_tte_to_size(entry);
371f10bb007SNitin Gupta 
372df7b2155SNitin Gupta 	shift = PAGE_SHIFT;
373df7b2155SNitin Gupta 	if (size >= PUD_SIZE)
374df7b2155SNitin Gupta 		shift = PUD_SHIFT;
375df7b2155SNitin Gupta 	else if (size >= PMD_SIZE)
376df7b2155SNitin Gupta 		shift = PMD_SHIFT;
377df7b2155SNitin Gupta 	else
378df7b2155SNitin Gupta 		shift = PAGE_SHIFT;
379df7b2155SNitin Gupta 
380df7b2155SNitin Gupta 	nptes = size >> shift;
381df7b2155SNitin Gupta 	orig_shift = pte_none(entry) ? PAGE_SHIFT : huge_tte_to_shift(entry);
382c7d9f77dSNitin Gupta 
38327137e52SSam Ravnborg 	if (pte_present(entry))
384c7d9f77dSNitin Gupta 		mm->context.hugetlb_pte_count -= nptes;
38527137e52SSam Ravnborg 
386c7d9f77dSNitin Gupta 	addr &= ~(size - 1);
387c7d9f77dSNitin Gupta 	for (i = 0; i < nptes; i++)
388c7d9f77dSNitin Gupta 		ptep[i] = __pte(0UL);
38927137e52SSam Ravnborg 
390df7b2155SNitin Gupta 	maybe_tlb_batch_add(mm, addr, ptep, entry, 0, orig_shift);
391c7d9f77dSNitin Gupta 	/* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */
392c7d9f77dSNitin Gupta 	if (size == HPAGE_SIZE)
393c7d9f77dSNitin Gupta 		maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, entry, 0,
394df7b2155SNitin Gupta 				    orig_shift);
39524e49ee3SNitin Gupta 
39627137e52SSam Ravnborg 	return entry;
39727137e52SSam Ravnborg }
39827137e52SSam Ravnborg 
39927137e52SSam Ravnborg int pmd_huge(pmd_t pmd)
40027137e52SSam Ravnborg {
4017bc3777cSNitin Gupta 	return !pmd_none(pmd) &&
4027bc3777cSNitin Gupta 		(pmd_val(pmd) & (_PAGE_VALID|_PAGE_PMD_HUGE)) != _PAGE_VALID;
40327137e52SSam Ravnborg }
40427137e52SSam Ravnborg 
40527137e52SSam Ravnborg int pud_huge(pud_t pud)
40627137e52SSam Ravnborg {
407df7b2155SNitin Gupta 	return !pud_none(pud) &&
408df7b2155SNitin Gupta 		(pud_val(pud) & (_PAGE_VALID|_PAGE_PUD_HUGE)) != _PAGE_VALID;
40927137e52SSam Ravnborg }
4107bc3777cSNitin Gupta 
/* Unlink and free the PTE page hanging off @pmd, updating the mm's
 * page-table accounting.
 */
static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
			   unsigned long addr)
{
	pgtable_t token = pmd_pgtable(*pmd);

	pmd_clear(pmd);
	pte_free_tlb(tlb, token, addr);
	mm_dec_nr_ptes(tlb->mm);
}
4207bc3777cSNitin Gupta 
/* Tear down the PMD level under @pud for [addr, end).  Huge PMDs are
 * simply cleared (there is no lower table to free); normal PMDs have
 * their PTE page freed.  Afterwards, if the whole PUD span fits
 * inside [floor, ceiling), the PMD page itself is unlinked from the
 * PUD and freed.
 */
static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
				   unsigned long addr, unsigned long end,
				   unsigned long floor, unsigned long ceiling)
{
	pmd_t *pmd;
	unsigned long next;
	unsigned long start;

	start = addr;
	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none(*pmd))
			continue;
		if (is_hugetlb_pmd(*pmd))
			pmd_clear(pmd);
		else
			hugetlb_free_pte_range(tlb, pmd, addr);
	} while (pmd++, addr = next, addr != end);

	/* Bail out unless the full PUD-aligned span lies within the
	 * [floor, ceiling) bounds the caller allows us to free.
	 */
	start &= PUD_MASK;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= PUD_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	pmd = pmd_offset(pud, start);
	pud_clear(pud);
	pmd_free_tlb(tlb, pmd, start);
	mm_dec_nr_pmds(tlb->mm);
}
4577bc3777cSNitin Gupta 
/* Tear down the PUD level under @p4d for [addr, end).  Huge PUDs are
 * cleared in place; others recurse into the PMD level.  Afterwards,
 * if the whole PGDIR span fits inside [floor, ceiling), the PUD page
 * itself is unlinked from the P4D and freed.
 */
static void hugetlb_free_pud_range(struct mmu_gather *tlb, p4d_t *p4d,
				   unsigned long addr, unsigned long end,
				   unsigned long floor, unsigned long ceiling)
{
	pud_t *pud;
	unsigned long next;
	unsigned long start;

	start = addr;
	pud = pud_offset(p4d, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		if (is_hugetlb_pud(*pud))
			pud_clear(pud);
		else
			hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
					       ceiling);
	} while (pud++, addr = next, addr != end);

	/* Bail out unless the full PGDIR-aligned span lies within the
	 * [floor, ceiling) bounds the caller allows us to free.
	 */
	start &= PGDIR_MASK;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= PGDIR_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	pud = pud_offset(p4d, start);
	p4d_clear(p4d);
	pud_free_tlb(tlb, pud, start);
	mm_dec_nr_puds(tlb->mm);
}
4957bc3777cSNitin Gupta 
/* arch hook: free the page-table pages backing huge mappings in
 * [addr, end), clamped to [floor, ceiling) so tables shared with
 * neighbouring mappings survive.  The range is first trimmed to
 * PMD_SIZE granularity before walking the P4D entries -- presumably
 * because that is the smallest table span a huge mapping can own
 * here; TODO confirm against free_pgd_range().
 */
void hugetlb_free_pgd_range(struct mmu_gather *tlb,
			    unsigned long addr, unsigned long end,
			    unsigned long floor, unsigned long ceiling)
{
	pgd_t *pgd;
	p4d_t *p4d;
	unsigned long next;

	/* Trim [addr, end) to whole PMD spans inside [floor, ceiling);
	 * nothing to do if the trimmed range collapses.
	 */
	addr &= PMD_MASK;
	if (addr < floor) {
		addr += PMD_SIZE;
		if (!addr)
			return;
	}
	if (ceiling) {
		ceiling &= PMD_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		end -= PMD_SIZE;
	if (addr > end - 1)
		return;

	pgd = pgd_offset(tlb->mm, addr);
	p4d = p4d_offset(pgd, addr);
	do {
		next = p4d_addr_end(addr, end);
		if (p4d_none_or_clear_bad(p4d))
			continue;
		hugetlb_free_pud_range(tlb, p4d, addr, next, floor, ceiling);
	} while (p4d++, addr = next, addr != end);
}
529