/*
 * mm/hugetlb.c (revision 4c887265)
 *
 * Generic hugetlb support.
 * (C) William Irwin, April 2004
 */
#include <linux/gfp.h>
#include <linux/list.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/nodemask.h>
#include <linux/pagemap.h>
#include <asm/page.h>
#include <asm/pgtable.h>

#include <linux/hugetlb.h>

const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
static unsigned long nr_huge_pages, free_huge_pages;
unsigned long max_huge_pages;
static struct list_head hugepage_freelists[MAX_NUMNODES];
static unsigned int nr_huge_pages_node[MAX_NUMNODES];
static unsigned int free_huge_pages_node[MAX_NUMNODES];
static DEFINE_SPINLOCK(hugetlb_lock);

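/*
 * Put a free huge page on its node's free list and bump the global
 * and per-node free counters.  Caller must hold hugetlb_lock.
 */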
static void enqueue_huge_page(struct page *page)
{
	int nid = page_to_nid(page);
	list_add(&page->lru, &hugepage_freelists[nid]);
	free_huge_pages++;
	free_huge_pages_node[nid]++;
}

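/*
 * Take a huge page off a free list, preferring the current node but
 * falling back to the first node that has pages.  Returns NULL when
 * the pool is empty.  Caller must hold hugetlb_lock.
 */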
static struct page *dequeue_huge_page(void)
{
	int nid = numa_node_id();
	struct page *page = NULL;

	if (list_empty(&hugepage_freelists[nid])) {
		for (nid = 0; nid < MAX_NUMNODES; ++nid)
			if (!list_empty(&hugepage_freelists[nid]))
				break;
	}
	if (nid >= 0 && nid < MAX_NUMNODES &&
	    !list_empty(&hugepage_freelists[nid])) {
		page = list_entry(hugepage_freelists[nid].next,
				  struct page, lru);
		list_del(&page->lru);
		free_huge_pages--;
		free_huge_pages_node[nid]--;
	}
	return page;
}

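/*
 * Allocate a fresh huge page from the buddy allocator, rotating the
 * starting node round-robin, and account it in the pool totals.  The
 * caller is responsible for enqueueing it under hugetlb_lock.
 */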
static struct page *alloc_fresh_huge_page(void)
{
	static int nid = 0;
	struct page *page;
	page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP|__GFP_NOWARN,
					HUGETLB_PAGE_ORDER);
	nid = (nid + 1) % num_online_nodes();
	if (page) {
		nr_huge_pages++;
		nr_huge_pages_node[page_to_nid(page)]++;
	}
	return page;
}

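/*
 * Compound-page destructor, invoked when the last reference to a huge
 * page is dropped: clear the destructor slot stashed in
 * page[1].mapping and return the page to the pool's free lists.
 */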
void free_huge_page(struct page *page)
{
	BUG_ON(page_count(page));

	INIT_LIST_HEAD(&page->lru);
	page[1].mapping = NULL;

	spin_lock(&hugetlb_lock);
	enqueue_huge_page(page);
	spin_unlock(&hugetlb_lock);
}

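/*
 * Hand out a huge page from the pool: dequeue one, give it an initial
 * reference, install free_huge_page() as its destructor and zero its
 * contents.  Returns NULL if the pool is exhausted.
 */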
struct page *alloc_huge_page(void)
{
	struct page *page;
	int i;

	spin_lock(&hugetlb_lock);
	page = dequeue_huge_page();
	if (!page) {
		spin_unlock(&hugetlb_lock);
		return NULL;
	}
	spin_unlock(&hugetlb_lock);
	set_page_count(page, 1);
	page[1].mapping = (void *)free_huge_page;
	for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); ++i)
		clear_highpage(&page[i]);
	return page;
}

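/*
 * Boot-time initialisation: set up the per-node free lists and try to
 * preallocate max_huge_pages huge pages, as requested through the
 * hugepages= boot parameter.
 */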
static int __init hugetlb_init(void)
{
	unsigned long i;
	struct page *page;

	for (i = 0; i < MAX_NUMNODES; ++i)
		INIT_LIST_HEAD(&hugepage_freelists[i]);

	for (i = 0; i < max_huge_pages; ++i) {
		page = alloc_fresh_huge_page();
		if (!page)
			break;
		spin_lock(&hugetlb_lock);
		enqueue_huge_page(page);
		spin_unlock(&hugetlb_lock);
	}
	max_huge_pages = free_huge_pages = nr_huge_pages = i;
	printk(KERN_INFO "Total HugeTLB memory allocated, %lu\n",
			free_huge_pages);
	return 0;
}
module_init(hugetlb_init);

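/* Parse the hugepages= boot parameter. */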
static int __init hugetlb_setup(char *s)
{
	if (sscanf(s, "%lu", &max_huge_pages) <= 0)
		max_huge_pages = 0;
	return 1;
}
__setup("hugepages=", hugetlb_setup);

#ifdef CONFIG_SYSCTL
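/*
 * Remove a huge page from the pool accounting, scrub the page flags
 * left over from its time in the pool and hand it back to the buddy
 * allocator.
 */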
static void update_and_free_page(struct page *page)
{
	int i;
	nr_huge_pages--;
	nr_huge_pages_node[page_zone(page)->zone_pgdat->node_id]--;
	for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) {
		page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced |
				1 << PG_dirty | 1 << PG_active | 1 << PG_reserved |
				1 << PG_private | 1 << PG_writeback);
		set_page_count(&page[i], 0);
	}
	set_page_count(page, 1);
	__free_pages(page, HUGETLB_PAGE_ORDER);
}

#ifdef CONFIG_HIGHMEM
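/*
 * When shrinking the pool, free lowmem huge pages first: highmem
 * pages are skipped so that scarce lowmem is returned to the kernel
 * in preference.  Stops once the pool is down to 'count' pages.
 */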
static void try_to_free_low(unsigned long count)
{
	int i, nid;
	for (i = 0; i < MAX_NUMNODES; ++i) {
		struct page *page, *next;
		list_for_each_entry_safe(page, next, &hugepage_freelists[i], lru) {
			if (PageHighMem(page))
				continue;
			list_del(&page->lru);
			update_and_free_page(page);
			nid = page_zone(page)->zone_pgdat->node_id;
			free_huge_pages--;
			free_huge_pages_node[nid]--;
			if (count >= nr_huge_pages)
				return;
		}
	}
}
#else
static inline void try_to_free_low(unsigned long count)
{
}
#endif

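/*
 * Resize the pool to 'count' pages.  Growing stops early if the buddy
 * allocator cannot supply more huge pages; shrinking frees lowmem
 * pages first and can only release pages that are currently free.
 * Returns the pool size actually reached.
 */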
static unsigned long set_max_huge_pages(unsigned long count)
{
	while (count > nr_huge_pages) {
		struct page *page = alloc_fresh_huge_page();
		if (!page)
			return nr_huge_pages;
		spin_lock(&hugetlb_lock);
		enqueue_huge_page(page);
		spin_unlock(&hugetlb_lock);
	}
	if (count >= nr_huge_pages)
		return nr_huge_pages;

	spin_lock(&hugetlb_lock);
	try_to_free_low(count);
	while (count < nr_huge_pages) {
		struct page *page = dequeue_huge_page();
		if (!page)
			break;
		update_and_free_page(page);
	}
	spin_unlock(&hugetlb_lock);
	return nr_huge_pages;
}

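/*
 * Sysctl handler for vm.nr_hugepages: parse the requested value and
 * resize the pool, writing back the size actually achieved.
 */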
int hugetlb_sysctl_handler(struct ctl_table *table, int write,
			   struct file *file, void __user *buffer,
			   size_t *length, loff_t *ppos)
{
	proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
	max_huge_pages = set_max_huge_pages(max_huge_pages);
	return 0;
}
#endif /* CONFIG_SYSCTL */

int hugetlb_report_meminfo(char *buf)
{
	return sprintf(buf,
			"HugePages_Total: %5lu\n"
			"HugePages_Free:  %5lu\n"
			"Hugepagesize:    %5lu kB\n",
			nr_huge_pages,
			free_huge_pages,
			HPAGE_SIZE/1024);
}

int hugetlb_report_node_meminfo(int nid, char *buf)
{
	return sprintf(buf,
		"Node %d HugePages_Total: %5u\n"
		"Node %d HugePages_Free:  %5u\n",
		nid, nr_huge_pages_node[nid],
		nid, free_huge_pages_node[nid]);
}

int is_hugepage_mem_enough(size_t size)
{
	return (size + ~HPAGE_MASK)/HPAGE_SIZE <= free_huge_pages;
}

/* Return the number of pages of memory we physically have, in PAGE_SIZE units. */
unsigned long hugetlb_total_pages(void)
{
	return nr_huge_pages * (HPAGE_SIZE / PAGE_SIZE);
}
EXPORT_SYMBOL(hugetlb_total_pages);

/*
 * We cannot handle pagefaults against hugetlb pages at all.  They cause
 * handle_mm_fault() to try to instantiate regular-sized pages in the
 * hugepage VMA.  do_page_fault() is supposed to trap this, so BUG if we
 * get this far.
 */
static struct page *hugetlb_nopage(struct vm_area_struct *vma,
				unsigned long address, int *unused)
{
	BUG();
	return NULL;
}

struct vm_operations_struct hugetlb_vm_ops = {
	.nopage = hugetlb_nopage,
};

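/*
 * Build the pte for a newly instantiated huge page: writable and dirty
 * for VM_WRITE mappings, write-protected otherwise, and always marked
 * young and huge.
 */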
static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page)
{
	pte_t entry;

	if (vma->vm_flags & VM_WRITE) {
		entry =
		    pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
	} else {
		entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
	}
	entry = pte_mkyoung(entry);
	entry = pte_mkhuge(entry);

	return entry;
}

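/*
 * Copy the hugetlb ptes of a VMA from parent to child at fork time.
 * Huge pages are shared rather than copied: each pte copied gets an
 * extra page reference and raises the child's rss accordingly.
 */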
int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
			    struct vm_area_struct *vma)
{
	pte_t *src_pte, *dst_pte, entry;
	struct page *ptepage;
	unsigned long addr;

	for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
		src_pte = huge_pte_offset(src, addr);
		if (!src_pte)
			continue;
		dst_pte = huge_pte_alloc(dst, addr);
		if (!dst_pte)
			goto nomem;
		spin_lock(&dst->page_table_lock);
		spin_lock(&src->page_table_lock);
		if (!pte_none(*src_pte)) {
			entry = *src_pte;
			ptepage = pte_page(entry);
			get_page(ptepage);
			add_mm_counter(dst, file_rss, HPAGE_SIZE / PAGE_SIZE);
			set_huge_pte_at(dst, addr, dst_pte, entry);
		}
		spin_unlock(&src->page_table_lock);
		spin_unlock(&dst->page_table_lock);
	}
	return 0;

nomem:
	return -ENOMEM;
}

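/*
 * Tear down the hugetlb ptes in [start, end), dropping one page
 * reference per mapping and lowering the rss counter.  Both bounds
 * must be huge page aligned; the TLB is flushed after the page table
 * lock is released.
 */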
void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
			  unsigned long end)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long address;
	pte_t *ptep;
	pte_t pte;
	struct page *page;

	WARN_ON(!is_vm_hugetlb_page(vma));
	BUG_ON(start & ~HPAGE_MASK);
	BUG_ON(end & ~HPAGE_MASK);

	spin_lock(&mm->page_table_lock);

	/* Update high watermark before we lower rss */
	update_hiwater_rss(mm);

	for (address = start; address < end; address += HPAGE_SIZE) {
		ptep = huge_pte_offset(mm, address);
		if (!ptep)
			continue;

		pte = huge_ptep_get_and_clear(mm, address, ptep);
		if (pte_none(pte))
			continue;

		page = pte_page(pte);
		put_page(page);
		add_mm_counter(mm, file_rss, (int) -(HPAGE_SIZE / PAGE_SIZE));
	}

	spin_unlock(&mm->page_table_lock);
	flush_tlb_range(vma, start, end);
}

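/*
 * Find the huge page backing index 'idx' in the file's page cache,
 * allocating a fresh zeroed page (charged against the hugetlbfs
 * quota) and inserting it if none exists yet.  Returns the page
 * locked, or NULL on truncation, quota or allocation failure.
 */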
static struct page *find_lock_huge_page(struct address_space *mapping,
			unsigned long idx)
{
	struct page *page;
	int err;
	struct inode *inode = mapping->host;
	unsigned long size;

retry:
	page = find_lock_page(mapping, idx);
	if (page)
		goto out;

	/* Check to make sure the mapping hasn't been truncated */
	size = i_size_read(inode) >> HPAGE_SHIFT;
	if (idx >= size)
		goto out;

	if (hugetlb_get_quota(mapping))
		goto out;
	page = alloc_huge_page();
	if (!page) {
		hugetlb_put_quota(mapping);
		goto out;
	}

	err = add_to_page_cache(page, mapping, idx, GFP_KERNEL);
	if (err) {
		put_page(page);
		hugetlb_put_quota(mapping);
		if (err == -EEXIST)
			goto retry;
		page = NULL;
	}
out:
	return page;
}

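/*
 * Handle a fault on a hugetlb VMA: find or create the backing page in
 * the page cache, then install the huge pte under page_table_lock,
 * rechecking for truncation and for a racing fault that populated the
 * pte first.  Returns VM_FAULT_MINOR on success, VM_FAULT_SIGBUS on
 * failure.
 */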
int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
			unsigned long address, int write_access)
{
	int ret = VM_FAULT_SIGBUS;
	unsigned long idx;
	unsigned long size;
	pte_t *pte;
	struct page *page;
	struct address_space *mapping;

	pte = huge_pte_alloc(mm, address);
	if (!pte)
		goto out;

	mapping = vma->vm_file->f_mapping;
	idx = ((address - vma->vm_start) >> HPAGE_SHIFT)
		+ (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));

	/*
	 * Use page lock to guard against racing truncation
	 * before we get page_table_lock.
	 */
	page = find_lock_huge_page(mapping, idx);
	if (!page)
		goto out;

	spin_lock(&mm->page_table_lock);
	size = i_size_read(mapping->host) >> HPAGE_SHIFT;
	if (idx >= size)
		goto backout;

	ret = VM_FAULT_MINOR;
	if (!pte_none(*pte))
		goto backout;

	add_mm_counter(mm, file_rss, HPAGE_SIZE / PAGE_SIZE);
	set_huge_pte_at(mm, address, pte, make_huge_pte(vma, page));
	spin_unlock(&mm->page_table_lock);
	unlock_page(page);
out:
	return ret;

backout:
	spin_unlock(&mm->page_table_lock);
	hugetlb_put_quota(mapping);
	unlock_page(page);
	put_page(page);
	goto out;
}

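/*
 * get_user_pages() helper for hugetlb VMAs: walk the request one
 * small page at a time, faulting huge pages in as needed, and fill
 * the pages[] and vmas[] arrays.  Returns the number of pages taken,
 * or -EFAULT if the very first page cannot be faulted in.
 */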
int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
			struct page **pages, struct vm_area_struct **vmas,
			unsigned long *position, int *length, int i)
{
	unsigned long vpfn, vaddr = *position;
	int remainder = *length;

	vpfn = vaddr/PAGE_SIZE;
	spin_lock(&mm->page_table_lock);
	while (vaddr < vma->vm_end && remainder) {
		pte_t *pte;
		struct page *page;

		/*
		 * Some archs (sparc64, sh*) have multiple pte_ts to
		 * each hugepage.  We have to make sure we get the
		 * first, for the page indexing below to work.
		 */
		pte = huge_pte_offset(mm, vaddr & HPAGE_MASK);

		if (!pte || pte_none(*pte)) {
			int ret;

			spin_unlock(&mm->page_table_lock);
			ret = hugetlb_fault(mm, vma, vaddr, 0);
			spin_lock(&mm->page_table_lock);
			if (ret == VM_FAULT_MINOR)
				continue;

			remainder = 0;
			if (!i)
				i = -EFAULT;
			break;
		}

		if (pages) {
			page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
			get_page(page);
			pages[i] = page;
		}

		if (vmas)
			vmas[i] = vma;

		vaddr += PAGE_SIZE;
		++vpfn;
		--remainder;
		++i;
	}
	spin_unlock(&mm->page_table_lock);
	*length = remainder;
	*position = vaddr;

	return i;
}