xref: /openbmc/linux/mm/hugetlb.c (revision fe1668ae)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * Generic hugetlb support.
31da177e4SLinus Torvalds  * (C) William Irwin, April 2004
41da177e4SLinus Torvalds  */
51da177e4SLinus Torvalds #include <linux/gfp.h>
61da177e4SLinus Torvalds #include <linux/list.h>
71da177e4SLinus Torvalds #include <linux/init.h>
81da177e4SLinus Torvalds #include <linux/module.h>
91da177e4SLinus Torvalds #include <linux/mm.h>
101da177e4SLinus Torvalds #include <linux/sysctl.h>
111da177e4SLinus Torvalds #include <linux/highmem.h>
121da177e4SLinus Torvalds #include <linux/nodemask.h>
1363551ae0SDavid Gibson #include <linux/pagemap.h>
145da7ca86SChristoph Lameter #include <linux/mempolicy.h>
15aea47ff3SChristoph Lameter #include <linux/cpuset.h>
163935baa9SDavid Gibson #include <linux/mutex.h>
175da7ca86SChristoph Lameter 
1863551ae0SDavid Gibson #include <asm/page.h>
1963551ae0SDavid Gibson #include <asm/pgtable.h>
2063551ae0SDavid Gibson 
2163551ae0SDavid Gibson #include <linux/hugetlb.h>
227835e98bSNick Piggin #include "internal.h"
231da177e4SLinus Torvalds 
241da177e4SLinus Torvalds const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
25a43a8c39SChen, Kenneth W static unsigned long nr_huge_pages, free_huge_pages, resv_huge_pages;
261da177e4SLinus Torvalds unsigned long max_huge_pages;
271da177e4SLinus Torvalds static struct list_head hugepage_freelists[MAX_NUMNODES];
281da177e4SLinus Torvalds static unsigned int nr_huge_pages_node[MAX_NUMNODES];
291da177e4SLinus Torvalds static unsigned int free_huge_pages_node[MAX_NUMNODES];
303935baa9SDavid Gibson /*
313935baa9SDavid Gibson  * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages
323935baa9SDavid Gibson  */
333935baa9SDavid Gibson static DEFINE_SPINLOCK(hugetlb_lock);
340bd0f9fbSEric Paris 
3579ac6ba4SDavid Gibson static void clear_huge_page(struct page *page, unsigned long addr)
3679ac6ba4SDavid Gibson {
3779ac6ba4SDavid Gibson 	int i;
3879ac6ba4SDavid Gibson 
3979ac6ba4SDavid Gibson 	might_sleep();
4079ac6ba4SDavid Gibson 	for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); i++) {
4179ac6ba4SDavid Gibson 		cond_resched();
4279ac6ba4SDavid Gibson 		clear_user_highpage(page + i, addr);
4379ac6ba4SDavid Gibson 	}
4479ac6ba4SDavid Gibson }
4579ac6ba4SDavid Gibson 
4679ac6ba4SDavid Gibson static void copy_huge_page(struct page *dst, struct page *src,
4779ac6ba4SDavid Gibson 			   unsigned long addr)
4879ac6ba4SDavid Gibson {
4979ac6ba4SDavid Gibson 	int i;
5079ac6ba4SDavid Gibson 
5179ac6ba4SDavid Gibson 	might_sleep();
5279ac6ba4SDavid Gibson 	for (i = 0; i < HPAGE_SIZE/PAGE_SIZE; i++) {
5379ac6ba4SDavid Gibson 		cond_resched();
5479ac6ba4SDavid Gibson 		copy_user_highpage(dst + i, src + i, addr + i*PAGE_SIZE);
5579ac6ba4SDavid Gibson 	}
5679ac6ba4SDavid Gibson }
5779ac6ba4SDavid Gibson 
581da177e4SLinus Torvalds static void enqueue_huge_page(struct page *page)
591da177e4SLinus Torvalds {
601da177e4SLinus Torvalds 	int nid = page_to_nid(page);
611da177e4SLinus Torvalds 	list_add(&page->lru, &hugepage_freelists[nid]);
621da177e4SLinus Torvalds 	free_huge_pages++;
631da177e4SLinus Torvalds 	free_huge_pages_node[nid]++;
641da177e4SLinus Torvalds }
651da177e4SLinus Torvalds 
665da7ca86SChristoph Lameter static struct page *dequeue_huge_page(struct vm_area_struct *vma,
675da7ca86SChristoph Lameter 				unsigned long address)
681da177e4SLinus Torvalds {
691da177e4SLinus Torvalds 	int nid = numa_node_id();
701da177e4SLinus Torvalds 	struct page *page = NULL;
715da7ca86SChristoph Lameter 	struct zonelist *zonelist = huge_zonelist(vma, address);
7296df9333SChristoph Lameter 	struct zone **z;
731da177e4SLinus Torvalds 
7496df9333SChristoph Lameter 	for (z = zonelist->zones; *z; z++) {
7589fa3024SChristoph Lameter 		nid = zone_to_nid(*z);
76aea47ff3SChristoph Lameter 		if (cpuset_zone_allowed(*z, GFP_HIGHUSER) &&
77aea47ff3SChristoph Lameter 		    !list_empty(&hugepage_freelists[nid]))
781da177e4SLinus Torvalds 			break;
791da177e4SLinus Torvalds 	}
8096df9333SChristoph Lameter 
8196df9333SChristoph Lameter 	if (*z) {
821da177e4SLinus Torvalds 		page = list_entry(hugepage_freelists[nid].next,
831da177e4SLinus Torvalds 				  struct page, lru);
841da177e4SLinus Torvalds 		list_del(&page->lru);
851da177e4SLinus Torvalds 		free_huge_pages--;
861da177e4SLinus Torvalds 		free_huge_pages_node[nid]--;
871da177e4SLinus Torvalds 	}
881da177e4SLinus Torvalds 	return page;
891da177e4SLinus Torvalds }
901da177e4SLinus Torvalds 
9127a85ef1SDavid Gibson static void free_huge_page(struct page *page)
9227a85ef1SDavid Gibson {
9327a85ef1SDavid Gibson 	BUG_ON(page_count(page));
9427a85ef1SDavid Gibson 
9527a85ef1SDavid Gibson 	INIT_LIST_HEAD(&page->lru);
9627a85ef1SDavid Gibson 
9727a85ef1SDavid Gibson 	spin_lock(&hugetlb_lock);
9827a85ef1SDavid Gibson 	enqueue_huge_page(page);
9927a85ef1SDavid Gibson 	spin_unlock(&hugetlb_lock);
10027a85ef1SDavid Gibson }
10127a85ef1SDavid Gibson 
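/*
 * Allocate one fresh huge page straight from the buddy allocator, round-
 * robining the starting node over node_online_map so the pool is spread
 * across nodes.  free_huge_page() is stashed in page[1].lru.next as the
 * compound page destructor, so dropping the last reference returns the
 * page to the hugepage free lists rather than to the buddy allocator.
 */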
102a482289dSNick Piggin static int alloc_fresh_huge_page(void)
1031da177e4SLinus Torvalds {
1041da177e4SLinus Torvalds 	static int nid = 0;
1051da177e4SLinus Torvalds 	struct page *page;
1061da177e4SLinus Torvalds 	page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP|__GFP_NOWARN,
1071da177e4SLinus Torvalds 					HUGETLB_PAGE_ORDER);
108fdb7cc59SPaul Jackson 	nid = next_node(nid, node_online_map);
109fdb7cc59SPaul Jackson 	if (nid == MAX_NUMNODES)
110fdb7cc59SPaul Jackson 		nid = first_node(node_online_map);
1111da177e4SLinus Torvalds 	if (page) {
112a482289dSNick Piggin 		page[1].lru.next = (void *)free_huge_page;	/* dtor */
1130bd0f9fbSEric Paris 		spin_lock(&hugetlb_lock);
1141da177e4SLinus Torvalds 		nr_huge_pages++;
1151da177e4SLinus Torvalds 		nr_huge_pages_node[page_to_nid(page)]++;
1160bd0f9fbSEric Paris 		spin_unlock(&hugetlb_lock);
117a482289dSNick Piggin 		put_page(page); /* free it into the hugepage allocator */
118a482289dSNick Piggin 		return 1;
1191da177e4SLinus Torvalds 	}
120a482289dSNick Piggin 	return 0;
1211da177e4SLinus Torvalds }
1221da177e4SLinus Torvalds 
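/*
 * Hand out a huge page for a fault.  A VM_MAYSHARE mapping consumes one of
 * the pages reserved for it at mmap time; a private mapping may only take
 * a page if doing so would not dip into the pool reserved for others.
 */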
12327a85ef1SDavid Gibson static struct page *alloc_huge_page(struct vm_area_struct *vma,
12427a85ef1SDavid Gibson 				    unsigned long addr)
1251da177e4SLinus Torvalds {
1261da177e4SLinus Torvalds 	struct page *page;
1271da177e4SLinus Torvalds 
1281da177e4SLinus Torvalds 	spin_lock(&hugetlb_lock);
129a43a8c39SChen, Kenneth W 	if (vma->vm_flags & VM_MAYSHARE)
130a43a8c39SChen, Kenneth W 		resv_huge_pages--;
131a43a8c39SChen, Kenneth W 	else if (free_huge_pages <= resv_huge_pages)
132b45b5bd6SDavid Gibson 		goto fail;
133b45b5bd6SDavid Gibson 
134b45b5bd6SDavid Gibson 	page = dequeue_huge_page(vma, addr);
135b45b5bd6SDavid Gibson 	if (!page)
136b45b5bd6SDavid Gibson 		goto fail;
137b45b5bd6SDavid Gibson 
1381da177e4SLinus Torvalds 	spin_unlock(&hugetlb_lock);
1397835e98bSNick Piggin 	set_page_refcounted(page);
1401da177e4SLinus Torvalds 	return page;
141b45b5bd6SDavid Gibson 
142b45b5bd6SDavid Gibson fail:
143b45b5bd6SDavid Gibson 	spin_unlock(&hugetlb_lock);
144b45b5bd6SDavid Gibson 	return NULL;
145b45b5bd6SDavid Gibson }
146b45b5bd6SDavid Gibson 
1471da177e4SLinus Torvalds static int __init hugetlb_init(void)
1481da177e4SLinus Torvalds {
1491da177e4SLinus Torvalds 	unsigned long i;
1501da177e4SLinus Torvalds 
1513c726f8dSBenjamin Herrenschmidt 	if (HPAGE_SHIFT == 0)
1523c726f8dSBenjamin Herrenschmidt 		return 0;
1533c726f8dSBenjamin Herrenschmidt 
1541da177e4SLinus Torvalds 	for (i = 0; i < MAX_NUMNODES; ++i)
1551da177e4SLinus Torvalds 		INIT_LIST_HEAD(&hugepage_freelists[i]);
1561da177e4SLinus Torvalds 
1571da177e4SLinus Torvalds 	for (i = 0; i < max_huge_pages; ++i) {
158a482289dSNick Piggin 		if (!alloc_fresh_huge_page())
1591da177e4SLinus Torvalds 			break;
1601da177e4SLinus Torvalds 	}
1611da177e4SLinus Torvalds 	max_huge_pages = free_huge_pages = nr_huge_pages = i;
1621da177e4SLinus Torvalds 	printk("Total HugeTLB memory allocated, %lu\n", free_huge_pages);
1631da177e4SLinus Torvalds 	return 0;
1641da177e4SLinus Torvalds }
1651da177e4SLinus Torvalds module_init(hugetlb_init);
1661da177e4SLinus Torvalds 
1671da177e4SLinus Torvalds static int __init hugetlb_setup(char *s)
1681da177e4SLinus Torvalds {
1691da177e4SLinus Torvalds 	if (sscanf(s, "%lu", &max_huge_pages) <= 0)
1701da177e4SLinus Torvalds 		max_huge_pages = 0;
1711da177e4SLinus Torvalds 	return 1;
1721da177e4SLinus Torvalds }
1731da177e4SLinus Torvalds __setup("hugepages=", hugetlb_setup);
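
/*
 * Usage example: booting with "hugepages=64" on the kernel command line
 * sets max_huge_pages, and hugetlb_init() above then tries to allocate
 * that many huge pages at boot.
 */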
1741da177e4SLinus Torvalds 
1751da177e4SLinus Torvalds #ifdef CONFIG_SYSCTL
1761da177e4SLinus Torvalds static void update_and_free_page(struct page *page)
1771da177e4SLinus Torvalds {
1781da177e4SLinus Torvalds 	int i;
1791da177e4SLinus Torvalds 	nr_huge_pages--;
1804415cc8dSChristoph Lameter 	nr_huge_pages_node[page_to_nid(page)]--;
1811da177e4SLinus Torvalds 	for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) {
1821da177e4SLinus Torvalds 		page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced |
1831da177e4SLinus Torvalds 				1 << PG_dirty | 1 << PG_active | 1 << PG_reserved |
1841da177e4SLinus Torvalds 				1 << PG_private | 1<< PG_writeback);
1851da177e4SLinus Torvalds 	}
186a482289dSNick Piggin 	page[1].lru.next = NULL;
1877835e98bSNick Piggin 	set_page_refcounted(page);
1881da177e4SLinus Torvalds 	__free_pages(page, HUGETLB_PAGE_ORDER);
1891da177e4SLinus Torvalds }
1901da177e4SLinus Torvalds 
1911da177e4SLinus Torvalds #ifdef CONFIG_HIGHMEM
1921da177e4SLinus Torvalds static void try_to_free_low(unsigned long count)
1931da177e4SLinus Torvalds {
1944415cc8dSChristoph Lameter 	int i;
1954415cc8dSChristoph Lameter 
1961da177e4SLinus Torvalds 	for (i = 0; i < MAX_NUMNODES; ++i) {
1971da177e4SLinus Torvalds 		struct page *page, *next;
1981da177e4SLinus Torvalds 		list_for_each_entry_safe(page, next, &hugepage_freelists[i], lru) {
1991da177e4SLinus Torvalds 			if (PageHighMem(page))
2001da177e4SLinus Torvalds 				continue;
2011da177e4SLinus Torvalds 			list_del(&page->lru);
2021da177e4SLinus Torvalds 			update_and_free_page(page);
2031da177e4SLinus Torvalds 			free_huge_pages--;
2044415cc8dSChristoph Lameter 			free_huge_pages_node[page_to_nid(page)]--;
2051da177e4SLinus Torvalds 			if (count >= nr_huge_pages)
2061da177e4SLinus Torvalds 				return;
2071da177e4SLinus Torvalds 		}
2081da177e4SLinus Torvalds 	}
2091da177e4SLinus Torvalds }
2101da177e4SLinus Torvalds #else
2111da177e4SLinus Torvalds static inline void try_to_free_low(unsigned long count)
2121da177e4SLinus Torvalds {
2131da177e4SLinus Torvalds }
2141da177e4SLinus Torvalds #endif
2151da177e4SLinus Torvalds 
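/*
 * Resize the pool to "count" pages: allocate fresh huge pages while below
 * the target, then free surplus free pages back to the buddy allocator
 * under hugetlb_lock, preferring lowmem pages via try_to_free_low() and
 * never shrinking below the currently reserved count.
 */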
2161da177e4SLinus Torvalds static unsigned long set_max_huge_pages(unsigned long count)
2171da177e4SLinus Torvalds {
2181da177e4SLinus Torvalds 	while (count > nr_huge_pages) {
219a482289dSNick Piggin 		if (!alloc_fresh_huge_page())
2201da177e4SLinus Torvalds 			return nr_huge_pages;
2211da177e4SLinus Torvalds 	}
2221da177e4SLinus Torvalds 	if (count >= nr_huge_pages)
2231da177e4SLinus Torvalds 		return nr_huge_pages;
2241da177e4SLinus Torvalds 
2251da177e4SLinus Torvalds 	spin_lock(&hugetlb_lock);
226a43a8c39SChen, Kenneth W 	count = max(count, resv_huge_pages);
2271da177e4SLinus Torvalds 	try_to_free_low(count);
2281da177e4SLinus Torvalds 	while (count < nr_huge_pages) {
2295da7ca86SChristoph Lameter 		struct page *page = dequeue_huge_page(NULL, 0);
2301da177e4SLinus Torvalds 		if (!page)
2311da177e4SLinus Torvalds 			break;
2321da177e4SLinus Torvalds 		update_and_free_page(page);
2331da177e4SLinus Torvalds 	}
2341da177e4SLinus Torvalds 	spin_unlock(&hugetlb_lock);
2351da177e4SLinus Torvalds 	return nr_huge_pages;
2361da177e4SLinus Torvalds }
2371da177e4SLinus Torvalds 
2381da177e4SLinus Torvalds int hugetlb_sysctl_handler(struct ctl_table *table, int write,
2391da177e4SLinus Torvalds 			   struct file *file, void __user *buffer,
2401da177e4SLinus Torvalds 			   size_t *length, loff_t *ppos)
2411da177e4SLinus Torvalds {
2421da177e4SLinus Torvalds 	proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
2431da177e4SLinus Torvalds 	max_huge_pages = set_max_huge_pages(max_huge_pages);
2441da177e4SLinus Torvalds 	return 0;
2451da177e4SLinus Torvalds }
2461da177e4SLinus Torvalds #endif /* CONFIG_SYSCTL */
2471da177e4SLinus Torvalds 
2481da177e4SLinus Torvalds int hugetlb_report_meminfo(char *buf)
2491da177e4SLinus Torvalds {
2501da177e4SLinus Torvalds 	return sprintf(buf,
2511da177e4SLinus Torvalds 			"HugePages_Total: %5lu\n"
2521da177e4SLinus Torvalds 			"HugePages_Free:  %5lu\n"
253b45b5bd6SDavid Gibson 			"HugePages_Rsvd:  %5lu\n"
2541da177e4SLinus Torvalds 			"Hugepagesize:    %5lu kB\n",
2551da177e4SLinus Torvalds 			nr_huge_pages,
2561da177e4SLinus Torvalds 			free_huge_pages,
257a43a8c39SChen, Kenneth W 			resv_huge_pages,
2581da177e4SLinus Torvalds 			HPAGE_SIZE/1024);
2591da177e4SLinus Torvalds }
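
/*
 * Illustrative /proc/meminfo output produced by the above (the numbers are
 * made up for the example):
 *
 *	HugePages_Total:    20
 *	HugePages_Free:     18
 *	HugePages_Rsvd:      2
 *	Hugepagesize:     2048 kB
 */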
2601da177e4SLinus Torvalds 
2611da177e4SLinus Torvalds int hugetlb_report_node_meminfo(int nid, char *buf)
2621da177e4SLinus Torvalds {
2631da177e4SLinus Torvalds 	return sprintf(buf,
2641da177e4SLinus Torvalds 		"Node %d HugePages_Total: %5u\n"
2651da177e4SLinus Torvalds 		"Node %d HugePages_Free:  %5u\n",
2661da177e4SLinus Torvalds 		nid, nr_huge_pages_node[nid],
2671da177e4SLinus Torvalds 		nid, free_huge_pages_node[nid]);
2681da177e4SLinus Torvalds }
2691da177e4SLinus Torvalds 
2701da177e4SLinus Torvalds /* Return the number of pages of memory we physically have, in PAGE_SIZE units. */
2711da177e4SLinus Torvalds unsigned long hugetlb_total_pages(void)
2721da177e4SLinus Torvalds {
2731da177e4SLinus Torvalds 	return nr_huge_pages * (HPAGE_SIZE / PAGE_SIZE);
2741da177e4SLinus Torvalds }
2751da177e4SLinus Torvalds 
2761da177e4SLinus Torvalds /*
2771da177e4SLinus Torvalds  * We cannot handle pagefaults against hugetlb pages at all.  They cause
2781da177e4SLinus Torvalds  * handle_mm_fault() to try to instantiate regular-sized pages in the
2791da177e4SLinus Torvalds  * hugepage VMA.  do_page_fault() is supposed to trap this, so BUG if we get
2801da177e4SLinus Torvalds  * this far.
2811da177e4SLinus Torvalds  */
2821da177e4SLinus Torvalds static struct page *hugetlb_nopage(struct vm_area_struct *vma,
2831da177e4SLinus Torvalds 				unsigned long address, int *unused)
2841da177e4SLinus Torvalds {
2851da177e4SLinus Torvalds 	BUG();
2861da177e4SLinus Torvalds 	return NULL;
2871da177e4SLinus Torvalds }
2881da177e4SLinus Torvalds 
2891da177e4SLinus Torvalds struct vm_operations_struct hugetlb_vm_ops = {
2901da177e4SLinus Torvalds 	.nopage = hugetlb_nopage,
2911da177e4SLinus Torvalds };
2921da177e4SLinus Torvalds 
2931e8f889bSDavid Gibson static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page,
2941e8f889bSDavid Gibson 				int writable)
29563551ae0SDavid Gibson {
29663551ae0SDavid Gibson 	pte_t entry;
29763551ae0SDavid Gibson 
2981e8f889bSDavid Gibson 	if (writable) {
29963551ae0SDavid Gibson 		entry =
30063551ae0SDavid Gibson 		    pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
30163551ae0SDavid Gibson 	} else {
30263551ae0SDavid Gibson 		entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
30363551ae0SDavid Gibson 	}
30463551ae0SDavid Gibson 	entry = pte_mkyoung(entry);
30563551ae0SDavid Gibson 	entry = pte_mkhuge(entry);
30663551ae0SDavid Gibson 
30763551ae0SDavid Gibson 	return entry;
30863551ae0SDavid Gibson }
30963551ae0SDavid Gibson 
3101e8f889bSDavid Gibson static void set_huge_ptep_writable(struct vm_area_struct *vma,
3111e8f889bSDavid Gibson 				   unsigned long address, pte_t *ptep)
3121e8f889bSDavid Gibson {
3131e8f889bSDavid Gibson 	pte_t entry;
3141e8f889bSDavid Gibson 
3151e8f889bSDavid Gibson 	entry = pte_mkwrite(pte_mkdirty(*ptep));
3161e8f889bSDavid Gibson 	ptep_set_access_flags(vma, address, ptep, entry, 1);
3171e8f889bSDavid Gibson 	update_mmu_cache(vma, address, entry);
3181e8f889bSDavid Gibson 	lazy_mmu_prot_update(entry);
3191e8f889bSDavid Gibson }
3201e8f889bSDavid Gibson 
3211e8f889bSDavid Gibson 
32263551ae0SDavid Gibson int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
32363551ae0SDavid Gibson 			    struct vm_area_struct *vma)
32463551ae0SDavid Gibson {
32563551ae0SDavid Gibson 	pte_t *src_pte, *dst_pte, entry;
32663551ae0SDavid Gibson 	struct page *ptepage;
3271c59827dSHugh Dickins 	unsigned long addr;
3281e8f889bSDavid Gibson 	int cow;
3291e8f889bSDavid Gibson 
3301e8f889bSDavid Gibson 	cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
33163551ae0SDavid Gibson 
3321c59827dSHugh Dickins 	for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
333c74df32cSHugh Dickins 		src_pte = huge_pte_offset(src, addr);
334c74df32cSHugh Dickins 		if (!src_pte)
335c74df32cSHugh Dickins 			continue;
33663551ae0SDavid Gibson 		dst_pte = huge_pte_alloc(dst, addr);
33763551ae0SDavid Gibson 		if (!dst_pte)
33863551ae0SDavid Gibson 			goto nomem;
339c74df32cSHugh Dickins 		spin_lock(&dst->page_table_lock);
3401c59827dSHugh Dickins 		spin_lock(&src->page_table_lock);
341c74df32cSHugh Dickins 		if (!pte_none(*src_pte)) {
3421e8f889bSDavid Gibson 			if (cow)
3431e8f889bSDavid Gibson 				ptep_set_wrprotect(src, addr, src_pte);
34463551ae0SDavid Gibson 			entry = *src_pte;
34563551ae0SDavid Gibson 			ptepage = pte_page(entry);
34663551ae0SDavid Gibson 			get_page(ptepage);
3474294621fSHugh Dickins 			add_mm_counter(dst, file_rss, HPAGE_SIZE / PAGE_SIZE);
34863551ae0SDavid Gibson 			set_huge_pte_at(dst, addr, dst_pte, entry);
3491c59827dSHugh Dickins 		}
3501c59827dSHugh Dickins 		spin_unlock(&src->page_table_lock);
351c74df32cSHugh Dickins 		spin_unlock(&dst->page_table_lock);
35263551ae0SDavid Gibson 	}
35363551ae0SDavid Gibson 	return 0;
35463551ae0SDavid Gibson 
35563551ae0SDavid Gibson nomem:
35663551ae0SDavid Gibson 	return -ENOMEM;
35763551ae0SDavid Gibson }
35863551ae0SDavid Gibson 
35963551ae0SDavid Gibson void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
36063551ae0SDavid Gibson 			  unsigned long end)
36163551ae0SDavid Gibson {
36263551ae0SDavid Gibson 	struct mm_struct *mm = vma->vm_mm;
36363551ae0SDavid Gibson 	unsigned long address;
364c7546f8fSDavid Gibson 	pte_t *ptep;
36563551ae0SDavid Gibson 	pte_t pte;
36663551ae0SDavid Gibson 	struct page *page;
367fe1668aeSChen, Kenneth W 	struct page *tmp;
368fe1668aeSChen, Kenneth W 	LIST_HEAD(page_list);
36963551ae0SDavid Gibson 
37063551ae0SDavid Gibson 	WARN_ON(!is_vm_hugetlb_page(vma));
37163551ae0SDavid Gibson 	BUG_ON(start & ~HPAGE_MASK);
37263551ae0SDavid Gibson 	BUG_ON(end & ~HPAGE_MASK);
37363551ae0SDavid Gibson 
374508034a3SHugh Dickins 	spin_lock(&mm->page_table_lock);
375508034a3SHugh Dickins 
376365e9c87SHugh Dickins 	/* Update high watermark before we lower rss */
377365e9c87SHugh Dickins 	update_hiwater_rss(mm);
378365e9c87SHugh Dickins 
37963551ae0SDavid Gibson 	for (address = start; address < end; address += HPAGE_SIZE) {
380c7546f8fSDavid Gibson 		ptep = huge_pte_offset(mm, address);
381c7546f8fSDavid Gibson 		if (!ptep)
382c7546f8fSDavid Gibson 			continue;
383c7546f8fSDavid Gibson 
384c7546f8fSDavid Gibson 		pte = huge_ptep_get_and_clear(mm, address, ptep);
38563551ae0SDavid Gibson 		if (pte_none(pte))
38663551ae0SDavid Gibson 			continue;
387c7546f8fSDavid Gibson 
38863551ae0SDavid Gibson 		page = pte_page(pte);
389fe1668aeSChen, Kenneth W 		list_add(&page->lru, &page_list);
3904294621fSHugh Dickins 		add_mm_counter(mm, file_rss, (int) -(HPAGE_SIZE / PAGE_SIZE));
39163551ae0SDavid Gibson 	}
39263551ae0SDavid Gibson 
3931da177e4SLinus Torvalds 	spin_unlock(&mm->page_table_lock);
394508034a3SHugh Dickins 	flush_tlb_range(vma, start, end);
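	/*
	 * Only now that the TLB has been flushed for the whole range is it
	 * safe to drop our references: a stale TLB entry could otherwise
	 * still map a page that has already been freed and reused.
	 */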
395fe1668aeSChen, Kenneth W 	list_for_each_entry_safe(page, tmp, &page_list, lru) {
396fe1668aeSChen, Kenneth W 		list_del(&page->lru);
397fe1668aeSChen, Kenneth W 		put_page(page);
398fe1668aeSChen, Kenneth W 	}
3991da177e4SLinus Torvalds }
40063551ae0SDavid Gibson 
4011e8f889bSDavid Gibson static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
4021e8f889bSDavid Gibson 			unsigned long address, pte_t *ptep, pte_t pte)
4031e8f889bSDavid Gibson {
4041e8f889bSDavid Gibson 	struct page *old_page, *new_page;
40579ac6ba4SDavid Gibson 	int avoidcopy;
4061e8f889bSDavid Gibson 
4071e8f889bSDavid Gibson 	old_page = pte_page(pte);
4081e8f889bSDavid Gibson 
4091e8f889bSDavid Gibson 	/* If no-one else is actually using this page, avoid the copy
4101e8f889bSDavid Gibson 	 * and just make the page writable */
4111e8f889bSDavid Gibson 	avoidcopy = (page_count(old_page) == 1);
4121e8f889bSDavid Gibson 	if (avoidcopy) {
4131e8f889bSDavid Gibson 		set_huge_ptep_writable(vma, address, ptep);
4141e8f889bSDavid Gibson 		return VM_FAULT_MINOR;
4151e8f889bSDavid Gibson 	}
4161e8f889bSDavid Gibson 
4171e8f889bSDavid Gibson 	page_cache_get(old_page);
4185da7ca86SChristoph Lameter 	new_page = alloc_huge_page(vma, address);
4191e8f889bSDavid Gibson 
4201e8f889bSDavid Gibson 	if (!new_page) {
4211e8f889bSDavid Gibson 		page_cache_release(old_page);
4220df420d8SChristoph Lameter 		return VM_FAULT_OOM;
4231e8f889bSDavid Gibson 	}
4241e8f889bSDavid Gibson 
4251e8f889bSDavid Gibson 	spin_unlock(&mm->page_table_lock);
42679ac6ba4SDavid Gibson 	copy_huge_page(new_page, old_page, address);
4271e8f889bSDavid Gibson 	spin_lock(&mm->page_table_lock);
4281e8f889bSDavid Gibson 
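	/*
	 * The page table lock was dropped around the (sleeping) copy above,
	 * so re-find the pte and only install the new page if the mapping
	 * is still the one we copied from.
	 */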
4291e8f889bSDavid Gibson 	ptep = huge_pte_offset(mm, address & HPAGE_MASK);
4301e8f889bSDavid Gibson 	if (likely(pte_same(*ptep, pte))) {
4311e8f889bSDavid Gibson 		/* Break COW */
4321e8f889bSDavid Gibson 		set_huge_pte_at(mm, address, ptep,
4331e8f889bSDavid Gibson 				make_huge_pte(vma, new_page, 1));
4341e8f889bSDavid Gibson 		/* Make the old page be freed below */
4351e8f889bSDavid Gibson 		new_page = old_page;
4361e8f889bSDavid Gibson 	}
4371e8f889bSDavid Gibson 	page_cache_release(new_page);
4381e8f889bSDavid Gibson 	page_cache_release(old_page);
4391e8f889bSDavid Gibson 	return VM_FAULT_MINOR;
4401e8f889bSDavid Gibson }
4411e8f889bSDavid Gibson 
44286e5216fSAdam Litke int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
4431e8f889bSDavid Gibson 			unsigned long address, pte_t *ptep, int write_access)
444ac9b9c66SHugh Dickins {
445ac9b9c66SHugh Dickins 	int ret = VM_FAULT_SIGBUS;
4464c887265SAdam Litke 	unsigned long idx;
4474c887265SAdam Litke 	unsigned long size;
4484c887265SAdam Litke 	struct page *page;
4494c887265SAdam Litke 	struct address_space *mapping;
4501e8f889bSDavid Gibson 	pte_t new_pte;
4514c887265SAdam Litke 
4524c887265SAdam Litke 	mapping = vma->vm_file->f_mapping;
4534c887265SAdam Litke 	idx = ((address - vma->vm_start) >> HPAGE_SHIFT)
4544c887265SAdam Litke 		+ (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
4554c887265SAdam Litke 
4564c887265SAdam Litke 	/*
4574c887265SAdam Litke 	 * Use page lock to guard against racing truncation
4584c887265SAdam Litke 	 * before we get page_table_lock.
4594c887265SAdam Litke 	 */
4606bda666aSChristoph Lameter retry:
4616bda666aSChristoph Lameter 	page = find_lock_page(mapping, idx);
4626bda666aSChristoph Lameter 	if (!page) {
4636bda666aSChristoph Lameter 		if (hugetlb_get_quota(mapping))
4644c887265SAdam Litke 			goto out;
4656bda666aSChristoph Lameter 		page = alloc_huge_page(vma, address);
4666bda666aSChristoph Lameter 		if (!page) {
4676bda666aSChristoph Lameter 			hugetlb_put_quota(mapping);
4680df420d8SChristoph Lameter 			ret = VM_FAULT_OOM;
4696bda666aSChristoph Lameter 			goto out;
4706bda666aSChristoph Lameter 		}
47179ac6ba4SDavid Gibson 		clear_huge_page(page, address);
472ac9b9c66SHugh Dickins 
4736bda666aSChristoph Lameter 		if (vma->vm_flags & VM_SHARED) {
4746bda666aSChristoph Lameter 			int err;
4756bda666aSChristoph Lameter 
4766bda666aSChristoph Lameter 			err = add_to_page_cache(page, mapping, idx, GFP_KERNEL);
4776bda666aSChristoph Lameter 			if (err) {
4786bda666aSChristoph Lameter 				put_page(page);
4796bda666aSChristoph Lameter 				hugetlb_put_quota(mapping);
4806bda666aSChristoph Lameter 				if (err == -EEXIST)
4816bda666aSChristoph Lameter 					goto retry;
4826bda666aSChristoph Lameter 				goto out;
4836bda666aSChristoph Lameter 			}
4846bda666aSChristoph Lameter 		} else
4856bda666aSChristoph Lameter 			lock_page(page);
4866bda666aSChristoph Lameter 	}
4871e8f889bSDavid Gibson 
488ac9b9c66SHugh Dickins 	spin_lock(&mm->page_table_lock);
4894c887265SAdam Litke 	size = i_size_read(mapping->host) >> HPAGE_SHIFT;
4904c887265SAdam Litke 	if (idx >= size)
4914c887265SAdam Litke 		goto backout;
4924c887265SAdam Litke 
493ac9b9c66SHugh Dickins 	ret = VM_FAULT_MINOR;
49486e5216fSAdam Litke 	if (!pte_none(*ptep))
4954c887265SAdam Litke 		goto backout;
4964c887265SAdam Litke 
4974c887265SAdam Litke 	add_mm_counter(mm, file_rss, HPAGE_SIZE / PAGE_SIZE);
4981e8f889bSDavid Gibson 	new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE)
4991e8f889bSDavid Gibson 				&& (vma->vm_flags & VM_SHARED)));
5001e8f889bSDavid Gibson 	set_huge_pte_at(mm, address, ptep, new_pte);
5011e8f889bSDavid Gibson 
5021e8f889bSDavid Gibson 	if (write_access && !(vma->vm_flags & VM_SHARED)) {
5031e8f889bSDavid Gibson 		/* Optimization, do the COW without a second fault */
5041e8f889bSDavid Gibson 		ret = hugetlb_cow(mm, vma, address, ptep, new_pte);
5051e8f889bSDavid Gibson 	}
5061e8f889bSDavid Gibson 
507ac9b9c66SHugh Dickins 	spin_unlock(&mm->page_table_lock);
5084c887265SAdam Litke 	unlock_page(page);
5094c887265SAdam Litke out:
510ac9b9c66SHugh Dickins 	return ret;
5114c887265SAdam Litke 
5124c887265SAdam Litke backout:
5134c887265SAdam Litke 	spin_unlock(&mm->page_table_lock);
5144c887265SAdam Litke 	hugetlb_put_quota(mapping);
5154c887265SAdam Litke 	unlock_page(page);
5164c887265SAdam Litke 	put_page(page);
5174c887265SAdam Litke 	goto out;
518ac9b9c66SHugh Dickins }
519ac9b9c66SHugh Dickins 
52086e5216fSAdam Litke int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
52186e5216fSAdam Litke 			unsigned long address, int write_access)
52286e5216fSAdam Litke {
52386e5216fSAdam Litke 	pte_t *ptep;
52486e5216fSAdam Litke 	pte_t entry;
5251e8f889bSDavid Gibson 	int ret;
5263935baa9SDavid Gibson 	static DEFINE_MUTEX(hugetlb_instantiation_mutex);
52786e5216fSAdam Litke 
52886e5216fSAdam Litke 	ptep = huge_pte_alloc(mm, address);
52986e5216fSAdam Litke 	if (!ptep)
53086e5216fSAdam Litke 		return VM_FAULT_OOM;
53186e5216fSAdam Litke 
5323935baa9SDavid Gibson 	/*
5333935baa9SDavid Gibson 	 * Serialize hugepage allocation and instantiation, so that we don't
5343935baa9SDavid Gibson 	 * get spurious allocation failures if two CPUs race to instantiate
5353935baa9SDavid Gibson 	 * the same page in the page cache.
5363935baa9SDavid Gibson 	 */
5373935baa9SDavid Gibson 	mutex_lock(&hugetlb_instantiation_mutex);
53886e5216fSAdam Litke 	entry = *ptep;
5393935baa9SDavid Gibson 	if (pte_none(entry)) {
5403935baa9SDavid Gibson 		ret = hugetlb_no_page(mm, vma, address, ptep, write_access);
5413935baa9SDavid Gibson 		mutex_unlock(&hugetlb_instantiation_mutex);
5423935baa9SDavid Gibson 		return ret;
5433935baa9SDavid Gibson 	}
54486e5216fSAdam Litke 
5451e8f889bSDavid Gibson 	ret = VM_FAULT_MINOR;
5461e8f889bSDavid Gibson 
5471e8f889bSDavid Gibson 	spin_lock(&mm->page_table_lock);
5481e8f889bSDavid Gibson 	/* Check for a racing update before calling hugetlb_cow */
5491e8f889bSDavid Gibson 	if (likely(pte_same(entry, *ptep)))
5501e8f889bSDavid Gibson 		if (write_access && !pte_write(entry))
5511e8f889bSDavid Gibson 			ret = hugetlb_cow(mm, vma, address, ptep, entry);
5521e8f889bSDavid Gibson 	spin_unlock(&mm->page_table_lock);
5533935baa9SDavid Gibson 	mutex_unlock(&hugetlb_instantiation_mutex);
5541e8f889bSDavid Gibson 
5551e8f889bSDavid Gibson 	return ret;
55686e5216fSAdam Litke }
55786e5216fSAdam Litke 
55863551ae0SDavid Gibson int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
55963551ae0SDavid Gibson 			struct page **pages, struct vm_area_struct **vmas,
56063551ae0SDavid Gibson 			unsigned long *position, int *length, int i)
56163551ae0SDavid Gibson {
562d5d4b0aaSChen, Kenneth W 	unsigned long pfn_offset;
563d5d4b0aaSChen, Kenneth W 	unsigned long vaddr = *position;
56463551ae0SDavid Gibson 	int remainder = *length;
56563551ae0SDavid Gibson 
5661c59827dSHugh Dickins 	spin_lock(&mm->page_table_lock);
56763551ae0SDavid Gibson 	while (vaddr < vma->vm_end && remainder) {
56863551ae0SDavid Gibson 		pte_t *pte;
56963551ae0SDavid Gibson 		struct page *page;
57063551ae0SDavid Gibson 
5714c887265SAdam Litke 		/*
5724c887265SAdam Litke 		 * Some archs (sparc64, sh*) have multiple pte_t entries for
5734c887265SAdam Litke 		 * each hugepage.  We have to make sure we get the
5744c887265SAdam Litke 		 * first, for the page indexing below to work.
5754c887265SAdam Litke 		 */
57663551ae0SDavid Gibson 		pte = huge_pte_offset(mm, vaddr & HPAGE_MASK);
57763551ae0SDavid Gibson 
5781c59827dSHugh Dickins 		if (!pte || pte_none(*pte)) {
5794c887265SAdam Litke 			int ret;
5804c887265SAdam Litke 
5814c887265SAdam Litke 			spin_unlock(&mm->page_table_lock);
5824c887265SAdam Litke 			ret = hugetlb_fault(mm, vma, vaddr, 0);
5834c887265SAdam Litke 			spin_lock(&mm->page_table_lock);
5844c887265SAdam Litke 			if (ret == VM_FAULT_MINOR)
5854c887265SAdam Litke 				continue;
5864c887265SAdam Litke 
5871c59827dSHugh Dickins 			remainder = 0;
5881c59827dSHugh Dickins 			if (!i)
5891c59827dSHugh Dickins 				i = -EFAULT;
5901c59827dSHugh Dickins 			break;
5911c59827dSHugh Dickins 		}
59263551ae0SDavid Gibson 
593d5d4b0aaSChen, Kenneth W 		pfn_offset = (vaddr & ~HPAGE_MASK) >> PAGE_SHIFT;
594d5d4b0aaSChen, Kenneth W 		page = pte_page(*pte);
595d5d4b0aaSChen, Kenneth W same_page:
596d6692183SChen, Kenneth W 		if (pages) {
59763551ae0SDavid Gibson 			get_page(page);
598d5d4b0aaSChen, Kenneth W 			pages[i] = page + pfn_offset;
599d6692183SChen, Kenneth W 		}
60063551ae0SDavid Gibson 
60163551ae0SDavid Gibson 		if (vmas)
60263551ae0SDavid Gibson 			vmas[i] = vma;
60363551ae0SDavid Gibson 
60463551ae0SDavid Gibson 		vaddr += PAGE_SIZE;
605d5d4b0aaSChen, Kenneth W 		++pfn_offset;
60663551ae0SDavid Gibson 		--remainder;
60763551ae0SDavid Gibson 		++i;
608d5d4b0aaSChen, Kenneth W 		if (vaddr < vma->vm_end && remainder &&
609d5d4b0aaSChen, Kenneth W 				pfn_offset < HPAGE_SIZE/PAGE_SIZE) {
610d5d4b0aaSChen, Kenneth W 			/*
611d5d4b0aaSChen, Kenneth W 			 * We use pfn_offset to avoid touching the pageframes
612d5d4b0aaSChen, Kenneth W 			 * of this compound page.
613d5d4b0aaSChen, Kenneth W 			 */
614d5d4b0aaSChen, Kenneth W 			goto same_page;
615d5d4b0aaSChen, Kenneth W 		}
61663551ae0SDavid Gibson 	}
6171c59827dSHugh Dickins 	spin_unlock(&mm->page_table_lock);
61863551ae0SDavid Gibson 	*length = remainder;
61963551ae0SDavid Gibson 	*position = vaddr;
62063551ae0SDavid Gibson 
62163551ae0SDavid Gibson 	return i;
62263551ae0SDavid Gibson }
6238f860591SZhang, Yanmin 
6248f860591SZhang, Yanmin void hugetlb_change_protection(struct vm_area_struct *vma,
6258f860591SZhang, Yanmin 		unsigned long address, unsigned long end, pgprot_t newprot)
6268f860591SZhang, Yanmin {
6278f860591SZhang, Yanmin 	struct mm_struct *mm = vma->vm_mm;
6288f860591SZhang, Yanmin 	unsigned long start = address;
6298f860591SZhang, Yanmin 	pte_t *ptep;
6308f860591SZhang, Yanmin 	pte_t pte;
6318f860591SZhang, Yanmin 
6328f860591SZhang, Yanmin 	BUG_ON(address >= end);
6338f860591SZhang, Yanmin 	flush_cache_range(vma, address, end);
6348f860591SZhang, Yanmin 
6358f860591SZhang, Yanmin 	spin_lock(&mm->page_table_lock);
6368f860591SZhang, Yanmin 	for (; address < end; address += HPAGE_SIZE) {
6378f860591SZhang, Yanmin 		ptep = huge_pte_offset(mm, address);
6388f860591SZhang, Yanmin 		if (!ptep)
6398f860591SZhang, Yanmin 			continue;
6408f860591SZhang, Yanmin 		if (!pte_none(*ptep)) {
6418f860591SZhang, Yanmin 			pte = huge_ptep_get_and_clear(mm, address, ptep);
6428f860591SZhang, Yanmin 			pte = pte_mkhuge(pte_modify(pte, newprot));
6438f860591SZhang, Yanmin 			set_huge_pte_at(mm, address, ptep, pte);
6448f860591SZhang, Yanmin 			lazy_mmu_prot_update(pte);
6458f860591SZhang, Yanmin 		}
6468f860591SZhang, Yanmin 	}
6478f860591SZhang, Yanmin 	spin_unlock(&mm->page_table_lock);
6488f860591SZhang, Yanmin 
6498f860591SZhang, Yanmin 	flush_tlb_range(vma, start, end);
6508f860591SZhang, Yanmin }
6518f860591SZhang, Yanmin 
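/*
 * The region_*() helpers below keep a sorted list of non-overlapping
 * [from, to) ranges on inode->i_mapping->private_list, recording which
 * parts of the backing file already have huge pages reserved (from/to
 * count huge pages, matching the accounting in hugetlb_acct_memory()).
 */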
652a43a8c39SChen, Kenneth W struct file_region {
653a43a8c39SChen, Kenneth W 	struct list_head link;
654a43a8c39SChen, Kenneth W 	long from;
655a43a8c39SChen, Kenneth W 	long to;
656a43a8c39SChen, Kenneth W };
657a43a8c39SChen, Kenneth W 
658a43a8c39SChen, Kenneth W static long region_add(struct list_head *head, long f, long t)
659a43a8c39SChen, Kenneth W {
660a43a8c39SChen, Kenneth W 	struct file_region *rg, *nrg, *trg;
661a43a8c39SChen, Kenneth W 
662a43a8c39SChen, Kenneth W 	/* Locate the region we are either in or before. */
663a43a8c39SChen, Kenneth W 	list_for_each_entry(rg, head, link)
664a43a8c39SChen, Kenneth W 		if (f <= rg->to)
665a43a8c39SChen, Kenneth W 			break;
666a43a8c39SChen, Kenneth W 
667a43a8c39SChen, Kenneth W 	/* Round our left edge to the current segment if it encloses us. */
668a43a8c39SChen, Kenneth W 	if (f > rg->from)
669a43a8c39SChen, Kenneth W 		f = rg->from;
670a43a8c39SChen, Kenneth W 
671a43a8c39SChen, Kenneth W 	/* Check for and consume any regions we now overlap with. */
672a43a8c39SChen, Kenneth W 	nrg = rg;
673a43a8c39SChen, Kenneth W 	list_for_each_entry_safe(rg, trg, rg->link.prev, link) {
674a43a8c39SChen, Kenneth W 		if (&rg->link == head)
675a43a8c39SChen, Kenneth W 			break;
676a43a8c39SChen, Kenneth W 		if (rg->from > t)
677a43a8c39SChen, Kenneth W 			break;
678a43a8c39SChen, Kenneth W 
679a43a8c39SChen, Kenneth W 		/* If this area reaches higher, extend our area to
680a43a8c39SChen, Kenneth W 		 * include it completely.  If this is not the first area
681a43a8c39SChen, Kenneth W 		 * we intend to reuse, free it. */
682a43a8c39SChen, Kenneth W 		if (rg->to > t)
683a43a8c39SChen, Kenneth W 			t = rg->to;
684a43a8c39SChen, Kenneth W 		if (rg != nrg) {
685a43a8c39SChen, Kenneth W 			list_del(&rg->link);
686a43a8c39SChen, Kenneth W 			kfree(rg);
687a43a8c39SChen, Kenneth W 		}
688a43a8c39SChen, Kenneth W 	}
689a43a8c39SChen, Kenneth W 	nrg->from = f;
690a43a8c39SChen, Kenneth W 	nrg->to = t;
691a43a8c39SChen, Kenneth W 	return 0;
692a43a8c39SChen, Kenneth W }
693a43a8c39SChen, Kenneth W 
694a43a8c39SChen, Kenneth W static long region_chg(struct list_head *head, long f, long t)
695a43a8c39SChen, Kenneth W {
696a43a8c39SChen, Kenneth W 	struct file_region *rg, *nrg;
697a43a8c39SChen, Kenneth W 	long chg = 0;
698a43a8c39SChen, Kenneth W 
699a43a8c39SChen, Kenneth W 	/* Locate the region we are before or in. */
700a43a8c39SChen, Kenneth W 	list_for_each_entry(rg, head, link)
701a43a8c39SChen, Kenneth W 		if (f <= rg->to)
702a43a8c39SChen, Kenneth W 			break;
703a43a8c39SChen, Kenneth W 
704a43a8c39SChen, Kenneth W 	/* If we are below the current region then a new region is required.
705a43a8c39SChen, Kenneth W 	 * Subtle: allocate a new region at the position but make it zero
706a43a8c39SChen, Kenneth W 	 * size such that we can guarantee to record the reservation. */
707a43a8c39SChen, Kenneth W 	if (&rg->link == head || t < rg->from) {
708a43a8c39SChen, Kenneth W 		nrg = kmalloc(sizeof(*nrg), GFP_KERNEL);
709a43a8c39SChen, Kenneth W 	if (!nrg)
710a43a8c39SChen, Kenneth W 			return -ENOMEM;
711a43a8c39SChen, Kenneth W 		nrg->from = f;
712a43a8c39SChen, Kenneth W 		nrg->to   = f;
713a43a8c39SChen, Kenneth W 		INIT_LIST_HEAD(&nrg->link);
714a43a8c39SChen, Kenneth W 		list_add(&nrg->link, rg->link.prev);
715a43a8c39SChen, Kenneth W 
716a43a8c39SChen, Kenneth W 		return t - f;
717a43a8c39SChen, Kenneth W 	}
718a43a8c39SChen, Kenneth W 
719a43a8c39SChen, Kenneth W 	/* Round our left edge to the current segment if it encloses us. */
720a43a8c39SChen, Kenneth W 	if (f > rg->from)
721a43a8c39SChen, Kenneth W 		f = rg->from;
722a43a8c39SChen, Kenneth W 	chg = t - f;
723a43a8c39SChen, Kenneth W 
724a43a8c39SChen, Kenneth W 	/* Check for and consume any regions we now overlap with. */
725a43a8c39SChen, Kenneth W 	list_for_each_entry(rg, rg->link.prev, link) {
726a43a8c39SChen, Kenneth W 		if (&rg->link == head)
727a43a8c39SChen, Kenneth W 			break;
728a43a8c39SChen, Kenneth W 		if (rg->from > t)
729a43a8c39SChen, Kenneth W 			return chg;
730a43a8c39SChen, Kenneth W 
731a43a8c39SChen, Kenneth W 		/* We overlap with this area; if it extends further than
732a43a8c39SChen, Kenneth W 		 * us then we must extend ourselves.  Account for its
733a43a8c39SChen, Kenneth W 		 * existing reservation. */
734a43a8c39SChen, Kenneth W 		if (rg->to > t) {
735a43a8c39SChen, Kenneth W 			chg += rg->to - t;
736a43a8c39SChen, Kenneth W 			t = rg->to;
737a43a8c39SChen, Kenneth W 		}
738a43a8c39SChen, Kenneth W 		chg -= rg->to - rg->from;
739a43a8c39SChen, Kenneth W 	}
740a43a8c39SChen, Kenneth W 	return chg;
741a43a8c39SChen, Kenneth W }
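
/*
 * Worked example (values invented for illustration): starting from an empty
 * map, region_chg(head, 0, 4) inserts a zero-size placeholder at [0, 0) and
 * returns 4; once the pages are accounted, region_add(head, 0, 4) grows the
 * placeholder to [0, 4).  A later region_chg(head, 2, 6) overlaps [0, 4), so
 * it returns only 6 - 4 = 2 extra pages to charge.
 */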
742a43a8c39SChen, Kenneth W 
743a43a8c39SChen, Kenneth W static long region_truncate(struct list_head *head, long end)
744a43a8c39SChen, Kenneth W {
745a43a8c39SChen, Kenneth W 	struct file_region *rg, *trg;
746a43a8c39SChen, Kenneth W 	long chg = 0;
747a43a8c39SChen, Kenneth W 
748a43a8c39SChen, Kenneth W 	/* Locate the region we are either in or before. */
749a43a8c39SChen, Kenneth W 	list_for_each_entry(rg, head, link)
750a43a8c39SChen, Kenneth W 		if (end <= rg->to)
751a43a8c39SChen, Kenneth W 			break;
752a43a8c39SChen, Kenneth W 	if (&rg->link == head)
753a43a8c39SChen, Kenneth W 		return 0;
754a43a8c39SChen, Kenneth W 
755a43a8c39SChen, Kenneth W 	/* If we are in the middle of a region then adjust it. */
756a43a8c39SChen, Kenneth W 	if (end > rg->from) {
757a43a8c39SChen, Kenneth W 		chg = rg->to - end;
758a43a8c39SChen, Kenneth W 		rg->to = end;
759a43a8c39SChen, Kenneth W 		rg = list_entry(rg->link.next, typeof(*rg), link);
760a43a8c39SChen, Kenneth W 	}
761a43a8c39SChen, Kenneth W 
762a43a8c39SChen, Kenneth W 	/* Drop any remaining regions. */
763a43a8c39SChen, Kenneth W 	list_for_each_entry_safe(rg, trg, rg->link.prev, link) {
764a43a8c39SChen, Kenneth W 		if (&rg->link == head)
765a43a8c39SChen, Kenneth W 			break;
766a43a8c39SChen, Kenneth W 		chg += rg->to - rg->from;
767a43a8c39SChen, Kenneth W 		list_del(&rg->link);
768a43a8c39SChen, Kenneth W 		kfree(rg);
769a43a8c39SChen, Kenneth W 	}
770a43a8c39SChen, Kenneth W 	return chg;
771a43a8c39SChen, Kenneth W }
772a43a8c39SChen, Kenneth W 
773a43a8c39SChen, Kenneth W static int hugetlb_acct_memory(long delta)
774a43a8c39SChen, Kenneth W {
775a43a8c39SChen, Kenneth W 	int ret = -ENOMEM;
776a43a8c39SChen, Kenneth W 
777a43a8c39SChen, Kenneth W 	spin_lock(&hugetlb_lock);
778a43a8c39SChen, Kenneth W 	if ((delta + resv_huge_pages) <= free_huge_pages) {
779a43a8c39SChen, Kenneth W 		resv_huge_pages += delta;
780a43a8c39SChen, Kenneth W 		ret = 0;
781a43a8c39SChen, Kenneth W 	}
782a43a8c39SChen, Kenneth W 	spin_unlock(&hugetlb_lock);
783a43a8c39SChen, Kenneth W 	return ret;
784a43a8c39SChen, Kenneth W }
785a43a8c39SChen, Kenneth W 
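/*
 * Reserve huge pages for the range [from, to) of an inode's mapping in two
 * steps: region_chg() computes how many pages are not yet covered (leaving
 * at most a zero-size placeholder behind), hugetlb_acct_memory() charges
 * them against the free pool, and only then does region_add() record the
 * range, so a failed charge records no reserved pages.
 */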
786a43a8c39SChen, Kenneth W int hugetlb_reserve_pages(struct inode *inode, long from, long to)
787a43a8c39SChen, Kenneth W {
788a43a8c39SChen, Kenneth W 	long ret, chg;
789a43a8c39SChen, Kenneth W 
790a43a8c39SChen, Kenneth W 	chg = region_chg(&inode->i_mapping->private_list, from, to);
791a43a8c39SChen, Kenneth W 	if (chg < 0)
792a43a8c39SChen, Kenneth W 		return chg;
793a43a8c39SChen, Kenneth W 	ret = hugetlb_acct_memory(chg);
794a43a8c39SChen, Kenneth W 	if (ret < 0)
795a43a8c39SChen, Kenneth W 		return ret;
796a43a8c39SChen, Kenneth W 	region_add(&inode->i_mapping->private_list, from, to);
797a43a8c39SChen, Kenneth W 	return 0;
798a43a8c39SChen, Kenneth W }
799a43a8c39SChen, Kenneth W 
800a43a8c39SChen, Kenneth W void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
801a43a8c39SChen, Kenneth W {
802a43a8c39SChen, Kenneth W 	long chg = region_truncate(&inode->i_mapping->private_list, offset);
803a43a8c39SChen, Kenneth W 	hugetlb_acct_memory(freed - chg);
804a43a8c39SChen, Kenneth W }
805