xref: /openbmc/linux/mm/hugetlb.c (revision 6b0c880d)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * Generic hugetlb support.
31da177e4SLinus Torvalds  * (C) William Irwin, April 2004
41da177e4SLinus Torvalds  */
51da177e4SLinus Torvalds #include <linux/gfp.h>
61da177e4SLinus Torvalds #include <linux/list.h>
71da177e4SLinus Torvalds #include <linux/init.h>
81da177e4SLinus Torvalds #include <linux/module.h>
91da177e4SLinus Torvalds #include <linux/mm.h>
101da177e4SLinus Torvalds #include <linux/sysctl.h>
111da177e4SLinus Torvalds #include <linux/highmem.h>
121da177e4SLinus Torvalds #include <linux/nodemask.h>
1363551ae0SDavid Gibson #include <linux/pagemap.h>
145da7ca86SChristoph Lameter #include <linux/mempolicy.h>
15aea47ff3SChristoph Lameter #include <linux/cpuset.h>
163935baa9SDavid Gibson #include <linux/mutex.h>
175da7ca86SChristoph Lameter 
1863551ae0SDavid Gibson #include <asm/page.h>
1963551ae0SDavid Gibson #include <asm/pgtable.h>
2063551ae0SDavid Gibson 
2163551ae0SDavid Gibson #include <linux/hugetlb.h>
227835e98bSNick Piggin #include "internal.h"
231da177e4SLinus Torvalds 
241da177e4SLinus Torvalds const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
25a43a8c39SChen, Kenneth W static unsigned long nr_huge_pages, free_huge_pages, resv_huge_pages;
267893d1d5SAdam Litke static unsigned long surplus_huge_pages;
271da177e4SLinus Torvalds unsigned long max_huge_pages;
281da177e4SLinus Torvalds static struct list_head hugepage_freelists[MAX_NUMNODES];
291da177e4SLinus Torvalds static unsigned int nr_huge_pages_node[MAX_NUMNODES];
301da177e4SLinus Torvalds static unsigned int free_huge_pages_node[MAX_NUMNODES];
317893d1d5SAdam Litke static unsigned int surplus_huge_pages_node[MAX_NUMNODES];
32396faf03SMel Gorman static gfp_t htlb_alloc_mask = GFP_HIGHUSER;
33396faf03SMel Gorman unsigned long hugepages_treat_as_movable;
3454f9f80dSAdam Litke int hugetlb_dynamic_pool;
35396faf03SMel Gorman 
363935baa9SDavid Gibson /*
373935baa9SDavid Gibson  * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages
383935baa9SDavid Gibson  */
393935baa9SDavid Gibson static DEFINE_SPINLOCK(hugetlb_lock);
400bd0f9fbSEric Paris 
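/*
 * Zero a huge page one base page at a time.  Clearing HPAGE_SIZE of
 * memory can take a while, so reschedule between base pages; callers
 * must be able to sleep.
 */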
4179ac6ba4SDavid Gibson static void clear_huge_page(struct page *page, unsigned long addr)
4279ac6ba4SDavid Gibson {
4379ac6ba4SDavid Gibson 	int i;
4479ac6ba4SDavid Gibson 
4579ac6ba4SDavid Gibson 	might_sleep();
4679ac6ba4SDavid Gibson 	for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); i++) {
4779ac6ba4SDavid Gibson 		cond_resched();
48281e0e3bSRalf Baechle 		clear_user_highpage(page + i, addr + i * PAGE_SIZE);
4979ac6ba4SDavid Gibson 	}
5079ac6ba4SDavid Gibson }
5179ac6ba4SDavid Gibson 
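/*
 * Copy a huge page from 'src' to 'dst' one base page at a time,
 * rescheduling between base pages; callers must be able to sleep.
 */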
5279ac6ba4SDavid Gibson static void copy_huge_page(struct page *dst, struct page *src,
539de455b2SAtsushi Nemoto 			   unsigned long addr, struct vm_area_struct *vma)
5479ac6ba4SDavid Gibson {
5579ac6ba4SDavid Gibson 	int i;
5679ac6ba4SDavid Gibson 
5779ac6ba4SDavid Gibson 	might_sleep();
5879ac6ba4SDavid Gibson 	for (i = 0; i < HPAGE_SIZE/PAGE_SIZE; i++) {
5979ac6ba4SDavid Gibson 		cond_resched();
609de455b2SAtsushi Nemoto 		copy_user_highpage(dst + i, src + i, addr + i*PAGE_SIZE, vma);
6179ac6ba4SDavid Gibson 	}
6279ac6ba4SDavid Gibson }
6379ac6ba4SDavid Gibson 
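/*
 * Place a free huge page on its node's free list and update the free
 * page counters.  Caller must hold hugetlb_lock.
 */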
641da177e4SLinus Torvalds static void enqueue_huge_page(struct page *page)
651da177e4SLinus Torvalds {
661da177e4SLinus Torvalds 	int nid = page_to_nid(page);
671da177e4SLinus Torvalds 	list_add(&page->lru, &hugepage_freelists[nid]);
681da177e4SLinus Torvalds 	free_huge_pages++;
691da177e4SLinus Torvalds 	free_huge_pages_node[nid]++;
701da177e4SLinus Torvalds }
711da177e4SLinus Torvalds 
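/*
 * Take a free huge page from the first node in the vma's zonelist that
 * is allowed by the cpuset and has pages available.  Shared
 * (VM_MAYSHARE) mappings consume one page from the reserve pool.
 * Caller must hold hugetlb_lock.
 */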
725da7ca86SChristoph Lameter static struct page *dequeue_huge_page(struct vm_area_struct *vma,
735da7ca86SChristoph Lameter 				unsigned long address)
741da177e4SLinus Torvalds {
7531a5c6e4SNishanth Aravamudan 	int nid;
761da177e4SLinus Torvalds 	struct page *page = NULL;
77480eccf9SLee Schermerhorn 	struct mempolicy *mpol;
78396faf03SMel Gorman 	struct zonelist *zonelist = huge_zonelist(vma, address,
79480eccf9SLee Schermerhorn 					htlb_alloc_mask, &mpol);
8096df9333SChristoph Lameter 	struct zone **z;
811da177e4SLinus Torvalds 
8296df9333SChristoph Lameter 	for (z = zonelist->zones; *z; z++) {
8389fa3024SChristoph Lameter 		nid = zone_to_nid(*z);
84396faf03SMel Gorman 		if (cpuset_zone_allowed_softwall(*z, htlb_alloc_mask) &&
853abf7afdSAndrew Morton 		    !list_empty(&hugepage_freelists[nid])) {
861da177e4SLinus Torvalds 			page = list_entry(hugepage_freelists[nid].next,
871da177e4SLinus Torvalds 					  struct page, lru);
881da177e4SLinus Torvalds 			list_del(&page->lru);
891da177e4SLinus Torvalds 			free_huge_pages--;
901da177e4SLinus Torvalds 			free_huge_pages_node[nid]--;
91e4e574b7SAdam Litke 			if (vma && vma->vm_flags & VM_MAYSHARE)
92e4e574b7SAdam Litke 				resv_huge_pages--;
935ab3ee7bSKen Chen 			break;
941da177e4SLinus Torvalds 		}
953abf7afdSAndrew Morton 	}
96480eccf9SLee Schermerhorn 	mpol_free(mpol);	/* unref if mpol !NULL */
971da177e4SLinus Torvalds 	return page;
981da177e4SLinus Torvalds }
991da177e4SLinus Torvalds 
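/*
 * Return a huge page to the buddy allocator: drop it from the hugetlb
 * counters, clear the per-page flags and the compound destructor, and
 * free the underlying HUGETLB_PAGE_ORDER block.  Caller must hold
 * hugetlb_lock.
 */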
1006af2acb6SAdam Litke static void update_and_free_page(struct page *page)
1016af2acb6SAdam Litke {
1026af2acb6SAdam Litke 	int i;
1036af2acb6SAdam Litke 	nr_huge_pages--;
1046af2acb6SAdam Litke 	nr_huge_pages_node[page_to_nid(page)]--;
1056af2acb6SAdam Litke 	for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) {
1066af2acb6SAdam Litke 		page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced |
1076af2acb6SAdam Litke 				1 << PG_dirty | 1 << PG_active | 1 << PG_reserved |
1086af2acb6SAdam Litke 				1 << PG_private | 1<< PG_writeback);
1096af2acb6SAdam Litke 	}
1106af2acb6SAdam Litke 	set_compound_page_dtor(page, NULL);
1116af2acb6SAdam Litke 	set_page_refcounted(page);
1126af2acb6SAdam Litke 	__free_pages(page, HUGETLB_PAGE_ORDER);
1136af2acb6SAdam Litke }
1146af2acb6SAdam Litke 
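/*
 * Compound page destructor, run when the last reference to a huge page
 * is dropped.  Surplus pages go straight back to the buddy allocator;
 * everything else returns to the free list.
 */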
11527a85ef1SDavid Gibson static void free_huge_page(struct page *page)
11627a85ef1SDavid Gibson {
1177893d1d5SAdam Litke 	int nid = page_to_nid(page);
11827a85ef1SDavid Gibson 
1197893d1d5SAdam Litke 	BUG_ON(page_count(page));
12027a85ef1SDavid Gibson 	INIT_LIST_HEAD(&page->lru);
12127a85ef1SDavid Gibson 
12227a85ef1SDavid Gibson 	spin_lock(&hugetlb_lock);
1237893d1d5SAdam Litke 	if (surplus_huge_pages_node[nid]) {
1247893d1d5SAdam Litke 		update_and_free_page(page);
1257893d1d5SAdam Litke 		surplus_huge_pages--;
1267893d1d5SAdam Litke 		surplus_huge_pages_node[nid]--;
1277893d1d5SAdam Litke 	} else {
12827a85ef1SDavid Gibson 		enqueue_huge_page(page);
1297893d1d5SAdam Litke 	}
13027a85ef1SDavid Gibson 	spin_unlock(&hugetlb_lock);
13127a85ef1SDavid Gibson }
13227a85ef1SDavid Gibson 
1337893d1d5SAdam Litke /*
1347893d1d5SAdam Litke  * Increment or decrement surplus_huge_pages.  Keep node-specific counters
1357893d1d5SAdam Litke  * balanced by operating on them in a round-robin fashion.
1367893d1d5SAdam Litke  * Returns 1 if an adjustment was made.
1377893d1d5SAdam Litke  */
1387893d1d5SAdam Litke static int adjust_pool_surplus(int delta)
1397893d1d5SAdam Litke {
1407893d1d5SAdam Litke 	static int prev_nid;
1417893d1d5SAdam Litke 	int nid = prev_nid;
1427893d1d5SAdam Litke 	int ret = 0;
1437893d1d5SAdam Litke 
1447893d1d5SAdam Litke 	VM_BUG_ON(delta != -1 && delta != 1);
1457893d1d5SAdam Litke 	do {
1467893d1d5SAdam Litke 		nid = next_node(nid, node_online_map);
1477893d1d5SAdam Litke 		if (nid == MAX_NUMNODES)
1487893d1d5SAdam Litke 			nid = first_node(node_online_map);
1497893d1d5SAdam Litke 
1507893d1d5SAdam Litke 		/* To shrink on this node, there must be a surplus page */
1517893d1d5SAdam Litke 		if (delta < 0 && !surplus_huge_pages_node[nid])
1527893d1d5SAdam Litke 			continue;
1537893d1d5SAdam Litke 		/* Surplus cannot exceed the total number of pages */
1547893d1d5SAdam Litke 		if (delta > 0 && surplus_huge_pages_node[nid] >=
1557893d1d5SAdam Litke 						nr_huge_pages_node[nid])
1567893d1d5SAdam Litke 			continue;
1577893d1d5SAdam Litke 
1587893d1d5SAdam Litke 		surplus_huge_pages += delta;
1597893d1d5SAdam Litke 		surplus_huge_pages_node[nid] += delta;
1607893d1d5SAdam Litke 		ret = 1;
1617893d1d5SAdam Litke 		break;
1627893d1d5SAdam Litke 	} while (nid != prev_nid);
1637893d1d5SAdam Litke 
1647893d1d5SAdam Litke 	prev_nid = nid;
1657893d1d5SAdam Litke 	return ret;
1667893d1d5SAdam Litke }
1677893d1d5SAdam Litke 
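/*
 * Allocate a new persistent huge page from the buddy allocator,
 * spreading allocations across the online nodes in round-robin order.
 * Returns 1 on success, 0 on failure; put_page() hands the fresh page
 * to free_huge_page(), which places it in the pool.
 */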
168a482289dSNick Piggin static int alloc_fresh_huge_page(void)
1691da177e4SLinus Torvalds {
170f96efd58SJoe Jin 	static int prev_nid;
1711da177e4SLinus Torvalds 	struct page *page;
172f96efd58SJoe Jin 	int nid;
173f96efd58SJoe Jin 
1747ed5cb2bSHugh Dickins 	/*
1757ed5cb2bSHugh Dickins 	 * Copy static prev_nid to local nid, work on that, then copy it
1767ed5cb2bSHugh Dickins 	 * back to prev_nid afterwards: otherwise there's a window in which
1777ed5cb2bSHugh Dickins 	 * a racer might pass invalid nid MAX_NUMNODES to alloc_pages_node.
1787ed5cb2bSHugh Dickins 	 * But we don't need to use a spin_lock here: it really doesn't
1797ed5cb2bSHugh Dickins 	 * matter if occasionally a racer chooses the same nid as we do.
1807ed5cb2bSHugh Dickins 	 */
181f96efd58SJoe Jin 	nid = next_node(prev_nid, node_online_map);
182fdb7cc59SPaul Jackson 	if (nid == MAX_NUMNODES)
183fdb7cc59SPaul Jackson 		nid = first_node(node_online_map);
184f96efd58SJoe Jin 	prev_nid = nid;
185f96efd58SJoe Jin 
186396faf03SMel Gorman 	page = alloc_pages_node(nid, htlb_alloc_mask|__GFP_COMP|__GFP_NOWARN,
187f96efd58SJoe Jin 					HUGETLB_PAGE_ORDER);
1881da177e4SLinus Torvalds 	if (page) {
18933f2ef89SAndy Whitcroft 		set_compound_page_dtor(page, free_huge_page);
1900bd0f9fbSEric Paris 		spin_lock(&hugetlb_lock);
1911da177e4SLinus Torvalds 		nr_huge_pages++;
1921da177e4SLinus Torvalds 		nr_huge_pages_node[page_to_nid(page)]++;
1930bd0f9fbSEric Paris 		spin_unlock(&hugetlb_lock);
194a482289dSNick Piggin 		put_page(page); /* free it into the hugepage allocator */
195a482289dSNick Piggin 		return 1;
1961da177e4SLinus Torvalds 	}
197a482289dSNick Piggin 	return 0;
1981da177e4SLinus Torvalds }
1991da177e4SLinus Torvalds 
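/*
 * Allocate a temporary "surplus" huge page straight from the buddy
 * allocator.  Only allowed when the hugetlb_dynamic_pool sysctl is
 * enabled; returns NULL otherwise or when the allocation fails.  The
 * vma/address arguments are currently unused.
 */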
2007893d1d5SAdam Litke static struct page *alloc_buddy_huge_page(struct vm_area_struct *vma,
2017893d1d5SAdam Litke 						unsigned long address)
2027893d1d5SAdam Litke {
2037893d1d5SAdam Litke 	struct page *page;
2047893d1d5SAdam Litke 
20554f9f80dSAdam Litke 	/* Check if the dynamic pool is enabled */
20654f9f80dSAdam Litke 	if (!hugetlb_dynamic_pool)
20754f9f80dSAdam Litke 		return NULL;
20854f9f80dSAdam Litke 
2097893d1d5SAdam Litke 	page = alloc_pages(htlb_alloc_mask|__GFP_COMP|__GFP_NOWARN,
2107893d1d5SAdam Litke 					HUGETLB_PAGE_ORDER);
2117893d1d5SAdam Litke 	if (page) {
2127893d1d5SAdam Litke 		set_compound_page_dtor(page, free_huge_page);
2137893d1d5SAdam Litke 		spin_lock(&hugetlb_lock);
2147893d1d5SAdam Litke 		nr_huge_pages++;
2157893d1d5SAdam Litke 		nr_huge_pages_node[page_to_nid(page)]++;
2167893d1d5SAdam Litke 		surplus_huge_pages++;
2177893d1d5SAdam Litke 		surplus_huge_pages_node[page_to_nid(page)]++;
2187893d1d5SAdam Litke 		spin_unlock(&hugetlb_lock);
2197893d1d5SAdam Litke 	}
2207893d1d5SAdam Litke 
2217893d1d5SAdam Litke 	return page;
2227893d1d5SAdam Litke }
2237893d1d5SAdam Litke 
224e4e574b7SAdam Litke /*
225e4e574b7SAdam Litke  * Increase the hugetlb pool such that it can accommodate a reservation
226e4e574b7SAdam Litke  * of size 'delta'.
227e4e574b7SAdam Litke  */
228e4e574b7SAdam Litke static int gather_surplus_pages(int delta)
229e4e574b7SAdam Litke {
230e4e574b7SAdam Litke 	struct list_head surplus_list;
231e4e574b7SAdam Litke 	struct page *page, *tmp;
232e4e574b7SAdam Litke 	int ret, i;
233e4e574b7SAdam Litke 	int needed, allocated;
234e4e574b7SAdam Litke 
235e4e574b7SAdam Litke 	needed = (resv_huge_pages + delta) - free_huge_pages;
236e4e574b7SAdam Litke 	if (needed <= 0)
237e4e574b7SAdam Litke 		return 0;
238e4e574b7SAdam Litke 
239e4e574b7SAdam Litke 	allocated = 0;
240e4e574b7SAdam Litke 	INIT_LIST_HEAD(&surplus_list);
241e4e574b7SAdam Litke 
242e4e574b7SAdam Litke 	ret = -ENOMEM;
243e4e574b7SAdam Litke retry:
244e4e574b7SAdam Litke 	spin_unlock(&hugetlb_lock);
245e4e574b7SAdam Litke 	for (i = 0; i < needed; i++) {
246e4e574b7SAdam Litke 		page = alloc_buddy_huge_page(NULL, 0);
247e4e574b7SAdam Litke 		if (!page) {
248e4e574b7SAdam Litke 			/*
249e4e574b7SAdam Litke 			 * We were not able to allocate enough pages to
250e4e574b7SAdam Litke 			 * satisfy the entire reservation so we free what
251e4e574b7SAdam Litke 			 * we've allocated so far.
252e4e574b7SAdam Litke 			 */
253e4e574b7SAdam Litke 			spin_lock(&hugetlb_lock);
254e4e574b7SAdam Litke 			needed = 0;
255e4e574b7SAdam Litke 			goto free;
256e4e574b7SAdam Litke 		}
257e4e574b7SAdam Litke 
258e4e574b7SAdam Litke 		list_add(&page->lru, &surplus_list);
259e4e574b7SAdam Litke 	}
260e4e574b7SAdam Litke 	allocated += needed;
261e4e574b7SAdam Litke 
262e4e574b7SAdam Litke 	/*
263e4e574b7SAdam Litke 	 * After retaking hugetlb_lock, we need to recalculate 'needed'
264e4e574b7SAdam Litke 	 * because either resv_huge_pages or free_huge_pages may have changed.
265e4e574b7SAdam Litke 	 */
266e4e574b7SAdam Litke 	spin_lock(&hugetlb_lock);
267e4e574b7SAdam Litke 	needed = (resv_huge_pages + delta) - (free_huge_pages + allocated);
268e4e574b7SAdam Litke 	if (needed > 0)
269e4e574b7SAdam Litke 		goto retry;
270e4e574b7SAdam Litke 
271e4e574b7SAdam Litke 	/*
272e4e574b7SAdam Litke 	 * The surplus_list now contains _at_least_ the number of extra pages
273e4e574b7SAdam Litke 	 * needed to accommodate the reservation.  Add the appropriate number
274e4e574b7SAdam Litke 	 * of pages to the hugetlb pool and free the extras back to the buddy
275e4e574b7SAdam Litke 	 * allocator.
276e4e574b7SAdam Litke 	 */
277e4e574b7SAdam Litke 	needed += allocated;
278e4e574b7SAdam Litke 	ret = 0;
279e4e574b7SAdam Litke free:
280e4e574b7SAdam Litke 	list_for_each_entry_safe(page, tmp, &surplus_list, lru) {
281e4e574b7SAdam Litke 		list_del(&page->lru);
282e4e574b7SAdam Litke 		if ((--needed) >= 0)
283e4e574b7SAdam Litke 			enqueue_huge_page(page);
284e4e574b7SAdam Litke 		else
285e4e574b7SAdam Litke 			update_and_free_page(page);
286e4e574b7SAdam Litke 	}
287e4e574b7SAdam Litke 
288e4e574b7SAdam Litke 	return ret;
289e4e574b7SAdam Litke }
290e4e574b7SAdam Litke 
291e4e574b7SAdam Litke /*
292e4e574b7SAdam Litke  * When releasing a hugetlb pool reservation, any surplus pages that were
293e4e574b7SAdam Litke  * allocated to satisfy the reservation must be explicitly freed if they were
294e4e574b7SAdam Litke  * never used.
295e4e574b7SAdam Litke  */
296e4e574b7SAdam Litke void return_unused_surplus_pages(unsigned long unused_resv_pages)
297e4e574b7SAdam Litke {
298e4e574b7SAdam Litke 	static int nid = -1;
299e4e574b7SAdam Litke 	struct page *page;
300e4e574b7SAdam Litke 	unsigned long nr_pages;
301e4e574b7SAdam Litke 
302e4e574b7SAdam Litke 	nr_pages = min(unused_resv_pages, surplus_huge_pages);
303e4e574b7SAdam Litke 
304e4e574b7SAdam Litke 	while (nr_pages) {
305e4e574b7SAdam Litke 		nid = next_node(nid, node_online_map);
306e4e574b7SAdam Litke 		if (nid == MAX_NUMNODES)
307e4e574b7SAdam Litke 			nid = first_node(node_online_map);
308e4e574b7SAdam Litke 
309e4e574b7SAdam Litke 		if (!surplus_huge_pages_node[nid])
310e4e574b7SAdam Litke 			continue;
311e4e574b7SAdam Litke 
312e4e574b7SAdam Litke 		if (!list_empty(&hugepage_freelists[nid])) {
313e4e574b7SAdam Litke 			page = list_entry(hugepage_freelists[nid].next,
314e4e574b7SAdam Litke 					  struct page, lru);
315e4e574b7SAdam Litke 			list_del(&page->lru);
316e4e574b7SAdam Litke 			update_and_free_page(page);
317e4e574b7SAdam Litke 			free_huge_pages--;
318e4e574b7SAdam Litke 			free_huge_pages_node[nid]--;
319e4e574b7SAdam Litke 			surplus_huge_pages--;
320e4e574b7SAdam Litke 			surplus_huge_pages_node[nid]--;
321e4e574b7SAdam Litke 			nr_pages--;
322e4e574b7SAdam Litke 		}
323e4e574b7SAdam Litke 	}
324e4e574b7SAdam Litke }
325e4e574b7SAdam Litke 
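/*
 * Allocate a huge page for a fault.  Shared (VM_MAYSHARE) mappings may
 * dip into the reserved pool; private mappings may only use free pages
 * in excess of the reserves and, failing that, fall back to a surplus
 * page from the buddy allocator.
 */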
32627a85ef1SDavid Gibson static struct page *alloc_huge_page(struct vm_area_struct *vma,
32727a85ef1SDavid Gibson 				    unsigned long addr)
3281da177e4SLinus Torvalds {
3297893d1d5SAdam Litke 	struct page *page = NULL;
330e4e574b7SAdam Litke 	int use_reserved_page = vma->vm_flags & VM_MAYSHARE;
3311da177e4SLinus Torvalds 
3321da177e4SLinus Torvalds 	spin_lock(&hugetlb_lock);
333e4e574b7SAdam Litke 	if (!use_reserved_page && (free_huge_pages <= resv_huge_pages))
334b45b5bd6SDavid Gibson 		goto fail;
335b45b5bd6SDavid Gibson 
336b45b5bd6SDavid Gibson 	page = dequeue_huge_page(vma, addr);
337b45b5bd6SDavid Gibson 	if (!page)
338b45b5bd6SDavid Gibson 		goto fail;
339b45b5bd6SDavid Gibson 
3401da177e4SLinus Torvalds 	spin_unlock(&hugetlb_lock);
3417835e98bSNick Piggin 	set_page_refcounted(page);
3421da177e4SLinus Torvalds 	return page;
343b45b5bd6SDavid Gibson 
344b45b5bd6SDavid Gibson fail:
345b45b5bd6SDavid Gibson 	spin_unlock(&hugetlb_lock);
3467893d1d5SAdam Litke 
3477893d1d5SAdam Litke 	/*
3487893d1d5SAdam Litke 	 * Private mappings do not use reserved huge pages so the allocation
3497893d1d5SAdam Litke 	 * may have failed due to an undersized hugetlb pool.  Try to grab a
3507893d1d5SAdam Litke 	 * surplus huge page from the buddy allocator.
3517893d1d5SAdam Litke 	 */
352e4e574b7SAdam Litke 	if (!use_reserved_page)
3537893d1d5SAdam Litke 		page = alloc_buddy_huge_page(vma, addr);
3547893d1d5SAdam Litke 
3557893d1d5SAdam Litke 	return page;
356b45b5bd6SDavid Gibson }
357b45b5bd6SDavid Gibson 
3581da177e4SLinus Torvalds static int __init hugetlb_init(void)
3591da177e4SLinus Torvalds {
3601da177e4SLinus Torvalds 	unsigned long i;
3611da177e4SLinus Torvalds 
3623c726f8dSBenjamin Herrenschmidt 	if (HPAGE_SHIFT == 0)
3633c726f8dSBenjamin Herrenschmidt 		return 0;
3643c726f8dSBenjamin Herrenschmidt 
3651da177e4SLinus Torvalds 	for (i = 0; i < MAX_NUMNODES; ++i)
3661da177e4SLinus Torvalds 		INIT_LIST_HEAD(&hugepage_freelists[i]);
3671da177e4SLinus Torvalds 
3681da177e4SLinus Torvalds 	for (i = 0; i < max_huge_pages; ++i) {
369a482289dSNick Piggin 		if (!alloc_fresh_huge_page())
3701da177e4SLinus Torvalds 			break;
3711da177e4SLinus Torvalds 	}
3721da177e4SLinus Torvalds 	max_huge_pages = free_huge_pages = nr_huge_pages = i;
3731da177e4SLinus Torvalds 	printk(KERN_INFO "Total HugeTLB memory allocated, %ld\n", free_huge_pages);
3741da177e4SLinus Torvalds 	return 0;
3751da177e4SLinus Torvalds }
3761da177e4SLinus Torvalds module_init(hugetlb_init);
3771da177e4SLinus Torvalds 
3781da177e4SLinus Torvalds static int __init hugetlb_setup(char *s)
3791da177e4SLinus Torvalds {
3801da177e4SLinus Torvalds 	if (sscanf(s, "%lu", &max_huge_pages) <= 0)
3811da177e4SLinus Torvalds 		max_huge_pages = 0;
3821da177e4SLinus Torvalds 	return 1;
3831da177e4SLinus Torvalds }
3841da177e4SLinus Torvalds __setup("hugepages=", hugetlb_setup);
3851da177e4SLinus Torvalds 
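/* Sum a per-node counter array over the nodes in the current cpuset. */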
3868a630112SKen Chen static unsigned int cpuset_mems_nr(unsigned int *array)
3878a630112SKen Chen {
3888a630112SKen Chen 	int node;
3898a630112SKen Chen 	unsigned int nr = 0;
3908a630112SKen Chen 
3918a630112SKen Chen 	for_each_node_mask(node, cpuset_current_mems_allowed)
3928a630112SKen Chen 		nr += array[node];
3938a630112SKen Chen 
3948a630112SKen Chen 	return nr;
3958a630112SKen Chen }
3968a630112SKen Chen 
3971da177e4SLinus Torvalds #ifdef CONFIG_SYSCTL
3981da177e4SLinus Torvalds #ifdef CONFIG_HIGHMEM
3991da177e4SLinus Torvalds static void try_to_free_low(unsigned long count)
4001da177e4SLinus Torvalds {
4014415cc8dSChristoph Lameter 	int i;
4024415cc8dSChristoph Lameter 
4031da177e4SLinus Torvalds 	for (i = 0; i < MAX_NUMNODES; ++i) {
4041da177e4SLinus Torvalds 		struct page *page, *next;
4051da177e4SLinus Torvalds 		list_for_each_entry_safe(page, next, &hugepage_freelists[i], lru) {
4066b0c880dSAdam Litke 			if (count >= nr_huge_pages)
4076b0c880dSAdam Litke 				return;
4081da177e4SLinus Torvalds 			if (PageHighMem(page))
4091da177e4SLinus Torvalds 				continue;
4101da177e4SLinus Torvalds 			list_del(&page->lru);
4111da177e4SLinus Torvalds 			update_and_free_page(page);
4121da177e4SLinus Torvalds 			free_huge_pages--;
4134415cc8dSChristoph Lameter 			free_huge_pages_node[page_to_nid(page)]--;
4141da177e4SLinus Torvalds 		}
4151da177e4SLinus Torvalds 	}
4161da177e4SLinus Torvalds }
4171da177e4SLinus Torvalds #else
4181da177e4SLinus Torvalds static inline void try_to_free_low(unsigned long count)
4191da177e4SLinus Torvalds {
4201da177e4SLinus Torvalds }
4211da177e4SLinus Torvalds #endif
4221da177e4SLinus Torvalds 
4237893d1d5SAdam Litke #define persistent_huge_pages (nr_huge_pages - surplus_huge_pages)
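/*
 * Resize the persistent huge page pool to 'count' pages.  Growing first
 * converts surplus pages to persistent ones, then allocates fresh huge
 * pages.  Shrinking frees unreserved free pages back to the buddy
 * allocator (preferring lowmem on highmem configurations) and marks any
 * remaining excess as surplus so it is freed as the pages become
 * unused.  Returns the resulting number of persistent huge pages.
 */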
4241da177e4SLinus Torvalds static unsigned long set_max_huge_pages(unsigned long count)
4251da177e4SLinus Torvalds {
4267893d1d5SAdam Litke 	unsigned long min_count, ret;
4271da177e4SLinus Torvalds 
4287893d1d5SAdam Litke 	/*
4297893d1d5SAdam Litke 	 * Increase the pool size
4307893d1d5SAdam Litke 	 * First take pages out of surplus state.  Then make up the
4317893d1d5SAdam Litke 	 * remaining difference by allocating fresh huge pages.
4327893d1d5SAdam Litke 	 */
4331da177e4SLinus Torvalds 	spin_lock(&hugetlb_lock);
4347893d1d5SAdam Litke 	while (surplus_huge_pages && count > persistent_huge_pages) {
4357893d1d5SAdam Litke 		if (!adjust_pool_surplus(-1))
4367893d1d5SAdam Litke 			break;
4377893d1d5SAdam Litke 	}
4387893d1d5SAdam Litke 
4397893d1d5SAdam Litke 	while (count > persistent_huge_pages) {
4407893d1d5SAdam Litke 		int ret;
4417893d1d5SAdam Litke 		/*
4427893d1d5SAdam Litke 		 * If this allocation races such that we no longer need the
4437893d1d5SAdam Litke 		 * page, free_huge_page will handle it by freeing the page
4447893d1d5SAdam Litke 		 * and reducing the surplus.
4457893d1d5SAdam Litke 		 */
4467893d1d5SAdam Litke 		spin_unlock(&hugetlb_lock);
4477893d1d5SAdam Litke 		ret = alloc_fresh_huge_page();
4487893d1d5SAdam Litke 		spin_lock(&hugetlb_lock);
4497893d1d5SAdam Litke 		if (!ret)
4507893d1d5SAdam Litke 			goto out;
4517893d1d5SAdam Litke 
4527893d1d5SAdam Litke 	}
4537893d1d5SAdam Litke 
4547893d1d5SAdam Litke 	/*
4557893d1d5SAdam Litke 	 * Decrease the pool size
4567893d1d5SAdam Litke 	 * First return free pages to the buddy allocator (being careful
4577893d1d5SAdam Litke 	 * to keep enough around to satisfy reservations).  Then place
4587893d1d5SAdam Litke 	 * pages into surplus state as needed so the pool will shrink
4597893d1d5SAdam Litke 	 * to the desired size as pages become free.
4607893d1d5SAdam Litke 	 */
4616b0c880dSAdam Litke 	min_count = resv_huge_pages + nr_huge_pages - free_huge_pages;
4626b0c880dSAdam Litke 	min_count = max(count, min_count);
4637893d1d5SAdam Litke 	try_to_free_low(min_count);
4647893d1d5SAdam Litke 	while (min_count < persistent_huge_pages) {
4655da7ca86SChristoph Lameter 		struct page *page = dequeue_huge_page(NULL, 0);
4661da177e4SLinus Torvalds 		if (!page)
4671da177e4SLinus Torvalds 			break;
4681da177e4SLinus Torvalds 		update_and_free_page(page);
4691da177e4SLinus Torvalds 	}
4707893d1d5SAdam Litke 	while (count < persistent_huge_pages) {
4717893d1d5SAdam Litke 		if (!adjust_pool_surplus(1))
4727893d1d5SAdam Litke 			break;
4737893d1d5SAdam Litke 	}
4747893d1d5SAdam Litke out:
4757893d1d5SAdam Litke 	ret = persistent_huge_pages;
4761da177e4SLinus Torvalds 	spin_unlock(&hugetlb_lock);
4777893d1d5SAdam Litke 	return ret;
4781da177e4SLinus Torvalds }
4791da177e4SLinus Torvalds 
4801da177e4SLinus Torvalds int hugetlb_sysctl_handler(struct ctl_table *table, int write,
4811da177e4SLinus Torvalds 			   struct file *file, void __user *buffer,
4821da177e4SLinus Torvalds 			   size_t *length, loff_t *ppos)
4831da177e4SLinus Torvalds {
4841da177e4SLinus Torvalds 	proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
4851da177e4SLinus Torvalds 	max_huge_pages = set_max_huge_pages(max_huge_pages);
4861da177e4SLinus Torvalds 	return 0;
4871da177e4SLinus Torvalds }
488396faf03SMel Gorman 
489396faf03SMel Gorman int hugetlb_treat_movable_handler(struct ctl_table *table, int write,
490396faf03SMel Gorman 			struct file *file, void __user *buffer,
491396faf03SMel Gorman 			size_t *length, loff_t *ppos)
492396faf03SMel Gorman {
493396faf03SMel Gorman 	proc_dointvec(table, write, file, buffer, length, ppos);
494396faf03SMel Gorman 	if (hugepages_treat_as_movable)
495396faf03SMel Gorman 		htlb_alloc_mask = GFP_HIGHUSER_MOVABLE;
496396faf03SMel Gorman 	else
497396faf03SMel Gorman 		htlb_alloc_mask = GFP_HIGHUSER;
498396faf03SMel Gorman 	return 0;
499396faf03SMel Gorman }
500396faf03SMel Gorman 
5011da177e4SLinus Torvalds #endif /* CONFIG_SYSCTL */
5021da177e4SLinus Torvalds 
5031da177e4SLinus Torvalds int hugetlb_report_meminfo(char *buf)
5041da177e4SLinus Torvalds {
5051da177e4SLinus Torvalds 	return sprintf(buf,
5061da177e4SLinus Torvalds 			"HugePages_Total: %5lu\n"
5071da177e4SLinus Torvalds 			"HugePages_Free:  %5lu\n"
508b45b5bd6SDavid Gibson 			"HugePages_Rsvd:  %5lu\n"
5097893d1d5SAdam Litke 			"HugePages_Surp:  %5lu\n"
5101da177e4SLinus Torvalds 			"Hugepagesize:    %5lu kB\n",
5111da177e4SLinus Torvalds 			nr_huge_pages,
5121da177e4SLinus Torvalds 			free_huge_pages,
513a43a8c39SChen, Kenneth W 			resv_huge_pages,
5147893d1d5SAdam Litke 			surplus_huge_pages,
5151da177e4SLinus Torvalds 			HPAGE_SIZE/1024);
5161da177e4SLinus Torvalds }
5171da177e4SLinus Torvalds 
5181da177e4SLinus Torvalds int hugetlb_report_node_meminfo(int nid, char *buf)
5191da177e4SLinus Torvalds {
5201da177e4SLinus Torvalds 	return sprintf(buf,
5211da177e4SLinus Torvalds 		"Node %d HugePages_Total: %5u\n"
5221da177e4SLinus Torvalds 		"Node %d HugePages_Free:  %5u\n",
5231da177e4SLinus Torvalds 		nid, nr_huge_pages_node[nid],
5241da177e4SLinus Torvalds 		nid, free_huge_pages_node[nid]);
5251da177e4SLinus Torvalds }
5261da177e4SLinus Torvalds 
5271da177e4SLinus Torvalds /* Return the number of pages of memory we physically have, in PAGE_SIZE units. */
5281da177e4SLinus Torvalds unsigned long hugetlb_total_pages(void)
5291da177e4SLinus Torvalds {
5301da177e4SLinus Torvalds 	return nr_huge_pages * (HPAGE_SIZE / PAGE_SIZE);
5311da177e4SLinus Torvalds }
5321da177e4SLinus Torvalds 
5331da177e4SLinus Torvalds /*
5341da177e4SLinus Torvalds  * We cannot handle pagefaults against hugetlb pages at all.  They cause
5351da177e4SLinus Torvalds  * handle_mm_fault() to try to instantiate regular-sized pages in the
5361da177e4SLinus Torvalds  * hugepage VMA.  do_page_fault() is supposed to trap this, so BUG if we get
5371da177e4SLinus Torvalds  * this far.
5381da177e4SLinus Torvalds  */
539d0217ac0SNick Piggin static int hugetlb_vm_op_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
5401da177e4SLinus Torvalds {
5411da177e4SLinus Torvalds 	BUG();
542d0217ac0SNick Piggin 	return 0;
5431da177e4SLinus Torvalds }
5441da177e4SLinus Torvalds 
5451da177e4SLinus Torvalds struct vm_operations_struct hugetlb_vm_ops = {
546d0217ac0SNick Piggin 	.fault = hugetlb_vm_op_fault,
5471da177e4SLinus Torvalds };
5481da177e4SLinus Torvalds 
5491e8f889bSDavid Gibson static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page,
5501e8f889bSDavid Gibson 				int writable)
55163551ae0SDavid Gibson {
55263551ae0SDavid Gibson 	pte_t entry;
55363551ae0SDavid Gibson 
5541e8f889bSDavid Gibson 	if (writable) {
55563551ae0SDavid Gibson 		entry =
55663551ae0SDavid Gibson 		    pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
55763551ae0SDavid Gibson 	} else {
55863551ae0SDavid Gibson 		entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
55963551ae0SDavid Gibson 	}
56063551ae0SDavid Gibson 	entry = pte_mkyoung(entry);
56163551ae0SDavid Gibson 	entry = pte_mkhuge(entry);
56263551ae0SDavid Gibson 
56363551ae0SDavid Gibson 	return entry;
56463551ae0SDavid Gibson }
56563551ae0SDavid Gibson 
5661e8f889bSDavid Gibson static void set_huge_ptep_writable(struct vm_area_struct *vma,
5671e8f889bSDavid Gibson 				   unsigned long address, pte_t *ptep)
5681e8f889bSDavid Gibson {
5691e8f889bSDavid Gibson 	pte_t entry;
5701e8f889bSDavid Gibson 
5711e8f889bSDavid Gibson 	entry = pte_mkwrite(pte_mkdirty(*ptep));
5728dab5241SBenjamin Herrenschmidt 	if (ptep_set_access_flags(vma, address, ptep, entry, 1)) {
5731e8f889bSDavid Gibson 		update_mmu_cache(vma, address, entry);
5741e8f889bSDavid Gibson 	}
5758dab5241SBenjamin Herrenschmidt }
5761e8f889bSDavid Gibson 
5771e8f889bSDavid Gibson 
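/*
 * Copy the huge page mappings of 'vma' from the parent mm 'src' to the
 * child mm 'dst' at fork time.  For private, writable mappings the
 * parent's ptes are write-protected so both processes will COW; each
 * mapped page gains a reference.  Returns -ENOMEM if a child pte
 * cannot be allocated.
 */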
57863551ae0SDavid Gibson int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
57963551ae0SDavid Gibson 			    struct vm_area_struct *vma)
58063551ae0SDavid Gibson {
58163551ae0SDavid Gibson 	pte_t *src_pte, *dst_pte, entry;
58263551ae0SDavid Gibson 	struct page *ptepage;
5831c59827dSHugh Dickins 	unsigned long addr;
5841e8f889bSDavid Gibson 	int cow;
5851e8f889bSDavid Gibson 
5861e8f889bSDavid Gibson 	cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
58763551ae0SDavid Gibson 
5881c59827dSHugh Dickins 	for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
589c74df32cSHugh Dickins 		src_pte = huge_pte_offset(src, addr);
590c74df32cSHugh Dickins 		if (!src_pte)
591c74df32cSHugh Dickins 			continue;
59263551ae0SDavid Gibson 		dst_pte = huge_pte_alloc(dst, addr);
59363551ae0SDavid Gibson 		if (!dst_pte)
59463551ae0SDavid Gibson 			goto nomem;
595c74df32cSHugh Dickins 		spin_lock(&dst->page_table_lock);
5961c59827dSHugh Dickins 		spin_lock(&src->page_table_lock);
597c74df32cSHugh Dickins 		if (!pte_none(*src_pte)) {
5981e8f889bSDavid Gibson 			if (cow)
5991e8f889bSDavid Gibson 				ptep_set_wrprotect(src, addr, src_pte);
60063551ae0SDavid Gibson 			entry = *src_pte;
60163551ae0SDavid Gibson 			ptepage = pte_page(entry);
60263551ae0SDavid Gibson 			get_page(ptepage);
60363551ae0SDavid Gibson 			set_huge_pte_at(dst, addr, dst_pte, entry);
6041c59827dSHugh Dickins 		}
6051c59827dSHugh Dickins 		spin_unlock(&src->page_table_lock);
606c74df32cSHugh Dickins 		spin_unlock(&dst->page_table_lock);
60763551ae0SDavid Gibson 	}
60863551ae0SDavid Gibson 	return 0;
60963551ae0SDavid Gibson 
61063551ae0SDavid Gibson nomem:
61163551ae0SDavid Gibson 	return -ENOMEM;
61263551ae0SDavid Gibson }
61363551ae0SDavid Gibson 
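/*
 * Tear down the huge ptes in [start, end).  The unmapped pages are
 * gathered on a local list under page_table_lock and only released
 * after the TLB has been flushed.  The caller must hold the mapping's
 * i_mmap_lock (see unmap_hugepage_range() below).
 */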
614502717f4SChen, Kenneth W void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
61563551ae0SDavid Gibson 			    unsigned long end)
61663551ae0SDavid Gibson {
61763551ae0SDavid Gibson 	struct mm_struct *mm = vma->vm_mm;
61863551ae0SDavid Gibson 	unsigned long address;
619c7546f8fSDavid Gibson 	pte_t *ptep;
62063551ae0SDavid Gibson 	pte_t pte;
62163551ae0SDavid Gibson 	struct page *page;
622fe1668aeSChen, Kenneth W 	struct page *tmp;
623c0a499c2SChen, Kenneth W 	/*
624c0a499c2SChen, Kenneth W 	 * A page gathering list, protected by per file i_mmap_lock. The
625c0a499c2SChen, Kenneth W 	 * lock is used to avoid list corruption from multiple unmapping
626c0a499c2SChen, Kenneth W 	 * of the same page since we are using page->lru.
627c0a499c2SChen, Kenneth W 	 */
628fe1668aeSChen, Kenneth W 	LIST_HEAD(page_list);
62963551ae0SDavid Gibson 
63063551ae0SDavid Gibson 	WARN_ON(!is_vm_hugetlb_page(vma));
63163551ae0SDavid Gibson 	BUG_ON(start & ~HPAGE_MASK);
63263551ae0SDavid Gibson 	BUG_ON(end & ~HPAGE_MASK);
63363551ae0SDavid Gibson 
634508034a3SHugh Dickins 	spin_lock(&mm->page_table_lock);
63563551ae0SDavid Gibson 	for (address = start; address < end; address += HPAGE_SIZE) {
636c7546f8fSDavid Gibson 		ptep = huge_pte_offset(mm, address);
637c7546f8fSDavid Gibson 		if (!ptep)
638c7546f8fSDavid Gibson 			continue;
639c7546f8fSDavid Gibson 
64039dde65cSChen, Kenneth W 		if (huge_pmd_unshare(mm, &address, ptep))
64139dde65cSChen, Kenneth W 			continue;
64239dde65cSChen, Kenneth W 
643c7546f8fSDavid Gibson 		pte = huge_ptep_get_and_clear(mm, address, ptep);
64463551ae0SDavid Gibson 		if (pte_none(pte))
64563551ae0SDavid Gibson 			continue;
646c7546f8fSDavid Gibson 
64763551ae0SDavid Gibson 		page = pte_page(pte);
6486649a386SKen Chen 		if (pte_dirty(pte))
6496649a386SKen Chen 			set_page_dirty(page);
650fe1668aeSChen, Kenneth W 		list_add(&page->lru, &page_list);
65163551ae0SDavid Gibson 	}
6521da177e4SLinus Torvalds 	spin_unlock(&mm->page_table_lock);
653508034a3SHugh Dickins 	flush_tlb_range(vma, start, end);
654fe1668aeSChen, Kenneth W 	list_for_each_entry_safe(page, tmp, &page_list, lru) {
655fe1668aeSChen, Kenneth W 		list_del(&page->lru);
656fe1668aeSChen, Kenneth W 		put_page(page);
657fe1668aeSChen, Kenneth W 	}
6581da177e4SLinus Torvalds }
65963551ae0SDavid Gibson 
660502717f4SChen, Kenneth W void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
661502717f4SChen, Kenneth W 			  unsigned long end)
662502717f4SChen, Kenneth W {
663502717f4SChen, Kenneth W 	/*
664502717f4SChen, Kenneth W 	 * It is undesirable to test vma->vm_file as it should be non-null
665502717f4SChen, Kenneth W 	 * for valid hugetlb area. However, vm_file will be NULL in the error
666502717f4SChen, Kenneth W 	 * cleanup path of do_mmap_pgoff. When hugetlbfs ->mmap method fails,
667502717f4SChen, Kenneth W 	 * do_mmap_pgoff() nullifies vma->vm_file before calling this function
668502717f4SChen, Kenneth W 	 * to clean up. Since no pte has actually been setup, it is safe to
669502717f4SChen, Kenneth W 	 * do nothing in this case.
670502717f4SChen, Kenneth W 	 */
671502717f4SChen, Kenneth W 	if (vma->vm_file) {
672502717f4SChen, Kenneth W 		spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
673502717f4SChen, Kenneth W 		__unmap_hugepage_range(vma, start, end);
674502717f4SChen, Kenneth W 		spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
675502717f4SChen, Kenneth W 	}
676502717f4SChen, Kenneth W }
677502717f4SChen, Kenneth W 
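/*
 * Break copy-on-write for a huge page.  If we hold the only reference
 * the pte is simply made writable; otherwise a new huge page is
 * allocated, the contents are copied (with page_table_lock dropped
 * around the copy) and the new page is mapped writable, provided the
 * pte has not changed in the meantime.  Called and returns with
 * page_table_lock held.
 */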
6781e8f889bSDavid Gibson static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
6791e8f889bSDavid Gibson 			unsigned long address, pte_t *ptep, pte_t pte)
6801e8f889bSDavid Gibson {
6811e8f889bSDavid Gibson 	struct page *old_page, *new_page;
68279ac6ba4SDavid Gibson 	int avoidcopy;
6831e8f889bSDavid Gibson 
6841e8f889bSDavid Gibson 	old_page = pte_page(pte);
6851e8f889bSDavid Gibson 
6861e8f889bSDavid Gibson 	/* If no-one else is actually using this page, avoid the copy
6871e8f889bSDavid Gibson 	 * and just make the page writable */
6881e8f889bSDavid Gibson 	avoidcopy = (page_count(old_page) == 1);
6891e8f889bSDavid Gibson 	if (avoidcopy) {
6901e8f889bSDavid Gibson 		set_huge_ptep_writable(vma, address, ptep);
69183c54070SNick Piggin 		return 0;
6921e8f889bSDavid Gibson 	}
6931e8f889bSDavid Gibson 
6941e8f889bSDavid Gibson 	page_cache_get(old_page);
6955da7ca86SChristoph Lameter 	new_page = alloc_huge_page(vma, address);
6961e8f889bSDavid Gibson 
6971e8f889bSDavid Gibson 	if (!new_page) {
6981e8f889bSDavid Gibson 		page_cache_release(old_page);
6990df420d8SChristoph Lameter 		return VM_FAULT_OOM;
7001e8f889bSDavid Gibson 	}
7011e8f889bSDavid Gibson 
7021e8f889bSDavid Gibson 	spin_unlock(&mm->page_table_lock);
7039de455b2SAtsushi Nemoto 	copy_huge_page(new_page, old_page, address, vma);
7041e8f889bSDavid Gibson 	spin_lock(&mm->page_table_lock);
7051e8f889bSDavid Gibson 
7061e8f889bSDavid Gibson 	ptep = huge_pte_offset(mm, address & HPAGE_MASK);
7071e8f889bSDavid Gibson 	if (likely(pte_same(*ptep, pte))) {
7081e8f889bSDavid Gibson 		/* Break COW */
7091e8f889bSDavid Gibson 		set_huge_pte_at(mm, address, ptep,
7101e8f889bSDavid Gibson 				make_huge_pte(vma, new_page, 1));
7111e8f889bSDavid Gibson 		/* Make the old page be freed below */
7121e8f889bSDavid Gibson 		new_page = old_page;
7131e8f889bSDavid Gibson 	}
7141e8f889bSDavid Gibson 	page_cache_release(new_page);
7151e8f889bSDavid Gibson 	page_cache_release(old_page);
71683c54070SNick Piggin 	return 0;
7171e8f889bSDavid Gibson }
7181e8f889bSDavid Gibson 
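/*
 * Handle a fault on a huge pte that is not yet present.  Look the page
 * up in the page cache, or allocate and zero a fresh one, charging the
 * hugetlbfs quota; shared mappings add the new page to the page cache.
 * The file size and the pte are re-checked under page_table_lock before
 * the new pte is installed, and private write faults do the COW
 * immediately.
 */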
719a1ed3ddaSRobert P. J. Day static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
7201e8f889bSDavid Gibson 			unsigned long address, pte_t *ptep, int write_access)
721ac9b9c66SHugh Dickins {
722ac9b9c66SHugh Dickins 	int ret = VM_FAULT_SIGBUS;
7234c887265SAdam Litke 	unsigned long idx;
7244c887265SAdam Litke 	unsigned long size;
7254c887265SAdam Litke 	struct page *page;
7264c887265SAdam Litke 	struct address_space *mapping;
7271e8f889bSDavid Gibson 	pte_t new_pte;
7284c887265SAdam Litke 
7294c887265SAdam Litke 	mapping = vma->vm_file->f_mapping;
7304c887265SAdam Litke 	idx = ((address - vma->vm_start) >> HPAGE_SHIFT)
7314c887265SAdam Litke 		+ (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
7324c887265SAdam Litke 
7334c887265SAdam Litke 	/*
7344c887265SAdam Litke 	 * Use page lock to guard against racing truncation
7354c887265SAdam Litke 	 * before we get page_table_lock.
7364c887265SAdam Litke 	 */
7376bda666aSChristoph Lameter retry:
7386bda666aSChristoph Lameter 	page = find_lock_page(mapping, idx);
7396bda666aSChristoph Lameter 	if (!page) {
740ebed4bfcSHugh Dickins 		size = i_size_read(mapping->host) >> HPAGE_SHIFT;
741ebed4bfcSHugh Dickins 		if (idx >= size)
742ebed4bfcSHugh Dickins 			goto out;
7436bda666aSChristoph Lameter 		if (hugetlb_get_quota(mapping))
7444c887265SAdam Litke 			goto out;
7456bda666aSChristoph Lameter 		page = alloc_huge_page(vma, address);
7466bda666aSChristoph Lameter 		if (!page) {
7476bda666aSChristoph Lameter 			hugetlb_put_quota(mapping);
7480df420d8SChristoph Lameter 			ret = VM_FAULT_OOM;
7496bda666aSChristoph Lameter 			goto out;
7506bda666aSChristoph Lameter 		}
75179ac6ba4SDavid Gibson 		clear_huge_page(page, address);
752ac9b9c66SHugh Dickins 
7536bda666aSChristoph Lameter 		if (vma->vm_flags & VM_SHARED) {
7546bda666aSChristoph Lameter 			int err;
7556bda666aSChristoph Lameter 
7566bda666aSChristoph Lameter 			err = add_to_page_cache(page, mapping, idx, GFP_KERNEL);
7576bda666aSChristoph Lameter 			if (err) {
7586bda666aSChristoph Lameter 				put_page(page);
7596bda666aSChristoph Lameter 				hugetlb_put_quota(mapping);
7606bda666aSChristoph Lameter 				if (err == -EEXIST)
7616bda666aSChristoph Lameter 					goto retry;
7626bda666aSChristoph Lameter 				goto out;
7636bda666aSChristoph Lameter 			}
7646bda666aSChristoph Lameter 		} else
7656bda666aSChristoph Lameter 			lock_page(page);
7666bda666aSChristoph Lameter 	}
7671e8f889bSDavid Gibson 
768ac9b9c66SHugh Dickins 	spin_lock(&mm->page_table_lock);
7694c887265SAdam Litke 	size = i_size_read(mapping->host) >> HPAGE_SHIFT;
7704c887265SAdam Litke 	if (idx >= size)
7714c887265SAdam Litke 		goto backout;
7724c887265SAdam Litke 
77383c54070SNick Piggin 	ret = 0;
77486e5216fSAdam Litke 	if (!pte_none(*ptep))
7754c887265SAdam Litke 		goto backout;
7764c887265SAdam Litke 
7771e8f889bSDavid Gibson 	new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE)
7781e8f889bSDavid Gibson 				&& (vma->vm_flags & VM_SHARED)));
7791e8f889bSDavid Gibson 	set_huge_pte_at(mm, address, ptep, new_pte);
7801e8f889bSDavid Gibson 
7811e8f889bSDavid Gibson 	if (write_access && !(vma->vm_flags & VM_SHARED)) {
7821e8f889bSDavid Gibson 		/* Optimization, do the COW without a second fault */
7831e8f889bSDavid Gibson 		ret = hugetlb_cow(mm, vma, address, ptep, new_pte);
7841e8f889bSDavid Gibson 	}
7851e8f889bSDavid Gibson 
786ac9b9c66SHugh Dickins 	spin_unlock(&mm->page_table_lock);
7874c887265SAdam Litke 	unlock_page(page);
7884c887265SAdam Litke out:
789ac9b9c66SHugh Dickins 	return ret;
7904c887265SAdam Litke 
7914c887265SAdam Litke backout:
7924c887265SAdam Litke 	spin_unlock(&mm->page_table_lock);
7934c887265SAdam Litke 	hugetlb_put_quota(mapping);
7944c887265SAdam Litke 	unlock_page(page);
7954c887265SAdam Litke 	put_page(page);
7964c887265SAdam Litke 	goto out;
797ac9b9c66SHugh Dickins }
798ac9b9c66SHugh Dickins 
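/*
 * Main fault handler for hugetlb vmas.  Allocates the huge pte if
 * necessary, serializes page instantiation with the
 * hugetlb_instantiation_mutex to avoid spurious allocation failures,
 * and dispatches to hugetlb_no_page() for missing pages or
 * hugetlb_cow() for write faults on read-only ptes.
 */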
79986e5216fSAdam Litke int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
80086e5216fSAdam Litke 			unsigned long address, int write_access)
80186e5216fSAdam Litke {
80286e5216fSAdam Litke 	pte_t *ptep;
80386e5216fSAdam Litke 	pte_t entry;
8041e8f889bSDavid Gibson 	int ret;
8053935baa9SDavid Gibson 	static DEFINE_MUTEX(hugetlb_instantiation_mutex);
80686e5216fSAdam Litke 
80786e5216fSAdam Litke 	ptep = huge_pte_alloc(mm, address);
80886e5216fSAdam Litke 	if (!ptep)
80986e5216fSAdam Litke 		return VM_FAULT_OOM;
81086e5216fSAdam Litke 
8113935baa9SDavid Gibson 	/*
8123935baa9SDavid Gibson 	 * Serialize hugepage allocation and instantiation, so that we don't
8133935baa9SDavid Gibson 	 * get spurious allocation failures if two CPUs race to instantiate
8143935baa9SDavid Gibson 	 * the same page in the page cache.
8153935baa9SDavid Gibson 	 */
8163935baa9SDavid Gibson 	mutex_lock(&hugetlb_instantiation_mutex);
81786e5216fSAdam Litke 	entry = *ptep;
8183935baa9SDavid Gibson 	if (pte_none(entry)) {
8193935baa9SDavid Gibson 		ret = hugetlb_no_page(mm, vma, address, ptep, write_access);
8203935baa9SDavid Gibson 		mutex_unlock(&hugetlb_instantiation_mutex);
8213935baa9SDavid Gibson 		return ret;
8223935baa9SDavid Gibson 	}
82386e5216fSAdam Litke 
82483c54070SNick Piggin 	ret = 0;
8251e8f889bSDavid Gibson 
8261e8f889bSDavid Gibson 	spin_lock(&mm->page_table_lock);
8271e8f889bSDavid Gibson 	/* Check for a racing update before calling hugetlb_cow */
8281e8f889bSDavid Gibson 	if (likely(pte_same(entry, *ptep)))
8291e8f889bSDavid Gibson 		if (write_access && !pte_write(entry))
8301e8f889bSDavid Gibson 			ret = hugetlb_cow(mm, vma, address, ptep, entry);
8311e8f889bSDavid Gibson 	spin_unlock(&mm->page_table_lock);
8323935baa9SDavid Gibson 	mutex_unlock(&hugetlb_instantiation_mutex);
8331e8f889bSDavid Gibson 
8341e8f889bSDavid Gibson 	return ret;
83586e5216fSAdam Litke }
83686e5216fSAdam Litke 
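/*
 * get_user_pages() back end for hugetlb vmas: walk the requested range
 * in PAGE_SIZE steps, faulting huge pages in as needed, and fill in
 * pages[] and vmas[].  *position and *length are updated to what is
 * left, and the number of pages processed is returned (or -EFAULT if
 * the first page could not be faulted in).
 */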
83763551ae0SDavid Gibson int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
83863551ae0SDavid Gibson 			struct page **pages, struct vm_area_struct **vmas,
83963551ae0SDavid Gibson 			unsigned long *position, int *length, int i)
84063551ae0SDavid Gibson {
841d5d4b0aaSChen, Kenneth W 	unsigned long pfn_offset;
842d5d4b0aaSChen, Kenneth W 	unsigned long vaddr = *position;
84363551ae0SDavid Gibson 	int remainder = *length;
84463551ae0SDavid Gibson 
8451c59827dSHugh Dickins 	spin_lock(&mm->page_table_lock);
84663551ae0SDavid Gibson 	while (vaddr < vma->vm_end && remainder) {
84763551ae0SDavid Gibson 		pte_t *pte;
84863551ae0SDavid Gibson 		struct page *page;
84963551ae0SDavid Gibson 
8504c887265SAdam Litke 		/*
8514c887265SAdam Litke 		 * Some archs (sparc64, sh*) have multiple pte_ts to
8524c887265SAdam Litke 		 * each hugepage.  We have to make * sure we get the
8534c887265SAdam Litke 		 * each hugepage.  We have to make sure we get the
8544c887265SAdam Litke 		 */
85563551ae0SDavid Gibson 		pte = huge_pte_offset(mm, vaddr & HPAGE_MASK);
85663551ae0SDavid Gibson 
8571c59827dSHugh Dickins 		if (!pte || pte_none(*pte)) {
8584c887265SAdam Litke 			int ret;
8594c887265SAdam Litke 
8604c887265SAdam Litke 			spin_unlock(&mm->page_table_lock);
8614c887265SAdam Litke 			ret = hugetlb_fault(mm, vma, vaddr, 0);
8624c887265SAdam Litke 			spin_lock(&mm->page_table_lock);
863a89182c7SAdam Litke 			if (!(ret & VM_FAULT_ERROR))
8644c887265SAdam Litke 				continue;
8654c887265SAdam Litke 
8661c59827dSHugh Dickins 			remainder = 0;
8671c59827dSHugh Dickins 			if (!i)
8681c59827dSHugh Dickins 				i = -EFAULT;
8691c59827dSHugh Dickins 			break;
8701c59827dSHugh Dickins 		}
87163551ae0SDavid Gibson 
872d5d4b0aaSChen, Kenneth W 		pfn_offset = (vaddr & ~HPAGE_MASK) >> PAGE_SHIFT;
873d5d4b0aaSChen, Kenneth W 		page = pte_page(*pte);
874d5d4b0aaSChen, Kenneth W same_page:
875d6692183SChen, Kenneth W 		if (pages) {
87663551ae0SDavid Gibson 			get_page(page);
877d5d4b0aaSChen, Kenneth W 			pages[i] = page + pfn_offset;
878d6692183SChen, Kenneth W 		}
87963551ae0SDavid Gibson 
88063551ae0SDavid Gibson 		if (vmas)
88163551ae0SDavid Gibson 			vmas[i] = vma;
88263551ae0SDavid Gibson 
88363551ae0SDavid Gibson 		vaddr += PAGE_SIZE;
884d5d4b0aaSChen, Kenneth W 		++pfn_offset;
88563551ae0SDavid Gibson 		--remainder;
88663551ae0SDavid Gibson 		++i;
887d5d4b0aaSChen, Kenneth W 		if (vaddr < vma->vm_end && remainder &&
888d5d4b0aaSChen, Kenneth W 				pfn_offset < HPAGE_SIZE/PAGE_SIZE) {
889d5d4b0aaSChen, Kenneth W 			/*
890d5d4b0aaSChen, Kenneth W 			 * We use pfn_offset to avoid touching the pageframes
891d5d4b0aaSChen, Kenneth W 			 * of this compound page.
892d5d4b0aaSChen, Kenneth W 			 */
893d5d4b0aaSChen, Kenneth W 			goto same_page;
894d5d4b0aaSChen, Kenneth W 		}
89563551ae0SDavid Gibson 	}
8961c59827dSHugh Dickins 	spin_unlock(&mm->page_table_lock);
89763551ae0SDavid Gibson 	*length = remainder;
89863551ae0SDavid Gibson 	*position = vaddr;
89963551ae0SDavid Gibson 
90063551ae0SDavid Gibson 	return i;
90163551ae0SDavid Gibson }
9028f860591SZhang, Yanmin 
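/*
 * mprotect() helper for hugetlb vmas: rewrite every huge pte in
 * [address, end) with the new protection, skipping shared pmds, while
 * holding i_mmap_lock and page_table_lock, then flush the TLB.
 */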
9038f860591SZhang, Yanmin void hugetlb_change_protection(struct vm_area_struct *vma,
9048f860591SZhang, Yanmin 		unsigned long address, unsigned long end, pgprot_t newprot)
9058f860591SZhang, Yanmin {
9068f860591SZhang, Yanmin 	struct mm_struct *mm = vma->vm_mm;
9078f860591SZhang, Yanmin 	unsigned long start = address;
9088f860591SZhang, Yanmin 	pte_t *ptep;
9098f860591SZhang, Yanmin 	pte_t pte;
9108f860591SZhang, Yanmin 
9118f860591SZhang, Yanmin 	BUG_ON(address >= end);
9128f860591SZhang, Yanmin 	flush_cache_range(vma, address, end);
9138f860591SZhang, Yanmin 
91439dde65cSChen, Kenneth W 	spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
9158f860591SZhang, Yanmin 	spin_lock(&mm->page_table_lock);
9168f860591SZhang, Yanmin 	for (; address < end; address += HPAGE_SIZE) {
9178f860591SZhang, Yanmin 		ptep = huge_pte_offset(mm, address);
9188f860591SZhang, Yanmin 		if (!ptep)
9198f860591SZhang, Yanmin 			continue;
92039dde65cSChen, Kenneth W 		if (huge_pmd_unshare(mm, &address, ptep))
92139dde65cSChen, Kenneth W 			continue;
9228f860591SZhang, Yanmin 		if (!pte_none(*ptep)) {
9238f860591SZhang, Yanmin 			pte = huge_ptep_get_and_clear(mm, address, ptep);
9248f860591SZhang, Yanmin 			pte = pte_mkhuge(pte_modify(pte, newprot));
9258f860591SZhang, Yanmin 			set_huge_pte_at(mm, address, ptep, pte);
9268f860591SZhang, Yanmin 		}
9278f860591SZhang, Yanmin 	}
9288f860591SZhang, Yanmin 	spin_unlock(&mm->page_table_lock);
92939dde65cSChen, Kenneth W 	spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
9308f860591SZhang, Yanmin 
9318f860591SZhang, Yanmin 	flush_tlb_range(vma, start, end);
9328f860591SZhang, Yanmin }
9338f860591SZhang, Yanmin 
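/*
 * The file_region list tracks which ranges of a hugetlbfs file carry a
 * reservation.  Ranges are in units of huge pages and are kept sorted
 * and non-overlapping on inode->i_mapping->private_list.  region_chg()
 * reports how many extra pages a new range would reserve, region_add()
 * commits the range, and region_truncate() drops everything from 'end'
 * onwards.
 */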
934a43a8c39SChen, Kenneth W struct file_region {
935a43a8c39SChen, Kenneth W 	struct list_head link;
936a43a8c39SChen, Kenneth W 	long from;
937a43a8c39SChen, Kenneth W 	long to;
938a43a8c39SChen, Kenneth W };
939a43a8c39SChen, Kenneth W 
940a43a8c39SChen, Kenneth W static long region_add(struct list_head *head, long f, long t)
941a43a8c39SChen, Kenneth W {
942a43a8c39SChen, Kenneth W 	struct file_region *rg, *nrg, *trg;
943a43a8c39SChen, Kenneth W 
944a43a8c39SChen, Kenneth W 	/* Locate the region we are either in or before. */
945a43a8c39SChen, Kenneth W 	list_for_each_entry(rg, head, link)
946a43a8c39SChen, Kenneth W 		if (f <= rg->to)
947a43a8c39SChen, Kenneth W 			break;
948a43a8c39SChen, Kenneth W 
949a43a8c39SChen, Kenneth W 	/* Round our left edge to the current segment if it encloses us. */
950a43a8c39SChen, Kenneth W 	if (f > rg->from)
951a43a8c39SChen, Kenneth W 		f = rg->from;
952a43a8c39SChen, Kenneth W 
953a43a8c39SChen, Kenneth W 	/* Check for and consume any regions we now overlap with. */
954a43a8c39SChen, Kenneth W 	nrg = rg;
955a43a8c39SChen, Kenneth W 	list_for_each_entry_safe(rg, trg, rg->link.prev, link) {
956a43a8c39SChen, Kenneth W 		if (&rg->link == head)
957a43a8c39SChen, Kenneth W 			break;
958a43a8c39SChen, Kenneth W 		if (rg->from > t)
959a43a8c39SChen, Kenneth W 			break;
960a43a8c39SChen, Kenneth W 
961a43a8c39SChen, Kenneth W 		/* If this area reaches higher, then extend our area to
962a43a8c39SChen, Kenneth W 		 * include it completely.  If this is not the first area
963a43a8c39SChen, Kenneth W 		 * which we intend to reuse, free it. */
964a43a8c39SChen, Kenneth W 		if (rg->to > t)
965a43a8c39SChen, Kenneth W 			t = rg->to;
966a43a8c39SChen, Kenneth W 		if (rg != nrg) {
967a43a8c39SChen, Kenneth W 			list_del(&rg->link);
968a43a8c39SChen, Kenneth W 			kfree(rg);
969a43a8c39SChen, Kenneth W 		}
970a43a8c39SChen, Kenneth W 	}
971a43a8c39SChen, Kenneth W 	nrg->from = f;
972a43a8c39SChen, Kenneth W 	nrg->to = t;
973a43a8c39SChen, Kenneth W 	return 0;
974a43a8c39SChen, Kenneth W }
975a43a8c39SChen, Kenneth W 
976a43a8c39SChen, Kenneth W static long region_chg(struct list_head *head, long f, long t)
977a43a8c39SChen, Kenneth W {
978a43a8c39SChen, Kenneth W 	struct file_region *rg, *nrg;
979a43a8c39SChen, Kenneth W 	long chg = 0;
980a43a8c39SChen, Kenneth W 
981a43a8c39SChen, Kenneth W 	/* Locate the region we are before or in. */
982a43a8c39SChen, Kenneth W 	list_for_each_entry(rg, head, link)
983a43a8c39SChen, Kenneth W 		if (f <= rg->to)
984a43a8c39SChen, Kenneth W 			break;
985a43a8c39SChen, Kenneth W 
986a43a8c39SChen, Kenneth W 	/* If we are below the current region then a new region is required.
987a43a8c39SChen, Kenneth W 	 * Subtle: allocate a new region at the position but make it zero
988a43a8c39SChen, Kenneth W 	 * size such that we can guarantee to record the reservation. */
989a43a8c39SChen, Kenneth W 	if (&rg->link == head || t < rg->from) {
990a43a8c39SChen, Kenneth W 		nrg = kmalloc(sizeof(*nrg), GFP_KERNEL);
991a43a8c39SChen, Kenneth W 		if (!nrg)
992a43a8c39SChen, Kenneth W 			return -ENOMEM;
993a43a8c39SChen, Kenneth W 		nrg->from = f;
994a43a8c39SChen, Kenneth W 		nrg->to   = f;
995a43a8c39SChen, Kenneth W 		INIT_LIST_HEAD(&nrg->link);
996a43a8c39SChen, Kenneth W 		list_add(&nrg->link, rg->link.prev);
997a43a8c39SChen, Kenneth W 
998a43a8c39SChen, Kenneth W 		return t - f;
999a43a8c39SChen, Kenneth W 	}
1000a43a8c39SChen, Kenneth W 
1001a43a8c39SChen, Kenneth W 	/* Round our left edge to the current segment if it encloses us. */
1002a43a8c39SChen, Kenneth W 	if (f > rg->from)
1003a43a8c39SChen, Kenneth W 		f = rg->from;
1004a43a8c39SChen, Kenneth W 	chg = t - f;
1005a43a8c39SChen, Kenneth W 
1006a43a8c39SChen, Kenneth W 	/* Check for and consume any regions we now overlap with. */
1007a43a8c39SChen, Kenneth W 	list_for_each_entry(rg, rg->link.prev, link) {
1008a43a8c39SChen, Kenneth W 		if (&rg->link == head)
1009a43a8c39SChen, Kenneth W 			break;
1010a43a8c39SChen, Kenneth W 		if (rg->from > t)
1011a43a8c39SChen, Kenneth W 			return chg;
1012a43a8c39SChen, Kenneth W 
1013a43a8c39SChen, Kenneth W 		/* We overlap with this area; if it extends further than
1014a43a8c39SChen, Kenneth W 		 * us then we must extend ourselves.  Account for its
1015a43a8c39SChen, Kenneth W 		 * existing reservation. */
1016a43a8c39SChen, Kenneth W 		if (rg->to > t) {
1017a43a8c39SChen, Kenneth W 			chg += rg->to - t;
1018a43a8c39SChen, Kenneth W 			t = rg->to;
1019a43a8c39SChen, Kenneth W 		}
1020a43a8c39SChen, Kenneth W 		chg -= rg->to - rg->from;
1021a43a8c39SChen, Kenneth W 	}
1022a43a8c39SChen, Kenneth W 	return chg;
1023a43a8c39SChen, Kenneth W }
1024a43a8c39SChen, Kenneth W 
1025a43a8c39SChen, Kenneth W static long region_truncate(struct list_head *head, long end)
1026a43a8c39SChen, Kenneth W {
1027a43a8c39SChen, Kenneth W 	struct file_region *rg, *trg;
1028a43a8c39SChen, Kenneth W 	long chg = 0;
1029a43a8c39SChen, Kenneth W 
1030a43a8c39SChen, Kenneth W 	/* Locate the region we are either in or before. */
1031a43a8c39SChen, Kenneth W 	list_for_each_entry(rg, head, link)
1032a43a8c39SChen, Kenneth W 		if (end <= rg->to)
1033a43a8c39SChen, Kenneth W 			break;
1034a43a8c39SChen, Kenneth W 	if (&rg->link == head)
1035a43a8c39SChen, Kenneth W 		return 0;
1036a43a8c39SChen, Kenneth W 
1037a43a8c39SChen, Kenneth W 	/* If we are in the middle of a region then adjust it. */
1038a43a8c39SChen, Kenneth W 	if (end > rg->from) {
1039a43a8c39SChen, Kenneth W 		chg = rg->to - end;
1040a43a8c39SChen, Kenneth W 		rg->to = end;
1041a43a8c39SChen, Kenneth W 		rg = list_entry(rg->link.next, typeof(*rg), link);
1042a43a8c39SChen, Kenneth W 	}
1043a43a8c39SChen, Kenneth W 
1044a43a8c39SChen, Kenneth W 	/* Drop any remaining regions. */
1045a43a8c39SChen, Kenneth W 	list_for_each_entry_safe(rg, trg, rg->link.prev, link) {
1046a43a8c39SChen, Kenneth W 		if (&rg->link == head)
1047a43a8c39SChen, Kenneth W 			break;
1048a43a8c39SChen, Kenneth W 		chg += rg->to - rg->from;
1049a43a8c39SChen, Kenneth W 		list_del(&rg->link);
1050a43a8c39SChen, Kenneth W 		kfree(rg);
1051a43a8c39SChen, Kenneth W 	}
1052a43a8c39SChen, Kenneth W 	return chg;
1053a43a8c39SChen, Kenneth W }
1054a43a8c39SChen, Kenneth W 
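/*
 * Adjust the global reservation by 'delta' huge pages.  Growing the
 * reservation may require gathering surplus pages from the buddy
 * allocator and is checked (as a best effort) against the free pages
 * available in the current cpuset; shrinking returns any unused
 * surplus pages.  Returns 0 on success or -ENOMEM.
 */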
1055a43a8c39SChen, Kenneth W static int hugetlb_acct_memory(long delta)
1056a43a8c39SChen, Kenneth W {
1057a43a8c39SChen, Kenneth W 	int ret = -ENOMEM;
1058a43a8c39SChen, Kenneth W 
1059a43a8c39SChen, Kenneth W 	spin_lock(&hugetlb_lock);
10608a630112SKen Chen 	/*
10618a630112SKen Chen 	 * When cpuset is configured, it breaks the strict hugetlb page
10628a630112SKen Chen 	 * reservation as the accounting is done on a global variable. Such
10638a630112SKen Chen 	 * reservation is completely rubbish in the presence of cpuset because
10648a630112SKen Chen 	 * the reservation is not checked against page availability for the
10658a630112SKen Chen 	 * current cpuset. The application can still potentially be OOM'ed by the
10668a630112SKen Chen 	 * kernel due to a lack of free htlb pages in the cpuset that the task is in.
10678a630112SKen Chen 	 * Attempting to enforce strict accounting with cpusets is almost
10688a630112SKen Chen 	 * impossible (or too ugly) because cpusets are so fluid that tasks and
10698a630112SKen Chen 	 * memory nodes can be dynamically moved between them.
10708a630112SKen Chen 	 *
10718a630112SKen Chen 	 * The change of semantics for shared hugetlb mapping with cpuset is
10728a630112SKen Chen 	 * undesirable. However, in order to preserve some of the semantics,
10738a630112SKen Chen 	 * we fall back to check against current free page availability as
10748a630112SKen Chen 	 * a best attempt and hopefully to minimize the impact of changing
10758a630112SKen Chen 	 * semantics that cpuset has.
10768a630112SKen Chen 	 */
1077e4e574b7SAdam Litke 	if (delta > 0) {
1078e4e574b7SAdam Litke 		if (gather_surplus_pages(delta) < 0)
1079e4e574b7SAdam Litke 			goto out;
1080e4e574b7SAdam Litke 
1081e4e574b7SAdam Litke 		if (delta > cpuset_mems_nr(free_huge_pages_node))
1082e4e574b7SAdam Litke 			goto out;
1083e4e574b7SAdam Litke 	}
1084e4e574b7SAdam Litke 
1085e4e574b7SAdam Litke 	ret = 0;
1086e4e574b7SAdam Litke 	resv_huge_pages += delta;
1087e4e574b7SAdam Litke 	if (delta < 0)
1088e4e574b7SAdam Litke 		return_unused_surplus_pages((unsigned long) -delta);
1089e4e574b7SAdam Litke 
1090e4e574b7SAdam Litke out:
1091e4e574b7SAdam Litke 	spin_unlock(&hugetlb_lock);
1092e4e574b7SAdam Litke 	return ret;
1093e4e574b7SAdam Litke }
1094e4e574b7SAdam Litke 
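/*
 * Reserve huge pages for the range [from, to) of a hugetlbfs inode so
 * that later faults on the mapping do not fail for lack of pages.  The
 * range is recorded in the inode's region list and charged via
 * hugetlb_acct_memory().
 */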
1095e4e574b7SAdam Litke int hugetlb_reserve_pages(struct inode *inode, long from, long to)
1096e4e574b7SAdam Litke {
1097e4e574b7SAdam Litke 	long ret, chg;
1098e4e574b7SAdam Litke 
1099e4e574b7SAdam Litke 	chg = region_chg(&inode->i_mapping->private_list, from, to);
1100e4e574b7SAdam Litke 	if (chg < 0)
1101e4e574b7SAdam Litke 		return chg;
11028a630112SKen Chen 
1103a43a8c39SChen, Kenneth W 	ret = hugetlb_acct_memory(chg);
1104a43a8c39SChen, Kenneth W 	if (ret < 0)
1105a43a8c39SChen, Kenneth W 		return ret;
1106a43a8c39SChen, Kenneth W 	region_add(&inode->i_mapping->private_list, from, to);
1107a43a8c39SChen, Kenneth W 	return 0;
1108a43a8c39SChen, Kenneth W }
1109a43a8c39SChen, Kenneth W 
1110a43a8c39SChen, Kenneth W void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
1111a43a8c39SChen, Kenneth W {
1112a43a8c39SChen, Kenneth W 	long chg = region_truncate(&inode->i_mapping->private_list, offset);
1113a43a8c39SChen, Kenneth W 	hugetlb_acct_memory(freed - chg);
1114a43a8c39SChen, Kenneth W }
1115