xref: /openbmc/linux/mm/hugetlb.c (revision 1da177e4)
/*
 * Generic hugetlb support.
 * (C) William Irwin, April 2004
 */
#include <linux/gfp.h>
#include <linux/list.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/nodemask.h>

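/*
 * Pool accounting: global and per-node counts of huge pages, plus the
 * per-node lists of free huge pages.  The free lists are manipulated
 * under hugetlb_lock.
 */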
const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
static unsigned long nr_huge_pages, free_huge_pages;
unsigned long max_huge_pages;
static struct list_head hugepage_freelists[MAX_NUMNODES];
static unsigned int nr_huge_pages_node[MAX_NUMNODES];
static unsigned int free_huge_pages_node[MAX_NUMNODES];
static DEFINE_SPINLOCK(hugetlb_lock);

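/* Put a free huge page on the free list of its node; caller holds hugetlb_lock. */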
static void enqueue_huge_page(struct page *page)
{
	int nid = page_to_nid(page);
	list_add(&page->lru, &hugepage_freelists[nid]);
	free_huge_pages++;
	free_huge_pages_node[nid]++;
}

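/*
 * Take a huge page off a free list, preferring the local node and falling
 * back to the first node that has one; caller holds hugetlb_lock.
 * Returns NULL if no free huge page is available.
 */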
static struct page *dequeue_huge_page(void)
{
	int nid = numa_node_id();
	struct page *page = NULL;

	if (list_empty(&hugepage_freelists[nid])) {
		for (nid = 0; nid < MAX_NUMNODES; ++nid)
			if (!list_empty(&hugepage_freelists[nid]))
				break;
	}
	if (nid >= 0 && nid < MAX_NUMNODES &&
	    !list_empty(&hugepage_freelists[nid])) {
		page = list_entry(hugepage_freelists[nid].next,
				  struct page, lru);
		list_del(&page->lru);
		free_huge_pages--;
		free_huge_pages_node[nid]--;
	}
	return page;
}

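/*
 * Allocate a brand-new huge page from the buddy allocator as a compound
 * page, round-robining the target node across the online nodes.
 */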
static struct page *alloc_fresh_huge_page(void)
{
	static int nid = 0;
	struct page *page;
	page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP|__GFP_NOWARN,
					HUGETLB_PAGE_ORDER);
	nid = (nid + 1) % num_online_nodes();
	if (page) {
		nr_huge_pages++;
		nr_huge_pages_node[page_to_nid(page)]++;
	}
	return page;
}

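/*
 * Release function installed in page[1].mapping by alloc_huge_page():
 * returns a no-longer-referenced huge page to the free pool.
 */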
void free_huge_page(struct page *page)
{
	BUG_ON(page_count(page));

	INIT_LIST_HEAD(&page->lru);
	page[1].mapping = NULL;

	spin_lock(&hugetlb_lock);
	enqueue_huge_page(page);
	spin_unlock(&hugetlb_lock);
}

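/*
 * Hand out a huge page from the free pool: take a reference, install
 * free_huge_page() as its release function and zero its contents.
 * Returns NULL if the pool is empty.
 */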
struct page *alloc_huge_page(void)
{
	struct page *page;
	int i;

	spin_lock(&hugetlb_lock);
	page = dequeue_huge_page();
	if (!page) {
		spin_unlock(&hugetlb_lock);
		return NULL;
	}
	spin_unlock(&hugetlb_lock);
	set_page_count(page, 1);
	page[1].mapping = (void *)free_huge_page;
	for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); ++i)
		clear_highpage(&page[i]);
	return page;
}

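/*
 * Boot-time setup: initialise the per-node free lists and fill the pool
 * with up to max_huge_pages huge pages.
 */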
static int __init hugetlb_init(void)
{
	unsigned long i;
	struct page *page;

	for (i = 0; i < MAX_NUMNODES; ++i)
		INIT_LIST_HEAD(&hugepage_freelists[i]);

	for (i = 0; i < max_huge_pages; ++i) {
		page = alloc_fresh_huge_page();
		if (!page)
			break;
		spin_lock(&hugetlb_lock);
		enqueue_huge_page(page);
		spin_unlock(&hugetlb_lock);
	}
	max_huge_pages = free_huge_pages = nr_huge_pages = i;
	printk("Total HugeTLB memory allocated, %lu\n", free_huge_pages);
	return 0;
}
module_init(hugetlb_init);

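/*
 * Parse the "hugepages=" boot parameter, e.g. booting with "hugepages=64"
 * asks hugetlb_init() to reserve 64 huge pages.
 */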
static int __init hugetlb_setup(char *s)
{
	if (sscanf(s, "%lu", &max_huge_pages) <= 0)
		max_huge_pages = 0;
	return 1;
}
__setup("hugepages=", hugetlb_setup);

#ifdef CONFIG_SYSCTL
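/*
 * Drop a huge page from the pool accounting, reset its per-page state and
 * give it back to the buddy allocator.
 */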
static void update_and_free_page(struct page *page)
{
	int i;
	nr_huge_pages--;
	nr_huge_pages_node[page_zone(page)->zone_pgdat->node_id]--;
	for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) {
		page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced |
				1 << PG_dirty | 1 << PG_active | 1 << PG_reserved |
				1 << PG_private | 1 << PG_writeback);
		set_page_count(&page[i], 0);
	}
	set_page_count(page, 1);
	__free_pages(page, HUGETLB_PAGE_ORDER);
}

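/*
 * When shrinking the pool, prefer freeing huge pages that live in lowmem so
 * that highmem pages stay in the pool; without CONFIG_HIGHMEM this is a no-op.
 */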
#ifdef CONFIG_HIGHMEM
static void try_to_free_low(unsigned long count)
{
	int i, nid;
	for (i = 0; i < MAX_NUMNODES; ++i) {
		struct page *page, *next;
		list_for_each_entry_safe(page, next, &hugepage_freelists[i], lru) {
			if (PageHighMem(page))
				continue;
			list_del(&page->lru);
			update_and_free_page(page);
			nid = page_zone(page)->zone_pgdat->node_id;
			free_huge_pages--;
			free_huge_pages_node[nid]--;
			if (count >= nr_huge_pages)
				return;
		}
	}
}
#else
static inline void try_to_free_low(unsigned long count)
{
}
#endif

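/*
 * Grow or shrink the pool towards 'count' huge pages and return the
 * resulting pool size.
 */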
static unsigned long set_max_huge_pages(unsigned long count)
{
	while (count > nr_huge_pages) {
		struct page *page = alloc_fresh_huge_page();
		if (!page)
			return nr_huge_pages;
		spin_lock(&hugetlb_lock);
		enqueue_huge_page(page);
		spin_unlock(&hugetlb_lock);
	}
	if (count >= nr_huge_pages)
		return nr_huge_pages;

	spin_lock(&hugetlb_lock);
	try_to_free_low(count);
	while (count < nr_huge_pages) {
		struct page *page = dequeue_huge_page();
		if (!page)
			break;
		update_and_free_page(page);
	}
	spin_unlock(&hugetlb_lock);
	return nr_huge_pages;
}

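/*
 * sysctl handler for the huge page count: proc_doulongvec_minmax() updates
 * max_huge_pages on a write, then the pool is resized to match.
 */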
int hugetlb_sysctl_handler(struct ctl_table *table, int write,
			   struct file *file, void __user *buffer,
			   size_t *length, loff_t *ppos)
{
	proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
	max_huge_pages = set_max_huge_pages(max_huge_pages);
	return 0;
}
#endif /* CONFIG_SYSCTL */

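/* Report pool usage for /proc/meminfo and the per-node meminfo files. */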
int hugetlb_report_meminfo(char *buf)
{
	return sprintf(buf,
			"HugePages_Total: %5lu\n"
			"HugePages_Free:  %5lu\n"
			"Hugepagesize:    %5lu kB\n",
			nr_huge_pages,
			free_huge_pages,
			HPAGE_SIZE/1024);
}

int hugetlb_report_node_meminfo(int nid, char *buf)
{
	return sprintf(buf,
		"Node %d HugePages_Total: %5u\n"
		"Node %d HugePages_Free:  %5u\n",
		nid, nr_huge_pages_node[nid],
		nid, free_huge_pages_node[nid]);
}

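/* Are there enough free huge pages to cover a mapping of 'size' bytes? */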
int is_hugepage_mem_enough(size_t size)
{
	return (size + ~HPAGE_MASK)/HPAGE_SIZE <= free_huge_pages;
}

/* Return the number of pages of memory we physically have, in PAGE_SIZE units. */
unsigned long hugetlb_total_pages(void)
{
	return nr_huge_pages * (HPAGE_SIZE / PAGE_SIZE);
}
EXPORT_SYMBOL(hugetlb_total_pages);

/*
 * We cannot handle pagefaults against hugetlb pages at all.  They cause
 * handle_mm_fault() to try to instantiate regular-sized pages in the
 * hugepage VMA.  do_page_fault() is supposed to trap this, so BUG if we get
 * this far.
 */
static struct page *hugetlb_nopage(struct vm_area_struct *vma,
				unsigned long address, int *unused)
{
	BUG();
	return NULL;
}

struct vm_operations_struct hugetlb_vm_ops = {
	.nopage = hugetlb_nopage,
};

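/* Unmap the huge pages in [start, start + length) under mm->page_table_lock. */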
void zap_hugepage_range(struct vm_area_struct *vma,
			unsigned long start, unsigned long length)
{
	struct mm_struct *mm = vma->vm_mm;

	spin_lock(&mm->page_table_lock);
	unmap_hugepage_range(vma, start, start + length);
	spin_unlock(&mm->page_table_lock);
}