1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hugepage-madvise:
4  *
5  * Basic functional testing of madvise MADV_DONTNEED and MADV_REMOVE
6  * on hugetlb mappings.
7  *
 * Before running this test, make sure the administrator has pre-allocated
 * at least MIN_FREE_PAGES hugetlb pages and they are free.  The hugetlb
 * file used by the test is created with memfd_create(MFD_HUGETLB), so the
 * test reads no command line arguments and needs no path to a file in a
 * mounted hugetlbfs filesystem.
13  */
14 
15 #define _GNU_SOURCE
16 #include <stdlib.h>
17 #include <stdio.h>
18 #include <unistd.h>
19 #include <sys/mman.h>
20 #define __USE_GNU
21 #include <fcntl.h>
22 
23 #define MIN_FREE_PAGES	20
24 #define NR_HUGE_PAGES	10	/* common number of pages to map/allocate */
25 
/*
 * validate_free_pages - abort the test unless the number of free huge
 * pages reported by get_free_hugepages() equals exp_free.
 *
 * On a mismatch the source line of the failing check is printed and the
 * process exits with status 1.  The count is kept in an unsigned long,
 * the type get_free_hugepages() returns, so it is neither narrowed nor
 * compared as a signed value against an unsigned expectation.
 */
#define validate_free_pages(exp_free)					\
	do {								\
		unsigned long fhp = get_free_hugepages();		\
		if (fhp != (exp_free)) {				\
			printf("Unexpected number of free huge "	\
				"pages line %d\n", __LINE__);		\
			exit(1);					\
		}							\
	} while (0)
35 
/* Default huge page size in bytes; assigned in main() from default_huge_page_size(). */
unsigned long huge_page_size;
/* Base page size; assigned in main() from sysconf(_SC_PAGE_SIZE). */
unsigned long base_page_size;
38 
/*
 * default_huge_page_size copied from mlock2-tests.c
 *
 * Scan /proc/meminfo for the "Hugepagesize:" entry, which is given in kB,
 * and return that size converted to bytes.  Returns 0 when the file cannot
 * be opened or no such entry is found.
 */
unsigned long default_huge_page_size(void)
{
	FILE *meminfo;
	char *buf = NULL;
	size_t cap = 0;
	unsigned long size_kb;
	unsigned long bytes = 0;

	meminfo = fopen("/proc/meminfo", "r");
	if (meminfo == NULL)
		return 0;

	for (;;) {
		if (getline(&buf, &cap, meminfo) <= 0)
			break;
		if (sscanf(buf, "Hugepagesize:       %lu kB", &size_kb) != 1)
			continue;

		/* kB -> bytes */
		bytes = size_kb << 10;
		break;
	}

	/* getline() allocated (or left NULL) the line buffer; release it. */
	free(buf);
	fclose(meminfo);

	return bytes;
}
62 
/*
 * Return the value of the "HugePages_Free:" entry in /proc/meminfo.
 * Returns 0 when the file cannot be opened or no such entry is found.
 */
unsigned long get_free_hugepages(void)
{
	unsigned long nr_free = 0;
	char *buf = NULL;
	size_t cap = 0;
	int found = 0;
	FILE *meminfo = fopen("/proc/meminfo", "r");

	if (meminfo == NULL)
		return nr_free;

	/* Stop at the first line that parses, or at end of file / error. */
	while (!found && getline(&buf, &cap, meminfo) > 0)
		found = (sscanf(buf, "HugePages_Free:      %lu", &nr_free) == 1);

	free(buf);
	fclose(meminfo);

	return nr_free;
}
81 
82 void write_fault_pages(void *addr, unsigned long nr_pages)
83 {
84 	unsigned long i;
85 
86 	for (i = 0; i < nr_pages; i++)
87 		*((unsigned long *)(addr + (i * huge_page_size))) = i;
88 }
89 
90 void read_fault_pages(void *addr, unsigned long nr_pages)
91 {
92 	unsigned long dummy = 0;
93 	unsigned long i;
94 
95 	for (i = 0; i < nr_pages; i++)
96 		dummy += *((unsigned long *)(addr + (i * huge_page_size)));
97 }
98 
99 int main(int argc, char **argv)
100 {
101 	unsigned long free_hugepages;
102 	void *addr, *addr2;
103 	int fd;
104 	int ret;
105 
106 	huge_page_size = default_huge_page_size();
107 	if (!huge_page_size) {
108 		printf("Unable to determine huge page size, exiting!\n");
109 		exit(1);
110 	}
111 	base_page_size = sysconf(_SC_PAGE_SIZE);
112 	if (!huge_page_size) {
113 		printf("Unable to determine base page size, exiting!\n");
114 		exit(1);
115 	}
116 
117 	free_hugepages = get_free_hugepages();
118 	if (free_hugepages < MIN_FREE_PAGES) {
119 		printf("Not enough free huge pages to test, exiting!\n");
120 		exit(1);
121 	}
122 
123 	fd = memfd_create(argv[0], MFD_HUGETLB);
124 	if (fd < 0) {
125 		perror("memfd_create() failed");
126 		exit(1);
127 	}
128 
129 	/*
130 	 * Test validity of MADV_DONTNEED addr and length arguments.  mmap
131 	 * size is NR_HUGE_PAGES + 2.  One page at the beginning and end of
132 	 * the mapping will be unmapped so we KNOW there is nothing mapped
133 	 * there.
134 	 */
135 	addr = mmap(NULL, (NR_HUGE_PAGES + 2) * huge_page_size,
136 			PROT_READ | PROT_WRITE,
137 			MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
138 			-1, 0);
139 	if (addr == MAP_FAILED) {
140 		perror("mmap");
141 		exit(1);
142 	}
143 	if (munmap(addr, huge_page_size) ||
144 			munmap(addr + (NR_HUGE_PAGES + 1) * huge_page_size,
145 				huge_page_size)) {
146 		perror("munmap");
147 		exit(1);
148 	}
149 	addr = addr + huge_page_size;
150 
151 	write_fault_pages(addr, NR_HUGE_PAGES);
152 	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
153 
154 	/* addr before mapping should fail */
155 	ret = madvise(addr - base_page_size, NR_HUGE_PAGES * huge_page_size,
156 		MADV_DONTNEED);
157 	if (!ret) {
158 		printf("Unexpected success of madvise call with invalid addr line %d\n",
159 				__LINE__);
160 			exit(1);
161 	}
162 
163 	/* addr + length after mapping should fail */
164 	ret = madvise(addr, (NR_HUGE_PAGES * huge_page_size) + base_page_size,
165 		MADV_DONTNEED);
166 	if (!ret) {
167 		printf("Unexpected success of madvise call with invalid length line %d\n",
168 				__LINE__);
169 			exit(1);
170 	}
171 
172 	(void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
173 
174 	/*
175 	 * Test alignment of MADV_DONTNEED addr and length arguments
176 	 */
177 	addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
178 			PROT_READ | PROT_WRITE,
179 			MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
180 			-1, 0);
181 	if (addr == MAP_FAILED) {
182 		perror("mmap");
183 		exit(1);
184 	}
185 	write_fault_pages(addr, NR_HUGE_PAGES);
186 	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
187 
188 	/* addr is not huge page size aligned and should fail */
189 	ret = madvise(addr + base_page_size,
190 			NR_HUGE_PAGES * huge_page_size - base_page_size,
191 			MADV_DONTNEED);
192 	if (!ret) {
193 		printf("Unexpected success of madvise call with unaligned start address %d\n",
194 				__LINE__);
195 			exit(1);
196 	}
197 
198 	/* addr + length should be aligned down to huge page size */
199 	if (madvise(addr,
200 			((NR_HUGE_PAGES - 1) * huge_page_size) + base_page_size,
201 			MADV_DONTNEED)) {
202 		perror("madvise");
203 		exit(1);
204 	}
205 
206 	/* should free all but last page in mapping */
207 	validate_free_pages(free_hugepages - 1);
208 
209 	(void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
210 	validate_free_pages(free_hugepages);
211 
212 	/*
213 	 * Test MADV_DONTNEED on anonymous private mapping
214 	 */
215 	addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
216 			PROT_READ | PROT_WRITE,
217 			MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
218 			-1, 0);
219 	if (addr == MAP_FAILED) {
220 		perror("mmap");
221 		exit(1);
222 	}
223 	write_fault_pages(addr, NR_HUGE_PAGES);
224 	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
225 
226 	if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
227 		perror("madvise");
228 		exit(1);
229 	}
230 
231 	/* should free all pages in mapping */
232 	validate_free_pages(free_hugepages);
233 
234 	(void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
235 
236 	/*
237 	 * Test MADV_DONTNEED on private mapping of hugetlb file
238 	 */
239 	if (fallocate(fd, 0, 0, NR_HUGE_PAGES * huge_page_size)) {
240 		perror("fallocate");
241 		exit(1);
242 	}
243 	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
244 
245 	addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
246 			PROT_READ | PROT_WRITE,
247 			MAP_PRIVATE, fd, 0);
248 	if (addr == MAP_FAILED) {
249 		perror("mmap");
250 		exit(1);
251 	}
252 
253 	/* read should not consume any pages */
254 	read_fault_pages(addr, NR_HUGE_PAGES);
255 	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
256 
257 	/* madvise should not free any pages */
258 	if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
259 		perror("madvise");
260 		exit(1);
261 	}
262 	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
263 
264 	/* writes should allocate private pages */
265 	write_fault_pages(addr, NR_HUGE_PAGES);
266 	validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
267 
268 	/* madvise should free private pages */
269 	if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
270 		perror("madvise");
271 		exit(1);
272 	}
273 	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
274 
275 	/* writes should allocate private pages */
276 	write_fault_pages(addr, NR_HUGE_PAGES);
277 	validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
278 
279 	/*
280 	 * The fallocate below certainly should free the pages associated
281 	 * with the file.  However, pages in the private mapping are also
282 	 * freed.  This is not the 'correct' behavior, but is expected
283 	 * because this is how it has worked since the initial hugetlb
284 	 * implementation.
285 	 */
286 	if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
287 					0, NR_HUGE_PAGES * huge_page_size)) {
288 		perror("fallocate");
289 		exit(1);
290 	}
291 	validate_free_pages(free_hugepages);
292 
293 	(void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
294 
295 	/*
296 	 * Test MADV_DONTNEED on shared mapping of hugetlb file
297 	 */
298 	if (fallocate(fd, 0, 0, NR_HUGE_PAGES * huge_page_size)) {
299 		perror("fallocate");
300 		exit(1);
301 	}
302 	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
303 
304 	addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
305 			PROT_READ | PROT_WRITE,
306 			MAP_SHARED, fd, 0);
307 	if (addr == MAP_FAILED) {
308 		perror("mmap");
309 		exit(1);
310 	}
311 
312 	/* write should not consume any pages */
313 	write_fault_pages(addr, NR_HUGE_PAGES);
314 	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
315 
316 	/* madvise should not free any pages */
317 	if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
318 		perror("madvise");
319 		exit(1);
320 	}
321 	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
322 
323 	/*
324 	 * Test MADV_REMOVE on shared mapping of hugetlb file
325 	 *
326 	 * madvise is same as hole punch and should free all pages.
327 	 */
328 	if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_REMOVE)) {
329 		perror("madvise");
330 		exit(1);
331 	}
332 	validate_free_pages(free_hugepages);
333 	(void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
334 
335 	/*
336 	 * Test MADV_REMOVE on shared and private mapping of hugetlb file
337 	 */
338 	if (fallocate(fd, 0, 0, NR_HUGE_PAGES * huge_page_size)) {
339 		perror("fallocate");
340 		exit(1);
341 	}
342 	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
343 
344 	addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
345 			PROT_READ | PROT_WRITE,
346 			MAP_SHARED, fd, 0);
347 	if (addr == MAP_FAILED) {
348 		perror("mmap");
349 		exit(1);
350 	}
351 
352 	/* shared write should not consume any additional pages */
353 	write_fault_pages(addr, NR_HUGE_PAGES);
354 	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
355 
356 	addr2 = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
357 			PROT_READ | PROT_WRITE,
358 			MAP_PRIVATE, fd, 0);
359 	if (addr2 == MAP_FAILED) {
360 		perror("mmap");
361 		exit(1);
362 	}
363 
364 	/* private read should not consume any pages */
365 	read_fault_pages(addr2, NR_HUGE_PAGES);
366 	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
367 
368 	/* private write should consume additional pages */
369 	write_fault_pages(addr2, NR_HUGE_PAGES);
370 	validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
371 
372 	/* madvise of shared mapping should not free any pages */
373 	if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
374 		perror("madvise");
375 		exit(1);
376 	}
377 	validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
378 
379 	/* madvise of private mapping should free private pages */
380 	if (madvise(addr2, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
381 		perror("madvise");
382 		exit(1);
383 	}
384 	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
385 
386 	/* private write should consume additional pages again */
387 	write_fault_pages(addr2, NR_HUGE_PAGES);
388 	validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
389 
390 	/*
391 	 * madvise should free both file and private pages although this is
392 	 * not correct.  private pages should not be freed, but this is
393 	 * expected.  See comment associated with FALLOC_FL_PUNCH_HOLE call.
394 	 */
395 	if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_REMOVE)) {
396 		perror("madvise");
397 		exit(1);
398 	}
399 	validate_free_pages(free_hugepages);
400 
401 	(void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
402 	(void)munmap(addr2, NR_HUGE_PAGES * huge_page_size);
403 
404 	close(fd);
405 	return 0;
406 }
407