// SPDX-License-Identifier: GPL-2.0
/*
 * hugepage-madvise:
 *
 * Basic functional testing of madvise MADV_DONTNEED and MADV_REMOVE
 * on hugetlb mappings.
 *
 * Before running this test, make sure the administrator has pre-allocated
 * at least MIN_FREE_PAGES hugetlb pages and that they are free.  The test
 * creates its hugetlb file with memfd_create(MFD_HUGETLB), so no mounted
 * hugetlbfs filesystem or file path argument is required.
 */
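/*
 * A simple way to pre-allocate the pool (assuming the default huge page
 * size and sufficient privileges) is, for example:
 *
 *	echo 20 > /proc/sys/vm/nr_hugepages
 */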

#define _GNU_SOURCE
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>
#include <fcntl.h>
#include "vm_util.h"

#define MIN_FREE_PAGES 20
#define NR_HUGE_PAGES  10	/* common number of pages to map/allocate */

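/*
 * Check the current HugePages_Free count against an expected value and
 * abort on mismatch.  This only gives meaningful results if nothing else
 * on the system allocates or frees hugetlb pages while the test runs.
 */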
#define validate_free_pages(exp_free)					\
	do {								\
		int fhp = get_free_hugepages();				\
		if (fhp != (exp_free)) {				\
			printf("Unexpected number of free huge "	\
				"pages line %d\n", __LINE__);		\
			exit(1);					\
		}							\
	} while (0)

unsigned long huge_page_size;
unsigned long base_page_size;

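/*
 * Parse HugePages_Free from /proc/meminfo.  Returns 0 if the file cannot
 * be opened or the field is not found.
 */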
unsigned long get_free_hugepages(void)
{
	unsigned long fhp = 0;
	char *line = NULL;
	size_t linelen = 0;
	FILE *f = fopen("/proc/meminfo", "r");

	if (!f)
		return fhp;
	while (getline(&line, &linelen, f) > 0) {
		if (sscanf(line, "HugePages_Free: %lu", &fhp) == 1)
			break;
	}

	free(line);
	fclose(f);
	return fhp;
}

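/*
 * Touch the first word of each huge page so that a write fault allocates
 * (or, for private file mappings, copy-on-write copies) a hugetlb page.
 */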
void write_fault_pages(void *addr, unsigned long nr_pages)
{
	unsigned long i;

	for (i = 0; i < nr_pages; i++)
		*((unsigned long *)(addr + (i * huge_page_size))) = i;
}

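/*
 * Read the first word of each huge page to trigger read faults, without
 * letting the compiler optimize the accesses away.
 */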
void read_fault_pages(void *addr, unsigned long nr_pages)
{
	volatile unsigned long dummy = 0;
	unsigned long i;

	for (i = 0; i < nr_pages; i++) {
		dummy += *((unsigned long *)(addr + (i * huge_page_size)));

		/* Prevent the compiler from optimizing out the entire loop: */
		asm volatile("" : "+r" (dummy));
	}
}

int main(int argc, char **argv)
{
	unsigned long free_hugepages;
	void *addr, *addr2;
	int fd;
	int ret;

	huge_page_size = default_huge_page_size();
	if (!huge_page_size) {
		printf("Unable to determine huge page size, exiting!\n");
		exit(1);
	}
	base_page_size = sysconf(_SC_PAGE_SIZE);
	if (!base_page_size) {
		printf("Unable to determine base page size, exiting!\n");
		exit(1);
	}

	free_hugepages = get_free_hugepages();
	if (free_hugepages < MIN_FREE_PAGES) {
		printf("Not enough free huge pages to test, exiting!\n");
		exit(1);
	}

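	/*
	 * MFD_HUGETLB gives us an unlinked hugetlbfs file, so the test does
	 * not need a user-mounted hugetlbfs filesystem.
	 */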
	fd = memfd_create(argv[0], MFD_HUGETLB);
	if (fd < 0) {
		perror("memfd_create() failed");
		exit(1);
	}

	/*
	 * Test validity of MADV_DONTNEED addr and length arguments.  mmap
	 * size is NR_HUGE_PAGES + 2.  One page at the beginning and end of
	 * the mapping will be unmapped so we KNOW there is nothing mapped
	 * there.
	 */
	addr = mmap(NULL, (NR_HUGE_PAGES + 2) * huge_page_size,
			PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
			-1, 0);
	if (addr == MAP_FAILED) {
		perror("mmap");
		exit(1);
	}
	if (munmap(addr, huge_page_size) ||
			munmap(addr + (NR_HUGE_PAGES + 1) * huge_page_size,
				huge_page_size)) {
		perror("munmap");
		exit(1);
	}
	addr = addr + huge_page_size;

	write_fault_pages(addr, NR_HUGE_PAGES);
	validate_free_pages(free_hugepages - NR_HUGE_PAGES);

	/* addr before mapping should fail */
	ret = madvise(addr - base_page_size, NR_HUGE_PAGES * huge_page_size,
		MADV_DONTNEED);
	if (!ret) {
		printf("Unexpected success of madvise call with invalid addr line %d\n",
			__LINE__);
		exit(1);
	}

	/* addr + length after mapping should fail */
	ret = madvise(addr, (NR_HUGE_PAGES * huge_page_size) + base_page_size,
		MADV_DONTNEED);
	if (!ret) {
		printf("Unexpected success of madvise call with invalid length line %d\n",
			__LINE__);
		exit(1);
	}

	(void)munmap(addr, NR_HUGE_PAGES * huge_page_size);

	/*
	 * Test alignment of MADV_DONTNEED addr and length arguments
	 */
	addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
			PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
			-1, 0);
	if (addr == MAP_FAILED) {
		perror("mmap");
		exit(1);
	}
	write_fault_pages(addr, NR_HUGE_PAGES);
	validate_free_pages(free_hugepages - NR_HUGE_PAGES);

	/* addr is not huge page size aligned and should fail */
	ret = madvise(addr + base_page_size,
			NR_HUGE_PAGES * huge_page_size - base_page_size,
			MADV_DONTNEED);
	if (!ret) {
		printf("Unexpected success of madvise call with unaligned start address line %d\n",
			__LINE__);
		exit(1);
	}

	/* addr + length should be aligned down to huge page size */
	if (madvise(addr,
			((NR_HUGE_PAGES - 1) * huge_page_size) + base_page_size,
			MADV_DONTNEED)) {
		perror("madvise");
		exit(1);
	}

	/* should free all but last page in mapping */
	validate_free_pages(free_hugepages - 1);

	(void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
	validate_free_pages(free_hugepages);

	/*
	 * Test MADV_DONTNEED on anonymous private mapping
	 */
	addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
			PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
			-1, 0);
	if (addr == MAP_FAILED) {
		perror("mmap");
		exit(1);
	}
	write_fault_pages(addr, NR_HUGE_PAGES);
	validate_free_pages(free_hugepages - NR_HUGE_PAGES);

	if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
		perror("madvise");
		exit(1);
	}

	/* should free all pages in mapping */
	validate_free_pages(free_hugepages);

	(void)munmap(addr, NR_HUGE_PAGES * huge_page_size);

	/*
	 * Test MADV_DONTNEED on private mapping of hugetlb file
	 */
	if (fallocate(fd, 0, 0, NR_HUGE_PAGES * huge_page_size)) {
		perror("fallocate");
		exit(1);
	}
	validate_free_pages(free_hugepages - NR_HUGE_PAGES);

	addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
			PROT_READ | PROT_WRITE,
			MAP_PRIVATE, fd, 0);
	if (addr == MAP_FAILED) {
		perror("mmap");
		exit(1);
	}

	/* read should not consume any pages */
	read_fault_pages(addr, NR_HUGE_PAGES);
	validate_free_pages(free_hugepages - NR_HUGE_PAGES);

	/* madvise should not free any pages */
	if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
		perror("madvise");
		exit(1);
	}
	validate_free_pages(free_hugepages - NR_HUGE_PAGES);

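	/*
	 * A write fault on this MAP_PRIVATE mapping copies the file page
	 * into a new private hugetlb page (copy-on-write), so the mapping
	 * now consumes 2 * NR_HUGE_PAGES pages from the pool.
	 */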
	/* writes should allocate private pages */
	write_fault_pages(addr, NR_HUGE_PAGES);
	validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));

	/* madvise should free private pages */
	if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
		perror("madvise");
		exit(1);
	}
	validate_free_pages(free_hugepages - NR_HUGE_PAGES);

	/* writes should allocate private pages */
	write_fault_pages(addr, NR_HUGE_PAGES);
	validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));

	/*
	 * The fallocate below certainly should free the pages associated
	 * with the file.  However, pages in the private mapping are also
	 * freed.  This is not the 'correct' behavior, but is expected
	 * because this is how it has worked since the initial hugetlb
	 * implementation.
	 */
	if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
			0, NR_HUGE_PAGES * huge_page_size)) {
		perror("fallocate");
		exit(1);
	}
	validate_free_pages(free_hugepages);

	(void)munmap(addr, NR_HUGE_PAGES * huge_page_size);

	/*
	 * Test MADV_DONTNEED on shared mapping of hugetlb file
	 */
	if (fallocate(fd, 0, 0, NR_HUGE_PAGES * huge_page_size)) {
		perror("fallocate");
		exit(1);
	}
	validate_free_pages(free_hugepages - NR_HUGE_PAGES);

	addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
			PROT_READ | PROT_WRITE,
			MAP_SHARED, fd, 0);
	if (addr == MAP_FAILED) {
		perror("mmap");
		exit(1);
	}

	/* write should not consume any pages */
	write_fault_pages(addr, NR_HUGE_PAGES);
	validate_free_pages(free_hugepages - NR_HUGE_PAGES);

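	/*
	 * On a shared hugetlb mapping MADV_DONTNEED only zaps the page
	 * table entries; the pages remain in the hugetlbfs file, so the
	 * free count should not change.
	 */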
	/* madvise should not free any pages */
	if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
		perror("madvise");
		exit(1);
	}
	validate_free_pages(free_hugepages - NR_HUGE_PAGES);

	/*
	 * Test MADV_REMOVE on shared mapping of hugetlb file
	 *
	 * madvise here is the same as a hole punch and should free all pages.
	 */
	if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_REMOVE)) {
		perror("madvise");
		exit(1);
	}
	validate_free_pages(free_hugepages);
	(void)munmap(addr, NR_HUGE_PAGES * huge_page_size);

	/*
	 * Test MADV_REMOVE on shared and private mapping of hugetlb file
	 */
	if (fallocate(fd, 0, 0, NR_HUGE_PAGES * huge_page_size)) {
		perror("fallocate");
		exit(1);
	}
	validate_free_pages(free_hugepages - NR_HUGE_PAGES);

	addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
			PROT_READ | PROT_WRITE,
			MAP_SHARED, fd, 0);
	if (addr == MAP_FAILED) {
		perror("mmap");
		exit(1);
	}

	/* shared write should not consume any additional pages */
	write_fault_pages(addr, NR_HUGE_PAGES);
	validate_free_pages(free_hugepages - NR_HUGE_PAGES);

	addr2 = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
			PROT_READ | PROT_WRITE,
			MAP_PRIVATE, fd, 0);
	if (addr2 == MAP_FAILED) {
		perror("mmap");
		exit(1);
	}

	/* private read should not consume any pages */
	read_fault_pages(addr2, NR_HUGE_PAGES);
	validate_free_pages(free_hugepages - NR_HUGE_PAGES);

	/* private write should consume additional pages */
	write_fault_pages(addr2, NR_HUGE_PAGES);
	validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));

	/* madvise of shared mapping should not free any pages */
	if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
		perror("madvise");
		exit(1);
	}
	validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));

	/* madvise of private mapping should free private pages */
	if (madvise(addr2, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
		perror("madvise");
		exit(1);
	}
	validate_free_pages(free_hugepages - NR_HUGE_PAGES);

	/* private write should consume additional pages again */
	write_fault_pages(addr2, NR_HUGE_PAGES);
	validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));

	/*
	 * madvise should free both file and private pages although this is
	 * not correct.  Private pages should not be freed, but this is
	 * expected.  See the comment associated with the FALLOC_FL_PUNCH_HOLE
	 * call above.
	 */
	if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_REMOVE)) {
		perror("madvise");
		exit(1);
	}
	validate_free_pages(free_hugepages);

	(void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
	(void)munmap(addr2, NR_HUGE_PAGES * huge_page_size);

	close(fd);
	return 0;
}