1baa489faSSeongJae Park // SPDX-License-Identifier: GPL-2.0
/*
 * hugepage-mremap:
 *
 * Example of remapping huge page memory in a user application using the
 * mremap system call.  The huge pages are backed by a memfd created with
 * MFD_HUGETLB, so no path to a hugetlbfs file needs to be passed.  The
 * amount of memory used by this test in MBs can optionally be passed as the
 * first argument.  If no memory amount is passed, the default amount is 10MB.
 *
 * To make sure the test triggers pmd sharing and goes through the 'unshare'
 * path in the mremap code, use 1GB (1024) or more.
 */
14baa489faSSeongJae Park 
15baa489faSSeongJae Park #define _GNU_SOURCE
16baa489faSSeongJae Park #include <stdlib.h>
17baa489faSSeongJae Park #include <stdio.h>
18baa489faSSeongJae Park #include <unistd.h>
19baa489faSSeongJae Park #include <sys/mman.h>
20baa489faSSeongJae Park #include <errno.h>
21baa489faSSeongJae Park #include <fcntl.h> /* Definition of O_* constants */
22baa489faSSeongJae Park #include <sys/syscall.h> /* Definition of SYS_* constants */
23baa489faSSeongJae Park #include <linux/userfaultfd.h>
24baa489faSSeongJae Park #include <sys/ioctl.h>
25baa489faSSeongJae Park #include <string.h>
26*c4277cb6SPeter Xu #include <stdbool.h>
27*c4277cb6SPeter Xu #include "vm_util.h"
28baa489faSSeongJae Park 
/* Size of the test mappings, in MB, when no length argument is given. */
#define DEFAULT_LENGTH_MB 10UL
/*
 * Convert a size in MB to bytes.  The argument is parenthesized so that
 * expressions such as MB_TO_BYTES(a + b) expand correctly, and the
 * multiplication is done in unsigned long so an int argument is widened
 * before it is scaled.
 */
#define MB_TO_BYTES(x) ((x) * 1024UL * 1024UL)

/* Protection and flags used for the test mappings. */
#define PROTECTION (PROT_READ | PROT_WRITE | PROT_EXEC)
#define FLAGS (MAP_SHARED | MAP_ANONYMOUS)
34baa489faSSeongJae Park 
/*
 * Print the first unsigned int found at @addr in hexadecimal.
 *
 * @addr must point to at least sizeof(unsigned int) readable bytes.
 */
static void check_bytes(char *addr)
{
	unsigned int *first_word = (unsigned int *)addr;

	printf("First hex is %x\n", *first_word);
}
39baa489faSSeongJae Park 
/*
 * Fill @len bytes starting at @addr with a repeating pattern: the byte at
 * offset N receives N truncated to a char.
 */
static void write_bytes(char *addr, size_t len)
{
	unsigned long off = 0;

	while (off < len) {
		addr[off] = (char)off;
		off++;
	}
}
47baa489faSSeongJae Park 
/*
 * Verify that @len bytes at @addr hold the pattern where the byte at
 * offset N equals N truncated to a char.
 *
 * The first word of the region is printed via check_bytes() before the scan.
 *
 * Returns 0 when every byte matches, or 1 after printing the offset of the
 * first mismatch.
 */
static int read_bytes(char *addr, size_t len)
{
	unsigned long off;

	check_bytes(addr);

	for (off = 0; off < len; off++) {
		if (addr[off] == (char)off)
			continue;

		printf("Mismatch at %lu\n", off);
		return 1;
	}

	return 0;
}
60baa489faSSeongJae Park 
/*
 * Create a userfaultfd object and register a newly created anonymous
 * mapping of @len bytes with it.
 *
 * NOTE(review): the value passed in @addr is never used.  It is overwritten
 * below with the address of a fresh private anonymous mapping, and that new
 * mapping is what gets registered -- confirm this is the intended behavior.
 *
 * The userfaultfd descriptor and the new mapping are left open/mapped when
 * the function returns.  Any failure is reported with perror() and
 * terminates the process with exit status 1.
 */
static void register_region_with_uffd(char *addr, size_t len)
{
	struct uffdio_api api_req;
	long uffd_fd; /* userfaultfd file descriptor */

	/* Create the userfaultfd object. */
	uffd_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
	if (uffd_fd == -1) {
		perror("userfaultfd");
		exit(1);
	}

	/* Enable it by negotiating the API, requesting no extra features. */
	api_req.api = UFFD_API;
	api_req.features = 0;
	if (ioctl(uffd_fd, UFFDIO_API, &api_req) == -1) {
		perror("ioctl-UFFDIO_API");
		exit(1);
	}

	/*
	 * Create a private anonymous mapping.  The memory is demand-zero
	 * paged, i.e. not allocated until it is first touched.
	 */
	addr = mmap(NULL, len, PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (addr == MAP_FAILED) {
		perror("mmap");
		exit(1);
	}

	printf("Address returned by mmap() = %p\n", addr);

	/*
	 * Register the range of the mapping just created with the
	 * userfaultfd object, asking to track missing pages (pages that
	 * have not yet been faulted in).
	 * NOTE(review): the meaning of the three boolean arguments comes
	 * from uffd_register() in vm_util.h -- confirm there.
	 */
	if (uffd_register(uffd_fd, addr, len, true, false, false)) {
		perror("ioctl-UFFDIO_REGISTER");
		exit(1);
	}
}
105baa489faSSeongJae Park 
main(int argc,char * argv[])106baa489faSSeongJae Park int main(int argc, char *argv[])
107baa489faSSeongJae Park {
108baa489faSSeongJae Park 	size_t length = 0;
109baa489faSSeongJae Park 	int ret = 0, fd;
110baa489faSSeongJae Park 
111baa489faSSeongJae Park 	if (argc >= 2 && !strcmp(argv[1], "-h")) {
112baa489faSSeongJae Park 		printf("Usage: %s [length_in_MB]\n", argv[0]);
113baa489faSSeongJae Park 		exit(1);
114baa489faSSeongJae Park 	}
115baa489faSSeongJae Park 
116baa489faSSeongJae Park 	/* Read memory length as the first arg if valid, otherwise fallback to
117baa489faSSeongJae Park 	 * the default length.
118baa489faSSeongJae Park 	 */
119baa489faSSeongJae Park 	if (argc >= 2)
120baa489faSSeongJae Park 		length = (size_t)atoi(argv[1]);
121baa489faSSeongJae Park 	else
122baa489faSSeongJae Park 		length = DEFAULT_LENGTH_MB;
123baa489faSSeongJae Park 
124baa489faSSeongJae Park 	length = MB_TO_BYTES(length);
125baa489faSSeongJae Park 	fd = memfd_create(argv[0], MFD_HUGETLB);
126baa489faSSeongJae Park 	if (fd < 0) {
127baa489faSSeongJae Park 		perror("Open failed");
128baa489faSSeongJae Park 		exit(1);
129baa489faSSeongJae Park 	}
130baa489faSSeongJae Park 
131baa489faSSeongJae Park 	/* mmap to a PUD aligned address to hopefully trigger pmd sharing. */
132baa489faSSeongJae Park 	unsigned long suggested_addr = 0x7eaa40000000;
133baa489faSSeongJae Park 	void *haddr = mmap((void *)suggested_addr, length, PROTECTION,
134baa489faSSeongJae Park 			   MAP_HUGETLB | MAP_SHARED | MAP_POPULATE, fd, 0);
135baa489faSSeongJae Park 	printf("Map haddr: Returned address is %p\n", haddr);
136baa489faSSeongJae Park 	if (haddr == MAP_FAILED) {
137baa489faSSeongJae Park 		perror("mmap1");
138baa489faSSeongJae Park 		exit(1);
139baa489faSSeongJae Park 	}
140baa489faSSeongJae Park 
141baa489faSSeongJae Park 	/* mmap again to a dummy address to hopefully trigger pmd sharing. */
142baa489faSSeongJae Park 	suggested_addr = 0x7daa40000000;
143baa489faSSeongJae Park 	void *daddr = mmap((void *)suggested_addr, length, PROTECTION,
144baa489faSSeongJae Park 			   MAP_HUGETLB | MAP_SHARED | MAP_POPULATE, fd, 0);
145baa489faSSeongJae Park 	printf("Map daddr: Returned address is %p\n", daddr);
146baa489faSSeongJae Park 	if (daddr == MAP_FAILED) {
147baa489faSSeongJae Park 		perror("mmap3");
148baa489faSSeongJae Park 		exit(1);
149baa489faSSeongJae Park 	}
150baa489faSSeongJae Park 
151baa489faSSeongJae Park 	suggested_addr = 0x7faa40000000;
152baa489faSSeongJae Park 	void *vaddr =
153baa489faSSeongJae Park 		mmap((void *)suggested_addr, length, PROTECTION, FLAGS, -1, 0);
154baa489faSSeongJae Park 	printf("Map vaddr: Returned address is %p\n", vaddr);
155baa489faSSeongJae Park 	if (vaddr == MAP_FAILED) {
156baa489faSSeongJae Park 		perror("mmap2");
157baa489faSSeongJae Park 		exit(1);
158baa489faSSeongJae Park 	}
159baa489faSSeongJae Park 
160baa489faSSeongJae Park 	register_region_with_uffd(haddr, length);
161baa489faSSeongJae Park 
162baa489faSSeongJae Park 	void *addr = mremap(haddr, length, length,
163baa489faSSeongJae Park 			    MREMAP_MAYMOVE | MREMAP_FIXED, vaddr);
164baa489faSSeongJae Park 	if (addr == MAP_FAILED) {
165baa489faSSeongJae Park 		perror("mremap");
166baa489faSSeongJae Park 		exit(1);
167baa489faSSeongJae Park 	}
168baa489faSSeongJae Park 
169baa489faSSeongJae Park 	printf("Mremap: Returned address is %p\n", addr);
170baa489faSSeongJae Park 	check_bytes(addr);
171baa489faSSeongJae Park 	write_bytes(addr, length);
172baa489faSSeongJae Park 	ret = read_bytes(addr, length);
173baa489faSSeongJae Park 
174baa489faSSeongJae Park 	munmap(addr, length);
175baa489faSSeongJae Park 
176baa489faSSeongJae Park 	addr = mremap(addr, length, length, 0);
177baa489faSSeongJae Park 	if (addr != MAP_FAILED) {
178baa489faSSeongJae Park 		printf("mremap: Expected failure, but call succeeded\n");
179baa489faSSeongJae Park 		exit(1);
180baa489faSSeongJae Park 	}
181baa489faSSeongJae Park 
182baa489faSSeongJae Park 	close(fd);
183baa489faSSeongJae Park 
184baa489faSSeongJae Park 	return ret;
185baa489faSSeongJae Park }
186