1 // SPDX-License-Identifier: GPL-2.0
2 #include <string.h>
3 #include <fcntl.h>
4 #include <sys/ioctl.h>
5 #include <linux/userfaultfd.h>
6 #include <sys/syscall.h>
7 #include <unistd.h>
8 #include "../kselftest.h"
9 #include "vm_util.h"
10 
/* sysfs file opened by read_pmd_pagesize() to obtain the THP PMD size. */
#define PMD_SIZE_FILE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
/* Per-mapping statistics of the calling process, scanned by __check_huge(). */
#define SMAP_FILE_PATH "/proc/self/smaps"
/* Capacity of the line and pattern buffers used while scanning smaps. */
#define MAX_LINE_LENGTH 500
14 
/*
 * Page size and page shift storage with external linkage. Nothing in the
 * visible part of this file writes them; presumably they are filled in by
 * helpers declared in vm_util.h -- TODO confirm.
 */
unsigned int __page_size;
unsigned int __page_shift;
17 
/*
 * Read the 64-bit pagemap entry describing the page that contains @start.
 *
 * @fd:    descriptor read with pread(); expected to be an open pagemap file.
 * @start: virtual address whose entry is wanted.
 *
 * The file is indexed by virtual page number with one uint64_t per page, so
 * the entry lives at (address / page size) * sizeof(uint64_t). A failed or
 * short read terminates the test through ksft_exit_fail_msg().
 */
uint64_t pagemap_get_entry(int fd, char *start)
{
	const unsigned long vpn = (unsigned long)start / getpagesize();
	uint64_t entry;
	int nread;

	nread = pread(fd, &entry, sizeof(entry), vpn * sizeof(entry));
	if (nread == sizeof(entry))
		return entry;

	ksft_exit_fail_msg("reading pagemap failed\n");
	return entry;
}
29 
30 bool pagemap_is_softdirty(int fd, char *start)
31 {
32 	return pagemap_get_entry(fd, start) & PM_SOFT_DIRTY;
33 }
34 
35 bool pagemap_is_swapped(int fd, char *start)
36 {
37 	return pagemap_get_entry(fd, start) & PM_SWAP;
38 }
39 
40 bool pagemap_is_populated(int fd, char *start)
41 {
42 	return pagemap_get_entry(fd, start) & (PM_PRESENT | PM_SWAP);
43 }
44 
45 unsigned long pagemap_get_pfn(int fd, char *start)
46 {
47 	uint64_t entry = pagemap_get_entry(fd, start);
48 
49 	/* If present (63th bit), PFN is at bit 0 -- 54. */
50 	if (entry & PM_PRESENT)
51 		return entry & 0x007fffffffffffffull;
52 	return -1ul;
53 }
54 
/*
 * Write the command "4" to /proc/self/clear_refs; going by the function
 * name, this is the request that resets soft-dirty tracking for the calling
 * process. Failure to open the file or a short write terminates the test
 * through ksft_exit_fail_msg().
 */
void clear_softdirty(void)
{
	static const char cmd[] = "4";
	const size_t cmd_len = sizeof(cmd) - 1;
	int fd, written;

	fd = open("/proc/self/clear_refs", O_WRONLY);
	if (fd < 0)
		ksft_exit_fail_msg("opening clear_refs failed\n");

	written = write(fd, cmd, cmd_len);
	/* Close before judging the write so the descriptor is always released. */
	close(fd);
	if (written != cmd_len)
		ksft_exit_fail_msg("writing clear_refs failed\n");
}
68 
/*
 * Advance through @fp line by line until a line starts with @pattern.
 *
 * @fp:      stream to read from; reading continues from its current position.
 * @pattern: prefix each line is compared against.
 * @buf:     caller-provided line buffer; on success it holds the matching
 *           line (or the first @len - 1 bytes of it).
 * @len:     capacity of @buf.
 *
 * Returns true on the first match, false once fgets() reports end of file
 * or an error. The stream is left positioned just after the last line read.
 */
bool check_for_pattern(FILE *fp, const char *pattern, char *buf, size_t len)
{
	const size_t prefix_len = strlen(pattern);

	for (;;) {
		if (!fgets(buf, len, fp))
			return false;
		if (strncmp(buf, pattern, prefix_len) == 0)
			return true;
	}
}
77 
78 uint64_t read_pmd_pagesize(void)
79 {
80 	int fd;
81 	char buf[20];
82 	ssize_t num_read;
83 
84 	fd = open(PMD_SIZE_FILE_PATH, O_RDONLY);
85 	if (fd == -1)
86 		return 0;
87 
88 	num_read = read(fd, buf, 19);
89 	if (num_read < 1) {
90 		close(fd);
91 		return 0;
92 	}
93 	buf[num_read] = '\0';
94 	close(fd);
95 
96 	return strtoul(buf, NULL, 10);
97 }
98 
99 bool __check_huge(void *addr, char *pattern, int nr_hpages,
100 		  uint64_t hpage_size)
101 {
102 	uint64_t thp = -1;
103 	int ret;
104 	FILE *fp;
105 	char buffer[MAX_LINE_LENGTH];
106 	char addr_pattern[MAX_LINE_LENGTH];
107 
108 	ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
109 		       (unsigned long) addr);
110 	if (ret >= MAX_LINE_LENGTH)
111 		ksft_exit_fail_msg("%s: Pattern is too long\n", __func__);
112 
113 	fp = fopen(SMAP_FILE_PATH, "r");
114 	if (!fp)
115 		ksft_exit_fail_msg("%s: Failed to open file %s\n", __func__, SMAP_FILE_PATH);
116 
117 	if (!check_for_pattern(fp, addr_pattern, buffer, sizeof(buffer)))
118 		goto err_out;
119 
120 	/*
121 	 * Fetch the pattern in the same block and check the number of
122 	 * hugepages.
123 	 */
124 	if (!check_for_pattern(fp, pattern, buffer, sizeof(buffer)))
125 		goto err_out;
126 
127 	snprintf(addr_pattern, MAX_LINE_LENGTH, "%s%%9ld kB", pattern);
128 
129 	if (sscanf(buffer, addr_pattern, &thp) != 1)
130 		ksft_exit_fail_msg("Reading smap error\n");
131 
132 err_out:
133 	fclose(fp);
134 	return thp == (nr_hpages * (hpage_size >> 10));
135 }
136 
/*
 * Check that the smaps entry starting at @addr reports @nr_hpages huge pages
 * of @hpage_size bytes in its "AnonHugePages:" field.
 */
bool check_huge_anon(void *addr, int nr_hpages, uint64_t hpage_size)
{
	char *field = "AnonHugePages: ";

	return __check_huge(addr, field, nr_hpages, hpage_size);
}
141 
/*
 * Check that the smaps entry starting at @addr reports @nr_hpages huge pages
 * of @hpage_size bytes in its "FilePmdMapped:" field.
 */
bool check_huge_file(void *addr, int nr_hpages, uint64_t hpage_size)
{
	char *field = "FilePmdMapped:";

	return __check_huge(addr, field, nr_hpages, hpage_size);
}
146 
/*
 * Check that the smaps entry starting at @addr reports @nr_hpages huge pages
 * of @hpage_size bytes in its "ShmemPmdMapped:" field.
 */
bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size)
{
	char *field = "ShmemPmdMapped:";

	return __check_huge(addr, field, nr_hpages, hpage_size);
}
151 
152 int64_t allocate_transhuge(void *ptr, int pagemap_fd)
153 {
154 	uint64_t ent[2];
155 
156 	/* drop pmd */
157 	if (mmap(ptr, HPAGE_SIZE, PROT_READ | PROT_WRITE,
158 		 MAP_FIXED | MAP_ANONYMOUS |
159 		 MAP_NORESERVE | MAP_PRIVATE, -1, 0) != ptr)
160 		errx(2, "mmap transhuge");
161 
162 	if (madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE))
163 		err(2, "MADV_HUGEPAGE");
164 
165 	/* allocate transparent huge page */
166 	*(volatile void **)ptr = ptr;
167 
168 	if (pread(pagemap_fd, ent, sizeof(ent),
169 		  (uintptr_t)ptr >> (pshift() - 3)) != sizeof(ent))
170 		err(2, "read pagemap");
171 
172 	if (PAGEMAP_PRESENT(ent[0]) && PAGEMAP_PRESENT(ent[1]) &&
173 	    PAGEMAP_PFN(ent[0]) + 1 == PAGEMAP_PFN(ent[1]) &&
174 	    !(PAGEMAP_PFN(ent[0]) & ((1 << (HPAGE_SHIFT - pshift())) - 1)))
175 		return PAGEMAP_PFN(ent[0]);
176 
177 	return -1;
178 }
179 
/*
 * Look up the "Hugepagesize:" line of /proc/meminfo.
 *
 * Returns the reported value converted from kB to bytes, or 0 when the file
 * cannot be opened or no such line is found.
 */
unsigned long default_huge_page_size(void)
{
	FILE *meminfo = fopen("/proc/meminfo", "r");
	unsigned long size = 0;
	size_t cap = 0;
	char *line = NULL;

	if (!meminfo)
		return 0;

	while (getline(&line, &cap, meminfo) > 0) {
		if (sscanf(line, "Hugepagesize:       %lu kB", &size) != 1)
			continue;
		/* The file reports kB; callers get bytes. */
		size <<= 10;
		break;
	}

	/* getline() allocates the buffer; it is released here. */
	free(line);
	fclose(meminfo);
	return size;
}
200 
/*
 * Register [@addr, @addr + @len) with the userfaultfd @uffd.
 *
 * @miss, @wp, @minor: select UFFDIO_REGISTER_MODE_MISSING, _WP and _MINOR
 *                     respectively; any combination may be set.
 * @ioctls:            if non-NULL, receives the ioctl mask reported by a
 *                     successful registration; untouched on failure.
 *
 * Returns 0 on success or -errno when the UFFDIO_REGISTER ioctl fails.
 */
int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len,
			      bool miss, bool wp, bool minor, uint64_t *ioctls)
{
	struct uffdio_register reg = {
		.range = {
			.start = (unsigned long)addr,
			.len = len,
		},
		.mode = (miss ? UFFDIO_REGISTER_MODE_MISSING : 0) |
			(wp ? UFFDIO_REGISTER_MODE_WP : 0) |
			(minor ? UFFDIO_REGISTER_MODE_MINOR : 0),
	};

	if (ioctl(uffd, UFFDIO_REGISTER, &reg) == -1)
		return -errno;

	if (ioctls)
		*ioctls = reg.ioctls;
	return 0;
}
227 
/*
 * Register [@addr, @addr + @len) with @uffd in the selected modes without
 * asking for the resulting ioctl mask. Returns whatever
 * uffd_register_with_ioctls() returns: 0 on success or -errno.
 */
int uffd_register(int uffd, void *addr, uint64_t len,
		  bool miss, bool wp, bool minor)
{
	uint64_t *const no_ioctls = NULL;

	return uffd_register_with_ioctls(uffd, addr, len,
					 miss, wp, minor, no_ioctls);
}
234 
/*
 * Unregister [@addr, @addr + @len) from the userfaultfd @uffd.
 *
 * Returns 0 on success or -errno when the UFFDIO_UNREGISTER ioctl fails.
 */
int uffd_unregister(int uffd, void *addr, uint64_t len)
{
	struct uffdio_range range;

	range.start = (uintptr_t)addr;
	range.len = len;

	if (ioctl(uffd, UFFDIO_UNREGISTER, &range) == -1)
		return -errno;
	return 0;
}
245 
246 int uffd_open_dev(unsigned int flags)
247 {
248 	int fd, uffd;
249 
250 	fd = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC);
251 	if (fd < 0)
252 		return fd;
253 	uffd = ioctl(fd, USERFAULTFD_IOC_NEW, flags);
254 	close(fd);
255 
256 	return uffd;
257 }
258 
/*
 * Create a userfaultfd through the userfaultfd system call.
 *
 * Returns the syscall() result, or -1 when the build headers do not define
 * __NR_userfaultfd.
 */
int uffd_open_sys(unsigned int flags)
{
#ifndef __NR_userfaultfd
	return -1;
#else
	return syscall(__NR_userfaultfd, flags);
#endif
}
267 
/*
 * Create a userfaultfd, trying the system call first and falling back to
 * the device node when the system call returns a negative value.
 *
 * Returns the descriptor from whichever method succeeded, otherwise the
 * negative result of the device-node attempt.
 */
int uffd_open(unsigned int flags)
{
	const int uffd = uffd_open_sys(flags);

	return uffd >= 0 ? uffd : uffd_open_dev(flags);
}
277 
/*
 * Query the userfaultfd feature mask reported by the UFFDIO_API handshake.
 *
 * @features: receives the feature bits on success; untouched on failure.
 *
 * A temporary userfaultfd is opened for the query and closed again before
 * returning. Returns 0 on success, the negative uffd_open() result when no
 * descriptor could be created, or -errno when the UFFDIO_API ioctl fails.
 */
int uffd_get_features(uint64_t *features)
{
	struct uffdio_api uffdio_api = { .api = UFFD_API, .features = 0 };
	int saved_errno;
	/*
	 * This should by default work in most kernels; the feature list
	 * will be the same no matter what we pass in here.
	 */
	int fd = uffd_open(UFFD_USER_MODE_ONLY);

	if (fd < 0)
		/* Maybe the kernel is older than user-only mode? */
		fd = uffd_open(0);

	if (fd < 0)
		return fd;

	if (ioctl(fd, UFFDIO_API, &uffdio_api)) {
		/* close() may overwrite errno, so capture the ioctl error first. */
		saved_errno = errno;
		close(fd);
		return -saved_errno;
	}

	*features = uffdio_api.features;
	close(fd);

	return 0;
}
304