// SPDX-License-Identifier: GPL-2.0
/*
 * A test of splitting PMD THPs and PTE-mapped THPs from a specified virtual
 * address range in a process via <debugfs>/split_huge_pages interface.
 */

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <unistd.h>
#include <inttypes.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <malloc.h>
#include <stdbool.h>
#include "vm_util.h"

/* Base page size in bytes; assigned in main() from getpagesize(). */
uint64_t pagesize;
/* log2(pagesize); assigned in main(), used to turn an address into a page index. */
unsigned int pageshift;
/* PMD-level huge page size in bytes; assigned in main() from read_pmd_pagesize(). */
uint64_t pmd_pagesize;

/* debugfs file that the split requests are written to */
#define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages"
/* size of the buffers that hold a formatted request or a test file path */
#define INPUT_MAX 80

/*
 * Request formats for SPLIT_DEBUGFS.  Callers pass the two numeric fields as
 * uint64_t, so the conversions use PRIx64 rather than assuming that
 * unsigned long is 64 bits wide.
 *
 * PID_FMT:  pid, start address, end address
 * PATH_FMT: file path, start page offset, end page offset
 */
#define PID_FMT "%d,0x%" PRIx64 ",0x%" PRIx64
#define PATH_FMT "%s,0x%" PRIx64 ",0x%" PRIx64

/*
 * PFN_MASK selects the page frame number from a pagemap entry (bits 0-54);
 * KPF_THP is the kpageflags bit tested to detect a THP-backed page.  Both are
 * applied to uint64_t values, hence the ULL constants (1UL << 55 would be
 * undefined where long is 32 bits).
 */
#define PFN_MASK ((1ULL << 55) - 1)
#define KPF_THP (1ULL << 22)

is_backed_by_thp(char * vaddr,int pagemap_file,int kpageflags_file)34baa489faSSeongJae Park int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file)
35baa489faSSeongJae Park {
36baa489faSSeongJae Park uint64_t paddr;
37baa489faSSeongJae Park uint64_t page_flags;
38baa489faSSeongJae Park
39baa489faSSeongJae Park if (pagemap_file) {
40baa489faSSeongJae Park pread(pagemap_file, &paddr, sizeof(paddr),
41baa489faSSeongJae Park ((long)vaddr >> pageshift) * sizeof(paddr));
42baa489faSSeongJae Park
43baa489faSSeongJae Park if (kpageflags_file) {
44baa489faSSeongJae Park pread(kpageflags_file, &page_flags, sizeof(page_flags),
45baa489faSSeongJae Park (paddr & PFN_MASK) * sizeof(page_flags));
46baa489faSSeongJae Park
47baa489faSSeongJae Park return !!(page_flags & KPF_THP);
48baa489faSSeongJae Park }
49baa489faSSeongJae Park }
50baa489faSSeongJae Park return 0;
51baa489faSSeongJae Park }
52baa489faSSeongJae Park
/*
 * Write a NUL-terminated buffer to an existing file.
 *
 * @path:   file to open write-only; it is not created if missing
 * @buf:    data to write
 * @buflen: size of @buf including its terminating NUL; the NUL is not written
 *
 * Return: the number of bytes written, or 0 if the file could not be opened,
 * nothing was written, or there was no payload to write.  On an open() or
 * write() error, errno is left as set by that call so the caller can use
 * perror().
 */
static int write_file(const char *path, const char *buf, size_t buflen)
{
	int fd;
	int saved_errno;
	ssize_t numwritten;

	/*
	 * buflen counts the NUL, so fewer than 2 bytes means no payload;
	 * this also keeps buflen - 1 from wrapping around when buflen is 0.
	 */
	if (buflen < 2) {
		errno = EINVAL;
		return 0;
	}

	fd = open(path, O_WRONLY);
	if (fd == -1)
		return 0;

	numwritten = write(fd, buf, buflen - 1);

	/* close() may overwrite errno; preserve the value left by write() */
	saved_errno = errno;
	close(fd);
	errno = saved_errno;

	if (numwritten < 1)
		return 0;

	return (int)numwritten;
}

write_debugfs(const char * fmt,...)70baa489faSSeongJae Park static void write_debugfs(const char *fmt, ...)
71baa489faSSeongJae Park {
72baa489faSSeongJae Park char input[INPUT_MAX];
73baa489faSSeongJae Park int ret;
74baa489faSSeongJae Park va_list argp;
75baa489faSSeongJae Park
76baa489faSSeongJae Park va_start(argp, fmt);
77baa489faSSeongJae Park ret = vsnprintf(input, INPUT_MAX, fmt, argp);
78baa489faSSeongJae Park va_end(argp);
79baa489faSSeongJae Park
80baa489faSSeongJae Park if (ret >= INPUT_MAX) {
81baa489faSSeongJae Park printf("%s: Debugfs input is too long\n", __func__);
82baa489faSSeongJae Park exit(EXIT_FAILURE);
83baa489faSSeongJae Park }
84baa489faSSeongJae Park
85baa489faSSeongJae Park if (!write_file(SPLIT_DEBUGFS, input, ret + 1)) {
86baa489faSSeongJae Park perror(SPLIT_DEBUGFS);
87baa489faSSeongJae Park exit(EXIT_FAILURE);
88baa489faSSeongJae Park }
89baa489faSSeongJae Park }
90baa489faSSeongJae Park
split_pmd_thp(void)91baa489faSSeongJae Park void split_pmd_thp(void)
92baa489faSSeongJae Park {
93baa489faSSeongJae Park char *one_page;
94baa489faSSeongJae Park size_t len = 4 * pmd_pagesize;
95baa489faSSeongJae Park size_t i;
96baa489faSSeongJae Park
97baa489faSSeongJae Park one_page = memalign(pmd_pagesize, len);
98baa489faSSeongJae Park
99baa489faSSeongJae Park if (!one_page) {
100baa489faSSeongJae Park printf("Fail to allocate memory\n");
101baa489faSSeongJae Park exit(EXIT_FAILURE);
102baa489faSSeongJae Park }
103baa489faSSeongJae Park
104baa489faSSeongJae Park madvise(one_page, len, MADV_HUGEPAGE);
105baa489faSSeongJae Park
106baa489faSSeongJae Park for (i = 0; i < len; i++)
107baa489faSSeongJae Park one_page[i] = (char)i;
108baa489faSSeongJae Park
109dd63bd7dSZi Yan if (!check_huge_anon(one_page, 4, pmd_pagesize)) {
110baa489faSSeongJae Park printf("No THP is allocated\n");
111baa489faSSeongJae Park exit(EXIT_FAILURE);
112baa489faSSeongJae Park }
113baa489faSSeongJae Park
114baa489faSSeongJae Park /* split all THPs */
115baa489faSSeongJae Park write_debugfs(PID_FMT, getpid(), (uint64_t)one_page,
116baa489faSSeongJae Park (uint64_t)one_page + len);
117baa489faSSeongJae Park
118baa489faSSeongJae Park for (i = 0; i < len; i++)
119baa489faSSeongJae Park if (one_page[i] != (char)i) {
120baa489faSSeongJae Park printf("%ld byte corrupted\n", i);
121baa489faSSeongJae Park exit(EXIT_FAILURE);
122baa489faSSeongJae Park }
123baa489faSSeongJae Park
124baa489faSSeongJae Park
125dd63bd7dSZi Yan if (!check_huge_anon(one_page, 0, pmd_pagesize)) {
126baa489faSSeongJae Park printf("Still AnonHugePages not split\n");
127baa489faSSeongJae Park exit(EXIT_FAILURE);
128baa489faSSeongJae Park }
129baa489faSSeongJae Park
130baa489faSSeongJae Park printf("Split huge pages successful\n");
131baa489faSSeongJae Park free(one_page);
132baa489faSSeongJae Park }
133baa489faSSeongJae Park
split_pte_mapped_thp(void)134baa489faSSeongJae Park void split_pte_mapped_thp(void)
135baa489faSSeongJae Park {
136baa489faSSeongJae Park char *one_page, *pte_mapped, *pte_mapped2;
137baa489faSSeongJae Park size_t len = 4 * pmd_pagesize;
138baa489faSSeongJae Park uint64_t thp_size;
139baa489faSSeongJae Park size_t i;
140baa489faSSeongJae Park const char *pagemap_template = "/proc/%d/pagemap";
141baa489faSSeongJae Park const char *kpageflags_proc = "/proc/kpageflags";
142baa489faSSeongJae Park char pagemap_proc[255];
143baa489faSSeongJae Park int pagemap_fd;
144baa489faSSeongJae Park int kpageflags_fd;
145baa489faSSeongJae Park
146baa489faSSeongJae Park if (snprintf(pagemap_proc, 255, pagemap_template, getpid()) < 0) {
147baa489faSSeongJae Park perror("get pagemap proc error");
148baa489faSSeongJae Park exit(EXIT_FAILURE);
149baa489faSSeongJae Park }
150baa489faSSeongJae Park pagemap_fd = open(pagemap_proc, O_RDONLY);
151baa489faSSeongJae Park
152baa489faSSeongJae Park if (pagemap_fd == -1) {
153baa489faSSeongJae Park perror("read pagemap:");
154baa489faSSeongJae Park exit(EXIT_FAILURE);
155baa489faSSeongJae Park }
156baa489faSSeongJae Park
157baa489faSSeongJae Park kpageflags_fd = open(kpageflags_proc, O_RDONLY);
158baa489faSSeongJae Park
159baa489faSSeongJae Park if (kpageflags_fd == -1) {
160baa489faSSeongJae Park perror("read kpageflags:");
161baa489faSSeongJae Park exit(EXIT_FAILURE);
162baa489faSSeongJae Park }
163baa489faSSeongJae Park
164baa489faSSeongJae Park one_page = mmap((void *)(1UL << 30), len, PROT_READ | PROT_WRITE,
165baa489faSSeongJae Park MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
166baa489faSSeongJae Park
167baa489faSSeongJae Park madvise(one_page, len, MADV_HUGEPAGE);
168baa489faSSeongJae Park
169baa489faSSeongJae Park for (i = 0; i < len; i++)
170baa489faSSeongJae Park one_page[i] = (char)i;
171baa489faSSeongJae Park
172dd63bd7dSZi Yan if (!check_huge_anon(one_page, 4, pmd_pagesize)) {
173baa489faSSeongJae Park printf("No THP is allocated\n");
174baa489faSSeongJae Park exit(EXIT_FAILURE);
175baa489faSSeongJae Park }
176baa489faSSeongJae Park
177baa489faSSeongJae Park /* remap the first pagesize of first THP */
178baa489faSSeongJae Park pte_mapped = mremap(one_page, pagesize, pagesize, MREMAP_MAYMOVE);
179baa489faSSeongJae Park
180baa489faSSeongJae Park /* remap the Nth pagesize of Nth THP */
181baa489faSSeongJae Park for (i = 1; i < 4; i++) {
182baa489faSSeongJae Park pte_mapped2 = mremap(one_page + pmd_pagesize * i + pagesize * i,
183baa489faSSeongJae Park pagesize, pagesize,
184baa489faSSeongJae Park MREMAP_MAYMOVE|MREMAP_FIXED,
185baa489faSSeongJae Park pte_mapped + pagesize * i);
186baa489faSSeongJae Park if (pte_mapped2 == (char *)-1) {
187baa489faSSeongJae Park perror("mremap failed");
188baa489faSSeongJae Park exit(EXIT_FAILURE);
189baa489faSSeongJae Park }
190baa489faSSeongJae Park }
191baa489faSSeongJae Park
192baa489faSSeongJae Park /* smap does not show THPs after mremap, use kpageflags instead */
193baa489faSSeongJae Park thp_size = 0;
194baa489faSSeongJae Park for (i = 0; i < pagesize * 4; i++)
195baa489faSSeongJae Park if (i % pagesize == 0 &&
196baa489faSSeongJae Park is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd))
197baa489faSSeongJae Park thp_size++;
198baa489faSSeongJae Park
199baa489faSSeongJae Park if (thp_size != 4) {
200baa489faSSeongJae Park printf("Some THPs are missing during mremap\n");
201baa489faSSeongJae Park exit(EXIT_FAILURE);
202baa489faSSeongJae Park }
203baa489faSSeongJae Park
204baa489faSSeongJae Park /* split all remapped THPs */
205baa489faSSeongJae Park write_debugfs(PID_FMT, getpid(), (uint64_t)pte_mapped,
206baa489faSSeongJae Park (uint64_t)pte_mapped + pagesize * 4);
207baa489faSSeongJae Park
208baa489faSSeongJae Park /* smap does not show THPs after mremap, use kpageflags instead */
209baa489faSSeongJae Park thp_size = 0;
210baa489faSSeongJae Park for (i = 0; i < pagesize * 4; i++) {
211baa489faSSeongJae Park if (pte_mapped[i] != (char)i) {
212baa489faSSeongJae Park printf("%ld byte corrupted\n", i);
213baa489faSSeongJae Park exit(EXIT_FAILURE);
214baa489faSSeongJae Park }
215baa489faSSeongJae Park if (i % pagesize == 0 &&
216baa489faSSeongJae Park is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd))
217baa489faSSeongJae Park thp_size++;
218baa489faSSeongJae Park }
219baa489faSSeongJae Park
220baa489faSSeongJae Park if (thp_size) {
221baa489faSSeongJae Park printf("Still %ld THPs not split\n", thp_size);
222baa489faSSeongJae Park exit(EXIT_FAILURE);
223baa489faSSeongJae Park }
224baa489faSSeongJae Park
225baa489faSSeongJae Park printf("Split PTE-mapped huge pages successful\n");
226baa489faSSeongJae Park munmap(one_page, len);
227baa489faSSeongJae Park close(pagemap_fd);
228baa489faSSeongJae Park close(kpageflags_fd);
229baa489faSSeongJae Park }
230baa489faSSeongJae Park
split_file_backed_thp(void)231baa489faSSeongJae Park void split_file_backed_thp(void)
232baa489faSSeongJae Park {
233baa489faSSeongJae Park int status;
234baa489faSSeongJae Park int fd;
235baa489faSSeongJae Park ssize_t num_written;
236baa489faSSeongJae Park char tmpfs_template[] = "/tmp/thp_split_XXXXXX";
237baa489faSSeongJae Park const char *tmpfs_loc = mkdtemp(tmpfs_template);
238baa489faSSeongJae Park char testfile[INPUT_MAX];
239baa489faSSeongJae Park uint64_t pgoff_start = 0, pgoff_end = 1024;
240baa489faSSeongJae Park
241baa489faSSeongJae Park printf("Please enable pr_debug in split_huge_pages_in_file() if you need more info.\n");
242baa489faSSeongJae Park
243baa489faSSeongJae Park status = mount("tmpfs", tmpfs_loc, "tmpfs", 0, "huge=always,size=4m");
244baa489faSSeongJae Park
245baa489faSSeongJae Park if (status) {
246baa489faSSeongJae Park printf("Unable to create a tmpfs for testing\n");
247baa489faSSeongJae Park exit(EXIT_FAILURE);
248baa489faSSeongJae Park }
249baa489faSSeongJae Park
250baa489faSSeongJae Park status = snprintf(testfile, INPUT_MAX, "%s/thp_file", tmpfs_loc);
251baa489faSSeongJae Park if (status >= INPUT_MAX) {
252baa489faSSeongJae Park printf("Fail to create file-backed THP split testing file\n");
253baa489faSSeongJae Park goto cleanup;
254baa489faSSeongJae Park }
255baa489faSSeongJae Park
256*78142322SVitaly Chikunov fd = open(testfile, O_CREAT|O_WRONLY, 0664);
257baa489faSSeongJae Park if (fd == -1) {
258baa489faSSeongJae Park perror("Cannot open testing file\n");
259baa489faSSeongJae Park goto cleanup;
260baa489faSSeongJae Park }
261baa489faSSeongJae Park
262baa489faSSeongJae Park /* write something to the file, so a file-backed THP can be allocated */
263baa489faSSeongJae Park num_written = write(fd, tmpfs_loc, strlen(tmpfs_loc) + 1);
264baa489faSSeongJae Park close(fd);
265baa489faSSeongJae Park
266baa489faSSeongJae Park if (num_written < 1) {
267baa489faSSeongJae Park printf("Fail to write data to testing file\n");
268baa489faSSeongJae Park goto cleanup;
269baa489faSSeongJae Park }
270baa489faSSeongJae Park
271baa489faSSeongJae Park /* split the file-backed THP */
272baa489faSSeongJae Park write_debugfs(PATH_FMT, testfile, pgoff_start, pgoff_end);
273baa489faSSeongJae Park
274baa489faSSeongJae Park status = unlink(testfile);
275baa489faSSeongJae Park if (status)
276baa489faSSeongJae Park perror("Cannot remove testing file\n");
277baa489faSSeongJae Park
278baa489faSSeongJae Park cleanup:
279baa489faSSeongJae Park status = umount(tmpfs_loc);
280baa489faSSeongJae Park if (status) {
281baa489faSSeongJae Park printf("Unable to umount %s\n", tmpfs_loc);
282baa489faSSeongJae Park exit(EXIT_FAILURE);
283baa489faSSeongJae Park }
284baa489faSSeongJae Park status = rmdir(tmpfs_loc);
285baa489faSSeongJae Park if (status) {
286baa489faSSeongJae Park perror("cannot remove tmp dir");
287baa489faSSeongJae Park exit(EXIT_FAILURE);
288baa489faSSeongJae Park }
289baa489faSSeongJae Park
290baa489faSSeongJae Park printf("file-backed THP split test done, please check dmesg for more information\n");
291baa489faSSeongJae Park }
292baa489faSSeongJae Park
main(int argc,char ** argv)293baa489faSSeongJae Park int main(int argc, char **argv)
294baa489faSSeongJae Park {
295baa489faSSeongJae Park if (geteuid() != 0) {
296baa489faSSeongJae Park printf("Please run the benchmark as root\n");
297baa489faSSeongJae Park exit(EXIT_FAILURE);
298baa489faSSeongJae Park }
299baa489faSSeongJae Park
300baa489faSSeongJae Park pagesize = getpagesize();
301baa489faSSeongJae Park pageshift = ffs(pagesize) - 1;
302baa489faSSeongJae Park pmd_pagesize = read_pmd_pagesize();
303d6e61afbSDavid Hildenbrand if (!pmd_pagesize) {
304d6e61afbSDavid Hildenbrand printf("Reading PMD pagesize failed\n");
305d6e61afbSDavid Hildenbrand exit(EXIT_FAILURE);
306d6e61afbSDavid Hildenbrand }
307baa489faSSeongJae Park
308baa489faSSeongJae Park split_pmd_thp();
309baa489faSSeongJae Park split_pte_mapped_thp();
310baa489faSSeongJae Park split_file_backed_thp();
311baa489faSSeongJae Park
312baa489faSSeongJae Park return 0;
313baa489faSSeongJae Park }
314