1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * KVM userfaultfd util
4  * Adapted from demand_paging_test.c
5  *
6  * Copyright (C) 2018, Red Hat, Inc.
7  * Copyright (C) 2019-2022 Google LLC
8  */
9 
10 #define _GNU_SOURCE /* for pipe2 */
11 
12 #include <inttypes.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <time.h>
16 #include <poll.h>
17 #include <pthread.h>
18 #include <linux/userfaultfd.h>
19 #include <sys/syscall.h>
20 
21 #include "kvm_util.h"
22 #include "test_util.h"
23 #include "memstress.h"
24 #include "userfaultfd_util.h"
25 
26 #ifdef __NR_userfaultfd
27 
28 static void *uffd_handler_thread_fn(void *arg)
29 {
30 	struct uffd_desc *uffd_desc = (struct uffd_desc *)arg;
31 	int uffd = uffd_desc->uffd;
32 	int pipefd = uffd_desc->pipefds[0];
33 	useconds_t delay = uffd_desc->delay;
34 	int64_t pages = 0;
35 	struct timespec start;
36 	struct timespec ts_diff;
37 
38 	clock_gettime(CLOCK_MONOTONIC, &start);
39 	while (1) {
40 		struct uffd_msg msg;
41 		struct pollfd pollfd[2];
42 		char tmp_chr;
43 		int r;
44 
45 		pollfd[0].fd = uffd;
46 		pollfd[0].events = POLLIN;
47 		pollfd[1].fd = pipefd;
48 		pollfd[1].events = POLLIN;
49 
50 		r = poll(pollfd, 2, -1);
51 		switch (r) {
52 		case -1:
53 			pr_info("poll err");
54 			continue;
55 		case 0:
56 			continue;
57 		case 1:
58 			break;
59 		default:
60 			pr_info("Polling uffd returned %d", r);
61 			return NULL;
62 		}
63 
64 		if (pollfd[0].revents & POLLERR) {
65 			pr_info("uffd revents has POLLERR");
66 			return NULL;
67 		}
68 
69 		if (pollfd[1].revents & POLLIN) {
70 			r = read(pollfd[1].fd, &tmp_chr, 1);
71 			TEST_ASSERT(r == 1,
72 				    "Error reading pipefd in UFFD thread\n");
73 			return NULL;
74 		}
75 
76 		if (!(pollfd[0].revents & POLLIN))
77 			continue;
78 
79 		r = read(uffd, &msg, sizeof(msg));
80 		if (r == -1) {
81 			if (errno == EAGAIN)
82 				continue;
83 			pr_info("Read of uffd got errno %d\n", errno);
84 			return NULL;
85 		}
86 
87 		if (r != sizeof(msg)) {
88 			pr_info("Read on uffd returned unexpected size: %d bytes", r);
89 			return NULL;
90 		}
91 
92 		if (!(msg.event & UFFD_EVENT_PAGEFAULT))
93 			continue;
94 
95 		if (delay)
96 			usleep(delay);
97 		r = uffd_desc->handler(uffd_desc->uffd_mode, uffd, &msg);
98 		if (r < 0)
99 			return NULL;
100 		pages++;
101 	}
102 
103 	ts_diff = timespec_elapsed(start);
104 	PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n",
105 		       pages, ts_diff.tv_sec, ts_diff.tv_nsec,
106 		       pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
107 
108 	return NULL;
109 }
110 
111 struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
112 					   void *hva, uint64_t len,
113 					   uffd_handler_t handler)
114 {
115 	struct uffd_desc *uffd_desc;
116 	bool is_minor = (uffd_mode == UFFDIO_REGISTER_MODE_MINOR);
117 	int uffd;
118 	struct uffdio_api uffdio_api;
119 	struct uffdio_register uffdio_register;
120 	uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY;
121 	int ret;
122 
123 	PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n",
124 		       is_minor ? "MINOR" : "MISSING",
125 		       is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY");
126 
127 	uffd_desc = malloc(sizeof(struct uffd_desc));
128 	TEST_ASSERT(uffd_desc, "malloc failed");
129 
130 	/* In order to get minor faults, prefault via the alias. */
131 	if (is_minor)
132 		expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE;
133 
134 	uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
135 	TEST_ASSERT(uffd >= 0, "uffd creation failed, errno: %d", errno);
136 
137 	uffdio_api.api = UFFD_API;
138 	uffdio_api.features = 0;
139 	TEST_ASSERT(ioctl(uffd, UFFDIO_API, &uffdio_api) != -1,
140 		    "ioctl UFFDIO_API failed: %" PRIu64,
141 		    (uint64_t)uffdio_api.api);
142 
143 	uffdio_register.range.start = (uint64_t)hva;
144 	uffdio_register.range.len = len;
145 	uffdio_register.mode = uffd_mode;
146 	TEST_ASSERT(ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) != -1,
147 		    "ioctl UFFDIO_REGISTER failed");
148 	TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) ==
149 		    expected_ioctls, "missing userfaultfd ioctls");
150 
151 	ret = pipe2(uffd_desc->pipefds, O_CLOEXEC | O_NONBLOCK);
152 	TEST_ASSERT(!ret, "Failed to set up pipefd");
153 
154 	uffd_desc->uffd_mode = uffd_mode;
155 	uffd_desc->uffd = uffd;
156 	uffd_desc->delay = delay;
157 	uffd_desc->handler = handler;
158 	pthread_create(&uffd_desc->thread, NULL, uffd_handler_thread_fn,
159 		       uffd_desc);
160 
161 	PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n",
162 		       hva, hva + len);
163 
164 	return uffd_desc;
165 }
166 
167 void uffd_stop_demand_paging(struct uffd_desc *uffd)
168 {
169 	char c = 0;
170 	int ret;
171 
172 	ret = write(uffd->pipefds[1], &c, 1);
173 	TEST_ASSERT(ret == 1, "Unable to write to pipefd");
174 
175 	ret = pthread_join(uffd->thread, NULL);
176 	TEST_ASSERT(ret == 0, "Pthread_join failed.");
177 
178 	close(uffd->uffd);
179 
180 	close(uffd->pipefds[1]);
181 	close(uffd->pipefds[0]);
182 
183 	free(uffd);
184 }
185 
186 #endif /* __NR_userfaultfd */
187