1 /* 2 * Linux UFFD-WP support 3 * 4 * Copyright Virtuozzo GmbH, 2020 5 * 6 * Authors: 7 * Andrey Gruzdev <andrey.gruzdev@virtuozzo.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or 10 * later. See the COPYING file in the top-level directory. 11 */ 12 13 #include "qemu/osdep.h" 14 #include "qemu/bitops.h" 15 #include "qemu/error-report.h" 16 #include "qemu/userfaultfd.h" 17 #include "trace.h" 18 #include <poll.h> 19 #include <sys/syscall.h> 20 #include <sys/ioctl.h> 21 22 int uffd_open(int flags) 23 { 24 #if defined(__NR_userfaultfd) 25 return syscall(__NR_userfaultfd, flags); 26 #else 27 return -EINVAL; 28 #endif 29 } 30 31 /** 32 * uffd_query_features: query UFFD features 33 * 34 * Returns: 0 on success, negative value in case of an error 35 * 36 * @features: parameter to receive 'uffdio_api.features' 37 */ 38 int uffd_query_features(uint64_t *features) 39 { 40 int uffd_fd; 41 struct uffdio_api api_struct = { 0 }; 42 int ret = -1; 43 44 uffd_fd = uffd_open(O_CLOEXEC); 45 if (uffd_fd < 0) { 46 trace_uffd_query_features_nosys(errno); 47 return -1; 48 } 49 50 api_struct.api = UFFD_API; 51 api_struct.features = 0; 52 53 if (ioctl(uffd_fd, UFFDIO_API, &api_struct)) { 54 trace_uffd_query_features_api_failed(errno); 55 goto out; 56 } 57 *features = api_struct.features; 58 ret = 0; 59 60 out: 61 close(uffd_fd); 62 return ret; 63 } 64 65 /** 66 * uffd_create_fd: create UFFD file descriptor 67 * 68 * Returns non-negative file descriptor or negative value in case of an error 69 * 70 * @features: UFFD features to request 71 * @non_blocking: create UFFD file descriptor for non-blocking operation 72 */ 73 int uffd_create_fd(uint64_t features, bool non_blocking) 74 { 75 int uffd_fd; 76 int flags; 77 struct uffdio_api api_struct = { 0 }; 78 uint64_t ioctl_mask = BIT(_UFFDIO_REGISTER) | BIT(_UFFDIO_UNREGISTER); 79 80 flags = O_CLOEXEC | (non_blocking ? O_NONBLOCK : 0); 81 uffd_fd = uffd_open(flags); 82 if (uffd_fd < 0) { 83 trace_uffd_create_fd_nosys(errno); 84 return -1; 85 } 86 87 api_struct.api = UFFD_API; 88 api_struct.features = features; 89 if (ioctl(uffd_fd, UFFDIO_API, &api_struct)) { 90 trace_uffd_create_fd_api_failed(errno); 91 goto fail; 92 } 93 if ((api_struct.ioctls & ioctl_mask) != ioctl_mask) { 94 trace_uffd_create_fd_api_noioctl(ioctl_mask, api_struct.ioctls); 95 goto fail; 96 } 97 98 return uffd_fd; 99 100 fail: 101 close(uffd_fd); 102 return -1; 103 } 104 105 /** 106 * uffd_close_fd: close UFFD file descriptor 107 * 108 * @uffd_fd: UFFD file descriptor 109 */ 110 void uffd_close_fd(int uffd_fd) 111 { 112 assert(uffd_fd >= 0); 113 close(uffd_fd); 114 } 115 116 /** 117 * uffd_register_memory: register memory range via UFFD-IO 118 * 119 * Returns 0 in case of success, negative value in case of an error 120 * 121 * @uffd_fd: UFFD file descriptor 122 * @addr: base address of memory range 123 * @length: length of memory range 124 * @mode: UFFD register mode (UFFDIO_REGISTER_MODE_MISSING, ...) 125 * @ioctls: optional pointer to receive supported IOCTL mask 126 */ 127 int uffd_register_memory(int uffd_fd, void *addr, uint64_t length, 128 uint64_t mode, uint64_t *ioctls) 129 { 130 struct uffdio_register uffd_register; 131 132 uffd_register.range.start = (uintptr_t) addr; 133 uffd_register.range.len = length; 134 uffd_register.mode = mode; 135 136 if (ioctl(uffd_fd, UFFDIO_REGISTER, &uffd_register)) { 137 trace_uffd_register_memory_failed(addr, length, mode, errno); 138 return -1; 139 } 140 if (ioctls) { 141 *ioctls = uffd_register.ioctls; 142 } 143 144 return 0; 145 } 146 147 /** 148 * uffd_unregister_memory: un-register memory range with UFFD-IO 149 * 150 * Returns 0 in case of success, negative value in case of an error 151 * 152 * @uffd_fd: UFFD file descriptor 153 * @addr: base address of memory range 154 * @length: length of memory range 155 */ 156 int uffd_unregister_memory(int uffd_fd, void *addr, uint64_t length) 157 { 158 struct uffdio_range uffd_range; 159 160 uffd_range.start = (uintptr_t) addr; 161 uffd_range.len = length; 162 163 if (ioctl(uffd_fd, UFFDIO_UNREGISTER, &uffd_range)) { 164 trace_uffd_unregister_memory_failed(addr, length, errno); 165 return -1; 166 } 167 168 return 0; 169 } 170 171 /** 172 * uffd_change_protection: protect/un-protect memory range for writes via UFFD-IO 173 * 174 * Returns 0 on success, negative value in case of error 175 * 176 * @uffd_fd: UFFD file descriptor 177 * @addr: base address of memory range 178 * @length: length of memory range 179 * @wp: write-protect/unprotect 180 * @dont_wake: do not wake threads waiting on wr-protected page 181 */ 182 int uffd_change_protection(int uffd_fd, void *addr, uint64_t length, 183 bool wp, bool dont_wake) 184 { 185 struct uffdio_writeprotect uffd_writeprotect; 186 187 uffd_writeprotect.range.start = (uintptr_t) addr; 188 uffd_writeprotect.range.len = length; 189 if (!wp && dont_wake) { 190 /* DONTWAKE is meaningful only on protection release */ 191 uffd_writeprotect.mode = UFFDIO_WRITEPROTECT_MODE_DONTWAKE; 192 } else { 193 uffd_writeprotect.mode = (wp ? UFFDIO_WRITEPROTECT_MODE_WP : 0); 194 } 195 196 if (ioctl(uffd_fd, UFFDIO_WRITEPROTECT, &uffd_writeprotect)) { 197 error_report("uffd_change_protection() failed: addr=%p len=%" PRIu64 198 " mode=%" PRIx64 " errno=%i", addr, length, 199 (uint64_t) uffd_writeprotect.mode, errno); 200 return -1; 201 } 202 203 return 0; 204 } 205 206 /** 207 * uffd_copy_page: copy range of pages to destination via UFFD-IO 208 * 209 * Copy range of source pages to the destination to resolve 210 * missing page fault somewhere in the destination range. 211 * 212 * Returns 0 on success, negative value in case of an error 213 * 214 * @uffd_fd: UFFD file descriptor 215 * @dst_addr: destination base address 216 * @src_addr: source base address 217 * @length: length of the range to copy 218 * @dont_wake: do not wake threads waiting on missing page 219 */ 220 int uffd_copy_page(int uffd_fd, void *dst_addr, void *src_addr, 221 uint64_t length, bool dont_wake) 222 { 223 struct uffdio_copy uffd_copy; 224 225 uffd_copy.dst = (uintptr_t) dst_addr; 226 uffd_copy.src = (uintptr_t) src_addr; 227 uffd_copy.len = length; 228 uffd_copy.mode = dont_wake ? UFFDIO_COPY_MODE_DONTWAKE : 0; 229 230 if (ioctl(uffd_fd, UFFDIO_COPY, &uffd_copy)) { 231 error_report("uffd_copy_page() failed: dst_addr=%p src_addr=%p length=%" PRIu64 232 " mode=%" PRIx64 " errno=%i", dst_addr, src_addr, 233 length, (uint64_t) uffd_copy.mode, errno); 234 return -1; 235 } 236 237 return 0; 238 } 239 240 /** 241 * uffd_zero_page: fill range of pages with zeroes via UFFD-IO 242 * 243 * Fill range pages with zeroes to resolve missing page fault within the range. 244 * 245 * Returns 0 on success, negative value in case of an error 246 * 247 * @uffd_fd: UFFD file descriptor 248 * @addr: base address 249 * @length: length of the range to fill with zeroes 250 * @dont_wake: do not wake threads waiting on missing page 251 */ 252 int uffd_zero_page(int uffd_fd, void *addr, uint64_t length, bool dont_wake) 253 { 254 struct uffdio_zeropage uffd_zeropage; 255 256 uffd_zeropage.range.start = (uintptr_t) addr; 257 uffd_zeropage.range.len = length; 258 uffd_zeropage.mode = dont_wake ? UFFDIO_ZEROPAGE_MODE_DONTWAKE : 0; 259 260 if (ioctl(uffd_fd, UFFDIO_ZEROPAGE, &uffd_zeropage)) { 261 error_report("uffd_zero_page() failed: addr=%p length=%" PRIu64 262 " mode=%" PRIx64 " errno=%i", addr, length, 263 (uint64_t) uffd_zeropage.mode, errno); 264 return -1; 265 } 266 267 return 0; 268 } 269 270 /** 271 * uffd_wakeup: wake up threads waiting on page UFFD-managed page fault resolution 272 * 273 * Wake up threads waiting on any page/pages from the designated range. 274 * The main use case is when during some period, page faults are resolved 275 * via UFFD-IO IOCTLs with MODE_DONTWAKE flag set, then after that all waits 276 * for the whole memory range are satisfied in a single call to uffd_wakeup(). 277 * 278 * Returns 0 on success, negative value in case of an error 279 * 280 * @uffd_fd: UFFD file descriptor 281 * @addr: base address 282 * @length: length of the range 283 */ 284 int uffd_wakeup(int uffd_fd, void *addr, uint64_t length) 285 { 286 struct uffdio_range uffd_range; 287 288 uffd_range.start = (uintptr_t) addr; 289 uffd_range.len = length; 290 291 if (ioctl(uffd_fd, UFFDIO_WAKE, &uffd_range)) { 292 error_report("uffd_wakeup() failed: addr=%p length=%" PRIu64 " errno=%i", 293 addr, length, errno); 294 return -1; 295 } 296 297 return 0; 298 } 299 300 /** 301 * uffd_read_events: read pending UFFD events 302 * 303 * Returns number of fetched messages, 0 if non is available or 304 * negative value in case of an error 305 * 306 * @uffd_fd: UFFD file descriptor 307 * @msgs: pointer to message buffer 308 * @count: number of messages that can fit in the buffer 309 */ 310 int uffd_read_events(int uffd_fd, struct uffd_msg *msgs, int count) 311 { 312 ssize_t res; 313 do { 314 res = read(uffd_fd, msgs, count * sizeof(struct uffd_msg)); 315 } while (res < 0 && errno == EINTR); 316 317 if ((res < 0 && errno == EAGAIN)) { 318 return 0; 319 } 320 if (res < 0) { 321 error_report("uffd_read_events() failed: errno=%i", errno); 322 return -1; 323 } 324 325 return (int) (res / sizeof(struct uffd_msg)); 326 } 327 328 /** 329 * uffd_poll_events: poll UFFD file descriptor for read 330 * 331 * Returns true if events are available for read, false otherwise 332 * 333 * @uffd_fd: UFFD file descriptor 334 * @tmo: timeout value 335 */ 336 bool uffd_poll_events(int uffd_fd, int tmo) 337 { 338 int res; 339 struct pollfd poll_fd = { .fd = uffd_fd, .events = POLLIN, .revents = 0 }; 340 341 do { 342 res = poll(&poll_fd, 1, tmo); 343 } while (res < 0 && errno == EINTR); 344 345 if (res == 0) { 346 return false; 347 } 348 if (res < 0) { 349 error_report("uffd_poll_events() failed: errno=%i", errno); 350 return false; 351 } 352 353 return (poll_fd.revents & POLLIN) != 0; 354 } 355