/*
 * Linux UFFD-WP support
 *
 * Copyright Virtuozzo GmbH, 2020
 *
 * Authors:
 *  Andrey Gruzdev   <andrey.gruzdev@virtuozzo.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 */
120e9b5cd6SAndrey Gruzdev
130e9b5cd6SAndrey Gruzdev #include "qemu/osdep.h"
140e9b5cd6SAndrey Gruzdev #include "qemu/bitops.h"
150e9b5cd6SAndrey Gruzdev #include "qemu/error-report.h"
160e9b5cd6SAndrey Gruzdev #include "qemu/userfaultfd.h"
170e9b5cd6SAndrey Gruzdev #include "trace.h"
180e9b5cd6SAndrey Gruzdev #include <poll.h>
190e9b5cd6SAndrey Gruzdev #include <sys/syscall.h>
200e9b5cd6SAndrey Gruzdev #include <sys/ioctl.h>
21*c40c0463SPeter Xu #include <fcntl.h>
22*c40c0463SPeter Xu
/*
 * How uffd_open() obtains new userfaultfd descriptors.  The zero value
 * means that the open mode has not been detected yet.
 */
typedef enum {
    UFFD_UNINITIALIZED = 0, /* open mode not detected yet */
    UFFD_USE_DEV_PATH,      /* USERFAULTFD_IOC_NEW ioctl on /dev/userfaultfd */
    UFFD_USE_SYSCALL,       /* userfaultfd system call */
} uffd_open_mode;
280e9b5cd6SAndrey Gruzdev
uffd_open(int flags)29d5890ea0SPeter Xu int uffd_open(int flags)
30d5890ea0SPeter Xu {
31d5890ea0SPeter Xu #if defined(__NR_userfaultfd)
32*c40c0463SPeter Xu static uffd_open_mode open_mode;
33*c40c0463SPeter Xu static int uffd_dev;
34*c40c0463SPeter Xu
35*c40c0463SPeter Xu /* Detect how to generate uffd desc when run the 1st time */
36*c40c0463SPeter Xu if (open_mode == UFFD_UNINITIALIZED) {
37*c40c0463SPeter Xu /*
38*c40c0463SPeter Xu * Make /dev/userfaultfd the default approach because it has better
39*c40c0463SPeter Xu * permission controls, meanwhile allows kernel faults without any
40*c40c0463SPeter Xu * privilege requirement (e.g. SYS_CAP_PTRACE).
41*c40c0463SPeter Xu */
42*c40c0463SPeter Xu uffd_dev = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC);
43*c40c0463SPeter Xu if (uffd_dev >= 0) {
44*c40c0463SPeter Xu open_mode = UFFD_USE_DEV_PATH;
45*c40c0463SPeter Xu } else {
46*c40c0463SPeter Xu /* Fallback to the system call */
47*c40c0463SPeter Xu open_mode = UFFD_USE_SYSCALL;
48*c40c0463SPeter Xu }
49*c40c0463SPeter Xu trace_uffd_detect_open_mode(open_mode);
50*c40c0463SPeter Xu }
51*c40c0463SPeter Xu
52*c40c0463SPeter Xu if (open_mode == UFFD_USE_DEV_PATH) {
53*c40c0463SPeter Xu assert(uffd_dev >= 0);
54*c40c0463SPeter Xu return ioctl(uffd_dev, USERFAULTFD_IOC_NEW, flags);
55*c40c0463SPeter Xu }
56*c40c0463SPeter Xu
57d5890ea0SPeter Xu return syscall(__NR_userfaultfd, flags);
58d5890ea0SPeter Xu #else
59d5890ea0SPeter Xu return -EINVAL;
60d5890ea0SPeter Xu #endif
61d5890ea0SPeter Xu }
62d5890ea0SPeter Xu
630e9b5cd6SAndrey Gruzdev /**
640e9b5cd6SAndrey Gruzdev * uffd_query_features: query UFFD features
650e9b5cd6SAndrey Gruzdev *
660e9b5cd6SAndrey Gruzdev * Returns: 0 on success, negative value in case of an error
670e9b5cd6SAndrey Gruzdev *
680e9b5cd6SAndrey Gruzdev * @features: parameter to receive 'uffdio_api.features'
690e9b5cd6SAndrey Gruzdev */
uffd_query_features(uint64_t * features)700e9b5cd6SAndrey Gruzdev int uffd_query_features(uint64_t *features)
710e9b5cd6SAndrey Gruzdev {
720e9b5cd6SAndrey Gruzdev int uffd_fd;
730e9b5cd6SAndrey Gruzdev struct uffdio_api api_struct = { 0 };
740e9b5cd6SAndrey Gruzdev int ret = -1;
750e9b5cd6SAndrey Gruzdev
76d5890ea0SPeter Xu uffd_fd = uffd_open(O_CLOEXEC);
770e9b5cd6SAndrey Gruzdev if (uffd_fd < 0) {
780e9b5cd6SAndrey Gruzdev trace_uffd_query_features_nosys(errno);
790e9b5cd6SAndrey Gruzdev return -1;
800e9b5cd6SAndrey Gruzdev }
810e9b5cd6SAndrey Gruzdev
820e9b5cd6SAndrey Gruzdev api_struct.api = UFFD_API;
830e9b5cd6SAndrey Gruzdev api_struct.features = 0;
840e9b5cd6SAndrey Gruzdev
850e9b5cd6SAndrey Gruzdev if (ioctl(uffd_fd, UFFDIO_API, &api_struct)) {
860e9b5cd6SAndrey Gruzdev trace_uffd_query_features_api_failed(errno);
870e9b5cd6SAndrey Gruzdev goto out;
880e9b5cd6SAndrey Gruzdev }
890e9b5cd6SAndrey Gruzdev *features = api_struct.features;
900e9b5cd6SAndrey Gruzdev ret = 0;
910e9b5cd6SAndrey Gruzdev
920e9b5cd6SAndrey Gruzdev out:
930e9b5cd6SAndrey Gruzdev close(uffd_fd);
940e9b5cd6SAndrey Gruzdev return ret;
950e9b5cd6SAndrey Gruzdev }
960e9b5cd6SAndrey Gruzdev
970e9b5cd6SAndrey Gruzdev /**
980e9b5cd6SAndrey Gruzdev * uffd_create_fd: create UFFD file descriptor
990e9b5cd6SAndrey Gruzdev *
1000e9b5cd6SAndrey Gruzdev * Returns non-negative file descriptor or negative value in case of an error
1010e9b5cd6SAndrey Gruzdev *
1020e9b5cd6SAndrey Gruzdev * @features: UFFD features to request
1030e9b5cd6SAndrey Gruzdev * @non_blocking: create UFFD file descriptor for non-blocking operation
1040e9b5cd6SAndrey Gruzdev */
uffd_create_fd(uint64_t features,bool non_blocking)1050e9b5cd6SAndrey Gruzdev int uffd_create_fd(uint64_t features, bool non_blocking)
1060e9b5cd6SAndrey Gruzdev {
1070e9b5cd6SAndrey Gruzdev int uffd_fd;
1080e9b5cd6SAndrey Gruzdev int flags;
1090e9b5cd6SAndrey Gruzdev struct uffdio_api api_struct = { 0 };
1100e9b5cd6SAndrey Gruzdev uint64_t ioctl_mask = BIT(_UFFDIO_REGISTER) | BIT(_UFFDIO_UNREGISTER);
1110e9b5cd6SAndrey Gruzdev
1120e9b5cd6SAndrey Gruzdev flags = O_CLOEXEC | (non_blocking ? O_NONBLOCK : 0);
113d5890ea0SPeter Xu uffd_fd = uffd_open(flags);
1140e9b5cd6SAndrey Gruzdev if (uffd_fd < 0) {
1150e9b5cd6SAndrey Gruzdev trace_uffd_create_fd_nosys(errno);
1160e9b5cd6SAndrey Gruzdev return -1;
1170e9b5cd6SAndrey Gruzdev }
1180e9b5cd6SAndrey Gruzdev
1190e9b5cd6SAndrey Gruzdev api_struct.api = UFFD_API;
1200e9b5cd6SAndrey Gruzdev api_struct.features = features;
1210e9b5cd6SAndrey Gruzdev if (ioctl(uffd_fd, UFFDIO_API, &api_struct)) {
1220e9b5cd6SAndrey Gruzdev trace_uffd_create_fd_api_failed(errno);
1230e9b5cd6SAndrey Gruzdev goto fail;
1240e9b5cd6SAndrey Gruzdev }
1250e9b5cd6SAndrey Gruzdev if ((api_struct.ioctls & ioctl_mask) != ioctl_mask) {
1260e9b5cd6SAndrey Gruzdev trace_uffd_create_fd_api_noioctl(ioctl_mask, api_struct.ioctls);
1270e9b5cd6SAndrey Gruzdev goto fail;
1280e9b5cd6SAndrey Gruzdev }
1290e9b5cd6SAndrey Gruzdev
1300e9b5cd6SAndrey Gruzdev return uffd_fd;
1310e9b5cd6SAndrey Gruzdev
1320e9b5cd6SAndrey Gruzdev fail:
1330e9b5cd6SAndrey Gruzdev close(uffd_fd);
1340e9b5cd6SAndrey Gruzdev return -1;
1350e9b5cd6SAndrey Gruzdev }
1360e9b5cd6SAndrey Gruzdev
/**
 * uffd_close_fd: close UFFD file descriptor
 *
 * Asserts that the descriptor is non-negative and then closes it; the
 * result of close() is not checked.
 *
 * @uffd_fd: UFFD file descriptor
 */
void uffd_close_fd(int uffd_fd)
{
    assert(uffd_fd >= 0);
    close(uffd_fd);
}
1470e9b5cd6SAndrey Gruzdev
/**
 * uffd_register_memory: register memory range via UFFD-IO
 *
 * Issues UFFDIO_REGISTER for the given range and mode.  A failure is
 * reported through a trace event.
 *
 * Returns 0 in case of success, negative value in case of an error
 *
 * @uffd_fd: UFFD file descriptor
 * @addr: base address of memory range
 * @length: length of memory range
 * @mode: UFFD register mode (UFFDIO_REGISTER_MODE_MISSING, ...)
 * @ioctls: optional pointer to receive supported IOCTL mask; may be NULL,
 *          written only on success
 */
int uffd_register_memory(int uffd_fd, void *addr, uint64_t length,
                         uint64_t mode, uint64_t *ioctls)
{
    struct uffdio_register reg_args = {
        .range = { .start = (uintptr_t) addr, .len = length },
        .mode = mode,
    };

    if (ioctl(uffd_fd, UFFDIO_REGISTER, &reg_args) != 0) {
        trace_uffd_register_memory_failed(addr, length, mode, errno);
        return -1;
    }

    if (ioctls != NULL) {
        *ioctls = reg_args.ioctls;
    }
    return 0;
}
1780e9b5cd6SAndrey Gruzdev
/**
 * uffd_unregister_memory: un-register memory range with UFFD-IO
 *
 * Issues UFFDIO_UNREGISTER for the given range.  A failure is reported
 * through a trace event.
 *
 * Returns 0 in case of success, negative value in case of an error
 *
 * @uffd_fd: UFFD file descriptor
 * @addr: base address of memory range
 * @length: length of memory range
 */
int uffd_unregister_memory(int uffd_fd, void *addr, uint64_t length)
{
    struct uffdio_range range_args = {
        .start = (uintptr_t) addr,
        .len = length,
    };

    if (ioctl(uffd_fd, UFFDIO_UNREGISTER, &range_args) != 0) {
        trace_uffd_unregister_memory_failed(addr, length, errno);
        return -1;
    }
    return 0;
}
2020e9b5cd6SAndrey Gruzdev
/**
 * uffd_change_protection: protect/un-protect memory range for writes via UFFD-IO
 *
 * Issues UFFDIO_WRITEPROTECT for the given range.  A failure is reported
 * with error_report().
 *
 * Returns 0 on success, negative value in case of error
 *
 * @uffd_fd: UFFD file descriptor
 * @addr: base address of memory range
 * @length: length of memory range
 * @wp: write-protect/unprotect
 * @dont_wake: do not wake threads waiting on wr-protected page; ignored
 *             when @wp is true
 */
int uffd_change_protection(int uffd_fd, void *addr, uint64_t length,
                           bool wp, bool dont_wake)
{
    struct uffdio_writeprotect wp_args = {
        .range = { .start = (uintptr_t) addr, .len = length },
    };

    if (wp) {
        wp_args.mode = UFFDIO_WRITEPROTECT_MODE_WP;
    } else if (dont_wake) {
        /* DONTWAKE is meaningful only on protection release */
        wp_args.mode = UFFDIO_WRITEPROTECT_MODE_DONTWAKE;
    } else {
        wp_args.mode = 0;
    }

    if (ioctl(uffd_fd, UFFDIO_WRITEPROTECT, &wp_args) != 0) {
        error_report("uffd_change_protection() failed: addr=%p len=%" PRIu64
                     " mode=%" PRIx64 " errno=%i", addr, length,
                     (uint64_t) wp_args.mode, errno);
        return -1;
    }
    return 0;
}
2370e9b5cd6SAndrey Gruzdev
/**
 * uffd_copy_page: copy range of pages to destination via UFFD-IO
 *
 * Copy range of source pages to the destination to resolve
 * missing page fault somewhere in the destination range.
 * A failure of the UFFDIO_COPY ioctl is reported with error_report().
 *
 * Returns 0 on success, negative value in case of an error
 *
 * @uffd_fd: UFFD file descriptor
 * @dst_addr: destination base address
 * @src_addr: source base address
 * @length: length of the range to copy
 * @dont_wake: do not wake threads waiting on missing page
 */
int uffd_copy_page(int uffd_fd, void *dst_addr, void *src_addr,
                   uint64_t length, bool dont_wake)
{
    struct uffdio_copy copy_args = {
        .dst = (uintptr_t) dst_addr,
        .src = (uintptr_t) src_addr,
        .len = length,
        .mode = 0,
    };

    if (dont_wake) {
        copy_args.mode = UFFDIO_COPY_MODE_DONTWAKE;
    }

    if (ioctl(uffd_fd, UFFDIO_COPY, &copy_args) != 0) {
        error_report("uffd_copy_page() failed: dst_addr=%p src_addr=%p length=%" PRIu64
                     " mode=%" PRIx64 " errno=%i", dst_addr, src_addr,
                     length, (uint64_t) copy_args.mode, errno);
        return -1;
    }
    return 0;
}
2710e9b5cd6SAndrey Gruzdev
/**
 * uffd_zero_page: fill range of pages with zeroes via UFFD-IO
 *
 * Fill range pages with zeroes to resolve missing page fault within the range.
 * A failure of the UFFDIO_ZEROPAGE ioctl is reported with error_report().
 *
 * Returns 0 on success, negative value in case of an error
 *
 * @uffd_fd: UFFD file descriptor
 * @addr: base address
 * @length: length of the range to fill with zeroes
 * @dont_wake: do not wake threads waiting on missing page
 */
int uffd_zero_page(int uffd_fd, void *addr, uint64_t length, bool dont_wake)
{
    struct uffdio_zeropage zero_args = {
        .range = { .start = (uintptr_t) addr, .len = length },
        .mode = 0,
    };

    if (dont_wake) {
        zero_args.mode = UFFDIO_ZEROPAGE_MODE_DONTWAKE;
    }

    if (ioctl(uffd_fd, UFFDIO_ZEROPAGE, &zero_args) != 0) {
        error_report("uffd_zero_page() failed: addr=%p length=%" PRIu64
                     " mode=%" PRIx64 " errno=%i", addr, length,
                     (uint64_t) zero_args.mode, errno);
        return -1;
    }
    return 0;
}
3010e9b5cd6SAndrey Gruzdev
/**
 * uffd_wakeup: wake up threads waiting on page UFFD-managed page fault resolution
 *
 * Wake up threads waiting on any page/pages from the designated range.
 * The main use case is when during some period, page faults are resolved
 * via UFFD-IO IOCTLs with MODE_DONTWAKE flag set, then after that all waits
 * for the whole memory range are satisfied in a single call to uffd_wakeup().
 * A failure of the UFFDIO_WAKE ioctl is reported with error_report().
 *
 * Returns 0 on success, negative value in case of an error
 *
 * @uffd_fd: UFFD file descriptor
 * @addr: base address
 * @length: length of the range
 */
int uffd_wakeup(int uffd_fd, void *addr, uint64_t length)
{
    struct uffdio_range range_args = {
        .start = (uintptr_t) addr,
        .len = length,
    };

    if (ioctl(uffd_fd, UFFDIO_WAKE, &range_args) != 0) {
        error_report("uffd_wakeup() failed: addr=%p length=%" PRIu64 " errno=%i",
                     addr, length, errno);
        return -1;
    }
    return 0;
}
3310e9b5cd6SAndrey Gruzdev
/**
 * uffd_read_events: read pending UFFD events
 *
 * Reads up to @count messages from the descriptor, retrying the read()
 * when it is interrupted (EINTR).  EAGAIN is treated as "no messages";
 * any other read error is reported with error_report().
 *
 * Returns number of fetched messages, 0 if none is available or
 * negative value in case of an error
 *
 * @uffd_fd: UFFD file descriptor
 * @msgs: pointer to message buffer
 * @count: number of messages that can fit in the buffer
 */
int uffd_read_events(int uffd_fd, struct uffd_msg *msgs, int count)
{
    ssize_t nread;

    for (;;) {
        nread = read(uffd_fd, msgs, count * sizeof(struct uffd_msg));
        if (nread >= 0) {
            /* Convert the byte count into a number of whole messages */
            return (int) (nread / sizeof(struct uffd_msg));
        }
        if (errno != EINTR) {
            break;
        }
    }

    if (errno == EAGAIN) {
        return 0;
    }

    error_report("uffd_read_events() failed: errno=%i", errno);
    return -1;
}
3590e9b5cd6SAndrey Gruzdev
/**
 * uffd_poll_events: poll UFFD file descriptor for read
 *
 * Calls poll() for POLLIN on the descriptor, retrying when the call is
 * interrupted (EINTR).  Any other poll() error is reported with
 * error_report() and yields false.
 *
 * Returns true if events are available for read, false otherwise
 *
 * @uffd_fd: UFFD file descriptor
 * @tmo: timeout value, passed unchanged to poll()
 */
bool uffd_poll_events(int uffd_fd, int tmo)
{
    struct pollfd pfd = { .fd = uffd_fd, .events = POLLIN, .revents = 0 };
    int nready;

    do {
        nready = poll(&pfd, 1, tmo);
    } while (nready < 0 && errno == EINTR);

    if (nready < 0) {
        error_report("uffd_poll_events() failed: errno=%i", errno);
        return false;
    }

    /* nready == 0 means the timeout expired with nothing to read */
    return nready > 0 && (pfd.revents & POLLIN) != 0;
}
387