1 // SPDX-License-Identifier: GPL-2.0 2 #include <string.h> 3 #include <fcntl.h> 4 #include <sys/ioctl.h> 5 #include <linux/userfaultfd.h> 6 #include <sys/syscall.h> 7 #include <unistd.h> 8 #include "../kselftest.h" 9 #include "vm_util.h" 10 11 #define PMD_SIZE_FILE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size" 12 #define SMAP_FILE_PATH "/proc/self/smaps" 13 #define MAX_LINE_LENGTH 500 14 15 unsigned int __page_size; 16 unsigned int __page_shift; 17 18 uint64_t pagemap_get_entry(int fd, char *start) 19 { 20 const unsigned long pfn = (unsigned long)start / getpagesize(); 21 uint64_t entry; 22 int ret; 23 24 ret = pread(fd, &entry, sizeof(entry), pfn * sizeof(entry)); 25 if (ret != sizeof(entry)) 26 ksft_exit_fail_msg("reading pagemap failed\n"); 27 return entry; 28 } 29 30 bool pagemap_is_softdirty(int fd, char *start) 31 { 32 return pagemap_get_entry(fd, start) & PM_SOFT_DIRTY; 33 } 34 35 bool pagemap_is_swapped(int fd, char *start) 36 { 37 return pagemap_get_entry(fd, start) & PM_SWAP; 38 } 39 40 bool pagemap_is_populated(int fd, char *start) 41 { 42 return pagemap_get_entry(fd, start) & (PM_PRESENT | PM_SWAP); 43 } 44 45 unsigned long pagemap_get_pfn(int fd, char *start) 46 { 47 uint64_t entry = pagemap_get_entry(fd, start); 48 49 /* If present (63th bit), PFN is at bit 0 -- 54. */ 50 if (entry & PM_PRESENT) 51 return entry & 0x007fffffffffffffull; 52 return -1ul; 53 } 54 55 void clear_softdirty(void) 56 { 57 int ret; 58 const char *ctrl = "4"; 59 int fd = open("/proc/self/clear_refs", O_WRONLY); 60 61 if (fd < 0) 62 ksft_exit_fail_msg("opening clear_refs failed\n"); 63 ret = write(fd, ctrl, strlen(ctrl)); 64 close(fd); 65 if (ret != strlen(ctrl)) 66 ksft_exit_fail_msg("writing clear_refs failed\n"); 67 } 68 69 bool check_for_pattern(FILE *fp, const char *pattern, char *buf, size_t len) 70 { 71 while (fgets(buf, len, fp)) { 72 if (!strncmp(buf, pattern, strlen(pattern))) 73 return true; 74 } 75 return false; 76 } 77 78 uint64_t read_pmd_pagesize(void) 79 { 80 int fd; 81 char buf[20]; 82 ssize_t num_read; 83 84 fd = open(PMD_SIZE_FILE_PATH, O_RDONLY); 85 if (fd == -1) 86 return 0; 87 88 num_read = read(fd, buf, 19); 89 if (num_read < 1) { 90 close(fd); 91 return 0; 92 } 93 buf[num_read] = '\0'; 94 close(fd); 95 96 return strtoul(buf, NULL, 10); 97 } 98 99 bool __check_huge(void *addr, char *pattern, int nr_hpages, 100 uint64_t hpage_size) 101 { 102 uint64_t thp = -1; 103 int ret; 104 FILE *fp; 105 char buffer[MAX_LINE_LENGTH]; 106 char addr_pattern[MAX_LINE_LENGTH]; 107 108 ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-", 109 (unsigned long) addr); 110 if (ret >= MAX_LINE_LENGTH) 111 ksft_exit_fail_msg("%s: Pattern is too long\n", __func__); 112 113 fp = fopen(SMAP_FILE_PATH, "r"); 114 if (!fp) 115 ksft_exit_fail_msg("%s: Failed to open file %s\n", __func__, SMAP_FILE_PATH); 116 117 if (!check_for_pattern(fp, addr_pattern, buffer, sizeof(buffer))) 118 goto err_out; 119 120 /* 121 * Fetch the pattern in the same block and check the number of 122 * hugepages. 123 */ 124 if (!check_for_pattern(fp, pattern, buffer, sizeof(buffer))) 125 goto err_out; 126 127 snprintf(addr_pattern, MAX_LINE_LENGTH, "%s%%9ld kB", pattern); 128 129 if (sscanf(buffer, addr_pattern, &thp) != 1) 130 ksft_exit_fail_msg("Reading smap error\n"); 131 132 err_out: 133 fclose(fp); 134 return thp == (nr_hpages * (hpage_size >> 10)); 135 } 136 137 bool check_huge_anon(void *addr, int nr_hpages, uint64_t hpage_size) 138 { 139 return __check_huge(addr, "AnonHugePages: ", nr_hpages, hpage_size); 140 } 141 142 bool check_huge_file(void *addr, int nr_hpages, uint64_t hpage_size) 143 { 144 return __check_huge(addr, "FilePmdMapped:", nr_hpages, hpage_size); 145 } 146 147 bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size) 148 { 149 return __check_huge(addr, "ShmemPmdMapped:", nr_hpages, hpage_size); 150 } 151 152 int64_t allocate_transhuge(void *ptr, int pagemap_fd) 153 { 154 uint64_t ent[2]; 155 156 /* drop pmd */ 157 if (mmap(ptr, HPAGE_SIZE, PROT_READ | PROT_WRITE, 158 MAP_FIXED | MAP_ANONYMOUS | 159 MAP_NORESERVE | MAP_PRIVATE, -1, 0) != ptr) 160 errx(2, "mmap transhuge"); 161 162 if (madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE)) 163 err(2, "MADV_HUGEPAGE"); 164 165 /* allocate transparent huge page */ 166 *(volatile void **)ptr = ptr; 167 168 if (pread(pagemap_fd, ent, sizeof(ent), 169 (uintptr_t)ptr >> (pshift() - 3)) != sizeof(ent)) 170 err(2, "read pagemap"); 171 172 if (PAGEMAP_PRESENT(ent[0]) && PAGEMAP_PRESENT(ent[1]) && 173 PAGEMAP_PFN(ent[0]) + 1 == PAGEMAP_PFN(ent[1]) && 174 !(PAGEMAP_PFN(ent[0]) & ((1 << (HPAGE_SHIFT - pshift())) - 1))) 175 return PAGEMAP_PFN(ent[0]); 176 177 return -1; 178 } 179 180 unsigned long default_huge_page_size(void) 181 { 182 unsigned long hps = 0; 183 char *line = NULL; 184 size_t linelen = 0; 185 FILE *f = fopen("/proc/meminfo", "r"); 186 187 if (!f) 188 return 0; 189 while (getline(&line, &linelen, f) > 0) { 190 if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) { 191 hps <<= 10; 192 break; 193 } 194 } 195 196 free(line); 197 fclose(f); 198 return hps; 199 } 200 201 /* If `ioctls' non-NULL, the allowed ioctls will be returned into the var */ 202 int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len, 203 bool miss, bool wp, bool minor, uint64_t *ioctls) 204 { 205 struct uffdio_register uffdio_register = { 0 }; 206 uint64_t mode = 0; 207 int ret = 0; 208 209 if (miss) 210 mode |= UFFDIO_REGISTER_MODE_MISSING; 211 if (wp) 212 mode |= UFFDIO_REGISTER_MODE_WP; 213 if (minor) 214 mode |= UFFDIO_REGISTER_MODE_MINOR; 215 216 uffdio_register.range.start = (unsigned long)addr; 217 uffdio_register.range.len = len; 218 uffdio_register.mode = mode; 219 220 if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) 221 ret = -errno; 222 else if (ioctls) 223 *ioctls = uffdio_register.ioctls; 224 225 return ret; 226 } 227 228 int uffd_register(int uffd, void *addr, uint64_t len, 229 bool miss, bool wp, bool minor) 230 { 231 return uffd_register_with_ioctls(uffd, addr, len, 232 miss, wp, minor, NULL); 233 } 234 235 int uffd_unregister(int uffd, void *addr, uint64_t len) 236 { 237 struct uffdio_range range = { .start = (uintptr_t)addr, .len = len }; 238 int ret = 0; 239 240 if (ioctl(uffd, UFFDIO_UNREGISTER, &range) == -1) 241 ret = -errno; 242 243 return ret; 244 } 245 246 int uffd_open_dev(unsigned int flags) 247 { 248 int fd, uffd; 249 250 fd = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC); 251 if (fd < 0) 252 return fd; 253 uffd = ioctl(fd, USERFAULTFD_IOC_NEW, flags); 254 close(fd); 255 256 return uffd; 257 } 258 259 int uffd_open_sys(unsigned int flags) 260 { 261 #ifdef __NR_userfaultfd 262 return syscall(__NR_userfaultfd, flags); 263 #else 264 return -1; 265 #endif 266 } 267 268 int uffd_open(unsigned int flags) 269 { 270 int uffd = uffd_open_sys(flags); 271 272 if (uffd < 0) 273 uffd = uffd_open_dev(flags); 274 275 return uffd; 276 } 277 278 int uffd_get_features(uint64_t *features) 279 { 280 struct uffdio_api uffdio_api = { .api = UFFD_API, .features = 0 }; 281 /* 282 * This should by default work in most kernels; the feature list 283 * will be the same no matter what we pass in here. 284 */ 285 int fd = uffd_open(UFFD_USER_MODE_ONLY); 286 287 if (fd < 0) 288 /* Maybe the kernel is older than user-only mode? */ 289 fd = uffd_open(0); 290 291 if (fd < 0) 292 return fd; 293 294 if (ioctl(fd, UFFDIO_API, &uffdio_api)) { 295 close(fd); 296 return -errno; 297 } 298 299 *features = uffdio_api.features; 300 close(fd); 301 302 return 0; 303 } 304