1 /* 2 * Copyright (c) 2019 Alexey Dobriyan <adobriyan@gmail.com> 3 * 4 * Permission to use, copy, modify, and distribute this software for any 5 * purpose with or without fee is hereby granted, provided that the above 6 * copyright notice and this permission notice appear in all copies. 7 * 8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 */ 16 /* 17 * Fork and exec tiny 1 page executable which precisely controls its VM. 18 * Test /proc/$PID/maps 19 * Test /proc/$PID/smaps 20 * Test /proc/$PID/smaps_rollup 21 * Test /proc/$PID/statm 22 * 23 * FIXME require CONFIG_TMPFS which can be disabled 24 * FIXME test other values from "smaps" 25 * FIXME support other archs 26 */ 27 #undef NDEBUG 28 #include <assert.h> 29 #include <errno.h> 30 #include <sched.h> 31 #include <signal.h> 32 #include <stdbool.h> 33 #include <stdint.h> 34 #include <stdio.h> 35 #include <string.h> 36 #include <stdlib.h> 37 #include <sys/mount.h> 38 #include <sys/types.h> 39 #include <sys/stat.h> 40 #include <sys/wait.h> 41 #include <fcntl.h> 42 #include <unistd.h> 43 #include <sys/syscall.h> 44 #include <sys/uio.h> 45 #include <linux/kdev_t.h> 46 #include <sys/time.h> 47 #include <sys/resource.h> 48 49 #include "../kselftest.h" 50 51 static inline long sys_execveat(int dirfd, const char *pathname, char **argv, char **envp, int flags) 52 { 53 return syscall(SYS_execveat, dirfd, pathname, argv, envp, flags); 54 } 55 56 static void make_private_tmp(void) 57 { 58 if (unshare(CLONE_NEWNS) == -1) { 59 if (errno == ENOSYS || errno == EPERM) { 60 exit(4); 61 } 62 exit(1); 63 } 64 if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) == -1) { 65 exit(1); 66 } 67 if (mount(NULL, "/tmp", "tmpfs", 0, NULL) == -1) { 68 exit(1); 69 } 70 } 71 72 static pid_t pid = -1; 73 static void ate(void) 74 { 75 if (pid > 0) { 76 kill(pid, SIGTERM); 77 } 78 } 79 80 struct elf64_hdr { 81 uint8_t e_ident[16]; 82 uint16_t e_type; 83 uint16_t e_machine; 84 uint32_t e_version; 85 uint64_t e_entry; 86 uint64_t e_phoff; 87 uint64_t e_shoff; 88 uint32_t e_flags; 89 uint16_t e_ehsize; 90 uint16_t e_phentsize; 91 uint16_t e_phnum; 92 uint16_t e_shentsize; 93 uint16_t e_shnum; 94 uint16_t e_shstrndx; 95 }; 96 97 struct elf64_phdr { 98 uint32_t p_type; 99 uint32_t p_flags; 100 uint64_t p_offset; 101 uint64_t p_vaddr; 102 uint64_t p_paddr; 103 uint64_t p_filesz; 104 uint64_t p_memsz; 105 uint64_t p_align; 106 }; 107 108 #ifdef __x86_64__ 109 #define PAGE_SIZE 4096 110 #define VADDR (1UL << 32) 111 #define MAPS_OFFSET 73 112 113 #define syscall 0x0f, 0x05 114 #define mov_rdi(x) \ 115 0x48, 0xbf, \ 116 (x)&0xff, ((x)>>8)&0xff, ((x)>>16)&0xff, ((x)>>24)&0xff, \ 117 ((x)>>32)&0xff, ((x)>>40)&0xff, ((x)>>48)&0xff, ((x)>>56)&0xff 118 119 #define mov_rsi(x) \ 120 0x48, 0xbe, \ 121 (x)&0xff, ((x)>>8)&0xff, ((x)>>16)&0xff, ((x)>>24)&0xff, \ 122 ((x)>>32)&0xff, ((x)>>40)&0xff, ((x)>>48)&0xff, ((x)>>56)&0xff 123 124 #define mov_eax(x) \ 125 0xb8, (x)&0xff, ((x)>>8)&0xff, ((x)>>16)&0xff, ((x)>>24)&0xff 126 127 static const uint8_t payload[] = { 128 /* Casually unmap stack, vDSO and everything else. */ 129 /* munmap */ 130 mov_rdi(VADDR + 4096), 131 mov_rsi((1ULL << 47) - 4096 - VADDR - 4096), 132 mov_eax(11), 133 syscall, 134 135 /* Ping parent. */ 136 /* write(0, &c, 1); */ 137 0x31, 0xff, /* xor edi, edi */ 138 0x48, 0x8d, 0x35, 0x00, 0x00, 0x00, 0x00, /* lea rsi, [rip] */ 139 0xba, 0x01, 0x00, 0x00, 0x00, /* mov edx, 1 */ 140 mov_eax(1), 141 syscall, 142 143 /* 1: pause(); */ 144 mov_eax(34), 145 syscall, 146 147 0xeb, 0xf7, /* jmp 1b */ 148 }; 149 150 static int make_exe(const uint8_t *payload, size_t len) 151 { 152 struct elf64_hdr h; 153 struct elf64_phdr ph; 154 155 struct iovec iov[3] = { 156 {&h, sizeof(struct elf64_hdr)}, 157 {&ph, sizeof(struct elf64_phdr)}, 158 {(void *)payload, len}, 159 }; 160 int fd, fd1; 161 char buf[64]; 162 163 memset(&h, 0, sizeof(h)); 164 h.e_ident[0] = 0x7f; 165 h.e_ident[1] = 'E'; 166 h.e_ident[2] = 'L'; 167 h.e_ident[3] = 'F'; 168 h.e_ident[4] = 2; 169 h.e_ident[5] = 1; 170 h.e_ident[6] = 1; 171 h.e_ident[7] = 0; 172 h.e_type = 2; 173 h.e_machine = 0x3e; 174 h.e_version = 1; 175 h.e_entry = VADDR + sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr); 176 h.e_phoff = sizeof(struct elf64_hdr); 177 h.e_shoff = 0; 178 h.e_flags = 0; 179 h.e_ehsize = sizeof(struct elf64_hdr); 180 h.e_phentsize = sizeof(struct elf64_phdr); 181 h.e_phnum = 1; 182 h.e_shentsize = 0; 183 h.e_shnum = 0; 184 h.e_shstrndx = 0; 185 186 memset(&ph, 0, sizeof(ph)); 187 ph.p_type = 1; 188 ph.p_flags = (1<<2)|1; 189 ph.p_offset = 0; 190 ph.p_vaddr = VADDR; 191 ph.p_paddr = 0; 192 ph.p_filesz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len; 193 ph.p_memsz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len; 194 ph.p_align = 4096; 195 196 fd = openat(AT_FDCWD, "/tmp", O_WRONLY|O_EXCL|O_TMPFILE, 0700); 197 if (fd == -1) { 198 exit(1); 199 } 200 201 if (writev(fd, iov, 3) != sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len) { 202 exit(1); 203 } 204 205 /* Avoid ETXTBSY on exec. */ 206 snprintf(buf, sizeof(buf), "/proc/self/fd/%u", fd); 207 fd1 = open(buf, O_RDONLY|O_CLOEXEC); 208 close(fd); 209 210 return fd1; 211 } 212 #endif 213 214 /* 215 * 0: vsyscall VMA doesn't exist vsyscall=none 216 * 1: vsyscall VMA is r-xp vsyscall=emulate 217 * 2: vsyscall VMA is --xp vsyscall=xonly 218 */ 219 static int g_vsyscall; 220 static const char *str_vsyscall; 221 222 static const char str_vsyscall_0[] = ""; 223 static const char str_vsyscall_1[] = 224 "ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n"; 225 static const char str_vsyscall_2[] = 226 "ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n"; 227 228 #ifdef __x86_64__ 229 static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___) 230 { 231 _exit(1); 232 } 233 234 /* 235 * vsyscall page can't be unmapped, probe it directly. 236 */ 237 static void vsyscall(void) 238 { 239 pid_t pid; 240 int wstatus; 241 242 pid = fork(); 243 if (pid < 0) { 244 fprintf(stderr, "fork, errno %d\n", errno); 245 exit(1); 246 } 247 if (pid == 0) { 248 struct rlimit rlim = {0, 0}; 249 (void)setrlimit(RLIMIT_CORE, &rlim); 250 251 /* Hide "segfault at ffffffffff600000" messages. */ 252 struct sigaction act; 253 memset(&act, 0, sizeof(struct sigaction)); 254 act.sa_flags = SA_SIGINFO; 255 act.sa_sigaction = sigaction_SIGSEGV; 256 (void)sigaction(SIGSEGV, &act, NULL); 257 258 /* gettimeofday(NULL, NULL); */ 259 asm volatile ( 260 "call %P0" 261 : 262 : "i" (0xffffffffff600000), "D" (NULL), "S" (NULL) 263 : "rax", "rcx", "r11" 264 ); 265 exit(0); 266 } 267 waitpid(pid, &wstatus, 0); 268 if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0) { 269 /* vsyscall page exists and is executable. */ 270 } else { 271 /* vsyscall page doesn't exist. */ 272 g_vsyscall = 0; 273 return; 274 } 275 276 pid = fork(); 277 if (pid < 0) { 278 fprintf(stderr, "fork, errno %d\n", errno); 279 exit(1); 280 } 281 if (pid == 0) { 282 struct rlimit rlim = {0, 0}; 283 (void)setrlimit(RLIMIT_CORE, &rlim); 284 285 /* Hide "segfault at ffffffffff600000" messages. */ 286 struct sigaction act; 287 memset(&act, 0, sizeof(struct sigaction)); 288 act.sa_flags = SA_SIGINFO; 289 act.sa_sigaction = sigaction_SIGSEGV; 290 (void)sigaction(SIGSEGV, &act, NULL); 291 292 *(volatile int *)0xffffffffff600000UL; 293 exit(0); 294 } 295 waitpid(pid, &wstatus, 0); 296 if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0) { 297 /* vsyscall page is readable and executable. */ 298 g_vsyscall = 1; 299 return; 300 } 301 302 /* vsyscall page is executable but unreadable. */ 303 g_vsyscall = 2; 304 } 305 306 int main(void) 307 { 308 int pipefd[2]; 309 int exec_fd; 310 311 vsyscall(); 312 switch (g_vsyscall) { 313 case 0: 314 str_vsyscall = str_vsyscall_0; 315 break; 316 case 1: 317 str_vsyscall = str_vsyscall_1; 318 break; 319 case 2: 320 str_vsyscall = str_vsyscall_2; 321 break; 322 default: 323 abort(); 324 } 325 326 atexit(ate); 327 328 make_private_tmp(); 329 330 /* Reserve fd 0 for 1-byte pipe ping from child. */ 331 close(0); 332 if (open("/", O_RDONLY|O_DIRECTORY|O_PATH) != 0) { 333 return 1; 334 } 335 336 exec_fd = make_exe(payload, sizeof(payload)); 337 338 if (pipe(pipefd) == -1) { 339 return 1; 340 } 341 if (dup2(pipefd[1], 0) != 0) { 342 return 1; 343 } 344 345 pid = fork(); 346 if (pid == -1) { 347 return 1; 348 } 349 if (pid == 0) { 350 sys_execveat(exec_fd, "", NULL, NULL, AT_EMPTY_PATH); 351 return 1; 352 } 353 354 char _; 355 if (read(pipefd[0], &_, 1) != 1) { 356 return 1; 357 } 358 359 struct stat st; 360 if (fstat(exec_fd, &st) == -1) { 361 return 1; 362 } 363 364 /* Generate "head -n1 /proc/$PID/maps" */ 365 char buf0[256]; 366 memset(buf0, ' ', sizeof(buf0)); 367 int len = snprintf(buf0, sizeof(buf0), 368 "%08lx-%08lx r-xp 00000000 %02lx:%02lx %llu", 369 VADDR, VADDR + PAGE_SIZE, 370 MAJOR(st.st_dev), MINOR(st.st_dev), 371 (unsigned long long)st.st_ino); 372 buf0[len] = ' '; 373 snprintf(buf0 + MAPS_OFFSET, sizeof(buf0) - MAPS_OFFSET, 374 "/tmp/#%llu (deleted)\n", (unsigned long long)st.st_ino); 375 376 /* Test /proc/$PID/maps */ 377 { 378 const size_t len = strlen(buf0) + strlen(str_vsyscall); 379 char buf[256]; 380 ssize_t rv; 381 int fd; 382 383 snprintf(buf, sizeof(buf), "/proc/%u/maps", pid); 384 fd = open(buf, O_RDONLY); 385 if (fd == -1) { 386 return 1; 387 } 388 rv = read(fd, buf, sizeof(buf)); 389 assert(rv == len); 390 assert(memcmp(buf, buf0, strlen(buf0)) == 0); 391 if (g_vsyscall > 0) { 392 assert(memcmp(buf + strlen(buf0), str_vsyscall, strlen(str_vsyscall)) == 0); 393 } 394 } 395 396 /* Test /proc/$PID/smaps */ 397 { 398 char buf[4096]; 399 ssize_t rv; 400 int fd; 401 402 snprintf(buf, sizeof(buf), "/proc/%u/smaps", pid); 403 fd = open(buf, O_RDONLY); 404 if (fd == -1) { 405 return 1; 406 } 407 rv = read(fd, buf, sizeof(buf)); 408 assert(0 <= rv && rv <= sizeof(buf)); 409 410 assert(rv >= strlen(buf0)); 411 assert(memcmp(buf, buf0, strlen(buf0)) == 0); 412 413 #define RSS1 "Rss: 4 kB\n" 414 #define RSS2 "Rss: 0 kB\n" 415 #define PSS1 "Pss: 4 kB\n" 416 #define PSS2 "Pss: 0 kB\n" 417 assert(memmem(buf, rv, RSS1, strlen(RSS1)) || 418 memmem(buf, rv, RSS2, strlen(RSS2))); 419 assert(memmem(buf, rv, PSS1, strlen(PSS1)) || 420 memmem(buf, rv, PSS2, strlen(PSS2))); 421 422 static const char *S[] = { 423 "Size: 4 kB\n", 424 "KernelPageSize: 4 kB\n", 425 "MMUPageSize: 4 kB\n", 426 "Anonymous: 0 kB\n", 427 "AnonHugePages: 0 kB\n", 428 "Shared_Hugetlb: 0 kB\n", 429 "Private_Hugetlb: 0 kB\n", 430 "Locked: 0 kB\n", 431 }; 432 int i; 433 434 for (i = 0; i < ARRAY_SIZE(S); i++) { 435 assert(memmem(buf, rv, S[i], strlen(S[i]))); 436 } 437 438 if (g_vsyscall > 0) { 439 assert(memmem(buf, rv, str_vsyscall, strlen(str_vsyscall))); 440 } 441 } 442 443 /* Test /proc/$PID/smaps_rollup */ 444 { 445 char bufr[256]; 446 memset(bufr, ' ', sizeof(bufr)); 447 len = snprintf(bufr, sizeof(bufr), 448 "%08lx-%08lx ---p 00000000 00:00 0", 449 VADDR, VADDR + PAGE_SIZE); 450 bufr[len] = ' '; 451 snprintf(bufr + MAPS_OFFSET, sizeof(bufr) - MAPS_OFFSET, 452 "[rollup]\n"); 453 454 char buf[1024]; 455 ssize_t rv; 456 int fd; 457 458 snprintf(buf, sizeof(buf), "/proc/%u/smaps_rollup", pid); 459 fd = open(buf, O_RDONLY); 460 if (fd == -1) { 461 return 1; 462 } 463 rv = read(fd, buf, sizeof(buf)); 464 assert(0 <= rv && rv <= sizeof(buf)); 465 466 assert(rv >= strlen(bufr)); 467 assert(memcmp(buf, bufr, strlen(bufr)) == 0); 468 469 assert(memmem(buf, rv, RSS1, strlen(RSS1)) || 470 memmem(buf, rv, RSS2, strlen(RSS2))); 471 assert(memmem(buf, rv, PSS1, strlen(PSS1)) || 472 memmem(buf, rv, PSS2, strlen(PSS2))); 473 474 static const char *S[] = { 475 "Anonymous: 0 kB\n", 476 "AnonHugePages: 0 kB\n", 477 "Shared_Hugetlb: 0 kB\n", 478 "Private_Hugetlb: 0 kB\n", 479 "Locked: 0 kB\n", 480 }; 481 int i; 482 483 for (i = 0; i < ARRAY_SIZE(S); i++) { 484 assert(memmem(buf, rv, S[i], strlen(S[i]))); 485 } 486 } 487 488 /* Test /proc/$PID/statm */ 489 { 490 char buf[64]; 491 ssize_t rv; 492 int fd; 493 494 snprintf(buf, sizeof(buf), "/proc/%u/statm", pid); 495 fd = open(buf, O_RDONLY); 496 if (fd == -1) { 497 return 1; 498 } 499 rv = read(fd, buf, sizeof(buf)); 500 assert(rv == 7 * 2); 501 502 assert(buf[0] == '1'); /* ->total_vm */ 503 assert(buf[1] == ' '); 504 assert(buf[2] == '0' || buf[2] == '1'); /* rss */ 505 assert(buf[3] == ' '); 506 assert(buf[4] == '0' || buf[2] == '1'); /* file rss */ 507 assert(buf[5] == ' '); 508 assert(buf[6] == '1'); /* ELF executable segments */ 509 assert(buf[7] == ' '); 510 assert(buf[8] == '0'); 511 assert(buf[9] == ' '); 512 assert(buf[10] == '0'); /* ->data_vm + ->stack_vm */ 513 assert(buf[11] == ' '); 514 assert(buf[12] == '0'); 515 assert(buf[13] == '\n'); 516 } 517 518 return 0; 519 } 520 #else 521 int main(void) 522 { 523 return 4; 524 } 525 #endif 526