1 /* 2 * Copyright (c) 2019 Alexey Dobriyan <adobriyan@gmail.com> 3 * 4 * Permission to use, copy, modify, and distribute this software for any 5 * purpose with or without fee is hereby granted, provided that the above 6 * copyright notice and this permission notice appear in all copies. 7 * 8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 */ 16 /* 17 * Fork and exec tiny 1 page executable which precisely controls its VM. 18 * Test /proc/$PID/maps 19 * Test /proc/$PID/smaps 20 * Test /proc/$PID/smaps_rollup 21 * Test /proc/$PID/statm 22 * 23 * FIXME require CONFIG_TMPFS which can be disabled 24 * FIXME test other values from "smaps" 25 * FIXME support other archs 26 */ 27 #undef NDEBUG 28 #include <assert.h> 29 #include <errno.h> 30 #include <sched.h> 31 #include <signal.h> 32 #include <stdbool.h> 33 #include <stdint.h> 34 #include <stdio.h> 35 #include <string.h> 36 #include <stdlib.h> 37 #include <sys/mount.h> 38 #include <sys/types.h> 39 #include <sys/stat.h> 40 #include <sys/wait.h> 41 #include <fcntl.h> 42 #include <unistd.h> 43 #include <sys/syscall.h> 44 #include <sys/uio.h> 45 #include <linux/kdev_t.h> 46 #include <sys/time.h> 47 #include <sys/resource.h> 48 49 static inline long sys_execveat(int dirfd, const char *pathname, char **argv, char **envp, int flags) 50 { 51 return syscall(SYS_execveat, dirfd, pathname, argv, envp, flags); 52 } 53 54 static void make_private_tmp(void) 55 { 56 if (unshare(CLONE_NEWNS) == -1) { 57 if (errno == ENOSYS || errno == EPERM) { 58 exit(4); 59 } 60 exit(1); 61 } 62 if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) == -1) { 63 exit(1); 64 } 65 if (mount(NULL, "/tmp", "tmpfs", 0, NULL) == -1) { 66 exit(1); 67 } 68 } 69 70 static pid_t pid = -1; 71 static void ate(void) 72 { 73 if (pid > 0) { 74 kill(pid, SIGTERM); 75 } 76 } 77 78 struct elf64_hdr { 79 uint8_t e_ident[16]; 80 uint16_t e_type; 81 uint16_t e_machine; 82 uint32_t e_version; 83 uint64_t e_entry; 84 uint64_t e_phoff; 85 uint64_t e_shoff; 86 uint32_t e_flags; 87 uint16_t e_ehsize; 88 uint16_t e_phentsize; 89 uint16_t e_phnum; 90 uint16_t e_shentsize; 91 uint16_t e_shnum; 92 uint16_t e_shstrndx; 93 }; 94 95 struct elf64_phdr { 96 uint32_t p_type; 97 uint32_t p_flags; 98 uint64_t p_offset; 99 uint64_t p_vaddr; 100 uint64_t p_paddr; 101 uint64_t p_filesz; 102 uint64_t p_memsz; 103 uint64_t p_align; 104 }; 105 106 #ifdef __x86_64__ 107 #define PAGE_SIZE 4096 108 #define VADDR (1UL << 32) 109 #define MAPS_OFFSET 73 110 111 #define syscall 0x0f, 0x05 112 #define mov_rdi(x) \ 113 0x48, 0xbf, \ 114 (x)&0xff, ((x)>>8)&0xff, ((x)>>16)&0xff, ((x)>>24)&0xff, \ 115 ((x)>>32)&0xff, ((x)>>40)&0xff, ((x)>>48)&0xff, ((x)>>56)&0xff 116 117 #define mov_rsi(x) \ 118 0x48, 0xbe, \ 119 (x)&0xff, ((x)>>8)&0xff, ((x)>>16)&0xff, ((x)>>24)&0xff, \ 120 ((x)>>32)&0xff, ((x)>>40)&0xff, ((x)>>48)&0xff, ((x)>>56)&0xff 121 122 #define mov_eax(x) \ 123 0xb8, (x)&0xff, ((x)>>8)&0xff, ((x)>>16)&0xff, ((x)>>24)&0xff 124 125 static const uint8_t payload[] = { 126 /* Casually unmap stack, vDSO and everything else. */ 127 /* munmap */ 128 mov_rdi(VADDR + 4096), 129 mov_rsi((1ULL << 47) - 4096 - VADDR - 4096), 130 mov_eax(11), 131 syscall, 132 133 /* Ping parent. */ 134 /* write(0, &c, 1); */ 135 0x31, 0xff, /* xor edi, edi */ 136 0x48, 0x8d, 0x35, 0x00, 0x00, 0x00, 0x00, /* lea rsi, [rip] */ 137 0xba, 0x01, 0x00, 0x00, 0x00, /* mov edx, 1 */ 138 mov_eax(1), 139 syscall, 140 141 /* 1: pause(); */ 142 mov_eax(34), 143 syscall, 144 145 0xeb, 0xf7, /* jmp 1b */ 146 }; 147 148 static int make_exe(const uint8_t *payload, size_t len) 149 { 150 struct elf64_hdr h; 151 struct elf64_phdr ph; 152 153 struct iovec iov[3] = { 154 {&h, sizeof(struct elf64_hdr)}, 155 {&ph, sizeof(struct elf64_phdr)}, 156 {(void *)payload, len}, 157 }; 158 int fd, fd1; 159 char buf[64]; 160 161 memset(&h, 0, sizeof(h)); 162 h.e_ident[0] = 0x7f; 163 h.e_ident[1] = 'E'; 164 h.e_ident[2] = 'L'; 165 h.e_ident[3] = 'F'; 166 h.e_ident[4] = 2; 167 h.e_ident[5] = 1; 168 h.e_ident[6] = 1; 169 h.e_ident[7] = 0; 170 h.e_type = 2; 171 h.e_machine = 0x3e; 172 h.e_version = 1; 173 h.e_entry = VADDR + sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr); 174 h.e_phoff = sizeof(struct elf64_hdr); 175 h.e_shoff = 0; 176 h.e_flags = 0; 177 h.e_ehsize = sizeof(struct elf64_hdr); 178 h.e_phentsize = sizeof(struct elf64_phdr); 179 h.e_phnum = 1; 180 h.e_shentsize = 0; 181 h.e_shnum = 0; 182 h.e_shstrndx = 0; 183 184 memset(&ph, 0, sizeof(ph)); 185 ph.p_type = 1; 186 ph.p_flags = (1<<2)|1; 187 ph.p_offset = 0; 188 ph.p_vaddr = VADDR; 189 ph.p_paddr = 0; 190 ph.p_filesz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len; 191 ph.p_memsz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len; 192 ph.p_align = 4096; 193 194 fd = openat(AT_FDCWD, "/tmp", O_WRONLY|O_EXCL|O_TMPFILE, 0700); 195 if (fd == -1) { 196 exit(1); 197 } 198 199 if (writev(fd, iov, 3) != sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len) { 200 exit(1); 201 } 202 203 /* Avoid ETXTBSY on exec. */ 204 snprintf(buf, sizeof(buf), "/proc/self/fd/%u", fd); 205 fd1 = open(buf, O_RDONLY|O_CLOEXEC); 206 close(fd); 207 208 return fd1; 209 } 210 #endif 211 212 static bool g_vsyscall = false; 213 214 static const char str_vsyscall[] = 215 "ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n"; 216 217 #ifdef __x86_64__ 218 static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___) 219 { 220 _exit(1); 221 } 222 223 /* 224 * vsyscall page can't be unmapped, probe it with memory load. 225 */ 226 static void vsyscall(void) 227 { 228 pid_t pid; 229 int wstatus; 230 231 pid = fork(); 232 if (pid < 0) { 233 fprintf(stderr, "fork, errno %d\n", errno); 234 exit(1); 235 } 236 if (pid == 0) { 237 struct rlimit rlim = {0, 0}; 238 (void)setrlimit(RLIMIT_CORE, &rlim); 239 240 /* Hide "segfault at ffffffffff600000" messages. */ 241 struct sigaction act; 242 memset(&act, 0, sizeof(struct sigaction)); 243 act.sa_flags = SA_SIGINFO; 244 act.sa_sigaction = sigaction_SIGSEGV; 245 (void)sigaction(SIGSEGV, &act, NULL); 246 247 *(volatile int *)0xffffffffff600000UL; 248 exit(0); 249 } 250 waitpid(pid, &wstatus, 0); 251 if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0) { 252 g_vsyscall = true; 253 } 254 } 255 256 int main(void) 257 { 258 int pipefd[2]; 259 int exec_fd; 260 261 vsyscall(); 262 263 atexit(ate); 264 265 make_private_tmp(); 266 267 /* Reserve fd 0 for 1-byte pipe ping from child. */ 268 close(0); 269 if (open("/", O_RDONLY|O_DIRECTORY|O_PATH) != 0) { 270 return 1; 271 } 272 273 exec_fd = make_exe(payload, sizeof(payload)); 274 275 if (pipe(pipefd) == -1) { 276 return 1; 277 } 278 if (dup2(pipefd[1], 0) != 0) { 279 return 1; 280 } 281 282 pid = fork(); 283 if (pid == -1) { 284 return 1; 285 } 286 if (pid == 0) { 287 sys_execveat(exec_fd, "", NULL, NULL, AT_EMPTY_PATH); 288 return 1; 289 } 290 291 char _; 292 if (read(pipefd[0], &_, 1) != 1) { 293 return 1; 294 } 295 296 struct stat st; 297 if (fstat(exec_fd, &st) == -1) { 298 return 1; 299 } 300 301 /* Generate "head -n1 /proc/$PID/maps" */ 302 char buf0[256]; 303 memset(buf0, ' ', sizeof(buf0)); 304 int len = snprintf(buf0, sizeof(buf0), 305 "%08lx-%08lx r-xp 00000000 %02lx:%02lx %llu", 306 VADDR, VADDR + PAGE_SIZE, 307 MAJOR(st.st_dev), MINOR(st.st_dev), 308 (unsigned long long)st.st_ino); 309 buf0[len] = ' '; 310 snprintf(buf0 + MAPS_OFFSET, sizeof(buf0) - MAPS_OFFSET, 311 "/tmp/#%llu (deleted)\n", (unsigned long long)st.st_ino); 312 313 /* Test /proc/$PID/maps */ 314 { 315 const size_t len = strlen(buf0) + (g_vsyscall ? strlen(str_vsyscall) : 0); 316 char buf[256]; 317 ssize_t rv; 318 int fd; 319 320 snprintf(buf, sizeof(buf), "/proc/%u/maps", pid); 321 fd = open(buf, O_RDONLY); 322 if (fd == -1) { 323 return 1; 324 } 325 rv = read(fd, buf, sizeof(buf)); 326 assert(rv == len); 327 assert(memcmp(buf, buf0, strlen(buf0)) == 0); 328 if (g_vsyscall) { 329 assert(memcmp(buf + strlen(buf0), str_vsyscall, strlen(str_vsyscall)) == 0); 330 } 331 } 332 333 /* Test /proc/$PID/smaps */ 334 { 335 char buf[4096]; 336 ssize_t rv; 337 int fd; 338 339 snprintf(buf, sizeof(buf), "/proc/%u/smaps", pid); 340 fd = open(buf, O_RDONLY); 341 if (fd == -1) { 342 return 1; 343 } 344 rv = read(fd, buf, sizeof(buf)); 345 assert(0 <= rv && rv <= sizeof(buf)); 346 347 assert(rv >= strlen(buf0)); 348 assert(memcmp(buf, buf0, strlen(buf0)) == 0); 349 350 #define RSS1 "Rss: 4 kB\n" 351 #define RSS2 "Rss: 0 kB\n" 352 #define PSS1 "Pss: 4 kB\n" 353 #define PSS2 "Pss: 0 kB\n" 354 assert(memmem(buf, rv, RSS1, strlen(RSS1)) || 355 memmem(buf, rv, RSS2, strlen(RSS2))); 356 assert(memmem(buf, rv, PSS1, strlen(PSS1)) || 357 memmem(buf, rv, PSS2, strlen(PSS2))); 358 359 static const char *S[] = { 360 "Size: 4 kB\n", 361 "KernelPageSize: 4 kB\n", 362 "MMUPageSize: 4 kB\n", 363 "Anonymous: 0 kB\n", 364 "AnonHugePages: 0 kB\n", 365 "Shared_Hugetlb: 0 kB\n", 366 "Private_Hugetlb: 0 kB\n", 367 "Locked: 0 kB\n", 368 }; 369 int i; 370 371 for (i = 0; i < sizeof(S)/sizeof(S[0]); i++) { 372 assert(memmem(buf, rv, S[i], strlen(S[i]))); 373 } 374 375 if (g_vsyscall) { 376 assert(memmem(buf, rv, str_vsyscall, strlen(str_vsyscall))); 377 } 378 } 379 380 /* Test /proc/$PID/smaps_rollup */ 381 { 382 char bufr[256]; 383 memset(bufr, ' ', sizeof(bufr)); 384 len = snprintf(bufr, sizeof(bufr), 385 "%08lx-%08lx ---p 00000000 00:00 0", 386 VADDR, VADDR + PAGE_SIZE); 387 bufr[len] = ' '; 388 snprintf(bufr + MAPS_OFFSET, sizeof(bufr) - MAPS_OFFSET, 389 "[rollup]\n"); 390 391 char buf[1024]; 392 ssize_t rv; 393 int fd; 394 395 snprintf(buf, sizeof(buf), "/proc/%u/smaps_rollup", pid); 396 fd = open(buf, O_RDONLY); 397 if (fd == -1) { 398 return 1; 399 } 400 rv = read(fd, buf, sizeof(buf)); 401 assert(0 <= rv && rv <= sizeof(buf)); 402 403 assert(rv >= strlen(bufr)); 404 assert(memcmp(buf, bufr, strlen(bufr)) == 0); 405 406 assert(memmem(buf, rv, RSS1, strlen(RSS1)) || 407 memmem(buf, rv, RSS2, strlen(RSS2))); 408 assert(memmem(buf, rv, PSS1, strlen(PSS1)) || 409 memmem(buf, rv, PSS2, strlen(PSS2))); 410 411 static const char *S[] = { 412 "Anonymous: 0 kB\n", 413 "AnonHugePages: 0 kB\n", 414 "Shared_Hugetlb: 0 kB\n", 415 "Private_Hugetlb: 0 kB\n", 416 "Locked: 0 kB\n", 417 }; 418 int i; 419 420 for (i = 0; i < sizeof(S)/sizeof(S[0]); i++) { 421 assert(memmem(buf, rv, S[i], strlen(S[i]))); 422 } 423 } 424 425 /* Test /proc/$PID/statm */ 426 { 427 char buf[64]; 428 ssize_t rv; 429 int fd; 430 431 snprintf(buf, sizeof(buf), "/proc/%u/statm", pid); 432 fd = open(buf, O_RDONLY); 433 if (fd == -1) { 434 return 1; 435 } 436 rv = read(fd, buf, sizeof(buf)); 437 assert(rv == 7 * 2); 438 439 assert(buf[0] == '1'); /* ->total_vm */ 440 assert(buf[1] == ' '); 441 assert(buf[2] == '0' || buf[2] == '1'); /* rss */ 442 assert(buf[3] == ' '); 443 assert(buf[4] == '0' || buf[2] == '1'); /* file rss */ 444 assert(buf[5] == ' '); 445 assert(buf[6] == '1'); /* ELF executable segments */ 446 assert(buf[7] == ' '); 447 assert(buf[8] == '0'); 448 assert(buf[9] == ' '); 449 assert(buf[10] == '0'); /* ->data_vm + ->stack_vm */ 450 assert(buf[11] == ' '); 451 assert(buf[12] == '0'); 452 assert(buf[13] == '\n'); 453 } 454 455 return 0; 456 } 457 #else 458 int main(void) 459 { 460 return 4; 461 } 462 #endif 463