1 /*
2  * Copyright (c) 2019 Alexey Dobriyan <adobriyan@gmail.com>
3  *
4  * Permission to use, copy, modify, and distribute this software for any
5  * purpose with or without fee is hereby granted, provided that the above
6  * copyright notice and this permission notice appear in all copies.
7  *
8  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15  */
/*
 * Fork and exec a tiny one-page executable which precisely controls its VM.
 * Test /proc/$PID/maps
 * Test /proc/$PID/smaps
 * Test /proc/$PID/smaps_rollup
 * Test /proc/$PID/statm
 *
 * FIXME the test requires CONFIG_TMPFS, which can be disabled
 * FIXME test other values from "smaps"
 * FIXME support other archs
 */
27 #undef NDEBUG
28 #include <assert.h>
29 #include <errno.h>
30 #include <sched.h>
31 #include <signal.h>
32 #include <stdbool.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <string.h>
36 #include <stdlib.h>
37 #include <sys/mount.h>
38 #include <sys/types.h>
39 #include <sys/stat.h>
40 #include <sys/wait.h>
41 #include <fcntl.h>
42 #include <unistd.h>
43 #include <sys/syscall.h>
44 #include <sys/uio.h>
45 #include <linux/kdev_t.h>
46 #include <sys/time.h>
47 #include <sys/resource.h>
48 
49 #include "../kselftest.h"
50 
/*
 * Thin wrapper around the raw execveat system call.
 *
 * All five arguments are forwarded unchanged to syscall(SYS_execveat, ...)
 * and the result of that call is returned as-is.
 *
 * Note: this definition has to stay above the "syscall" byte-sequence
 * macro defined further down in this file, otherwise the call below
 * would be macro-expanded.
 */
static inline long sys_execveat(int dirfd, const char *pathname, char **argv, char **envp, int flags)
{
	long ret;

	ret = syscall(SYS_execveat, dirfd, pathname, argv, envp, flags);
	return ret;
}
55 
/*
 * Move the process into its own mount namespace and mount a fresh
 * tmpfs on /tmp.
 *
 * Never returns on failure; the process exits instead:
 *   - with status 4 when unshare(CLONE_NEWNS) fails with ENOSYS or EPERM,
 *   - with status 1 on every other failure.
 */
static void make_private_tmp(void)
{
	if (unshare(CLONE_NEWNS) == -1) {
		const int status = (errno == ENOSYS || errno == EPERM) ? 4 : 1;

		exit(status);
	}

	/*
	 * First remount "/" with MS_PRIVATE|MS_REC, then put a tmpfs
	 * on /tmp. The second mount is only attempted if the first
	 * one succeeded.
	 */
	if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) == -1 ||
	    mount(NULL, "/tmp", "tmpfs", 0, NULL) == -1) {
		exit(1);
	}
}
71 
/* PID of the forked child; stays -1 until main() stores fork()'s result. */
static pid_t pid = -1;

/*
 * atexit() handler: send SIGTERM to the child recorded in "pid".
 * Does nothing unless "pid" holds a positive value, so neither the
 * initial -1 nor the 0 seen on the child side of fork() signals anyone.
 */
static void ate(void)
{
	if (pid <= 0) {
		return;
	}
	kill(pid, SIGTERM);
}
79 
/*
 * ELF-64 file header as written at offset 0 of the generated
 * executable (see make_exe()). The structure is dumped to the file
 * byte-for-byte, so field order and widths must not change.
 */
struct elf64_hdr {
	uint8_t e_ident[16];		/* magic, class, data encoding, version, OS ABI */
	uint16_t e_type, e_machine;	/* object file type, target machine */
	uint32_t e_version;		/* object file version */
	uint64_t e_entry;		/* virtual address of the entry point */
	uint64_t e_phoff, e_shoff;	/* file offsets of program/section header tables */
	uint32_t e_flags;		/* processor-specific flags */
	uint16_t e_ehsize;		/* size of this header */
	uint16_t e_phentsize, e_phnum;	/* program header entry size and count */
	uint16_t e_shentsize, e_shnum;	/* section header entry size and count */
	uint16_t e_shstrndx;		/* section name string table index */
};
96 
/*
 * ELF-64 program header. make_exe() writes exactly one of these
 * right after the file header, byte-for-byte, so field order and
 * widths must not change.
 */
struct elf64_phdr {
	uint32_t p_type, p_flags;	/* segment type, permission flags */
	uint64_t p_offset;		/* offset of the segment in the file */
	uint64_t p_vaddr, p_paddr;	/* virtual and physical load address */
	uint64_t p_filesz, p_memsz;	/* segment size in the file and in memory */
	uint64_t p_align;		/* segment alignment */
};
107 
108 #ifdef __x86_64__
/* Size of the single page the test executable occupies. */
#define PAGE_SIZE 4096
/* Virtual address the test executable is loaded at. */
#define VADDR (1UL << 32)
/* Offset inside an expected maps/smaps header line where main() places the name. */
#define MAPS_OFFSET 73

/*
 * Comma-separated little-endian byte lists of an immediate value,
 * meant to be dropped into a uint8_t array initializer.
 */
#define imm32_bytes(x)	\
	(x)&0xff, ((x)>>8)&0xff, ((x)>>16)&0xff, ((x)>>24)&0xff
#define imm64_bytes(x)	\
	imm32_bytes(x),	\
	((x)>>32)&0xff, ((x)>>40)&0xff, ((x)>>48)&0xff, ((x)>>56)&0xff

/*
 * Hand-assembled x86_64 instructions, again as byte lists:
 * "syscall", "mov rdi, imm64", "mov rsi, imm64" and "mov eax, imm32".
 * From here on the identifier "syscall" no longer names the libc function.
 */
#define syscall	0x0f, 0x05
#define mov_rdi(x)	0x48, 0xbf, imm64_bytes(x)
#define mov_rsi(x)	0x48, 0xbe, imm64_bytes(x)
#define mov_eax(x)	0xb8, imm32_bytes(x)
126 
127 static const uint8_t payload[] = {
128 	/* Casually unmap stack, vDSO and everything else. */
129 	/* munmap */
130 	mov_rdi(VADDR + 4096),
131 	mov_rsi((1ULL << 47) - 4096 - VADDR - 4096),
132 	mov_eax(11),
133 	syscall,
134 
135 	/* Ping parent. */
136 	/* write(0, &c, 1); */
137 	0x31, 0xff,					/* xor edi, edi */
138 	0x48, 0x8d, 0x35, 0x00, 0x00, 0x00, 0x00,	/* lea rsi, [rip] */
139 	0xba, 0x01, 0x00, 0x00, 0x00,			/* mov edx, 1 */
140 	mov_eax(1),
141 	syscall,
142 
143 	/* 1: pause(); */
144 	mov_eax(34),
145 	syscall,
146 
147 	0xeb, 0xf7,	/* jmp 1b */
148 };
149 
150 static int make_exe(const uint8_t *payload, size_t len)
151 {
152 	struct elf64_hdr h;
153 	struct elf64_phdr ph;
154 
155 	struct iovec iov[3] = {
156 		{&h, sizeof(struct elf64_hdr)},
157 		{&ph, sizeof(struct elf64_phdr)},
158 		{(void *)payload, len},
159 	};
160 	int fd, fd1;
161 	char buf[64];
162 
163 	memset(&h, 0, sizeof(h));
164 	h.e_ident[0] = 0x7f;
165 	h.e_ident[1] = 'E';
166 	h.e_ident[2] = 'L';
167 	h.e_ident[3] = 'F';
168 	h.e_ident[4] = 2;
169 	h.e_ident[5] = 1;
170 	h.e_ident[6] = 1;
171 	h.e_ident[7] = 0;
172 	h.e_type = 2;
173 	h.e_machine = 0x3e;
174 	h.e_version = 1;
175 	h.e_entry = VADDR + sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr);
176 	h.e_phoff = sizeof(struct elf64_hdr);
177 	h.e_shoff = 0;
178 	h.e_flags = 0;
179 	h.e_ehsize = sizeof(struct elf64_hdr);
180 	h.e_phentsize = sizeof(struct elf64_phdr);
181 	h.e_phnum = 1;
182 	h.e_shentsize = 0;
183 	h.e_shnum = 0;
184 	h.e_shstrndx = 0;
185 
186 	memset(&ph, 0, sizeof(ph));
187 	ph.p_type = 1;
188 	ph.p_flags = (1<<2)|1;
189 	ph.p_offset = 0;
190 	ph.p_vaddr = VADDR;
191 	ph.p_paddr = 0;
192 	ph.p_filesz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len;
193 	ph.p_memsz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len;
194 	ph.p_align = 4096;
195 
196 	fd = openat(AT_FDCWD, "/tmp", O_WRONLY|O_EXCL|O_TMPFILE, 0700);
197 	if (fd == -1) {
198 		exit(1);
199 	}
200 
201 	if (writev(fd, iov, 3) != sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len) {
202 		exit(1);
203 	}
204 
205 	/* Avoid ETXTBSY on exec. */
206 	snprintf(buf, sizeof(buf), "/proc/self/fd/%u", fd);
207 	fd1 = open(buf, O_RDONLY|O_CLOEXEC);
208 	close(fd);
209 
210 	return fd1;
211 }
212 #endif
213 
/*
 * Result of the vsyscall() probe:
 * 0: vsyscall VMA doesn't exist	vsyscall=none
 * 1: vsyscall VMA is r-xp		vsyscall=emulate
 * 2: vsyscall VMA is --xp		vsyscall=xonly
 */
static int g_vsyscall;

/* Expected "[vsyscall]" line for the detected mode; main() points it at one of the strings below. */
static const char *str_vsyscall;

/* One "[vsyscall]" line as it is expected in maps/smaps, with the given permission field. */
#define VSYSCALL_MAPS_LINE(perms)	\
	"ffffffffff600000-ffffffffff601000 " perms	\
	" 00000000 00:00 0                  [vsyscall]\n"

static const char str_vsyscall_0[] = "";
static const char str_vsyscall_1[] = VSYSCALL_MAPS_LINE("r-xp");
static const char str_vsyscall_2[] = VSYSCALL_MAPS_LINE("--xp");
227 
228 #ifdef __x86_64__
/*
 * SIGSEGV handler used by the probe children in vsyscall():
 * terminate the process right away with exit status 1.
 * None of the three handler arguments is looked at.
 */
static void sigaction_SIGSEGV(int sig, siginfo_t *info, void *ucontext)
{
	(void)sig;
	(void)info;
	(void)ucontext;

	_exit(1);
}
233 
234 /*
235  * vsyscall page can't be unmapped, probe it directly.
236  */
237 static void vsyscall(void)
238 {
239 	pid_t pid;
240 	int wstatus;
241 
242 	pid = fork();
243 	if (pid < 0) {
244 		fprintf(stderr, "fork, errno %d\n", errno);
245 		exit(1);
246 	}
247 	if (pid == 0) {
248 		struct rlimit rlim = {0, 0};
249 		(void)setrlimit(RLIMIT_CORE, &rlim);
250 
251 		/* Hide "segfault at ffffffffff600000" messages. */
252 		struct sigaction act;
253 		memset(&act, 0, sizeof(struct sigaction));
254 		act.sa_flags = SA_SIGINFO;
255 		act.sa_sigaction = sigaction_SIGSEGV;
256 		(void)sigaction(SIGSEGV, &act, NULL);
257 
258 		/* gettimeofday(NULL, NULL); */
259 		asm volatile (
260 			"call %P0"
261 			:
262 			: "i" (0xffffffffff600000), "D" (NULL), "S" (NULL)
263 			: "rax", "rcx", "r11"
264 		);
265 		exit(0);
266 	}
267 	waitpid(pid, &wstatus, 0);
268 	if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0) {
269 		/* vsyscall page exists and is executable. */
270 	} else {
271 		/* vsyscall page doesn't exist. */
272 		g_vsyscall = 0;
273 		return;
274 	}
275 
276 	pid = fork();
277 	if (pid < 0) {
278 		fprintf(stderr, "fork, errno %d\n", errno);
279 		exit(1);
280 	}
281 	if (pid == 0) {
282 		struct rlimit rlim = {0, 0};
283 		(void)setrlimit(RLIMIT_CORE, &rlim);
284 
285 		/* Hide "segfault at ffffffffff600000" messages. */
286 		struct sigaction act;
287 		memset(&act, 0, sizeof(struct sigaction));
288 		act.sa_flags = SA_SIGINFO;
289 		act.sa_sigaction = sigaction_SIGSEGV;
290 		(void)sigaction(SIGSEGV, &act, NULL);
291 
292 		*(volatile int *)0xffffffffff600000UL;
293 		exit(0);
294 	}
295 	waitpid(pid, &wstatus, 0);
296 	if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0) {
297 		/* vsyscall page is readable and executable. */
298 		g_vsyscall = 1;
299 		return;
300 	}
301 
302 	/* vsyscall page is executable but unreadable. */
303 	g_vsyscall = 2;
304 }
305 
306 int main(void)
307 {
308 	int pipefd[2];
309 	int exec_fd;
310 
311 	vsyscall();
312 	switch (g_vsyscall) {
313 	case 0:
314 		str_vsyscall = str_vsyscall_0;
315 		break;
316 	case 1:
317 		str_vsyscall = str_vsyscall_1;
318 		break;
319 	case 2:
320 		str_vsyscall = str_vsyscall_2;
321 		break;
322 	default:
323 		abort();
324 	}
325 
326 	atexit(ate);
327 
328 	make_private_tmp();
329 
330 	/* Reserve fd 0 for 1-byte pipe ping from child. */
331 	close(0);
332 	if (open("/", O_RDONLY|O_DIRECTORY|O_PATH) != 0) {
333 		return 1;
334 	}
335 
336 	exec_fd = make_exe(payload, sizeof(payload));
337 
338 	if (pipe(pipefd) == -1) {
339 		return 1;
340 	}
341 	if (dup2(pipefd[1], 0) != 0) {
342 		return 1;
343 	}
344 
345 	pid = fork();
346 	if (pid == -1) {
347 		return 1;
348 	}
349 	if (pid == 0) {
350 		sys_execveat(exec_fd, "", NULL, NULL, AT_EMPTY_PATH);
351 		return 1;
352 	}
353 
354 	char _;
355 	if (read(pipefd[0], &_, 1) != 1) {
356 		return 1;
357 	}
358 
359 	struct stat st;
360 	if (fstat(exec_fd, &st) == -1) {
361 		return 1;
362 	}
363 
364 	/* Generate "head -n1 /proc/$PID/maps" */
365 	char buf0[256];
366 	memset(buf0, ' ', sizeof(buf0));
367 	int len = snprintf(buf0, sizeof(buf0),
368 			"%08lx-%08lx r-xp 00000000 %02lx:%02lx %llu",
369 			VADDR, VADDR + PAGE_SIZE,
370 			MAJOR(st.st_dev), MINOR(st.st_dev),
371 			(unsigned long long)st.st_ino);
372 	buf0[len] = ' ';
373 	snprintf(buf0 + MAPS_OFFSET, sizeof(buf0) - MAPS_OFFSET,
374 		 "/tmp/#%llu (deleted)\n", (unsigned long long)st.st_ino);
375 
376 	/* Test /proc/$PID/maps */
377 	{
378 		const size_t len = strlen(buf0) + strlen(str_vsyscall);
379 		char buf[256];
380 		ssize_t rv;
381 		int fd;
382 
383 		snprintf(buf, sizeof(buf), "/proc/%u/maps", pid);
384 		fd = open(buf, O_RDONLY);
385 		if (fd == -1) {
386 			return 1;
387 		}
388 		rv = read(fd, buf, sizeof(buf));
389 		assert(rv == len);
390 		assert(memcmp(buf, buf0, strlen(buf0)) == 0);
391 		if (g_vsyscall > 0) {
392 			assert(memcmp(buf + strlen(buf0), str_vsyscall, strlen(str_vsyscall)) == 0);
393 		}
394 	}
395 
396 	/* Test /proc/$PID/smaps */
397 	{
398 		char buf[4096];
399 		ssize_t rv;
400 		int fd;
401 
402 		snprintf(buf, sizeof(buf), "/proc/%u/smaps", pid);
403 		fd = open(buf, O_RDONLY);
404 		if (fd == -1) {
405 			return 1;
406 		}
407 		rv = read(fd, buf, sizeof(buf));
408 		assert(0 <= rv && rv <= sizeof(buf));
409 
410 		assert(rv >= strlen(buf0));
411 		assert(memcmp(buf, buf0, strlen(buf0)) == 0);
412 
413 #define RSS1 "Rss:                   4 kB\n"
414 #define RSS2 "Rss:                   0 kB\n"
415 #define PSS1 "Pss:                   4 kB\n"
416 #define PSS2 "Pss:                   0 kB\n"
417 		assert(memmem(buf, rv, RSS1, strlen(RSS1)) ||
418 		       memmem(buf, rv, RSS2, strlen(RSS2)));
419 		assert(memmem(buf, rv, PSS1, strlen(PSS1)) ||
420 		       memmem(buf, rv, PSS2, strlen(PSS2)));
421 
422 		static const char *S[] = {
423 			"Size:                  4 kB\n",
424 			"KernelPageSize:        4 kB\n",
425 			"MMUPageSize:           4 kB\n",
426 			"Anonymous:             0 kB\n",
427 			"AnonHugePages:         0 kB\n",
428 			"Shared_Hugetlb:        0 kB\n",
429 			"Private_Hugetlb:       0 kB\n",
430 			"Locked:                0 kB\n",
431 		};
432 		int i;
433 
434 		for (i = 0; i < ARRAY_SIZE(S); i++) {
435 			assert(memmem(buf, rv, S[i], strlen(S[i])));
436 		}
437 
438 		if (g_vsyscall > 0) {
439 			assert(memmem(buf, rv, str_vsyscall, strlen(str_vsyscall)));
440 		}
441 	}
442 
443 	/* Test /proc/$PID/smaps_rollup */
444 	{
445 		char bufr[256];
446 		memset(bufr, ' ', sizeof(bufr));
447 		len = snprintf(bufr, sizeof(bufr),
448 				"%08lx-%08lx ---p 00000000 00:00 0",
449 				VADDR, VADDR + PAGE_SIZE);
450 		bufr[len] = ' ';
451 		snprintf(bufr + MAPS_OFFSET, sizeof(bufr) - MAPS_OFFSET,
452 			 "[rollup]\n");
453 
454 		char buf[1024];
455 		ssize_t rv;
456 		int fd;
457 
458 		snprintf(buf, sizeof(buf), "/proc/%u/smaps_rollup", pid);
459 		fd = open(buf, O_RDONLY);
460 		if (fd == -1) {
461 			return 1;
462 		}
463 		rv = read(fd, buf, sizeof(buf));
464 		assert(0 <= rv && rv <= sizeof(buf));
465 
466 		assert(rv >= strlen(bufr));
467 		assert(memcmp(buf, bufr, strlen(bufr)) == 0);
468 
469 		assert(memmem(buf, rv, RSS1, strlen(RSS1)) ||
470 		       memmem(buf, rv, RSS2, strlen(RSS2)));
471 		assert(memmem(buf, rv, PSS1, strlen(PSS1)) ||
472 		       memmem(buf, rv, PSS2, strlen(PSS2)));
473 
474 		static const char *S[] = {
475 			"Anonymous:             0 kB\n",
476 			"AnonHugePages:         0 kB\n",
477 			"Shared_Hugetlb:        0 kB\n",
478 			"Private_Hugetlb:       0 kB\n",
479 			"Locked:                0 kB\n",
480 		};
481 		int i;
482 
483 		for (i = 0; i < ARRAY_SIZE(S); i++) {
484 			assert(memmem(buf, rv, S[i], strlen(S[i])));
485 		}
486 	}
487 
488 	/* Test /proc/$PID/statm */
489 	{
490 		char buf[64];
491 		ssize_t rv;
492 		int fd;
493 
494 		snprintf(buf, sizeof(buf), "/proc/%u/statm", pid);
495 		fd = open(buf, O_RDONLY);
496 		if (fd == -1) {
497 			return 1;
498 		}
499 		rv = read(fd, buf, sizeof(buf));
500 		assert(rv == 7 * 2);
501 
502 		assert(buf[0] == '1');	/* ->total_vm */
503 		assert(buf[1] == ' ');
504 		assert(buf[2] == '0' || buf[2] == '1');	/* rss */
505 		assert(buf[3] == ' ');
506 		assert(buf[4] == '0' || buf[2] == '1');	/* file rss */
507 		assert(buf[5] == ' ');
508 		assert(buf[6] == '1');	/* ELF executable segments */
509 		assert(buf[7] == ' ');
510 		assert(buf[8] == '0');
511 		assert(buf[9] == ' ');
512 		assert(buf[10] == '0');	/* ->data_vm + ->stack_vm */
513 		assert(buf[11] == ' ');
514 		assert(buf[12] == '0');
515 		assert(buf[13] == '\n');
516 	}
517 
518 	return 0;
519 }
520 #else
/*
 * Build for architectures other than x86_64: nothing is tested and
 * the program just reports exit status 4.
 * NOTE(review): 4 is presumably the kselftest "skip" code - confirm
 * against kselftest.h.
 */
int main(void)
{
	enum { STATUS_NOT_SUPPORTED = 4 };

	return STATUS_NOT_SUPPORTED;
}
525 #endif
526