xref: /openbmc/linux/tools/testing/selftests/proc/proc-pid-vm.c (revision 9a87ffc99ec8eb8d35eed7c4f816d75f5cc9662e)
1  /*
2   * Copyright (c) 2019 Alexey Dobriyan <adobriyan@gmail.com>
3   *
4   * Permission to use, copy, modify, and distribute this software for any
5   * purpose with or without fee is hereby granted, provided that the above
6   * copyright notice and this permission notice appear in all copies.
7   *
8   * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9   * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10   * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11   * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12   * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13   * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14   * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15   */
16  /*
17   * Fork and exec tiny 1 page executable which precisely controls its VM.
18   * Test /proc/$PID/maps
19   * Test /proc/$PID/smaps
20   * Test /proc/$PID/smaps_rollup
21   * Test /proc/$PID/statm
22   *
23   * FIXME require CONFIG_TMPFS which can be disabled
24   * FIXME test other values from "smaps"
25   * FIXME support other archs
26   */
27  #undef NDEBUG
28  #include <assert.h>
29  #include <errno.h>
30  #include <sched.h>
31  #include <signal.h>
32  #include <stdbool.h>
33  #include <stdint.h>
34  #include <stdio.h>
35  #include <string.h>
36  #include <stdlib.h>
37  #include <sys/mount.h>
38  #include <sys/types.h>
39  #include <sys/stat.h>
40  #include <sys/wait.h>
41  #include <fcntl.h>
42  #include <unistd.h>
43  #include <sys/syscall.h>
44  #include <sys/uio.h>
45  #include <linux/kdev_t.h>
46  #include <sys/time.h>
47  #include <sys/resource.h>
48  
49  #include "../kselftest.h"
50  
/*
 * Issue the execveat system call directly through syscall().
 *
 * All five arguments are forwarded unchanged; the value returned by
 * syscall() is handed back to the caller as-is.
 */
static inline long sys_execveat(int dirfd, const char *pathname, char **argv, char **envp, int flags)
{
	const long rv = syscall(SYS_execveat, dirfd, pathname, argv, envp, flags);

	return rv;
}
55  
make_private_tmp(void)56  static void make_private_tmp(void)
57  {
58  	if (unshare(CLONE_NEWNS) == -1) {
59  		if (errno == ENOSYS || errno == EPERM) {
60  			exit(4);
61  		}
62  		exit(1);
63  	}
64  	if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) == -1) {
65  		exit(1);
66  	}
67  	if (mount(NULL, "/tmp", "tmpfs", 0, NULL) == -1) {
68  		exit(1);
69  	}
70  }
71  
/* PID of the forked test child; stays at -1 until main() has forked. */
static pid_t pid = -1;

/*
 * Exit handler: send SIGTERM to the child, if one was recorded.
 * Non-positive values of "pid" are ignored.
 */
static void ate(void)
{
	if (pid <= 0) {
		return;
	}
	kill(pid, SIGTERM);
}
79  
/* 64-bit ELF file header, laid out exactly as it is written to the file. */
struct elf64_hdr {
	uint8_t e_ident[16];	/* magic, class, data encoding, version, ... */
	uint16_t e_type;	/* object file type */
	uint16_t e_machine;	/* target architecture */
	uint32_t e_version;	/* object file version */
	uint64_t e_entry;	/* entry point virtual address */
	uint64_t e_phoff;	/* file offset of the program header table */
	uint64_t e_shoff;	/* file offset of the section header table */
	uint32_t e_flags;	/* processor-specific flags */
	uint16_t e_ehsize;	/* size of this header */
	uint16_t e_phentsize;	/* size of one program header entry */
	uint16_t e_phnum;	/* number of program header entries */
	uint16_t e_shentsize;	/* size of one section header entry */
	uint16_t e_shnum;	/* number of section header entries */
	uint16_t e_shstrndx;	/* section name string table index */
};
96  
/* 64-bit ELF program header, laid out exactly as it is written to the file. */
struct elf64_phdr {
	uint32_t p_type;	/* segment type */
	uint32_t p_flags;	/* segment permission flags */
	uint64_t p_offset;	/* file offset of the segment */
	uint64_t p_vaddr;	/* virtual address of the segment */
	uint64_t p_paddr;	/* physical address of the segment */
	uint64_t p_filesz;	/* bytes of the segment stored in the file */
	uint64_t p_memsz;	/* bytes of the segment in memory */
	uint64_t p_align;	/* segment alignment */
};
107  
108  #ifdef __x86_64__
#define PAGE_SIZE 4096
/* Virtual address at which the generated executable is loaded. */
#define VADDR (1UL << 32)
/* Column at which the pathname starts in the expected maps-style lines. */
#define MAPS_OFFSET 73

/* Byte number "n" (0 = least significant) of the immediate "x". */
#define imm_byte(x, n)	(((x) >> (8 * (n))) & 0xff)

/* Comma-separated little-endian bytes of a 32-bit / 64-bit immediate. */
#define imm32_le(x)	\
	imm_byte(x, 0), imm_byte(x, 1), imm_byte(x, 2), imm_byte(x, 3)
#define imm64_le(x)	\
	imm32_le(x),	\
	imm_byte(x, 4), imm_byte(x, 5), imm_byte(x, 6), imm_byte(x, 7)

/*
 * Machine code fragments for use inside a uint8_t array initializer.
 * Each macro expands to a comma-separated list of opcode bytes.
 */
#define syscall	0x0f, 0x05
#define mov_rdi(x)	0x48, 0xbf, imm64_le(x)
#define mov_rsi(x)	0x48, 0xbe, imm64_le(x)
#define mov_eax(x)	0xb8, imm32_le(x)
126  
127  static const uint8_t payload[] = {
128  	/* Casually unmap stack, vDSO and everything else. */
129  	/* munmap */
130  	mov_rdi(VADDR + 4096),
131  	mov_rsi((1ULL << 47) - 4096 - VADDR - 4096),
132  	mov_eax(11),
133  	syscall,
134  
135  	/* Ping parent. */
136  	/* write(0, &c, 1); */
137  	0x31, 0xff,					/* xor edi, edi */
138  	0x48, 0x8d, 0x35, 0x00, 0x00, 0x00, 0x00,	/* lea rsi, [rip] */
139  	0xba, 0x01, 0x00, 0x00, 0x00,			/* mov edx, 1 */
140  	mov_eax(1),
141  	syscall,
142  
143  	/* 1: pause(); */
144  	mov_eax(34),
145  	syscall,
146  
147  	0xeb, 0xf7,	/* jmp 1b */
148  };
149  
make_exe(const uint8_t * payload,size_t len)150  static int make_exe(const uint8_t *payload, size_t len)
151  {
152  	struct elf64_hdr h;
153  	struct elf64_phdr ph;
154  
155  	struct iovec iov[3] = {
156  		{&h, sizeof(struct elf64_hdr)},
157  		{&ph, sizeof(struct elf64_phdr)},
158  		{(void *)payload, len},
159  	};
160  	int fd, fd1;
161  	char buf[64];
162  
163  	memset(&h, 0, sizeof(h));
164  	h.e_ident[0] = 0x7f;
165  	h.e_ident[1] = 'E';
166  	h.e_ident[2] = 'L';
167  	h.e_ident[3] = 'F';
168  	h.e_ident[4] = 2;
169  	h.e_ident[5] = 1;
170  	h.e_ident[6] = 1;
171  	h.e_ident[7] = 0;
172  	h.e_type = 2;
173  	h.e_machine = 0x3e;
174  	h.e_version = 1;
175  	h.e_entry = VADDR + sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr);
176  	h.e_phoff = sizeof(struct elf64_hdr);
177  	h.e_shoff = 0;
178  	h.e_flags = 0;
179  	h.e_ehsize = sizeof(struct elf64_hdr);
180  	h.e_phentsize = sizeof(struct elf64_phdr);
181  	h.e_phnum = 1;
182  	h.e_shentsize = 0;
183  	h.e_shnum = 0;
184  	h.e_shstrndx = 0;
185  
186  	memset(&ph, 0, sizeof(ph));
187  	ph.p_type = 1;
188  	ph.p_flags = (1<<2)|1;
189  	ph.p_offset = 0;
190  	ph.p_vaddr = VADDR;
191  	ph.p_paddr = 0;
192  	ph.p_filesz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len;
193  	ph.p_memsz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len;
194  	ph.p_align = 4096;
195  
196  	fd = openat(AT_FDCWD, "/tmp", O_WRONLY|O_EXCL|O_TMPFILE, 0700);
197  	if (fd == -1) {
198  		exit(1);
199  	}
200  
201  	if (writev(fd, iov, 3) != sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len) {
202  		exit(1);
203  	}
204  
205  	/* Avoid ETXTBSY on exec. */
206  	snprintf(buf, sizeof(buf), "/proc/self/fd/%u", fd);
207  	fd1 = open(buf, O_RDONLY|O_CLOEXEC);
208  	close(fd);
209  
210  	return fd1;
211  }
212  #endif
213  
/*
 * Expected "[vsyscall]" line for each possible state of the vsyscall page.
 * The numeric suffix is the matching g_vsyscall value.
 */
static const char str_vsyscall_0[] = "";
static const char str_vsyscall_1[] =
	"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0                  [vsyscall]\n";
static const char str_vsyscall_2[] =
	"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]\n";

/*
 * State of the vsyscall page:
 *
 * 0: vsyscall VMA doesn't exist	vsyscall=none
 * 1: vsyscall VMA is --xp		vsyscall=xonly
 * 2: vsyscall VMA is r-xp		vsyscall=emulate
 */
static volatile int g_vsyscall;

/* One of the str_vsyscall_* strings above. */
static const char *str_vsyscall;
227  
228  #ifdef __x86_64__
/*
 * SIGSEGV handler for the vsyscall probe: terminate immediately and
 * report the current g_vsyscall value as the exit status.
 * None of the handler arguments are used.
 */
static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___)
{
	const int status = g_vsyscall;

	(void)_;
	(void)__;
	(void)___;
	_exit(status);
}
233  
234  /*
235   * vsyscall page can't be unmapped, probe it directly.
236   */
vsyscall(void)237  static void vsyscall(void)
238  {
239  	pid_t pid;
240  	int wstatus;
241  
242  	pid = fork();
243  	if (pid < 0) {
244  		fprintf(stderr, "fork, errno %d\n", errno);
245  		exit(1);
246  	}
247  	if (pid == 0) {
248  		struct rlimit rlim = {0, 0};
249  		(void)setrlimit(RLIMIT_CORE, &rlim);
250  
251  		/* Hide "segfault at ffffffffff600000" messages. */
252  		struct sigaction act;
253  		memset(&act, 0, sizeof(struct sigaction));
254  		act.sa_flags = SA_SIGINFO;
255  		act.sa_sigaction = sigaction_SIGSEGV;
256  		(void)sigaction(SIGSEGV, &act, NULL);
257  
258  		g_vsyscall = 0;
259  		/* gettimeofday(NULL, NULL); */
260  		uint64_t rax = 0xffffffffff600000;
261  		asm volatile (
262  			"call *%[rax]"
263  			: [rax] "+a" (rax)
264  			: "D" (NULL), "S" (NULL)
265  			: "rcx", "r11"
266  		);
267  
268  		g_vsyscall = 1;
269  		*(volatile int *)0xffffffffff600000UL;
270  
271  		g_vsyscall = 2;
272  		exit(g_vsyscall);
273  	}
274  	waitpid(pid, &wstatus, 0);
275  	if (WIFEXITED(wstatus)) {
276  		g_vsyscall = WEXITSTATUS(wstatus);
277  	} else {
278  		fprintf(stderr, "error: wstatus %08x\n", wstatus);
279  		exit(1);
280  	}
281  }
282  
main(void)283  int main(void)
284  {
285  	int pipefd[2];
286  	int exec_fd;
287  
288  	vsyscall();
289  	switch (g_vsyscall) {
290  	case 0:
291  		str_vsyscall = str_vsyscall_0;
292  		break;
293  	case 1:
294  		str_vsyscall = str_vsyscall_1;
295  		break;
296  	case 2:
297  		str_vsyscall = str_vsyscall_2;
298  		break;
299  	default:
300  		abort();
301  	}
302  
303  	atexit(ate);
304  
305  	make_private_tmp();
306  
307  	/* Reserve fd 0 for 1-byte pipe ping from child. */
308  	close(0);
309  	if (open("/", O_RDONLY|O_DIRECTORY|O_PATH) != 0) {
310  		return 1;
311  	}
312  
313  	exec_fd = make_exe(payload, sizeof(payload));
314  
315  	if (pipe(pipefd) == -1) {
316  		return 1;
317  	}
318  	if (dup2(pipefd[1], 0) != 0) {
319  		return 1;
320  	}
321  
322  	pid = fork();
323  	if (pid == -1) {
324  		return 1;
325  	}
326  	if (pid == 0) {
327  		sys_execveat(exec_fd, "", NULL, NULL, AT_EMPTY_PATH);
328  		return 1;
329  	}
330  
331  	char _;
332  	if (read(pipefd[0], &_, 1) != 1) {
333  		return 1;
334  	}
335  
336  	struct stat st;
337  	if (fstat(exec_fd, &st) == -1) {
338  		return 1;
339  	}
340  
341  	/* Generate "head -n1 /proc/$PID/maps" */
342  	char buf0[256];
343  	memset(buf0, ' ', sizeof(buf0));
344  	int len = snprintf(buf0, sizeof(buf0),
345  			"%08lx-%08lx r-xp 00000000 %02lx:%02lx %llu",
346  			VADDR, VADDR + PAGE_SIZE,
347  			MAJOR(st.st_dev), MINOR(st.st_dev),
348  			(unsigned long long)st.st_ino);
349  	buf0[len] = ' ';
350  	snprintf(buf0 + MAPS_OFFSET, sizeof(buf0) - MAPS_OFFSET,
351  		 "/tmp/#%llu (deleted)\n", (unsigned long long)st.st_ino);
352  
353  	/* Test /proc/$PID/maps */
354  	{
355  		const size_t len = strlen(buf0) + strlen(str_vsyscall);
356  		char buf[256];
357  		ssize_t rv;
358  		int fd;
359  
360  		snprintf(buf, sizeof(buf), "/proc/%u/maps", pid);
361  		fd = open(buf, O_RDONLY);
362  		if (fd == -1) {
363  			return 1;
364  		}
365  		rv = read(fd, buf, sizeof(buf));
366  		assert(rv == len);
367  		assert(memcmp(buf, buf0, strlen(buf0)) == 0);
368  		if (g_vsyscall > 0) {
369  			assert(memcmp(buf + strlen(buf0), str_vsyscall, strlen(str_vsyscall)) == 0);
370  		}
371  	}
372  
373  	/* Test /proc/$PID/smaps */
374  	{
375  		char buf[4096];
376  		ssize_t rv;
377  		int fd;
378  
379  		snprintf(buf, sizeof(buf), "/proc/%u/smaps", pid);
380  		fd = open(buf, O_RDONLY);
381  		if (fd == -1) {
382  			return 1;
383  		}
384  		rv = read(fd, buf, sizeof(buf));
385  		assert(0 <= rv && rv <= sizeof(buf));
386  
387  		assert(rv >= strlen(buf0));
388  		assert(memcmp(buf, buf0, strlen(buf0)) == 0);
389  
390  #define RSS1 "Rss:                   4 kB\n"
391  #define RSS2 "Rss:                   0 kB\n"
392  #define PSS1 "Pss:                   4 kB\n"
393  #define PSS2 "Pss:                   0 kB\n"
394  		assert(memmem(buf, rv, RSS1, strlen(RSS1)) ||
395  		       memmem(buf, rv, RSS2, strlen(RSS2)));
396  		assert(memmem(buf, rv, PSS1, strlen(PSS1)) ||
397  		       memmem(buf, rv, PSS2, strlen(PSS2)));
398  
399  		static const char *S[] = {
400  			"Size:                  4 kB\n",
401  			"KernelPageSize:        4 kB\n",
402  			"MMUPageSize:           4 kB\n",
403  			"Anonymous:             0 kB\n",
404  			"AnonHugePages:         0 kB\n",
405  			"Shared_Hugetlb:        0 kB\n",
406  			"Private_Hugetlb:       0 kB\n",
407  			"Locked:                0 kB\n",
408  		};
409  		int i;
410  
411  		for (i = 0; i < ARRAY_SIZE(S); i++) {
412  			assert(memmem(buf, rv, S[i], strlen(S[i])));
413  		}
414  
415  		if (g_vsyscall > 0) {
416  			assert(memmem(buf, rv, str_vsyscall, strlen(str_vsyscall)));
417  		}
418  	}
419  
420  	/* Test /proc/$PID/smaps_rollup */
421  	{
422  		char bufr[256];
423  		memset(bufr, ' ', sizeof(bufr));
424  		len = snprintf(bufr, sizeof(bufr),
425  				"%08lx-%08lx ---p 00000000 00:00 0",
426  				VADDR, VADDR + PAGE_SIZE);
427  		bufr[len] = ' ';
428  		snprintf(bufr + MAPS_OFFSET, sizeof(bufr) - MAPS_OFFSET,
429  			 "[rollup]\n");
430  
431  		char buf[1024];
432  		ssize_t rv;
433  		int fd;
434  
435  		snprintf(buf, sizeof(buf), "/proc/%u/smaps_rollup", pid);
436  		fd = open(buf, O_RDONLY);
437  		if (fd == -1) {
438  			return 1;
439  		}
440  		rv = read(fd, buf, sizeof(buf));
441  		assert(0 <= rv && rv <= sizeof(buf));
442  
443  		assert(rv >= strlen(bufr));
444  		assert(memcmp(buf, bufr, strlen(bufr)) == 0);
445  
446  		assert(memmem(buf, rv, RSS1, strlen(RSS1)) ||
447  		       memmem(buf, rv, RSS2, strlen(RSS2)));
448  		assert(memmem(buf, rv, PSS1, strlen(PSS1)) ||
449  		       memmem(buf, rv, PSS2, strlen(PSS2)));
450  
451  		static const char *S[] = {
452  			"Anonymous:             0 kB\n",
453  			"AnonHugePages:         0 kB\n",
454  			"Shared_Hugetlb:        0 kB\n",
455  			"Private_Hugetlb:       0 kB\n",
456  			"Locked:                0 kB\n",
457  		};
458  		int i;
459  
460  		for (i = 0; i < ARRAY_SIZE(S); i++) {
461  			assert(memmem(buf, rv, S[i], strlen(S[i])));
462  		}
463  	}
464  
465  	/* Test /proc/$PID/statm */
466  	{
467  		char buf[64];
468  		ssize_t rv;
469  		int fd;
470  
471  		snprintf(buf, sizeof(buf), "/proc/%u/statm", pid);
472  		fd = open(buf, O_RDONLY);
473  		if (fd == -1) {
474  			return 1;
475  		}
476  		rv = read(fd, buf, sizeof(buf));
477  		assert(rv == 7 * 2);
478  
479  		assert(buf[0] == '1');	/* ->total_vm */
480  		assert(buf[1] == ' ');
481  		assert(buf[2] == '0' || buf[2] == '1');	/* rss */
482  		assert(buf[3] == ' ');
483  		assert(buf[4] == '0' || buf[2] == '1');	/* file rss */
484  		assert(buf[5] == ' ');
485  		assert(buf[6] == '1');	/* ELF executable segments */
486  		assert(buf[7] == ' ');
487  		assert(buf[8] == '0');
488  		assert(buf[9] == ' ');
489  		assert(buf[10] == '0');	/* ->data_vm + ->stack_vm */
490  		assert(buf[11] == ' ');
491  		assert(buf[12] == '0');
492  		assert(buf[13] == '\n');
493  	}
494  
495  	return 0;
496  }
497  #else
/*
 * Fallback for architectures other than x86_64: nothing is tested.
 * NOTE(review): status 4 is presumably the kselftest "skip" code; confirm
 * against kselftest.h.
 */
int main(void)
{
	enum { STATUS_NOT_RUN = 4 };

	return STATUS_NOT_RUN;
}
502  #endif
503