1 /*
2  * Copyright (c) 2022 Alexey Dobriyan <adobriyan@gmail.com>
3  *
4  * Permission to use, copy, modify, and distribute this software for any
5  * purpose with or without fee is hereby granted, provided that the above
6  * copyright notice and this permission notice appear in all copies.
7  *
8  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15  */
16 /*
17  * Create a process without mappings by unmapping everything at once and
18  * holding it with ptrace(2). See what happens to
19  *
20  *	/proc/${pid}/maps
21  *	/proc/${pid}/numa_maps
22  *	/proc/${pid}/smaps
23  *	/proc/${pid}/smaps_rollup
24  */
25 #undef NDEBUG
26 #include <assert.h>
27 #include <errno.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <fcntl.h>
32 #include <sys/mman.h>
33 #include <sys/ptrace.h>
34 #include <sys/resource.h>
35 #include <sys/types.h>
36 #include <sys/wait.h>
37 #include <unistd.h>
38 
/*
 * Detected vsyscall mode of the running kernel:
 *
 * 0: vsyscall VMA doesn't exist	vsyscall=none
 * 1: vsyscall VMA is --xp		vsyscall=xonly
 * 2: vsyscall VMA is r-xp		vsyscall=emulate
 *
 * The variable is stored to by the probing code and read back from a SIGSEGV
 * handler, so it is volatile: the stores must really reach memory, in program
 * order, before the access that may fault.
 */
static volatile int g_vsyscall;

/* Expected vsyscall line(s) for the detected mode; set once in main(). */
static const char *g_proc_pid_maps_vsyscall;
static const char *g_proc_pid_smaps_vsyscall;

/* Expected /proc/${pid}/maps content, indexed by vsyscall mode. */
static const char proc_pid_maps_vsyscall_0[] = "";
static const char proc_pid_maps_vsyscall_1[] =
"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0                  [vsyscall]\n";
static const char proc_pid_maps_vsyscall_2[] =
"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]\n";
53 
/* Expected /proc/${pid}/smaps content when there is no vsyscall VMA. */
static const char proc_pid_smaps_vsyscall_0[] = "";

/*
 * Expected start of /proc/${pid}/smaps for vsyscall mode 1 (vsyscall=xonly).
 * The permission field is "--xp", the same as in the mode 1
 * /proc/${pid}/maps line.
 */
static const char proc_pid_smaps_vsyscall_1[] =
"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0                  [vsyscall]\n"
"Size:                  4 kB\n"
"KernelPageSize:        4 kB\n"
"MMUPageSize:           4 kB\n"
"Rss:                   0 kB\n"
"Pss:                   0 kB\n"
"Pss_Dirty:             0 kB\n"
"Shared_Clean:          0 kB\n"
"Shared_Dirty:          0 kB\n"
"Private_Clean:         0 kB\n"
"Private_Dirty:         0 kB\n"
"Referenced:            0 kB\n"
"Anonymous:             0 kB\n"
"LazyFree:              0 kB\n"
"AnonHugePages:         0 kB\n"
"ShmemPmdMapped:        0 kB\n"
"FilePmdMapped:         0 kB\n"
"Shared_Hugetlb:        0 kB\n"
"Private_Hugetlb:       0 kB\n"
"Swap:                  0 kB\n"
"SwapPss:               0 kB\n"
"Locked:                0 kB\n"
"THPeligible:    0\n"
/*
 * "ProtectionKey:" field is conditional. It is possible to check it as well,
 * but it is not covered here.
 */
;
85 
/*
 * Expected start of /proc/${pid}/smaps for vsyscall mode 2 (vsyscall=emulate).
 * The permission field is "r-xp", the same as in the mode 2
 * /proc/${pid}/maps line.
 */
static const char proc_pid_smaps_vsyscall_2[] =
"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]\n"
"Size:                  4 kB\n"
"KernelPageSize:        4 kB\n"
"MMUPageSize:           4 kB\n"
"Rss:                   0 kB\n"
"Pss:                   0 kB\n"
"Pss_Dirty:             0 kB\n"
"Shared_Clean:          0 kB\n"
"Shared_Dirty:          0 kB\n"
"Private_Clean:         0 kB\n"
"Private_Dirty:         0 kB\n"
"Referenced:            0 kB\n"
"Anonymous:             0 kB\n"
"LazyFree:              0 kB\n"
"AnonHugePages:         0 kB\n"
"ShmemPmdMapped:        0 kB\n"
"FilePmdMapped:         0 kB\n"
"Shared_Hugetlb:        0 kB\n"
"Private_Hugetlb:       0 kB\n"
"Swap:                  0 kB\n"
"SwapPss:               0 kB\n"
"Locked:                0 kB\n"
"THPeligible:    0\n"
/*
 * "ProtectionKey:" field is conditional. It is possible to check it as well,
 * but it is not covered here.
 */
;
115 
116 static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___)
117 {
118 	_exit(EXIT_FAILURE);
119 }
120 
121 static void sigaction_SIGSEGV_vsyscall(int _, siginfo_t *__, void *___)
122 {
123 	_exit(g_vsyscall);
124 }
125 
126 /*
127  * vsyscall page can't be unmapped, probe it directly.
128  */
129 static void vsyscall(void)
130 {
131 	pid_t pid;
132 	int wstatus;
133 
134 	pid = fork();
135 	if (pid < 0) {
136 		fprintf(stderr, "fork, errno %d\n", errno);
137 		exit(1);
138 	}
139 	if (pid == 0) {
140 		setrlimit(RLIMIT_CORE, &(struct rlimit){});
141 
142 		/* Hide "segfault at ffffffffff600000" messages. */
143 		struct sigaction act = {};
144 		act.sa_flags = SA_SIGINFO;
145 		act.sa_sigaction = sigaction_SIGSEGV_vsyscall;
146 		sigaction(SIGSEGV, &act, NULL);
147 
148 		g_vsyscall = 0;
149 		/* gettimeofday(NULL, NULL); */
150 		asm volatile (
151 			"call %P0"
152 			:
153 			: "i" (0xffffffffff600000), "D" (NULL), "S" (NULL)
154 			: "rax", "rcx", "r11"
155 		);
156 
157 		g_vsyscall = 1;
158 		*(volatile int *)0xffffffffff600000UL;
159 
160 		g_vsyscall = 2;
161 		exit(g_vsyscall);
162 	}
163 	waitpid(pid, &wstatus, 0);
164 	if (WIFEXITED(wstatus)) {
165 		g_vsyscall = WEXITSTATUS(wstatus);
166 	} else {
167 		fprintf(stderr, "error: vsyscall wstatus %08x\n", wstatus);
168 		exit(1);
169 	}
170 }
171 
172 static int test_proc_pid_maps(pid_t pid)
173 {
174 	char buf[4096];
175 	snprintf(buf, sizeof(buf), "/proc/%u/maps", pid);
176 	int fd = open(buf, O_RDONLY);
177 	if (fd == -1) {
178 		perror("open /proc/${pid}/maps");
179 		return EXIT_FAILURE;
180 	} else {
181 		ssize_t rv = read(fd, buf, sizeof(buf));
182 		close(fd);
183 		if (g_vsyscall == 0) {
184 			assert(rv == 0);
185 		} else {
186 			size_t len = strlen(g_proc_pid_maps_vsyscall);
187 			assert(rv == len);
188 			assert(memcmp(buf, g_proc_pid_maps_vsyscall, len) == 0);
189 		}
190 		return EXIT_SUCCESS;
191 	}
192 }
193 
/*
 * Check that /proc/${pid}/numa_maps of the target process reads as empty.
 *
 * Returns EXIT_SUCCESS if the file doesn't exist or was checked,
 * EXIT_FAILURE if it can't be opened for any other reason. Non-empty
 * content trips assert().
 */
static int test_proc_pid_numa_maps(pid_t pid)
{
	char buf[4096];
	ssize_t nread;
	int fd;

	snprintf(buf, sizeof(buf), "/proc/%u/numa_maps", pid);
	fd = open(buf, O_RDONLY);
	if (fd != -1) {
		nread = read(fd, buf, sizeof(buf));
		close(fd);
		assert(nread == 0);
		return EXIT_SUCCESS;
	}

	/*
	 * /proc/${pid}/numa_maps is under CONFIG_NUMA,
	 * it doesn't necessarily exist.
	 */
	if (errno == ENOENT) {
		return EXIT_SUCCESS;
	}

	perror("open /proc/${pid}/numa_maps");
	return EXIT_FAILURE;
}
216 
217 static int test_proc_pid_smaps(pid_t pid)
218 {
219 	char buf[4096];
220 	snprintf(buf, sizeof(buf), "/proc/%u/smaps", pid);
221 	int fd = open(buf, O_RDONLY);
222 	if (fd == -1) {
223 		if (errno == ENOENT) {
224 			/*
225 			 * /proc/${pid}/smaps is under CONFIG_PROC_PAGE_MONITOR,
226 			 * it doesn't necessarily exist.
227 			 */
228 			return EXIT_SUCCESS;
229 		}
230 		perror("open /proc/${pid}/smaps");
231 		return EXIT_FAILURE;
232 	} else {
233 		ssize_t rv = read(fd, buf, sizeof(buf));
234 		close(fd);
235 		if (g_vsyscall == 0) {
236 			assert(rv == 0);
237 		} else {
238 			size_t len = strlen(g_proc_pid_maps_vsyscall);
239 			/* TODO "ProtectionKey:" */
240 			assert(rv > len);
241 			assert(memcmp(buf, g_proc_pid_maps_vsyscall, len) == 0);
242 		}
243 		return EXIT_SUCCESS;
244 	}
245 }
246 
/* Exact /proc/${pid}/smaps_rollup content expected from the target process. */
static const char g_smaps_rollup[] =
"00000000-00000000 ---p 00000000 00:00 0                                  [rollup]\n"
"Rss:                   0 kB\n"
"Pss:                   0 kB\n"
"Pss_Dirty:             0 kB\n"
"Pss_Anon:              0 kB\n"
"Pss_File:              0 kB\n"
"Pss_Shmem:             0 kB\n"
"Shared_Clean:          0 kB\n"
"Shared_Dirty:          0 kB\n"
"Private_Clean:         0 kB\n"
"Private_Dirty:         0 kB\n"
"Referenced:            0 kB\n"
"Anonymous:             0 kB\n"
"LazyFree:              0 kB\n"
"AnonHugePages:         0 kB\n"
"ShmemPmdMapped:        0 kB\n"
"FilePmdMapped:         0 kB\n"
"Shared_Hugetlb:        0 kB\n"
"Private_Hugetlb:       0 kB\n"
"Swap:                  0 kB\n"
"SwapPss:               0 kB\n"
"Locked:                0 kB\n"
;

/*
 * Check that /proc/${pid}/smaps_rollup of the target process matches
 * g_smaps_rollup byte for byte. Mismatches trip assert().
 *
 * Returns EXIT_SUCCESS if the file doesn't exist or was checked,
 * EXIT_FAILURE if it can't be opened for any other reason.
 */
static int test_proc_pid_smaps_rollup(pid_t pid)
{
	char buf[4096];
	ssize_t nread;
	int fd;

	snprintf(buf, sizeof(buf), "/proc/%u/smaps_rollup", pid);
	fd = open(buf, O_RDONLY);
	if (fd != -1) {
		nread = read(fd, buf, sizeof(buf));
		close(fd);
		assert(nread == sizeof(g_smaps_rollup) - 1);
		assert(memcmp(buf, g_smaps_rollup, sizeof(g_smaps_rollup) - 1) == 0);
		return EXIT_SUCCESS;
	}

	/*
	 * /proc/${pid}/smaps_rollup is under CONFIG_PROC_PAGE_MONITOR,
	 * it doesn't necessarily exist.
	 */
	if (errno == ENOENT) {
		return EXIT_SUCCESS;
	}

	perror("open /proc/${pid}/smaps_rollup");
	return EXIT_FAILURE;
}
295 
296 int main(void)
297 {
298 	int rv = EXIT_SUCCESS;
299 
300 	vsyscall();
301 
302 	switch (g_vsyscall) {
303 	case 0:
304 		g_proc_pid_maps_vsyscall  = proc_pid_maps_vsyscall_0;
305 		g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_0;
306 		break;
307 	case 1:
308 		g_proc_pid_maps_vsyscall  = proc_pid_maps_vsyscall_1;
309 		g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_1;
310 		break;
311 	case 2:
312 		g_proc_pid_maps_vsyscall  = proc_pid_maps_vsyscall_2;
313 		g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_2;
314 		break;
315 	default:
316 		abort();
317 	}
318 
319 	pid_t pid = fork();
320 	if (pid == -1) {
321 		perror("fork");
322 		return EXIT_FAILURE;
323 	} else if (pid == 0) {
324 		rv = ptrace(PTRACE_TRACEME, 0, NULL, NULL);
325 		if (rv != 0) {
326 			if (errno == EPERM) {
327 				fprintf(stderr,
328 "Did you know? ptrace(PTRACE_TRACEME) doesn't work under strace.\n"
329 				);
330 				kill(getppid(), SIGTERM);
331 				return EXIT_FAILURE;
332 			}
333 			perror("ptrace PTRACE_TRACEME");
334 			return EXIT_FAILURE;
335 		}
336 
337 		/*
338 		 * Hide "segfault at ..." messages. Signal handler won't run.
339 		 */
340 		struct sigaction act = {};
341 		act.sa_flags = SA_SIGINFO;
342 		act.sa_sigaction = sigaction_SIGSEGV;
343 		sigaction(SIGSEGV, &act, NULL);
344 
345 #ifdef __amd64__
346 		munmap(NULL, ((size_t)1 << 47) - 4096);
347 #else
348 #error "implement 'unmap everything'"
349 #endif
350 		return EXIT_FAILURE;
351 	} else {
352 		/*
353 		 * TODO find reliable way to signal parent that munmap(2) completed.
354 		 * Child can't do it directly because it effectively doesn't exist
355 		 * anymore. Looking at child's VM files isn't 100% reliable either:
356 		 * due to a bug they may not become empty or empty-like.
357 		 */
358 		sleep(1);
359 
360 		if (rv == EXIT_SUCCESS) {
361 			rv = test_proc_pid_maps(pid);
362 		}
363 		if (rv == EXIT_SUCCESS) {
364 			rv = test_proc_pid_numa_maps(pid);
365 		}
366 		if (rv == EXIT_SUCCESS) {
367 			rv = test_proc_pid_smaps(pid);
368 		}
369 		if (rv == EXIT_SUCCESS) {
370 			rv = test_proc_pid_smaps_rollup(pid);
371 		}
372 		/*
373 		 * TODO test /proc/${pid}/statm, task_statm()
374 		 * ->start_code, ->end_code aren't updated by munmap().
375 		 * Output can be "0 0 0 2 0 0 0\n" where "2" can be anything.
376 		 */
377 
378 		/* Cut the rope. */
379 		int wstatus;
380 		waitpid(pid, &wstatus, 0);
381 		assert(WIFSTOPPED(wstatus));
382 		assert(WSTOPSIG(wstatus) == SIGSEGV);
383 	}
384 
385 	return rv;
386 }
387