1 #if defined __amd64__ || defined __i386__
2 /*
3  * Copyright (c) 2022 Alexey Dobriyan <adobriyan@gmail.com>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 /*
18  * Create a process without mappings by unmapping everything at once and
19  * holding it with ptrace(2). See what happens to
20  *
21  *	/proc/${pid}/maps
22  *	/proc/${pid}/numa_maps
23  *	/proc/${pid}/smaps
24  *	/proc/${pid}/smaps_rollup
25  */
26 #undef NDEBUG
27 #include <assert.h>
28 #include <errno.h>
29 #include <stdint.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <fcntl.h>
34 #include <sys/mman.h>
35 #include <sys/ptrace.h>
36 #include <sys/resource.h>
37 #include <sys/types.h>
38 #include <sys/wait.h>
39 #include <unistd.h>
40 
/* The vsyscall probe is only built on amd64. */
#if defined __amd64__
#define TEST_VSYSCALL
#endif

/*
 * Expected vsyscall line of /proc/${pid}/maps, indexed by the g_vsyscall
 * value described below.
 */
static const char proc_pid_maps_vsyscall_0[] = "";
static const char proc_pid_maps_vsyscall_1[] =
"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0                  [vsyscall]\n";
static const char proc_pid_maps_vsyscall_2[] =
"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]\n";

/*
 * Detected vsyscall mode:
 *
 * 0: vsyscall VMA doesn't exist	vsyscall=none
 * 1: vsyscall VMA is --xp		vsyscall=xonly
 * 2: vsyscall VMA is r-xp		vsyscall=emulate
 *
 * Stays 0 when the probe is not built.
 */
static volatile int g_vsyscall;

/* Expected strings for the detected mode, selected once in main(). */
static const char *g_proc_pid_maps_vsyscall;
static const char *g_proc_pid_smaps_vsyscall;
59 
/*
 * Expected /proc/${pid}/smaps entry of the vsyscall VMA, one string per
 * g_vsyscall value:
 *
 * 0: vsyscall VMA doesn't exist, nothing is expected
 * 1: vsyscall VMA is --xp
 * 2: vsyscall VMA is r-xp
 *
 * The permissions in the header line have to agree with the corresponding
 * proc_pid_maps_vsyscall_N string.
 *
 * The fields following the header line are the same for both modes.
 * "ProtectionKey:" field is conditional and is not part of the expected text.
 */
#define PROC_PID_SMAPS_VSYSCALL_FIELDS \
"Size:                  4 kB\n" \
"KernelPageSize:        4 kB\n" \
"MMUPageSize:           4 kB\n" \
"Rss:                   0 kB\n" \
"Pss:                   0 kB\n" \
"Pss_Dirty:             0 kB\n" \
"Shared_Clean:          0 kB\n" \
"Shared_Dirty:          0 kB\n" \
"Private_Clean:         0 kB\n" \
"Private_Dirty:         0 kB\n" \
"Referenced:            0 kB\n" \
"Anonymous:             0 kB\n" \
"LazyFree:              0 kB\n" \
"AnonHugePages:         0 kB\n" \
"ShmemPmdMapped:        0 kB\n" \
"FilePmdMapped:         0 kB\n" \
"Shared_Hugetlb:        0 kB\n" \
"Private_Hugetlb:       0 kB\n" \
"Swap:                  0 kB\n" \
"SwapPss:               0 kB\n" \
"Locked:                0 kB\n" \
"THPeligible:    0\n"

static const char proc_pid_smaps_vsyscall_0[] = "";

static const char proc_pid_smaps_vsyscall_1[] =
"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0                  [vsyscall]\n"
PROC_PID_SMAPS_VSYSCALL_FIELDS;

static const char proc_pid_smaps_vsyscall_2[] =
"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]\n"
PROC_PID_SMAPS_VSYSCALL_FIELDS;
121 
/*
 * SIGSEGV handler (SA_SIGINFO style) installed by main() in the child that
 * unmaps itself.
 *
 * Ignores all of its arguments and terminates the process with EXIT_FAILURE.
 */
static void sigaction_SIGSEGV(int sig, siginfo_t *info, void *uctx)
{
	(void)sig;
	(void)info;
	(void)uctx;

	_exit(EXIT_FAILURE);
}
126 
127 #ifdef TEST_VSYSCALL
128 static void sigaction_SIGSEGV_vsyscall(int _, siginfo_t *__, void *___)
129 {
130 	_exit(g_vsyscall);
131 }
132 
133 /*
134  * vsyscall page can't be unmapped, probe it directly.
135  */
136 static void vsyscall(void)
137 {
138 	pid_t pid;
139 	int wstatus;
140 
141 	pid = fork();
142 	if (pid < 0) {
143 		fprintf(stderr, "fork, errno %d\n", errno);
144 		exit(1);
145 	}
146 	if (pid == 0) {
147 		setrlimit(RLIMIT_CORE, &(struct rlimit){});
148 
149 		/* Hide "segfault at ffffffffff600000" messages. */
150 		struct sigaction act = {};
151 		act.sa_flags = SA_SIGINFO;
152 		act.sa_sigaction = sigaction_SIGSEGV_vsyscall;
153 		sigaction(SIGSEGV, &act, NULL);
154 
155 		g_vsyscall = 0;
156 		/* gettimeofday(NULL, NULL); */
157 		uint64_t rax = 0xffffffffff600000;
158 		asm volatile (
159 			"call *%[rax]"
160 			: [rax] "+a" (rax)
161 			: "D" (NULL), "S" (NULL)
162 			: "rcx", "r11"
163 		);
164 
165 		g_vsyscall = 1;
166 		*(volatile int *)0xffffffffff600000UL;
167 
168 		g_vsyscall = 2;
169 		exit(g_vsyscall);
170 	}
171 	waitpid(pid, &wstatus, 0);
172 	if (WIFEXITED(wstatus)) {
173 		g_vsyscall = WEXITSTATUS(wstatus);
174 	} else {
175 		fprintf(stderr, "error: vsyscall wstatus %08x\n", wstatus);
176 		exit(1);
177 	}
178 }
179 #endif
180 
181 static int test_proc_pid_maps(pid_t pid)
182 {
183 	char buf[4096];
184 	snprintf(buf, sizeof(buf), "/proc/%u/maps", pid);
185 	int fd = open(buf, O_RDONLY);
186 	if (fd == -1) {
187 		perror("open /proc/${pid}/maps");
188 		return EXIT_FAILURE;
189 	} else {
190 		ssize_t rv = read(fd, buf, sizeof(buf));
191 		close(fd);
192 		if (g_vsyscall == 0) {
193 			assert(rv == 0);
194 		} else {
195 			size_t len = strlen(g_proc_pid_maps_vsyscall);
196 			assert(rv == len);
197 			assert(memcmp(buf, g_proc_pid_maps_vsyscall, len) == 0);
198 		}
199 		return EXIT_SUCCESS;
200 	}
201 }
202 
/*
 * Check that /proc/${pid}/numa_maps of the mapping-less process is empty.
 *
 * Returns EXIT_SUCCESS if the file is empty or doesn't exist, EXIT_FAILURE
 * if it can't be opened for another reason or can't be read. Non-empty
 * content trips assert().
 */
static int test_proc_pid_numa_maps(pid_t pid)
{
	char buf[4096];

	snprintf(buf, sizeof(buf), "/proc/%d/numa_maps", (int)pid);
	int fd = open(buf, O_RDONLY);
	if (fd == -1) {
		if (errno == ENOENT) {
			/*
			 * /proc/${pid}/numa_maps is under CONFIG_NUMA,
			 * it doesn't necessarily exist.
			 */
			return EXIT_SUCCESS;
		}
		perror("open /proc/${pid}/numa_maps");
		return EXIT_FAILURE;
	}

	ssize_t rv = read(fd, buf, sizeof(buf));
	if (rv == -1) {
		/* Report before close(2) gets a chance to change errno. */
		perror("read /proc/${pid}/numa_maps");
		close(fd);
		return EXIT_FAILURE;
	}
	close(fd);

	assert(rv == 0);
	return EXIT_SUCCESS;
}
225 
226 static int test_proc_pid_smaps(pid_t pid)
227 {
228 	char buf[4096];
229 	snprintf(buf, sizeof(buf), "/proc/%u/smaps", pid);
230 	int fd = open(buf, O_RDONLY);
231 	if (fd == -1) {
232 		if (errno == ENOENT) {
233 			/*
234 			 * /proc/${pid}/smaps is under CONFIG_PROC_PAGE_MONITOR,
235 			 * it doesn't necessarily exist.
236 			 */
237 			return EXIT_SUCCESS;
238 		}
239 		perror("open /proc/${pid}/smaps");
240 		return EXIT_FAILURE;
241 	} else {
242 		ssize_t rv = read(fd, buf, sizeof(buf));
243 		close(fd);
244 		if (g_vsyscall == 0) {
245 			assert(rv == 0);
246 		} else {
247 			size_t len = strlen(g_proc_pid_maps_vsyscall);
248 			/* TODO "ProtectionKey:" */
249 			assert(rv > len);
250 			assert(memcmp(buf, g_proc_pid_maps_vsyscall, len) == 0);
251 		}
252 		return EXIT_SUCCESS;
253 	}
254 }
255 
/* Expected content of /proc/${pid}/smaps_rollup of a mapping-less process. */
static const char g_smaps_rollup[] =
"00000000-00000000 ---p 00000000 00:00 0                                  [rollup]\n"
"Rss:                   0 kB\n"
"Pss:                   0 kB\n"
"Pss_Dirty:             0 kB\n"
"Pss_Anon:              0 kB\n"
"Pss_File:              0 kB\n"
"Pss_Shmem:             0 kB\n"
"Shared_Clean:          0 kB\n"
"Shared_Dirty:          0 kB\n"
"Private_Clean:         0 kB\n"
"Private_Dirty:         0 kB\n"
"Referenced:            0 kB\n"
"Anonymous:             0 kB\n"
"LazyFree:              0 kB\n"
"AnonHugePages:         0 kB\n"
"ShmemPmdMapped:        0 kB\n"
"FilePmdMapped:         0 kB\n"
"Shared_Hugetlb:        0 kB\n"
"Private_Hugetlb:       0 kB\n"
"Swap:                  0 kB\n"
"SwapPss:               0 kB\n"
"Locked:                0 kB\n"
;

/*
 * Check that /proc/${pid}/smaps_rollup of the mapping-less process is
 * exactly g_smaps_rollup.
 *
 * Returns EXIT_SUCCESS if the content matches or the file doesn't exist,
 * EXIT_FAILURE if it can't be opened for another reason or can't be read.
 * Unexpected content trips assert().
 */
static int test_proc_pid_smaps_rollup(pid_t pid)
{
	char buf[4096];

	snprintf(buf, sizeof(buf), "/proc/%d/smaps_rollup", (int)pid);
	int fd = open(buf, O_RDONLY);
	if (fd == -1) {
		if (errno == ENOENT) {
			/*
			 * /proc/${pid}/smaps_rollup is under CONFIG_PROC_PAGE_MONITOR,
			 * it doesn't necessarily exist.
			 */
			return EXIT_SUCCESS;
		}
		perror("open /proc/${pid}/smaps_rollup");
		return EXIT_FAILURE;
	}

	ssize_t rv = read(fd, buf, sizeof(buf));
	if (rv == -1) {
		/* Report before close(2) gets a chance to change errno. */
		perror("read /proc/${pid}/smaps_rollup");
		close(fd);
		return EXIT_FAILURE;
	}
	close(fd);

	/* Expected length, without the terminating NUL. */
	const size_t len = sizeof(g_smaps_rollup) - 1;

	assert((size_t)rv == len);
	assert(memcmp(buf, g_smaps_rollup, len) == 0);
	return EXIT_SUCCESS;
}
304 
305 int main(void)
306 {
307 	int rv = EXIT_SUCCESS;
308 
309 #ifdef TEST_VSYSCALL
310 	vsyscall();
311 #endif
312 
313 	switch (g_vsyscall) {
314 	case 0:
315 		g_proc_pid_maps_vsyscall  = proc_pid_maps_vsyscall_0;
316 		g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_0;
317 		break;
318 	case 1:
319 		g_proc_pid_maps_vsyscall  = proc_pid_maps_vsyscall_1;
320 		g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_1;
321 		break;
322 	case 2:
323 		g_proc_pid_maps_vsyscall  = proc_pid_maps_vsyscall_2;
324 		g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_2;
325 		break;
326 	default:
327 		abort();
328 	}
329 
330 	pid_t pid = fork();
331 	if (pid == -1) {
332 		perror("fork");
333 		return EXIT_FAILURE;
334 	} else if (pid == 0) {
335 		rv = ptrace(PTRACE_TRACEME, 0, NULL, NULL);
336 		if (rv != 0) {
337 			if (errno == EPERM) {
338 				fprintf(stderr,
339 "Did you know? ptrace(PTRACE_TRACEME) doesn't work under strace.\n"
340 				);
341 				kill(getppid(), SIGTERM);
342 				return EXIT_FAILURE;
343 			}
344 			perror("ptrace PTRACE_TRACEME");
345 			return EXIT_FAILURE;
346 		}
347 
348 		/*
349 		 * Hide "segfault at ..." messages. Signal handler won't run.
350 		 */
351 		struct sigaction act = {};
352 		act.sa_flags = SA_SIGINFO;
353 		act.sa_sigaction = sigaction_SIGSEGV;
354 		sigaction(SIGSEGV, &act, NULL);
355 
356 #ifdef __amd64__
357 		munmap(NULL, ((size_t)1 << 47) - 4096);
358 #elif defined __i386__
359 		{
360 			size_t len;
361 
362 			for (len = -4096;; len -= 4096) {
363 				munmap(NULL, len);
364 			}
365 		}
366 #else
367 #error "implement 'unmap everything'"
368 #endif
369 		return EXIT_FAILURE;
370 	} else {
371 		/*
372 		 * TODO find reliable way to signal parent that munmap(2) completed.
373 		 * Child can't do it directly because it effectively doesn't exist
374 		 * anymore. Looking at child's VM files isn't 100% reliable either:
375 		 * due to a bug they may not become empty or empty-like.
376 		 */
377 		sleep(1);
378 
379 		if (rv == EXIT_SUCCESS) {
380 			rv = test_proc_pid_maps(pid);
381 		}
382 		if (rv == EXIT_SUCCESS) {
383 			rv = test_proc_pid_numa_maps(pid);
384 		}
385 		if (rv == EXIT_SUCCESS) {
386 			rv = test_proc_pid_smaps(pid);
387 		}
388 		if (rv == EXIT_SUCCESS) {
389 			rv = test_proc_pid_smaps_rollup(pid);
390 		}
391 		/*
392 		 * TODO test /proc/${pid}/statm, task_statm()
393 		 * ->start_code, ->end_code aren't updated by munmap().
394 		 * Output can be "0 0 0 2 0 0 0\n" where "2" can be anything.
395 		 */
396 
397 		/* Cut the rope. */
398 		int wstatus;
399 		waitpid(pid, &wstatus, 0);
400 		assert(WIFSTOPPED(wstatus));
401 		assert(WSTOPSIG(wstatus) == SIGSEGV);
402 	}
403 
404 	return rv;
405 }
406 #else
/*
 * Neither __amd64__ nor __i386__: there is nothing to run, report status 4
 * (presumably the test harness' "skipped" code -- confirm).
 */
int main(void)
{
	const int unsupported_arch_status = 4;

	return unsupported_arch_status;
}
411 #endif
412