1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 #include <sched.h>
4 #include <stdio.h>
5 #include <errno.h>
6 #include <pthread.h>
7 #include <string.h>
8 #include <sys/stat.h>
9 #include <sys/types.h>
10 #include <sys/mount.h>
11 #include <sys/wait.h>
12 #include <sys/vfs.h>
13 #include <sys/statvfs.h>
14 #include <sys/sysinfo.h>
15 #include <stdlib.h>
16 #include <unistd.h>
17 #include <fcntl.h>
18 #include <grp.h>
19 #include <stdbool.h>
20 #include <stdarg.h>
21 
22 #include "../kselftest_harness.h"
23 
/*
 * Fallback definitions for constants that the installed libc or kernel
 * headers may not provide. Every definition is guarded, so a value that the
 * headers already supply always takes precedence.
 */

/* Namespace flags handed to unshare(). */
#if !defined(CLONE_NEWNS)
#define CLONE_NEWNS 0x00020000
#endif

#if !defined(CLONE_NEWUSER)
#define CLONE_NEWUSER 0x10000000
#endif

/* Flags for mount(). */
#if !defined(MS_REC)
#define MS_REC 16384
#endif

#if !defined(MS_RELATIME)
#define MS_RELATIME (1 << 21)
#endif

#if !defined(MS_STRICTATIME)
#define MS_STRICTATIME (1 << 24)
#endif

#if !defined(MS_SHARED)
#define MS_SHARED (1 << 20)
#endif

/* Attribute bits used in struct mount_attr's attr_set/attr_clr. */
#if !defined(MOUNT_ATTR_RDONLY)
#define MOUNT_ATTR_RDONLY 0x00000001
#endif

#if !defined(MOUNT_ATTR_NOSUID)
#define MOUNT_ATTR_NOSUID 0x00000002
#endif

#if !defined(MOUNT_ATTR_NOEXEC)
#define MOUNT_ATTR_NOEXEC 0x00000008
#endif

#if !defined(MOUNT_ATTR_NODIRATIME)
#define MOUNT_ATTR_NODIRATIME 0x00000080
#endif

/*
 * Access-time settings: MOUNT_ATTR__ATIME is the mask covering the bits of
 * the individual modes below, and MOUNT_ATTR_RELATIME is the all-zero value.
 */
#if !defined(MOUNT_ATTR__ATIME)
#define MOUNT_ATTR__ATIME 0x00000070
#endif

#if !defined(MOUNT_ATTR_RELATIME)
#define MOUNT_ATTR_RELATIME 0x00000000
#endif

#if !defined(MOUNT_ATTR_NOATIME)
#define MOUNT_ATTR_NOATIME 0x00000010
#endif

#if !defined(MOUNT_ATTR_STRICTATIME)
#define MOUNT_ATTR_STRICTATIME 0x00000020
#endif

/* Flag passed to the mount_setattr wrapper to request a whole-subtree change. */
#if !defined(AT_RECURSIVE)
#define AT_RECURSIVE 0x8000
#endif

/* Upper bound on the number of threads started by the multi_threaded test. */
#define DEFAULT_THREADS 4

/* Round-trip a small integer through a void * (used for thread exit values). */
#define ptr_to_int(p) ((int)((intptr_t)(p)))
#define int_to_ptr(u) ((void *)((intptr_t)(u)))
87 
/*
 * Provide the mount_setattr syscall number, and the argument structure, when
 * the system headers do not define __NR_mount_setattr. The number depends on
 * the architecture (and, on MIPS, on the ABI in use).
 *
 * NOTE(review): struct mount_attr is only declared inside this guard, so a
 * toolchain that defines __NR_mount_setattr is expected to provide the
 * struct through its own headers as well.
 */
#if !defined(__NR_mount_setattr)
#  if defined(__alpha__)
#    define __NR_mount_setattr 552
#  elif defined(_MIPS_SIM)
#    if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
#      define __NR_mount_setattr (442 + 4000)
#    endif
#    if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
#      define __NR_mount_setattr (442 + 6000)
#    endif
#    if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
#      define __NR_mount_setattr (442 + 5000)
#    endif
#  elif defined(__ia64__)
#    define __NR_mount_setattr (442 + 1024)
#  else
#    define __NR_mount_setattr 442
#  endif

/* Argument block passed to mount_setattr; four 64-bit fields. */
struct mount_attr {
	__u64 attr_set;		/* MOUNT_ATTR_* bits to turn on */
	__u64 attr_clr;		/* MOUNT_ATTR_* bits to turn off */
	__u64 propagation;	/* propagation type, e.g. MS_SHARED */
	__u64 userns_fd;	/* user namespace fd, used with MOUNT_ATTR_IDMAP */
};
#endif
114 
/*
 * Provide the open_tree syscall number when the system headers do not. The
 * number depends on the architecture (and, on MIPS, on the ABI in use).
 */
#if !defined(__NR_open_tree)
#  if defined(__alpha__)
#    define __NR_open_tree 538
#  elif defined(_MIPS_SIM)
#    if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
#      define __NR_open_tree 4428
#    endif
#    if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
#      define __NR_open_tree 6428
#    endif
#    if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
#      define __NR_open_tree 5428
#    endif
#  elif defined(__ia64__)
#    define __NR_open_tree (428 + 1024)
#  else
#    define __NR_open_tree 428
#  endif
#endif

/* Attribute bit requesting an idmapped mount (paired with userns_fd). */
#if !defined(MOUNT_ATTR_IDMAP)
#define MOUNT_ATTR_IDMAP 0x00100000
#endif
138 
/*
 * Raw mount_setattr system call.
 *
 * All arguments are forwarded unchanged to syscall(). The result of
 * syscall() is returned narrowed to int.
 */
static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flags,
				    struct mount_attr *attr, size_t size)
{
	long rc;

	rc = syscall(__NR_mount_setattr, dfd, path, flags, attr, size);

	return rc;
}
144 
/* Fallbacks for the open_tree flags used by the tests below. */
#if !defined(OPEN_TREE_CLONE)
#define OPEN_TREE_CLONE 1
#endif

#if !defined(OPEN_TREE_CLOEXEC)
#define OPEN_TREE_CLOEXEC O_CLOEXEC
#endif

#if !defined(AT_RECURSIVE)
#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
#endif
156 
/*
 * Raw open_tree system call.
 *
 * All arguments are forwarded unchanged to syscall(). The result of
 * syscall() is returned narrowed to int.
 */
static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
{
	long rc;

	rc = syscall(__NR_open_tree, dfd, filename, flags);

	return rc;
}
161 
/*
 * write() wrapper that restarts the call for as long as it fails with EINTR.
 *
 * Returns the result of the first write() that either succeeds or fails with
 * an error other than EINTR.
 */
static ssize_t write_nointr(int fd, const void *buf, size_t count)
{
	for (;;) {
		ssize_t written = write(fd, buf, count);

		if (written >= 0 || errno != EINTR)
			return written;
	}
}
172 
173 static int write_file(const char *path, const void *buf, size_t count)
174 {
175 	int fd;
176 	ssize_t ret;
177 
178 	fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW);
179 	if (fd < 0)
180 		return -1;
181 
182 	ret = write_nointr(fd, buf, count);
183 	close(fd);
184 	if (ret < 0 || (size_t)ret != count)
185 		return -1;
186 
187 	return 0;
188 }
189 
190 static int create_and_enter_userns(void)
191 {
192 	uid_t uid;
193 	gid_t gid;
194 	char map[100];
195 
196 	uid = getuid();
197 	gid = getgid();
198 
199 	if (unshare(CLONE_NEWUSER))
200 		return -1;
201 
202 	if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) &&
203 	    errno != ENOENT)
204 		return -1;
205 
206 	snprintf(map, sizeof(map), "0 %d 1", uid);
207 	if (write_file("/proc/self/uid_map", map, strlen(map)))
208 		return -1;
209 
210 
211 	snprintf(map, sizeof(map), "0 %d 1", gid);
212 	if (write_file("/proc/self/gid_map", map, strlen(map)))
213 		return -1;
214 
215 	if (setgid(0))
216 		return -1;
217 
218 	if (setuid(0))
219 		return -1;
220 
221 	return 0;
222 }
223 
224 static int prepare_unpriv_mountns(void)
225 {
226 	if (create_and_enter_userns())
227 		return -1;
228 
229 	if (unshare(CLONE_NEWNS))
230 		return -1;
231 
232 	if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0))
233 		return -1;
234 
235 	return 0;
236 }
237 
/*
 * Report the mount flags of the filesystem containing @path as MS_* bits.
 *
 * The flags are obtained with statvfs() and every ST_* bit is translated to
 * its MS_* counterpart.
 *
 * Returns the MS_* flag set on success. Returns -EINVAL when statvfs() fails
 * or when it reports a flag that has no entry in the translation table.
 */
static int read_mnt_flags(const char *path)
{
	/* statvfs() flag -> mount() flag translation table. */
	static const struct {
		unsigned long st_flag;
		unsigned int ms_flag;
	} flag_map[] = {
		{ ST_RDONLY,		MS_RDONLY },
		{ ST_NOSUID,		MS_NOSUID },
		{ ST_NODEV,		MS_NODEV },
		{ ST_NOEXEC,		MS_NOEXEC },
		{ ST_NOATIME,		MS_NOATIME },
		{ ST_NODIRATIME,	MS_NODIRATIME },
		{ ST_RELATIME,		MS_RELATIME },
		{ ST_SYNCHRONOUS,	MS_SYNCHRONOUS },
		/* The result is an MS_* set, so use MS_MANDLOCK here. */
		{ ST_MANDLOCK,		MS_MANDLOCK },
	};
	const size_t nflags = sizeof(flag_map) / sizeof(flag_map[0]);
	struct statvfs stat;
	unsigned long known = 0;
	unsigned int mnt_flags = 0;
	size_t i;

	if (statvfs(path, &stat) != 0)
		return -EINVAL;

	for (i = 0; i < nflags; i++)
		known |= flag_map[i].st_flag;

	/* Refuse to silently drop a flag we cannot translate. */
	if (stat.f_flag & ~known)
		return -EINVAL;

	for (i = 0; i < nflags; i++) {
		if (stat.f_flag & flag_map[i].st_flag)
			mnt_flags |= flag_map[i].ms_flag;
	}

	return mnt_flags;
}
275 
/*
 * Skip over up to @nfields space- or tab-terminated words in @src.
 *
 * Each step advances past one word and the single separator that follows
 * it. Consecutive separators are not merged. Scanning stops early when the
 * end of the string is reached.
 *
 * Returns a pointer into @src: either the start of the requested field or
 * the terminating NUL if the string ran out first.
 */
static char *get_field(char *src, int nfields)
{
	char *cur = src;
	int skipped;

	for (skipped = 0; skipped < nfields; skipped++) {
		/* Jump to the separator that ends the current word. */
		cur += strcspn(cur, " \t");
		if (*cur == '\0')
			break;

		/* Step over that one separator. */
		cur++;
	}

	return cur;
}
293 
/*
 * Terminate @word in place at its first space or tab, so that it contains
 * only the leading word. A string without any separator is left as it is.
 */
static void null_endofword(char *word)
{
	word[strcspn(word, " \t")] = '\0';
}
300 
/*
 * Check whether the mount at @path is a shared mount.
 *
 * Scans /proc/self/mountinfo for lines whose fifth field (the mount point)
 * equals @path, and looks for "shared:" in the word two fields further on.
 *
 * Returns true if such a line is found. Returns false otherwise, including
 * when the file cannot be opened.
 */
static bool is_shared_mount(const char *path)
{
	bool shared = false;
	size_t len = 0;
	char *line = NULL;
	FILE *f;

	f = fopen("/proc/self/mountinfo", "re");
	if (!f)
		return false;

	while (getline(&line, &len, f) != -1) {
		char *opts, *target;

		target = get_field(line, 4);
		if (!target)
			continue;

		/* Locate opts before target is cut, while the line is intact. */
		opts = get_field(target, 2);
		if (!opts)
			continue;

		null_endofword(target);

		if (strcmp(target, path) != 0)
			continue;

		null_endofword(opts);
		if (strstr(opts, "shared:")) {
			/*
			 * Leave through the common exit so that the line
			 * buffer and the stream are released on this path too.
			 */
			shared = true;
			break;
		}
	}

	free(line);
	fclose(f);

	return shared;
}
337 
338 static void *mount_setattr_thread(void *data)
339 {
340 	struct mount_attr attr = {
341 		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID,
342 		.attr_clr	= 0,
343 		.propagation	= MS_SHARED,
344 	};
345 
346 	if (sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)))
347 		pthread_exit(int_to_ptr(-1));
348 
349 	pthread_exit(int_to_ptr(0));
350 }
351 
/*
 * Attempt to de-conflict with the selftests tree: if the harness does not
 * provide SKIP(), fall back to XFAIL().
 */
#if !defined(SKIP)
#define SKIP(s, ...)	XFAIL(s, ##__VA_ARGS__)
#endif
356 
357 static bool mount_setattr_supported(void)
358 {
359 	int ret;
360 
361 	ret = sys_mount_setattr(-EBADF, "", AT_EMPTY_PATH, NULL, 0);
362 	if (ret < 0 && errno == ENOSYS)
363 		return false;
364 
365 	return true;
366 }
367 
368 FIXTURE(mount_setattr) {
369 };
370 
371 FIXTURE_SETUP(mount_setattr)
372 {
373 	if (!mount_setattr_supported())
374 		SKIP(return, "mount_setattr syscall not supported");
375 
376 	ASSERT_EQ(prepare_unpriv_mountns(), 0);
377 
378 	(void)umount2("/mnt", MNT_DETACH);
379 	(void)umount2("/tmp", MNT_DETACH);
380 
381 	ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
382 			"size=100000,mode=700"), 0);
383 
384 	ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
385 
386 	ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV,
387 			"size=100000,mode=700"), 0);
388 
389 	ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
390 
391 	ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV,
392 			"size=100000,mode=700"), 0);
393 
394 	ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
395 			"size=100000,mode=700"), 0);
396 
397 	ASSERT_EQ(mkdir("/mnt/A", 0777), 0);
398 
399 	ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV,
400 			"size=100000,mode=700"), 0);
401 
402 	ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);
403 
404 	ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);
405 
406 	ASSERT_EQ(mkdir("/mnt/B", 0777), 0);
407 
408 	ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
409 			MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0);
410 
411 	ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);
412 
413 	ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
414 			MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);
415 }
416 
417 FIXTURE_TEARDOWN(mount_setattr)
418 {
419 	if (!mount_setattr_supported())
420 		SKIP(return, "mount_setattr syscall not supported");
421 
422 	(void)umount2("/mnt/A", MNT_DETACH);
423 	(void)umount2("/tmp", MNT_DETACH);
424 }
425 
426 TEST_F(mount_setattr, invalid_attributes)
427 {
428 	struct mount_attr invalid_attr = {
429 		.attr_set = (1U << 31),
430 	};
431 
432 	if (!mount_setattr_supported())
433 		SKIP(return, "mount_setattr syscall not supported");
434 
435 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
436 				    sizeof(invalid_attr)), 0);
437 
438 	invalid_attr.attr_set	= 0;
439 	invalid_attr.attr_clr	= (1U << 31);
440 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
441 				    sizeof(invalid_attr)), 0);
442 
443 	invalid_attr.attr_clr		= 0;
444 	invalid_attr.propagation	= (1U << 31);
445 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
446 				    sizeof(invalid_attr)), 0);
447 
448 	invalid_attr.attr_set		= (1U << 31);
449 	invalid_attr.attr_clr		= (1U << 31);
450 	invalid_attr.propagation	= (1U << 31);
451 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
452 				    sizeof(invalid_attr)), 0);
453 
454 	ASSERT_NE(sys_mount_setattr(-1, "mnt/A", AT_RECURSIVE, &invalid_attr,
455 				    sizeof(invalid_attr)), 0);
456 }
457 
458 TEST_F(mount_setattr, extensibility)
459 {
460 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
461 	char *s = "dummy";
462 	struct mount_attr invalid_attr = {};
463 	struct mount_attr_large {
464 		struct mount_attr attr1;
465 		struct mount_attr attr2;
466 		struct mount_attr attr3;
467 	} large_attr = {};
468 
469 	if (!mount_setattr_supported())
470 		SKIP(return, "mount_setattr syscall not supported");
471 
472 	old_flags = read_mnt_flags("/mnt/A");
473 	ASSERT_GT(old_flags, 0);
474 
475 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, NULL,
476 				    sizeof(invalid_attr)), 0);
477 	ASSERT_EQ(errno, EFAULT);
478 
479 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, (void *)s,
480 				    sizeof(invalid_attr)), 0);
481 	ASSERT_EQ(errno, EINVAL);
482 
483 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, 0), 0);
484 	ASSERT_EQ(errno, EINVAL);
485 
486 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
487 				    sizeof(invalid_attr) / 2), 0);
488 	ASSERT_EQ(errno, EINVAL);
489 
490 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
491 				    sizeof(invalid_attr) / 2), 0);
492 	ASSERT_EQ(errno, EINVAL);
493 
494 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
495 				    (void *)&large_attr, sizeof(large_attr)), 0);
496 
497 	large_attr.attr3.attr_set = MOUNT_ATTR_RDONLY;
498 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
499 				    (void *)&large_attr, sizeof(large_attr)), 0);
500 
501 	large_attr.attr3.attr_set = 0;
502 	large_attr.attr1.attr_set = MOUNT_ATTR_RDONLY;
503 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
504 				    (void *)&large_attr, sizeof(large_attr)), 0);
505 
506 	expected_flags = old_flags;
507 	expected_flags |= MS_RDONLY;
508 
509 	new_flags = read_mnt_flags("/mnt/A");
510 	ASSERT_EQ(new_flags, expected_flags);
511 
512 	new_flags = read_mnt_flags("/mnt/A/AA");
513 	ASSERT_EQ(new_flags, expected_flags);
514 
515 	new_flags = read_mnt_flags("/mnt/A/AA/B");
516 	ASSERT_EQ(new_flags, expected_flags);
517 
518 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
519 	ASSERT_EQ(new_flags, expected_flags);
520 }
521 
522 TEST_F(mount_setattr, basic)
523 {
524 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
525 	struct mount_attr attr = {
526 		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
527 		.attr_clr	= MOUNT_ATTR__ATIME,
528 	};
529 
530 	if (!mount_setattr_supported())
531 		SKIP(return, "mount_setattr syscall not supported");
532 
533 	old_flags = read_mnt_flags("/mnt/A");
534 	ASSERT_GT(old_flags, 0);
535 
536 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", 0, &attr, sizeof(attr)), 0);
537 
538 	expected_flags = old_flags;
539 	expected_flags |= MS_RDONLY;
540 	expected_flags |= MS_NOEXEC;
541 	expected_flags &= ~MS_NOATIME;
542 	expected_flags |= MS_RELATIME;
543 
544 	new_flags = read_mnt_flags("/mnt/A");
545 	ASSERT_EQ(new_flags, expected_flags);
546 
547 	new_flags = read_mnt_flags("/mnt/A/AA");
548 	ASSERT_EQ(new_flags, old_flags);
549 
550 	new_flags = read_mnt_flags("/mnt/A/AA/B");
551 	ASSERT_EQ(new_flags, old_flags);
552 
553 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
554 	ASSERT_EQ(new_flags, old_flags);
555 }
556 
557 TEST_F(mount_setattr, basic_recursive)
558 {
559 	int fd;
560 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
561 	struct mount_attr attr = {
562 		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
563 		.attr_clr	= MOUNT_ATTR__ATIME,
564 	};
565 
566 	if (!mount_setattr_supported())
567 		SKIP(return, "mount_setattr syscall not supported");
568 
569 	old_flags = read_mnt_flags("/mnt/A");
570 	ASSERT_GT(old_flags, 0);
571 
572 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
573 
574 	expected_flags = old_flags;
575 	expected_flags |= MS_RDONLY;
576 	expected_flags |= MS_NOEXEC;
577 	expected_flags &= ~MS_NOATIME;
578 	expected_flags |= MS_RELATIME;
579 
580 	new_flags = read_mnt_flags("/mnt/A");
581 	ASSERT_EQ(new_flags, expected_flags);
582 
583 	new_flags = read_mnt_flags("/mnt/A/AA");
584 	ASSERT_EQ(new_flags, expected_flags);
585 
586 	new_flags = read_mnt_flags("/mnt/A/AA/B");
587 	ASSERT_EQ(new_flags, expected_flags);
588 
589 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
590 	ASSERT_EQ(new_flags, expected_flags);
591 
592 	memset(&attr, 0, sizeof(attr));
593 	attr.attr_clr = MOUNT_ATTR_RDONLY;
594 	attr.propagation = MS_SHARED;
595 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
596 
597 	expected_flags &= ~MS_RDONLY;
598 	new_flags = read_mnt_flags("/mnt/A");
599 	ASSERT_EQ(new_flags, expected_flags);
600 
601 	ASSERT_EQ(is_shared_mount("/mnt/A"), true);
602 
603 	new_flags = read_mnt_flags("/mnt/A/AA");
604 	ASSERT_EQ(new_flags, expected_flags);
605 
606 	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
607 
608 	new_flags = read_mnt_flags("/mnt/A/AA/B");
609 	ASSERT_EQ(new_flags, expected_flags);
610 
611 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
612 
613 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
614 	ASSERT_EQ(new_flags, expected_flags);
615 
616 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
617 
618 	fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
619 	ASSERT_GE(fd, 0);
620 
621 	/*
622 	 * We're holding a fd open for writing so this needs to fail somewhere
623 	 * in the middle and the mount options need to be unchanged.
624 	 */
625 	attr.attr_set = MOUNT_ATTR_RDONLY;
626 	ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
627 
628 	new_flags = read_mnt_flags("/mnt/A");
629 	ASSERT_EQ(new_flags, expected_flags);
630 
631 	ASSERT_EQ(is_shared_mount("/mnt/A"), true);
632 
633 	new_flags = read_mnt_flags("/mnt/A/AA");
634 	ASSERT_EQ(new_flags, expected_flags);
635 
636 	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
637 
638 	new_flags = read_mnt_flags("/mnt/A/AA/B");
639 	ASSERT_EQ(new_flags, expected_flags);
640 
641 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
642 
643 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
644 	ASSERT_EQ(new_flags, expected_flags);
645 
646 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
647 
648 	EXPECT_EQ(close(fd), 0);
649 }
650 
651 TEST_F(mount_setattr, mount_has_writers)
652 {
653 	int fd, dfd;
654 	unsigned int old_flags = 0, new_flags = 0;
655 	struct mount_attr attr = {
656 		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
657 		.attr_clr	= MOUNT_ATTR__ATIME,
658 		.propagation	= MS_SHARED,
659 	};
660 
661 	if (!mount_setattr_supported())
662 		SKIP(return, "mount_setattr syscall not supported");
663 
664 	old_flags = read_mnt_flags("/mnt/A");
665 	ASSERT_GT(old_flags, 0);
666 
667 	fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
668 	ASSERT_GE(fd, 0);
669 
670 	/*
671 	 * We're holding a fd open to a mount somwhere in the middle so this
672 	 * needs to fail somewhere in the middle. After this the mount options
673 	 * need to be unchanged.
674 	 */
675 	ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
676 
677 	new_flags = read_mnt_flags("/mnt/A");
678 	ASSERT_EQ(new_flags, old_flags);
679 
680 	ASSERT_EQ(is_shared_mount("/mnt/A"), false);
681 
682 	new_flags = read_mnt_flags("/mnt/A/AA");
683 	ASSERT_EQ(new_flags, old_flags);
684 
685 	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), false);
686 
687 	new_flags = read_mnt_flags("/mnt/A/AA/B");
688 	ASSERT_EQ(new_flags, old_flags);
689 
690 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), false);
691 
692 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
693 	ASSERT_EQ(new_flags, old_flags);
694 
695 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), false);
696 
697 	dfd = open("/mnt/A/AA/B", O_DIRECTORY | O_CLOEXEC);
698 	ASSERT_GE(dfd, 0);
699 	EXPECT_EQ(fsync(dfd), 0);
700 	EXPECT_EQ(close(dfd), 0);
701 
702 	EXPECT_EQ(fsync(fd), 0);
703 	EXPECT_EQ(close(fd), 0);
704 
705 	/* All writers are gone so this should succeed. */
706 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
707 }
708 
709 TEST_F(mount_setattr, mixed_mount_options)
710 {
711 	unsigned int old_flags1 = 0, old_flags2 = 0, new_flags = 0, expected_flags = 0;
712 	struct mount_attr attr = {
713 		.attr_clr = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR__ATIME,
714 		.attr_set = MOUNT_ATTR_RELATIME,
715 	};
716 
717 	if (!mount_setattr_supported())
718 		SKIP(return, "mount_setattr syscall not supported");
719 
720 	old_flags1 = read_mnt_flags("/mnt/B");
721 	ASSERT_GT(old_flags1, 0);
722 
723 	old_flags2 = read_mnt_flags("/mnt/B/BB");
724 	ASSERT_GT(old_flags2, 0);
725 
726 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/B", AT_RECURSIVE, &attr, sizeof(attr)), 0);
727 
728 	expected_flags = old_flags2;
729 	expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
730 	expected_flags |= MS_RELATIME;
731 
732 	new_flags = read_mnt_flags("/mnt/B");
733 	ASSERT_EQ(new_flags, expected_flags);
734 
735 	expected_flags = old_flags2;
736 	expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
737 	expected_flags |= MS_RELATIME;
738 
739 	new_flags = read_mnt_flags("/mnt/B/BB");
740 	ASSERT_EQ(new_flags, expected_flags);
741 }
742 
743 TEST_F(mount_setattr, time_changes)
744 {
745 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
746 	struct mount_attr attr = {
747 		.attr_set	= MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME,
748 	};
749 
750 	if (!mount_setattr_supported())
751 		SKIP(return, "mount_setattr syscall not supported");
752 
753 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
754 
755 	attr.attr_set = MOUNT_ATTR_STRICTATIME;
756 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
757 
758 	attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
759 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
760 
761 	attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
762 	attr.attr_clr = MOUNT_ATTR__ATIME;
763 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
764 
765 	attr.attr_set = 0;
766 	attr.attr_clr = MOUNT_ATTR_STRICTATIME;
767 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
768 
769 	attr.attr_clr = MOUNT_ATTR_NOATIME;
770 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
771 
772 	old_flags = read_mnt_flags("/mnt/A");
773 	ASSERT_GT(old_flags, 0);
774 
775 	attr.attr_set = MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME;
776 	attr.attr_clr = MOUNT_ATTR__ATIME;
777 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
778 
779 	expected_flags = old_flags;
780 	expected_flags |= MS_NOATIME;
781 	expected_flags |= MS_NODIRATIME;
782 
783 	new_flags = read_mnt_flags("/mnt/A");
784 	ASSERT_EQ(new_flags, expected_flags);
785 
786 	new_flags = read_mnt_flags("/mnt/A/AA");
787 	ASSERT_EQ(new_flags, expected_flags);
788 
789 	new_flags = read_mnt_flags("/mnt/A/AA/B");
790 	ASSERT_EQ(new_flags, expected_flags);
791 
792 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
793 	ASSERT_EQ(new_flags, expected_flags);
794 
795 	memset(&attr, 0, sizeof(attr));
796 	attr.attr_set &= ~MOUNT_ATTR_NOATIME;
797 	attr.attr_set |= MOUNT_ATTR_RELATIME;
798 	attr.attr_clr |= MOUNT_ATTR__ATIME;
799 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
800 
801 	expected_flags &= ~MS_NOATIME;
802 	expected_flags |= MS_RELATIME;
803 
804 	new_flags = read_mnt_flags("/mnt/A");
805 	ASSERT_EQ(new_flags, expected_flags);
806 
807 	new_flags = read_mnt_flags("/mnt/A/AA");
808 	ASSERT_EQ(new_flags, expected_flags);
809 
810 	new_flags = read_mnt_flags("/mnt/A/AA/B");
811 	ASSERT_EQ(new_flags, expected_flags);
812 
813 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
814 	ASSERT_EQ(new_flags, expected_flags);
815 
816 	memset(&attr, 0, sizeof(attr));
817 	attr.attr_set &= ~MOUNT_ATTR_RELATIME;
818 	attr.attr_set |= MOUNT_ATTR_STRICTATIME;
819 	attr.attr_clr |= MOUNT_ATTR__ATIME;
820 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
821 
822 	expected_flags &= ~MS_RELATIME;
823 
824 	new_flags = read_mnt_flags("/mnt/A");
825 	ASSERT_EQ(new_flags, expected_flags);
826 
827 	new_flags = read_mnt_flags("/mnt/A/AA");
828 	ASSERT_EQ(new_flags, expected_flags);
829 
830 	new_flags = read_mnt_flags("/mnt/A/AA/B");
831 	ASSERT_EQ(new_flags, expected_flags);
832 
833 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
834 	ASSERT_EQ(new_flags, expected_flags);
835 
836 	memset(&attr, 0, sizeof(attr));
837 	attr.attr_set &= ~MOUNT_ATTR_STRICTATIME;
838 	attr.attr_set |= MOUNT_ATTR_NOATIME;
839 	attr.attr_clr |= MOUNT_ATTR__ATIME;
840 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
841 
842 	expected_flags |= MS_NOATIME;
843 	new_flags = read_mnt_flags("/mnt/A");
844 	ASSERT_EQ(new_flags, expected_flags);
845 
846 	new_flags = read_mnt_flags("/mnt/A/AA");
847 	ASSERT_EQ(new_flags, expected_flags);
848 
849 	new_flags = read_mnt_flags("/mnt/A/AA/B");
850 	ASSERT_EQ(new_flags, expected_flags);
851 
852 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
853 	ASSERT_EQ(new_flags, expected_flags);
854 
855 	memset(&attr, 0, sizeof(attr));
856 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
857 
858 	new_flags = read_mnt_flags("/mnt/A");
859 	ASSERT_EQ(new_flags, expected_flags);
860 
861 	new_flags = read_mnt_flags("/mnt/A/AA");
862 	ASSERT_EQ(new_flags, expected_flags);
863 
864 	new_flags = read_mnt_flags("/mnt/A/AA/B");
865 	ASSERT_EQ(new_flags, expected_flags);
866 
867 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
868 	ASSERT_EQ(new_flags, expected_flags);
869 
870 	memset(&attr, 0, sizeof(attr));
871 	attr.attr_clr = MOUNT_ATTR_NODIRATIME;
872 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
873 
874 	expected_flags &= ~MS_NODIRATIME;
875 
876 	new_flags = read_mnt_flags("/mnt/A");
877 	ASSERT_EQ(new_flags, expected_flags);
878 
879 	new_flags = read_mnt_flags("/mnt/A/AA");
880 	ASSERT_EQ(new_flags, expected_flags);
881 
882 	new_flags = read_mnt_flags("/mnt/A/AA/B");
883 	ASSERT_EQ(new_flags, expected_flags);
884 
885 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
886 	ASSERT_EQ(new_flags, expected_flags);
887 }
888 
889 TEST_F(mount_setattr, multi_threaded)
890 {
891 	int i, j, nthreads, ret = 0;
892 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
893 	pthread_attr_t pattr;
894 	pthread_t threads[DEFAULT_THREADS];
895 
896 	if (!mount_setattr_supported())
897 		SKIP(return, "mount_setattr syscall not supported");
898 
899 	old_flags = read_mnt_flags("/mnt/A");
900 	ASSERT_GT(old_flags, 0);
901 
902 	/* Try to change mount options from multiple threads. */
903 	nthreads = get_nprocs_conf();
904 	if (nthreads > DEFAULT_THREADS)
905 		nthreads = DEFAULT_THREADS;
906 
907 	pthread_attr_init(&pattr);
908 	for (i = 0; i < nthreads; i++)
909 		ASSERT_EQ(pthread_create(&threads[i], &pattr, mount_setattr_thread, NULL), 0);
910 
911 	for (j = 0; j < i; j++) {
912 		void *retptr = NULL;
913 
914 		EXPECT_EQ(pthread_join(threads[j], &retptr), 0);
915 
916 		ret += ptr_to_int(retptr);
917 		EXPECT_EQ(ret, 0);
918 	}
919 	pthread_attr_destroy(&pattr);
920 
921 	ASSERT_EQ(ret, 0);
922 
923 	expected_flags = old_flags;
924 	expected_flags |= MS_RDONLY;
925 	expected_flags |= MS_NOSUID;
926 	new_flags = read_mnt_flags("/mnt/A");
927 	ASSERT_EQ(new_flags, expected_flags);
928 
929 	ASSERT_EQ(is_shared_mount("/mnt/A"), true);
930 
931 	new_flags = read_mnt_flags("/mnt/A/AA");
932 	ASSERT_EQ(new_flags, expected_flags);
933 
934 	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
935 
936 	new_flags = read_mnt_flags("/mnt/A/AA/B");
937 	ASSERT_EQ(new_flags, expected_flags);
938 
939 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
940 
941 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
942 	ASSERT_EQ(new_flags, expected_flags);
943 
944 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
945 }
946 
947 TEST_F(mount_setattr, wrong_user_namespace)
948 {
949 	int ret;
950 	struct mount_attr attr = {
951 		.attr_set = MOUNT_ATTR_RDONLY,
952 	};
953 
954 	if (!mount_setattr_supported())
955 		SKIP(return, "mount_setattr syscall not supported");
956 
957 	EXPECT_EQ(create_and_enter_userns(), 0);
958 	ret = sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr));
959 	ASSERT_LT(ret, 0);
960 	ASSERT_EQ(errno, EPERM);
961 }
962 
963 TEST_F(mount_setattr, wrong_mount_namespace)
964 {
965 	int fd, ret;
966 	struct mount_attr attr = {
967 		.attr_set = MOUNT_ATTR_RDONLY,
968 	};
969 
970 	if (!mount_setattr_supported())
971 		SKIP(return, "mount_setattr syscall not supported");
972 
973 	fd = open("/mnt/A", O_DIRECTORY | O_CLOEXEC);
974 	ASSERT_GE(fd, 0);
975 
976 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
977 
978 	ret = sys_mount_setattr(fd, "", AT_EMPTY_PATH | AT_RECURSIVE, &attr, sizeof(attr));
979 	ASSERT_LT(ret, 0);
980 	ASSERT_EQ(errno, EINVAL);
981 }
982 
983 FIXTURE(mount_setattr_idmapped) {
984 };
985 
986 FIXTURE_SETUP(mount_setattr_idmapped)
987 {
988 	int img_fd = -EBADF;
989 
990 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
991 
992 	ASSERT_EQ(mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0), 0);
993 
994 	(void)umount2("/mnt", MNT_DETACH);
995 	(void)umount2("/tmp", MNT_DETACH);
996 
997 	ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
998 			"size=100000,mode=700"), 0);
999 
1000 	ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
1001 	ASSERT_EQ(mknodat(-EBADF, "/tmp/B/b", S_IFREG | 0644, 0), 0);
1002 	ASSERT_EQ(chown("/tmp/B/b", 0, 0), 0);
1003 
1004 	ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV,
1005 			"size=100000,mode=700"), 0);
1006 
1007 	ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
1008 	ASSERT_EQ(mknodat(-EBADF, "/tmp/B/BB/b", S_IFREG | 0644, 0), 0);
1009 	ASSERT_EQ(chown("/tmp/B/BB/b", 0, 0), 0);
1010 
1011 	ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV,
1012 			"size=100000,mode=700"), 0);
1013 
1014 	ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
1015 			"size=100000,mode=700"), 0);
1016 
1017 	ASSERT_EQ(mkdir("/mnt/A", 0777), 0);
1018 
1019 	ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV,
1020 			"size=100000,mode=700"), 0);
1021 
1022 	ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);
1023 
1024 	ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);
1025 
1026 	ASSERT_EQ(mkdir("/mnt/B", 0777), 0);
1027 
1028 	ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
1029 			MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0);
1030 
1031 	ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);
1032 
1033 	ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
1034 			MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);
1035 
1036 	ASSERT_EQ(mkdir("/mnt/C", 0777), 0);
1037 	ASSERT_EQ(mkdir("/mnt/D", 0777), 0);
1038 	img_fd = openat(-EBADF, "/mnt/C/ext4.img", O_CREAT | O_WRONLY, 0600);
1039 	ASSERT_GE(img_fd, 0);
1040 	ASSERT_EQ(ftruncate(img_fd, 1024 * 2048), 0);
1041 	ASSERT_EQ(system("mkfs.ext4 -q /mnt/C/ext4.img"), 0);
1042 	ASSERT_EQ(system("mount -o loop -t ext4 /mnt/C/ext4.img /mnt/D/"), 0);
1043 	ASSERT_EQ(close(img_fd), 0);
1044 }
1045 
/*
 * Detach the /mnt/A and /tmp trees created by the setup. The umount results
 * are deliberately ignored.
 */
FIXTURE_TEARDOWN(mount_setattr_idmapped)
{
	/* Best-effort cleanup. */
	(void)umount2("/mnt/A", MNT_DETACH);
	(void)umount2("/tmp", MNT_DETACH);
}
1051 
1052 /**
1053  * Validate that negative fd values are rejected.
1054  */
1055 TEST_F(mount_setattr_idmapped, invalid_fd_negative)
1056 {
1057 	struct mount_attr attr = {
1058 		.attr_set	= MOUNT_ATTR_IDMAP,
1059 		.userns_fd	= -EBADF,
1060 	};
1061 
1062 	if (!mount_setattr_supported())
1063 		SKIP(return, "mount_setattr syscall not supported");
1064 
1065 	ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
1066 		TH_LOG("failure: created idmapped mount with negative fd");
1067 	}
1068 }
1069 
1070 /**
1071  * Validate that excessively large fd values are rejected.
1072  */
1073 TEST_F(mount_setattr_idmapped, invalid_fd_large)
1074 {
1075 	struct mount_attr attr = {
1076 		.attr_set	= MOUNT_ATTR_IDMAP,
1077 		.userns_fd	= INT64_MAX,
1078 	};
1079 
1080 	if (!mount_setattr_supported())
1081 		SKIP(return, "mount_setattr syscall not supported");
1082 
1083 	ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
1084 		TH_LOG("failure: created idmapped mount with too large fd value");
1085 	}
1086 }
1087 
1088 /**
1089  * Validate that closed fd values are rejected.
1090  */
1091 TEST_F(mount_setattr_idmapped, invalid_fd_closed)
1092 {
1093 	int fd;
1094 	struct mount_attr attr = {
1095 		.attr_set = MOUNT_ATTR_IDMAP,
1096 	};
1097 
1098 	if (!mount_setattr_supported())
1099 		SKIP(return, "mount_setattr syscall not supported");
1100 
1101 	fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
1102 	ASSERT_GE(fd, 0);
1103 	ASSERT_GE(close(fd), 0);
1104 
1105 	attr.userns_fd = fd;
1106 	ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
1107 		TH_LOG("failure: created idmapped mount with closed fd");
1108 	}
1109 }
1110 
1111 /**
1112  * Validate that the initial user namespace is rejected.
1113  */
1114 TEST_F(mount_setattr_idmapped, invalid_fd_initial_userns)
1115 {
1116 	int open_tree_fd = -EBADF;
1117 	struct mount_attr attr = {
1118 		.attr_set = MOUNT_ATTR_IDMAP,
1119 	};
1120 
1121 	if (!mount_setattr_supported())
1122 		SKIP(return, "mount_setattr syscall not supported");
1123 
1124 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1125 				     AT_NO_AUTOMOUNT |
1126 				     AT_SYMLINK_NOFOLLOW |
1127 				     OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
1128 	ASSERT_GE(open_tree_fd, 0);
1129 
1130 	attr.userns_fd = open("/proc/1/ns/user", O_RDONLY | O_CLOEXEC);
1131 	ASSERT_GE(attr.userns_fd, 0);
1132 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1133 	ASSERT_EQ(errno, EPERM);
1134 	ASSERT_EQ(close(attr.userns_fd), 0);
1135 	ASSERT_EQ(close(open_tree_fd), 0);
1136 }
1137 
/*
 * Write the single mapping "<nsid> <hostid> <range>" to both the uid_map and
 * the gid_map of process @pid.
 *
 * Returns 0 when both writes succeed and -1 as soon as one of them fails
 * (the gid_map is not attempted if the uid_map write failed).
 */
static int map_ids(pid_t pid, unsigned long nsid, unsigned long hostid,
		   unsigned long range)
{
	char mapping[100], path[256];
	size_t mapping_len;
	int failed;

	/* The very same line is used for the uid and the gid mapping. */
	snprintf(mapping, sizeof(mapping), "%lu %lu %lu", nsid, hostid, range);
	mapping_len = strlen(mapping);

	snprintf(path, sizeof(path), "/proc/%d/uid_map", pid);
	failed = write_file(path, mapping, mapping_len);
	if (!failed) {
		snprintf(path, sizeof(path), "/proc/%d/gid_map", pid);
		failed = write_file(path, mapping, mapping_len);
	}

	return failed ? -1 : 0;
}
1156 
#define __STACK_SIZE (8 * 1024 * 1024)
/**
 * do_clone - run @fn in a child created with clone()
 * @fn:    entry point executed by the child
 * @arg:   opaque argument passed to @fn
 * @flags: clone flags; SIGCHLD is always OR'ed in as the exit signal
 *
 * Returns the child's pid on success, -ENOMEM if the child stack could not be
 * allocated, or the negative result of clone() (errno set) if the clone
 * itself failed.
 */
static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
{
	char *stack;
	pid_t pid;
	int saved_errno;

	stack = malloc(__STACK_SIZE);
	if (!stack)
		return -ENOMEM;

#ifdef __ia64__
	pid = __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL);
#else
	/* The stack grows downwards, so hand over the top of the buffer. */
	pid = clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL);
#endif

	/*
	 * Only the parent gets here. If the clone failed nobody uses the
	 * stack; if it succeeded without CLONE_VM the child runs on its own
	 * copy of the memory. In both cases the parent's buffer can be
	 * released instead of being leaked. With CLONE_VM the memory is shared
	 * with the running child and must stay allocated.
	 */
	if (pid < 0 || !(flags & CLONE_VM)) {
		/* Keep the errno of a failed clone() intact for the caller. */
		saved_errno = errno;
		free(stack);
		errno = saved_errno;
	}

	return pid;
}
1172 
/*
 * Child entry point: send SIGSTOP to the calling process and hand back the
 * result of kill(). @data is unused.
 */
static int get_userns_fd_cb(void *data)
{
	pid_t self = getpid();

	return kill(self, SIGSTOP);
}
1177 
/*
 * Wait for child @pid to terminate.
 *
 * Returns the child's exit status if it exited normally, and -1 if waitpid()
 * failed for a reason other than EINTR or the child did not exit normally.
 */
static int wait_for_pid(pid_t pid)
{
	int wstatus;

	/* Keep retrying for as long as the wait is merely interrupted. */
	while (waitpid(pid, &wstatus, 0) == -1) {
		if (errno != EINTR)
			return -1;
	}

	return WIFEXITED(wstatus) ? WEXITSTATUS(wstatus) : -1;
}
1196 
1197 static int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long range)
1198 {
1199 	int ret;
1200 	pid_t pid;
1201 	char path[256];
1202 
1203 	pid = do_clone(get_userns_fd_cb, NULL, CLONE_NEWUSER);
1204 	if (pid < 0)
1205 		return -errno;
1206 
1207 	ret = map_ids(pid, nsid, hostid, range);
1208 	if (ret < 0)
1209 		return ret;
1210 
1211 	snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
1212 	ret = open(path, O_RDONLY | O_CLOEXEC);
1213 	kill(pid, SIGKILL);
1214 	wait_for_pid(pid);
1215 	return ret;
1216 }
1217 
1218 /**
1219  * Validate that an attached mount in our mount namespace can be idmapped.
1220  * (The kernel enforces that the mount's mount namespace and the caller's mount
1221  *  namespace match.)
1222  */
1223 TEST_F(mount_setattr_idmapped, attached_mount_inside_current_mount_namespace)
1224 {
1225 	int open_tree_fd = -EBADF;
1226 	struct mount_attr attr = {
1227 		.attr_set = MOUNT_ATTR_IDMAP,
1228 	};
1229 
1230 	if (!mount_setattr_supported())
1231 		SKIP(return, "mount_setattr syscall not supported");
1232 
1233 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1234 				     AT_EMPTY_PATH |
1235 				     AT_NO_AUTOMOUNT |
1236 				     AT_SYMLINK_NOFOLLOW |
1237 				     OPEN_TREE_CLOEXEC);
1238 	ASSERT_GE(open_tree_fd, 0);
1239 
1240 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1241 	ASSERT_GE(attr.userns_fd, 0);
1242 	ASSERT_EQ(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1243 	ASSERT_EQ(close(attr.userns_fd), 0);
1244 	ASSERT_EQ(close(open_tree_fd), 0);
1245 }
1246 
1247 /**
1248  * Validate that idmapping a mount is rejected if the mount's mount namespace
1249  * and our mount namespace don't match.
1250  * (The kernel enforces that the mount's mount namespace and the caller's mount
1251  *  namespace match.)
1252  */
1253 TEST_F(mount_setattr_idmapped, attached_mount_outside_current_mount_namespace)
1254 {
1255 	int open_tree_fd = -EBADF;
1256 	struct mount_attr attr = {
1257 		.attr_set = MOUNT_ATTR_IDMAP,
1258 	};
1259 
1260 	if (!mount_setattr_supported())
1261 		SKIP(return, "mount_setattr syscall not supported");
1262 
1263 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1264 				     AT_EMPTY_PATH |
1265 				     AT_NO_AUTOMOUNT |
1266 				     AT_SYMLINK_NOFOLLOW |
1267 				     OPEN_TREE_CLOEXEC);
1268 	ASSERT_GE(open_tree_fd, 0);
1269 
1270 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
1271 
1272 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1273 	ASSERT_GE(attr.userns_fd, 0);
1274 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr,
1275 				    sizeof(attr)), 0);
1276 	ASSERT_EQ(close(attr.userns_fd), 0);
1277 	ASSERT_EQ(close(open_tree_fd), 0);
1278 }
1279 
1280 /**
1281  * Validate that an attached mount in our mount namespace can be idmapped.
1282  */
1283 TEST_F(mount_setattr_idmapped, detached_mount_inside_current_mount_namespace)
1284 {
1285 	int open_tree_fd = -EBADF;
1286 	struct mount_attr attr = {
1287 		.attr_set = MOUNT_ATTR_IDMAP,
1288 	};
1289 
1290 	if (!mount_setattr_supported())
1291 		SKIP(return, "mount_setattr syscall not supported");
1292 
1293 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1294 				     AT_EMPTY_PATH |
1295 				     AT_NO_AUTOMOUNT |
1296 				     AT_SYMLINK_NOFOLLOW |
1297 				     OPEN_TREE_CLOEXEC |
1298 				     OPEN_TREE_CLONE);
1299 	ASSERT_GE(open_tree_fd, 0);
1300 
1301 	/* Changing mount properties on a detached mount. */
1302 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1303 	ASSERT_GE(attr.userns_fd, 0);
1304 	ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1305 				    AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1306 	ASSERT_EQ(close(attr.userns_fd), 0);
1307 	ASSERT_EQ(close(open_tree_fd), 0);
1308 }
1309 
1310 /**
1311  * Validate that a detached mount not in our mount namespace can be idmapped.
1312  */
1313 TEST_F(mount_setattr_idmapped, detached_mount_outside_current_mount_namespace)
1314 {
1315 	int open_tree_fd = -EBADF;
1316 	struct mount_attr attr = {
1317 		.attr_set = MOUNT_ATTR_IDMAP,
1318 	};
1319 
1320 	if (!mount_setattr_supported())
1321 		SKIP(return, "mount_setattr syscall not supported");
1322 
1323 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1324 				     AT_EMPTY_PATH |
1325 				     AT_NO_AUTOMOUNT |
1326 				     AT_SYMLINK_NOFOLLOW |
1327 				     OPEN_TREE_CLOEXEC |
1328 				     OPEN_TREE_CLONE);
1329 	ASSERT_GE(open_tree_fd, 0);
1330 
1331 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
1332 
1333 	/* Changing mount properties on a detached mount. */
1334 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1335 	ASSERT_GE(attr.userns_fd, 0);
1336 	ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1337 				    AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1338 	ASSERT_EQ(close(attr.userns_fd), 0);
1339 	ASSERT_EQ(close(open_tree_fd), 0);
1340 }
1341 
1342 /**
1343  * Validate that currently changing the idmapping of an idmapped mount fails.
1344  */
1345 TEST_F(mount_setattr_idmapped, change_idmapping)
1346 {
1347 	int open_tree_fd = -EBADF;
1348 	struct mount_attr attr = {
1349 		.attr_set = MOUNT_ATTR_IDMAP,
1350 	};
1351 
1352 	if (!mount_setattr_supported())
1353 		SKIP(return, "mount_setattr syscall not supported");
1354 
1355 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1356 				     AT_EMPTY_PATH |
1357 				     AT_NO_AUTOMOUNT |
1358 				     AT_SYMLINK_NOFOLLOW |
1359 				     OPEN_TREE_CLOEXEC |
1360 				     OPEN_TREE_CLONE);
1361 	ASSERT_GE(open_tree_fd, 0);
1362 
1363 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1364 	ASSERT_GE(attr.userns_fd, 0);
1365 	ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1366 				    AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1367 	ASSERT_EQ(close(attr.userns_fd), 0);
1368 
1369 	/* Change idmapping on a detached mount that is already idmapped. */
1370 	attr.userns_fd	= get_userns_fd(0, 20000, 10000);
1371 	ASSERT_GE(attr.userns_fd, 0);
1372 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1373 	ASSERT_EQ(close(attr.userns_fd), 0);
1374 	ASSERT_EQ(close(open_tree_fd), 0);
1375 }
1376 
/*
 * Check the ownership of @path, resolved relative to @dfd with fstatat()
 * using @flags.
 *
 * Returns true only if the file could be stat'ed and both its uid and gid
 * equal @expected_uid and @expected_gid; returns false otherwise, including
 * when fstatat() fails.
 */
static bool expected_uid_gid(int dfd, const char *path, int flags,
			     uid_t expected_uid, gid_t expected_gid)
{
	struct stat info;

	if (fstatat(dfd, path, &info, flags) < 0)
		return false;

	if (info.st_uid != expected_uid)
		return false;

	return info.st_gid == expected_gid;
}
1389 
1390 TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid)
1391 {
1392 	int open_tree_fd = -EBADF;
1393 	struct mount_attr attr = {
1394 		.attr_set = MOUNT_ATTR_IDMAP,
1395 	};
1396 
1397 	if (!mount_setattr_supported())
1398 		SKIP(return, "mount_setattr syscall not supported");
1399 
1400 	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
1401 	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
1402 
1403 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/A",
1404 				     AT_RECURSIVE |
1405 				     AT_EMPTY_PATH |
1406 				     AT_NO_AUTOMOUNT |
1407 				     AT_SYMLINK_NOFOLLOW |
1408 				     OPEN_TREE_CLOEXEC |
1409 				     OPEN_TREE_CLONE);
1410 	ASSERT_GE(open_tree_fd, 0);
1411 
1412 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1413 	ASSERT_GE(attr.userns_fd, 0);
1414 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1415 	ASSERT_EQ(close(attr.userns_fd), 0);
1416 	ASSERT_EQ(close(open_tree_fd), 0);
1417 
1418 	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
1419 	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
1420 	ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/b", 0, 0, 0), 0);
1421 	ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/BB/b", 0, 0, 0), 0);
1422 }
1423 
1424 TEST_HARNESS_MAIN
1425