1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 #include <sched.h>
4 #include <stdio.h>
5 #include <errno.h>
6 #include <pthread.h>
7 #include <string.h>
8 #include <sys/stat.h>
9 #include <sys/types.h>
10 #include <sys/mount.h>
11 #include <sys/wait.h>
12 #include <sys/vfs.h>
13 #include <sys/statvfs.h>
14 #include <sys/sysinfo.h>
15 #include <stdlib.h>
16 #include <unistd.h>
17 #include <fcntl.h>
18 #include <grp.h>
19 #include <stdbool.h>
20 #include <stdarg.h>
21 
22 #include "../kselftest_harness.h"
23 
/*
 * Fallback definitions for constants used by these tests. Every value is
 * wrapped in #ifndef, so it only takes effect when the headers included
 * above did not already provide it.
 */

/* Namespace flags passed to unshare(). */
#ifndef CLONE_NEWNS
#define CLONE_NEWNS 0x00020000
#endif

#ifndef CLONE_NEWUSER
#define CLONE_NEWUSER 0x10000000
#endif

/* mount(2) style MS_* flags. */
#ifndef MS_REC
#define MS_REC 16384
#endif

#ifndef MS_RELATIME
#define MS_RELATIME (1 << 21)
#endif

#ifndef MS_STRICTATIME
#define MS_STRICTATIME (1 << 24)
#endif

/* MOUNT_ATTR_* flags placed in struct mount_attr's attr_set/attr_clr. */
#ifndef MOUNT_ATTR_RDONLY
#define MOUNT_ATTR_RDONLY 0x00000001
#endif

#ifndef MOUNT_ATTR_NOSUID
#define MOUNT_ATTR_NOSUID 0x00000002
#endif

#ifndef MOUNT_ATTR_NOEXEC
#define MOUNT_ATTR_NOEXEC 0x00000008
#endif

#ifndef MOUNT_ATTR_NODIRATIME
#define MOUNT_ATTR_NODIRATIME 0x00000080
#endif

/* Mask covering the access-time values below (RELATIME/NOATIME/STRICTATIME). */
#ifndef MOUNT_ATTR__ATIME
#define MOUNT_ATTR__ATIME 0x00000070
#endif

/* Note: RELATIME is the all-zero value inside the MOUNT_ATTR__ATIME mask. */
#ifndef MOUNT_ATTR_RELATIME
#define MOUNT_ATTR_RELATIME 0x00000000
#endif

#ifndef MOUNT_ATTR_NOATIME
#define MOUNT_ATTR_NOATIME 0x00000010
#endif

#ifndef MOUNT_ATTR_STRICTATIME
#define MOUNT_ATTR_STRICTATIME 0x00000020
#endif

/* Flag passed in the "flags" argument of mount_setattr in these tests. */
#ifndef AT_RECURSIVE
#define AT_RECURSIVE 0x8000
#endif

/* Value stored in struct mount_attr's propagation field by these tests. */
#ifndef MS_SHARED
#define MS_SHARED (1 << 20)
#endif

/* Upper bound on the number of threads started by the multi_threaded test. */
#define DEFAULT_THREADS 4
/* Round-trip an int through a void pointer (used for thread exit values). */
#define ptr_to_int(p) ((int)((intptr_t)(p)))
#define int_to_ptr(u) ((void *)((intptr_t)(u)))
87 
/*
 * Fallback syscall number for mount_setattr, selected per architecture/ABI,
 * used only when the headers did not define __NR_mount_setattr.
 *
 * NOTE(review): struct mount_attr is declared inside the same #ifndef, so it
 * is only provided here when the syscall number was missing as well. If a
 * toolchain supplies the number but not the struct, this file would not
 * compile - confirm against the supported header versions.
 */
#ifndef __NR_mount_setattr
	#if defined __alpha__
		#define __NR_mount_setattr 552
	#elif defined _MIPS_SIM
		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
			#define __NR_mount_setattr (442 + 4000)
		#endif
		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
			#define __NR_mount_setattr (442 + 6000)
		#endif
		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
			#define __NR_mount_setattr (442 + 5000)
		#endif
	#elif defined __ia64__
		#define __NR_mount_setattr (442 + 1024)
	#else
		#define __NR_mount_setattr 442
	#endif

/*
 * Argument block handed to sys_mount_setattr():
 *  - attr_set:    MOUNT_ATTR_* flags to set
 *  - attr_clr:    MOUNT_ATTR_* flags to clear
 *  - propagation: propagation type (the tests use MS_SHARED)
 *  - userns_fd:   file descriptor used together with MOUNT_ATTR_IDMAP
 */
struct mount_attr {
	__u64 attr_set;
	__u64 attr_clr;
	__u64 propagation;
	__u64 userns_fd;
};
#endif
114 
/*
 * Fallback syscall number for open_tree, selected per architecture/ABI,
 * used only when the headers did not define __NR_open_tree.
 */
#ifndef __NR_open_tree
	#if defined __alpha__
		#define __NR_open_tree 538
	#elif defined _MIPS_SIM
		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
			#define __NR_open_tree 4428
		#endif
		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
			#define __NR_open_tree 6428
		#endif
		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
			#define __NR_open_tree 5428
		#endif
	#elif defined __ia64__
		#define __NR_open_tree (428 + 1024)
	#else
		#define __NR_open_tree 428
	#endif
#endif

/* Fallback: attr_set flag used by the idmapped-mount tests with userns_fd. */
#ifndef MOUNT_ATTR_IDMAP
#define MOUNT_ATTR_IDMAP 0x00100000
#endif

/* Fallback: mount attribute flag for "do not follow symlinks". */
#ifndef MOUNT_ATTR_NOSYMFOLLOW
#define MOUNT_ATTR_NOSYMFOLLOW 0x00200000
#endif
142 
143 static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flags,
144 				    struct mount_attr *attr, size_t size)
145 {
146 	return syscall(__NR_mount_setattr, dfd, path, flags, attr, size);
147 }
148 
/* Fallback flags for sys_open_tree(); only defined when headers lack them. */
#ifndef OPEN_TREE_CLONE
#define OPEN_TREE_CLONE 1
#endif

#ifndef OPEN_TREE_CLOEXEC
#define OPEN_TREE_CLOEXEC O_CLOEXEC
#endif

/* Has no effect when AT_RECURSIVE is already defined at this point. */
#ifndef AT_RECURSIVE
#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
#endif
160 
161 static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
162 {
163 	return syscall(__NR_open_tree, dfd, filename, flags);
164 }
165 
/*
 * Call write() on @fd, transparently retrying while it fails with EINTR.
 *
 * Returns the result of the first write() that either succeeds or fails
 * with an error other than EINTR. Short writes are returned unchanged.
 */
static ssize_t write_nointr(int fd, const void *buf, size_t count)
{
	for (;;) {
		ssize_t written = write(fd, buf, count);

		if (written >= 0 || errno != EINTR)
			return written;
	}
}
176 
177 static int write_file(const char *path, const void *buf, size_t count)
178 {
179 	int fd;
180 	ssize_t ret;
181 
182 	fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW);
183 	if (fd < 0)
184 		return -1;
185 
186 	ret = write_nointr(fd, buf, count);
187 	close(fd);
188 	if (ret < 0 || (size_t)ret != count)
189 		return -1;
190 
191 	return 0;
192 }
193 
194 static int create_and_enter_userns(void)
195 {
196 	uid_t uid;
197 	gid_t gid;
198 	char map[100];
199 
200 	uid = getuid();
201 	gid = getgid();
202 
203 	if (unshare(CLONE_NEWUSER))
204 		return -1;
205 
206 	if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) &&
207 	    errno != ENOENT)
208 		return -1;
209 
210 	snprintf(map, sizeof(map), "0 %d 1", uid);
211 	if (write_file("/proc/self/uid_map", map, strlen(map)))
212 		return -1;
213 
214 
215 	snprintf(map, sizeof(map), "0 %d 1", gid);
216 	if (write_file("/proc/self/gid_map", map, strlen(map)))
217 		return -1;
218 
219 	if (setgid(0))
220 		return -1;
221 
222 	if (setuid(0))
223 		return -1;
224 
225 	return 0;
226 }
227 
228 static int prepare_unpriv_mountns(void)
229 {
230 	if (create_and_enter_userns())
231 		return -1;
232 
233 	if (unshare(CLONE_NEWNS))
234 		return -1;
235 
236 	if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0))
237 		return -1;
238 
239 	return 0;
240 }
241 
#ifndef ST_NOSYMFOLLOW
#define ST_NOSYMFOLLOW 0x2000 /* do not follow symlinks */
#endif

/*
 * Query the mount flags of the filesystem containing @path via statvfs()
 * and translate the ST_* bits of f_flag into a bitmask of MS_* values.
 *
 * Returns the translated mask, or -EINVAL when statvfs() fails or f_flag
 * contains a bit that is not part of the translation table below.
 */
static int read_mnt_flags(const char *path)
{
	static const struct {
		unsigned long st_flag;
		unsigned int mnt_flag;
	} flag_map[] = {
		{ ST_RDONLY,		MS_RDONLY },
		{ ST_NOSUID,		MS_NOSUID },
		{ ST_NODEV,		MS_NODEV },
		{ ST_NOEXEC,		MS_NOEXEC },
		{ ST_NOATIME,		MS_NOATIME },
		{ ST_NODIRATIME,	MS_NODIRATIME },
		{ ST_RELATIME,		MS_RELATIME },
		{ ST_SYNCHRONOUS,	MS_SYNCHRONOUS },
		/* Report the MS_ constant like every other entry does. */
		{ ST_MANDLOCK,		MS_MANDLOCK },
		/*
		 * NOTE(review): reported as ST_NOSYMFOLLOW rather than an MS_
		 * value. The two differ numerically, so callers presumably
		 * compare against ST_NOSYMFOLLOW - confirm before changing.
		 */
		{ ST_NOSYMFOLLOW,	ST_NOSYMFOLLOW },
	};
	struct statvfs stat;
	unsigned long known = 0;
	unsigned int mnt_flags = 0;
	size_t i;

	if (statvfs(path, &stat) != 0)
		return -EINVAL;

	for (i = 0; i < sizeof(flag_map) / sizeof(flag_map[0]); i++)
		known |= flag_map[i].st_flag;

	/* Refuse to silently drop flags this helper does not understand. */
	if (stat.f_flag & ~known)
		return -EINVAL;

	for (i = 0; i < sizeof(flag_map) / sizeof(flag_map[0]); i++) {
		if (stat.f_flag & flag_map[i].st_flag)
			mnt_flags |= flag_map[i].mnt_flag;
	}

	return mnt_flags;
}
285 
/*
 * Skip @nfields fields in @src, where a field ends at a single space or tab.
 *
 * Returns a pointer to the first character after the last skipped
 * separator. If the string ends before @nfields separators were found, a
 * pointer to the terminating NUL is returned instead; NULL is never
 * returned.
 */
static char *get_field(char *src, int nfields)
{
	char *cursor = src;
	int skipped = 0;

	while (skipped < nfields) {
		/* Advance to the next separator or to the end of the string. */
		cursor += strcspn(cursor, " \t");
		if (*cursor == '\0')
			break;

		cursor++;
		skipped++;
	}

	return cursor;
}
303 
/*
 * Terminate @word in place at its first space or tab. When the string
 * contains neither, the existing terminating NUL is simply rewritten.
 */
static void null_endofword(char *word)
{
	word[strcspn(word, " \t")] = '\0';
}
310 
/*
 * Report whether the mount whose mount point equals @path is marked shared.
 *
 * Scans /proc/self/mountinfo line by line. For each line the fifth
 * whitespace-separated field is compared against @path; on a match the
 * field two positions further on is searched for the "shared:" tag.
 *
 * Returns true when such a tag is found, false otherwise (including when
 * the file cannot be opened). The line buffer and the stream are released
 * on every path.
 */
static bool is_shared_mount(const char *path)
{
	bool shared = false;
	size_t len = 0;
	char *line = NULL;
	FILE *f;

	f = fopen("/proc/self/mountinfo", "re");
	if (!f)
		return false;

	while (getline(&line, &len, f) != -1) {
		char *opts, *target;

		target = get_field(line, 4);
		if (!target)
			continue;

		/* Must be located before @target gets NUL-terminated below. */
		opts = get_field(target, 2);
		if (!opts)
			continue;

		null_endofword(target);

		if (strcmp(target, path) != 0)
			continue;

		null_endofword(opts);
		if (strstr(opts, "shared:")) {
			/* Leave through the common cleanup instead of returning. */
			shared = true;
			break;
		}
	}

	free(line);
	fclose(f);

	return shared;
}
347 
348 static void *mount_setattr_thread(void *data)
349 {
350 	struct mount_attr attr = {
351 		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID,
352 		.attr_clr	= 0,
353 		.propagation	= MS_SHARED,
354 	};
355 
356 	if (sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)))
357 		pthread_exit(int_to_ptr(-1));
358 
359 	pthread_exit(int_to_ptr(0));
360 }
361 
/*
 * Attempt to de-conflict with the selftests tree: when the included harness
 * does not provide SKIP(), fall back to XFAIL() with the same arguments.
 */
#ifndef SKIP
#define SKIP(s, ...)	XFAIL(s, ##__VA_ARGS__)
#endif
366 
/*
 * Probe for the mount_setattr system call using deliberately invalid
 * arguments. Only a failure with ENOSYS counts as "not supported"; any
 * other outcome means the syscall exists.
 */
static bool mount_setattr_supported(void)
{
	if (sys_mount_setattr(-EBADF, "", AT_EMPTY_PATH, NULL, 0) >= 0)
		return true;

	return errno != ENOSYS;
}
377 
/* Fixture for the mount_setattr tests; it carries no per-test data. */
FIXTURE(mount_setattr) {
};

/* Regular file and symlink pointing at it, both created by the fixture setup. */
#define NOSYMFOLLOW_TARGET "/mnt/A/AA/data"
#define NOSYMFOLLOW_SYMLINK "/mnt/A/AA/symlink"
383 
384 FIXTURE_SETUP(mount_setattr)
385 {
386 	int fd = -EBADF;
387 
388 	if (!mount_setattr_supported())
389 		SKIP(return, "mount_setattr syscall not supported");
390 
391 	ASSERT_EQ(prepare_unpriv_mountns(), 0);
392 
393 	(void)umount2("/mnt", MNT_DETACH);
394 	(void)umount2("/tmp", MNT_DETACH);
395 
396 	ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
397 			"size=100000,mode=700"), 0);
398 
399 	ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
400 
401 	ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV,
402 			"size=100000,mode=700"), 0);
403 
404 	ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
405 
406 	ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV,
407 			"size=100000,mode=700"), 0);
408 
409 	ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
410 			"size=100000,mode=700"), 0);
411 
412 	ASSERT_EQ(mkdir("/mnt/A", 0777), 0);
413 
414 	ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV,
415 			"size=100000,mode=700"), 0);
416 
417 	ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);
418 
419 	ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);
420 
421 	ASSERT_EQ(mkdir("/mnt/B", 0777), 0);
422 
423 	ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
424 			MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0);
425 
426 	ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);
427 
428 	ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
429 			MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);
430 
431 	fd = creat(NOSYMFOLLOW_TARGET, O_RDWR | O_CLOEXEC);
432 	ASSERT_GT(fd, 0);
433 	ASSERT_EQ(symlink(NOSYMFOLLOW_TARGET, NOSYMFOLLOW_SYMLINK), 0);
434 	ASSERT_EQ(close(fd), 0);
435 }
436 
437 FIXTURE_TEARDOWN(mount_setattr)
438 {
439 	if (!mount_setattr_supported())
440 		SKIP(return, "mount_setattr syscall not supported");
441 
442 	(void)umount2("/mnt/A", MNT_DETACH);
443 	(void)umount2("/tmp", MNT_DETACH);
444 }
445 
446 TEST_F(mount_setattr, invalid_attributes)
447 {
448 	struct mount_attr invalid_attr = {
449 		.attr_set = (1U << 31),
450 	};
451 
452 	if (!mount_setattr_supported())
453 		SKIP(return, "mount_setattr syscall not supported");
454 
455 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
456 				    sizeof(invalid_attr)), 0);
457 
458 	invalid_attr.attr_set	= 0;
459 	invalid_attr.attr_clr	= (1U << 31);
460 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
461 				    sizeof(invalid_attr)), 0);
462 
463 	invalid_attr.attr_clr		= 0;
464 	invalid_attr.propagation	= (1U << 31);
465 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
466 				    sizeof(invalid_attr)), 0);
467 
468 	invalid_attr.attr_set		= (1U << 31);
469 	invalid_attr.attr_clr		= (1U << 31);
470 	invalid_attr.propagation	= (1U << 31);
471 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
472 				    sizeof(invalid_attr)), 0);
473 
474 	ASSERT_NE(sys_mount_setattr(-1, "mnt/A", AT_RECURSIVE, &invalid_attr,
475 				    sizeof(invalid_attr)), 0);
476 }
477 
478 TEST_F(mount_setattr, extensibility)
479 {
480 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
481 	char *s = "dummy";
482 	struct mount_attr invalid_attr = {};
483 	struct mount_attr_large {
484 		struct mount_attr attr1;
485 		struct mount_attr attr2;
486 		struct mount_attr attr3;
487 	} large_attr = {};
488 
489 	if (!mount_setattr_supported())
490 		SKIP(return, "mount_setattr syscall not supported");
491 
492 	old_flags = read_mnt_flags("/mnt/A");
493 	ASSERT_GT(old_flags, 0);
494 
495 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, NULL,
496 				    sizeof(invalid_attr)), 0);
497 	ASSERT_EQ(errno, EFAULT);
498 
499 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, (void *)s,
500 				    sizeof(invalid_attr)), 0);
501 	ASSERT_EQ(errno, EINVAL);
502 
503 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, 0), 0);
504 	ASSERT_EQ(errno, EINVAL);
505 
506 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
507 				    sizeof(invalid_attr) / 2), 0);
508 	ASSERT_EQ(errno, EINVAL);
509 
510 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
511 				    sizeof(invalid_attr) / 2), 0);
512 	ASSERT_EQ(errno, EINVAL);
513 
514 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
515 				    (void *)&large_attr, sizeof(large_attr)), 0);
516 
517 	large_attr.attr3.attr_set = MOUNT_ATTR_RDONLY;
518 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
519 				    (void *)&large_attr, sizeof(large_attr)), 0);
520 
521 	large_attr.attr3.attr_set = 0;
522 	large_attr.attr1.attr_set = MOUNT_ATTR_RDONLY;
523 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
524 				    (void *)&large_attr, sizeof(large_attr)), 0);
525 
526 	expected_flags = old_flags;
527 	expected_flags |= MS_RDONLY;
528 
529 	new_flags = read_mnt_flags("/mnt/A");
530 	ASSERT_EQ(new_flags, expected_flags);
531 
532 	new_flags = read_mnt_flags("/mnt/A/AA");
533 	ASSERT_EQ(new_flags, expected_flags);
534 
535 	new_flags = read_mnt_flags("/mnt/A/AA/B");
536 	ASSERT_EQ(new_flags, expected_flags);
537 
538 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
539 	ASSERT_EQ(new_flags, expected_flags);
540 }
541 
542 TEST_F(mount_setattr, basic)
543 {
544 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
545 	struct mount_attr attr = {
546 		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
547 		.attr_clr	= MOUNT_ATTR__ATIME,
548 	};
549 
550 	if (!mount_setattr_supported())
551 		SKIP(return, "mount_setattr syscall not supported");
552 
553 	old_flags = read_mnt_flags("/mnt/A");
554 	ASSERT_GT(old_flags, 0);
555 
556 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", 0, &attr, sizeof(attr)), 0);
557 
558 	expected_flags = old_flags;
559 	expected_flags |= MS_RDONLY;
560 	expected_flags |= MS_NOEXEC;
561 	expected_flags &= ~MS_NOATIME;
562 	expected_flags |= MS_RELATIME;
563 
564 	new_flags = read_mnt_flags("/mnt/A");
565 	ASSERT_EQ(new_flags, expected_flags);
566 
567 	new_flags = read_mnt_flags("/mnt/A/AA");
568 	ASSERT_EQ(new_flags, old_flags);
569 
570 	new_flags = read_mnt_flags("/mnt/A/AA/B");
571 	ASSERT_EQ(new_flags, old_flags);
572 
573 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
574 	ASSERT_EQ(new_flags, old_flags);
575 }
576 
577 TEST_F(mount_setattr, basic_recursive)
578 {
579 	int fd;
580 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
581 	struct mount_attr attr = {
582 		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
583 		.attr_clr	= MOUNT_ATTR__ATIME,
584 	};
585 
586 	if (!mount_setattr_supported())
587 		SKIP(return, "mount_setattr syscall not supported");
588 
589 	old_flags = read_mnt_flags("/mnt/A");
590 	ASSERT_GT(old_flags, 0);
591 
592 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
593 
594 	expected_flags = old_flags;
595 	expected_flags |= MS_RDONLY;
596 	expected_flags |= MS_NOEXEC;
597 	expected_flags &= ~MS_NOATIME;
598 	expected_flags |= MS_RELATIME;
599 
600 	new_flags = read_mnt_flags("/mnt/A");
601 	ASSERT_EQ(new_flags, expected_flags);
602 
603 	new_flags = read_mnt_flags("/mnt/A/AA");
604 	ASSERT_EQ(new_flags, expected_flags);
605 
606 	new_flags = read_mnt_flags("/mnt/A/AA/B");
607 	ASSERT_EQ(new_flags, expected_flags);
608 
609 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
610 	ASSERT_EQ(new_flags, expected_flags);
611 
612 	memset(&attr, 0, sizeof(attr));
613 	attr.attr_clr = MOUNT_ATTR_RDONLY;
614 	attr.propagation = MS_SHARED;
615 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
616 
617 	expected_flags &= ~MS_RDONLY;
618 	new_flags = read_mnt_flags("/mnt/A");
619 	ASSERT_EQ(new_flags, expected_flags);
620 
621 	ASSERT_EQ(is_shared_mount("/mnt/A"), true);
622 
623 	new_flags = read_mnt_flags("/mnt/A/AA");
624 	ASSERT_EQ(new_flags, expected_flags);
625 
626 	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
627 
628 	new_flags = read_mnt_flags("/mnt/A/AA/B");
629 	ASSERT_EQ(new_flags, expected_flags);
630 
631 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
632 
633 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
634 	ASSERT_EQ(new_flags, expected_flags);
635 
636 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
637 
638 	fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
639 	ASSERT_GE(fd, 0);
640 
641 	/*
642 	 * We're holding a fd open for writing so this needs to fail somewhere
643 	 * in the middle and the mount options need to be unchanged.
644 	 */
645 	attr.attr_set = MOUNT_ATTR_RDONLY;
646 	ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
647 
648 	new_flags = read_mnt_flags("/mnt/A");
649 	ASSERT_EQ(new_flags, expected_flags);
650 
651 	ASSERT_EQ(is_shared_mount("/mnt/A"), true);
652 
653 	new_flags = read_mnt_flags("/mnt/A/AA");
654 	ASSERT_EQ(new_flags, expected_flags);
655 
656 	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
657 
658 	new_flags = read_mnt_flags("/mnt/A/AA/B");
659 	ASSERT_EQ(new_flags, expected_flags);
660 
661 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
662 
663 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
664 	ASSERT_EQ(new_flags, expected_flags);
665 
666 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
667 
668 	EXPECT_EQ(close(fd), 0);
669 }
670 
671 TEST_F(mount_setattr, mount_has_writers)
672 {
673 	int fd, dfd;
674 	unsigned int old_flags = 0, new_flags = 0;
675 	struct mount_attr attr = {
676 		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
677 		.attr_clr	= MOUNT_ATTR__ATIME,
678 		.propagation	= MS_SHARED,
679 	};
680 
681 	if (!mount_setattr_supported())
682 		SKIP(return, "mount_setattr syscall not supported");
683 
684 	old_flags = read_mnt_flags("/mnt/A");
685 	ASSERT_GT(old_flags, 0);
686 
687 	fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
688 	ASSERT_GE(fd, 0);
689 
690 	/*
691 	 * We're holding a fd open to a mount somwhere in the middle so this
692 	 * needs to fail somewhere in the middle. After this the mount options
693 	 * need to be unchanged.
694 	 */
695 	ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
696 
697 	new_flags = read_mnt_flags("/mnt/A");
698 	ASSERT_EQ(new_flags, old_flags);
699 
700 	ASSERT_EQ(is_shared_mount("/mnt/A"), false);
701 
702 	new_flags = read_mnt_flags("/mnt/A/AA");
703 	ASSERT_EQ(new_flags, old_flags);
704 
705 	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), false);
706 
707 	new_flags = read_mnt_flags("/mnt/A/AA/B");
708 	ASSERT_EQ(new_flags, old_flags);
709 
710 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), false);
711 
712 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
713 	ASSERT_EQ(new_flags, old_flags);
714 
715 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), false);
716 
717 	dfd = open("/mnt/A/AA/B", O_DIRECTORY | O_CLOEXEC);
718 	ASSERT_GE(dfd, 0);
719 	EXPECT_EQ(fsync(dfd), 0);
720 	EXPECT_EQ(close(dfd), 0);
721 
722 	EXPECT_EQ(fsync(fd), 0);
723 	EXPECT_EQ(close(fd), 0);
724 
725 	/* All writers are gone so this should succeed. */
726 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
727 }
728 
729 TEST_F(mount_setattr, mixed_mount_options)
730 {
731 	unsigned int old_flags1 = 0, old_flags2 = 0, new_flags = 0, expected_flags = 0;
732 	struct mount_attr attr = {
733 		.attr_clr = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR__ATIME,
734 		.attr_set = MOUNT_ATTR_RELATIME,
735 	};
736 
737 	if (!mount_setattr_supported())
738 		SKIP(return, "mount_setattr syscall not supported");
739 
740 	old_flags1 = read_mnt_flags("/mnt/B");
741 	ASSERT_GT(old_flags1, 0);
742 
743 	old_flags2 = read_mnt_flags("/mnt/B/BB");
744 	ASSERT_GT(old_flags2, 0);
745 
746 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/B", AT_RECURSIVE, &attr, sizeof(attr)), 0);
747 
748 	expected_flags = old_flags2;
749 	expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
750 	expected_flags |= MS_RELATIME;
751 
752 	new_flags = read_mnt_flags("/mnt/B");
753 	ASSERT_EQ(new_flags, expected_flags);
754 
755 	expected_flags = old_flags2;
756 	expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
757 	expected_flags |= MS_RELATIME;
758 
759 	new_flags = read_mnt_flags("/mnt/B/BB");
760 	ASSERT_EQ(new_flags, expected_flags);
761 }
762 
763 TEST_F(mount_setattr, time_changes)
764 {
765 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
766 	struct mount_attr attr = {
767 		.attr_set	= MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME,
768 	};
769 
770 	if (!mount_setattr_supported())
771 		SKIP(return, "mount_setattr syscall not supported");
772 
773 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
774 
775 	attr.attr_set = MOUNT_ATTR_STRICTATIME;
776 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
777 
778 	attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
779 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
780 
781 	attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
782 	attr.attr_clr = MOUNT_ATTR__ATIME;
783 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
784 
785 	attr.attr_set = 0;
786 	attr.attr_clr = MOUNT_ATTR_STRICTATIME;
787 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
788 
789 	attr.attr_clr = MOUNT_ATTR_NOATIME;
790 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
791 
792 	old_flags = read_mnt_flags("/mnt/A");
793 	ASSERT_GT(old_flags, 0);
794 
795 	attr.attr_set = MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME;
796 	attr.attr_clr = MOUNT_ATTR__ATIME;
797 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
798 
799 	expected_flags = old_flags;
800 	expected_flags |= MS_NOATIME;
801 	expected_flags |= MS_NODIRATIME;
802 
803 	new_flags = read_mnt_flags("/mnt/A");
804 	ASSERT_EQ(new_flags, expected_flags);
805 
806 	new_flags = read_mnt_flags("/mnt/A/AA");
807 	ASSERT_EQ(new_flags, expected_flags);
808 
809 	new_flags = read_mnt_flags("/mnt/A/AA/B");
810 	ASSERT_EQ(new_flags, expected_flags);
811 
812 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
813 	ASSERT_EQ(new_flags, expected_flags);
814 
815 	memset(&attr, 0, sizeof(attr));
816 	attr.attr_set &= ~MOUNT_ATTR_NOATIME;
817 	attr.attr_set |= MOUNT_ATTR_RELATIME;
818 	attr.attr_clr |= MOUNT_ATTR__ATIME;
819 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
820 
821 	expected_flags &= ~MS_NOATIME;
822 	expected_flags |= MS_RELATIME;
823 
824 	new_flags = read_mnt_flags("/mnt/A");
825 	ASSERT_EQ(new_flags, expected_flags);
826 
827 	new_flags = read_mnt_flags("/mnt/A/AA");
828 	ASSERT_EQ(new_flags, expected_flags);
829 
830 	new_flags = read_mnt_flags("/mnt/A/AA/B");
831 	ASSERT_EQ(new_flags, expected_flags);
832 
833 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
834 	ASSERT_EQ(new_flags, expected_flags);
835 
836 	memset(&attr, 0, sizeof(attr));
837 	attr.attr_set &= ~MOUNT_ATTR_RELATIME;
838 	attr.attr_set |= MOUNT_ATTR_STRICTATIME;
839 	attr.attr_clr |= MOUNT_ATTR__ATIME;
840 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
841 
842 	expected_flags &= ~MS_RELATIME;
843 
844 	new_flags = read_mnt_flags("/mnt/A");
845 	ASSERT_EQ(new_flags, expected_flags);
846 
847 	new_flags = read_mnt_flags("/mnt/A/AA");
848 	ASSERT_EQ(new_flags, expected_flags);
849 
850 	new_flags = read_mnt_flags("/mnt/A/AA/B");
851 	ASSERT_EQ(new_flags, expected_flags);
852 
853 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
854 	ASSERT_EQ(new_flags, expected_flags);
855 
856 	memset(&attr, 0, sizeof(attr));
857 	attr.attr_set &= ~MOUNT_ATTR_STRICTATIME;
858 	attr.attr_set |= MOUNT_ATTR_NOATIME;
859 	attr.attr_clr |= MOUNT_ATTR__ATIME;
860 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
861 
862 	expected_flags |= MS_NOATIME;
863 	new_flags = read_mnt_flags("/mnt/A");
864 	ASSERT_EQ(new_flags, expected_flags);
865 
866 	new_flags = read_mnt_flags("/mnt/A/AA");
867 	ASSERT_EQ(new_flags, expected_flags);
868 
869 	new_flags = read_mnt_flags("/mnt/A/AA/B");
870 	ASSERT_EQ(new_flags, expected_flags);
871 
872 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
873 	ASSERT_EQ(new_flags, expected_flags);
874 
875 	memset(&attr, 0, sizeof(attr));
876 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
877 
878 	new_flags = read_mnt_flags("/mnt/A");
879 	ASSERT_EQ(new_flags, expected_flags);
880 
881 	new_flags = read_mnt_flags("/mnt/A/AA");
882 	ASSERT_EQ(new_flags, expected_flags);
883 
884 	new_flags = read_mnt_flags("/mnt/A/AA/B");
885 	ASSERT_EQ(new_flags, expected_flags);
886 
887 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
888 	ASSERT_EQ(new_flags, expected_flags);
889 
890 	memset(&attr, 0, sizeof(attr));
891 	attr.attr_clr = MOUNT_ATTR_NODIRATIME;
892 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
893 
894 	expected_flags &= ~MS_NODIRATIME;
895 
896 	new_flags = read_mnt_flags("/mnt/A");
897 	ASSERT_EQ(new_flags, expected_flags);
898 
899 	new_flags = read_mnt_flags("/mnt/A/AA");
900 	ASSERT_EQ(new_flags, expected_flags);
901 
902 	new_flags = read_mnt_flags("/mnt/A/AA/B");
903 	ASSERT_EQ(new_flags, expected_flags);
904 
905 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
906 	ASSERT_EQ(new_flags, expected_flags);
907 }
908 
909 TEST_F(mount_setattr, multi_threaded)
910 {
911 	int i, j, nthreads, ret = 0;
912 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
913 	pthread_attr_t pattr;
914 	pthread_t threads[DEFAULT_THREADS];
915 
916 	if (!mount_setattr_supported())
917 		SKIP(return, "mount_setattr syscall not supported");
918 
919 	old_flags = read_mnt_flags("/mnt/A");
920 	ASSERT_GT(old_flags, 0);
921 
922 	/* Try to change mount options from multiple threads. */
923 	nthreads = get_nprocs_conf();
924 	if (nthreads > DEFAULT_THREADS)
925 		nthreads = DEFAULT_THREADS;
926 
927 	pthread_attr_init(&pattr);
928 	for (i = 0; i < nthreads; i++)
929 		ASSERT_EQ(pthread_create(&threads[i], &pattr, mount_setattr_thread, NULL), 0);
930 
931 	for (j = 0; j < i; j++) {
932 		void *retptr = NULL;
933 
934 		EXPECT_EQ(pthread_join(threads[j], &retptr), 0);
935 
936 		ret += ptr_to_int(retptr);
937 		EXPECT_EQ(ret, 0);
938 	}
939 	pthread_attr_destroy(&pattr);
940 
941 	ASSERT_EQ(ret, 0);
942 
943 	expected_flags = old_flags;
944 	expected_flags |= MS_RDONLY;
945 	expected_flags |= MS_NOSUID;
946 	new_flags = read_mnt_flags("/mnt/A");
947 	ASSERT_EQ(new_flags, expected_flags);
948 
949 	ASSERT_EQ(is_shared_mount("/mnt/A"), true);
950 
951 	new_flags = read_mnt_flags("/mnt/A/AA");
952 	ASSERT_EQ(new_flags, expected_flags);
953 
954 	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
955 
956 	new_flags = read_mnt_flags("/mnt/A/AA/B");
957 	ASSERT_EQ(new_flags, expected_flags);
958 
959 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
960 
961 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
962 	ASSERT_EQ(new_flags, expected_flags);
963 
964 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
965 }
966 
967 TEST_F(mount_setattr, wrong_user_namespace)
968 {
969 	int ret;
970 	struct mount_attr attr = {
971 		.attr_set = MOUNT_ATTR_RDONLY,
972 	};
973 
974 	if (!mount_setattr_supported())
975 		SKIP(return, "mount_setattr syscall not supported");
976 
977 	EXPECT_EQ(create_and_enter_userns(), 0);
978 	ret = sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr));
979 	ASSERT_LT(ret, 0);
980 	ASSERT_EQ(errno, EPERM);
981 }
982 
983 TEST_F(mount_setattr, wrong_mount_namespace)
984 {
985 	int fd, ret;
986 	struct mount_attr attr = {
987 		.attr_set = MOUNT_ATTR_RDONLY,
988 	};
989 
990 	if (!mount_setattr_supported())
991 		SKIP(return, "mount_setattr syscall not supported");
992 
993 	fd = open("/mnt/A", O_DIRECTORY | O_CLOEXEC);
994 	ASSERT_GE(fd, 0);
995 
996 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
997 
998 	ret = sys_mount_setattr(fd, "", AT_EMPTY_PATH | AT_RECURSIVE, &attr, sizeof(attr));
999 	ASSERT_LT(ret, 0);
1000 	ASSERT_EQ(errno, EINVAL);
1001 }
1002 
/* Fixture for the idmapped-mount tests; it carries no per-test data. */
FIXTURE(mount_setattr_idmapped) {
};
1005 
/*
 * Build the mount tree for the idmapped-mount tests in a new mount
 * namespace. Unlike the mount_setattr fixture, no new user namespace is
 * entered here. In addition to the tmpfs/ramfs/devpts layout, files named
 * "b" owned by 0:0 are created under /tmp/B and /tmp/B/BB before those
 * directories are mounted over, and an ext4 image at /mnt/C/ext4.img is
 * formatted and loop-mounted on /mnt/D using external mkfs.ext4 and mount
 * commands.
 */
FIXTURE_SETUP(mount_setattr_idmapped)
{
	int img_fd = -EBADF;

	ASSERT_EQ(unshare(CLONE_NEWNS), 0);

	/* Make every mount private, recursively from "/". */
	ASSERT_EQ(mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0), 0);

	/* Best effort: start from clean /mnt and /tmp mount points. */
	(void)umount2("/mnt", MNT_DETACH);
	(void)umount2("/tmp", MNT_DETACH);

	ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
			"size=100000,mode=700"), 0);

	ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
	/* Created on the /tmp tmpfs; /tmp/B is mounted over right after. */
	ASSERT_EQ(mknodat(-EBADF, "/tmp/B/b", S_IFREG | 0644, 0), 0);
	ASSERT_EQ(chown("/tmp/B/b", 0, 0), 0);

	ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV,
			"size=100000,mode=700"), 0);

	ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
	/* Created on the /tmp/B tmpfs; /tmp/B/BB is mounted over right after. */
	ASSERT_EQ(mknodat(-EBADF, "/tmp/B/BB/b", S_IFREG | 0644, 0), 0);
	ASSERT_EQ(chown("/tmp/B/BB/b", 0, 0), 0);

	ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV,
			"size=100000,mode=700"), 0);

	ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
			"size=100000,mode=700"), 0);

	ASSERT_EQ(mkdir("/mnt/A", 0777), 0);

	ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV,
			"size=100000,mode=700"), 0);

	ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);

	/* Recursive bind mount of the /tmp tree built above. */
	ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);

	ASSERT_EQ(mkdir("/mnt/B", 0777), 0);

	ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
			MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0);

	ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);

	/*
	 * NOTE(review): the directory created just above is /mnt/B/BB while
	 * devpts is mounted on /tmp/B/BB - confirm which path was intended.
	 */
	ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
			MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);

	ASSERT_EQ(mkdir("/mnt/C", 0777), 0);
	ASSERT_EQ(mkdir("/mnt/D", 0777), 0);
	/* 2 MiB (1024 * 2048 bytes) backing file for the ext4 filesystem. */
	img_fd = openat(-EBADF, "/mnt/C/ext4.img", O_CREAT | O_WRONLY, 0600);
	ASSERT_GE(img_fd, 0);
	ASSERT_EQ(ftruncate(img_fd, 1024 * 2048), 0);
	ASSERT_EQ(system("mkfs.ext4 -q /mnt/C/ext4.img"), 0);
	ASSERT_EQ(system("mount -o loop -t ext4 /mnt/C/ext4.img /mnt/D/"), 0);
	ASSERT_EQ(close(img_fd), 0);
}
1065 
/* Lazily detach the mounts at /mnt/A and /tmp; failures are ignored. */
FIXTURE_TEARDOWN(mount_setattr_idmapped)
{
	static const char *const detach[] = { "/mnt/A", "/tmp" };
	size_t i;

	for (i = 0; i < sizeof(detach) / sizeof(detach[0]); i++)
		(void)umount2(detach[i], MNT_DETACH);
}
1071 
1072 /**
1073  * Validate that negative fd values are rejected.
1074  */
1075 TEST_F(mount_setattr_idmapped, invalid_fd_negative)
1076 {
1077 	struct mount_attr attr = {
1078 		.attr_set	= MOUNT_ATTR_IDMAP,
1079 		.userns_fd	= -EBADF,
1080 	};
1081 
1082 	if (!mount_setattr_supported())
1083 		SKIP(return, "mount_setattr syscall not supported");
1084 
1085 	ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
1086 		TH_LOG("failure: created idmapped mount with negative fd");
1087 	}
1088 }
1089 
1090 /**
1091  * Validate that excessively large fd values are rejected.
1092  */
1093 TEST_F(mount_setattr_idmapped, invalid_fd_large)
1094 {
1095 	struct mount_attr attr = {
1096 		.attr_set	= MOUNT_ATTR_IDMAP,
1097 		.userns_fd	= INT64_MAX,
1098 	};
1099 
1100 	if (!mount_setattr_supported())
1101 		SKIP(return, "mount_setattr syscall not supported");
1102 
1103 	ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
1104 		TH_LOG("failure: created idmapped mount with too large fd value");
1105 	}
1106 }
1107 
1108 /**
1109  * Validate that closed fd values are rejected.
1110  */
1111 TEST_F(mount_setattr_idmapped, invalid_fd_closed)
1112 {
1113 	int fd;
1114 	struct mount_attr attr = {
1115 		.attr_set = MOUNT_ATTR_IDMAP,
1116 	};
1117 
1118 	if (!mount_setattr_supported())
1119 		SKIP(return, "mount_setattr syscall not supported");
1120 
1121 	fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
1122 	ASSERT_GE(fd, 0);
1123 	ASSERT_GE(close(fd), 0);
1124 
1125 	attr.userns_fd = fd;
1126 	ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
1127 		TH_LOG("failure: created idmapped mount with closed fd");
1128 	}
1129 }
1130 
1131 /**
1132  * Validate that the initial user namespace is rejected.
1133  */
1134 TEST_F(mount_setattr_idmapped, invalid_fd_initial_userns)
1135 {
1136 	int open_tree_fd = -EBADF;
1137 	struct mount_attr attr = {
1138 		.attr_set = MOUNT_ATTR_IDMAP,
1139 	};
1140 
1141 	if (!mount_setattr_supported())
1142 		SKIP(return, "mount_setattr syscall not supported");
1143 
1144 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1145 				     AT_NO_AUTOMOUNT |
1146 				     AT_SYMLINK_NOFOLLOW |
1147 				     OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
1148 	ASSERT_GE(open_tree_fd, 0);
1149 
1150 	attr.userns_fd = open("/proc/1/ns/user", O_RDONLY | O_CLOEXEC);
1151 	ASSERT_GE(attr.userns_fd, 0);
1152 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1153 	ASSERT_EQ(errno, EPERM);
1154 	ASSERT_EQ(close(attr.userns_fd), 0);
1155 	ASSERT_EQ(close(open_tree_fd), 0);
1156 }
1157 
/*
 * Write the single mapping "<nsid> <hostid> <range>" to both
 * /proc/<pid>/uid_map and /proc/<pid>/gid_map.
 *
 * Returns 0 when both writes succeed, -1 as soon as write_file() reports a
 * failure (the gid map is not attempted if the uid map could not be written).
 */
static int map_ids(pid_t pid, unsigned long nsid, unsigned long hostid,
		   unsigned long range)
{
	char mapping[100], target[256];
	size_t mapping_len;

	/* The same mapping line is used for uids and gids. */
	snprintf(mapping, sizeof(mapping), "%lu %lu %lu", nsid, hostid, range);
	mapping_len = strlen(mapping);

	snprintf(target, sizeof(target), "/proc/%d/uid_map", pid);
	if (write_file(target, mapping, mapping_len))
		return -1;

	snprintf(target, sizeof(target), "/proc/%d/gid_map", pid);
	if (write_file(target, mapping, mapping_len))
		return -1;

	return 0;
}
1176 
#define __STACK_SIZE (8 * 1024 * 1024)
/*
 * Run fn(arg) in a child created with clone(2).
 *
 * flags is handed to clone() with SIGCHLD or'ed in. The child gets a freshly
 * allocated stack of __STACK_SIZE bytes.
 *
 * Returns the child's pid on success, -ENOMEM if the stack cannot be
 * allocated, or the (negative) result of clone() on failure; errno from
 * clone() is preserved across the cleanup below.
 */
static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
{
	void *stack;
	pid_t pid;
	int saved_errno;

	stack = malloc(__STACK_SIZE);
	if (!stack)
		return -ENOMEM;

#ifdef __ia64__
	pid = __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL);
#else
	/* clone() is given the upper end of the allocated stack area. */
	pid = clone(fn, (char *)stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL);
#endif

#ifdef CLONE_VM
	/* A child sharing our address space is still running on this stack. */
	if (pid >= 0 && (flags & CLONE_VM))
		return pid;
#endif

	/*
	 * Otherwise the child works on its own copy of the memory (or was
	 * never created), so the parent's allocation can be released instead
	 * of leaking it on every call.
	 */
	saved_errno = errno;
	free(stack);
	errno = saved_errno;

	return pid;
}
1192 
/*
 * Child entry point used by get_userns_fd(): send SIGSTOP to ourselves and
 * return kill()'s result. The data argument is not used.
 */
static int get_userns_fd_cb(void *data)
{
	const pid_t own_pid = getpid();

	return kill(own_pid, SIGSTOP);
}
1197 
/*
 * Reap the child identified by pid, retrying when waitpid() is interrupted.
 *
 * Returns the child's exit status if it exited normally, -1 if waitpid()
 * failed for a reason other than EINTR or the child did not exit normally.
 */
static int wait_for_pid(pid_t pid)
{
	int wstatus;

	while (waitpid(pid, &wstatus, 0) == -1) {
		/* Only an interrupted wait is worth retrying. */
		if (errno != EINTR)
			return -1;
	}

	return WIFEXITED(wstatus) ? WEXITSTATUS(wstatus) : -1;
}
1216 
1217 static int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long range)
1218 {
1219 	int ret;
1220 	pid_t pid;
1221 	char path[256];
1222 
1223 	pid = do_clone(get_userns_fd_cb, NULL, CLONE_NEWUSER);
1224 	if (pid < 0)
1225 		return -errno;
1226 
1227 	ret = map_ids(pid, nsid, hostid, range);
1228 	if (ret < 0)
1229 		return ret;
1230 
1231 	snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
1232 	ret = open(path, O_RDONLY | O_CLOEXEC);
1233 	kill(pid, SIGKILL);
1234 	wait_for_pid(pid);
1235 	return ret;
1236 }
1237 
1238 /**
1239  * Validate that an attached mount in our mount namespace cannot be idmapped.
1240  * (The kernel enforces that the mount's mount namespace and the caller's mount
1241  *  namespace match.)
1242  */
1243 TEST_F(mount_setattr_idmapped, attached_mount_inside_current_mount_namespace)
1244 {
1245 	int open_tree_fd = -EBADF;
1246 	struct mount_attr attr = {
1247 		.attr_set = MOUNT_ATTR_IDMAP,
1248 	};
1249 
1250 	if (!mount_setattr_supported())
1251 		SKIP(return, "mount_setattr syscall not supported");
1252 
1253 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1254 				     AT_EMPTY_PATH |
1255 				     AT_NO_AUTOMOUNT |
1256 				     AT_SYMLINK_NOFOLLOW |
1257 				     OPEN_TREE_CLOEXEC);
1258 	ASSERT_GE(open_tree_fd, 0);
1259 
1260 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1261 	ASSERT_GE(attr.userns_fd, 0);
1262 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1263 	ASSERT_EQ(close(attr.userns_fd), 0);
1264 	ASSERT_EQ(close(open_tree_fd), 0);
1265 }
1266 
1267 /**
1268  * Validate that idmapping a mount is rejected if the mount's mount namespace
1269  * and our mount namespace don't match.
1270  * (The kernel enforces that the mount's mount namespace and the caller's mount
1271  *  namespace match.)
1272  */
1273 TEST_F(mount_setattr_idmapped, attached_mount_outside_current_mount_namespace)
1274 {
1275 	int open_tree_fd = -EBADF;
1276 	struct mount_attr attr = {
1277 		.attr_set = MOUNT_ATTR_IDMAP,
1278 	};
1279 
1280 	if (!mount_setattr_supported())
1281 		SKIP(return, "mount_setattr syscall not supported");
1282 
1283 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1284 				     AT_EMPTY_PATH |
1285 				     AT_NO_AUTOMOUNT |
1286 				     AT_SYMLINK_NOFOLLOW |
1287 				     OPEN_TREE_CLOEXEC);
1288 	ASSERT_GE(open_tree_fd, 0);
1289 
1290 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
1291 
1292 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1293 	ASSERT_GE(attr.userns_fd, 0);
1294 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr,
1295 				    sizeof(attr)), 0);
1296 	ASSERT_EQ(close(attr.userns_fd), 0);
1297 	ASSERT_EQ(close(open_tree_fd), 0);
1298 }
1299 
1300 /**
1301  * Validate that an attached mount in our mount namespace can be idmapped.
1302  */
1303 TEST_F(mount_setattr_idmapped, detached_mount_inside_current_mount_namespace)
1304 {
1305 	int open_tree_fd = -EBADF;
1306 	struct mount_attr attr = {
1307 		.attr_set = MOUNT_ATTR_IDMAP,
1308 	};
1309 
1310 	if (!mount_setattr_supported())
1311 		SKIP(return, "mount_setattr syscall not supported");
1312 
1313 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1314 				     AT_EMPTY_PATH |
1315 				     AT_NO_AUTOMOUNT |
1316 				     AT_SYMLINK_NOFOLLOW |
1317 				     OPEN_TREE_CLOEXEC |
1318 				     OPEN_TREE_CLONE);
1319 	ASSERT_GE(open_tree_fd, 0);
1320 
1321 	/* Changing mount properties on a detached mount. */
1322 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1323 	ASSERT_GE(attr.userns_fd, 0);
1324 	ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1325 				    AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1326 	ASSERT_EQ(close(attr.userns_fd), 0);
1327 	ASSERT_EQ(close(open_tree_fd), 0);
1328 }
1329 
1330 /**
1331  * Validate that a detached mount not in our mount namespace can be idmapped.
1332  */
1333 TEST_F(mount_setattr_idmapped, detached_mount_outside_current_mount_namespace)
1334 {
1335 	int open_tree_fd = -EBADF;
1336 	struct mount_attr attr = {
1337 		.attr_set = MOUNT_ATTR_IDMAP,
1338 	};
1339 
1340 	if (!mount_setattr_supported())
1341 		SKIP(return, "mount_setattr syscall not supported");
1342 
1343 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1344 				     AT_EMPTY_PATH |
1345 				     AT_NO_AUTOMOUNT |
1346 				     AT_SYMLINK_NOFOLLOW |
1347 				     OPEN_TREE_CLOEXEC |
1348 				     OPEN_TREE_CLONE);
1349 	ASSERT_GE(open_tree_fd, 0);
1350 
1351 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
1352 
1353 	/* Changing mount properties on a detached mount. */
1354 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1355 	ASSERT_GE(attr.userns_fd, 0);
1356 	ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1357 				    AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1358 	ASSERT_EQ(close(attr.userns_fd), 0);
1359 	ASSERT_EQ(close(open_tree_fd), 0);
1360 }
1361 
1362 /**
1363  * Validate that currently changing the idmapping of an idmapped mount fails.
1364  */
1365 TEST_F(mount_setattr_idmapped, change_idmapping)
1366 {
1367 	int open_tree_fd = -EBADF;
1368 	struct mount_attr attr = {
1369 		.attr_set = MOUNT_ATTR_IDMAP,
1370 	};
1371 
1372 	if (!mount_setattr_supported())
1373 		SKIP(return, "mount_setattr syscall not supported");
1374 
1375 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1376 				     AT_EMPTY_PATH |
1377 				     AT_NO_AUTOMOUNT |
1378 				     AT_SYMLINK_NOFOLLOW |
1379 				     OPEN_TREE_CLOEXEC |
1380 				     OPEN_TREE_CLONE);
1381 	ASSERT_GE(open_tree_fd, 0);
1382 
1383 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1384 	ASSERT_GE(attr.userns_fd, 0);
1385 	ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1386 				    AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1387 	ASSERT_EQ(close(attr.userns_fd), 0);
1388 
1389 	/* Change idmapping on a detached mount that is already idmapped. */
1390 	attr.userns_fd	= get_userns_fd(0, 20000, 10000);
1391 	ASSERT_GE(attr.userns_fd, 0);
1392 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1393 	ASSERT_EQ(close(attr.userns_fd), 0);
1394 	ASSERT_EQ(close(open_tree_fd), 0);
1395 }
1396 
/*
 * Check the ownership of path (resolved relative to dfd, with fstatat()
 * flags).
 *
 * Returns true only if fstatat() succeeds and the file's uid and gid equal
 * expected_uid and expected_gid; a failing fstatat() yields false.
 */
static bool expected_uid_gid(int dfd, const char *path, int flags,
			     uid_t expected_uid, gid_t expected_gid)
{
	struct stat sb;

	if (fstatat(dfd, path, &sb, flags) < 0)
		return false;

	if (sb.st_uid != expected_uid)
		return false;

	return sb.st_gid == expected_gid;
}
1409 
1410 TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid)
1411 {
1412 	int open_tree_fd = -EBADF;
1413 	struct mount_attr attr = {
1414 		.attr_set = MOUNT_ATTR_IDMAP,
1415 	};
1416 
1417 	if (!mount_setattr_supported())
1418 		SKIP(return, "mount_setattr syscall not supported");
1419 
1420 	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
1421 	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
1422 
1423 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/A",
1424 				     AT_RECURSIVE |
1425 				     AT_EMPTY_PATH |
1426 				     AT_NO_AUTOMOUNT |
1427 				     AT_SYMLINK_NOFOLLOW |
1428 				     OPEN_TREE_CLOEXEC |
1429 				     OPEN_TREE_CLONE);
1430 	ASSERT_GE(open_tree_fd, 0);
1431 
1432 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1433 	ASSERT_GE(attr.userns_fd, 0);
1434 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1435 	ASSERT_EQ(close(attr.userns_fd), 0);
1436 	ASSERT_EQ(close(open_tree_fd), 0);
1437 
1438 	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
1439 	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
1440 	ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/b", 0, 0, 0), 0);
1441 	ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/BB/b", 0, 0, 0), 0);
1442 }
1443 
1444 TEST_F(mount_setattr, mount_attr_nosymfollow)
1445 {
1446 	int fd;
1447 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
1448 	struct mount_attr attr = {
1449 		.attr_set	= MOUNT_ATTR_NOSYMFOLLOW,
1450 	};
1451 
1452 	if (!mount_setattr_supported())
1453 		SKIP(return, "mount_setattr syscall not supported");
1454 
1455 	fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
1456 	ASSERT_GT(fd, 0);
1457 	ASSERT_EQ(close(fd), 0);
1458 
1459 	old_flags = read_mnt_flags("/mnt/A");
1460 	ASSERT_GT(old_flags, 0);
1461 
1462 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
1463 
1464 	expected_flags = old_flags;
1465 	expected_flags |= ST_NOSYMFOLLOW;
1466 
1467 	new_flags = read_mnt_flags("/mnt/A");
1468 	ASSERT_EQ(new_flags, expected_flags);
1469 
1470 	new_flags = read_mnt_flags("/mnt/A/AA");
1471 	ASSERT_EQ(new_flags, expected_flags);
1472 
1473 	new_flags = read_mnt_flags("/mnt/A/AA/B");
1474 	ASSERT_EQ(new_flags, expected_flags);
1475 
1476 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
1477 	ASSERT_EQ(new_flags, expected_flags);
1478 
1479 	fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
1480 	ASSERT_LT(fd, 0);
1481 	ASSERT_EQ(errno, ELOOP);
1482 
1483 	attr.attr_set &= ~MOUNT_ATTR_NOSYMFOLLOW;
1484 	attr.attr_clr |= MOUNT_ATTR_NOSYMFOLLOW;
1485 
1486 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
1487 
1488 	expected_flags &= ~ST_NOSYMFOLLOW;
1489 	new_flags = read_mnt_flags("/mnt/A");
1490 	ASSERT_EQ(new_flags, expected_flags);
1491 
1492 	new_flags = read_mnt_flags("/mnt/A/AA");
1493 	ASSERT_EQ(new_flags, expected_flags);
1494 
1495 	new_flags = read_mnt_flags("/mnt/A/AA/B");
1496 	ASSERT_EQ(new_flags, expected_flags);
1497 
1498 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
1499 	ASSERT_EQ(new_flags, expected_flags);
1500 
1501 	fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
1502 	ASSERT_GT(fd, 0);
1503 	ASSERT_EQ(close(fd), 0);
1504 }
1505 
/* NOTE(review): presumably expands to the test program's entry point; defined in ../kselftest_harness.h — confirm there. */
TEST_HARNESS_MAIN
1507