1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 #include <sched.h>
4 #include <stdio.h>
5 #include <errno.h>
6 #include <pthread.h>
7 #include <string.h>
8 #include <sys/stat.h>
9 #include <sys/types.h>
10 #include <sys/mount.h>
11 #include <sys/wait.h>
12 #include <sys/vfs.h>
13 #include <sys/statvfs.h>
14 #include <sys/sysinfo.h>
15 #include <stdlib.h>
16 #include <unistd.h>
17 #include <fcntl.h>
18 #include <grp.h>
19 #include <stdbool.h>
20 #include <stdarg.h>
21 #include <linux/mount.h>
22 
23 #include "../kselftest_harness.h"
24 
/*
 * Fallback values for constants that the installed libc or kernel headers
 * may not provide. Every definition is guarded by #ifndef, so a value that
 * the included headers do supply always takes precedence.
 */

/* Namespace flags passed to unshare() below. */
#ifndef CLONE_NEWNS
#define CLONE_NEWNS 0x00020000
#endif

#ifndef CLONE_NEWUSER
#define CLONE_NEWUSER 0x10000000
#endif

/* mount(2) MS_* flags. */
#ifndef MS_REC
#define MS_REC 16384
#endif

#ifndef MS_RELATIME
#define MS_RELATIME (1 << 21)
#endif

#ifndef MS_STRICTATIME
#define MS_STRICTATIME (1 << 24)
#endif

/* MOUNT_ATTR_* bits used in struct mount_attr's attr_set / attr_clr. */
#ifndef MOUNT_ATTR_RDONLY
#define MOUNT_ATTR_RDONLY 0x00000001
#endif

#ifndef MOUNT_ATTR_NOSUID
#define MOUNT_ATTR_NOSUID 0x00000002
#endif

#ifndef MOUNT_ATTR_NOEXEC
#define MOUNT_ATTR_NOEXEC 0x00000008
#endif

#ifndef MOUNT_ATTR_NODIRATIME
#define MOUNT_ATTR_NODIRATIME 0x00000080
#endif

/*
 * MOUNT_ATTR__ATIME is the mask covering the atime modes defined below;
 * note that MOUNT_ATTR_RELATIME is the all-zero value.
 */
#ifndef MOUNT_ATTR__ATIME
#define MOUNT_ATTR__ATIME 0x00000070
#endif

#ifndef MOUNT_ATTR_RELATIME
#define MOUNT_ATTR_RELATIME 0x00000000
#endif

#ifndef MOUNT_ATTR_NOATIME
#define MOUNT_ATTR_NOATIME 0x00000010
#endif

#ifndef MOUNT_ATTR_STRICTATIME
#define MOUNT_ATTR_STRICTATIME 0x00000020
#endif

/* Flag passed to mount_setattr() by the recursive tests in this file. */
#ifndef AT_RECURSIVE
#define AT_RECURSIVE 0x8000
#endif

#ifndef MS_SHARED
#define MS_SHARED (1 << 20)
#endif

/* Upper bound on the number of threads started by the multi_threaded test. */
#define DEFAULT_THREADS 4
/* Carry a small integer through a void pointer (used for thread exit values). */
#define ptr_to_int(p) ((int)((intptr_t)(p)))
#define int_to_ptr(u) ((void *)((intptr_t)(u)))
88 
/*
 * Syscall number for mount_setattr, used only when the system headers do not
 * define it. Alpha, MIPS (one value per ABI) and ia64 have their own
 * numbering; every other architecture uses the value in the final #else.
 * On MIPS the macro stays undefined if _MIPS_SIM matches none of the three
 * ABIs listed.
 */
#ifndef __NR_mount_setattr
	#if defined __alpha__
		#define __NR_mount_setattr 552
	#elif defined _MIPS_SIM
		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
			#define __NR_mount_setattr (442 + 4000)
		#endif
		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
			#define __NR_mount_setattr (442 + 6000)
		#endif
		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
			#define __NR_mount_setattr (442 + 5000)
		#endif
	#elif defined __ia64__
		#define __NR_mount_setattr (442 + 1024)
	#else
		#define __NR_mount_setattr 442
	#endif
#endif

/* Same fallback scheme for the open_tree syscall number. */
#ifndef __NR_open_tree
	#if defined __alpha__
		#define __NR_open_tree 538
	#elif defined _MIPS_SIM
		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
			#define __NR_open_tree 4428
		#endif
		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
			#define __NR_open_tree 6428
		#endif
		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
			#define __NR_open_tree 5428
		#endif
	#elif defined __ia64__
		#define __NR_open_tree (428 + 1024)
	#else
		#define __NR_open_tree 428
	#endif
#endif

/* attr_set bit used by the mount_setattr_idmapped tests together with userns_fd. */
#ifndef MOUNT_ATTR_IDMAP
#define MOUNT_ATTR_IDMAP 0x00100000
#endif

/* Fallback for headers that lack MOUNT_ATTR_NOSYMFOLLOW. */
#ifndef MOUNT_ATTR_NOSYMFOLLOW
#define MOUNT_ATTR_NOSYMFOLLOW 0x00200000
#endif
136 
/*
 * Invoke the mount_setattr system call directly through syscall().
 *
 * The arguments are forwarded unchanged. The value returned by syscall() is
 * passed back to the caller, narrowed to int.
 */
static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flags,
				    struct mount_attr *attr, size_t size)
{
	long rc;

	rc = syscall(__NR_mount_setattr, dfd, path, flags, attr, size);

	return (int)rc;
}
142 
/* Fallback flag values for open_tree(), used when the headers lack them. */
#ifndef OPEN_TREE_CLONE
#define OPEN_TREE_CLONE 1
#endif

#ifndef OPEN_TREE_CLOEXEC
#define OPEN_TREE_CLOEXEC O_CLOEXEC
#endif

/*
 * AT_RECURSIVE is also guarded earlier in this file, so this second guard
 * only takes effect if that earlier definition is ever removed.
 */
#ifndef AT_RECURSIVE
#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
#endif
154 
/*
 * Invoke the open_tree system call directly through syscall().
 *
 * The arguments are forwarded unchanged. The value returned by syscall() is
 * passed back to the caller, narrowed to int.
 */
static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
{
	long rc;

	rc = syscall(__NR_open_tree, dfd, filename, flags);

	return (int)rc;
}
159 
/*
 * write() wrapper that transparently restarts the call when it fails with
 * EINTR.
 *
 * Returns whatever the final write() returned: the number of bytes written
 * (possibly fewer than @count) or a negative value with errno set to
 * something other than EINTR.
 */
static ssize_t write_nointr(int fd, const void *buf, size_t count)
{
	for (;;) {
		ssize_t written = write(fd, buf, count);

		/* Anything other than an EINTR failure is the final answer. */
		if (written >= 0 || errno != EINTR)
			return written;
	}
}
170 
171 static int write_file(const char *path, const void *buf, size_t count)
172 {
173 	int fd;
174 	ssize_t ret;
175 
176 	fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW);
177 	if (fd < 0)
178 		return -1;
179 
180 	ret = write_nointr(fd, buf, count);
181 	close(fd);
182 	if (ret < 0 || (size_t)ret != count)
183 		return -1;
184 
185 	return 0;
186 }
187 
/*
 * Move the calling process into a new user namespace in which its current
 * uid and gid are mapped to 0, then switch to uid/gid 0 inside it.
 *
 * Steps, in order: unshare(CLONE_NEWUSER), write "deny" to
 * /proc/self/setgroups (a missing file is tolerated), write one-entry
 * uid_map and gid_map files, then setgid(0) and setuid(0).
 *
 * Returns 0 on success and -1 as soon as any step fails.
 */
static int create_and_enter_userns(void)
{
	uid_t uid;
	gid_t gid;
	char map[100];

	/* Sample the ids before unshare(); they are the "outside" side of the map. */
	uid = getuid();
	gid = getgid();

	if (unshare(CLONE_NEWUSER))
		return -1;

	/* Only ENOENT (no setgroups file) is an acceptable failure here. */
	if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) &&
	    errno != ENOENT)
		return -1;

	/* uid_t/gid_t are unsigned: print them as such so large ids stay valid. */
	snprintf(map, sizeof(map), "0 %u 1", (unsigned int)uid);
	if (write_file("/proc/self/uid_map", map, strlen(map)))
		return -1;

	snprintf(map, sizeof(map), "0 %u 1", (unsigned int)gid);
	if (write_file("/proc/self/gid_map", map, strlen(map)))
		return -1;

	if (setgid(0))
		return -1;

	if (setuid(0))
		return -1;

	return 0;
}
221 
/*
 * Enter a fresh user namespace, then a fresh mount namespace, and finally
 * mark the whole mount tree under "/" as private (recursively).
 *
 * The three steps run strictly in that order and the first failure stops
 * the sequence. Returns 0 on success, -1 otherwise.
 */
static int prepare_unpriv_mountns(void)
{
	int failed;

	/* || short-circuits, so later steps only run if earlier ones returned 0. */
	failed = create_and_enter_userns() ||
		 unshare(CLONE_NEWNS) ||
		 mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0);

	return failed ? -1 : 0;
}
235 
#ifndef ST_NOSYMFOLLOW
#define ST_NOSYMFOLLOW 0x2000 /* do not follow symlinks */
#endif

/*
 * Query the mount flags of the filesystem containing @path via statvfs()
 * and translate them from the ST_* namespace into MS_* mount flags.
 *
 * Returns the translated flag set (>= 0) on success. Returns -EINVAL if
 * statvfs() fails or if it reports a flag this function does not know.
 *
 * ST_NOSYMFOLLOW is the one exception to the translation: it is reported
 * using its ST_* value unchanged.
 * NOTE(review): callers outside this view presumably compare against
 * ST_NOSYMFOLLOW directly - confirm before switching to an MS_* value.
 */
static int read_mnt_flags(const char *path)
{
	static const struct {
		unsigned long st_flag;
		unsigned int mnt_flag;
	} flag_map[] = {
		{ ST_RDONLY,		MS_RDONLY },
		{ ST_NOSUID,		MS_NOSUID },
		{ ST_NODEV,		MS_NODEV },
		{ ST_NOEXEC,		MS_NOEXEC },
		{ ST_NOATIME,		MS_NOATIME },
		{ ST_NODIRATIME,	MS_NODIRATIME },
		{ ST_RELATIME,		MS_RELATIME },
		{ ST_SYNCHRONOUS,	MS_SYNCHRONOUS },
		{ ST_MANDLOCK,		MS_MANDLOCK },
		{ ST_NOSYMFOLLOW,	ST_NOSYMFOLLOW },
	};
	const size_t nflags = sizeof(flag_map) / sizeof(flag_map[0]);
	struct statvfs stat;
	unsigned long known = 0;
	unsigned int mnt_flags = 0;
	size_t i;

	if (statvfs(path, &stat) != 0)
		return -EINVAL;

	for (i = 0; i < nflags; i++)
		known |= flag_map[i].st_flag;

	/* Refuse to silently drop a flag we have no translation for. */
	if (stat.f_flag & ~known)
		return -EINVAL;

	for (i = 0; i < nflags; i++) {
		if (stat.f_flag & flag_map[i].st_flag)
			mnt_flags |= flag_map[i].mnt_flag;
	}

	return mnt_flags;
}
279 
/*
 * Advance past up to @nfields space- or tab-terminated fields of @src.
 *
 * Each step skips the characters up to the next space or tab and then that
 * single separator. Scanning stops early at the end of the string, in which
 * case the returned pointer addresses the terminating NUL. Consecutive
 * separators are not merged. Never returns NULL.
 */
static char *get_field(char *src, int nfields)
{
	char *cur = src;
	int skipped = 0;

	while (skipped < nfields) {
		cur += strcspn(cur, " \t");
		if (*cur == '\0')
			break;

		/* Step over the single separator that ended this field. */
		cur++;
		skipped++;
	}

	return cur;
}
297 
/*
 * Terminate @word in place at its first space or tab.
 *
 * If the string contains neither, the existing terminating NUL is rewritten
 * and the string is left unchanged.
 */
static void null_endofword(char *word)
{
	word[strcspn(word, " \t")] = '\0';
}
304 
/*
 * Report whether a mount whose mount point is exactly @path is listed in
 * /proc/self/mountinfo with a "shared:" tag.
 *
 * For every line, the fifth whitespace-separated field is taken as the
 * mount point and the field two positions later is searched for "shared:".
 * Lines for other mount points are skipped; if several lines match @path,
 * scanning continues until one of them carries the tag.
 *
 * Returns true if such a line is found. Returns false otherwise, including
 * when mountinfo cannot be opened. The line buffer and the stream are
 * released on every return path.
 */
static bool is_shared_mount(const char *path)
{
	bool shared = false;
	size_t len = 0;
	char *line = NULL;
	FILE *f;

	f = fopen("/proc/self/mountinfo", "re");
	if (!f)
		return false;

	while (getline(&line, &len, f) != -1) {
		char *opts, *target;

		/*
		 * get_field() never returns NULL; on a short line it returns a
		 * pointer to the terminating NUL, which simply fails the
		 * comparisons below.
		 */
		target = get_field(line, 4);

		/* Locate opts before target is cut, or the scan stops at the cut. */
		opts = get_field(target, 2);

		null_endofword(target);
		if (strcmp(target, path) != 0)
			continue;

		null_endofword(opts);
		if (strstr(opts, "shared:")) {
			shared = true;
			break;
		}
	}

	free(line);
	fclose(f);

	return shared;
}
341 
342 static void *mount_setattr_thread(void *data)
343 {
344 	struct mount_attr attr = {
345 		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID,
346 		.attr_clr	= 0,
347 		.propagation	= MS_SHARED,
348 	};
349 
350 	if (sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)))
351 		pthread_exit(int_to_ptr(-1));
352 
353 	pthread_exit(int_to_ptr(0));
354 }
355 
/*
 * Attempt to de-conflict with the selftests tree: when the included harness
 * does not define SKIP(), map it onto XFAIL() with the same arguments so the
 * tests below can use SKIP() unconditionally.
 */
#ifndef SKIP
#define SKIP(s, ...)	XFAIL(s, ##__VA_ARGS__)
#endif
360 
/*
 * Probe whether the running kernel implements mount_setattr.
 *
 * The probe call passes a NULL attribute pointer and size 0. Only a failure
 * with errno == ENOSYS is taken to mean "not supported"; any other outcome,
 * including other errors, counts as supported.
 */
static bool mount_setattr_supported(void)
{
	if (sys_mount_setattr(-EBADF, "", AT_EMPTY_PATH, NULL, 0) >= 0)
		return true;

	return errno != ENOSYS;
}
371 
372 FIXTURE(mount_setattr) {
373 };
374 
375 #define NOSYMFOLLOW_TARGET "/mnt/A/AA/data"
376 #define NOSYMFOLLOW_SYMLINK "/mnt/A/AA/symlink"
377 
378 FIXTURE_SETUP(mount_setattr)
379 {
380 	int fd = -EBADF;
381 
382 	if (!mount_setattr_supported())
383 		SKIP(return, "mount_setattr syscall not supported");
384 
385 	ASSERT_EQ(prepare_unpriv_mountns(), 0);
386 
387 	(void)umount2("/mnt", MNT_DETACH);
388 	(void)umount2("/tmp", MNT_DETACH);
389 
390 	ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
391 			"size=100000,mode=700"), 0);
392 
393 	ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
394 
395 	ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV,
396 			"size=100000,mode=700"), 0);
397 
398 	ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
399 
400 	ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV,
401 			"size=100000,mode=700"), 0);
402 
403 	ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
404 			"size=100000,mode=700"), 0);
405 
406 	ASSERT_EQ(mkdir("/mnt/A", 0777), 0);
407 
408 	ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV,
409 			"size=100000,mode=700"), 0);
410 
411 	ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);
412 
413 	ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);
414 
415 	ASSERT_EQ(mkdir("/mnt/B", 0777), 0);
416 
417 	ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
418 			MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0);
419 
420 	ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);
421 
422 	ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
423 			MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);
424 
425 	fd = creat(NOSYMFOLLOW_TARGET, O_RDWR | O_CLOEXEC);
426 	ASSERT_GT(fd, 0);
427 	ASSERT_EQ(symlink(NOSYMFOLLOW_TARGET, NOSYMFOLLOW_SYMLINK), 0);
428 	ASSERT_EQ(close(fd), 0);
429 }
430 
431 FIXTURE_TEARDOWN(mount_setattr)
432 {
433 	if (!mount_setattr_supported())
434 		SKIP(return, "mount_setattr syscall not supported");
435 
436 	(void)umount2("/mnt/A", MNT_DETACH);
437 	(void)umount2("/tmp", MNT_DETACH);
438 }
439 
440 TEST_F(mount_setattr, invalid_attributes)
441 {
442 	struct mount_attr invalid_attr = {
443 		.attr_set = (1U << 31),
444 	};
445 
446 	if (!mount_setattr_supported())
447 		SKIP(return, "mount_setattr syscall not supported");
448 
449 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
450 				    sizeof(invalid_attr)), 0);
451 
452 	invalid_attr.attr_set	= 0;
453 	invalid_attr.attr_clr	= (1U << 31);
454 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
455 				    sizeof(invalid_attr)), 0);
456 
457 	invalid_attr.attr_clr		= 0;
458 	invalid_attr.propagation	= (1U << 31);
459 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
460 				    sizeof(invalid_attr)), 0);
461 
462 	invalid_attr.attr_set		= (1U << 31);
463 	invalid_attr.attr_clr		= (1U << 31);
464 	invalid_attr.propagation	= (1U << 31);
465 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
466 				    sizeof(invalid_attr)), 0);
467 
468 	ASSERT_NE(sys_mount_setattr(-1, "mnt/A", AT_RECURSIVE, &invalid_attr,
469 				    sizeof(invalid_attr)), 0);
470 }
471 
472 TEST_F(mount_setattr, extensibility)
473 {
474 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
475 	char *s = "dummy";
476 	struct mount_attr invalid_attr = {};
477 	struct mount_attr_large {
478 		struct mount_attr attr1;
479 		struct mount_attr attr2;
480 		struct mount_attr attr3;
481 	} large_attr = {};
482 
483 	if (!mount_setattr_supported())
484 		SKIP(return, "mount_setattr syscall not supported");
485 
486 	old_flags = read_mnt_flags("/mnt/A");
487 	ASSERT_GT(old_flags, 0);
488 
489 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, NULL,
490 				    sizeof(invalid_attr)), 0);
491 	ASSERT_EQ(errno, EFAULT);
492 
493 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, (void *)s,
494 				    sizeof(invalid_attr)), 0);
495 	ASSERT_EQ(errno, EINVAL);
496 
497 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, 0), 0);
498 	ASSERT_EQ(errno, EINVAL);
499 
500 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
501 				    sizeof(invalid_attr) / 2), 0);
502 	ASSERT_EQ(errno, EINVAL);
503 
504 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
505 				    sizeof(invalid_attr) / 2), 0);
506 	ASSERT_EQ(errno, EINVAL);
507 
508 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
509 				    (void *)&large_attr, sizeof(large_attr)), 0);
510 
511 	large_attr.attr3.attr_set = MOUNT_ATTR_RDONLY;
512 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
513 				    (void *)&large_attr, sizeof(large_attr)), 0);
514 
515 	large_attr.attr3.attr_set = 0;
516 	large_attr.attr1.attr_set = MOUNT_ATTR_RDONLY;
517 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
518 				    (void *)&large_attr, sizeof(large_attr)), 0);
519 
520 	expected_flags = old_flags;
521 	expected_flags |= MS_RDONLY;
522 
523 	new_flags = read_mnt_flags("/mnt/A");
524 	ASSERT_EQ(new_flags, expected_flags);
525 
526 	new_flags = read_mnt_flags("/mnt/A/AA");
527 	ASSERT_EQ(new_flags, expected_flags);
528 
529 	new_flags = read_mnt_flags("/mnt/A/AA/B");
530 	ASSERT_EQ(new_flags, expected_flags);
531 
532 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
533 	ASSERT_EQ(new_flags, expected_flags);
534 }
535 
536 TEST_F(mount_setattr, basic)
537 {
538 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
539 	struct mount_attr attr = {
540 		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
541 		.attr_clr	= MOUNT_ATTR__ATIME,
542 	};
543 
544 	if (!mount_setattr_supported())
545 		SKIP(return, "mount_setattr syscall not supported");
546 
547 	old_flags = read_mnt_flags("/mnt/A");
548 	ASSERT_GT(old_flags, 0);
549 
550 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", 0, &attr, sizeof(attr)), 0);
551 
552 	expected_flags = old_flags;
553 	expected_flags |= MS_RDONLY;
554 	expected_flags |= MS_NOEXEC;
555 	expected_flags &= ~MS_NOATIME;
556 	expected_flags |= MS_RELATIME;
557 
558 	new_flags = read_mnt_flags("/mnt/A");
559 	ASSERT_EQ(new_flags, expected_flags);
560 
561 	new_flags = read_mnt_flags("/mnt/A/AA");
562 	ASSERT_EQ(new_flags, old_flags);
563 
564 	new_flags = read_mnt_flags("/mnt/A/AA/B");
565 	ASSERT_EQ(new_flags, old_flags);
566 
567 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
568 	ASSERT_EQ(new_flags, old_flags);
569 }
570 
571 TEST_F(mount_setattr, basic_recursive)
572 {
573 	int fd;
574 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
575 	struct mount_attr attr = {
576 		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
577 		.attr_clr	= MOUNT_ATTR__ATIME,
578 	};
579 
580 	if (!mount_setattr_supported())
581 		SKIP(return, "mount_setattr syscall not supported");
582 
583 	old_flags = read_mnt_flags("/mnt/A");
584 	ASSERT_GT(old_flags, 0);
585 
586 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
587 
588 	expected_flags = old_flags;
589 	expected_flags |= MS_RDONLY;
590 	expected_flags |= MS_NOEXEC;
591 	expected_flags &= ~MS_NOATIME;
592 	expected_flags |= MS_RELATIME;
593 
594 	new_flags = read_mnt_flags("/mnt/A");
595 	ASSERT_EQ(new_flags, expected_flags);
596 
597 	new_flags = read_mnt_flags("/mnt/A/AA");
598 	ASSERT_EQ(new_flags, expected_flags);
599 
600 	new_flags = read_mnt_flags("/mnt/A/AA/B");
601 	ASSERT_EQ(new_flags, expected_flags);
602 
603 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
604 	ASSERT_EQ(new_flags, expected_flags);
605 
606 	memset(&attr, 0, sizeof(attr));
607 	attr.attr_clr = MOUNT_ATTR_RDONLY;
608 	attr.propagation = MS_SHARED;
609 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
610 
611 	expected_flags &= ~MS_RDONLY;
612 	new_flags = read_mnt_flags("/mnt/A");
613 	ASSERT_EQ(new_flags, expected_flags);
614 
615 	ASSERT_EQ(is_shared_mount("/mnt/A"), true);
616 
617 	new_flags = read_mnt_flags("/mnt/A/AA");
618 	ASSERT_EQ(new_flags, expected_flags);
619 
620 	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
621 
622 	new_flags = read_mnt_flags("/mnt/A/AA/B");
623 	ASSERT_EQ(new_flags, expected_flags);
624 
625 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
626 
627 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
628 	ASSERT_EQ(new_flags, expected_flags);
629 
630 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
631 
632 	fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
633 	ASSERT_GE(fd, 0);
634 
635 	/*
636 	 * We're holding a fd open for writing so this needs to fail somewhere
637 	 * in the middle and the mount options need to be unchanged.
638 	 */
639 	attr.attr_set = MOUNT_ATTR_RDONLY;
640 	ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
641 
642 	new_flags = read_mnt_flags("/mnt/A");
643 	ASSERT_EQ(new_flags, expected_flags);
644 
645 	ASSERT_EQ(is_shared_mount("/mnt/A"), true);
646 
647 	new_flags = read_mnt_flags("/mnt/A/AA");
648 	ASSERT_EQ(new_flags, expected_flags);
649 
650 	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
651 
652 	new_flags = read_mnt_flags("/mnt/A/AA/B");
653 	ASSERT_EQ(new_flags, expected_flags);
654 
655 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
656 
657 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
658 	ASSERT_EQ(new_flags, expected_flags);
659 
660 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
661 
662 	EXPECT_EQ(close(fd), 0);
663 }
664 
665 TEST_F(mount_setattr, mount_has_writers)
666 {
667 	int fd, dfd;
668 	unsigned int old_flags = 0, new_flags = 0;
669 	struct mount_attr attr = {
670 		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
671 		.attr_clr	= MOUNT_ATTR__ATIME,
672 		.propagation	= MS_SHARED,
673 	};
674 
675 	if (!mount_setattr_supported())
676 		SKIP(return, "mount_setattr syscall not supported");
677 
678 	old_flags = read_mnt_flags("/mnt/A");
679 	ASSERT_GT(old_flags, 0);
680 
681 	fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
682 	ASSERT_GE(fd, 0);
683 
684 	/*
685 	 * We're holding a fd open to a mount somwhere in the middle so this
686 	 * needs to fail somewhere in the middle. After this the mount options
687 	 * need to be unchanged.
688 	 */
689 	ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
690 
691 	new_flags = read_mnt_flags("/mnt/A");
692 	ASSERT_EQ(new_flags, old_flags);
693 
694 	ASSERT_EQ(is_shared_mount("/mnt/A"), false);
695 
696 	new_flags = read_mnt_flags("/mnt/A/AA");
697 	ASSERT_EQ(new_flags, old_flags);
698 
699 	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), false);
700 
701 	new_flags = read_mnt_flags("/mnt/A/AA/B");
702 	ASSERT_EQ(new_flags, old_flags);
703 
704 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), false);
705 
706 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
707 	ASSERT_EQ(new_flags, old_flags);
708 
709 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), false);
710 
711 	dfd = open("/mnt/A/AA/B", O_DIRECTORY | O_CLOEXEC);
712 	ASSERT_GE(dfd, 0);
713 	EXPECT_EQ(fsync(dfd), 0);
714 	EXPECT_EQ(close(dfd), 0);
715 
716 	EXPECT_EQ(fsync(fd), 0);
717 	EXPECT_EQ(close(fd), 0);
718 
719 	/* All writers are gone so this should succeed. */
720 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
721 }
722 
723 TEST_F(mount_setattr, mixed_mount_options)
724 {
725 	unsigned int old_flags1 = 0, old_flags2 = 0, new_flags = 0, expected_flags = 0;
726 	struct mount_attr attr = {
727 		.attr_clr = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR__ATIME,
728 		.attr_set = MOUNT_ATTR_RELATIME,
729 	};
730 
731 	if (!mount_setattr_supported())
732 		SKIP(return, "mount_setattr syscall not supported");
733 
734 	old_flags1 = read_mnt_flags("/mnt/B");
735 	ASSERT_GT(old_flags1, 0);
736 
737 	old_flags2 = read_mnt_flags("/mnt/B/BB");
738 	ASSERT_GT(old_flags2, 0);
739 
740 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/B", AT_RECURSIVE, &attr, sizeof(attr)), 0);
741 
742 	expected_flags = old_flags2;
743 	expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
744 	expected_flags |= MS_RELATIME;
745 
746 	new_flags = read_mnt_flags("/mnt/B");
747 	ASSERT_EQ(new_flags, expected_flags);
748 
749 	expected_flags = old_flags2;
750 	expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
751 	expected_flags |= MS_RELATIME;
752 
753 	new_flags = read_mnt_flags("/mnt/B/BB");
754 	ASSERT_EQ(new_flags, expected_flags);
755 }
756 
757 TEST_F(mount_setattr, time_changes)
758 {
759 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
760 	struct mount_attr attr = {
761 		.attr_set	= MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME,
762 	};
763 
764 	if (!mount_setattr_supported())
765 		SKIP(return, "mount_setattr syscall not supported");
766 
767 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
768 
769 	attr.attr_set = MOUNT_ATTR_STRICTATIME;
770 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
771 
772 	attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
773 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
774 
775 	attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
776 	attr.attr_clr = MOUNT_ATTR__ATIME;
777 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
778 
779 	attr.attr_set = 0;
780 	attr.attr_clr = MOUNT_ATTR_STRICTATIME;
781 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
782 
783 	attr.attr_clr = MOUNT_ATTR_NOATIME;
784 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
785 
786 	old_flags = read_mnt_flags("/mnt/A");
787 	ASSERT_GT(old_flags, 0);
788 
789 	attr.attr_set = MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME;
790 	attr.attr_clr = MOUNT_ATTR__ATIME;
791 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
792 
793 	expected_flags = old_flags;
794 	expected_flags |= MS_NOATIME;
795 	expected_flags |= MS_NODIRATIME;
796 
797 	new_flags = read_mnt_flags("/mnt/A");
798 	ASSERT_EQ(new_flags, expected_flags);
799 
800 	new_flags = read_mnt_flags("/mnt/A/AA");
801 	ASSERT_EQ(new_flags, expected_flags);
802 
803 	new_flags = read_mnt_flags("/mnt/A/AA/B");
804 	ASSERT_EQ(new_flags, expected_flags);
805 
806 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
807 	ASSERT_EQ(new_flags, expected_flags);
808 
809 	memset(&attr, 0, sizeof(attr));
810 	attr.attr_set &= ~MOUNT_ATTR_NOATIME;
811 	attr.attr_set |= MOUNT_ATTR_RELATIME;
812 	attr.attr_clr |= MOUNT_ATTR__ATIME;
813 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
814 
815 	expected_flags &= ~MS_NOATIME;
816 	expected_flags |= MS_RELATIME;
817 
818 	new_flags = read_mnt_flags("/mnt/A");
819 	ASSERT_EQ(new_flags, expected_flags);
820 
821 	new_flags = read_mnt_flags("/mnt/A/AA");
822 	ASSERT_EQ(new_flags, expected_flags);
823 
824 	new_flags = read_mnt_flags("/mnt/A/AA/B");
825 	ASSERT_EQ(new_flags, expected_flags);
826 
827 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
828 	ASSERT_EQ(new_flags, expected_flags);
829 
830 	memset(&attr, 0, sizeof(attr));
831 	attr.attr_set &= ~MOUNT_ATTR_RELATIME;
832 	attr.attr_set |= MOUNT_ATTR_STRICTATIME;
833 	attr.attr_clr |= MOUNT_ATTR__ATIME;
834 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
835 
836 	expected_flags &= ~MS_RELATIME;
837 
838 	new_flags = read_mnt_flags("/mnt/A");
839 	ASSERT_EQ(new_flags, expected_flags);
840 
841 	new_flags = read_mnt_flags("/mnt/A/AA");
842 	ASSERT_EQ(new_flags, expected_flags);
843 
844 	new_flags = read_mnt_flags("/mnt/A/AA/B");
845 	ASSERT_EQ(new_flags, expected_flags);
846 
847 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
848 	ASSERT_EQ(new_flags, expected_flags);
849 
850 	memset(&attr, 0, sizeof(attr));
851 	attr.attr_set &= ~MOUNT_ATTR_STRICTATIME;
852 	attr.attr_set |= MOUNT_ATTR_NOATIME;
853 	attr.attr_clr |= MOUNT_ATTR__ATIME;
854 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
855 
856 	expected_flags |= MS_NOATIME;
857 	new_flags = read_mnt_flags("/mnt/A");
858 	ASSERT_EQ(new_flags, expected_flags);
859 
860 	new_flags = read_mnt_flags("/mnt/A/AA");
861 	ASSERT_EQ(new_flags, expected_flags);
862 
863 	new_flags = read_mnt_flags("/mnt/A/AA/B");
864 	ASSERT_EQ(new_flags, expected_flags);
865 
866 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
867 	ASSERT_EQ(new_flags, expected_flags);
868 
869 	memset(&attr, 0, sizeof(attr));
870 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
871 
872 	new_flags = read_mnt_flags("/mnt/A");
873 	ASSERT_EQ(new_flags, expected_flags);
874 
875 	new_flags = read_mnt_flags("/mnt/A/AA");
876 	ASSERT_EQ(new_flags, expected_flags);
877 
878 	new_flags = read_mnt_flags("/mnt/A/AA/B");
879 	ASSERT_EQ(new_flags, expected_flags);
880 
881 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
882 	ASSERT_EQ(new_flags, expected_flags);
883 
884 	memset(&attr, 0, sizeof(attr));
885 	attr.attr_clr = MOUNT_ATTR_NODIRATIME;
886 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
887 
888 	expected_flags &= ~MS_NODIRATIME;
889 
890 	new_flags = read_mnt_flags("/mnt/A");
891 	ASSERT_EQ(new_flags, expected_flags);
892 
893 	new_flags = read_mnt_flags("/mnt/A/AA");
894 	ASSERT_EQ(new_flags, expected_flags);
895 
896 	new_flags = read_mnt_flags("/mnt/A/AA/B");
897 	ASSERT_EQ(new_flags, expected_flags);
898 
899 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
900 	ASSERT_EQ(new_flags, expected_flags);
901 }
902 
903 TEST_F(mount_setattr, multi_threaded)
904 {
905 	int i, j, nthreads, ret = 0;
906 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
907 	pthread_attr_t pattr;
908 	pthread_t threads[DEFAULT_THREADS];
909 
910 	if (!mount_setattr_supported())
911 		SKIP(return, "mount_setattr syscall not supported");
912 
913 	old_flags = read_mnt_flags("/mnt/A");
914 	ASSERT_GT(old_flags, 0);
915 
916 	/* Try to change mount options from multiple threads. */
917 	nthreads = get_nprocs_conf();
918 	if (nthreads > DEFAULT_THREADS)
919 		nthreads = DEFAULT_THREADS;
920 
921 	pthread_attr_init(&pattr);
922 	for (i = 0; i < nthreads; i++)
923 		ASSERT_EQ(pthread_create(&threads[i], &pattr, mount_setattr_thread, NULL), 0);
924 
925 	for (j = 0; j < i; j++) {
926 		void *retptr = NULL;
927 
928 		EXPECT_EQ(pthread_join(threads[j], &retptr), 0);
929 
930 		ret += ptr_to_int(retptr);
931 		EXPECT_EQ(ret, 0);
932 	}
933 	pthread_attr_destroy(&pattr);
934 
935 	ASSERT_EQ(ret, 0);
936 
937 	expected_flags = old_flags;
938 	expected_flags |= MS_RDONLY;
939 	expected_flags |= MS_NOSUID;
940 	new_flags = read_mnt_flags("/mnt/A");
941 	ASSERT_EQ(new_flags, expected_flags);
942 
943 	ASSERT_EQ(is_shared_mount("/mnt/A"), true);
944 
945 	new_flags = read_mnt_flags("/mnt/A/AA");
946 	ASSERT_EQ(new_flags, expected_flags);
947 
948 	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
949 
950 	new_flags = read_mnt_flags("/mnt/A/AA/B");
951 	ASSERT_EQ(new_flags, expected_flags);
952 
953 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
954 
955 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
956 	ASSERT_EQ(new_flags, expected_flags);
957 
958 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
959 }
960 
961 TEST_F(mount_setattr, wrong_user_namespace)
962 {
963 	int ret;
964 	struct mount_attr attr = {
965 		.attr_set = MOUNT_ATTR_RDONLY,
966 	};
967 
968 	if (!mount_setattr_supported())
969 		SKIP(return, "mount_setattr syscall not supported");
970 
971 	EXPECT_EQ(create_and_enter_userns(), 0);
972 	ret = sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr));
973 	ASSERT_LT(ret, 0);
974 	ASSERT_EQ(errno, EPERM);
975 }
976 
977 TEST_F(mount_setattr, wrong_mount_namespace)
978 {
979 	int fd, ret;
980 	struct mount_attr attr = {
981 		.attr_set = MOUNT_ATTR_RDONLY,
982 	};
983 
984 	if (!mount_setattr_supported())
985 		SKIP(return, "mount_setattr syscall not supported");
986 
987 	fd = open("/mnt/A", O_DIRECTORY | O_CLOEXEC);
988 	ASSERT_GE(fd, 0);
989 
990 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
991 
992 	ret = sys_mount_setattr(fd, "", AT_EMPTY_PATH | AT_RECURSIVE, &attr, sizeof(attr));
993 	ASSERT_LT(ret, 0);
994 	ASSERT_EQ(errno, EINVAL);
995 }
996 
FIXTURE(mount_setattr_idmapped) {
};

/*
 * Build the mount tree for the idmapped-mount tests in a private mount
 * namespace (no user namespace is entered here):
 *
 *   /tmp, /tmp/B, /tmp/B/BB   tmpfs, stacked; a root-owned file "b" is
 *                             created in /tmp/B and /tmp/B/BB before each
 *                             directory is mounted over
 *   /mnt, /mnt/A              tmpfs
 *   /mnt/A/AA                 recursive bind mount of /tmp
 *   /mnt/B                    ramfs
 *   /mnt/D                    ext4 image /mnt/C/ext4.img on a loop device,
 *                             created and mounted by external mkfs.ext4
 *                             and mount commands
 */
FIXTURE_SETUP(mount_setattr_idmapped)
{
	const unsigned long tmpfs_flags = MS_NOATIME | MS_NODEV;
	const char *const tmpfs_opts = "size=100000,mode=700";
	int img_fd = -EBADF;

	ASSERT_EQ(unshare(CLONE_NEWNS), 0);

	ASSERT_EQ(mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0), 0);

	/* Best effort: start from a clean slate, failures are irrelevant. */
	(void)umount2("/mnt", MNT_DETACH);
	(void)umount2("/tmp", MNT_DETACH);

	ASSERT_EQ(mount("testing", "/tmp", "tmpfs", tmpfs_flags, tmpfs_opts), 0);

	ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
	ASSERT_EQ(mknodat(-EBADF, "/tmp/B/b", S_IFREG | 0644, 0), 0);
	ASSERT_EQ(chown("/tmp/B/b", 0, 0), 0);

	ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", tmpfs_flags, tmpfs_opts), 0);

	ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
	ASSERT_EQ(mknodat(-EBADF, "/tmp/B/BB/b", S_IFREG | 0644, 0), 0);
	ASSERT_EQ(chown("/tmp/B/BB/b", 0, 0), 0);

	ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", tmpfs_flags, tmpfs_opts), 0);

	ASSERT_EQ(mount("testing", "/mnt", "tmpfs", tmpfs_flags, tmpfs_opts), 0);

	ASSERT_EQ(mkdir("/mnt/A", 0777), 0);

	ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", tmpfs_flags, tmpfs_opts), 0);

	ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);

	ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);

	ASSERT_EQ(mkdir("/mnt/B", 0777), 0);

	ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
			MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0);

	ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);

	/*
	 * NOTE(review): devpts goes on /tmp/B/BB, not on the /mnt/B/BB
	 * directory created just above - confirm this is intended.
	 */
	ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
			MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);

	ASSERT_EQ(mkdir("/mnt/C", 0777), 0);
	ASSERT_EQ(mkdir("/mnt/D", 0777), 0);

	/* 2 MiB backing file for the ext4 image. */
	img_fd = openat(-EBADF, "/mnt/C/ext4.img", O_CREAT | O_WRONLY, 0600);
	ASSERT_GE(img_fd, 0);
	ASSERT_EQ(ftruncate(img_fd, 1024 * 2048), 0);
	ASSERT_EQ(system("mkfs.ext4 -q /mnt/C/ext4.img"), 0);
	ASSERT_EQ(system("mount -o loop -t ext4 /mnt/C/ext4.img /mnt/D/"), 0);
	ASSERT_EQ(close(img_fd), 0);
}
1059 
/* Lazily detach the "/mnt/A" and "/tmp" trees; failures are ignored. */
FIXTURE_TEARDOWN(mount_setattr_idmapped)
{
	static const char *const mountpoints[] = { "/mnt/A", "/tmp" };
	size_t i;

	for (i = 0; i < sizeof(mountpoints) / sizeof(mountpoints[0]); i++)
		(void)umount2(mountpoints[i], MNT_DETACH);
}
1065 
1066 /**
1067  * Validate that negative fd values are rejected.
1068  */
1069 TEST_F(mount_setattr_idmapped, invalid_fd_negative)
1070 {
1071 	struct mount_attr attr = {
1072 		.attr_set	= MOUNT_ATTR_IDMAP,
1073 		.userns_fd	= -EBADF,
1074 	};
1075 
1076 	if (!mount_setattr_supported())
1077 		SKIP(return, "mount_setattr syscall not supported");
1078 
1079 	ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
1080 		TH_LOG("failure: created idmapped mount with negative fd");
1081 	}
1082 }
1083 
1084 /**
1085  * Validate that excessively large fd values are rejected.
1086  */
1087 TEST_F(mount_setattr_idmapped, invalid_fd_large)
1088 {
1089 	struct mount_attr attr = {
1090 		.attr_set	= MOUNT_ATTR_IDMAP,
1091 		.userns_fd	= INT64_MAX,
1092 	};
1093 
1094 	if (!mount_setattr_supported())
1095 		SKIP(return, "mount_setattr syscall not supported");
1096 
1097 	ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
1098 		TH_LOG("failure: created idmapped mount with too large fd value");
1099 	}
1100 }
1101 
1102 /**
1103  * Validate that closed fd values are rejected.
1104  */
1105 TEST_F(mount_setattr_idmapped, invalid_fd_closed)
1106 {
1107 	int fd;
1108 	struct mount_attr attr = {
1109 		.attr_set = MOUNT_ATTR_IDMAP,
1110 	};
1111 
1112 	if (!mount_setattr_supported())
1113 		SKIP(return, "mount_setattr syscall not supported");
1114 
1115 	fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
1116 	ASSERT_GE(fd, 0);
1117 	ASSERT_GE(close(fd), 0);
1118 
1119 	attr.userns_fd = fd;
1120 	ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
1121 		TH_LOG("failure: created idmapped mount with closed fd");
1122 	}
1123 }
1124 
1125 /**
1126  * Validate that the initial user namespace is rejected.
1127  */
1128 TEST_F(mount_setattr_idmapped, invalid_fd_initial_userns)
1129 {
1130 	int open_tree_fd = -EBADF;
1131 	struct mount_attr attr = {
1132 		.attr_set = MOUNT_ATTR_IDMAP,
1133 	};
1134 
1135 	if (!mount_setattr_supported())
1136 		SKIP(return, "mount_setattr syscall not supported");
1137 
1138 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1139 				     AT_NO_AUTOMOUNT |
1140 				     AT_SYMLINK_NOFOLLOW |
1141 				     OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
1142 	ASSERT_GE(open_tree_fd, 0);
1143 
1144 	attr.userns_fd = open("/proc/1/ns/user", O_RDONLY | O_CLOEXEC);
1145 	ASSERT_GE(attr.userns_fd, 0);
1146 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1147 	ASSERT_EQ(errno, EPERM);
1148 	ASSERT_EQ(close(attr.userns_fd), 0);
1149 	ASSERT_EQ(close(open_tree_fd), 0);
1150 }
1151 
/**
 * map_ids() - install the same ID mapping as uid_map and gid_map of a process.
 * @pid:    process whose /proc/<pid>/uid_map and gid_map are written.
 * @nsid:   first field of the mapping line.
 * @hostid: second field of the mapping line.
 * @range:  third field of the mapping line.
 *
 * The line "<nsid> <hostid> <range>" is written to uid_map first and to
 * gid_map second.
 *
 * Return: 0 on success, -1 as soon as write_file() reports a failure.
 */
static int map_ids(pid_t pid, unsigned long nsid, unsigned long hostid,
		   unsigned long range)
{
	char map[100], procfile[256];
	size_t map_len;

	/* Both files receive exactly the same mapping line. */
	snprintf(map, sizeof(map), "%lu %lu %lu", nsid, hostid, range);
	map_len = strlen(map);

	snprintf(procfile, sizeof(procfile), "/proc/%d/uid_map", pid);
	if (write_file(procfile, map, map_len) != 0)
		return -1;

	snprintf(procfile, sizeof(procfile), "/proc/%d/gid_map", pid);
	if (write_file(procfile, map, map_len) != 0)
		return -1;

	return 0;
}
1170 
#define __STACK_SIZE (8 * 1024 * 1024)
/**
 * do_clone() - start @fn in a child created with clone().
 * @fn:    entry point executed by the child.
 * @arg:   opaque argument handed to @fn.
 * @flags: clone flags; SIGCHLD is always added.
 *
 * A heap buffer of __STACK_SIZE bytes serves as the child's stack. The buffer
 * is released again in the parent unless the child may still be running on it
 * (CLONE_VM), so repeated calls no longer leak one stack per clone.
 *
 * Return: the child's pid as returned by clone()/__clone2() (negative with
 * errno set on failure), or -ENOMEM if the stack could not be allocated.
 */
static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
{
	char *stack;
	pid_t pid;
	int saved_errno;

	stack = malloc(__STACK_SIZE);
	if (!stack)
		return -ENOMEM;

#ifdef __ia64__
	pid = __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL);
#else
	/* The stack grows down, so pass the top of the buffer. */
	pid = clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL);
#endif

	/*
	 * Without CLONE_VM the child runs on its own copy of the address
	 * space, and on failure there is no child at all: in both cases the
	 * parent's buffer is unused. With CLONE_VM the child shares this
	 * memory, so the buffer has to stay allocated.
	 */
	if (pid < 0 || !(flags & CLONE_VM)) {
		/* Callers inspect errno after a failed clone; keep it intact. */
		saved_errno = errno;
		free(stack);
		errno = saved_errno;
	}

	return pid;
}
1186 
/**
 * get_userns_fd_cb() - child entry point that stops itself.
 * @data: unused.
 *
 * Sends SIGSTOP to the calling process.
 *
 * Return: the result of kill().
 */
static int get_userns_fd_cb(void *data)
{
	const pid_t self = getpid();

	return kill(self, SIGSTOP);
}
1191 
/**
 * wait_for_pid() - reap @pid and report how it exited.
 * @pid: process to wait for.
 *
 * waitpid() is retried for as long as it is interrupted by a signal (EINTR).
 *
 * Return: the exit status of the process if it terminated normally; -1 if
 * waitpid() failed for any other reason or the process did not exit normally.
 */
static int wait_for_pid(pid_t pid)
{
	int wstatus;

	for (;;) {
		if (waitpid(pid, &wstatus, 0) != -1)
			break;

		/* Only an interrupted wait is worth retrying. */
		if (errno != EINTR)
			return -1;
	}

	return WIFEXITED(wstatus) ? WEXITSTATUS(wstatus) : -1;
}
1210 
1211 static int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long range)
1212 {
1213 	int ret;
1214 	pid_t pid;
1215 	char path[256];
1216 
1217 	pid = do_clone(get_userns_fd_cb, NULL, CLONE_NEWUSER);
1218 	if (pid < 0)
1219 		return -errno;
1220 
1221 	ret = map_ids(pid, nsid, hostid, range);
1222 	if (ret < 0)
1223 		return ret;
1224 
1225 	snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
1226 	ret = open(path, O_RDONLY | O_CLOEXEC);
1227 	kill(pid, SIGKILL);
1228 	wait_for_pid(pid);
1229 	return ret;
1230 }
1231 
1232 /**
1233  * Validate that an attached mount in our mount namespace cannot be idmapped.
1234  * (The kernel enforces that the mount's mount namespace and the caller's mount
1235  *  namespace match.)
1236  */
1237 TEST_F(mount_setattr_idmapped, attached_mount_inside_current_mount_namespace)
1238 {
1239 	int open_tree_fd = -EBADF;
1240 	struct mount_attr attr = {
1241 		.attr_set = MOUNT_ATTR_IDMAP,
1242 	};
1243 
1244 	if (!mount_setattr_supported())
1245 		SKIP(return, "mount_setattr syscall not supported");
1246 
1247 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1248 				     AT_EMPTY_PATH |
1249 				     AT_NO_AUTOMOUNT |
1250 				     AT_SYMLINK_NOFOLLOW |
1251 				     OPEN_TREE_CLOEXEC);
1252 	ASSERT_GE(open_tree_fd, 0);
1253 
1254 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1255 	ASSERT_GE(attr.userns_fd, 0);
1256 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1257 	ASSERT_EQ(close(attr.userns_fd), 0);
1258 	ASSERT_EQ(close(open_tree_fd), 0);
1259 }
1260 
1261 /**
1262  * Validate that idmapping a mount is rejected if the mount's mount namespace
1263  * and our mount namespace don't match.
1264  * (The kernel enforces that the mount's mount namespace and the caller's mount
1265  *  namespace match.)
1266  */
1267 TEST_F(mount_setattr_idmapped, attached_mount_outside_current_mount_namespace)
1268 {
1269 	int open_tree_fd = -EBADF;
1270 	struct mount_attr attr = {
1271 		.attr_set = MOUNT_ATTR_IDMAP,
1272 	};
1273 
1274 	if (!mount_setattr_supported())
1275 		SKIP(return, "mount_setattr syscall not supported");
1276 
1277 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1278 				     AT_EMPTY_PATH |
1279 				     AT_NO_AUTOMOUNT |
1280 				     AT_SYMLINK_NOFOLLOW |
1281 				     OPEN_TREE_CLOEXEC);
1282 	ASSERT_GE(open_tree_fd, 0);
1283 
1284 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
1285 
1286 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1287 	ASSERT_GE(attr.userns_fd, 0);
1288 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr,
1289 				    sizeof(attr)), 0);
1290 	ASSERT_EQ(close(attr.userns_fd), 0);
1291 	ASSERT_EQ(close(open_tree_fd), 0);
1292 }
1293 
1294 /**
1295  * Validate that an attached mount in our mount namespace can be idmapped.
1296  */
1297 TEST_F(mount_setattr_idmapped, detached_mount_inside_current_mount_namespace)
1298 {
1299 	int open_tree_fd = -EBADF;
1300 	struct mount_attr attr = {
1301 		.attr_set = MOUNT_ATTR_IDMAP,
1302 	};
1303 
1304 	if (!mount_setattr_supported())
1305 		SKIP(return, "mount_setattr syscall not supported");
1306 
1307 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1308 				     AT_EMPTY_PATH |
1309 				     AT_NO_AUTOMOUNT |
1310 				     AT_SYMLINK_NOFOLLOW |
1311 				     OPEN_TREE_CLOEXEC |
1312 				     OPEN_TREE_CLONE);
1313 	ASSERT_GE(open_tree_fd, 0);
1314 
1315 	/* Changing mount properties on a detached mount. */
1316 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1317 	ASSERT_GE(attr.userns_fd, 0);
1318 	ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1319 				    AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1320 	ASSERT_EQ(close(attr.userns_fd), 0);
1321 	ASSERT_EQ(close(open_tree_fd), 0);
1322 }
1323 
1324 /**
1325  * Validate that a detached mount not in our mount namespace can be idmapped.
1326  */
1327 TEST_F(mount_setattr_idmapped, detached_mount_outside_current_mount_namespace)
1328 {
1329 	int open_tree_fd = -EBADF;
1330 	struct mount_attr attr = {
1331 		.attr_set = MOUNT_ATTR_IDMAP,
1332 	};
1333 
1334 	if (!mount_setattr_supported())
1335 		SKIP(return, "mount_setattr syscall not supported");
1336 
1337 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1338 				     AT_EMPTY_PATH |
1339 				     AT_NO_AUTOMOUNT |
1340 				     AT_SYMLINK_NOFOLLOW |
1341 				     OPEN_TREE_CLOEXEC |
1342 				     OPEN_TREE_CLONE);
1343 	ASSERT_GE(open_tree_fd, 0);
1344 
1345 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
1346 
1347 	/* Changing mount properties on a detached mount. */
1348 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1349 	ASSERT_GE(attr.userns_fd, 0);
1350 	ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1351 				    AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1352 	ASSERT_EQ(close(attr.userns_fd), 0);
1353 	ASSERT_EQ(close(open_tree_fd), 0);
1354 }
1355 
1356 /**
1357  * Validate that currently changing the idmapping of an idmapped mount fails.
1358  */
1359 TEST_F(mount_setattr_idmapped, change_idmapping)
1360 {
1361 	int open_tree_fd = -EBADF;
1362 	struct mount_attr attr = {
1363 		.attr_set = MOUNT_ATTR_IDMAP,
1364 	};
1365 
1366 	if (!mount_setattr_supported())
1367 		SKIP(return, "mount_setattr syscall not supported");
1368 
1369 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1370 				     AT_EMPTY_PATH |
1371 				     AT_NO_AUTOMOUNT |
1372 				     AT_SYMLINK_NOFOLLOW |
1373 				     OPEN_TREE_CLOEXEC |
1374 				     OPEN_TREE_CLONE);
1375 	ASSERT_GE(open_tree_fd, 0);
1376 
1377 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1378 	ASSERT_GE(attr.userns_fd, 0);
1379 	ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1380 				    AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1381 	ASSERT_EQ(close(attr.userns_fd), 0);
1382 
1383 	/* Change idmapping on a detached mount that is already idmapped. */
1384 	attr.userns_fd	= get_userns_fd(0, 20000, 10000);
1385 	ASSERT_GE(attr.userns_fd, 0);
1386 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1387 	ASSERT_EQ(close(attr.userns_fd), 0);
1388 	ASSERT_EQ(close(open_tree_fd), 0);
1389 }
1390 
/**
 * expected_uid_gid() - check the ownership of a path.
 * @dfd:          directory fd passed to fstatat().
 * @path:         path passed to fstatat().
 * @flags:        flags passed to fstatat().
 * @expected_uid: owner the path is expected to have.
 * @expected_gid: group the path is expected to have.
 *
 * Return: true if fstatat() succeeds and both st_uid and st_gid match the
 * expected values; false otherwise, including when fstatat() fails.
 */
static bool expected_uid_gid(int dfd, const char *path, int flags,
			     uid_t expected_uid, gid_t expected_gid)
{
	struct stat sb;

	if (fstatat(dfd, path, &sb, flags) < 0)
		return false;

	if (sb.st_uid != expected_uid)
		return false;

	return sb.st_gid == expected_gid;
}
1403 
1404 TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid)
1405 {
1406 	int open_tree_fd = -EBADF;
1407 	struct mount_attr attr = {
1408 		.attr_set = MOUNT_ATTR_IDMAP,
1409 	};
1410 
1411 	if (!mount_setattr_supported())
1412 		SKIP(return, "mount_setattr syscall not supported");
1413 
1414 	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
1415 	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
1416 
1417 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/A",
1418 				     AT_RECURSIVE |
1419 				     AT_EMPTY_PATH |
1420 				     AT_NO_AUTOMOUNT |
1421 				     AT_SYMLINK_NOFOLLOW |
1422 				     OPEN_TREE_CLOEXEC |
1423 				     OPEN_TREE_CLONE);
1424 	ASSERT_GE(open_tree_fd, 0);
1425 
1426 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1427 	ASSERT_GE(attr.userns_fd, 0);
1428 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1429 	ASSERT_EQ(close(attr.userns_fd), 0);
1430 	ASSERT_EQ(close(open_tree_fd), 0);
1431 
1432 	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
1433 	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
1434 	ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/b", 0, 0, 0), 0);
1435 	ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/BB/b", 0, 0, 0), 0);
1436 }
1437 
1438 TEST_F(mount_setattr, mount_attr_nosymfollow)
1439 {
1440 	int fd;
1441 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
1442 	struct mount_attr attr = {
1443 		.attr_set	= MOUNT_ATTR_NOSYMFOLLOW,
1444 	};
1445 
1446 	if (!mount_setattr_supported())
1447 		SKIP(return, "mount_setattr syscall not supported");
1448 
1449 	fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
1450 	ASSERT_GT(fd, 0);
1451 	ASSERT_EQ(close(fd), 0);
1452 
1453 	old_flags = read_mnt_flags("/mnt/A");
1454 	ASSERT_GT(old_flags, 0);
1455 
1456 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
1457 
1458 	expected_flags = old_flags;
1459 	expected_flags |= ST_NOSYMFOLLOW;
1460 
1461 	new_flags = read_mnt_flags("/mnt/A");
1462 	ASSERT_EQ(new_flags, expected_flags);
1463 
1464 	new_flags = read_mnt_flags("/mnt/A/AA");
1465 	ASSERT_EQ(new_flags, expected_flags);
1466 
1467 	new_flags = read_mnt_flags("/mnt/A/AA/B");
1468 	ASSERT_EQ(new_flags, expected_flags);
1469 
1470 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
1471 	ASSERT_EQ(new_flags, expected_flags);
1472 
1473 	fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
1474 	ASSERT_LT(fd, 0);
1475 	ASSERT_EQ(errno, ELOOP);
1476 
1477 	attr.attr_set &= ~MOUNT_ATTR_NOSYMFOLLOW;
1478 	attr.attr_clr |= MOUNT_ATTR_NOSYMFOLLOW;
1479 
1480 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
1481 
1482 	expected_flags &= ~ST_NOSYMFOLLOW;
1483 	new_flags = read_mnt_flags("/mnt/A");
1484 	ASSERT_EQ(new_flags, expected_flags);
1485 
1486 	new_flags = read_mnt_flags("/mnt/A/AA");
1487 	ASSERT_EQ(new_flags, expected_flags);
1488 
1489 	new_flags = read_mnt_flags("/mnt/A/AA/B");
1490 	ASSERT_EQ(new_flags, expected_flags);
1491 
1492 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
1493 	ASSERT_EQ(new_flags, expected_flags);
1494 
1495 	fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
1496 	ASSERT_GT(fd, 0);
1497 	ASSERT_EQ(close(fd), 0);
1498 }
1499 
1500 TEST_HARNESS_MAIN
1501