1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 #include <sched.h>
4 #include <stdio.h>
5 #include <errno.h>
6 #include <pthread.h>
7 #include <string.h>
8 #include <sys/stat.h>
9 #include <sys/types.h>
10 #include <sys/mount.h>
11 #include <sys/wait.h>
12 #include <sys/vfs.h>
13 #include <sys/statvfs.h>
14 #include <sys/sysinfo.h>
15 #include <stdlib.h>
16 #include <unistd.h>
17 #include <fcntl.h>
18 #include <grp.h>
19 #include <stdbool.h>
20 #include <stdarg.h>
21 
22 #include "../kselftest_harness.h"
23 
/*
 * Fallback definitions for namespace, mount(2) and mount_setattr(2) flag
 * constants. Every constant is wrapped in #ifndef, so a value is supplied
 * here only when the headers included above did not already define it.
 */

/* unshare()/clone() namespace flags. */
#ifndef CLONE_NEWNS
#define CLONE_NEWNS 0x00020000
#endif

#ifndef CLONE_NEWUSER
#define CLONE_NEWUSER 0x10000000
#endif

/* Classic mount(2) MS_* flags. */
#ifndef MS_REC
#define MS_REC 16384
#endif

#ifndef MS_RELATIME
#define MS_RELATIME (1 << 21)
#endif

#ifndef MS_STRICTATIME
#define MS_STRICTATIME (1 << 24)
#endif

/* mount_setattr() attribute bits used in attr_set / attr_clr. */
#ifndef MOUNT_ATTR_RDONLY
#define MOUNT_ATTR_RDONLY 0x00000001
#endif

#ifndef MOUNT_ATTR_NOSUID
#define MOUNT_ATTR_NOSUID 0x00000002
#endif

#ifndef MOUNT_ATTR_NOEXEC
#define MOUNT_ATTR_NOEXEC 0x00000008
#endif

#ifndef MOUNT_ATTR_NODIRATIME
#define MOUNT_ATTR_NODIRATIME 0x00000080
#endif

/*
 * Access-time handling: MOUNT_ATTR__ATIME (0x70) is a mask that contains the
 * NOATIME (0x10) and STRICTATIME (0x20) bits defined below; RELATIME is the
 * all-zero value within that mask.
 */
#ifndef MOUNT_ATTR__ATIME
#define MOUNT_ATTR__ATIME 0x00000070
#endif

#ifndef MOUNT_ATTR_RELATIME
#define MOUNT_ATTR_RELATIME 0x00000000
#endif

#ifndef MOUNT_ATTR_NOATIME
#define MOUNT_ATTR_NOATIME 0x00000010
#endif

#ifndef MOUNT_ATTR_STRICTATIME
#define MOUNT_ATTR_STRICTATIME 0x00000020
#endif

/* Flag passed to sys_mount_setattr() by the tests that operate on a whole mount tree. */
#ifndef AT_RECURSIVE
#define AT_RECURSIVE 0x8000
#endif

/* Propagation type stored in mount_attr.propagation by the tests. */
#ifndef MS_SHARED
#define MS_SHARED (1 << 20)
#endif
83 
/* Upper bound on the number of worker threads started by the multi-threaded test. */
#define DEFAULT_THREADS 4
/*
 * Carry an int status through the void * used by pthread_exit() and
 * pthread_join(). Both directions go through intptr_t before the final cast.
 */
#define ptr_to_int(p) ((int)((intptr_t)(p)))
#define int_to_ptr(u) ((void *)((intptr_t)(u)))
87 
/*
 * Fallback syscall number for mount_setattr(), chosen per architecture and
 * only used when the system headers do not define __NR_mount_setattr.
 * NOTE(review): on MIPS the macro stays undefined if _MIPS_SIM matches none
 * of the three ABIs tested below.
 */
#ifndef __NR_mount_setattr
	#if defined __alpha__
		#define __NR_mount_setattr 552
	#elif defined _MIPS_SIM
		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
			#define __NR_mount_setattr (442 + 4000)
		#endif
		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
			#define __NR_mount_setattr (442 + 6000)
		#endif
		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
			#define __NR_mount_setattr (442 + 5000)
		#endif
	#elif defined __ia64__
		#define __NR_mount_setattr (442 + 1024)
	#else
		#define __NR_mount_setattr 442
	#endif
#endif

/*
 * Fallback syscall number for open_tree(), selected with the same
 * architecture ladder as above and only used when __NR_open_tree is not
 * already defined.
 */
#ifndef __NR_open_tree
	#if defined __alpha__
		#define __NR_open_tree 538
	#elif defined _MIPS_SIM
		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
			#define __NR_open_tree 4428
		#endif
		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
			#define __NR_open_tree 6428
		#endif
		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
			#define __NR_open_tree 5428
		#endif
	#elif defined __ia64__
		#define __NR_open_tree (428 + 1024)
	#else
		#define __NR_open_tree 428
	#endif
#endif
127 
/*
 * Fallback values for two further mount_setattr() attribute bits, defined
 * only when the headers do not provide them. MOUNT_ATTR_IDMAP is the bit the
 * idmapped-mount tests set together with mount_attr.userns_fd.
 */
#ifndef MOUNT_ATTR_IDMAP
#define MOUNT_ATTR_IDMAP 0x00100000
#endif

#ifndef MOUNT_ATTR_NOSYMFOLLOW
#define MOUNT_ATTR_NOSYMFOLLOW 0x00200000
#endif
135 
/*
 * Issue the raw mount_setattr system call through syscall(). All five
 * arguments are forwarded unchanged and the syscall() result is returned.
 */
static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flags,
				    struct mount_attr *attr, size_t size)
{
	long rc = syscall(__NR_mount_setattr, dfd, path, flags, attr, size);

	return rc;
}
141 
/*
 * Fallback open_tree() flag values, defined only when the headers do not
 * provide them. AT_RECURSIVE is not repeated here because the file already
 * supplies an identical guarded fallback further up.
 */
#ifndef OPEN_TREE_CLONE
#define OPEN_TREE_CLONE 1
#endif

#ifndef OPEN_TREE_CLOEXEC
#define OPEN_TREE_CLOEXEC O_CLOEXEC
#endif

/*
 * Issue the raw open_tree system call through syscall(). The three arguments
 * are forwarded unchanged and the syscall() result is returned.
 */
static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
{
	return syscall(__NR_open_tree, dfd, filename, flags);
}
158 
/*
 * write() wrapper that transparently restarts the call whenever it fails
 * with EINTR. Any other outcome (success, short write, other error) is
 * returned to the caller as-is.
 */
static ssize_t write_nointr(int fd, const void *buf, size_t count)
{
	for (;;) {
		ssize_t written = write(fd, buf, count);

		if (written >= 0 || errno != EINTR)
			return written;
	}
}
169 
170 static int write_file(const char *path, const void *buf, size_t count)
171 {
172 	int fd;
173 	ssize_t ret;
174 
175 	fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW);
176 	if (fd < 0)
177 		return -1;
178 
179 	ret = write_nointr(fd, buf, count);
180 	close(fd);
181 	if (ret < 0 || (size_t)ret != count)
182 		return -1;
183 
184 	return 0;
185 }
186 
187 static int create_and_enter_userns(void)
188 {
189 	uid_t uid;
190 	gid_t gid;
191 	char map[100];
192 
193 	uid = getuid();
194 	gid = getgid();
195 
196 	if (unshare(CLONE_NEWUSER))
197 		return -1;
198 
199 	if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) &&
200 	    errno != ENOENT)
201 		return -1;
202 
203 	snprintf(map, sizeof(map), "0 %d 1", uid);
204 	if (write_file("/proc/self/uid_map", map, strlen(map)))
205 		return -1;
206 
207 
208 	snprintf(map, sizeof(map), "0 %d 1", gid);
209 	if (write_file("/proc/self/gid_map", map, strlen(map)))
210 		return -1;
211 
212 	if (setgid(0))
213 		return -1;
214 
215 	if (setuid(0))
216 		return -1;
217 
218 	return 0;
219 }
220 
221 static int prepare_unpriv_mountns(void)
222 {
223 	if (create_and_enter_userns())
224 		return -1;
225 
226 	if (unshare(CLONE_NEWNS))
227 		return -1;
228 
229 	if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0))
230 		return -1;
231 
232 	return 0;
233 }
234 
235 #ifndef ST_NOSYMFOLLOW
236 #define ST_NOSYMFOLLOW 0x2000 /* do not follow symlinks */
237 #endif
238 
239 static int read_mnt_flags(const char *path)
240 {
241 	int ret;
242 	struct statvfs stat;
243 	unsigned int mnt_flags;
244 
245 	ret = statvfs(path, &stat);
246 	if (ret != 0)
247 		return -EINVAL;
248 
249 	if (stat.f_flag & ~(ST_RDONLY | ST_NOSUID | ST_NODEV | ST_NOEXEC |
250 			    ST_NOATIME | ST_NODIRATIME | ST_RELATIME |
251 			    ST_SYNCHRONOUS | ST_MANDLOCK | ST_NOSYMFOLLOW))
252 		return -EINVAL;
253 
254 	mnt_flags = 0;
255 	if (stat.f_flag & ST_RDONLY)
256 		mnt_flags |= MS_RDONLY;
257 	if (stat.f_flag & ST_NOSUID)
258 		mnt_flags |= MS_NOSUID;
259 	if (stat.f_flag & ST_NODEV)
260 		mnt_flags |= MS_NODEV;
261 	if (stat.f_flag & ST_NOEXEC)
262 		mnt_flags |= MS_NOEXEC;
263 	if (stat.f_flag & ST_NOATIME)
264 		mnt_flags |= MS_NOATIME;
265 	if (stat.f_flag & ST_NODIRATIME)
266 		mnt_flags |= MS_NODIRATIME;
267 	if (stat.f_flag & ST_RELATIME)
268 		mnt_flags |= MS_RELATIME;
269 	if (stat.f_flag & ST_SYNCHRONOUS)
270 		mnt_flags |= MS_SYNCHRONOUS;
271 	if (stat.f_flag & ST_MANDLOCK)
272 		mnt_flags |= ST_MANDLOCK;
273 	if (stat.f_flag & ST_NOSYMFOLLOW)
274 		mnt_flags |= ST_NOSYMFOLLOW;
275 
276 	return mnt_flags;
277 }
278 
/*
 * Advance past @nfields space- or tab-terminated fields of @src.
 *
 * Each step skips a run of non-separator characters followed by exactly one
 * separator. Returns a pointer to the character after the last skipped
 * separator, or to the terminating NUL if the string ends first. The result
 * is never NULL.
 */
static char *get_field(char *src, int nfields)
{
	char *cur = src;
	int remaining;

	for (remaining = nfields; remaining > 0; remaining--) {
		cur += strcspn(cur, " \t");
		if (*cur == '\0')
			break;

		cur++;
	}

	return cur;
}
296 
/*
 * Terminate @word in place at its first space or tab. If there is none, the
 * existing NUL terminator is simply rewritten.
 */
static void null_endofword(char *word)
{
	word[strcspn(word, " \t")] = '\0';
}
303 
/*
 * Report whether the mount whose mount point equals @path is shared.
 *
 * Scans /proc/self/mountinfo line by line, compares the fifth field with
 * @path and, on a match, looks for "shared:" in the seventh field. Returns
 * false when the file cannot be opened or no matching shared entry exists.
 */
static bool is_shared_mount(const char *path)
{
	bool shared = false;
	size_t len = 0;
	char *line = NULL;
	FILE *f;

	f = fopen("/proc/self/mountinfo", "re");
	if (!f)
		return false;

	while (getline(&line, &len, f) != -1) {
		char *opts, *target;

		/* get_field() never returns NULL, so no NULL checks are needed. */
		target = get_field(line, 4);
		opts = get_field(target, 2);

		/* Terminating target does not disturb opts, which lies after it. */
		null_endofword(target);

		if (strcmp(target, path) != 0)
			continue;

		null_endofword(opts);
		if (strstr(opts, "shared:")) {
			/* Leave through the common exit so line and f are released. */
			shared = true;
			break;
		}
	}

	free(line);
	fclose(f);

	return shared;
}
340 
341 static void *mount_setattr_thread(void *data)
342 {
343 	struct mount_attr attr = {
344 		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID,
345 		.attr_clr	= 0,
346 		.propagation	= MS_SHARED,
347 	};
348 
349 	if (sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)))
350 		pthread_exit(int_to_ptr(-1));
351 
352 	pthread_exit(int_to_ptr(0));
353 }
354 
355 /* Attempt to de-conflict with the selftests tree. */
356 #ifndef SKIP
357 #define SKIP(s, ...)	XFAIL(s, ##__VA_ARGS__)
358 #endif
359 
360 static bool mount_setattr_supported(void)
361 {
362 	int ret;
363 
364 	ret = sys_mount_setattr(-EBADF, "", AT_EMPTY_PATH, NULL, 0);
365 	if (ret < 0 && errno == ENOSYS)
366 		return false;
367 
368 	return true;
369 }
370 
371 FIXTURE(mount_setattr) {
372 };
373 
374 #define NOSYMFOLLOW_TARGET "/mnt/A/AA/data"
375 #define NOSYMFOLLOW_SYMLINK "/mnt/A/AA/symlink"
376 
377 FIXTURE_SETUP(mount_setattr)
378 {
379 	int fd = -EBADF;
380 
381 	if (!mount_setattr_supported())
382 		SKIP(return, "mount_setattr syscall not supported");
383 
384 	ASSERT_EQ(prepare_unpriv_mountns(), 0);
385 
386 	(void)umount2("/mnt", MNT_DETACH);
387 	(void)umount2("/tmp", MNT_DETACH);
388 
389 	ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
390 			"size=100000,mode=700"), 0);
391 
392 	ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
393 
394 	ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV,
395 			"size=100000,mode=700"), 0);
396 
397 	ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
398 
399 	ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV,
400 			"size=100000,mode=700"), 0);
401 
402 	ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
403 			"size=100000,mode=700"), 0);
404 
405 	ASSERT_EQ(mkdir("/mnt/A", 0777), 0);
406 
407 	ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV,
408 			"size=100000,mode=700"), 0);
409 
410 	ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);
411 
412 	ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);
413 
414 	ASSERT_EQ(mkdir("/mnt/B", 0777), 0);
415 
416 	ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
417 			MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0);
418 
419 	ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);
420 
421 	ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
422 			MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);
423 
424 	fd = creat(NOSYMFOLLOW_TARGET, O_RDWR | O_CLOEXEC);
425 	ASSERT_GT(fd, 0);
426 	ASSERT_EQ(symlink(NOSYMFOLLOW_TARGET, NOSYMFOLLOW_SYMLINK), 0);
427 	ASSERT_EQ(close(fd), 0);
428 }
429 
430 FIXTURE_TEARDOWN(mount_setattr)
431 {
432 	if (!mount_setattr_supported())
433 		SKIP(return, "mount_setattr syscall not supported");
434 
435 	(void)umount2("/mnt/A", MNT_DETACH);
436 	(void)umount2("/tmp", MNT_DETACH);
437 }
438 
439 TEST_F(mount_setattr, invalid_attributes)
440 {
441 	struct mount_attr invalid_attr = {
442 		.attr_set = (1U << 31),
443 	};
444 
445 	if (!mount_setattr_supported())
446 		SKIP(return, "mount_setattr syscall not supported");
447 
448 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
449 				    sizeof(invalid_attr)), 0);
450 
451 	invalid_attr.attr_set	= 0;
452 	invalid_attr.attr_clr	= (1U << 31);
453 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
454 				    sizeof(invalid_attr)), 0);
455 
456 	invalid_attr.attr_clr		= 0;
457 	invalid_attr.propagation	= (1U << 31);
458 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
459 				    sizeof(invalid_attr)), 0);
460 
461 	invalid_attr.attr_set		= (1U << 31);
462 	invalid_attr.attr_clr		= (1U << 31);
463 	invalid_attr.propagation	= (1U << 31);
464 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
465 				    sizeof(invalid_attr)), 0);
466 
467 	ASSERT_NE(sys_mount_setattr(-1, "mnt/A", AT_RECURSIVE, &invalid_attr,
468 				    sizeof(invalid_attr)), 0);
469 }
470 
471 TEST_F(mount_setattr, extensibility)
472 {
473 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
474 	char *s = "dummy";
475 	struct mount_attr invalid_attr = {};
476 	struct mount_attr_large {
477 		struct mount_attr attr1;
478 		struct mount_attr attr2;
479 		struct mount_attr attr3;
480 	} large_attr = {};
481 
482 	if (!mount_setattr_supported())
483 		SKIP(return, "mount_setattr syscall not supported");
484 
485 	old_flags = read_mnt_flags("/mnt/A");
486 	ASSERT_GT(old_flags, 0);
487 
488 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, NULL,
489 				    sizeof(invalid_attr)), 0);
490 	ASSERT_EQ(errno, EFAULT);
491 
492 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, (void *)s,
493 				    sizeof(invalid_attr)), 0);
494 	ASSERT_EQ(errno, EINVAL);
495 
496 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, 0), 0);
497 	ASSERT_EQ(errno, EINVAL);
498 
499 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
500 				    sizeof(invalid_attr) / 2), 0);
501 	ASSERT_EQ(errno, EINVAL);
502 
503 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
504 				    sizeof(invalid_attr) / 2), 0);
505 	ASSERT_EQ(errno, EINVAL);
506 
507 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
508 				    (void *)&large_attr, sizeof(large_attr)), 0);
509 
510 	large_attr.attr3.attr_set = MOUNT_ATTR_RDONLY;
511 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
512 				    (void *)&large_attr, sizeof(large_attr)), 0);
513 
514 	large_attr.attr3.attr_set = 0;
515 	large_attr.attr1.attr_set = MOUNT_ATTR_RDONLY;
516 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
517 				    (void *)&large_attr, sizeof(large_attr)), 0);
518 
519 	expected_flags = old_flags;
520 	expected_flags |= MS_RDONLY;
521 
522 	new_flags = read_mnt_flags("/mnt/A");
523 	ASSERT_EQ(new_flags, expected_flags);
524 
525 	new_flags = read_mnt_flags("/mnt/A/AA");
526 	ASSERT_EQ(new_flags, expected_flags);
527 
528 	new_flags = read_mnt_flags("/mnt/A/AA/B");
529 	ASSERT_EQ(new_flags, expected_flags);
530 
531 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
532 	ASSERT_EQ(new_flags, expected_flags);
533 }
534 
535 TEST_F(mount_setattr, basic)
536 {
537 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
538 	struct mount_attr attr = {
539 		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
540 		.attr_clr	= MOUNT_ATTR__ATIME,
541 	};
542 
543 	if (!mount_setattr_supported())
544 		SKIP(return, "mount_setattr syscall not supported");
545 
546 	old_flags = read_mnt_flags("/mnt/A");
547 	ASSERT_GT(old_flags, 0);
548 
549 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", 0, &attr, sizeof(attr)), 0);
550 
551 	expected_flags = old_flags;
552 	expected_flags |= MS_RDONLY;
553 	expected_flags |= MS_NOEXEC;
554 	expected_flags &= ~MS_NOATIME;
555 	expected_flags |= MS_RELATIME;
556 
557 	new_flags = read_mnt_flags("/mnt/A");
558 	ASSERT_EQ(new_flags, expected_flags);
559 
560 	new_flags = read_mnt_flags("/mnt/A/AA");
561 	ASSERT_EQ(new_flags, old_flags);
562 
563 	new_flags = read_mnt_flags("/mnt/A/AA/B");
564 	ASSERT_EQ(new_flags, old_flags);
565 
566 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
567 	ASSERT_EQ(new_flags, old_flags);
568 }
569 
570 TEST_F(mount_setattr, basic_recursive)
571 {
572 	int fd;
573 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
574 	struct mount_attr attr = {
575 		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
576 		.attr_clr	= MOUNT_ATTR__ATIME,
577 	};
578 
579 	if (!mount_setattr_supported())
580 		SKIP(return, "mount_setattr syscall not supported");
581 
582 	old_flags = read_mnt_flags("/mnt/A");
583 	ASSERT_GT(old_flags, 0);
584 
585 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
586 
587 	expected_flags = old_flags;
588 	expected_flags |= MS_RDONLY;
589 	expected_flags |= MS_NOEXEC;
590 	expected_flags &= ~MS_NOATIME;
591 	expected_flags |= MS_RELATIME;
592 
593 	new_flags = read_mnt_flags("/mnt/A");
594 	ASSERT_EQ(new_flags, expected_flags);
595 
596 	new_flags = read_mnt_flags("/mnt/A/AA");
597 	ASSERT_EQ(new_flags, expected_flags);
598 
599 	new_flags = read_mnt_flags("/mnt/A/AA/B");
600 	ASSERT_EQ(new_flags, expected_flags);
601 
602 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
603 	ASSERT_EQ(new_flags, expected_flags);
604 
605 	memset(&attr, 0, sizeof(attr));
606 	attr.attr_clr = MOUNT_ATTR_RDONLY;
607 	attr.propagation = MS_SHARED;
608 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
609 
610 	expected_flags &= ~MS_RDONLY;
611 	new_flags = read_mnt_flags("/mnt/A");
612 	ASSERT_EQ(new_flags, expected_flags);
613 
614 	ASSERT_EQ(is_shared_mount("/mnt/A"), true);
615 
616 	new_flags = read_mnt_flags("/mnt/A/AA");
617 	ASSERT_EQ(new_flags, expected_flags);
618 
619 	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
620 
621 	new_flags = read_mnt_flags("/mnt/A/AA/B");
622 	ASSERT_EQ(new_flags, expected_flags);
623 
624 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
625 
626 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
627 	ASSERT_EQ(new_flags, expected_flags);
628 
629 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
630 
631 	fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
632 	ASSERT_GE(fd, 0);
633 
634 	/*
635 	 * We're holding a fd open for writing so this needs to fail somewhere
636 	 * in the middle and the mount options need to be unchanged.
637 	 */
638 	attr.attr_set = MOUNT_ATTR_RDONLY;
639 	ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
640 
641 	new_flags = read_mnt_flags("/mnt/A");
642 	ASSERT_EQ(new_flags, expected_flags);
643 
644 	ASSERT_EQ(is_shared_mount("/mnt/A"), true);
645 
646 	new_flags = read_mnt_flags("/mnt/A/AA");
647 	ASSERT_EQ(new_flags, expected_flags);
648 
649 	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
650 
651 	new_flags = read_mnt_flags("/mnt/A/AA/B");
652 	ASSERT_EQ(new_flags, expected_flags);
653 
654 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
655 
656 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
657 	ASSERT_EQ(new_flags, expected_flags);
658 
659 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
660 
661 	EXPECT_EQ(close(fd), 0);
662 }
663 
664 TEST_F(mount_setattr, mount_has_writers)
665 {
666 	int fd, dfd;
667 	unsigned int old_flags = 0, new_flags = 0;
668 	struct mount_attr attr = {
669 		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
670 		.attr_clr	= MOUNT_ATTR__ATIME,
671 		.propagation	= MS_SHARED,
672 	};
673 
674 	if (!mount_setattr_supported())
675 		SKIP(return, "mount_setattr syscall not supported");
676 
677 	old_flags = read_mnt_flags("/mnt/A");
678 	ASSERT_GT(old_flags, 0);
679 
680 	fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
681 	ASSERT_GE(fd, 0);
682 
683 	/*
684 	 * We're holding a fd open to a mount somwhere in the middle so this
685 	 * needs to fail somewhere in the middle. After this the mount options
686 	 * need to be unchanged.
687 	 */
688 	ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
689 
690 	new_flags = read_mnt_flags("/mnt/A");
691 	ASSERT_EQ(new_flags, old_flags);
692 
693 	ASSERT_EQ(is_shared_mount("/mnt/A"), false);
694 
695 	new_flags = read_mnt_flags("/mnt/A/AA");
696 	ASSERT_EQ(new_flags, old_flags);
697 
698 	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), false);
699 
700 	new_flags = read_mnt_flags("/mnt/A/AA/B");
701 	ASSERT_EQ(new_flags, old_flags);
702 
703 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), false);
704 
705 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
706 	ASSERT_EQ(new_flags, old_flags);
707 
708 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), false);
709 
710 	dfd = open("/mnt/A/AA/B", O_DIRECTORY | O_CLOEXEC);
711 	ASSERT_GE(dfd, 0);
712 	EXPECT_EQ(fsync(dfd), 0);
713 	EXPECT_EQ(close(dfd), 0);
714 
715 	EXPECT_EQ(fsync(fd), 0);
716 	EXPECT_EQ(close(fd), 0);
717 
718 	/* All writers are gone so this should succeed. */
719 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
720 }
721 
722 TEST_F(mount_setattr, mixed_mount_options)
723 {
724 	unsigned int old_flags1 = 0, old_flags2 = 0, new_flags = 0, expected_flags = 0;
725 	struct mount_attr attr = {
726 		.attr_clr = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR__ATIME,
727 		.attr_set = MOUNT_ATTR_RELATIME,
728 	};
729 
730 	if (!mount_setattr_supported())
731 		SKIP(return, "mount_setattr syscall not supported");
732 
733 	old_flags1 = read_mnt_flags("/mnt/B");
734 	ASSERT_GT(old_flags1, 0);
735 
736 	old_flags2 = read_mnt_flags("/mnt/B/BB");
737 	ASSERT_GT(old_flags2, 0);
738 
739 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/B", AT_RECURSIVE, &attr, sizeof(attr)), 0);
740 
741 	expected_flags = old_flags2;
742 	expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
743 	expected_flags |= MS_RELATIME;
744 
745 	new_flags = read_mnt_flags("/mnt/B");
746 	ASSERT_EQ(new_flags, expected_flags);
747 
748 	expected_flags = old_flags2;
749 	expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
750 	expected_flags |= MS_RELATIME;
751 
752 	new_flags = read_mnt_flags("/mnt/B/BB");
753 	ASSERT_EQ(new_flags, expected_flags);
754 }
755 
756 TEST_F(mount_setattr, time_changes)
757 {
758 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
759 	struct mount_attr attr = {
760 		.attr_set	= MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME,
761 	};
762 
763 	if (!mount_setattr_supported())
764 		SKIP(return, "mount_setattr syscall not supported");
765 
766 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
767 
768 	attr.attr_set = MOUNT_ATTR_STRICTATIME;
769 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
770 
771 	attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
772 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
773 
774 	attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
775 	attr.attr_clr = MOUNT_ATTR__ATIME;
776 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
777 
778 	attr.attr_set = 0;
779 	attr.attr_clr = MOUNT_ATTR_STRICTATIME;
780 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
781 
782 	attr.attr_clr = MOUNT_ATTR_NOATIME;
783 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
784 
785 	old_flags = read_mnt_flags("/mnt/A");
786 	ASSERT_GT(old_flags, 0);
787 
788 	attr.attr_set = MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME;
789 	attr.attr_clr = MOUNT_ATTR__ATIME;
790 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
791 
792 	expected_flags = old_flags;
793 	expected_flags |= MS_NOATIME;
794 	expected_flags |= MS_NODIRATIME;
795 
796 	new_flags = read_mnt_flags("/mnt/A");
797 	ASSERT_EQ(new_flags, expected_flags);
798 
799 	new_flags = read_mnt_flags("/mnt/A/AA");
800 	ASSERT_EQ(new_flags, expected_flags);
801 
802 	new_flags = read_mnt_flags("/mnt/A/AA/B");
803 	ASSERT_EQ(new_flags, expected_flags);
804 
805 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
806 	ASSERT_EQ(new_flags, expected_flags);
807 
808 	memset(&attr, 0, sizeof(attr));
809 	attr.attr_set &= ~MOUNT_ATTR_NOATIME;
810 	attr.attr_set |= MOUNT_ATTR_RELATIME;
811 	attr.attr_clr |= MOUNT_ATTR__ATIME;
812 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
813 
814 	expected_flags &= ~MS_NOATIME;
815 	expected_flags |= MS_RELATIME;
816 
817 	new_flags = read_mnt_flags("/mnt/A");
818 	ASSERT_EQ(new_flags, expected_flags);
819 
820 	new_flags = read_mnt_flags("/mnt/A/AA");
821 	ASSERT_EQ(new_flags, expected_flags);
822 
823 	new_flags = read_mnt_flags("/mnt/A/AA/B");
824 	ASSERT_EQ(new_flags, expected_flags);
825 
826 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
827 	ASSERT_EQ(new_flags, expected_flags);
828 
829 	memset(&attr, 0, sizeof(attr));
830 	attr.attr_set &= ~MOUNT_ATTR_RELATIME;
831 	attr.attr_set |= MOUNT_ATTR_STRICTATIME;
832 	attr.attr_clr |= MOUNT_ATTR__ATIME;
833 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
834 
835 	expected_flags &= ~MS_RELATIME;
836 
837 	new_flags = read_mnt_flags("/mnt/A");
838 	ASSERT_EQ(new_flags, expected_flags);
839 
840 	new_flags = read_mnt_flags("/mnt/A/AA");
841 	ASSERT_EQ(new_flags, expected_flags);
842 
843 	new_flags = read_mnt_flags("/mnt/A/AA/B");
844 	ASSERT_EQ(new_flags, expected_flags);
845 
846 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
847 	ASSERT_EQ(new_flags, expected_flags);
848 
849 	memset(&attr, 0, sizeof(attr));
850 	attr.attr_set &= ~MOUNT_ATTR_STRICTATIME;
851 	attr.attr_set |= MOUNT_ATTR_NOATIME;
852 	attr.attr_clr |= MOUNT_ATTR__ATIME;
853 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
854 
855 	expected_flags |= MS_NOATIME;
856 	new_flags = read_mnt_flags("/mnt/A");
857 	ASSERT_EQ(new_flags, expected_flags);
858 
859 	new_flags = read_mnt_flags("/mnt/A/AA");
860 	ASSERT_EQ(new_flags, expected_flags);
861 
862 	new_flags = read_mnt_flags("/mnt/A/AA/B");
863 	ASSERT_EQ(new_flags, expected_flags);
864 
865 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
866 	ASSERT_EQ(new_flags, expected_flags);
867 
868 	memset(&attr, 0, sizeof(attr));
869 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
870 
871 	new_flags = read_mnt_flags("/mnt/A");
872 	ASSERT_EQ(new_flags, expected_flags);
873 
874 	new_flags = read_mnt_flags("/mnt/A/AA");
875 	ASSERT_EQ(new_flags, expected_flags);
876 
877 	new_flags = read_mnt_flags("/mnt/A/AA/B");
878 	ASSERT_EQ(new_flags, expected_flags);
879 
880 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
881 	ASSERT_EQ(new_flags, expected_flags);
882 
883 	memset(&attr, 0, sizeof(attr));
884 	attr.attr_clr = MOUNT_ATTR_NODIRATIME;
885 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
886 
887 	expected_flags &= ~MS_NODIRATIME;
888 
889 	new_flags = read_mnt_flags("/mnt/A");
890 	ASSERT_EQ(new_flags, expected_flags);
891 
892 	new_flags = read_mnt_flags("/mnt/A/AA");
893 	ASSERT_EQ(new_flags, expected_flags);
894 
895 	new_flags = read_mnt_flags("/mnt/A/AA/B");
896 	ASSERT_EQ(new_flags, expected_flags);
897 
898 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
899 	ASSERT_EQ(new_flags, expected_flags);
900 }
901 
902 TEST_F(mount_setattr, multi_threaded)
903 {
904 	int i, j, nthreads, ret = 0;
905 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
906 	pthread_attr_t pattr;
907 	pthread_t threads[DEFAULT_THREADS];
908 
909 	if (!mount_setattr_supported())
910 		SKIP(return, "mount_setattr syscall not supported");
911 
912 	old_flags = read_mnt_flags("/mnt/A");
913 	ASSERT_GT(old_flags, 0);
914 
915 	/* Try to change mount options from multiple threads. */
916 	nthreads = get_nprocs_conf();
917 	if (nthreads > DEFAULT_THREADS)
918 		nthreads = DEFAULT_THREADS;
919 
920 	pthread_attr_init(&pattr);
921 	for (i = 0; i < nthreads; i++)
922 		ASSERT_EQ(pthread_create(&threads[i], &pattr, mount_setattr_thread, NULL), 0);
923 
924 	for (j = 0; j < i; j++) {
925 		void *retptr = NULL;
926 
927 		EXPECT_EQ(pthread_join(threads[j], &retptr), 0);
928 
929 		ret += ptr_to_int(retptr);
930 		EXPECT_EQ(ret, 0);
931 	}
932 	pthread_attr_destroy(&pattr);
933 
934 	ASSERT_EQ(ret, 0);
935 
936 	expected_flags = old_flags;
937 	expected_flags |= MS_RDONLY;
938 	expected_flags |= MS_NOSUID;
939 	new_flags = read_mnt_flags("/mnt/A");
940 	ASSERT_EQ(new_flags, expected_flags);
941 
942 	ASSERT_EQ(is_shared_mount("/mnt/A"), true);
943 
944 	new_flags = read_mnt_flags("/mnt/A/AA");
945 	ASSERT_EQ(new_flags, expected_flags);
946 
947 	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
948 
949 	new_flags = read_mnt_flags("/mnt/A/AA/B");
950 	ASSERT_EQ(new_flags, expected_flags);
951 
952 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
953 
954 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
955 	ASSERT_EQ(new_flags, expected_flags);
956 
957 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
958 }
959 
960 TEST_F(mount_setattr, wrong_user_namespace)
961 {
962 	int ret;
963 	struct mount_attr attr = {
964 		.attr_set = MOUNT_ATTR_RDONLY,
965 	};
966 
967 	if (!mount_setattr_supported())
968 		SKIP(return, "mount_setattr syscall not supported");
969 
970 	EXPECT_EQ(create_and_enter_userns(), 0);
971 	ret = sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr));
972 	ASSERT_LT(ret, 0);
973 	ASSERT_EQ(errno, EPERM);
974 }
975 
976 TEST_F(mount_setattr, wrong_mount_namespace)
977 {
978 	int fd, ret;
979 	struct mount_attr attr = {
980 		.attr_set = MOUNT_ATTR_RDONLY,
981 	};
982 
983 	if (!mount_setattr_supported())
984 		SKIP(return, "mount_setattr syscall not supported");
985 
986 	fd = open("/mnt/A", O_DIRECTORY | O_CLOEXEC);
987 	ASSERT_GE(fd, 0);
988 
989 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
990 
991 	ret = sys_mount_setattr(fd, "", AT_EMPTY_PATH | AT_RECURSIVE, &attr, sizeof(attr));
992 	ASSERT_LT(ret, 0);
993 	ASSERT_EQ(errno, EINVAL);
994 }
995 
996 FIXTURE(mount_setattr_idmapped) {
997 };
998 
999 FIXTURE_SETUP(mount_setattr_idmapped)
1000 {
1001 	int img_fd = -EBADF;
1002 
1003 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
1004 
1005 	ASSERT_EQ(mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0), 0);
1006 
1007 	(void)umount2("/mnt", MNT_DETACH);
1008 	(void)umount2("/tmp", MNT_DETACH);
1009 
1010 	ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
1011 			"size=100000,mode=700"), 0);
1012 
1013 	ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
1014 	ASSERT_EQ(mknodat(-EBADF, "/tmp/B/b", S_IFREG | 0644, 0), 0);
1015 	ASSERT_EQ(chown("/tmp/B/b", 0, 0), 0);
1016 
1017 	ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV,
1018 			"size=100000,mode=700"), 0);
1019 
1020 	ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
1021 	ASSERT_EQ(mknodat(-EBADF, "/tmp/B/BB/b", S_IFREG | 0644, 0), 0);
1022 	ASSERT_EQ(chown("/tmp/B/BB/b", 0, 0), 0);
1023 
1024 	ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV,
1025 			"size=100000,mode=700"), 0);
1026 
1027 	ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
1028 			"size=100000,mode=700"), 0);
1029 
1030 	ASSERT_EQ(mkdir("/mnt/A", 0777), 0);
1031 
1032 	ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV,
1033 			"size=100000,mode=700"), 0);
1034 
1035 	ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);
1036 
1037 	ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);
1038 
1039 	ASSERT_EQ(mkdir("/mnt/B", 0777), 0);
1040 
1041 	ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
1042 			MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0);
1043 
1044 	ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);
1045 
1046 	ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
1047 			MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);
1048 
1049 	ASSERT_EQ(mkdir("/mnt/C", 0777), 0);
1050 	ASSERT_EQ(mkdir("/mnt/D", 0777), 0);
1051 	img_fd = openat(-EBADF, "/mnt/C/ext4.img", O_CREAT | O_WRONLY, 0600);
1052 	ASSERT_GE(img_fd, 0);
1053 	ASSERT_EQ(ftruncate(img_fd, 1024 * 2048), 0);
1054 	ASSERT_EQ(system("mkfs.ext4 -q /mnt/C/ext4.img"), 0);
1055 	ASSERT_EQ(system("mount -o loop -t ext4 /mnt/C/ext4.img /mnt/D/"), 0);
1056 	ASSERT_EQ(close(img_fd), 0);
1057 }
1058 
/* Lazily detach /mnt/A and /tmp; unmount failures are ignored. */
FIXTURE_TEARDOWN(mount_setattr_idmapped)
{
	static const char *const targets[] = { "/mnt/A", "/tmp" };
	size_t i;

	for (i = 0; i < sizeof(targets) / sizeof(targets[0]); i++)
		(void)umount2(targets[i], MNT_DETACH);
}
1064 
1065 /**
1066  * Validate that negative fd values are rejected.
1067  */
1068 TEST_F(mount_setattr_idmapped, invalid_fd_negative)
1069 {
1070 	struct mount_attr attr = {
1071 		.attr_set	= MOUNT_ATTR_IDMAP,
1072 		.userns_fd	= -EBADF,
1073 	};
1074 
1075 	if (!mount_setattr_supported())
1076 		SKIP(return, "mount_setattr syscall not supported");
1077 
1078 	ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
1079 		TH_LOG("failure: created idmapped mount with negative fd");
1080 	}
1081 }
1082 
1083 /**
1084  * Validate that excessively large fd values are rejected.
1085  */
1086 TEST_F(mount_setattr_idmapped, invalid_fd_large)
1087 {
1088 	struct mount_attr attr = {
1089 		.attr_set	= MOUNT_ATTR_IDMAP,
1090 		.userns_fd	= INT64_MAX,
1091 	};
1092 
1093 	if (!mount_setattr_supported())
1094 		SKIP(return, "mount_setattr syscall not supported");
1095 
1096 	ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
1097 		TH_LOG("failure: created idmapped mount with too large fd value");
1098 	}
1099 }
1100 
1101 /**
1102  * Validate that closed fd values are rejected.
1103  */
1104 TEST_F(mount_setattr_idmapped, invalid_fd_closed)
1105 {
1106 	int fd;
1107 	struct mount_attr attr = {
1108 		.attr_set = MOUNT_ATTR_IDMAP,
1109 	};
1110 
1111 	if (!mount_setattr_supported())
1112 		SKIP(return, "mount_setattr syscall not supported");
1113 
1114 	fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
1115 	ASSERT_GE(fd, 0);
1116 	ASSERT_GE(close(fd), 0);
1117 
1118 	attr.userns_fd = fd;
1119 	ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
1120 		TH_LOG("failure: created idmapped mount with closed fd");
1121 	}
1122 }
1123 
1124 /**
1125  * Validate that the initial user namespace is rejected.
1126  */
1127 TEST_F(mount_setattr_idmapped, invalid_fd_initial_userns)
1128 {
1129 	int open_tree_fd = -EBADF;
1130 	struct mount_attr attr = {
1131 		.attr_set = MOUNT_ATTR_IDMAP,
1132 	};
1133 
1134 	if (!mount_setattr_supported())
1135 		SKIP(return, "mount_setattr syscall not supported");
1136 
1137 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1138 				     AT_NO_AUTOMOUNT |
1139 				     AT_SYMLINK_NOFOLLOW |
1140 				     OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
1141 	ASSERT_GE(open_tree_fd, 0);
1142 
1143 	attr.userns_fd = open("/proc/1/ns/user", O_RDONLY | O_CLOEXEC);
1144 	ASSERT_GE(attr.userns_fd, 0);
1145 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1146 	ASSERT_EQ(errno, EPERM);
1147 	ASSERT_EQ(close(attr.userns_fd), 0);
1148 	ASSERT_EQ(close(open_tree_fd), 0);
1149 }
1150 
/**
 * map_ids - write identical uid and gid mappings for a process
 * @pid:    process whose /proc/<pid>/uid_map and gid_map are written
 * @nsid:   first field of the mapping line
 * @hostid: second field of the mapping line
 * @range:  third field of the mapping line
 *
 * The same "<nsid> <hostid> <range>" line is written to both files via
 * write_file(), uid_map first.
 *
 * Return: 0 on success, -1 as soon as one write_file() call reports failure.
 */
static int map_ids(pid_t pid, unsigned long nsid, unsigned long hostid,
		   unsigned long range)
{
	char id_map[100], proc_path[256];
	size_t len;

	/* Both map files receive exactly the same text, so format it once. */
	snprintf(id_map, sizeof(id_map), "%lu %lu %lu", nsid, hostid, range);
	len = strlen(id_map);

	snprintf(proc_path, sizeof(proc_path), "/proc/%d/uid_map", pid);
	if (write_file(proc_path, id_map, len) != 0)
		return -1;

	snprintf(proc_path, sizeof(proc_path), "/proc/%d/gid_map", pid);
	if (write_file(proc_path, id_map, len) != 0)
		return -1;

	return 0;
}
1169 
#define __STACK_SIZE (8 * 1024 * 1024)
/**
 * do_clone - run @fn in a child created with clone()
 * @fn:    function executed by the child
 * @arg:   argument handed to @fn
 * @flags: clone flags; SIGCHLD is always OR-ed in
 *
 * A stack of __STACK_SIZE bytes is allocated for the child. Unless the child
 * shares our address space (CLONE_VM), it runs on its own copy of that
 * memory, so the parent's allocation is released again before returning
 * instead of being leaked on every call. The allocation is also released
 * when clone() itself fails. errno is preserved across the cleanup so
 * callers can still inspect the clone() error.
 *
 * Return: the child's pid in the parent, -ENOMEM if the stack could not be
 * allocated, or the negative result of clone() with errno set.
 */
static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
{
	void *stack;
	pid_t pid;
	int saved_errno;

	stack = malloc(__STACK_SIZE);
	if (!stack)
		return -ENOMEM;

#ifdef __ia64__
	pid = __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL);
#else
	/* The stack top is passed; char * keeps the arithmetic standard C. */
	pid = clone(fn, (char *)stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL);
#endif

	if (pid < 0 || !(flags & CLONE_VM)) {
		saved_errno = errno;
		free(stack);
		errno = saved_errno;
	}

	return pid;
}
1185 
/*
 * Callback with the int (*)(void *) shape that do_clone() expects: the
 * calling process sends SIGSTOP to itself. @data is not used.
 *
 * Returns the result of kill().
 */
static int get_userns_fd_cb(void *data)
{
	const pid_t self = getpid();

	return kill(self, SIGSTOP);
}
1190 
/**
 * wait_for_pid - reap a child and report its exit status
 * @pid: child to wait for
 *
 * waitpid() is retried for as long as it is interrupted (EINTR).
 *
 * Return: the child's exit status if it exited normally, -1 if waitpid()
 * failed or the child did not terminate through a normal exit.
 */
static int wait_for_pid(pid_t pid)
{
	int wstatus;
	int reaped;

	do {
		reaped = waitpid(pid, &wstatus, 0);
	} while (reaped == -1 && errno == EINTR);

	if (reaped == -1)
		return -1;

	return WIFEXITED(wstatus) ? WEXITSTATUS(wstatus) : -1;
}
1209 
1210 static int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long range)
1211 {
1212 	int ret;
1213 	pid_t pid;
1214 	char path[256];
1215 
1216 	pid = do_clone(get_userns_fd_cb, NULL, CLONE_NEWUSER);
1217 	if (pid < 0)
1218 		return -errno;
1219 
1220 	ret = map_ids(pid, nsid, hostid, range);
1221 	if (ret < 0)
1222 		return ret;
1223 
1224 	snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
1225 	ret = open(path, O_RDONLY | O_CLOEXEC);
1226 	kill(pid, SIGKILL);
1227 	wait_for_pid(pid);
1228 	return ret;
1229 }
1230 
1231 /**
1232  * Validate that an attached mount in our mount namespace cannot be idmapped.
1233  * (The kernel enforces that the mount's mount namespace and the caller's mount
1234  *  namespace match.)
1235  */
1236 TEST_F(mount_setattr_idmapped, attached_mount_inside_current_mount_namespace)
1237 {
1238 	int open_tree_fd = -EBADF;
1239 	struct mount_attr attr = {
1240 		.attr_set = MOUNT_ATTR_IDMAP,
1241 	};
1242 
1243 	if (!mount_setattr_supported())
1244 		SKIP(return, "mount_setattr syscall not supported");
1245 
1246 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1247 				     AT_EMPTY_PATH |
1248 				     AT_NO_AUTOMOUNT |
1249 				     AT_SYMLINK_NOFOLLOW |
1250 				     OPEN_TREE_CLOEXEC);
1251 	ASSERT_GE(open_tree_fd, 0);
1252 
1253 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1254 	ASSERT_GE(attr.userns_fd, 0);
1255 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1256 	ASSERT_EQ(close(attr.userns_fd), 0);
1257 	ASSERT_EQ(close(open_tree_fd), 0);
1258 }
1259 
1260 /**
1261  * Validate that idmapping a mount is rejected if the mount's mount namespace
1262  * and our mount namespace don't match.
1263  * (The kernel enforces that the mount's mount namespace and the caller's mount
1264  *  namespace match.)
1265  */
1266 TEST_F(mount_setattr_idmapped, attached_mount_outside_current_mount_namespace)
1267 {
1268 	int open_tree_fd = -EBADF;
1269 	struct mount_attr attr = {
1270 		.attr_set = MOUNT_ATTR_IDMAP,
1271 	};
1272 
1273 	if (!mount_setattr_supported())
1274 		SKIP(return, "mount_setattr syscall not supported");
1275 
1276 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1277 				     AT_EMPTY_PATH |
1278 				     AT_NO_AUTOMOUNT |
1279 				     AT_SYMLINK_NOFOLLOW |
1280 				     OPEN_TREE_CLOEXEC);
1281 	ASSERT_GE(open_tree_fd, 0);
1282 
1283 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
1284 
1285 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1286 	ASSERT_GE(attr.userns_fd, 0);
1287 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr,
1288 				    sizeof(attr)), 0);
1289 	ASSERT_EQ(close(attr.userns_fd), 0);
1290 	ASSERT_EQ(close(open_tree_fd), 0);
1291 }
1292 
1293 /**
1294  * Validate that an attached mount in our mount namespace can be idmapped.
1295  */
1296 TEST_F(mount_setattr_idmapped, detached_mount_inside_current_mount_namespace)
1297 {
1298 	int open_tree_fd = -EBADF;
1299 	struct mount_attr attr = {
1300 		.attr_set = MOUNT_ATTR_IDMAP,
1301 	};
1302 
1303 	if (!mount_setattr_supported())
1304 		SKIP(return, "mount_setattr syscall not supported");
1305 
1306 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1307 				     AT_EMPTY_PATH |
1308 				     AT_NO_AUTOMOUNT |
1309 				     AT_SYMLINK_NOFOLLOW |
1310 				     OPEN_TREE_CLOEXEC |
1311 				     OPEN_TREE_CLONE);
1312 	ASSERT_GE(open_tree_fd, 0);
1313 
1314 	/* Changing mount properties on a detached mount. */
1315 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1316 	ASSERT_GE(attr.userns_fd, 0);
1317 	ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1318 				    AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1319 	ASSERT_EQ(close(attr.userns_fd), 0);
1320 	ASSERT_EQ(close(open_tree_fd), 0);
1321 }
1322 
1323 /**
1324  * Validate that a detached mount not in our mount namespace can be idmapped.
1325  */
1326 TEST_F(mount_setattr_idmapped, detached_mount_outside_current_mount_namespace)
1327 {
1328 	int open_tree_fd = -EBADF;
1329 	struct mount_attr attr = {
1330 		.attr_set = MOUNT_ATTR_IDMAP,
1331 	};
1332 
1333 	if (!mount_setattr_supported())
1334 		SKIP(return, "mount_setattr syscall not supported");
1335 
1336 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1337 				     AT_EMPTY_PATH |
1338 				     AT_NO_AUTOMOUNT |
1339 				     AT_SYMLINK_NOFOLLOW |
1340 				     OPEN_TREE_CLOEXEC |
1341 				     OPEN_TREE_CLONE);
1342 	ASSERT_GE(open_tree_fd, 0);
1343 
1344 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
1345 
1346 	/* Changing mount properties on a detached mount. */
1347 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1348 	ASSERT_GE(attr.userns_fd, 0);
1349 	ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1350 				    AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1351 	ASSERT_EQ(close(attr.userns_fd), 0);
1352 	ASSERT_EQ(close(open_tree_fd), 0);
1353 }
1354 
1355 /**
1356  * Validate that currently changing the idmapping of an idmapped mount fails.
1357  */
1358 TEST_F(mount_setattr_idmapped, change_idmapping)
1359 {
1360 	int open_tree_fd = -EBADF;
1361 	struct mount_attr attr = {
1362 		.attr_set = MOUNT_ATTR_IDMAP,
1363 	};
1364 
1365 	if (!mount_setattr_supported())
1366 		SKIP(return, "mount_setattr syscall not supported");
1367 
1368 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1369 				     AT_EMPTY_PATH |
1370 				     AT_NO_AUTOMOUNT |
1371 				     AT_SYMLINK_NOFOLLOW |
1372 				     OPEN_TREE_CLOEXEC |
1373 				     OPEN_TREE_CLONE);
1374 	ASSERT_GE(open_tree_fd, 0);
1375 
1376 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1377 	ASSERT_GE(attr.userns_fd, 0);
1378 	ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1379 				    AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1380 	ASSERT_EQ(close(attr.userns_fd), 0);
1381 
1382 	/* Change idmapping on a detached mount that is already idmapped. */
1383 	attr.userns_fd	= get_userns_fd(0, 20000, 10000);
1384 	ASSERT_GE(attr.userns_fd, 0);
1385 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1386 	ASSERT_EQ(close(attr.userns_fd), 0);
1387 	ASSERT_EQ(close(open_tree_fd), 0);
1388 }
1389 
/**
 * expected_uid_gid - check the ownership of a path
 * @dfd:          directory fd handed to fstatat()
 * @path:         path handed to fstatat()
 * @flags:        flags handed to fstatat()
 * @expected_uid: owner the file must have
 * @expected_gid: group the file must have
 *
 * Return: true only if fstatat() succeeds and both ids match; false if the
 * lookup fails or either id differs.
 */
static bool expected_uid_gid(int dfd, const char *path, int flags,
			     uid_t expected_uid, gid_t expected_gid)
{
	struct stat sb;

	if (fstatat(dfd, path, &sb, flags) < 0)
		return false;

	if (sb.st_uid != expected_uid)
		return false;

	return sb.st_gid == expected_gid;
}
1402 
1403 TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid)
1404 {
1405 	int open_tree_fd = -EBADF;
1406 	struct mount_attr attr = {
1407 		.attr_set = MOUNT_ATTR_IDMAP,
1408 	};
1409 
1410 	if (!mount_setattr_supported())
1411 		SKIP(return, "mount_setattr syscall not supported");
1412 
1413 	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
1414 	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
1415 
1416 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/A",
1417 				     AT_RECURSIVE |
1418 				     AT_EMPTY_PATH |
1419 				     AT_NO_AUTOMOUNT |
1420 				     AT_SYMLINK_NOFOLLOW |
1421 				     OPEN_TREE_CLOEXEC |
1422 				     OPEN_TREE_CLONE);
1423 	ASSERT_GE(open_tree_fd, 0);
1424 
1425 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1426 	ASSERT_GE(attr.userns_fd, 0);
1427 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1428 	ASSERT_EQ(close(attr.userns_fd), 0);
1429 	ASSERT_EQ(close(open_tree_fd), 0);
1430 
1431 	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
1432 	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
1433 	ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/b", 0, 0, 0), 0);
1434 	ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/BB/b", 0, 0, 0), 0);
1435 }
1436 
1437 TEST_F(mount_setattr, mount_attr_nosymfollow)
1438 {
1439 	int fd;
1440 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
1441 	struct mount_attr attr = {
1442 		.attr_set	= MOUNT_ATTR_NOSYMFOLLOW,
1443 	};
1444 
1445 	if (!mount_setattr_supported())
1446 		SKIP(return, "mount_setattr syscall not supported");
1447 
1448 	fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
1449 	ASSERT_GT(fd, 0);
1450 	ASSERT_EQ(close(fd), 0);
1451 
1452 	old_flags = read_mnt_flags("/mnt/A");
1453 	ASSERT_GT(old_flags, 0);
1454 
1455 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
1456 
1457 	expected_flags = old_flags;
1458 	expected_flags |= ST_NOSYMFOLLOW;
1459 
1460 	new_flags = read_mnt_flags("/mnt/A");
1461 	ASSERT_EQ(new_flags, expected_flags);
1462 
1463 	new_flags = read_mnt_flags("/mnt/A/AA");
1464 	ASSERT_EQ(new_flags, expected_flags);
1465 
1466 	new_flags = read_mnt_flags("/mnt/A/AA/B");
1467 	ASSERT_EQ(new_flags, expected_flags);
1468 
1469 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
1470 	ASSERT_EQ(new_flags, expected_flags);
1471 
1472 	fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
1473 	ASSERT_LT(fd, 0);
1474 	ASSERT_EQ(errno, ELOOP);
1475 
1476 	attr.attr_set &= ~MOUNT_ATTR_NOSYMFOLLOW;
1477 	attr.attr_clr |= MOUNT_ATTR_NOSYMFOLLOW;
1478 
1479 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
1480 
1481 	expected_flags &= ~ST_NOSYMFOLLOW;
1482 	new_flags = read_mnt_flags("/mnt/A");
1483 	ASSERT_EQ(new_flags, expected_flags);
1484 
1485 	new_flags = read_mnt_flags("/mnt/A/AA");
1486 	ASSERT_EQ(new_flags, expected_flags);
1487 
1488 	new_flags = read_mnt_flags("/mnt/A/AA/B");
1489 	ASSERT_EQ(new_flags, expected_flags);
1490 
1491 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
1492 	ASSERT_EQ(new_flags, expected_flags);
1493 
1494 	fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
1495 	ASSERT_GT(fd, 0);
1496 	ASSERT_EQ(close(fd), 0);
1497 }
1498 
/* Macro from ../kselftest_harness.h; presumably provides the program's main() - see that header. */
TEST_HARNESS_MAIN
1500