1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 #include <sched.h>
4 #include <stdio.h>
5 #include <errno.h>
6 #include <pthread.h>
7 #include <string.h>
8 #include <sys/stat.h>
9 #include <sys/types.h>
10 #include <sys/mount.h>
11 #include <sys/wait.h>
12 #include <sys/vfs.h>
13 #include <sys/statvfs.h>
14 #include <sys/sysinfo.h>
15 #include <stdlib.h>
16 #include <unistd.h>
17 #include <fcntl.h>
18 #include <grp.h>
19 #include <stdbool.h>
20 #include <stdarg.h>
21 #include <linux/mount.h>
22
23 #include "../kselftest_harness.h"
24
25 #ifndef CLONE_NEWNS
26 #define CLONE_NEWNS 0x00020000
27 #endif
28
29 #ifndef CLONE_NEWUSER
30 #define CLONE_NEWUSER 0x10000000
31 #endif
32
33 #ifndef MS_REC
34 #define MS_REC 16384
35 #endif
36
37 #ifndef MS_RELATIME
38 #define MS_RELATIME (1 << 21)
39 #endif
40
41 #ifndef MS_STRICTATIME
42 #define MS_STRICTATIME (1 << 24)
43 #endif
44
45 #ifndef MOUNT_ATTR_RDONLY
46 #define MOUNT_ATTR_RDONLY 0x00000001
47 #endif
48
49 #ifndef MOUNT_ATTR_NOSUID
50 #define MOUNT_ATTR_NOSUID 0x00000002
51 #endif
52
53 #ifndef MOUNT_ATTR_NOEXEC
54 #define MOUNT_ATTR_NOEXEC 0x00000008
55 #endif
56
57 #ifndef MOUNT_ATTR_NODIRATIME
58 #define MOUNT_ATTR_NODIRATIME 0x00000080
59 #endif
60
61 #ifndef MOUNT_ATTR__ATIME
62 #define MOUNT_ATTR__ATIME 0x00000070
63 #endif
64
65 #ifndef MOUNT_ATTR_RELATIME
66 #define MOUNT_ATTR_RELATIME 0x00000000
67 #endif
68
69 #ifndef MOUNT_ATTR_NOATIME
70 #define MOUNT_ATTR_NOATIME 0x00000010
71 #endif
72
73 #ifndef MOUNT_ATTR_STRICTATIME
74 #define MOUNT_ATTR_STRICTATIME 0x00000020
75 #endif
76
77 #ifndef AT_RECURSIVE
78 #define AT_RECURSIVE 0x8000
79 #endif
80
81 #ifndef MS_SHARED
82 #define MS_SHARED (1 << 20)
83 #endif
84
85 #define DEFAULT_THREADS 4
86 #define ptr_to_int(p) ((int)((intptr_t)(p)))
87 #define int_to_ptr(u) ((void *)((intptr_t)(u)))
88
89 #ifndef __NR_mount_setattr
90 #if defined __alpha__
91 #define __NR_mount_setattr 552
92 #elif defined _MIPS_SIM
93 #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
94 #define __NR_mount_setattr (442 + 4000)
95 #endif
96 #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
97 #define __NR_mount_setattr (442 + 6000)
98 #endif
99 #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
100 #define __NR_mount_setattr (442 + 5000)
101 #endif
102 #elif defined __ia64__
103 #define __NR_mount_setattr (442 + 1024)
104 #else
105 #define __NR_mount_setattr 442
106 #endif
107 #endif
108
109 #ifndef __NR_open_tree
110 #if defined __alpha__
111 #define __NR_open_tree 538
112 #elif defined _MIPS_SIM
113 #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
114 #define __NR_open_tree 4428
115 #endif
116 #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
117 #define __NR_open_tree 6428
118 #endif
119 #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
120 #define __NR_open_tree 5428
121 #endif
122 #elif defined __ia64__
123 #define __NR_open_tree (428 + 1024)
124 #else
125 #define __NR_open_tree 428
126 #endif
127 #endif
128
129 #ifndef MOUNT_ATTR_IDMAP
130 #define MOUNT_ATTR_IDMAP 0x00100000
131 #endif
132
133 #ifndef MOUNT_ATTR_NOSYMFOLLOW
134 #define MOUNT_ATTR_NOSYMFOLLOW 0x00200000
135 #endif
136
/*
 * Raw entry point for the mount_setattr system call.
 *
 * All arguments are handed to syscall() unchanged together with
 * __NR_mount_setattr. The value produced by syscall() is returned converted
 * to int.
 */
static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flags,
				    struct mount_attr *attr, size_t size)
{
	long rc = syscall(__NR_mount_setattr, dfd, path, flags, attr, size);

	return rc;
}
142
143 #ifndef OPEN_TREE_CLONE
144 #define OPEN_TREE_CLONE 1
145 #endif
146
147 #ifndef OPEN_TREE_CLOEXEC
148 #define OPEN_TREE_CLOEXEC O_CLOEXEC
149 #endif
150
151 #ifndef AT_RECURSIVE
152 #define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
153 #endif
154
/*
 * Raw entry point for the open_tree system call.
 *
 * Forwards dfd, filename and flags to syscall() with __NR_open_tree and
 * returns the result converted to int.
 */
static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
{
	long rc = syscall(__NR_open_tree, dfd, filename, flags);

	return rc;
}
159
/*
 * write() that restarts itself whenever the call fails with EINTR.
 *
 * Returns whatever the first non-interrupted write() returns: the number of
 * bytes written (possibly fewer than count) or a negative value with errno
 * set by write().
 */
static ssize_t write_nointr(int fd, const void *buf, size_t count)
{
	for (;;) {
		ssize_t written = write(fd, buf, count);

		/* Anything other than "failed with EINTR" ends the loop. */
		if (written >= 0 || errno != EINTR)
			return written;
	}
}
170
/*
 * Write exactly count bytes from buf to the already existing file at path.
 *
 * The file is opened write-only with O_CLOEXEC | O_NOCTTY | O_NOFOLLOW and is
 * not created if missing. The data is written with a single write_nointr()
 * call, so a short write is reported as failure.
 *
 * Returns 0 when all count bytes were written, -1 otherwise.
 */
static int write_file(const char *path, const void *buf, size_t count)
{
	const int open_flags = O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW;
	ssize_t written;
	int fd = open(path, open_flags);

	if (fd < 0)
		return -1;

	written = write_nointr(fd, buf, count);
	close(fd);

	return (written >= 0 && (size_t)written == count) ? 0 : -1;
}
187
/*
 * Move the calling process into a new user namespace in which its current
 * uid and gid are mapped to 0, then switch to gid 0 and uid 0 inside it.
 *
 * Returns 0 on success and -1 as soon as any step fails.
 */
static int create_and_enter_userns(void)
{
	uid_t uid;
	gid_t gid;
	char map[100];

	/*
	 * Read the ids before unshare(); they are written as the outer side
	 * of the id maps below.
	 */
	uid = getuid();
	gid = getgid();

	if (unshare(CLONE_NEWUSER))
		return -1;

	/* A missing setgroups file (ENOENT) is tolerated, other errors are not. */
	if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) &&
	    errno != ENOENT)
		return -1;

	/*
	 * uid_t and gid_t are unsigned, so format them with %u. With %d an id
	 * of 2^31 or above would be rendered as a negative number and produce
	 * an invalid map line.
	 */
	snprintf(map, sizeof(map), "0 %u 1", (unsigned int)uid);
	if (write_file("/proc/self/uid_map", map, strlen(map)))
		return -1;

	snprintf(map, sizeof(map), "0 %u 1", (unsigned int)gid);
	if (write_file("/proc/self/gid_map", map, strlen(map)))
		return -1;

	if (setgid(0))
		return -1;

	if (setuid(0))
		return -1;

	return 0;
}
221
/*
 * Prepare an isolated environment for the tests, in this order:
 *   1. enter a new user namespace via create_and_enter_userns(),
 *   2. unshare the mount namespace,
 *   3. recursively mark "/" as MS_PRIVATE.
 *
 * The first failing step stops the sequence. Returns 0 on success, -1 on
 * failure.
 */
static int prepare_unpriv_mountns(void)
{
	int failed;

	/* || short-circuits, so later steps only run if earlier ones returned 0. */
	failed = create_and_enter_userns() ||
		 unshare(CLONE_NEWNS) ||
		 mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0);

	return failed ? -1 : 0;
}
235
236 #ifndef ST_NOSYMFOLLOW
237 #define ST_NOSYMFOLLOW 0x2000 /* do not follow symlinks */
238 #endif
239
read_mnt_flags(const char * path)240 static int read_mnt_flags(const char *path)
241 {
242 int ret;
243 struct statvfs stat;
244 unsigned int mnt_flags;
245
246 ret = statvfs(path, &stat);
247 if (ret != 0)
248 return -EINVAL;
249
250 if (stat.f_flag & ~(ST_RDONLY | ST_NOSUID | ST_NODEV | ST_NOEXEC |
251 ST_NOATIME | ST_NODIRATIME | ST_RELATIME |
252 ST_SYNCHRONOUS | ST_MANDLOCK | ST_NOSYMFOLLOW))
253 return -EINVAL;
254
255 mnt_flags = 0;
256 if (stat.f_flag & ST_RDONLY)
257 mnt_flags |= MS_RDONLY;
258 if (stat.f_flag & ST_NOSUID)
259 mnt_flags |= MS_NOSUID;
260 if (stat.f_flag & ST_NODEV)
261 mnt_flags |= MS_NODEV;
262 if (stat.f_flag & ST_NOEXEC)
263 mnt_flags |= MS_NOEXEC;
264 if (stat.f_flag & ST_NOATIME)
265 mnt_flags |= MS_NOATIME;
266 if (stat.f_flag & ST_NODIRATIME)
267 mnt_flags |= MS_NODIRATIME;
268 if (stat.f_flag & ST_RELATIME)
269 mnt_flags |= MS_RELATIME;
270 if (stat.f_flag & ST_SYNCHRONOUS)
271 mnt_flags |= MS_SYNCHRONOUS;
272 if (stat.f_flag & ST_MANDLOCK)
273 mnt_flags |= ST_MANDLOCK;
274 if (stat.f_flag & ST_NOSYMFOLLOW)
275 mnt_flags |= ST_NOSYMFOLLOW;
276
277 return mnt_flags;
278 }
279
/*
 * Skip nfields space- or tab-terminated fields in src.
 *
 * Each step advances to the next ' ' or '\t' and then one character past it,
 * so two adjacent separators count as an empty field. If the end of the
 * string is reached first, a pointer to the terminating NUL is returned.
 * Never returns NULL; for nfields <= 0 the result is src itself.
 */
static char *get_field(char *src, int nfields)
{
	char *cursor = src;
	int remaining = nfields;

	while (remaining-- > 0) {
		/* Jump to the separator that ends the current field. */
		cursor += strcspn(cursor, " \t");
		if (*cursor == '\0')
			break;

		/* Step over exactly one separator character. */
		cursor++;
	}

	return cursor;
}
297
/*
 * Terminate word in place at its first space or tab. If the string has
 * neither, the existing NUL terminator is simply rewritten.
 */
static void null_endofword(char *word)
{
	word[strcspn(word, " \t")] = '\0';
}
304
/*
 * Report whether a mount whose mount point equals path is listed in
 * /proc/self/mountinfo with a "shared:" tag.
 *
 * For every line, the field reached by skipping four fields is compared
 * against path, and the field two further on is searched for "shared:".
 * Only that single field is inspected.
 *
 * Returns true on the first matching line, false if there is none or if
 * /proc/self/mountinfo cannot be opened.
 */
static bool is_shared_mount(const char *path)
{
	bool shared = false;
	size_t len = 0;
	char *line = NULL;
	FILE *f;

	f = fopen("/proc/self/mountinfo", "re");
	if (!f)
		return false;

	while (getline(&line, &len, f) != -1) {
		char *opts, *target;

		/*
		 * get_field() always returns a pointer into the line (at worst
		 * the terminating NUL), so no NULL checks are needed here.
		 */
		target = get_field(line, 4);
		opts = get_field(target, 2);

		/* opts was located first; cutting target does not move it. */
		null_endofword(target);

		if (strcmp(target, path) != 0)
			continue;

		null_endofword(opts);
		if (strstr(opts, "shared:")) {
			/*
			 * Leave through the common exit so that the getline()
			 * buffer and the stream are released on this path too.
			 */
			shared = true;
			break;
		}
	}

	free(line);
	fclose(f);

	return shared;
}
341
mount_setattr_thread(void * data)342 static void *mount_setattr_thread(void *data)
343 {
344 struct mount_attr attr = {
345 .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID,
346 .attr_clr = 0,
347 .propagation = MS_SHARED,
348 };
349
350 if (sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)))
351 pthread_exit(int_to_ptr(-1));
352
353 pthread_exit(int_to_ptr(0));
354 }
355
356 /* Attempt to de-conflict with the selftests tree. */
357 #ifndef SKIP
358 #define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__)
359 #endif
360
/*
 * Probe whether the mount_setattr system call is available.
 *
 * Issues a call with a NULL attr and size 0. Only a failure with errno set
 * to ENOSYS is treated as "not supported"; any other outcome, including any
 * other error, counts as supported.
 */
static bool mount_setattr_supported(void)
{
	if (sys_mount_setattr(-EBADF, "", AT_EMPTY_PATH, NULL, 0) >= 0)
		return true;

	return errno != ENOSYS;
}
371
/*
 * Fixture data for the mount_setattr tests. It declares no fields; the tests
 * operate on the mount tree that FIXTURE_SETUP(mount_setattr) builds.
 */
FIXTURE(mount_setattr) {
};
374
375 #define NOSYMFOLLOW_TARGET "/mnt/A/AA/data"
376 #define NOSYMFOLLOW_SYMLINK "/mnt/A/AA/symlink"
377
FIXTURE_SETUP(mount_setattr)378 FIXTURE_SETUP(mount_setattr)
379 {
380 int fd = -EBADF;
381
382 if (!mount_setattr_supported())
383 SKIP(return, "mount_setattr syscall not supported");
384
385 ASSERT_EQ(prepare_unpriv_mountns(), 0);
386
387 (void)umount2("/mnt", MNT_DETACH);
388 (void)umount2("/tmp", MNT_DETACH);
389
390 ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
391 "size=100000,mode=700"), 0);
392
393 ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
394
395 ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV,
396 "size=100000,mode=700"), 0);
397
398 ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
399
400 ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV,
401 "size=100000,mode=700"), 0);
402
403 ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
404 "size=100000,mode=700"), 0);
405
406 ASSERT_EQ(mkdir("/mnt/A", 0777), 0);
407
408 ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV,
409 "size=100000,mode=700"), 0);
410
411 ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);
412
413 ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);
414
415 ASSERT_EQ(mkdir("/mnt/B", 0777), 0);
416
417 ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
418 MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0);
419
420 ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);
421
422 ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
423 MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);
424
425 fd = creat(NOSYMFOLLOW_TARGET, O_RDWR | O_CLOEXEC);
426 ASSERT_GT(fd, 0);
427 ASSERT_EQ(symlink(NOSYMFOLLOW_TARGET, NOSYMFOLLOW_SYMLINK), 0);
428 ASSERT_EQ(close(fd), 0);
429 }
430
/*
 * Tear down the mount tree created by the setup. Nothing is done when the
 * mount_setattr system call is unavailable.
 */
FIXTURE_TEARDOWN(mount_setattr)
{
	if (!mount_setattr_supported())
		SKIP(return, "mount_setattr syscall not supported");

	/* Lazy detach; the return values are deliberately ignored. */
	(void)umount2("/mnt/A", MNT_DETACH);
	(void)umount2("/tmp", MNT_DETACH);
}
439
/*
 * Every call in this test passes bit 31 in at least one field of struct
 * mount_attr and is expected to fail.
 */
TEST_F(mount_setattr, invalid_attributes)
{
	struct mount_attr invalid_attr = {
		.attr_set = (1U << 31),
	};

	if (!mount_setattr_supported())
		SKIP(return, "mount_setattr syscall not supported");

	/* Bit 31 in attr_set only. */
	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
				    sizeof(invalid_attr)), 0);

	/* Bit 31 in attr_clr only. */
	invalid_attr.attr_set = 0;
	invalid_attr.attr_clr = (1U << 31);
	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
				    sizeof(invalid_attr)), 0);

	/* Bit 31 in propagation only. */
	invalid_attr.attr_clr = 0;
	invalid_attr.propagation = (1U << 31);
	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
				    sizeof(invalid_attr)), 0);

	/* Bit 31 in all three fields at once. */
	invalid_attr.attr_set = (1U << 31);
	invalid_attr.attr_clr = (1U << 31);
	invalid_attr.propagation = (1U << 31);
	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
				    sizeof(invalid_attr)), 0);

	/* Same attr again, this time with the relative path "mnt/A" and dfd -1. */
	ASSERT_NE(sys_mount_setattr(-1, "mnt/A", AT_RECURSIVE, &invalid_attr,
				    sizeof(invalid_attr)), 0);
}
471
/*
 * Exercise the attr pointer / size handling of mount_setattr: bad pointers,
 * too-small sizes, and a structure larger than struct mount_attr.
 */
TEST_F(mount_setattr, extensibility)
{
	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
	char *s = "dummy";
	struct mount_attr invalid_attr = {};
	/* Three times the size of struct mount_attr; only attr1 overlaps it. */
	struct mount_attr_large {
		struct mount_attr attr1;
		struct mount_attr attr2;
		struct mount_attr attr3;
	} large_attr = {};

	if (!mount_setattr_supported())
		SKIP(return, "mount_setattr syscall not supported");

	/*
	 * NOTE(review): old_flags is unsigned, so a negative error value from
	 * read_mnt_flags() would also satisfy this "> 0" check.
	 */
	old_flags = read_mnt_flags("/mnt/A");
	ASSERT_GT(old_flags, 0);

	/* NULL attr pointer: expected to fail with EFAULT. */
	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, NULL,
				    sizeof(invalid_attr)), 0);
	ASSERT_EQ(errno, EFAULT);

	/* attr pointing at an unrelated string: expected to fail with EINVAL. */
	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, (void *)s,
				    sizeof(invalid_attr)), 0);
	ASSERT_EQ(errno, EINVAL);

	/* Size 0: expected to fail with EINVAL. */
	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, 0), 0);
	ASSERT_EQ(errno, EINVAL);

	/* Half the structure size: expected to fail with EINVAL. */
	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
				    sizeof(invalid_attr) / 2), 0);
	ASSERT_EQ(errno, EINVAL);

	/* Identical to the previous check. */
	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
				    sizeof(invalid_attr) / 2), 0);
	ASSERT_EQ(errno, EINVAL);

	/* Oversized but entirely zeroed structure: expected to succeed. */
	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
				    (void *)&large_attr, sizeof(large_attr)), 0);

	/* Non-zero data beyond the first struct mount_attr: expected to fail. */
	large_attr.attr3.attr_set = MOUNT_ATTR_RDONLY;
	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
				    (void *)&large_attr, sizeof(large_attr)), 0);

	/* Non-zero data only in the leading struct mount_attr: expected to succeed. */
	large_attr.attr3.attr_set = 0;
	large_attr.attr1.attr_set = MOUNT_ATTR_RDONLY;
	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
				    (void *)&large_attr, sizeof(large_attr)), 0);

	/* The read-only attribute must now be visible on the whole subtree. */
	expected_flags = old_flags;
	expected_flags |= MS_RDONLY;

	new_flags = read_mnt_flags("/mnt/A");
	ASSERT_EQ(new_flags, expected_flags);

	new_flags = read_mnt_flags("/mnt/A/AA");
	ASSERT_EQ(new_flags, expected_flags);

	new_flags = read_mnt_flags("/mnt/A/AA/B");
	ASSERT_EQ(new_flags, expected_flags);

	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
	ASSERT_EQ(new_flags, expected_flags);
}
535
/*
 * Non-recursive attribute change: only /mnt/A itself may change, the mounts
 * below it must keep their previous flags.
 */
TEST_F(mount_setattr, basic)
{
	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
	/* Make the mount read-only and noexec and switch its atime mode to relatime. */
	struct mount_attr attr = {
		.attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
		.attr_clr = MOUNT_ATTR__ATIME,
	};

	if (!mount_setattr_supported())
		SKIP(return, "mount_setattr syscall not supported");

	old_flags = read_mnt_flags("/mnt/A");
	ASSERT_GT(old_flags, 0);

	/* flags == 0: no AT_RECURSIVE. */
	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", 0, &attr, sizeof(attr)), 0);

	expected_flags = old_flags;
	expected_flags |= MS_RDONLY;
	expected_flags |= MS_NOEXEC;
	expected_flags &= ~MS_NOATIME;
	expected_flags |= MS_RELATIME;

	new_flags = read_mnt_flags("/mnt/A");
	ASSERT_EQ(new_flags, expected_flags);

	/* The sub-mounts are compared against the old /mnt/A flags, i.e. unchanged. */
	new_flags = read_mnt_flags("/mnt/A/AA");
	ASSERT_EQ(new_flags, old_flags);

	new_flags = read_mnt_flags("/mnt/A/AA/B");
	ASSERT_EQ(new_flags, old_flags);

	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
	ASSERT_EQ(new_flags, old_flags);
}
570
/*
 * Recursive attribute change in three phases:
 *   1. set rdonly/noexec/relatime on the whole /mnt/A subtree,
 *   2. clear rdonly again and make the subtree shared,
 *   3. with a file held open for writing, try to set rdonly again and verify
 *      that the call fails and leaves every mount untouched.
 */
TEST_F(mount_setattr, basic_recursive)
{
	int fd;
	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
	struct mount_attr attr = {
		.attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
		.attr_clr = MOUNT_ATTR__ATIME,
	};

	if (!mount_setattr_supported())
		SKIP(return, "mount_setattr syscall not supported");

	old_flags = read_mnt_flags("/mnt/A");
	ASSERT_GT(old_flags, 0);

	/* Phase 1: apply the attributes to the entire subtree. */
	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);

	expected_flags = old_flags;
	expected_flags |= MS_RDONLY;
	expected_flags |= MS_NOEXEC;
	expected_flags &= ~MS_NOATIME;
	expected_flags |= MS_RELATIME;

	new_flags = read_mnt_flags("/mnt/A");
	ASSERT_EQ(new_flags, expected_flags);

	new_flags = read_mnt_flags("/mnt/A/AA");
	ASSERT_EQ(new_flags, expected_flags);

	new_flags = read_mnt_flags("/mnt/A/AA/B");
	ASSERT_EQ(new_flags, expected_flags);

	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
	ASSERT_EQ(new_flags, expected_flags);

	/* Phase 2: drop read-only and switch propagation to shared. */
	memset(&attr, 0, sizeof(attr));
	attr.attr_clr = MOUNT_ATTR_RDONLY;
	attr.propagation = MS_SHARED;
	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);

	expected_flags &= ~MS_RDONLY;
	new_flags = read_mnt_flags("/mnt/A");
	ASSERT_EQ(new_flags, expected_flags);

	ASSERT_EQ(is_shared_mount("/mnt/A"), true);

	new_flags = read_mnt_flags("/mnt/A/AA");
	ASSERT_EQ(new_flags, expected_flags);

	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);

	new_flags = read_mnt_flags("/mnt/A/AA/B");
	ASSERT_EQ(new_flags, expected_flags);

	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);

	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
	ASSERT_EQ(new_flags, expected_flags);

	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);

	/* Phase 3: create a writer on a mount in the middle of the subtree. */
	fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
	ASSERT_GE(fd, 0);

	/*
	 * We're holding a fd open for writing so this needs to fail somewhere
	 * in the middle and the mount options need to be unchanged.
	 */
	attr.attr_set = MOUNT_ATTR_RDONLY;
	ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);

	new_flags = read_mnt_flags("/mnt/A");
	ASSERT_EQ(new_flags, expected_flags);

	ASSERT_EQ(is_shared_mount("/mnt/A"), true);

	new_flags = read_mnt_flags("/mnt/A/AA");
	ASSERT_EQ(new_flags, expected_flags);

	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);

	new_flags = read_mnt_flags("/mnt/A/AA/B");
	ASSERT_EQ(new_flags, expected_flags);

	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);

	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
	ASSERT_EQ(new_flags, expected_flags);

	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);

	EXPECT_EQ(close(fd), 0);
}
664
/*
 * A recursive read-only + shared change must fail as a whole while a file on
 * one of the mounts is open for writing, and must succeed once the writer is
 * gone.
 */
TEST_F(mount_setattr, mount_has_writers)
{
	int fd, dfd;
	unsigned int old_flags = 0, new_flags = 0;
	struct mount_attr attr = {
		.attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
		.attr_clr = MOUNT_ATTR__ATIME,
		.propagation = MS_SHARED,
	};

	if (!mount_setattr_supported())
		SKIP(return, "mount_setattr syscall not supported");

	old_flags = read_mnt_flags("/mnt/A");
	ASSERT_GT(old_flags, 0);

	fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
	ASSERT_GE(fd, 0);

	/*
	 * We're holding a fd open to a mount somewhere in the middle so this
	 * needs to fail somewhere in the middle. After this the mount options
	 * need to be unchanged.
	 */
	ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);

	/* Neither the flags nor the propagation type may have changed anywhere. */
	new_flags = read_mnt_flags("/mnt/A");
	ASSERT_EQ(new_flags, old_flags);

	ASSERT_EQ(is_shared_mount("/mnt/A"), false);

	new_flags = read_mnt_flags("/mnt/A/AA");
	ASSERT_EQ(new_flags, old_flags);

	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), false);

	new_flags = read_mnt_flags("/mnt/A/AA/B");
	ASSERT_EQ(new_flags, old_flags);

	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), false);

	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
	ASSERT_EQ(new_flags, old_flags);

	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), false);

	/* Sync the containing directory and the file, then drop both descriptors. */
	dfd = open("/mnt/A/AA/B", O_DIRECTORY | O_CLOEXEC);
	ASSERT_GE(dfd, 0);
	EXPECT_EQ(fsync(dfd), 0);
	EXPECT_EQ(close(dfd), 0);

	EXPECT_EQ(fsync(fd), 0);
	EXPECT_EQ(close(fd), 0);

	/* All writers are gone so this should succeed. */
	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
}
722
TEST_F(mount_setattr,mixed_mount_options)723 TEST_F(mount_setattr, mixed_mount_options)
724 {
725 unsigned int old_flags1 = 0, old_flags2 = 0, new_flags = 0, expected_flags = 0;
726 struct mount_attr attr = {
727 .attr_clr = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR__ATIME,
728 .attr_set = MOUNT_ATTR_RELATIME,
729 };
730
731 if (!mount_setattr_supported())
732 SKIP(return, "mount_setattr syscall not supported");
733
734 old_flags1 = read_mnt_flags("/mnt/B");
735 ASSERT_GT(old_flags1, 0);
736
737 old_flags2 = read_mnt_flags("/mnt/B/BB");
738 ASSERT_GT(old_flags2, 0);
739
740 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/B", AT_RECURSIVE, &attr, sizeof(attr)), 0);
741
742 expected_flags = old_flags2;
743 expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
744 expected_flags |= MS_RELATIME;
745
746 new_flags = read_mnt_flags("/mnt/B");
747 ASSERT_EQ(new_flags, expected_flags);
748
749 expected_flags = old_flags2;
750 expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
751 expected_flags |= MS_RELATIME;
752
753 new_flags = read_mnt_flags("/mnt/B/BB");
754 ASSERT_EQ(new_flags, expected_flags);
755 }
756
/*
 * Walk through the atime-related attributes. The first half checks that
 * inconsistent atime requests are rejected, the second half cycles the
 * subtree through noatime+nodiratime -> relatime -> strictatime -> noatime
 * and finally clears nodiratime, verifying the reported flags of all four
 * mounts after every step.
 */
TEST_F(mount_setattr, time_changes)
{
	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
	struct mount_attr attr = {
		.attr_set = MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME,
	};

	if (!mount_setattr_supported())
		SKIP(return, "mount_setattr syscall not supported");

	/* Setting an atime mode while attr_clr is still 0: expected to fail. */
	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);

	attr.attr_set = MOUNT_ATTR_STRICTATIME;
	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);

	attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);

	/* Two atime modes at once stay invalid even with MOUNT_ATTR__ATIME cleared. */
	attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
	attr.attr_clr = MOUNT_ATTR__ATIME;
	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);

	/* Clearing a single atime mode instead of MOUNT_ATTR__ATIME: expected to fail. */
	attr.attr_set = 0;
	attr.attr_clr = MOUNT_ATTR_STRICTATIME;
	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);

	attr.attr_clr = MOUNT_ATTR_NOATIME;
	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);

	old_flags = read_mnt_flags("/mnt/A");
	ASSERT_GT(old_flags, 0);

	/* Step 1: noatime + nodiratime with MOUNT_ATTR__ATIME cleared succeeds. */
	attr.attr_set = MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME;
	attr.attr_clr = MOUNT_ATTR__ATIME;
	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);

	expected_flags = old_flags;
	expected_flags |= MS_NOATIME;
	expected_flags |= MS_NODIRATIME;

	new_flags = read_mnt_flags("/mnt/A");
	ASSERT_EQ(new_flags, expected_flags);

	new_flags = read_mnt_flags("/mnt/A/AA");
	ASSERT_EQ(new_flags, expected_flags);

	new_flags = read_mnt_flags("/mnt/A/AA/B");
	ASSERT_EQ(new_flags, expected_flags);

	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
	ASSERT_EQ(new_flags, expected_flags);

	/*
	 * Step 2: switch to relatime. attr was just zeroed, so the "&= ~..."
	 * line below does not clear anything.
	 */
	memset(&attr, 0, sizeof(attr));
	attr.attr_set &= ~MOUNT_ATTR_NOATIME;
	attr.attr_set |= MOUNT_ATTR_RELATIME;
	attr.attr_clr |= MOUNT_ATTR__ATIME;
	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);

	expected_flags &= ~MS_NOATIME;
	expected_flags |= MS_RELATIME;

	new_flags = read_mnt_flags("/mnt/A");
	ASSERT_EQ(new_flags, expected_flags);

	new_flags = read_mnt_flags("/mnt/A/AA");
	ASSERT_EQ(new_flags, expected_flags);

	new_flags = read_mnt_flags("/mnt/A/AA/B");
	ASSERT_EQ(new_flags, expected_flags);

	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
	ASSERT_EQ(new_flags, expected_flags);

	/*
	 * Step 3: switch to strictatime. read_mnt_flags() has no bit for
	 * strictatime, so the only expected change is that relatime vanishes.
	 */
	memset(&attr, 0, sizeof(attr));
	attr.attr_set &= ~MOUNT_ATTR_RELATIME;
	attr.attr_set |= MOUNT_ATTR_STRICTATIME;
	attr.attr_clr |= MOUNT_ATTR__ATIME;
	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);

	expected_flags &= ~MS_RELATIME;

	new_flags = read_mnt_flags("/mnt/A");
	ASSERT_EQ(new_flags, expected_flags);

	new_flags = read_mnt_flags("/mnt/A/AA");
	ASSERT_EQ(new_flags, expected_flags);

	new_flags = read_mnt_flags("/mnt/A/AA/B");
	ASSERT_EQ(new_flags, expected_flags);

	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
	ASSERT_EQ(new_flags, expected_flags);

	/* Step 4: back to noatime. */
	memset(&attr, 0, sizeof(attr));
	attr.attr_set &= ~MOUNT_ATTR_STRICTATIME;
	attr.attr_set |= MOUNT_ATTR_NOATIME;
	attr.attr_clr |= MOUNT_ATTR__ATIME;
	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);

	expected_flags |= MS_NOATIME;
	new_flags = read_mnt_flags("/mnt/A");
	ASSERT_EQ(new_flags, expected_flags);

	new_flags = read_mnt_flags("/mnt/A/AA");
	ASSERT_EQ(new_flags, expected_flags);

	new_flags = read_mnt_flags("/mnt/A/AA/B");
	ASSERT_EQ(new_flags, expected_flags);

	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
	ASSERT_EQ(new_flags, expected_flags);

	/* Step 5: an all-zero attr must succeed and change nothing. */
	memset(&attr, 0, sizeof(attr));
	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);

	new_flags = read_mnt_flags("/mnt/A");
	ASSERT_EQ(new_flags, expected_flags);

	new_flags = read_mnt_flags("/mnt/A/AA");
	ASSERT_EQ(new_flags, expected_flags);

	new_flags = read_mnt_flags("/mnt/A/AA/B");
	ASSERT_EQ(new_flags, expected_flags);

	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
	ASSERT_EQ(new_flags, expected_flags);

	/* Step 6: nodiratime is cleared on its own, without MOUNT_ATTR__ATIME. */
	memset(&attr, 0, sizeof(attr));
	attr.attr_clr = MOUNT_ATTR_NODIRATIME;
	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);

	expected_flags &= ~MS_NODIRATIME;

	new_flags = read_mnt_flags("/mnt/A");
	ASSERT_EQ(new_flags, expected_flags);

	new_flags = read_mnt_flags("/mnt/A/AA");
	ASSERT_EQ(new_flags, expected_flags);

	new_flags = read_mnt_flags("/mnt/A/AA/B");
	ASSERT_EQ(new_flags, expected_flags);

	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
	ASSERT_EQ(new_flags, expected_flags);
}
902
/*
 * Run mount_setattr_thread() concurrently from up to DEFAULT_THREADS threads
 * and verify that afterwards the whole /mnt/A subtree is read-only, nosuid
 * and shared.
 */
TEST_F(mount_setattr, multi_threaded)
{
	int i, j, nthreads, ret = 0;
	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
	pthread_attr_t pattr;
	pthread_t threads[DEFAULT_THREADS];

	if (!mount_setattr_supported())
		SKIP(return, "mount_setattr syscall not supported");

	old_flags = read_mnt_flags("/mnt/A");
	ASSERT_GT(old_flags, 0);

	/* Try to change mount options from multiple threads. */
	nthreads = get_nprocs_conf();
	/* Cap at the size of the threads[] array. */
	if (nthreads > DEFAULT_THREADS)
		nthreads = DEFAULT_THREADS;

	pthread_attr_init(&pattr);
	for (i = 0; i < nthreads; i++)
		ASSERT_EQ(pthread_create(&threads[i], &pattr, mount_setattr_thread, NULL), 0);

	/* i now holds the number of threads that were started. */
	for (j = 0; j < i; j++) {
		void *retptr = NULL;

		EXPECT_EQ(pthread_join(threads[j], &retptr), 0);

		/* Each thread exits with 0 or -1; accumulate so any failure shows up. */
		ret += ptr_to_int(retptr);
		EXPECT_EQ(ret, 0);
	}
	pthread_attr_destroy(&pattr);

	ASSERT_EQ(ret, 0);

	expected_flags = old_flags;
	expected_flags |= MS_RDONLY;
	expected_flags |= MS_NOSUID;
	new_flags = read_mnt_flags("/mnt/A");
	ASSERT_EQ(new_flags, expected_flags);

	ASSERT_EQ(is_shared_mount("/mnt/A"), true);

	new_flags = read_mnt_flags("/mnt/A/AA");
	ASSERT_EQ(new_flags, expected_flags);

	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);

	new_flags = read_mnt_flags("/mnt/A/AA/B");
	ASSERT_EQ(new_flags, expected_flags);

	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);

	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
	ASSERT_EQ(new_flags, expected_flags);

	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
}
960
/*
 * After entering yet another user namespace (create_and_enter_userns() does
 * not unshare the mount namespace), changing the fixture's mounts is
 * expected to fail with EPERM.
 */
TEST_F(mount_setattr, wrong_user_namespace)
{
	int ret;
	struct mount_attr attr = {
		.attr_set = MOUNT_ATTR_RDONLY,
	};

	if (!mount_setattr_supported())
		SKIP(return, "mount_setattr syscall not supported");

	EXPECT_EQ(create_and_enter_userns(), 0);
	ret = sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr));
	ASSERT_LT(ret, 0);
	ASSERT_EQ(errno, EPERM);
}
976
TEST_F(mount_setattr,wrong_mount_namespace)977 TEST_F(mount_setattr, wrong_mount_namespace)
978 {
979 int fd, ret;
980 struct mount_attr attr = {
981 .attr_set = MOUNT_ATTR_RDONLY,
982 };
983
984 if (!mount_setattr_supported())
985 SKIP(return, "mount_setattr syscall not supported");
986
987 fd = open("/mnt/A", O_DIRECTORY | O_CLOEXEC);
988 ASSERT_GE(fd, 0);
989
990 ASSERT_EQ(unshare(CLONE_NEWNS), 0);
991
992 ret = sys_mount_setattr(fd, "", AT_EMPTY_PATH | AT_RECURSIVE, &attr, sizeof(attr));
993 ASSERT_LT(ret, 0);
994 ASSERT_EQ(errno, EINVAL);
995 }
996
/*
 * Fixture data for the idmapped-mount tests. It declares no fields; the
 * tests operate on the mount tree that FIXTURE_SETUP(mount_setattr_idmapped)
 * builds.
 */
FIXTURE(mount_setattr_idmapped) {
};
999
/*
 * Build the mount tree for the idmapped-mount tests in a new, private mount
 * namespace. Unlike the mount_setattr fixture, no user namespace is created
 * here. On top of the tmpfs/ramfs/devpts layout this setup
 *   - creates the regular files /tmp/B/b and /tmp/B/BB/b owned by 0:0 before
 *     the respective directory is mounted over, and
 *   - creates an ext4 image in /mnt/C and loop-mounts it on /mnt/D using the
 *     external mkfs.ext4 and mount commands.
 */
FIXTURE_SETUP(mount_setattr_idmapped)
{
	int img_fd = -EBADF;

	ASSERT_EQ(unshare(CLONE_NEWNS), 0);

	ASSERT_EQ(mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0), 0);

	/* Start from a clean slate; failures here are irrelevant. */
	(void)umount2("/mnt", MNT_DETACH);
	(void)umount2("/tmp", MNT_DETACH);

	ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
			"size=100000,mode=700"), 0);

	ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
	ASSERT_EQ(mknodat(-EBADF, "/tmp/B/b", S_IFREG | 0644, 0), 0);
	ASSERT_EQ(chown("/tmp/B/b", 0, 0), 0);

	ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV,
			"size=100000,mode=700"), 0);

	ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
	ASSERT_EQ(mknodat(-EBADF, "/tmp/B/BB/b", S_IFREG | 0644, 0), 0);
	ASSERT_EQ(chown("/tmp/B/BB/b", 0, 0), 0);

	ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV,
			"size=100000,mode=700"), 0);

	/* /mnt is sized at 2m here so that it can hold the ext4 image below. */
	ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
			"size=2m,mode=700"), 0);

	ASSERT_EQ(mkdir("/mnt/A", 0777), 0);

	ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV,
			"size=100000,mode=700"), 0);

	ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);

	/* Recursive bind: /mnt/A/AA/B and /mnt/A/AA/B/BB become mounts too. */
	ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);

	ASSERT_EQ(mkdir("/mnt/B", 0777), 0);

	ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
			MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0);

	ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);

	/*
	 * NOTE(review): the directory created just above is /mnt/B/BB, but the
	 * devpts instance is mounted on /tmp/B/BB - confirm this is intended.
	 */
	ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
			MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);

	ASSERT_EQ(mkdir("/mnt/C", 0777), 0);
	ASSERT_EQ(mkdir("/mnt/D", 0777), 0);
	/* Create a 1024 * 2048 byte image file, format it and loop-mount it. */
	img_fd = openat(-EBADF, "/mnt/C/ext4.img", O_CREAT | O_WRONLY, 0600);
	ASSERT_GE(img_fd, 0);
	ASSERT_EQ(ftruncate(img_fd, 1024 * 2048), 0);
	ASSERT_EQ(system("mkfs.ext4 -q /mnt/C/ext4.img"), 0);
	ASSERT_EQ(system("mount -o loop -t ext4 /mnt/C/ext4.img /mnt/D/"), 0);
	ASSERT_EQ(close(img_fd), 0);
}
1059
/*
 * Lazily detach /mnt/A and /tmp. The return values are deliberately ignored;
 * the other mounts created by the setup are not touched here.
 */
FIXTURE_TEARDOWN(mount_setattr_idmapped)
{
	(void)umount2("/mnt/A", MNT_DETACH);
	(void)umount2("/tmp", MNT_DETACH);
}
1065
1066 /**
1067 * Validate that negative fd values are rejected.
1068 */
TEST_F(mount_setattr_idmapped, invalid_fd_negative)
{
	/* Request an idmapped mount with -EBADF stored as the userns fd. */
	struct mount_attr attr = {
		.attr_set	= MOUNT_ATTR_IDMAP,
		.userns_fd	= -EBADF,
	};

	if (!mount_setattr_supported())
		SKIP(return, "mount_setattr syscall not supported");

	/* Any non-zero result passes; the block only logs on unexpected success. */
	ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
		TH_LOG("failure: created idmapped mount with negative fd");
	}
}
1083
1084 /**
1085 * Validate that excessively large fd values are rejected.
1086 */
TEST_F(mount_setattr_idmapped, invalid_fd_large)
{
	/* Request an idmapped mount with INT64_MAX stored as the userns fd. */
	struct mount_attr attr = {
		.attr_set	= MOUNT_ATTR_IDMAP,
		.userns_fd	= INT64_MAX,
	};

	if (!mount_setattr_supported())
		SKIP(return, "mount_setattr syscall not supported");

	/* Any non-zero result passes; the block only logs on unexpected success. */
	ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
		TH_LOG("failure: created idmapped mount with too large fd value");
	}
}
1101
1102 /**
1103 * Validate that closed fd values are rejected.
1104 */
TEST_F(mount_setattr_idmapped,invalid_fd_closed)1105 TEST_F(mount_setattr_idmapped, invalid_fd_closed)
1106 {
1107 int fd;
1108 struct mount_attr attr = {
1109 .attr_set = MOUNT_ATTR_IDMAP,
1110 };
1111
1112 if (!mount_setattr_supported())
1113 SKIP(return, "mount_setattr syscall not supported");
1114
1115 fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
1116 ASSERT_GE(fd, 0);
1117 ASSERT_GE(close(fd), 0);
1118
1119 attr.userns_fd = fd;
1120 ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
1121 TH_LOG("failure: created idmapped mount with closed fd");
1122 }
1123 }
1124
1125 /**
1126 * Validate that the initial user namespace is rejected.
1127 */
TEST_F(mount_setattr_idmapped,invalid_fd_initial_userns)1128 TEST_F(mount_setattr_idmapped, invalid_fd_initial_userns)
1129 {
1130 int open_tree_fd = -EBADF;
1131 struct mount_attr attr = {
1132 .attr_set = MOUNT_ATTR_IDMAP,
1133 };
1134
1135 if (!mount_setattr_supported())
1136 SKIP(return, "mount_setattr syscall not supported");
1137
1138 open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1139 AT_NO_AUTOMOUNT |
1140 AT_SYMLINK_NOFOLLOW |
1141 OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
1142 ASSERT_GE(open_tree_fd, 0);
1143
1144 attr.userns_fd = open("/proc/1/ns/user", O_RDONLY | O_CLOEXEC);
1145 ASSERT_GE(attr.userns_fd, 0);
1146 ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1147 ASSERT_EQ(errno, EPERM);
1148 ASSERT_EQ(close(attr.userns_fd), 0);
1149 ASSERT_EQ(close(open_tree_fd), 0);
1150 }
1151
/*
 * Write the single mapping line "<nsid> <hostid> <range>" to both
 * /proc/<pid>/uid_map and /proc/<pid>/gid_map (in that order).
 *
 * Returns 0 when both writes succeed, -1 as soon as write_file() reports a
 * failure.
 */
static int map_ids(pid_t pid, unsigned long nsid, unsigned long hostid,
		   unsigned long range)
{
	static const char *const map_names[] = { "uid_map", "gid_map" };
	char mapping[100], path[256];
	size_t idx;

	for (idx = 0; idx < sizeof(map_names) / sizeof(map_names[0]); idx++) {
		snprintf(path, sizeof(path), "/proc/%d/%s", pid, map_names[idx]);
		snprintf(mapping, sizeof(mapping), "%lu %lu %lu", nsid, hostid, range);
		if (write_file(path, mapping, strlen(mapping)))
			return -1;
	}

	return 0;
}
1170
#define __STACK_SIZE (8 * 1024 * 1024)

#ifndef CLONE_VM
#define CLONE_VM 0x00000100
#endif

/*
 * Run @fn(@arg) in a child created with clone() (or __clone2() on ia64) on a
 * freshly allocated stack of __STACK_SIZE bytes. SIGCHLD is always added to
 * @flags.
 *
 * Returns the value returned by clone() (child pid in the parent, -1 with
 * errno set on failure), or -ENOMEM if the stack cannot be allocated.
 *
 * The stack buffer is released in the parent unless the child shares the
 * parent's address space (CLONE_VM): without CLONE_VM the child runs on its
 * own copy of the memory, so the parent's allocation is no longer needed.
 * It is also released when clone() fails. errno is preserved across free().
 */
static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
{
	char *stack;
	pid_t pid;
	int saved_errno;

	stack = malloc(__STACK_SIZE);
	if (!stack)
		return -ENOMEM;

#ifdef __ia64__
	pid = __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL);
#else
	/* The stack grows down, so hand over the top of the buffer. */
	pid = clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL);
#endif

	if (pid < 0 || !(flags & CLONE_VM)) {
		saved_errno = errno;
		free(stack);
		errno = saved_errno;
	}

	return pid;
}
1186
/*
 * Child entry point used by get_userns_fd(): sends SIGSTOP to the calling
 * process and returns the result of kill(). @data is not used.
 */
static int get_userns_fd_cb(void *data)
{
	const pid_t self = getpid();

	(void)data;
	return kill(self, SIGSTOP);
}
1191
/*
 * Wait for child @pid to change state, restarting waitpid() when it is
 * interrupted by a signal (EINTR).
 *
 * Returns the child's exit status if it exited normally, -1 if waitpid()
 * failed for another reason or the child did not exit normally.
 */
static int wait_for_pid(pid_t pid)
{
	int wstatus;

	while (waitpid(pid, &wstatus, 0) == -1) {
		if (errno != EINTR)
			return -1;
	}

	return WIFEXITED(wstatus) ? WEXITSTATUS(wstatus) : -1;
}
1210
get_userns_fd(unsigned long nsid,unsigned long hostid,unsigned long range)1211 static int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long range)
1212 {
1213 int ret;
1214 pid_t pid;
1215 char path[256];
1216
1217 pid = do_clone(get_userns_fd_cb, NULL, CLONE_NEWUSER);
1218 if (pid < 0)
1219 return -errno;
1220
1221 ret = map_ids(pid, nsid, hostid, range);
1222 if (ret < 0)
1223 return ret;
1224
1225 snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
1226 ret = open(path, O_RDONLY | O_CLOEXEC);
1227 kill(pid, SIGKILL);
1228 wait_for_pid(pid);
1229 return ret;
1230 }
1231
1232 /**
1233 * Validate that an attached mount in our mount namespace cannot be idmapped.
1234 * (The kernel enforces that the mount's mount namespace and the caller's mount
1235 * namespace match.)
1236 */
TEST_F(mount_setattr_idmapped,attached_mount_inside_current_mount_namespace)1237 TEST_F(mount_setattr_idmapped, attached_mount_inside_current_mount_namespace)
1238 {
1239 int open_tree_fd = -EBADF;
1240 struct mount_attr attr = {
1241 .attr_set = MOUNT_ATTR_IDMAP,
1242 };
1243
1244 if (!mount_setattr_supported())
1245 SKIP(return, "mount_setattr syscall not supported");
1246
1247 open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1248 AT_EMPTY_PATH |
1249 AT_NO_AUTOMOUNT |
1250 AT_SYMLINK_NOFOLLOW |
1251 OPEN_TREE_CLOEXEC);
1252 ASSERT_GE(open_tree_fd, 0);
1253
1254 attr.userns_fd = get_userns_fd(0, 10000, 10000);
1255 ASSERT_GE(attr.userns_fd, 0);
1256 ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1257 ASSERT_EQ(close(attr.userns_fd), 0);
1258 ASSERT_EQ(close(open_tree_fd), 0);
1259 }
1260
1261 /**
1262 * Validate that idmapping a mount is rejected if the mount's mount namespace
1263 * and our mount namespace don't match.
1264 * (The kernel enforces that the mount's mount namespace and the caller's mount
1265 * namespace match.)
1266 */
TEST_F(mount_setattr_idmapped,attached_mount_outside_current_mount_namespace)1267 TEST_F(mount_setattr_idmapped, attached_mount_outside_current_mount_namespace)
1268 {
1269 int open_tree_fd = -EBADF;
1270 struct mount_attr attr = {
1271 .attr_set = MOUNT_ATTR_IDMAP,
1272 };
1273
1274 if (!mount_setattr_supported())
1275 SKIP(return, "mount_setattr syscall not supported");
1276
1277 open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1278 AT_EMPTY_PATH |
1279 AT_NO_AUTOMOUNT |
1280 AT_SYMLINK_NOFOLLOW |
1281 OPEN_TREE_CLOEXEC);
1282 ASSERT_GE(open_tree_fd, 0);
1283
1284 ASSERT_EQ(unshare(CLONE_NEWNS), 0);
1285
1286 attr.userns_fd = get_userns_fd(0, 10000, 10000);
1287 ASSERT_GE(attr.userns_fd, 0);
1288 ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr,
1289 sizeof(attr)), 0);
1290 ASSERT_EQ(close(attr.userns_fd), 0);
1291 ASSERT_EQ(close(open_tree_fd), 0);
1292 }
1293
1294 /**
1295 * Validate that an attached mount in our mount namespace can be idmapped.
1296 */
TEST_F(mount_setattr_idmapped,detached_mount_inside_current_mount_namespace)1297 TEST_F(mount_setattr_idmapped, detached_mount_inside_current_mount_namespace)
1298 {
1299 int open_tree_fd = -EBADF;
1300 struct mount_attr attr = {
1301 .attr_set = MOUNT_ATTR_IDMAP,
1302 };
1303
1304 if (!mount_setattr_supported())
1305 SKIP(return, "mount_setattr syscall not supported");
1306
1307 open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1308 AT_EMPTY_PATH |
1309 AT_NO_AUTOMOUNT |
1310 AT_SYMLINK_NOFOLLOW |
1311 OPEN_TREE_CLOEXEC |
1312 OPEN_TREE_CLONE);
1313 ASSERT_GE(open_tree_fd, 0);
1314
1315 /* Changing mount properties on a detached mount. */
1316 attr.userns_fd = get_userns_fd(0, 10000, 10000);
1317 ASSERT_GE(attr.userns_fd, 0);
1318 ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1319 AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1320 ASSERT_EQ(close(attr.userns_fd), 0);
1321 ASSERT_EQ(close(open_tree_fd), 0);
1322 }
1323
1324 /**
1325 * Validate that a detached mount not in our mount namespace can be idmapped.
1326 */
TEST_F(mount_setattr_idmapped,detached_mount_outside_current_mount_namespace)1327 TEST_F(mount_setattr_idmapped, detached_mount_outside_current_mount_namespace)
1328 {
1329 int open_tree_fd = -EBADF;
1330 struct mount_attr attr = {
1331 .attr_set = MOUNT_ATTR_IDMAP,
1332 };
1333
1334 if (!mount_setattr_supported())
1335 SKIP(return, "mount_setattr syscall not supported");
1336
1337 open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1338 AT_EMPTY_PATH |
1339 AT_NO_AUTOMOUNT |
1340 AT_SYMLINK_NOFOLLOW |
1341 OPEN_TREE_CLOEXEC |
1342 OPEN_TREE_CLONE);
1343 ASSERT_GE(open_tree_fd, 0);
1344
1345 ASSERT_EQ(unshare(CLONE_NEWNS), 0);
1346
1347 /* Changing mount properties on a detached mount. */
1348 attr.userns_fd = get_userns_fd(0, 10000, 10000);
1349 ASSERT_GE(attr.userns_fd, 0);
1350 ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1351 AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1352 ASSERT_EQ(close(attr.userns_fd), 0);
1353 ASSERT_EQ(close(open_tree_fd), 0);
1354 }
1355
1356 /**
1357 * Validate that currently changing the idmapping of an idmapped mount fails.
1358 */
TEST_F(mount_setattr_idmapped,change_idmapping)1359 TEST_F(mount_setattr_idmapped, change_idmapping)
1360 {
1361 int open_tree_fd = -EBADF;
1362 struct mount_attr attr = {
1363 .attr_set = MOUNT_ATTR_IDMAP,
1364 };
1365
1366 if (!mount_setattr_supported())
1367 SKIP(return, "mount_setattr syscall not supported");
1368
1369 open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1370 AT_EMPTY_PATH |
1371 AT_NO_AUTOMOUNT |
1372 AT_SYMLINK_NOFOLLOW |
1373 OPEN_TREE_CLOEXEC |
1374 OPEN_TREE_CLONE);
1375 ASSERT_GE(open_tree_fd, 0);
1376
1377 attr.userns_fd = get_userns_fd(0, 10000, 10000);
1378 ASSERT_GE(attr.userns_fd, 0);
1379 ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1380 AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1381 ASSERT_EQ(close(attr.userns_fd), 0);
1382
1383 /* Change idmapping on a detached mount that is already idmapped. */
1384 attr.userns_fd = get_userns_fd(0, 20000, 10000);
1385 ASSERT_GE(attr.userns_fd, 0);
1386 ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1387 ASSERT_EQ(close(attr.userns_fd), 0);
1388 ASSERT_EQ(close(open_tree_fd), 0);
1389 }
1390
/*
 * Check the ownership of @path (resolved relative to @dfd, with fstatat()
 * @flags).
 *
 * Returns true only if fstatat() succeeds and the file's uid and gid equal
 * @expected_uid and @expected_gid; returns false otherwise, including when
 * the file cannot be stat'ed.
 */
static bool expected_uid_gid(int dfd, const char *path, int flags,
			     uid_t expected_uid, gid_t expected_gid)
{
	struct stat sb;

	if (fstatat(dfd, path, &sb, flags) < 0)
		return false;

	if (sb.st_uid != expected_uid)
		return false;

	return sb.st_gid == expected_gid;
}
1403
TEST_F(mount_setattr_idmapped,idmap_mount_tree_invalid)1404 TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid)
1405 {
1406 int open_tree_fd = -EBADF;
1407 struct mount_attr attr = {
1408 .attr_set = MOUNT_ATTR_IDMAP,
1409 };
1410
1411 if (!mount_setattr_supported())
1412 SKIP(return, "mount_setattr syscall not supported");
1413
1414 ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
1415 ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
1416
1417 open_tree_fd = sys_open_tree(-EBADF, "/mnt/A",
1418 AT_RECURSIVE |
1419 AT_EMPTY_PATH |
1420 AT_NO_AUTOMOUNT |
1421 AT_SYMLINK_NOFOLLOW |
1422 OPEN_TREE_CLOEXEC |
1423 OPEN_TREE_CLONE);
1424 ASSERT_GE(open_tree_fd, 0);
1425
1426 attr.userns_fd = get_userns_fd(0, 10000, 10000);
1427 ASSERT_GE(attr.userns_fd, 0);
1428 ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1429 ASSERT_EQ(close(attr.userns_fd), 0);
1430 ASSERT_EQ(close(open_tree_fd), 0);
1431
1432 ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
1433 ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
1434 ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/b", 0, 0, 0), 0);
1435 ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/BB/b", 0, 0, 0), 0);
1436 }
1437
TEST_F(mount_setattr,mount_attr_nosymfollow)1438 TEST_F(mount_setattr, mount_attr_nosymfollow)
1439 {
1440 int fd;
1441 unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
1442 struct mount_attr attr = {
1443 .attr_set = MOUNT_ATTR_NOSYMFOLLOW,
1444 };
1445
1446 if (!mount_setattr_supported())
1447 SKIP(return, "mount_setattr syscall not supported");
1448
1449 fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
1450 ASSERT_GT(fd, 0);
1451 ASSERT_EQ(close(fd), 0);
1452
1453 old_flags = read_mnt_flags("/mnt/A");
1454 ASSERT_GT(old_flags, 0);
1455
1456 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
1457
1458 expected_flags = old_flags;
1459 expected_flags |= ST_NOSYMFOLLOW;
1460
1461 new_flags = read_mnt_flags("/mnt/A");
1462 ASSERT_EQ(new_flags, expected_flags);
1463
1464 new_flags = read_mnt_flags("/mnt/A/AA");
1465 ASSERT_EQ(new_flags, expected_flags);
1466
1467 new_flags = read_mnt_flags("/mnt/A/AA/B");
1468 ASSERT_EQ(new_flags, expected_flags);
1469
1470 new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
1471 ASSERT_EQ(new_flags, expected_flags);
1472
1473 fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
1474 ASSERT_LT(fd, 0);
1475 ASSERT_EQ(errno, ELOOP);
1476
1477 attr.attr_set &= ~MOUNT_ATTR_NOSYMFOLLOW;
1478 attr.attr_clr |= MOUNT_ATTR_NOSYMFOLLOW;
1479
1480 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
1481
1482 expected_flags &= ~ST_NOSYMFOLLOW;
1483 new_flags = read_mnt_flags("/mnt/A");
1484 ASSERT_EQ(new_flags, expected_flags);
1485
1486 new_flags = read_mnt_flags("/mnt/A/AA");
1487 ASSERT_EQ(new_flags, expected_flags);
1488
1489 new_flags = read_mnt_flags("/mnt/A/AA/B");
1490 ASSERT_EQ(new_flags, expected_flags);
1491
1492 new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
1493 ASSERT_EQ(new_flags, expected_flags);
1494
1495 fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
1496 ASSERT_GT(fd, 0);
1497 ASSERT_EQ(close(fd), 0);
1498 }
1499
1500 TEST_HARNESS_MAIN
1501