1 // SPDX-License-Identifier: GPL-2.0 2 #define _GNU_SOURCE 3 #include <sched.h> 4 #include <stdio.h> 5 #include <errno.h> 6 #include <pthread.h> 7 #include <string.h> 8 #include <sys/stat.h> 9 #include <sys/types.h> 10 #include <sys/mount.h> 11 #include <sys/wait.h> 12 #include <sys/vfs.h> 13 #include <sys/statvfs.h> 14 #include <sys/sysinfo.h> 15 #include <stdlib.h> 16 #include <unistd.h> 17 #include <fcntl.h> 18 #include <grp.h> 19 #include <stdbool.h> 20 #include <stdarg.h> 21 #include <linux/mount.h> 22 23 #include "../kselftest_harness.h" 24 25 #ifndef CLONE_NEWNS 26 #define CLONE_NEWNS 0x00020000 27 #endif 28 29 #ifndef CLONE_NEWUSER 30 #define CLONE_NEWUSER 0x10000000 31 #endif 32 33 #ifndef MS_REC 34 #define MS_REC 16384 35 #endif 36 37 #ifndef MS_RELATIME 38 #define MS_RELATIME (1 << 21) 39 #endif 40 41 #ifndef MS_STRICTATIME 42 #define MS_STRICTATIME (1 << 24) 43 #endif 44 45 #ifndef MOUNT_ATTR_RDONLY 46 #define MOUNT_ATTR_RDONLY 0x00000001 47 #endif 48 49 #ifndef MOUNT_ATTR_NOSUID 50 #define MOUNT_ATTR_NOSUID 0x00000002 51 #endif 52 53 #ifndef MOUNT_ATTR_NOEXEC 54 #define MOUNT_ATTR_NOEXEC 0x00000008 55 #endif 56 57 #ifndef MOUNT_ATTR_NODIRATIME 58 #define MOUNT_ATTR_NODIRATIME 0x00000080 59 #endif 60 61 #ifndef MOUNT_ATTR__ATIME 62 #define MOUNT_ATTR__ATIME 0x00000070 63 #endif 64 65 #ifndef MOUNT_ATTR_RELATIME 66 #define MOUNT_ATTR_RELATIME 0x00000000 67 #endif 68 69 #ifndef MOUNT_ATTR_NOATIME 70 #define MOUNT_ATTR_NOATIME 0x00000010 71 #endif 72 73 #ifndef MOUNT_ATTR_STRICTATIME 74 #define MOUNT_ATTR_STRICTATIME 0x00000020 75 #endif 76 77 #ifndef AT_RECURSIVE 78 #define AT_RECURSIVE 0x8000 79 #endif 80 81 #ifndef MS_SHARED 82 #define MS_SHARED (1 << 20) 83 #endif 84 85 #define DEFAULT_THREADS 4 86 #define ptr_to_int(p) ((int)((intptr_t)(p))) 87 #define int_to_ptr(u) ((void *)((intptr_t)(u))) 88 89 #ifndef __NR_mount_setattr 90 #if defined __alpha__ 91 #define __NR_mount_setattr 552 92 #elif defined _MIPS_SIM 93 #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ 94 #define __NR_mount_setattr (442 + 4000) 95 #endif 96 #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ 97 #define __NR_mount_setattr (442 + 6000) 98 #endif 99 #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ 100 #define __NR_mount_setattr (442 + 5000) 101 #endif 102 #elif defined __ia64__ 103 #define __NR_mount_setattr (442 + 1024) 104 #else 105 #define __NR_mount_setattr 442 106 #endif 107 #endif 108 109 #ifndef __NR_open_tree 110 #if defined __alpha__ 111 #define __NR_open_tree 538 112 #elif defined _MIPS_SIM 113 #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ 114 #define __NR_open_tree 4428 115 #endif 116 #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ 117 #define __NR_open_tree 6428 118 #endif 119 #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ 120 #define __NR_open_tree 5428 121 #endif 122 #elif defined __ia64__ 123 #define __NR_open_tree (428 + 1024) 124 #else 125 #define __NR_open_tree 428 126 #endif 127 #endif 128 129 #ifndef MOUNT_ATTR_IDMAP 130 #define MOUNT_ATTR_IDMAP 0x00100000 131 #endif 132 133 #ifndef MOUNT_ATTR_NOSYMFOLLOW 134 #define MOUNT_ATTR_NOSYMFOLLOW 0x00200000 135 #endif 136 137 static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flags, 138 struct mount_attr *attr, size_t size) 139 { 140 return syscall(__NR_mount_setattr, dfd, path, flags, attr, size); 141 } 142 143 #ifndef OPEN_TREE_CLONE 144 #define OPEN_TREE_CLONE 1 145 #endif 146 147 #ifndef OPEN_TREE_CLOEXEC 148 #define OPEN_TREE_CLOEXEC O_CLOEXEC 149 #endif 150 151 #ifndef AT_RECURSIVE 152 #define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ 153 #endif 154 155 static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags) 156 { 157 return syscall(__NR_open_tree, dfd, filename, flags); 158 } 159 160 static ssize_t write_nointr(int fd, const void *buf, size_t count) 161 { 162 ssize_t ret; 163 164 do { 165 ret = write(fd, buf, count); 166 } while (ret < 0 && errno == EINTR); 167 168 return ret; 169 } 170 171 static int write_file(const char *path, const void *buf, size_t count) 172 { 173 int fd; 174 ssize_t ret; 175 176 fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW); 177 if (fd < 0) 178 return -1; 179 180 ret = write_nointr(fd, buf, count); 181 close(fd); 182 if (ret < 0 || (size_t)ret != count) 183 return -1; 184 185 return 0; 186 } 187 188 static int create_and_enter_userns(void) 189 { 190 uid_t uid; 191 gid_t gid; 192 char map[100]; 193 194 uid = getuid(); 195 gid = getgid(); 196 197 if (unshare(CLONE_NEWUSER)) 198 return -1; 199 200 if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) && 201 errno != ENOENT) 202 return -1; 203 204 snprintf(map, sizeof(map), "0 %d 1", uid); 205 if (write_file("/proc/self/uid_map", map, strlen(map))) 206 return -1; 207 208 209 snprintf(map, sizeof(map), "0 %d 1", gid); 210 if (write_file("/proc/self/gid_map", map, strlen(map))) 211 return -1; 212 213 if (setgid(0)) 214 return -1; 215 216 if (setuid(0)) 217 return -1; 218 219 return 0; 220 } 221 222 static int prepare_unpriv_mountns(void) 223 { 224 if (create_and_enter_userns()) 225 return -1; 226 227 if (unshare(CLONE_NEWNS)) 228 return -1; 229 230 if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0)) 231 return -1; 232 233 return 0; 234 } 235 236 #ifndef ST_NOSYMFOLLOW 237 #define ST_NOSYMFOLLOW 0x2000 /* do not follow symlinks */ 238 #endif 239 240 static int read_mnt_flags(const char *path) 241 { 242 int ret; 243 struct statvfs stat; 244 unsigned int mnt_flags; 245 246 ret = statvfs(path, &stat); 247 if (ret != 0) 248 return -EINVAL; 249 250 if (stat.f_flag & ~(ST_RDONLY | ST_NOSUID | ST_NODEV | ST_NOEXEC | 251 ST_NOATIME | ST_NODIRATIME | ST_RELATIME | 252 ST_SYNCHRONOUS | ST_MANDLOCK | ST_NOSYMFOLLOW)) 253 return -EINVAL; 254 255 mnt_flags = 0; 256 if (stat.f_flag & ST_RDONLY) 257 mnt_flags |= MS_RDONLY; 258 if (stat.f_flag & ST_NOSUID) 259 mnt_flags |= MS_NOSUID; 260 if (stat.f_flag & ST_NODEV) 261 mnt_flags |= MS_NODEV; 262 if (stat.f_flag & ST_NOEXEC) 263 mnt_flags |= MS_NOEXEC; 264 if (stat.f_flag & ST_NOATIME) 265 mnt_flags |= MS_NOATIME; 266 if (stat.f_flag & ST_NODIRATIME) 267 mnt_flags |= MS_NODIRATIME; 268 if (stat.f_flag & ST_RELATIME) 269 mnt_flags |= MS_RELATIME; 270 if (stat.f_flag & ST_SYNCHRONOUS) 271 mnt_flags |= MS_SYNCHRONOUS; 272 if (stat.f_flag & ST_MANDLOCK) 273 mnt_flags |= ST_MANDLOCK; 274 if (stat.f_flag & ST_NOSYMFOLLOW) 275 mnt_flags |= ST_NOSYMFOLLOW; 276 277 return mnt_flags; 278 } 279 280 static char *get_field(char *src, int nfields) 281 { 282 int i; 283 char *p = src; 284 285 for (i = 0; i < nfields; i++) { 286 while (*p && *p != ' ' && *p != '\t') 287 p++; 288 289 if (!*p) 290 break; 291 292 p++; 293 } 294 295 return p; 296 } 297 298 static void null_endofword(char *word) 299 { 300 while (*word && *word != ' ' && *word != '\t') 301 word++; 302 *word = '\0'; 303 } 304 305 static bool is_shared_mount(const char *path) 306 { 307 size_t len = 0; 308 char *line = NULL; 309 FILE *f = NULL; 310 311 f = fopen("/proc/self/mountinfo", "re"); 312 if (!f) 313 return false; 314 315 while (getline(&line, &len, f) != -1) { 316 char *opts, *target; 317 318 target = get_field(line, 4); 319 if (!target) 320 continue; 321 322 opts = get_field(target, 2); 323 if (!opts) 324 continue; 325 326 null_endofword(target); 327 328 if (strcmp(target, path) != 0) 329 continue; 330 331 null_endofword(opts); 332 if (strstr(opts, "shared:")) 333 return true; 334 } 335 336 free(line); 337 fclose(f); 338 339 return false; 340 } 341 342 static void *mount_setattr_thread(void *data) 343 { 344 struct mount_attr attr = { 345 .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID, 346 .attr_clr = 0, 347 .propagation = MS_SHARED, 348 }; 349 350 if (sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr))) 351 pthread_exit(int_to_ptr(-1)); 352 353 pthread_exit(int_to_ptr(0)); 354 } 355 356 /* Attempt to de-conflict with the selftests tree. */ 357 #ifndef SKIP 358 #define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__) 359 #endif 360 361 static bool mount_setattr_supported(void) 362 { 363 int ret; 364 365 ret = sys_mount_setattr(-EBADF, "", AT_EMPTY_PATH, NULL, 0); 366 if (ret < 0 && errno == ENOSYS) 367 return false; 368 369 return true; 370 } 371 372 FIXTURE(mount_setattr) { 373 }; 374 375 #define NOSYMFOLLOW_TARGET "/mnt/A/AA/data" 376 #define NOSYMFOLLOW_SYMLINK "/mnt/A/AA/symlink" 377 378 FIXTURE_SETUP(mount_setattr) 379 { 380 int fd = -EBADF; 381 382 if (!mount_setattr_supported()) 383 SKIP(return, "mount_setattr syscall not supported"); 384 385 ASSERT_EQ(prepare_unpriv_mountns(), 0); 386 387 (void)umount2("/mnt", MNT_DETACH); 388 (void)umount2("/tmp", MNT_DETACH); 389 390 ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV, 391 "size=100000,mode=700"), 0); 392 393 ASSERT_EQ(mkdir("/tmp/B", 0777), 0); 394 395 ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV, 396 "size=100000,mode=700"), 0); 397 398 ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0); 399 400 ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV, 401 "size=100000,mode=700"), 0); 402 403 ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV, 404 "size=100000,mode=700"), 0); 405 406 ASSERT_EQ(mkdir("/mnt/A", 0777), 0); 407 408 ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV, 409 "size=100000,mode=700"), 0); 410 411 ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0); 412 413 ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0); 414 415 ASSERT_EQ(mkdir("/mnt/B", 0777), 0); 416 417 ASSERT_EQ(mount("testing", "/mnt/B", "ramfs", 418 MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0); 419 420 ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0); 421 422 ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts", 423 MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0); 424 425 fd = creat(NOSYMFOLLOW_TARGET, O_RDWR | O_CLOEXEC); 426 ASSERT_GT(fd, 0); 427 ASSERT_EQ(symlink(NOSYMFOLLOW_TARGET, NOSYMFOLLOW_SYMLINK), 0); 428 ASSERT_EQ(close(fd), 0); 429 } 430 431 FIXTURE_TEARDOWN(mount_setattr) 432 { 433 if (!mount_setattr_supported()) 434 SKIP(return, "mount_setattr syscall not supported"); 435 436 (void)umount2("/mnt/A", MNT_DETACH); 437 (void)umount2("/tmp", MNT_DETACH); 438 } 439 440 TEST_F(mount_setattr, invalid_attributes) 441 { 442 struct mount_attr invalid_attr = { 443 .attr_set = (1U << 31), 444 }; 445 446 if (!mount_setattr_supported()) 447 SKIP(return, "mount_setattr syscall not supported"); 448 449 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, 450 sizeof(invalid_attr)), 0); 451 452 invalid_attr.attr_set = 0; 453 invalid_attr.attr_clr = (1U << 31); 454 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, 455 sizeof(invalid_attr)), 0); 456 457 invalid_attr.attr_clr = 0; 458 invalid_attr.propagation = (1U << 31); 459 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, 460 sizeof(invalid_attr)), 0); 461 462 invalid_attr.attr_set = (1U << 31); 463 invalid_attr.attr_clr = (1U << 31); 464 invalid_attr.propagation = (1U << 31); 465 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, 466 sizeof(invalid_attr)), 0); 467 468 ASSERT_NE(sys_mount_setattr(-1, "mnt/A", AT_RECURSIVE, &invalid_attr, 469 sizeof(invalid_attr)), 0); 470 } 471 472 TEST_F(mount_setattr, extensibility) 473 { 474 unsigned int old_flags = 0, new_flags = 0, expected_flags = 0; 475 char *s = "dummy"; 476 struct mount_attr invalid_attr = {}; 477 struct mount_attr_large { 478 struct mount_attr attr1; 479 struct mount_attr attr2; 480 struct mount_attr attr3; 481 } large_attr = {}; 482 483 if (!mount_setattr_supported()) 484 SKIP(return, "mount_setattr syscall not supported"); 485 486 old_flags = read_mnt_flags("/mnt/A"); 487 ASSERT_GT(old_flags, 0); 488 489 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, NULL, 490 sizeof(invalid_attr)), 0); 491 ASSERT_EQ(errno, EFAULT); 492 493 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, (void *)s, 494 sizeof(invalid_attr)), 0); 495 ASSERT_EQ(errno, EINVAL); 496 497 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, 0), 0); 498 ASSERT_EQ(errno, EINVAL); 499 500 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, 501 sizeof(invalid_attr) / 2), 0); 502 ASSERT_EQ(errno, EINVAL); 503 504 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, 505 sizeof(invalid_attr) / 2), 0); 506 ASSERT_EQ(errno, EINVAL); 507 508 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, 509 (void *)&large_attr, sizeof(large_attr)), 0); 510 511 large_attr.attr3.attr_set = MOUNT_ATTR_RDONLY; 512 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, 513 (void *)&large_attr, sizeof(large_attr)), 0); 514 515 large_attr.attr3.attr_set = 0; 516 large_attr.attr1.attr_set = MOUNT_ATTR_RDONLY; 517 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, 518 (void *)&large_attr, sizeof(large_attr)), 0); 519 520 expected_flags = old_flags; 521 expected_flags |= MS_RDONLY; 522 523 new_flags = read_mnt_flags("/mnt/A"); 524 ASSERT_EQ(new_flags, expected_flags); 525 526 new_flags = read_mnt_flags("/mnt/A/AA"); 527 ASSERT_EQ(new_flags, expected_flags); 528 529 new_flags = read_mnt_flags("/mnt/A/AA/B"); 530 ASSERT_EQ(new_flags, expected_flags); 531 532 new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); 533 ASSERT_EQ(new_flags, expected_flags); 534 } 535 536 TEST_F(mount_setattr, basic) 537 { 538 unsigned int old_flags = 0, new_flags = 0, expected_flags = 0; 539 struct mount_attr attr = { 540 .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME, 541 .attr_clr = MOUNT_ATTR__ATIME, 542 }; 543 544 if (!mount_setattr_supported()) 545 SKIP(return, "mount_setattr syscall not supported"); 546 547 old_flags = read_mnt_flags("/mnt/A"); 548 ASSERT_GT(old_flags, 0); 549 550 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", 0, &attr, sizeof(attr)), 0); 551 552 expected_flags = old_flags; 553 expected_flags |= MS_RDONLY; 554 expected_flags |= MS_NOEXEC; 555 expected_flags &= ~MS_NOATIME; 556 expected_flags |= MS_RELATIME; 557 558 new_flags = read_mnt_flags("/mnt/A"); 559 ASSERT_EQ(new_flags, expected_flags); 560 561 new_flags = read_mnt_flags("/mnt/A/AA"); 562 ASSERT_EQ(new_flags, old_flags); 563 564 new_flags = read_mnt_flags("/mnt/A/AA/B"); 565 ASSERT_EQ(new_flags, old_flags); 566 567 new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); 568 ASSERT_EQ(new_flags, old_flags); 569 } 570 571 TEST_F(mount_setattr, basic_recursive) 572 { 573 int fd; 574 unsigned int old_flags = 0, new_flags = 0, expected_flags = 0; 575 struct mount_attr attr = { 576 .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME, 577 .attr_clr = MOUNT_ATTR__ATIME, 578 }; 579 580 if (!mount_setattr_supported()) 581 SKIP(return, "mount_setattr syscall not supported"); 582 583 old_flags = read_mnt_flags("/mnt/A"); 584 ASSERT_GT(old_flags, 0); 585 586 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 587 588 expected_flags = old_flags; 589 expected_flags |= MS_RDONLY; 590 expected_flags |= MS_NOEXEC; 591 expected_flags &= ~MS_NOATIME; 592 expected_flags |= MS_RELATIME; 593 594 new_flags = read_mnt_flags("/mnt/A"); 595 ASSERT_EQ(new_flags, expected_flags); 596 597 new_flags = read_mnt_flags("/mnt/A/AA"); 598 ASSERT_EQ(new_flags, expected_flags); 599 600 new_flags = read_mnt_flags("/mnt/A/AA/B"); 601 ASSERT_EQ(new_flags, expected_flags); 602 603 new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); 604 ASSERT_EQ(new_flags, expected_flags); 605 606 memset(&attr, 0, sizeof(attr)); 607 attr.attr_clr = MOUNT_ATTR_RDONLY; 608 attr.propagation = MS_SHARED; 609 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 610 611 expected_flags &= ~MS_RDONLY; 612 new_flags = read_mnt_flags("/mnt/A"); 613 ASSERT_EQ(new_flags, expected_flags); 614 615 ASSERT_EQ(is_shared_mount("/mnt/A"), true); 616 617 new_flags = read_mnt_flags("/mnt/A/AA"); 618 ASSERT_EQ(new_flags, expected_flags); 619 620 ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true); 621 622 new_flags = read_mnt_flags("/mnt/A/AA/B"); 623 ASSERT_EQ(new_flags, expected_flags); 624 625 ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true); 626 627 new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); 628 ASSERT_EQ(new_flags, expected_flags); 629 630 ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true); 631 632 fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777); 633 ASSERT_GE(fd, 0); 634 635 /* 636 * We're holding a fd open for writing so this needs to fail somewhere 637 * in the middle and the mount options need to be unchanged. 638 */ 639 attr.attr_set = MOUNT_ATTR_RDONLY; 640 ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 641 642 new_flags = read_mnt_flags("/mnt/A"); 643 ASSERT_EQ(new_flags, expected_flags); 644 645 ASSERT_EQ(is_shared_mount("/mnt/A"), true); 646 647 new_flags = read_mnt_flags("/mnt/A/AA"); 648 ASSERT_EQ(new_flags, expected_flags); 649 650 ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true); 651 652 new_flags = read_mnt_flags("/mnt/A/AA/B"); 653 ASSERT_EQ(new_flags, expected_flags); 654 655 ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true); 656 657 new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); 658 ASSERT_EQ(new_flags, expected_flags); 659 660 ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true); 661 662 EXPECT_EQ(close(fd), 0); 663 } 664 665 TEST_F(mount_setattr, mount_has_writers) 666 { 667 int fd, dfd; 668 unsigned int old_flags = 0, new_flags = 0; 669 struct mount_attr attr = { 670 .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME, 671 .attr_clr = MOUNT_ATTR__ATIME, 672 .propagation = MS_SHARED, 673 }; 674 675 if (!mount_setattr_supported()) 676 SKIP(return, "mount_setattr syscall not supported"); 677 678 old_flags = read_mnt_flags("/mnt/A"); 679 ASSERT_GT(old_flags, 0); 680 681 fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777); 682 ASSERT_GE(fd, 0); 683 684 /* 685 * We're holding a fd open to a mount somwhere in the middle so this 686 * needs to fail somewhere in the middle. After this the mount options 687 * need to be unchanged. 688 */ 689 ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 690 691 new_flags = read_mnt_flags("/mnt/A"); 692 ASSERT_EQ(new_flags, old_flags); 693 694 ASSERT_EQ(is_shared_mount("/mnt/A"), false); 695 696 new_flags = read_mnt_flags("/mnt/A/AA"); 697 ASSERT_EQ(new_flags, old_flags); 698 699 ASSERT_EQ(is_shared_mount("/mnt/A/AA"), false); 700 701 new_flags = read_mnt_flags("/mnt/A/AA/B"); 702 ASSERT_EQ(new_flags, old_flags); 703 704 ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), false); 705 706 new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); 707 ASSERT_EQ(new_flags, old_flags); 708 709 ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), false); 710 711 dfd = open("/mnt/A/AA/B", O_DIRECTORY | O_CLOEXEC); 712 ASSERT_GE(dfd, 0); 713 EXPECT_EQ(fsync(dfd), 0); 714 EXPECT_EQ(close(dfd), 0); 715 716 EXPECT_EQ(fsync(fd), 0); 717 EXPECT_EQ(close(fd), 0); 718 719 /* All writers are gone so this should succeed. */ 720 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 721 } 722 723 TEST_F(mount_setattr, mixed_mount_options) 724 { 725 unsigned int old_flags1 = 0, old_flags2 = 0, new_flags = 0, expected_flags = 0; 726 struct mount_attr attr = { 727 .attr_clr = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR__ATIME, 728 .attr_set = MOUNT_ATTR_RELATIME, 729 }; 730 731 if (!mount_setattr_supported()) 732 SKIP(return, "mount_setattr syscall not supported"); 733 734 old_flags1 = read_mnt_flags("/mnt/B"); 735 ASSERT_GT(old_flags1, 0); 736 737 old_flags2 = read_mnt_flags("/mnt/B/BB"); 738 ASSERT_GT(old_flags2, 0); 739 740 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/B", AT_RECURSIVE, &attr, sizeof(attr)), 0); 741 742 expected_flags = old_flags2; 743 expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID); 744 expected_flags |= MS_RELATIME; 745 746 new_flags = read_mnt_flags("/mnt/B"); 747 ASSERT_EQ(new_flags, expected_flags); 748 749 expected_flags = old_flags2; 750 expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID); 751 expected_flags |= MS_RELATIME; 752 753 new_flags = read_mnt_flags("/mnt/B/BB"); 754 ASSERT_EQ(new_flags, expected_flags); 755 } 756 757 TEST_F(mount_setattr, time_changes) 758 { 759 unsigned int old_flags = 0, new_flags = 0, expected_flags = 0; 760 struct mount_attr attr = { 761 .attr_set = MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME, 762 }; 763 764 if (!mount_setattr_supported()) 765 SKIP(return, "mount_setattr syscall not supported"); 766 767 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 768 769 attr.attr_set = MOUNT_ATTR_STRICTATIME; 770 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 771 772 attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME; 773 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 774 775 attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME; 776 attr.attr_clr = MOUNT_ATTR__ATIME; 777 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 778 779 attr.attr_set = 0; 780 attr.attr_clr = MOUNT_ATTR_STRICTATIME; 781 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 782 783 attr.attr_clr = MOUNT_ATTR_NOATIME; 784 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 785 786 old_flags = read_mnt_flags("/mnt/A"); 787 ASSERT_GT(old_flags, 0); 788 789 attr.attr_set = MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME; 790 attr.attr_clr = MOUNT_ATTR__ATIME; 791 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 792 793 expected_flags = old_flags; 794 expected_flags |= MS_NOATIME; 795 expected_flags |= MS_NODIRATIME; 796 797 new_flags = read_mnt_flags("/mnt/A"); 798 ASSERT_EQ(new_flags, expected_flags); 799 800 new_flags = read_mnt_flags("/mnt/A/AA"); 801 ASSERT_EQ(new_flags, expected_flags); 802 803 new_flags = read_mnt_flags("/mnt/A/AA/B"); 804 ASSERT_EQ(new_flags, expected_flags); 805 806 new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); 807 ASSERT_EQ(new_flags, expected_flags); 808 809 memset(&attr, 0, sizeof(attr)); 810 attr.attr_set &= ~MOUNT_ATTR_NOATIME; 811 attr.attr_set |= MOUNT_ATTR_RELATIME; 812 attr.attr_clr |= MOUNT_ATTR__ATIME; 813 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 814 815 expected_flags &= ~MS_NOATIME; 816 expected_flags |= MS_RELATIME; 817 818 new_flags = read_mnt_flags("/mnt/A"); 819 ASSERT_EQ(new_flags, expected_flags); 820 821 new_flags = read_mnt_flags("/mnt/A/AA"); 822 ASSERT_EQ(new_flags, expected_flags); 823 824 new_flags = read_mnt_flags("/mnt/A/AA/B"); 825 ASSERT_EQ(new_flags, expected_flags); 826 827 new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); 828 ASSERT_EQ(new_flags, expected_flags); 829 830 memset(&attr, 0, sizeof(attr)); 831 attr.attr_set &= ~MOUNT_ATTR_RELATIME; 832 attr.attr_set |= MOUNT_ATTR_STRICTATIME; 833 attr.attr_clr |= MOUNT_ATTR__ATIME; 834 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 835 836 expected_flags &= ~MS_RELATIME; 837 838 new_flags = read_mnt_flags("/mnt/A"); 839 ASSERT_EQ(new_flags, expected_flags); 840 841 new_flags = read_mnt_flags("/mnt/A/AA"); 842 ASSERT_EQ(new_flags, expected_flags); 843 844 new_flags = read_mnt_flags("/mnt/A/AA/B"); 845 ASSERT_EQ(new_flags, expected_flags); 846 847 new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); 848 ASSERT_EQ(new_flags, expected_flags); 849 850 memset(&attr, 0, sizeof(attr)); 851 attr.attr_set &= ~MOUNT_ATTR_STRICTATIME; 852 attr.attr_set |= MOUNT_ATTR_NOATIME; 853 attr.attr_clr |= MOUNT_ATTR__ATIME; 854 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 855 856 expected_flags |= MS_NOATIME; 857 new_flags = read_mnt_flags("/mnt/A"); 858 ASSERT_EQ(new_flags, expected_flags); 859 860 new_flags = read_mnt_flags("/mnt/A/AA"); 861 ASSERT_EQ(new_flags, expected_flags); 862 863 new_flags = read_mnt_flags("/mnt/A/AA/B"); 864 ASSERT_EQ(new_flags, expected_flags); 865 866 new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); 867 ASSERT_EQ(new_flags, expected_flags); 868 869 memset(&attr, 0, sizeof(attr)); 870 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 871 872 new_flags = read_mnt_flags("/mnt/A"); 873 ASSERT_EQ(new_flags, expected_flags); 874 875 new_flags = read_mnt_flags("/mnt/A/AA"); 876 ASSERT_EQ(new_flags, expected_flags); 877 878 new_flags = read_mnt_flags("/mnt/A/AA/B"); 879 ASSERT_EQ(new_flags, expected_flags); 880 881 new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); 882 ASSERT_EQ(new_flags, expected_flags); 883 884 memset(&attr, 0, sizeof(attr)); 885 attr.attr_clr = MOUNT_ATTR_NODIRATIME; 886 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 887 888 expected_flags &= ~MS_NODIRATIME; 889 890 new_flags = read_mnt_flags("/mnt/A"); 891 ASSERT_EQ(new_flags, expected_flags); 892 893 new_flags = read_mnt_flags("/mnt/A/AA"); 894 ASSERT_EQ(new_flags, expected_flags); 895 896 new_flags = read_mnt_flags("/mnt/A/AA/B"); 897 ASSERT_EQ(new_flags, expected_flags); 898 899 new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); 900 ASSERT_EQ(new_flags, expected_flags); 901 } 902 903 TEST_F(mount_setattr, multi_threaded) 904 { 905 int i, j, nthreads, ret = 0; 906 unsigned int old_flags = 0, new_flags = 0, expected_flags = 0; 907 pthread_attr_t pattr; 908 pthread_t threads[DEFAULT_THREADS]; 909 910 if (!mount_setattr_supported()) 911 SKIP(return, "mount_setattr syscall not supported"); 912 913 old_flags = read_mnt_flags("/mnt/A"); 914 ASSERT_GT(old_flags, 0); 915 916 /* Try to change mount options from multiple threads. */ 917 nthreads = get_nprocs_conf(); 918 if (nthreads > DEFAULT_THREADS) 919 nthreads = DEFAULT_THREADS; 920 921 pthread_attr_init(&pattr); 922 for (i = 0; i < nthreads; i++) 923 ASSERT_EQ(pthread_create(&threads[i], &pattr, mount_setattr_thread, NULL), 0); 924 925 for (j = 0; j < i; j++) { 926 void *retptr = NULL; 927 928 EXPECT_EQ(pthread_join(threads[j], &retptr), 0); 929 930 ret += ptr_to_int(retptr); 931 EXPECT_EQ(ret, 0); 932 } 933 pthread_attr_destroy(&pattr); 934 935 ASSERT_EQ(ret, 0); 936 937 expected_flags = old_flags; 938 expected_flags |= MS_RDONLY; 939 expected_flags |= MS_NOSUID; 940 new_flags = read_mnt_flags("/mnt/A"); 941 ASSERT_EQ(new_flags, expected_flags); 942 943 ASSERT_EQ(is_shared_mount("/mnt/A"), true); 944 945 new_flags = read_mnt_flags("/mnt/A/AA"); 946 ASSERT_EQ(new_flags, expected_flags); 947 948 ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true); 949 950 new_flags = read_mnt_flags("/mnt/A/AA/B"); 951 ASSERT_EQ(new_flags, expected_flags); 952 953 ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true); 954 955 new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); 956 ASSERT_EQ(new_flags, expected_flags); 957 958 ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true); 959 } 960 961 TEST_F(mount_setattr, wrong_user_namespace) 962 { 963 int ret; 964 struct mount_attr attr = { 965 .attr_set = MOUNT_ATTR_RDONLY, 966 }; 967 968 if (!mount_setattr_supported()) 969 SKIP(return, "mount_setattr syscall not supported"); 970 971 EXPECT_EQ(create_and_enter_userns(), 0); 972 ret = sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)); 973 ASSERT_LT(ret, 0); 974 ASSERT_EQ(errno, EPERM); 975 } 976 977 TEST_F(mount_setattr, wrong_mount_namespace) 978 { 979 int fd, ret; 980 struct mount_attr attr = { 981 .attr_set = MOUNT_ATTR_RDONLY, 982 }; 983 984 if (!mount_setattr_supported()) 985 SKIP(return, "mount_setattr syscall not supported"); 986 987 fd = open("/mnt/A", O_DIRECTORY | O_CLOEXEC); 988 ASSERT_GE(fd, 0); 989 990 ASSERT_EQ(unshare(CLONE_NEWNS), 0); 991 992 ret = sys_mount_setattr(fd, "", AT_EMPTY_PATH | AT_RECURSIVE, &attr, sizeof(attr)); 993 ASSERT_LT(ret, 0); 994 ASSERT_EQ(errno, EINVAL); 995 } 996 997 FIXTURE(mount_setattr_idmapped) { 998 }; 999 1000 FIXTURE_SETUP(mount_setattr_idmapped) 1001 { 1002 int img_fd = -EBADF; 1003 1004 ASSERT_EQ(unshare(CLONE_NEWNS), 0); 1005 1006 ASSERT_EQ(mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0), 0); 1007 1008 (void)umount2("/mnt", MNT_DETACH); 1009 (void)umount2("/tmp", MNT_DETACH); 1010 1011 ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV, 1012 "size=100000,mode=700"), 0); 1013 1014 ASSERT_EQ(mkdir("/tmp/B", 0777), 0); 1015 ASSERT_EQ(mknodat(-EBADF, "/tmp/B/b", S_IFREG | 0644, 0), 0); 1016 ASSERT_EQ(chown("/tmp/B/b", 0, 0), 0); 1017 1018 ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV, 1019 "size=100000,mode=700"), 0); 1020 1021 ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0); 1022 ASSERT_EQ(mknodat(-EBADF, "/tmp/B/BB/b", S_IFREG | 0644, 0), 0); 1023 ASSERT_EQ(chown("/tmp/B/BB/b", 0, 0), 0); 1024 1025 ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV, 1026 "size=100000,mode=700"), 0); 1027 1028 ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV, 1029 "size=100000,mode=700"), 0); 1030 1031 ASSERT_EQ(mkdir("/mnt/A", 0777), 0); 1032 1033 ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV, 1034 "size=100000,mode=700"), 0); 1035 1036 ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0); 1037 1038 ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0); 1039 1040 ASSERT_EQ(mkdir("/mnt/B", 0777), 0); 1041 1042 ASSERT_EQ(mount("testing", "/mnt/B", "ramfs", 1043 MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0); 1044 1045 ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0); 1046 1047 ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts", 1048 MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0); 1049 1050 ASSERT_EQ(mkdir("/mnt/C", 0777), 0); 1051 ASSERT_EQ(mkdir("/mnt/D", 0777), 0); 1052 img_fd = openat(-EBADF, "/mnt/C/ext4.img", O_CREAT | O_WRONLY, 0600); 1053 ASSERT_GE(img_fd, 0); 1054 ASSERT_EQ(ftruncate(img_fd, 1024 * 2048), 0); 1055 ASSERT_EQ(system("mkfs.ext4 -q /mnt/C/ext4.img"), 0); 1056 ASSERT_EQ(system("mount -o loop -t ext4 /mnt/C/ext4.img /mnt/D/"), 0); 1057 ASSERT_EQ(close(img_fd), 0); 1058 } 1059 1060 FIXTURE_TEARDOWN(mount_setattr_idmapped) 1061 { 1062 (void)umount2("/mnt/A", MNT_DETACH); 1063 (void)umount2("/tmp", MNT_DETACH); 1064 } 1065 1066 /** 1067 * Validate that negative fd values are rejected. 1068 */ 1069 TEST_F(mount_setattr_idmapped, invalid_fd_negative) 1070 { 1071 struct mount_attr attr = { 1072 .attr_set = MOUNT_ATTR_IDMAP, 1073 .userns_fd = -EBADF, 1074 }; 1075 1076 if (!mount_setattr_supported()) 1077 SKIP(return, "mount_setattr syscall not supported"); 1078 1079 ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) { 1080 TH_LOG("failure: created idmapped mount with negative fd"); 1081 } 1082 } 1083 1084 /** 1085 * Validate that excessively large fd values are rejected. 1086 */ 1087 TEST_F(mount_setattr_idmapped, invalid_fd_large) 1088 { 1089 struct mount_attr attr = { 1090 .attr_set = MOUNT_ATTR_IDMAP, 1091 .userns_fd = INT64_MAX, 1092 }; 1093 1094 if (!mount_setattr_supported()) 1095 SKIP(return, "mount_setattr syscall not supported"); 1096 1097 ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) { 1098 TH_LOG("failure: created idmapped mount with too large fd value"); 1099 } 1100 } 1101 1102 /** 1103 * Validate that closed fd values are rejected. 1104 */ 1105 TEST_F(mount_setattr_idmapped, invalid_fd_closed) 1106 { 1107 int fd; 1108 struct mount_attr attr = { 1109 .attr_set = MOUNT_ATTR_IDMAP, 1110 }; 1111 1112 if (!mount_setattr_supported()) 1113 SKIP(return, "mount_setattr syscall not supported"); 1114 1115 fd = open("/dev/null", O_RDONLY | O_CLOEXEC); 1116 ASSERT_GE(fd, 0); 1117 ASSERT_GE(close(fd), 0); 1118 1119 attr.userns_fd = fd; 1120 ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) { 1121 TH_LOG("failure: created idmapped mount with closed fd"); 1122 } 1123 } 1124 1125 /** 1126 * Validate that the initial user namespace is rejected. 1127 */ 1128 TEST_F(mount_setattr_idmapped, invalid_fd_initial_userns) 1129 { 1130 int open_tree_fd = -EBADF; 1131 struct mount_attr attr = { 1132 .attr_set = MOUNT_ATTR_IDMAP, 1133 }; 1134 1135 if (!mount_setattr_supported()) 1136 SKIP(return, "mount_setattr syscall not supported"); 1137 1138 open_tree_fd = sys_open_tree(-EBADF, "/mnt/D", 1139 AT_NO_AUTOMOUNT | 1140 AT_SYMLINK_NOFOLLOW | 1141 OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE); 1142 ASSERT_GE(open_tree_fd, 0); 1143 1144 attr.userns_fd = open("/proc/1/ns/user", O_RDONLY | O_CLOEXEC); 1145 ASSERT_GE(attr.userns_fd, 0); 1146 ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); 1147 ASSERT_EQ(errno, EPERM); 1148 ASSERT_EQ(close(attr.userns_fd), 0); 1149 ASSERT_EQ(close(open_tree_fd), 0); 1150 } 1151 1152 static int map_ids(pid_t pid, unsigned long nsid, unsigned long hostid, 1153 unsigned long range) 1154 { 1155 char map[100], procfile[256]; 1156 1157 snprintf(procfile, sizeof(procfile), "/proc/%d/uid_map", pid); 1158 snprintf(map, sizeof(map), "%lu %lu %lu", nsid, hostid, range); 1159 if (write_file(procfile, map, strlen(map))) 1160 return -1; 1161 1162 1163 snprintf(procfile, sizeof(procfile), "/proc/%d/gid_map", pid); 1164 snprintf(map, sizeof(map), "%lu %lu %lu", nsid, hostid, range); 1165 if (write_file(procfile, map, strlen(map))) 1166 return -1; 1167 1168 return 0; 1169 } 1170 1171 #define __STACK_SIZE (8 * 1024 * 1024) 1172 static pid_t do_clone(int (*fn)(void *), void *arg, int flags) 1173 { 1174 void *stack; 1175 1176 stack = malloc(__STACK_SIZE); 1177 if (!stack) 1178 return -ENOMEM; 1179 1180 #ifdef __ia64__ 1181 return __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL); 1182 #else 1183 return clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL); 1184 #endif 1185 } 1186 1187 static int get_userns_fd_cb(void *data) 1188 { 1189 return kill(getpid(), SIGSTOP); 1190 } 1191 1192 static int wait_for_pid(pid_t pid) 1193 { 1194 int status, ret; 1195 1196 again: 1197 ret = waitpid(pid, &status, 0); 1198 if (ret == -1) { 1199 if (errno == EINTR) 1200 goto again; 1201 1202 return -1; 1203 } 1204 1205 if (!WIFEXITED(status)) 1206 return -1; 1207 1208 return WEXITSTATUS(status); 1209 } 1210 1211 static int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long range) 1212 { 1213 int ret; 1214 pid_t pid; 1215 char path[256]; 1216 1217 pid = do_clone(get_userns_fd_cb, NULL, CLONE_NEWUSER); 1218 if (pid < 0) 1219 return -errno; 1220 1221 ret = map_ids(pid, nsid, hostid, range); 1222 if (ret < 0) 1223 return ret; 1224 1225 snprintf(path, sizeof(path), "/proc/%d/ns/user", pid); 1226 ret = open(path, O_RDONLY | O_CLOEXEC); 1227 kill(pid, SIGKILL); 1228 wait_for_pid(pid); 1229 return ret; 1230 } 1231 1232 /** 1233 * Validate that an attached mount in our mount namespace cannot be idmapped. 1234 * (The kernel enforces that the mount's mount namespace and the caller's mount 1235 * namespace match.) 1236 */ 1237 TEST_F(mount_setattr_idmapped, attached_mount_inside_current_mount_namespace) 1238 { 1239 int open_tree_fd = -EBADF; 1240 struct mount_attr attr = { 1241 .attr_set = MOUNT_ATTR_IDMAP, 1242 }; 1243 1244 if (!mount_setattr_supported()) 1245 SKIP(return, "mount_setattr syscall not supported"); 1246 1247 open_tree_fd = sys_open_tree(-EBADF, "/mnt/D", 1248 AT_EMPTY_PATH | 1249 AT_NO_AUTOMOUNT | 1250 AT_SYMLINK_NOFOLLOW | 1251 OPEN_TREE_CLOEXEC); 1252 ASSERT_GE(open_tree_fd, 0); 1253 1254 attr.userns_fd = get_userns_fd(0, 10000, 10000); 1255 ASSERT_GE(attr.userns_fd, 0); 1256 ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); 1257 ASSERT_EQ(close(attr.userns_fd), 0); 1258 ASSERT_EQ(close(open_tree_fd), 0); 1259 } 1260 1261 /** 1262 * Validate that idmapping a mount is rejected if the mount's mount namespace 1263 * and our mount namespace don't match. 1264 * (The kernel enforces that the mount's mount namespace and the caller's mount 1265 * namespace match.) 1266 */ 1267 TEST_F(mount_setattr_idmapped, attached_mount_outside_current_mount_namespace) 1268 { 1269 int open_tree_fd = -EBADF; 1270 struct mount_attr attr = { 1271 .attr_set = MOUNT_ATTR_IDMAP, 1272 }; 1273 1274 if (!mount_setattr_supported()) 1275 SKIP(return, "mount_setattr syscall not supported"); 1276 1277 open_tree_fd = sys_open_tree(-EBADF, "/mnt/D", 1278 AT_EMPTY_PATH | 1279 AT_NO_AUTOMOUNT | 1280 AT_SYMLINK_NOFOLLOW | 1281 OPEN_TREE_CLOEXEC); 1282 ASSERT_GE(open_tree_fd, 0); 1283 1284 ASSERT_EQ(unshare(CLONE_NEWNS), 0); 1285 1286 attr.userns_fd = get_userns_fd(0, 10000, 10000); 1287 ASSERT_GE(attr.userns_fd, 0); 1288 ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, 1289 sizeof(attr)), 0); 1290 ASSERT_EQ(close(attr.userns_fd), 0); 1291 ASSERT_EQ(close(open_tree_fd), 0); 1292 } 1293 1294 /** 1295 * Validate that an attached mount in our mount namespace can be idmapped. 1296 */ 1297 TEST_F(mount_setattr_idmapped, detached_mount_inside_current_mount_namespace) 1298 { 1299 int open_tree_fd = -EBADF; 1300 struct mount_attr attr = { 1301 .attr_set = MOUNT_ATTR_IDMAP, 1302 }; 1303 1304 if (!mount_setattr_supported()) 1305 SKIP(return, "mount_setattr syscall not supported"); 1306 1307 open_tree_fd = sys_open_tree(-EBADF, "/mnt/D", 1308 AT_EMPTY_PATH | 1309 AT_NO_AUTOMOUNT | 1310 AT_SYMLINK_NOFOLLOW | 1311 OPEN_TREE_CLOEXEC | 1312 OPEN_TREE_CLONE); 1313 ASSERT_GE(open_tree_fd, 0); 1314 1315 /* Changing mount properties on a detached mount. */ 1316 attr.userns_fd = get_userns_fd(0, 10000, 10000); 1317 ASSERT_GE(attr.userns_fd, 0); 1318 ASSERT_EQ(sys_mount_setattr(open_tree_fd, "", 1319 AT_EMPTY_PATH, &attr, sizeof(attr)), 0); 1320 ASSERT_EQ(close(attr.userns_fd), 0); 1321 ASSERT_EQ(close(open_tree_fd), 0); 1322 } 1323 1324 /** 1325 * Validate that a detached mount not in our mount namespace can be idmapped. 1326 */ 1327 TEST_F(mount_setattr_idmapped, detached_mount_outside_current_mount_namespace) 1328 { 1329 int open_tree_fd = -EBADF; 1330 struct mount_attr attr = { 1331 .attr_set = MOUNT_ATTR_IDMAP, 1332 }; 1333 1334 if (!mount_setattr_supported()) 1335 SKIP(return, "mount_setattr syscall not supported"); 1336 1337 open_tree_fd = sys_open_tree(-EBADF, "/mnt/D", 1338 AT_EMPTY_PATH | 1339 AT_NO_AUTOMOUNT | 1340 AT_SYMLINK_NOFOLLOW | 1341 OPEN_TREE_CLOEXEC | 1342 OPEN_TREE_CLONE); 1343 ASSERT_GE(open_tree_fd, 0); 1344 1345 ASSERT_EQ(unshare(CLONE_NEWNS), 0); 1346 1347 /* Changing mount properties on a detached mount. */ 1348 attr.userns_fd = get_userns_fd(0, 10000, 10000); 1349 ASSERT_GE(attr.userns_fd, 0); 1350 ASSERT_EQ(sys_mount_setattr(open_tree_fd, "", 1351 AT_EMPTY_PATH, &attr, sizeof(attr)), 0); 1352 ASSERT_EQ(close(attr.userns_fd), 0); 1353 ASSERT_EQ(close(open_tree_fd), 0); 1354 } 1355 1356 /** 1357 * Validate that currently changing the idmapping of an idmapped mount fails. 1358 */ 1359 TEST_F(mount_setattr_idmapped, change_idmapping) 1360 { 1361 int open_tree_fd = -EBADF; 1362 struct mount_attr attr = { 1363 .attr_set = MOUNT_ATTR_IDMAP, 1364 }; 1365 1366 if (!mount_setattr_supported()) 1367 SKIP(return, "mount_setattr syscall not supported"); 1368 1369 open_tree_fd = sys_open_tree(-EBADF, "/mnt/D", 1370 AT_EMPTY_PATH | 1371 AT_NO_AUTOMOUNT | 1372 AT_SYMLINK_NOFOLLOW | 1373 OPEN_TREE_CLOEXEC | 1374 OPEN_TREE_CLONE); 1375 ASSERT_GE(open_tree_fd, 0); 1376 1377 attr.userns_fd = get_userns_fd(0, 10000, 10000); 1378 ASSERT_GE(attr.userns_fd, 0); 1379 ASSERT_EQ(sys_mount_setattr(open_tree_fd, "", 1380 AT_EMPTY_PATH, &attr, sizeof(attr)), 0); 1381 ASSERT_EQ(close(attr.userns_fd), 0); 1382 1383 /* Change idmapping on a detached mount that is already idmapped. */ 1384 attr.userns_fd = get_userns_fd(0, 20000, 10000); 1385 ASSERT_GE(attr.userns_fd, 0); 1386 ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); 1387 ASSERT_EQ(close(attr.userns_fd), 0); 1388 ASSERT_EQ(close(open_tree_fd), 0); 1389 } 1390 1391 static bool expected_uid_gid(int dfd, const char *path, int flags, 1392 uid_t expected_uid, gid_t expected_gid) 1393 { 1394 int ret; 1395 struct stat st; 1396 1397 ret = fstatat(dfd, path, &st, flags); 1398 if (ret < 0) 1399 return false; 1400 1401 return st.st_uid == expected_uid && st.st_gid == expected_gid; 1402 } 1403 1404 TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid) 1405 { 1406 int open_tree_fd = -EBADF; 1407 struct mount_attr attr = { 1408 .attr_set = MOUNT_ATTR_IDMAP, 1409 }; 1410 1411 if (!mount_setattr_supported()) 1412 SKIP(return, "mount_setattr syscall not supported"); 1413 1414 ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0); 1415 ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0); 1416 1417 open_tree_fd = sys_open_tree(-EBADF, "/mnt/A", 1418 AT_RECURSIVE | 1419 AT_EMPTY_PATH | 1420 AT_NO_AUTOMOUNT | 1421 AT_SYMLINK_NOFOLLOW | 1422 OPEN_TREE_CLOEXEC | 1423 OPEN_TREE_CLONE); 1424 ASSERT_GE(open_tree_fd, 0); 1425 1426 attr.userns_fd = get_userns_fd(0, 10000, 10000); 1427 ASSERT_GE(attr.userns_fd, 0); 1428 ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); 1429 ASSERT_EQ(close(attr.userns_fd), 0); 1430 ASSERT_EQ(close(open_tree_fd), 0); 1431 1432 ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0); 1433 ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0); 1434 ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/b", 0, 0, 0), 0); 1435 ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/BB/b", 0, 0, 0), 0); 1436 } 1437 1438 TEST_F(mount_setattr, mount_attr_nosymfollow) 1439 { 1440 int fd; 1441 unsigned int old_flags = 0, new_flags = 0, expected_flags = 0; 1442 struct mount_attr attr = { 1443 .attr_set = MOUNT_ATTR_NOSYMFOLLOW, 1444 }; 1445 1446 if (!mount_setattr_supported()) 1447 SKIP(return, "mount_setattr syscall not supported"); 1448 1449 fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC); 1450 ASSERT_GT(fd, 0); 1451 ASSERT_EQ(close(fd), 0); 1452 1453 old_flags = read_mnt_flags("/mnt/A"); 1454 ASSERT_GT(old_flags, 0); 1455 1456 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 1457 1458 expected_flags = old_flags; 1459 expected_flags |= ST_NOSYMFOLLOW; 1460 1461 new_flags = read_mnt_flags("/mnt/A"); 1462 ASSERT_EQ(new_flags, expected_flags); 1463 1464 new_flags = read_mnt_flags("/mnt/A/AA"); 1465 ASSERT_EQ(new_flags, expected_flags); 1466 1467 new_flags = read_mnt_flags("/mnt/A/AA/B"); 1468 ASSERT_EQ(new_flags, expected_flags); 1469 1470 new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); 1471 ASSERT_EQ(new_flags, expected_flags); 1472 1473 fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC); 1474 ASSERT_LT(fd, 0); 1475 ASSERT_EQ(errno, ELOOP); 1476 1477 attr.attr_set &= ~MOUNT_ATTR_NOSYMFOLLOW; 1478 attr.attr_clr |= MOUNT_ATTR_NOSYMFOLLOW; 1479 1480 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 1481 1482 expected_flags &= ~ST_NOSYMFOLLOW; 1483 new_flags = read_mnt_flags("/mnt/A"); 1484 ASSERT_EQ(new_flags, expected_flags); 1485 1486 new_flags = read_mnt_flags("/mnt/A/AA"); 1487 ASSERT_EQ(new_flags, expected_flags); 1488 1489 new_flags = read_mnt_flags("/mnt/A/AA/B"); 1490 ASSERT_EQ(new_flags, expected_flags); 1491 1492 new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); 1493 ASSERT_EQ(new_flags, expected_flags); 1494 1495 fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC); 1496 ASSERT_GT(fd, 0); 1497 ASSERT_EQ(close(fd), 0); 1498 } 1499 1500 TEST_HARNESS_MAIN 1501