/*
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation, version 2 of the
 * License.
 */

#include <linux/export.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/user_namespace.h>
#include <linux/proc_ns.h>
#include <linux/highuid.h>
#include <linux/cred.h>
#include <linux/securebits.h>
#include <linux/keyctl.h>
#include <linux/key-type.h>
#include <keys/user-type.h>
#include <linux/seq_file.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/ctype.h>
#include <linux/projid.h>
#include <linux/fs_struct.h>

static struct kmem_cache *user_ns_cachep __read_mostly;
/* Serializes all writes to any id map and to ns->flags (setgroups state). */
static DEFINE_MUTEX(userns_state_mutex);

static bool new_idmap_permitted(const struct file *file,
				struct user_namespace *ns, int cap_setid,
				struct uid_gid_map *map);
static void free_user_ns(struct work_struct *work);

/* Charge one user namespace against @uid's UCOUNT_USER_NAMESPACES limit. */
static struct ucounts *inc_user_namespaces(struct user_namespace *ns, kuid_t uid)
{
	return inc_ucount(ns, uid, UCOUNT_USER_NAMESPACES);
}

/* Release the charge taken by inc_user_namespaces(). */
static void dec_user_namespaces(struct ucounts *ucounts)
{
	return dec_ucount(ucounts, UCOUNT_USER_NAMESPACES);
}

/* Move @cred into @user_ns with a fresh, full capability set. */
static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
{
	/* Start with the same capabilities as init but useless for doing
	 * anything as the capabilities are bound to the new user namespace.
	 */
	cred->securebits = SECUREBITS_DEFAULT;
	cred->cap_inheritable = CAP_EMPTY_SET;
	cred->cap_permitted = CAP_FULL_SET;
	cred->cap_effective = CAP_FULL_SET;
	cred->cap_ambient = CAP_EMPTY_SET;
	cred->cap_bset = CAP_FULL_SET;
#ifdef CONFIG_KEYS
	key_put(cred->request_key_auth);
	cred->request_key_auth = NULL;
#endif
	/* tgcred will be cleared in our caller because CLONE_THREAD won't be set */
	cred->user_ns = user_ns;
}

/*
 * Create a new user namespace, deriving the creator from the user in the
 * passed credentials, and replacing that user with the new root user for the
 * new namespace.
 *
 * This is called by copy_creds(), which will finish setting the target task's
 * credentials.
 */
int create_user_ns(struct cred *new)
{
	struct user_namespace *ns, *parent_ns = new->user_ns;
	kuid_t owner = new->euid;
	kgid_t group = new->egid;
	struct ucounts *ucounts;
	int ret, i;

	/* Refuse pathologically deep namespace nesting. */
	ret = -ENOSPC;
	if (parent_ns->level > 32)
		goto fail;

	ucounts = inc_user_namespaces(parent_ns, owner);
	if (!ucounts)
		goto fail;

	/*
	 * Verify that we can not violate the policy of which files
	 * may be accessed that is specified by the root directory,
	 * by verifying that the root directory is at the root of the
	 * mount namespace which allows all files to be accessed.
	 */
	ret = -EPERM;
	if (current_chrooted())
		goto fail_dec;

	/* The creator needs a mapping in the parent user namespace
	 * or else we won't be able to reasonably tell userspace who
	 * created a user_namespace.
	 */
	ret = -EPERM;
	if (!kuid_has_mapping(parent_ns, owner) ||
	    !kgid_has_mapping(parent_ns, group))
		goto fail_dec;

	ret = -ENOMEM;
	ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL);
	if (!ns)
		goto fail_dec;

	ret = ns_alloc_inum(&ns->ns);
	if (ret)
		goto fail_free;
	ns->ns.ops = &userns_operations;

	atomic_set(&ns->count, 1);
	/* Leave the new->user_ns reference with the new user namespace. */
	ns->parent = parent_ns;
	ns->level = parent_ns->level + 1;
	ns->owner = owner;
	ns->group = group;
	INIT_WORK(&ns->work, free_user_ns);
	for (i = 0; i < UCOUNT_COUNTS; i++) {
		ns->ucount_max[i] = INT_MAX;
	}
	ns->ucounts = ucounts;

	/* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
	mutex_lock(&userns_state_mutex);
	ns->flags = parent_ns->flags;
	mutex_unlock(&userns_state_mutex);

#ifdef CONFIG_PERSISTENT_KEYRINGS
	init_rwsem(&ns->persistent_keyring_register_sem);
#endif
	ret = -ENOMEM;
	if (!setup_userns_sysctls(ns))
		goto fail_keyring;

	set_cred_user_ns(new, ns);
	return 0;
fail_keyring:
#ifdef CONFIG_PERSISTENT_KEYRINGS
	key_put(ns->persistent_keyring_register);
#endif
	ns_free_inum(&ns->ns);
fail_free:
	kmem_cache_free(user_ns_cachep, ns);
fail_dec:
	dec_user_namespaces(ucounts);
fail:
	return ret;
}

/* Give the caller fresh credentials in a brand-new user namespace when
 * CLONE_NEWUSER is requested; otherwise do nothing and return 0. */
int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
{
	struct cred *cred;
	int err = -ENOMEM;

	if (!(unshare_flags & CLONE_NEWUSER))
		return 0;

	cred = prepare_creds();
	if (cred) {
		err = create_user_ns(cred);
		if (err)
			put_cred(cred);
		else
			*new_cred = cred;
	}

	return err;
}

/* Deferred destructor (runs from the workqueue): tears down @ns and then
 * walks up, freeing each parent whose refcount drops to zero as well. */
static void free_user_ns(struct work_struct *work)
{
	struct user_namespace *parent, *ns =
		container_of(work, struct user_namespace, work);

	do {
		struct ucounts *ucounts = ns->ucounts;
		parent = ns->parent;
		retire_userns_sysctls(ns);
#ifdef CONFIG_PERSISTENT_KEYRINGS
		key_put(ns->persistent_keyring_register);
#endif
		ns_free_inum(&ns->ns);
		kmem_cache_free(user_ns_cachep, ns);
		dec_user_namespaces(ucounts);
		ns = parent;
	} while (atomic_dec_and_test(&parent->count));
}

void __put_user_ns(struct user_namespace *ns)
{
	schedule_work(&ns->work);
}
EXPORT_SYMBOL(__put_user_ns);

/* Map the range [id, id + count - 1] down through @map.  The whole range
 * must fit inside a single extent.  Returns the mapped value of @id, or
 * (u32) -1 if no extent covers the full range. */
static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count)
{
	unsigned idx, extents;
	u32 first, last, id2;

	id2 = id + count - 1;

	/* Find the matching extent */
	extents = map->nr_extents;
	smp_rmb();	/* pairs with the smp_wmb() in map_write() */
	for (idx = 0; idx < extents; idx++) {
		first = map->extent[idx].first;
		last = first + map->extent[idx].count - 1;
		if (id >= first && id <= last &&
		    (id2 >= first && id2 <= last))
			break;
	}
	/* Map the id or note failure */
	if (idx < extents)
		id = (id - first) + map->extent[idx].lower_first;
	else
		id = (u32) -1;

	return id;
}

/* Map a single id down (namespace value -> lower/kernel value);
 * returns (u32) -1 if @id is not covered by any extent. */
static u32 map_id_down(struct uid_gid_map *map, u32 id)
{
	unsigned idx, extents;
	u32 first, last;

	/* Find the matching extent */
	extents = map->nr_extents;
	smp_rmb();	/* pairs with the smp_wmb() in map_write() */
	for (idx = 0; idx < extents; idx++) {
		first = map->extent[idx].first;
		last = first + map->extent[idx].count - 1;
		if (id >= first && id <= last)
			break;
	}
	/* Map the id or note failure */
	if (idx < extents)
		id = (id - first) + map->extent[idx].lower_first;
	else
		id = (u32) -1;

	return id;
}

/* Map a single id up (lower/kernel value -> namespace value);
 * returns (u32) -1 if @id is not covered by any extent. */
static u32 map_id_up(struct uid_gid_map *map, u32 id)
{
	unsigned idx, extents;
	u32 first, last;

	/* Find the matching extent */
	extents = map->nr_extents;
	smp_rmb();	/* pairs with the smp_wmb() in map_write() */
	for (idx = 0; idx < extents; idx++) {
		first = map->extent[idx].lower_first;
		last = first + map->extent[idx].count - 1;
		if (id >= first && id <= last)
			break;
	}
	/* Map the id or note failure */
	if (idx < extents)
		id = (id - first) + map->extent[idx].first;
	else
		id = (u32) -1;

	return id;
}

/**
 * make_kuid - Map a user-namespace uid pair into a kuid.
 * @ns:  User namespace that the uid is in
 * @uid: User identifier
 *
 * Maps a user-namespace uid pair into a kernel internal kuid,
 * and returns that kuid.
 *
 * When there is no mapping defined for the user-namespace uid
 * pair INVALID_UID is returned.  Callers are expected to test
 * for and handle INVALID_UID being returned.  INVALID_UID
 * may be tested for using uid_valid().
 */
kuid_t make_kuid(struct user_namespace *ns, uid_t uid)
{
	/* Map the uid to a global kernel uid */
	return KUIDT_INIT(map_id_down(&ns->uid_map, uid));
}
EXPORT_SYMBOL(make_kuid);

/**
 * from_kuid - Create a uid from a kuid user-namespace pair.
 * @targ: The user namespace we want a uid in.
 * @kuid: The kernel internal uid to start with.
 *
 * Map @kuid into the user-namespace specified by @targ and
 * return the resulting uid.
 *
 * There is always a mapping into the initial user_namespace.
 *
 * If @kuid has no mapping in @targ (uid_t)-1 is returned.
 */
uid_t from_kuid(struct user_namespace *targ, kuid_t kuid)
{
	/* Map the uid from a global kernel uid */
	return map_id_up(&targ->uid_map, __kuid_val(kuid));
}
EXPORT_SYMBOL(from_kuid);

/**
 * from_kuid_munged - Create a uid from a kuid user-namespace pair.
 * @targ: The user namespace we want a uid in.
 * @kuid: The kernel internal uid to start with.
 *
 * Map @kuid into the user-namespace specified by @targ and
 * return the resulting uid.
 *
 * There is always a mapping into the initial user_namespace.
 *
 * Unlike from_kuid from_kuid_munged never fails and always
 * returns a valid uid.
 This makes from_kuid_munged appropriate
 * for use in syscalls like stat and getuid where failing the
 * system call and failing to provide a valid uid are not
 * an option.
 *
 * If @kuid has no mapping in @targ overflowuid is returned.
 */
uid_t from_kuid_munged(struct user_namespace *targ, kuid_t kuid)
{
	uid_t uid;
	uid = from_kuid(targ, kuid);

	if (uid == (uid_t) -1)
		uid = overflowuid;
	return uid;
}
EXPORT_SYMBOL(from_kuid_munged);

/**
 * make_kgid - Map a user-namespace gid pair into a kgid.
 * @ns:  User namespace that the gid is in
 * @gid: group identifier
 *
 * Maps a user-namespace gid pair into a kernel internal kgid,
 * and returns that kgid.
 *
 * When there is no mapping defined for the user-namespace gid
 * pair INVALID_GID is returned.  Callers are expected to test
 * for and handle INVALID_GID being returned.  INVALID_GID may be
 * tested for using gid_valid().
 */
kgid_t make_kgid(struct user_namespace *ns, gid_t gid)
{
	/* Map the gid to a global kernel gid */
	return KGIDT_INIT(map_id_down(&ns->gid_map, gid));
}
EXPORT_SYMBOL(make_kgid);

/**
 * from_kgid - Create a gid from a kgid user-namespace pair.
 * @targ: The user namespace we want a gid in.
 * @kgid: The kernel internal gid to start with.
 *
 * Map @kgid into the user-namespace specified by @targ and
 * return the resulting gid.
 *
 * There is always a mapping into the initial user_namespace.
 *
 * If @kgid has no mapping in @targ (gid_t)-1 is returned.
 */
gid_t from_kgid(struct user_namespace *targ, kgid_t kgid)
{
	/* Map the gid from a global kernel gid */
	return map_id_up(&targ->gid_map, __kgid_val(kgid));
}
EXPORT_SYMBOL(from_kgid);

/**
 * from_kgid_munged - Create a gid from a kgid user-namespace pair.
 * @targ: The user namespace we want a gid in.
 * @kgid: The kernel internal gid to start with.
 *
 * Map @kgid into the user-namespace specified by @targ and
 * return the resulting gid.
 *
 * There is always a mapping into the initial user_namespace.
 *
 * Unlike from_kgid from_kgid_munged never fails and always
 * returns a valid gid.  This makes from_kgid_munged appropriate
 * for use in syscalls like stat and getgid where failing the
 * system call and failing to provide a valid gid are not options.
 *
 * If @kgid has no mapping in @targ overflowgid is returned.
 */
gid_t from_kgid_munged(struct user_namespace *targ, kgid_t kgid)
{
	gid_t gid;
	gid = from_kgid(targ, kgid);

	if (gid == (gid_t) -1)
		gid = overflowgid;
	return gid;
}
EXPORT_SYMBOL(from_kgid_munged);

/**
 * make_kprojid - Map a user-namespace projid pair into a kprojid.
 * @ns:     User namespace that the projid is in
 * @projid: Project identifier
 *
 * Maps a user-namespace projid pair into a kernel internal kprojid,
 * and returns that kprojid.
 *
 * When there is no mapping defined for the user-namespace projid
 * pair INVALID_PROJID is returned.  Callers are expected to test
 * for and handle INVALID_PROJID being returned.  INVALID_PROJID
 * may be tested for using projid_valid().
 */
kprojid_t make_kprojid(struct user_namespace *ns, projid_t projid)
{
	/* Map the projid to a global kernel projid */
	return KPROJIDT_INIT(map_id_down(&ns->projid_map, projid));
}
EXPORT_SYMBOL(make_kprojid);

/**
 * from_kprojid - Create a projid from a kprojid user-namespace pair.
 * @targ:    The user namespace we want a projid in.
 * @kprojid: The kernel internal project identifier to start with.
 *
 * Map @kprojid into the user-namespace specified by @targ and
 * return the resulting projid.
 *
 * There is always a mapping into the initial user_namespace.
 *
 * If @kprojid has no mapping in @targ (projid_t)-1 is returned.
 */
projid_t from_kprojid(struct user_namespace *targ, kprojid_t kprojid)
{
	/* Map the projid from a global kernel projid */
	return map_id_up(&targ->projid_map, __kprojid_val(kprojid));
}
EXPORT_SYMBOL(from_kprojid);

/**
 * from_kprojid_munged - Create a projid from a kprojid user-namespace pair.
 * @targ:    The user namespace we want a projid in.
 * @kprojid: The kernel internal projid to start with.
 *
 * Map @kprojid into the user-namespace specified by @targ and
 * return the resulting projid.
 *
 * There is always a mapping into the initial user_namespace.
 *
 * Unlike from_kprojid from_kprojid_munged never fails and always
 * returns a valid projid.  This makes from_kprojid_munged
 * appropriate for use in syscalls like stat where
 * failing the system call and failing to provide a valid projid are
 * not an option.
 *
 * If @kprojid has no mapping in @targ OVERFLOW_PROJID is returned.
 */
projid_t from_kprojid_munged(struct user_namespace *targ, kprojid_t kprojid)
{
	projid_t projid;
	projid = from_kprojid(targ, kprojid);

	if (projid == (projid_t) -1)
		projid = OVERFLOW_PROJID;
	return projid;
}
EXPORT_SYMBOL(from_kprojid_munged);

/* seq_file ->show: print one uid_map extent ("first lower count"),
 * translating the lower id into the seq opener's user namespace. */
static int uid_m_show(struct seq_file *seq, void *v)
{
	struct user_namespace *ns = seq->private;
	struct uid_gid_extent *extent = v;
	struct user_namespace *lower_ns;
	uid_t lower;

	lower_ns = seq_user_ns(seq);
	if ((lower_ns == ns) && lower_ns->parent)
		lower_ns = lower_ns->parent;

	lower = from_kuid(lower_ns, KUIDT_INIT(extent->lower_first));

	seq_printf(seq, "%10u %10u %10u\n",
		   extent->first,
		   lower,
		   extent->count);

	return 0;
}

/* seq_file ->show: print one gid_map extent; see uid_m_show(). */
static int gid_m_show(struct seq_file *seq, void *v)
{
	struct user_namespace *ns = seq->private;
	struct uid_gid_extent *extent = v;
	struct user_namespace *lower_ns;
	gid_t lower;

	lower_ns = seq_user_ns(seq);
	if ((lower_ns == ns) && lower_ns->parent)
		lower_ns = lower_ns->parent;

	lower = from_kgid(lower_ns, KGIDT_INIT(extent->lower_first));

	seq_printf(seq, "%10u %10u %10u\n",
		   extent->first,
		   lower,
		   extent->count);

	return 0;
}

/* seq_file ->show: print one projid_map extent; see uid_m_show(). */
static int projid_m_show(struct seq_file *seq, void *v)
{
	struct user_namespace *ns = seq->private;
	struct uid_gid_extent *extent = v;
	struct user_namespace *lower_ns;
	projid_t lower;

	lower_ns = seq_user_ns(seq);
	if ((lower_ns == ns) && lower_ns->parent)
		lower_ns = lower_ns->parent;

	lower = from_kprojid(lower_ns, KPROJIDT_INIT(extent->lower_first));

	seq_printf(seq, "%10u %10u %10u\n",
		   extent->first,
		   lower,
		   extent->count);

	return 0;
}

/* Common seq_file ->start helper: the extent at index *ppos, or NULL at EOF. */
static void *m_start(struct seq_file *seq, loff_t *ppos,
		     struct uid_gid_map *map)
{
	struct uid_gid_extent *extent = NULL;
	loff_t pos = *ppos;

	if (pos < map->nr_extents)
		extent = &map->extent[pos];

	return extent;
}

static void *uid_m_start(struct seq_file *seq, loff_t *ppos)
{
	struct user_namespace *ns = seq->private;

	return m_start(seq, ppos, &ns->uid_map);
}

static void *gid_m_start(struct seq_file *seq, loff_t *ppos)
{
	struct user_namespace *ns = seq->private;

	return m_start(seq, ppos, &ns->gid_map);
}

static void *projid_m_start(struct seq_file *seq, loff_t *ppos)
{
	struct user_namespace *ns = seq->private;

	return m_start(seq, ppos, &ns->projid_map);
}

/* Advance to the next extent by re-running the map-specific ->start. */
static void *m_next(struct seq_file *seq, void *v, loff_t *pos)
{
	(*pos)++;
	return seq->op->start(seq, pos);
}

static void m_stop(struct seq_file *seq, void *v)
{
	return;
}

const struct seq_operations proc_uid_seq_operations = {
	.start	= uid_m_start,
	.stop	= m_stop,
	.next	= m_next,
	.show	= uid_m_show,
};

const struct seq_operations proc_gid_seq_operations = {
	.start	= gid_m_start,
	.stop	= m_stop,
	.next	= m_next,
	.show	= gid_m_show,
};

const struct seq_operations proc_projid_seq_operations = {
	.start	= projid_m_start,
	.stop	= m_stop,
	.next	= m_next,
	.show	= projid_m_show,
};

/* Does @extent intersect, on either its upper or its lower id range,
 * any extent already accepted into @new_map? */
static bool mappings_overlap(struct uid_gid_map *new_map,
			     struct uid_gid_extent *extent)
{
	u32 upper_first, lower_first, upper_last, lower_last;
	unsigned idx;

	upper_first = extent->first;
	lower_first = extent->lower_first;
	upper_last = upper_first + extent->count - 1;
	lower_last = lower_first + extent->count - 1;

	for (idx = 0; idx < new_map->nr_extents; idx++) {
		u32 prev_upper_first, prev_lower_first;
		u32 prev_upper_last, prev_lower_last;
		struct uid_gid_extent *prev;

		prev = &new_map->extent[idx];

		prev_upper_first = prev->first;
		prev_lower_first = prev->lower_first;
		prev_upper_last = prev_upper_first + prev->count - 1;
		prev_lower_last = prev_lower_first + prev->count - 1;

		/* Does the upper range intersect a previous extent? */
		if ((prev_upper_first <= upper_last) &&
		    (prev_upper_last >= upper_first))
			return true;

		/* Does the lower range intersect a previous extent? */
		if ((prev_lower_first <= lower_last) &&
		    (prev_lower_last >= lower_first))
			return true;
	}
	return false;
}

/* Parse and install a uid/gid/projid map written to a /proc idmap file.
 * Each line is "first lower_first count".  The map may be written only
 * once, in a single write at offset 0.  Lower ids are translated through
 * @parent_map into the kernel global id space before installation. */
static ssize_t map_write(struct file *file, const char __user *buf,
			 size_t count, loff_t *ppos,
			 int cap_setid,
			 struct uid_gid_map *map,
			 struct uid_gid_map *parent_map)
{
	struct seq_file *seq = file->private_data;
	struct user_namespace *ns = seq->private;
	struct uid_gid_map new_map;
	unsigned idx;
	struct uid_gid_extent *extent = NULL;
	char *kbuf = NULL, *pos, *next_line;
	ssize_t ret = -EINVAL;

	/*
	 * The userns_state_mutex serializes all writes to any given map.
	 *
	 * Any map is only ever written once.
	 *
	 * An id map fits within 1 cache line on most architectures.
	 *
	 * On read nothing needs to be done unless you are on an
	 * architecture with a crazy cache coherency model like alpha.
	 *
	 * There is a one time data dependency between reading the
	 * count of the extents and the values of the extents.  The
	 * desired behavior is to see the values of the extents that
	 * were written before the count of the extents.
	 *
	 * To achieve this smp_wmb() is used to guarantee the write
	 * order and smp_rmb() guarantees that we don't have crazy
	 * architectures returning stale data.
	 */
	mutex_lock(&userns_state_mutex);

	ret = -EPERM;
	/* Only allow one successful write to the map */
	if (map->nr_extents != 0)
		goto out;

	/*
	 * Adjusting namespace settings requires capabilities on the target.
	 */
	if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN))
		goto out;

	/* Only allow < page size writes at the beginning of the file */
	ret = -EINVAL;
	if ((*ppos != 0) || (count >= PAGE_SIZE))
		goto out;

	/* Slurp in the user data */
	kbuf = memdup_user_nul(buf, count);
	if (IS_ERR(kbuf)) {
		ret = PTR_ERR(kbuf);
		kbuf = NULL;
		goto out;
	}

	/* Parse the user data */
	ret = -EINVAL;
	pos = kbuf;
	new_map.nr_extents = 0;
	for (; pos; pos = next_line) {
		extent = &new_map.extent[new_map.nr_extents];

		/* Find the end of line and ensure I don't look past it */
		next_line = strchr(pos, '\n');
		if (next_line) {
			*next_line = '\0';
			next_line++;
			if (*next_line == '\0')
				next_line = NULL;
		}

		pos = skip_spaces(pos);
		extent->first = simple_strtoul(pos, &pos, 10);
		if (!isspace(*pos))
			goto out;

		pos = skip_spaces(pos);
		extent->lower_first = simple_strtoul(pos, &pos, 10);
		if (!isspace(*pos))
			goto out;

		pos = skip_spaces(pos);
		extent->count = simple_strtoul(pos, &pos, 10);
		if (*pos && !isspace(*pos))
			goto out;

		/* Verify there is no trailing junk on the line */
		pos = skip_spaces(pos);
		if (*pos != '\0')
			goto out;

		/* Verify we have been given valid starting values */
		if ((extent->first == (u32) -1) ||
		    (extent->lower_first == (u32) -1))
			goto out;

		/* Verify count is not zero and does not cause the
		 * extent to wrap
		 */
		if ((extent->first + extent->count) <= extent->first)
			goto out;
		if ((extent->lower_first + extent->count) <=
		    extent->lower_first)
			goto out;

		/* Do the ranges in extent overlap any previous extents? */
		if (mappings_overlap(&new_map, extent))
			goto out;

		new_map.nr_extents++;

		/* Fail if the file contains too many extents */
		if ((new_map.nr_extents == UID_GID_MAP_MAX_EXTENTS) &&
		    (next_line != NULL))
			goto out;
	}
	/* Be very certain the new map actually exists */
	if (new_map.nr_extents == 0)
		goto out;

	ret = -EPERM;
	/* Validate the user is allowed to use user id's mapped to. */
	if (!new_idmap_permitted(file, ns, cap_setid, &new_map))
		goto out;

	/* Map the lower ids from the parent user namespace to the
	 * kernel global id space.
	 */
	for (idx = 0; idx < new_map.nr_extents; idx++) {
		u32 lower_first;
		extent = &new_map.extent[idx];

		lower_first = map_id_range_down(parent_map,
						extent->lower_first,
						extent->count);

		/* Fail if we can not map the specified extent to
		 * the kernel global id space.
		 */
		if (lower_first == (u32) -1)
			goto out;

		extent->lower_first = lower_first;
	}

	/* Install the map */
	memcpy(map->extent, new_map.extent,
	       new_map.nr_extents*sizeof(new_map.extent[0]));
	/* Publish the extents before the extent count; pairs with the
	 * smp_rmb() in the map_id_*() readers. */
	smp_wmb();
	map->nr_extents = new_map.nr_extents;

	*ppos = count;
	ret = count;
out:
	mutex_unlock(&userns_state_mutex);
	kfree(kbuf);
	return ret;
}

/* Write handler for /proc/<pid>/uid_map. */
ssize_t proc_uid_map_write(struct file *file, const char __user *buf,
			   size_t size, loff_t *ppos)
{
	struct seq_file *seq = file->private_data;
	struct user_namespace *ns = seq->private;
	struct user_namespace *seq_ns = seq_user_ns(seq);

	if (!ns->parent)
		return -EPERM;

	if ((seq_ns != ns) && (seq_ns != ns->parent))
		return -EPERM;

	return map_write(file, buf, size, ppos, CAP_SETUID,
			 &ns->uid_map, &ns->parent->uid_map);
}

/* Write handler for /proc/<pid>/gid_map. */
ssize_t proc_gid_map_write(struct file *file, const char __user *buf,
			   size_t size, loff_t *ppos)
{
	struct seq_file *seq = file->private_data;
	struct user_namespace *ns = seq->private;
	struct user_namespace *seq_ns = seq_user_ns(seq);

	if (!ns->parent)
		return -EPERM;

	if ((seq_ns != ns) && (seq_ns != ns->parent))
		return -EPERM;

	return map_write(file, buf, size, ppos, CAP_SETGID,
			 &ns->gid_map, &ns->parent->gid_map);
}

/* Write handler for /proc/<pid>/projid_map. */
ssize_t proc_projid_map_write(struct file *file, const char __user *buf,
			      size_t size, loff_t *ppos)
{
	struct seq_file *seq = file->private_data;
	struct user_namespace *ns = seq->private;
	struct user_namespace *seq_ns = seq_user_ns(seq);

	if (!ns->parent)
		return -EPERM;

	if ((seq_ns != ns) && (seq_ns != ns->parent))
		return -EPERM;

	/* Anyone can set any valid project id; no capability needed */
	return map_write(file, buf, size, ppos, -1,
			 &ns->projid_map, &ns->parent->projid_map);
}

/* Decide whether the writer of an idmap file may install @new_map. */
static bool new_idmap_permitted(const struct file *file,
				struct user_namespace *ns, int cap_setid,
				struct uid_gid_map *new_map)
{
	const struct cred *cred = file->f_cred;
	/* Don't allow mappings that would allow anything that wouldn't
	 * be allowed without the establishment of unprivileged mappings.
	 */
	if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) &&
	    uid_eq(ns->owner, cred->euid)) {
		u32 id = new_map->extent[0].lower_first;
		if (cap_setid == CAP_SETUID) {
			kuid_t uid = make_kuid(ns->parent, id);
			if (uid_eq(uid, cred->euid))
				return true;
		} else if (cap_setid == CAP_SETGID) {
			kgid_t gid = make_kgid(ns->parent, id);
			/* An unprivileged gid mapping is only allowed
			 * once setgroups has been denied in @ns; see
			 * proc_setgroups_write().
			 */
			if (!(ns->flags & USERNS_SETGROUPS_ALLOWED) &&
			    gid_eq(gid, cred->egid))
				return true;
		}
	}

	/* Allow anyone to set a mapping that doesn't require privilege */
	if (!cap_valid(cap_setid))
		return true;

	/* Allow the specified ids if we have the appropriate capability
	 * (CAP_SETUID or CAP_SETGID) over the parent user namespace.
	 * And the opener of the id file also had the appropriate capability.
	 */
	if (ns_capable(ns->parent, cap_setid) &&
	    file_ns_capable(file, ns->parent, cap_setid))
		return true;

	return false;
}

/* Show handler for /proc/<pid>/setgroups: "allow" or "deny". */
int proc_setgroups_show(struct seq_file *seq, void *v)
{
	struct user_namespace *ns = seq->private;
	unsigned long userns_flags = READ_ONCE(ns->flags);

	seq_printf(seq, "%s\n",
		   (userns_flags & USERNS_SETGROUPS_ALLOWED) ?
		   "allow" : "deny");
	return 0;
}

/* Write handler for /proc/<pid>/setgroups: accepts "allow" or "deny"
 * (optionally followed by whitespace) in a single write at offset 0. */
ssize_t proc_setgroups_write(struct file *file, const char __user *buf,
			     size_t count, loff_t *ppos)
{
	struct seq_file *seq = file->private_data;
	struct user_namespace *ns = seq->private;
	char kbuf[8], *pos;
	bool setgroups_allowed;
	ssize_t ret;

	/* Only allow a very narrow range of strings to be written */
	ret = -EINVAL;
	if ((*ppos != 0) || (count >= sizeof(kbuf)))
		goto out;

	/* What was written? */
	ret = -EFAULT;
	if (copy_from_user(kbuf, buf, count))
		goto out;
	kbuf[count] = '\0';
	pos = kbuf;

	/* What is being requested? */
	ret = -EINVAL;
	if (strncmp(pos, "allow", 5) == 0) {
		pos += 5;
		setgroups_allowed = true;
	}
	else if (strncmp(pos, "deny", 4) == 0) {
		pos += 4;
		setgroups_allowed = false;
	}
	else
		goto out;

	/* Verify there is no trailing junk on the line */
	pos = skip_spaces(pos);
	if (*pos != '\0')
		goto out;

	ret = -EPERM;
	mutex_lock(&userns_state_mutex);
	if (setgroups_allowed) {
		/* Enabling setgroups after setgroups has been disabled
		 * is not allowed.
		 */
		if (!(ns->flags & USERNS_SETGROUPS_ALLOWED))
			goto out_unlock;
	} else {
		/* Permanently disabling setgroups after setgroups has
		 * been enabled by writing the gid_map is not allowed.
		 */
		if (ns->gid_map.nr_extents != 0)
			goto out_unlock;
		ns->flags &= ~USERNS_SETGROUPS_ALLOWED;
	}
	mutex_unlock(&userns_state_mutex);

	/* Report a successful write */
	*ppos = count;
	ret = count;
out:
	return ret;
out_unlock:
	mutex_unlock(&userns_state_mutex);
	goto out;
}

/* May tasks in @ns use setgroups(2)?  Requires both an established
 * gid_map and the USERNS_SETGROUPS_ALLOWED flag. */
bool userns_may_setgroups(const struct user_namespace *ns)
{
	bool allowed;

	mutex_lock(&userns_state_mutex);
	/* It is not safe to use setgroups until a gid mapping in
	 * the user namespace has been established.
	 */
	allowed = ns->gid_map.nr_extents != 0;
	/* Is setgroups allowed? */
	allowed = allowed && (ns->flags & USERNS_SETGROUPS_ALLOWED);
	mutex_unlock(&userns_state_mutex);

	return allowed;
}

/*
 * Returns true if @child is the same namespace or a descendant of
 * @ancestor.
 */
bool in_userns(const struct user_namespace *ancestor,
	       const struct user_namespace *child)
{
	const struct user_namespace *ns;
	/* Walk up from @child until we reach @ancestor's nesting level. */
	for (ns = child; ns->level > ancestor->level; ns = ns->parent)
		;
	return (ns == ancestor);
}

bool current_in_userns(const struct user_namespace *target_ns)
{
	return in_userns(target_ns, current_user_ns());
}

static inline struct user_namespace *to_user_ns(struct ns_common *ns)
{
	return container_of(ns, struct user_namespace, ns);
}

/* ->get: grab a reference to @task's user namespace. */
static struct ns_common *userns_get(struct task_struct *task)
{
	struct user_namespace *user_ns;

	rcu_read_lock();
	user_ns = get_user_ns(__task_cred(task)->user_ns);
	rcu_read_unlock();

	return user_ns ? &user_ns->ns : NULL;
}

/* ->put: drop the reference taken by userns_get(). */
static void userns_put(struct ns_common *ns)
{
	put_user_ns(to_user_ns(ns));
}

/* ->install: switch the current task's credentials into @ns. */
static int userns_install(struct nsproxy *nsproxy, struct ns_common *ns)
{
	struct user_namespace *user_ns = to_user_ns(ns);
	struct cred *cred;

	/* Don't allow gaining capabilities by reentering
	 * the same user namespace.
	 */
	if (user_ns == current_user_ns())
		return -EINVAL;

	/* Tasks that share a thread group must share a user namespace */
	if (!thread_group_empty(current))
		return -EINVAL;

	/* The fs_struct must not be shared with any other task. */
	if (current->fs->users != 1)
		return -EINVAL;

	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
		return -EPERM;

	cred = prepare_creds();
	if (!cred)
		return -ENOMEM;

	put_user_ns(cred->user_ns);
	set_cred_user_ns(cred, get_user_ns(user_ns));

	return commit_creds(cred);
}

/* Return a reference to the user namespace that owns @ns, but only if
 * that owner is the caller's user namespace or one of its descendants;
 * -EPERM otherwise. */
struct ns_common *ns_get_owner(struct ns_common *ns)
{
	struct user_namespace *my_user_ns = current_user_ns();
	struct user_namespace *owner, *p;

	/* See if the owner is in the current user namespace */
	owner = p = ns->ops->owner(ns);
	for (;;) {
		if (!p)
			return ERR_PTR(-EPERM);
		if (p == my_user_ns)
			break;
		p = p->parent;
	}

	return &get_user_ns(owner)->ns;
}

static struct user_namespace *userns_owner(struct ns_common *ns)
{
	return to_user_ns(ns)->parent;
}

const struct proc_ns_operations userns_operations = {
	.name		= "user",
	.type		= CLONE_NEWUSER,
	.get		= userns_get,
	.put		= userns_put,
	.install	= userns_install,
	.owner		= userns_owner,
	.get_parent	= ns_get_owner,
};

static __init int user_namespaces_init(void)
{
	user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC);
	return 0;
}
subsys_initcall(user_namespaces_init);