1 /* 2 * This program is free software; you can redistribute it and/or 3 * modify it under the terms of the GNU General Public License as 4 * published by the Free Software Foundation, version 2 of the 5 * License. 6 */ 7 8 #include <linux/export.h> 9 #include <linux/nsproxy.h> 10 #include <linux/slab.h> 11 #include <linux/user_namespace.h> 12 #include <linux/proc_ns.h> 13 #include <linux/highuid.h> 14 #include <linux/cred.h> 15 #include <linux/securebits.h> 16 #include <linux/keyctl.h> 17 #include <linux/key-type.h> 18 #include <keys/user-type.h> 19 #include <linux/seq_file.h> 20 #include <linux/fs.h> 21 #include <linux/uaccess.h> 22 #include <linux/ctype.h> 23 #include <linux/projid.h> 24 #include <linux/fs_struct.h> 25 26 static struct kmem_cache *user_ns_cachep __read_mostly; 27 static DEFINE_MUTEX(userns_state_mutex); 28 29 static bool new_idmap_permitted(const struct file *file, 30 struct user_namespace *ns, int cap_setid, 31 struct uid_gid_map *map); 32 static void free_user_ns(struct work_struct *work); 33 34 static struct ucounts *inc_user_namespaces(struct user_namespace *ns, kuid_t uid) 35 { 36 return inc_ucount(ns, uid, UCOUNT_USER_NAMESPACES); 37 } 38 39 static void dec_user_namespaces(struct ucounts *ucounts) 40 { 41 return dec_ucount(ucounts, UCOUNT_USER_NAMESPACES); 42 } 43 44 static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns) 45 { 46 /* Start with the same capabilities as init but useless for doing 47 * anything as the capabilities are bound to the new user namespace. 48 */ 49 cred->securebits = SECUREBITS_DEFAULT; 50 cred->cap_inheritable = CAP_EMPTY_SET; 51 cred->cap_permitted = CAP_FULL_SET; 52 cred->cap_effective = CAP_FULL_SET; 53 cred->cap_ambient = CAP_EMPTY_SET; 54 cred->cap_bset = CAP_FULL_SET; 55 #ifdef CONFIG_KEYS 56 key_put(cred->request_key_auth); 57 cred->request_key_auth = NULL; 58 #endif 59 /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */ 60 cred->user_ns = user_ns; 61 } 62 63 /* 64 * Create a new user namespace, deriving the creator from the user in the 65 * passed credentials, and replacing that user with the new root user for the 66 * new namespace. 67 * 68 * This is called by copy_creds(), which will finish setting the target task's 69 * credentials. 70 */ 71 int create_user_ns(struct cred *new) 72 { 73 struct user_namespace *ns, *parent_ns = new->user_ns; 74 kuid_t owner = new->euid; 75 kgid_t group = new->egid; 76 struct ucounts *ucounts; 77 int ret, i; 78 79 ret = -ENOSPC; 80 if (parent_ns->level > 32) 81 goto fail; 82 83 ucounts = inc_user_namespaces(parent_ns, owner); 84 if (!ucounts) 85 goto fail; 86 87 /* 88 * Verify that we can not violate the policy of which files 89 * may be accessed that is specified by the root directory, 90 * by verifing that the root directory is at the root of the 91 * mount namespace which allows all files to be accessed. 92 */ 93 ret = -EPERM; 94 if (current_chrooted()) 95 goto fail_dec; 96 97 /* The creator needs a mapping in the parent user namespace 98 * or else we won't be able to reasonably tell userspace who 99 * created a user_namespace. 100 */ 101 ret = -EPERM; 102 if (!kuid_has_mapping(parent_ns, owner) || 103 !kgid_has_mapping(parent_ns, group)) 104 goto fail_dec; 105 106 ret = -ENOMEM; 107 ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL); 108 if (!ns) 109 goto fail_dec; 110 111 ret = ns_alloc_inum(&ns->ns); 112 if (ret) 113 goto fail_free; 114 ns->ns.ops = &userns_operations; 115 116 atomic_set(&ns->count, 1); 117 /* Leave the new->user_ns reference with the new user namespace. */ 118 ns->parent = parent_ns; 119 ns->level = parent_ns->level + 1; 120 ns->owner = owner; 121 ns->group = group; 122 INIT_WORK(&ns->work, free_user_ns); 123 for (i = 0; i < UCOUNT_COUNTS; i++) { 124 ns->ucount_max[i] = INT_MAX; 125 } 126 ns->ucounts = ucounts; 127 128 /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */ 129 mutex_lock(&userns_state_mutex); 130 ns->flags = parent_ns->flags; 131 mutex_unlock(&userns_state_mutex); 132 133 #ifdef CONFIG_PERSISTENT_KEYRINGS 134 init_rwsem(&ns->persistent_keyring_register_sem); 135 #endif 136 ret = -ENOMEM; 137 if (!setup_userns_sysctls(ns)) 138 goto fail_keyring; 139 140 set_cred_user_ns(new, ns); 141 return 0; 142 fail_keyring: 143 #ifdef CONFIG_PERSISTENT_KEYRINGS 144 key_put(ns->persistent_keyring_register); 145 #endif 146 ns_free_inum(&ns->ns); 147 fail_free: 148 kmem_cache_free(user_ns_cachep, ns); 149 fail_dec: 150 dec_user_namespaces(ucounts); 151 fail: 152 return ret; 153 } 154 155 int unshare_userns(unsigned long unshare_flags, struct cred **new_cred) 156 { 157 struct cred *cred; 158 int err = -ENOMEM; 159 160 if (!(unshare_flags & CLONE_NEWUSER)) 161 return 0; 162 163 cred = prepare_creds(); 164 if (cred) { 165 err = create_user_ns(cred); 166 if (err) 167 put_cred(cred); 168 else 169 *new_cred = cred; 170 } 171 172 return err; 173 } 174 175 static void free_user_ns(struct work_struct *work) 176 { 177 struct user_namespace *parent, *ns = 178 container_of(work, struct user_namespace, work); 179 180 do { 181 struct ucounts *ucounts = ns->ucounts; 182 parent = ns->parent; 183 retire_userns_sysctls(ns); 184 #ifdef CONFIG_PERSISTENT_KEYRINGS 185 key_put(ns->persistent_keyring_register); 186 #endif 187 ns_free_inum(&ns->ns); 188 kmem_cache_free(user_ns_cachep, ns); 189 dec_user_namespaces(ucounts); 190 ns = parent; 191 } while (atomic_dec_and_test(&parent->count)); 192 } 193 194 void __put_user_ns(struct user_namespace *ns) 195 { 196 schedule_work(&ns->work); 197 } 198 EXPORT_SYMBOL(__put_user_ns); 199 200 static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count) 201 { 202 unsigned idx, extents; 203 u32 first, last, id2; 204 205 id2 = id + count - 1; 206 207 /* Find the matching extent */ 208 extents = map->nr_extents; 209 smp_rmb(); 210 for (idx = 0; idx < extents; idx++) { 211 first = map->extent[idx].first; 212 last = first + map->extent[idx].count - 1; 213 if (id >= first && id <= last && 214 (id2 >= first && id2 <= last)) 215 break; 216 } 217 /* Map the id or note failure */ 218 if (idx < extents) 219 id = (id - first) + map->extent[idx].lower_first; 220 else 221 id = (u32) -1; 222 223 return id; 224 } 225 226 static u32 map_id_down(struct uid_gid_map *map, u32 id) 227 { 228 unsigned idx, extents; 229 u32 first, last; 230 231 /* Find the matching extent */ 232 extents = map->nr_extents; 233 smp_rmb(); 234 for (idx = 0; idx < extents; idx++) { 235 first = map->extent[idx].first; 236 last = first + map->extent[idx].count - 1; 237 if (id >= first && id <= last) 238 break; 239 } 240 /* Map the id or note failure */ 241 if (idx < extents) 242 id = (id - first) + map->extent[idx].lower_first; 243 else 244 id = (u32) -1; 245 246 return id; 247 } 248 249 static u32 map_id_up(struct uid_gid_map *map, u32 id) 250 { 251 unsigned idx, extents; 252 u32 first, last; 253 254 /* Find the matching extent */ 255 extents = map->nr_extents; 256 smp_rmb(); 257 for (idx = 0; idx < extents; idx++) { 258 first = map->extent[idx].lower_first; 259 last = first + map->extent[idx].count - 1; 260 if (id >= first && id <= last) 261 break; 262 } 263 /* Map the id or note failure */ 264 if (idx < extents) 265 id = (id - first) + map->extent[idx].first; 266 else 267 id = (u32) -1; 268 269 return id; 270 } 271 272 /** 273 * make_kuid - Map a user-namespace uid pair into a kuid. 274 * @ns: User namespace that the uid is in 275 * @uid: User identifier 276 * 277 * Maps a user-namespace uid pair into a kernel internal kuid, 278 * and returns that kuid. 279 * 280 * When there is no mapping defined for the user-namespace uid 281 * pair INVALID_UID is returned. Callers are expected to test 282 * for and handle INVALID_UID being returned. INVALID_UID 283 * may be tested for using uid_valid(). 284 */ 285 kuid_t make_kuid(struct user_namespace *ns, uid_t uid) 286 { 287 /* Map the uid to a global kernel uid */ 288 return KUIDT_INIT(map_id_down(&ns->uid_map, uid)); 289 } 290 EXPORT_SYMBOL(make_kuid); 291 292 /** 293 * from_kuid - Create a uid from a kuid user-namespace pair. 294 * @targ: The user namespace we want a uid in. 295 * @kuid: The kernel internal uid to start with. 296 * 297 * Map @kuid into the user-namespace specified by @targ and 298 * return the resulting uid. 299 * 300 * There is always a mapping into the initial user_namespace. 301 * 302 * If @kuid has no mapping in @targ (uid_t)-1 is returned. 303 */ 304 uid_t from_kuid(struct user_namespace *targ, kuid_t kuid) 305 { 306 /* Map the uid from a global kernel uid */ 307 return map_id_up(&targ->uid_map, __kuid_val(kuid)); 308 } 309 EXPORT_SYMBOL(from_kuid); 310 311 /** 312 * from_kuid_munged - Create a uid from a kuid user-namespace pair. 313 * @targ: The user namespace we want a uid in. 314 * @kuid: The kernel internal uid to start with. 315 * 316 * Map @kuid into the user-namespace specified by @targ and 317 * return the resulting uid. 318 * 319 * There is always a mapping into the initial user_namespace. 320 * 321 * Unlike from_kuid from_kuid_munged never fails and always 322 * returns a valid uid. This makes from_kuid_munged appropriate 323 * for use in syscalls like stat and getuid where failing the 324 * system call and failing to provide a valid uid are not an 325 * options. 326 * 327 * If @kuid has no mapping in @targ overflowuid is returned. 328 */ 329 uid_t from_kuid_munged(struct user_namespace *targ, kuid_t kuid) 330 { 331 uid_t uid; 332 uid = from_kuid(targ, kuid); 333 334 if (uid == (uid_t) -1) 335 uid = overflowuid; 336 return uid; 337 } 338 EXPORT_SYMBOL(from_kuid_munged); 339 340 /** 341 * make_kgid - Map a user-namespace gid pair into a kgid. 342 * @ns: User namespace that the gid is in 343 * @gid: group identifier 344 * 345 * Maps a user-namespace gid pair into a kernel internal kgid, 346 * and returns that kgid. 347 * 348 * When there is no mapping defined for the user-namespace gid 349 * pair INVALID_GID is returned. Callers are expected to test 350 * for and handle INVALID_GID being returned. INVALID_GID may be 351 * tested for using gid_valid(). 352 */ 353 kgid_t make_kgid(struct user_namespace *ns, gid_t gid) 354 { 355 /* Map the gid to a global kernel gid */ 356 return KGIDT_INIT(map_id_down(&ns->gid_map, gid)); 357 } 358 EXPORT_SYMBOL(make_kgid); 359 360 /** 361 * from_kgid - Create a gid from a kgid user-namespace pair. 362 * @targ: The user namespace we want a gid in. 363 * @kgid: The kernel internal gid to start with. 364 * 365 * Map @kgid into the user-namespace specified by @targ and 366 * return the resulting gid. 367 * 368 * There is always a mapping into the initial user_namespace. 369 * 370 * If @kgid has no mapping in @targ (gid_t)-1 is returned. 371 */ 372 gid_t from_kgid(struct user_namespace *targ, kgid_t kgid) 373 { 374 /* Map the gid from a global kernel gid */ 375 return map_id_up(&targ->gid_map, __kgid_val(kgid)); 376 } 377 EXPORT_SYMBOL(from_kgid); 378 379 /** 380 * from_kgid_munged - Create a gid from a kgid user-namespace pair. 381 * @targ: The user namespace we want a gid in. 382 * @kgid: The kernel internal gid to start with. 383 * 384 * Map @kgid into the user-namespace specified by @targ and 385 * return the resulting gid. 386 * 387 * There is always a mapping into the initial user_namespace. 388 * 389 * Unlike from_kgid from_kgid_munged never fails and always 390 * returns a valid gid. This makes from_kgid_munged appropriate 391 * for use in syscalls like stat and getgid where failing the 392 * system call and failing to provide a valid gid are not options. 393 * 394 * If @kgid has no mapping in @targ overflowgid is returned. 395 */ 396 gid_t from_kgid_munged(struct user_namespace *targ, kgid_t kgid) 397 { 398 gid_t gid; 399 gid = from_kgid(targ, kgid); 400 401 if (gid == (gid_t) -1) 402 gid = overflowgid; 403 return gid; 404 } 405 EXPORT_SYMBOL(from_kgid_munged); 406 407 /** 408 * make_kprojid - Map a user-namespace projid pair into a kprojid. 409 * @ns: User namespace that the projid is in 410 * @projid: Project identifier 411 * 412 * Maps a user-namespace uid pair into a kernel internal kuid, 413 * and returns that kuid. 414 * 415 * When there is no mapping defined for the user-namespace projid 416 * pair INVALID_PROJID is returned. Callers are expected to test 417 * for and handle handle INVALID_PROJID being returned. INVALID_PROJID 418 * may be tested for using projid_valid(). 419 */ 420 kprojid_t make_kprojid(struct user_namespace *ns, projid_t projid) 421 { 422 /* Map the uid to a global kernel uid */ 423 return KPROJIDT_INIT(map_id_down(&ns->projid_map, projid)); 424 } 425 EXPORT_SYMBOL(make_kprojid); 426 427 /** 428 * from_kprojid - Create a projid from a kprojid user-namespace pair. 429 * @targ: The user namespace we want a projid in. 430 * @kprojid: The kernel internal project identifier to start with. 431 * 432 * Map @kprojid into the user-namespace specified by @targ and 433 * return the resulting projid. 434 * 435 * There is always a mapping into the initial user_namespace. 436 * 437 * If @kprojid has no mapping in @targ (projid_t)-1 is returned. 438 */ 439 projid_t from_kprojid(struct user_namespace *targ, kprojid_t kprojid) 440 { 441 /* Map the uid from a global kernel uid */ 442 return map_id_up(&targ->projid_map, __kprojid_val(kprojid)); 443 } 444 EXPORT_SYMBOL(from_kprojid); 445 446 /** 447 * from_kprojid_munged - Create a projiid from a kprojid user-namespace pair. 448 * @targ: The user namespace we want a projid in. 449 * @kprojid: The kernel internal projid to start with. 450 * 451 * Map @kprojid into the user-namespace specified by @targ and 452 * return the resulting projid. 453 * 454 * There is always a mapping into the initial user_namespace. 455 * 456 * Unlike from_kprojid from_kprojid_munged never fails and always 457 * returns a valid projid. This makes from_kprojid_munged 458 * appropriate for use in syscalls like stat and where 459 * failing the system call and failing to provide a valid projid are 460 * not an options. 461 * 462 * If @kprojid has no mapping in @targ OVERFLOW_PROJID is returned. 463 */ 464 projid_t from_kprojid_munged(struct user_namespace *targ, kprojid_t kprojid) 465 { 466 projid_t projid; 467 projid = from_kprojid(targ, kprojid); 468 469 if (projid == (projid_t) -1) 470 projid = OVERFLOW_PROJID; 471 return projid; 472 } 473 EXPORT_SYMBOL(from_kprojid_munged); 474 475 476 static int uid_m_show(struct seq_file *seq, void *v) 477 { 478 struct user_namespace *ns = seq->private; 479 struct uid_gid_extent *extent = v; 480 struct user_namespace *lower_ns; 481 uid_t lower; 482 483 lower_ns = seq_user_ns(seq); 484 if ((lower_ns == ns) && lower_ns->parent) 485 lower_ns = lower_ns->parent; 486 487 lower = from_kuid(lower_ns, KUIDT_INIT(extent->lower_first)); 488 489 seq_printf(seq, "%10u %10u %10u\n", 490 extent->first, 491 lower, 492 extent->count); 493 494 return 0; 495 } 496 497 static int gid_m_show(struct seq_file *seq, void *v) 498 { 499 struct user_namespace *ns = seq->private; 500 struct uid_gid_extent *extent = v; 501 struct user_namespace *lower_ns; 502 gid_t lower; 503 504 lower_ns = seq_user_ns(seq); 505 if ((lower_ns == ns) && lower_ns->parent) 506 lower_ns = lower_ns->parent; 507 508 lower = from_kgid(lower_ns, KGIDT_INIT(extent->lower_first)); 509 510 seq_printf(seq, "%10u %10u %10u\n", 511 extent->first, 512 lower, 513 extent->count); 514 515 return 0; 516 } 517 518 static int projid_m_show(struct seq_file *seq, void *v) 519 { 520 struct user_namespace *ns = seq->private; 521 struct uid_gid_extent *extent = v; 522 struct user_namespace *lower_ns; 523 projid_t lower; 524 525 lower_ns = seq_user_ns(seq); 526 if ((lower_ns == ns) && lower_ns->parent) 527 lower_ns = lower_ns->parent; 528 529 lower = from_kprojid(lower_ns, KPROJIDT_INIT(extent->lower_first)); 530 531 seq_printf(seq, "%10u %10u %10u\n", 532 extent->first, 533 lower, 534 extent->count); 535 536 return 0; 537 } 538 539 static void *m_start(struct seq_file *seq, loff_t *ppos, 540 struct uid_gid_map *map) 541 { 542 struct uid_gid_extent *extent = NULL; 543 loff_t pos = *ppos; 544 545 if (pos < map->nr_extents) 546 extent = &map->extent[pos]; 547 548 return extent; 549 } 550 551 static void *uid_m_start(struct seq_file *seq, loff_t *ppos) 552 { 553 struct user_namespace *ns = seq->private; 554 555 return m_start(seq, ppos, &ns->uid_map); 556 } 557 558 static void *gid_m_start(struct seq_file *seq, loff_t *ppos) 559 { 560 struct user_namespace *ns = seq->private; 561 562 return m_start(seq, ppos, &ns->gid_map); 563 } 564 565 static void *projid_m_start(struct seq_file *seq, loff_t *ppos) 566 { 567 struct user_namespace *ns = seq->private; 568 569 return m_start(seq, ppos, &ns->projid_map); 570 } 571 572 static void *m_next(struct seq_file *seq, void *v, loff_t *pos) 573 { 574 (*pos)++; 575 return seq->op->start(seq, pos); 576 } 577 578 static void m_stop(struct seq_file *seq, void *v) 579 { 580 return; 581 } 582 583 const struct seq_operations proc_uid_seq_operations = { 584 .start = uid_m_start, 585 .stop = m_stop, 586 .next = m_next, 587 .show = uid_m_show, 588 }; 589 590 const struct seq_operations proc_gid_seq_operations = { 591 .start = gid_m_start, 592 .stop = m_stop, 593 .next = m_next, 594 .show = gid_m_show, 595 }; 596 597 const struct seq_operations proc_projid_seq_operations = { 598 .start = projid_m_start, 599 .stop = m_stop, 600 .next = m_next, 601 .show = projid_m_show, 602 }; 603 604 static bool mappings_overlap(struct uid_gid_map *new_map, 605 struct uid_gid_extent *extent) 606 { 607 u32 upper_first, lower_first, upper_last, lower_last; 608 unsigned idx; 609 610 upper_first = extent->first; 611 lower_first = extent->lower_first; 612 upper_last = upper_first + extent->count - 1; 613 lower_last = lower_first + extent->count - 1; 614 615 for (idx = 0; idx < new_map->nr_extents; idx++) { 616 u32 prev_upper_first, prev_lower_first; 617 u32 prev_upper_last, prev_lower_last; 618 struct uid_gid_extent *prev; 619 620 prev = &new_map->extent[idx]; 621 622 prev_upper_first = prev->first; 623 prev_lower_first = prev->lower_first; 624 prev_upper_last = prev_upper_first + prev->count - 1; 625 prev_lower_last = prev_lower_first + prev->count - 1; 626 627 /* Does the upper range intersect a previous extent? */ 628 if ((prev_upper_first <= upper_last) && 629 (prev_upper_last >= upper_first)) 630 return true; 631 632 /* Does the lower range intersect a previous extent? */ 633 if ((prev_lower_first <= lower_last) && 634 (prev_lower_last >= lower_first)) 635 return true; 636 } 637 return false; 638 } 639 640 static ssize_t map_write(struct file *file, const char __user *buf, 641 size_t count, loff_t *ppos, 642 int cap_setid, 643 struct uid_gid_map *map, 644 struct uid_gid_map *parent_map) 645 { 646 struct seq_file *seq = file->private_data; 647 struct user_namespace *ns = seq->private; 648 struct uid_gid_map new_map; 649 unsigned idx; 650 struct uid_gid_extent *extent = NULL; 651 char *kbuf = NULL, *pos, *next_line; 652 ssize_t ret = -EINVAL; 653 654 /* 655 * The userns_state_mutex serializes all writes to any given map. 656 * 657 * Any map is only ever written once. 658 * 659 * An id map fits within 1 cache line on most architectures. 660 * 661 * On read nothing needs to be done unless you are on an 662 * architecture with a crazy cache coherency model like alpha. 663 * 664 * There is a one time data dependency between reading the 665 * count of the extents and the values of the extents. The 666 * desired behavior is to see the values of the extents that 667 * were written before the count of the extents. 668 * 669 * To achieve this smp_wmb() is used on guarantee the write 670 * order and smp_rmb() is guaranteed that we don't have crazy 671 * architectures returning stale data. 672 */ 673 mutex_lock(&userns_state_mutex); 674 675 ret = -EPERM; 676 /* Only allow one successful write to the map */ 677 if (map->nr_extents != 0) 678 goto out; 679 680 /* 681 * Adjusting namespace settings requires capabilities on the target. 682 */ 683 if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN)) 684 goto out; 685 686 /* Only allow < page size writes at the beginning of the file */ 687 ret = -EINVAL; 688 if ((*ppos != 0) || (count >= PAGE_SIZE)) 689 goto out; 690 691 /* Slurp in the user data */ 692 kbuf = memdup_user_nul(buf, count); 693 if (IS_ERR(kbuf)) { 694 ret = PTR_ERR(kbuf); 695 kbuf = NULL; 696 goto out; 697 } 698 699 /* Parse the user data */ 700 ret = -EINVAL; 701 pos = kbuf; 702 new_map.nr_extents = 0; 703 for (; pos; pos = next_line) { 704 extent = &new_map.extent[new_map.nr_extents]; 705 706 /* Find the end of line and ensure I don't look past it */ 707 next_line = strchr(pos, '\n'); 708 if (next_line) { 709 *next_line = '\0'; 710 next_line++; 711 if (*next_line == '\0') 712 next_line = NULL; 713 } 714 715 pos = skip_spaces(pos); 716 extent->first = simple_strtoul(pos, &pos, 10); 717 if (!isspace(*pos)) 718 goto out; 719 720 pos = skip_spaces(pos); 721 extent->lower_first = simple_strtoul(pos, &pos, 10); 722 if (!isspace(*pos)) 723 goto out; 724 725 pos = skip_spaces(pos); 726 extent->count = simple_strtoul(pos, &pos, 10); 727 if (*pos && !isspace(*pos)) 728 goto out; 729 730 /* Verify there is not trailing junk on the line */ 731 pos = skip_spaces(pos); 732 if (*pos != '\0') 733 goto out; 734 735 /* Verify we have been given valid starting values */ 736 if ((extent->first == (u32) -1) || 737 (extent->lower_first == (u32) -1)) 738 goto out; 739 740 /* Verify count is not zero and does not cause the 741 * extent to wrap 742 */ 743 if ((extent->first + extent->count) <= extent->first) 744 goto out; 745 if ((extent->lower_first + extent->count) <= 746 extent->lower_first) 747 goto out; 748 749 /* Do the ranges in extent overlap any previous extents? */ 750 if (mappings_overlap(&new_map, extent)) 751 goto out; 752 753 new_map.nr_extents++; 754 755 /* Fail if the file contains too many extents */ 756 if ((new_map.nr_extents == UID_GID_MAP_MAX_EXTENTS) && 757 (next_line != NULL)) 758 goto out; 759 } 760 /* Be very certaint the new map actually exists */ 761 if (new_map.nr_extents == 0) 762 goto out; 763 764 ret = -EPERM; 765 /* Validate the user is allowed to use user id's mapped to. */ 766 if (!new_idmap_permitted(file, ns, cap_setid, &new_map)) 767 goto out; 768 769 /* Map the lower ids from the parent user namespace to the 770 * kernel global id space. 771 */ 772 for (idx = 0; idx < new_map.nr_extents; idx++) { 773 u32 lower_first; 774 extent = &new_map.extent[idx]; 775 776 lower_first = map_id_range_down(parent_map, 777 extent->lower_first, 778 extent->count); 779 780 /* Fail if we can not map the specified extent to 781 * the kernel global id space. 782 */ 783 if (lower_first == (u32) -1) 784 goto out; 785 786 extent->lower_first = lower_first; 787 } 788 789 /* Install the map */ 790 memcpy(map->extent, new_map.extent, 791 new_map.nr_extents*sizeof(new_map.extent[0])); 792 smp_wmb(); 793 map->nr_extents = new_map.nr_extents; 794 795 *ppos = count; 796 ret = count; 797 out: 798 mutex_unlock(&userns_state_mutex); 799 kfree(kbuf); 800 return ret; 801 } 802 803 ssize_t proc_uid_map_write(struct file *file, const char __user *buf, 804 size_t size, loff_t *ppos) 805 { 806 struct seq_file *seq = file->private_data; 807 struct user_namespace *ns = seq->private; 808 struct user_namespace *seq_ns = seq_user_ns(seq); 809 810 if (!ns->parent) 811 return -EPERM; 812 813 if ((seq_ns != ns) && (seq_ns != ns->parent)) 814 return -EPERM; 815 816 return map_write(file, buf, size, ppos, CAP_SETUID, 817 &ns->uid_map, &ns->parent->uid_map); 818 } 819 820 ssize_t proc_gid_map_write(struct file *file, const char __user *buf, 821 size_t size, loff_t *ppos) 822 { 823 struct seq_file *seq = file->private_data; 824 struct user_namespace *ns = seq->private; 825 struct user_namespace *seq_ns = seq_user_ns(seq); 826 827 if (!ns->parent) 828 return -EPERM; 829 830 if ((seq_ns != ns) && (seq_ns != ns->parent)) 831 return -EPERM; 832 833 return map_write(file, buf, size, ppos, CAP_SETGID, 834 &ns->gid_map, &ns->parent->gid_map); 835 } 836 837 ssize_t proc_projid_map_write(struct file *file, const char __user *buf, 838 size_t size, loff_t *ppos) 839 { 840 struct seq_file *seq = file->private_data; 841 struct user_namespace *ns = seq->private; 842 struct user_namespace *seq_ns = seq_user_ns(seq); 843 844 if (!ns->parent) 845 return -EPERM; 846 847 if ((seq_ns != ns) && (seq_ns != ns->parent)) 848 return -EPERM; 849 850 /* Anyone can set any valid project id no capability needed */ 851 return map_write(file, buf, size, ppos, -1, 852 &ns->projid_map, &ns->parent->projid_map); 853 } 854 855 static bool new_idmap_permitted(const struct file *file, 856 struct user_namespace *ns, int cap_setid, 857 struct uid_gid_map *new_map) 858 { 859 const struct cred *cred = file->f_cred; 860 /* Don't allow mappings that would allow anything that wouldn't 861 * be allowed without the establishment of unprivileged mappings. 862 */ 863 if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) && 864 uid_eq(ns->owner, cred->euid)) { 865 u32 id = new_map->extent[0].lower_first; 866 if (cap_setid == CAP_SETUID) { 867 kuid_t uid = make_kuid(ns->parent, id); 868 if (uid_eq(uid, cred->euid)) 869 return true; 870 } else if (cap_setid == CAP_SETGID) { 871 kgid_t gid = make_kgid(ns->parent, id); 872 if (!(ns->flags & USERNS_SETGROUPS_ALLOWED) && 873 gid_eq(gid, cred->egid)) 874 return true; 875 } 876 } 877 878 /* Allow anyone to set a mapping that doesn't require privilege */ 879 if (!cap_valid(cap_setid)) 880 return true; 881 882 /* Allow the specified ids if we have the appropriate capability 883 * (CAP_SETUID or CAP_SETGID) over the parent user namespace. 884 * And the opener of the id file also had the approprpiate capability. 885 */ 886 if (ns_capable(ns->parent, cap_setid) && 887 file_ns_capable(file, ns->parent, cap_setid)) 888 return true; 889 890 return false; 891 } 892 893 int proc_setgroups_show(struct seq_file *seq, void *v) 894 { 895 struct user_namespace *ns = seq->private; 896 unsigned long userns_flags = ACCESS_ONCE(ns->flags); 897 898 seq_printf(seq, "%s\n", 899 (userns_flags & USERNS_SETGROUPS_ALLOWED) ? 900 "allow" : "deny"); 901 return 0; 902 } 903 904 ssize_t proc_setgroups_write(struct file *file, const char __user *buf, 905 size_t count, loff_t *ppos) 906 { 907 struct seq_file *seq = file->private_data; 908 struct user_namespace *ns = seq->private; 909 char kbuf[8], *pos; 910 bool setgroups_allowed; 911 ssize_t ret; 912 913 /* Only allow a very narrow range of strings to be written */ 914 ret = -EINVAL; 915 if ((*ppos != 0) || (count >= sizeof(kbuf))) 916 goto out; 917 918 /* What was written? */ 919 ret = -EFAULT; 920 if (copy_from_user(kbuf, buf, count)) 921 goto out; 922 kbuf[count] = '\0'; 923 pos = kbuf; 924 925 /* What is being requested? */ 926 ret = -EINVAL; 927 if (strncmp(pos, "allow", 5) == 0) { 928 pos += 5; 929 setgroups_allowed = true; 930 } 931 else if (strncmp(pos, "deny", 4) == 0) { 932 pos += 4; 933 setgroups_allowed = false; 934 } 935 else 936 goto out; 937 938 /* Verify there is not trailing junk on the line */ 939 pos = skip_spaces(pos); 940 if (*pos != '\0') 941 goto out; 942 943 ret = -EPERM; 944 mutex_lock(&userns_state_mutex); 945 if (setgroups_allowed) { 946 /* Enabling setgroups after setgroups has been disabled 947 * is not allowed. 948 */ 949 if (!(ns->flags & USERNS_SETGROUPS_ALLOWED)) 950 goto out_unlock; 951 } else { 952 /* Permanently disabling setgroups after setgroups has 953 * been enabled by writing the gid_map is not allowed. 954 */ 955 if (ns->gid_map.nr_extents != 0) 956 goto out_unlock; 957 ns->flags &= ~USERNS_SETGROUPS_ALLOWED; 958 } 959 mutex_unlock(&userns_state_mutex); 960 961 /* Report a successful write */ 962 *ppos = count; 963 ret = count; 964 out: 965 return ret; 966 out_unlock: 967 mutex_unlock(&userns_state_mutex); 968 goto out; 969 } 970 971 bool userns_may_setgroups(const struct user_namespace *ns) 972 { 973 bool allowed; 974 975 mutex_lock(&userns_state_mutex); 976 /* It is not safe to use setgroups until a gid mapping in 977 * the user namespace has been established. 978 */ 979 allowed = ns->gid_map.nr_extents != 0; 980 /* Is setgroups allowed? */ 981 allowed = allowed && (ns->flags & USERNS_SETGROUPS_ALLOWED); 982 mutex_unlock(&userns_state_mutex); 983 984 return allowed; 985 } 986 987 /* 988 * Returns true if @ns is the same namespace as or a descendant of 989 * @target_ns. 990 */ 991 bool current_in_userns(const struct user_namespace *target_ns) 992 { 993 struct user_namespace *ns; 994 for (ns = current_user_ns(); ns; ns = ns->parent) { 995 if (ns == target_ns) 996 return true; 997 } 998 return false; 999 } 1000 1001 static inline struct user_namespace *to_user_ns(struct ns_common *ns) 1002 { 1003 return container_of(ns, struct user_namespace, ns); 1004 } 1005 1006 static struct ns_common *userns_get(struct task_struct *task) 1007 { 1008 struct user_namespace *user_ns; 1009 1010 rcu_read_lock(); 1011 user_ns = get_user_ns(__task_cred(task)->user_ns); 1012 rcu_read_unlock(); 1013 1014 return user_ns ? &user_ns->ns : NULL; 1015 } 1016 1017 static void userns_put(struct ns_common *ns) 1018 { 1019 put_user_ns(to_user_ns(ns)); 1020 } 1021 1022 static int userns_install(struct nsproxy *nsproxy, struct ns_common *ns) 1023 { 1024 struct user_namespace *user_ns = to_user_ns(ns); 1025 struct cred *cred; 1026 1027 /* Don't allow gaining capabilities by reentering 1028 * the same user namespace. 1029 */ 1030 if (user_ns == current_user_ns()) 1031 return -EINVAL; 1032 1033 /* Tasks that share a thread group must share a user namespace */ 1034 if (!thread_group_empty(current)) 1035 return -EINVAL; 1036 1037 if (current->fs->users != 1) 1038 return -EINVAL; 1039 1040 if (!ns_capable(user_ns, CAP_SYS_ADMIN)) 1041 return -EPERM; 1042 1043 cred = prepare_creds(); 1044 if (!cred) 1045 return -ENOMEM; 1046 1047 put_user_ns(cred->user_ns); 1048 set_cred_user_ns(cred, get_user_ns(user_ns)); 1049 1050 return commit_creds(cred); 1051 } 1052 1053 struct ns_common *ns_get_owner(struct ns_common *ns) 1054 { 1055 struct user_namespace *my_user_ns = current_user_ns(); 1056 struct user_namespace *owner, *p; 1057 1058 /* See if the owner is in the current user namespace */ 1059 owner = p = ns->ops->owner(ns); 1060 for (;;) { 1061 if (!p) 1062 return ERR_PTR(-EPERM); 1063 if (p == my_user_ns) 1064 break; 1065 p = p->parent; 1066 } 1067 1068 return &get_user_ns(owner)->ns; 1069 } 1070 1071 static struct user_namespace *userns_owner(struct ns_common *ns) 1072 { 1073 return to_user_ns(ns)->parent; 1074 } 1075 1076 const struct proc_ns_operations userns_operations = { 1077 .name = "user", 1078 .type = CLONE_NEWUSER, 1079 .get = userns_get, 1080 .put = userns_put, 1081 .install = userns_install, 1082 .owner = userns_owner, 1083 .get_parent = ns_get_owner, 1084 }; 1085 1086 static __init int user_namespaces_init(void) 1087 { 1088 user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC); 1089 return 0; 1090 } 1091 subsys_initcall(user_namespaces_init); 1092