xref: /openbmc/linux/kernel/user_namespace.c (revision 95e9fd10)
/*
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License as
 *  published by the Free Software Foundation, version 2 of the
 *  License.
 */

#include <linux/export.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <linux/user_namespace.h>
#include <linux/highuid.h>
#include <linux/cred.h>
#include <linux/securebits.h>
#include <linux/keyctl.h>
#include <linux/key-type.h>
#include <keys/user-type.h>
#include <linux/seq_file.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/ctype.h>

static struct kmem_cache *user_ns_cachep __read_mostly;

static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
				struct uid_gid_map *map);

/*
 * Create a new user namespace, deriving the creator from the user in the
 * passed credentials, and replacing that user with the new root user for the
 * new namespace.
 *
 * This is called by copy_creds(), which will finish setting the target task's
 * credentials.
 */
int create_user_ns(struct cred *new)
{
	struct user_namespace *ns, *parent_ns = new->user_ns;
	kuid_t owner = new->euid;
	kgid_t group = new->egid;

	/* The creator needs a mapping in the parent user namespace
	 * or else we won't be able to reasonably tell userspace who
	 * created a user_namespace.
	 */
	if (!kuid_has_mapping(parent_ns, owner) ||
	    !kgid_has_mapping(parent_ns, group))
		return -EPERM;

	ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL);
	if (!ns)
		return -ENOMEM;

	kref_init(&ns->kref);
	ns->parent = parent_ns;
	ns->owner = owner;
	ns->group = group;

	/* Start with the same capabilities as init, but these are
	 * useless for doing anything outside the new namespace because
	 * they are bound to the new user namespace.
	 */
	new->securebits = SECUREBITS_DEFAULT;
	new->cap_inheritable = CAP_EMPTY_SET;
	new->cap_permitted = CAP_FULL_SET;
	new->cap_effective = CAP_FULL_SET;
	new->cap_bset = CAP_FULL_SET;
#ifdef CONFIG_KEYS
	key_put(new->request_key_auth);
	new->request_key_auth = NULL;
#endif
	/* tgcred will be cleared in our caller because CLONE_THREAD won't be set */

	/* The new cred keeps the initial reference to the new user
	 * namespace; the reference it held on the parent namespace now
	 * lives on in ns->parent.
	 */
	new->user_ns = ns;

	return 0;
}
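
/*
 * Usage sketch (illustrative only, with a hypothetical child_fn): a
 * sufficiently privileged task reaches this function by passing
 * CLONE_NEWUSER to clone(2), which calls copy_creds() and from there
 * create_user_ns():
 *
 *	#define _GNU_SOURCE
 *	#include <sched.h>
 *
 *	static char stack[64 * 1024];
 *	pid_t pid = clone(child_fn, stack + sizeof(stack),
 *			  CLONE_NEWUSER | SIGCHLD, NULL);
 *
 * In the child the capability sets are full (see the CAP_FULL_SET
 * assignments above), but only with respect to the new namespace, and
 * its ids stay unmapped until uid_map/gid_map are written.
 */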

void free_user_ns(struct kref *kref)
{
	struct user_namespace *parent, *ns =
		container_of(kref, struct user_namespace, kref);

	parent = ns->parent;
	kmem_cache_free(user_ns_cachep, ns);
	put_user_ns(parent);
}
EXPORT_SYMBOL(free_user_ns);
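
/*
 * Lifetime note: the namespace is reference counted through ns->kref,
 * so this function is only reached from the final put_user_ns().  A
 * hypothetical caller pattern:
 *
 *	struct user_namespace *ns = get_user_ns(cred->user_ns);
 *	...
 *	put_user_ns(ns);	// the last put ends up in free_user_ns()
 *
 * Because freeing a namespace also drops the reference on its parent,
 * an otherwise-unused chain of ancestors unwinds one put at a time.
 */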

static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count)
{
	unsigned idx, extents;
	u32 first, last, id2;

	id2 = id + count - 1;

	/* Find the matching extent */
	extents = map->nr_extents;
	smp_read_barrier_depends();
	for (idx = 0; idx < extents; idx++) {
		first = map->extent[idx].first;
		last = first + map->extent[idx].count - 1;
		if (id >= first && id <= last &&
		    (id2 >= first && id2 <= last))
			break;
	}
	/* Map the id or note failure */
	if (idx < extents)
		id = (id - first) + map->extent[idx].lower_first;
	else
		id = (u32) -1;

	return id;
}
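
/*
 * Worked example with a hypothetical one-extent map
 * { .first = 100000, .lower_first = 0, .count = 65536 }:
 *
 *	map_id_range_down(map, 100000, 1000) == 0
 *		(the range [100000, 100999] lies inside the extent)
 *	map_id_range_down(map, 165000, 1000) == (u32) -1
 *		(the range would run past first + count - 1 == 165535)
 */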

static u32 map_id_down(struct uid_gid_map *map, u32 id)
{
	unsigned idx, extents;
	u32 first, last;

	/* Find the matching extent */
	extents = map->nr_extents;
	smp_read_barrier_depends();
	for (idx = 0; idx < extents; idx++) {
		first = map->extent[idx].first;
		last = first + map->extent[idx].count - 1;
		if (id >= first && id <= last)
			break;
	}
	/* Map the id or note failure */
	if (idx < extents)
		id = (id - first) + map->extent[idx].lower_first;
	else
		id = (u32) -1;

	return id;
}

static u32 map_id_up(struct uid_gid_map *map, u32 id)
{
	unsigned idx, extents;
	u32 first, last;

	/* Find the matching extent */
	extents = map->nr_extents;
	smp_read_barrier_depends();
	for (idx = 0; idx < extents; idx++) {
		first = map->extent[idx].lower_first;
		last = first + map->extent[idx].count - 1;
		if (id >= first && id <= last)
			break;
	}
	/* Map the id or note failure */
	if (idx < extents)
		id = (id - first) + map->extent[idx].first;
	else
		id = (u32) -1;

	return id;
}
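
/*
 * map_id_down() and map_id_up() are inverses over the same map.  With
 * the hypothetical extent { .first = 1000, .lower_first = 10000,
 * .count = 50 }:
 *
 *	map_id_down(map, 1001) == 10001
 *	map_id_up(map, 10001)  == 1001
 *	map_id_down(map, 2000) == (u32) -1	(no extent covers 2000)
 */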

/**
 *	make_kuid - Map a user-namespace uid pair into a kuid.
 *	@ns:  User namespace that the uid is in
 *	@uid: User identifier
 *
 *	Maps a user-namespace uid pair into a kernel internal kuid,
 *	and returns that kuid.
 *
 *	When there is no mapping defined for the user-namespace uid
 *	pair INVALID_UID is returned.  Callers are expected to test
 *	for and handle INVALID_UID being returned.  INVALID_UID
 *	may be tested for using uid_valid().
 */
kuid_t make_kuid(struct user_namespace *ns, uid_t uid)
{
	/* Map the uid to a global kernel uid */
	return KUIDT_INIT(map_id_down(&ns->uid_map, uid));
}
EXPORT_SYMBOL(make_kuid);

/**
 *	from_kuid - Create a uid from a kuid user-namespace pair.
 *	@targ: The user namespace we want a uid in.
 *	@kuid: The kernel internal uid to start with.
 *
 *	Map @kuid into the user-namespace specified by @targ and
 *	return the resulting uid.
 *
 *	There is always a mapping into the initial user_namespace.
 *
 *	If @kuid has no mapping in @targ (uid_t)-1 is returned.
 */
uid_t from_kuid(struct user_namespace *targ, kuid_t kuid)
{
	/* Map the uid from a global kernel uid */
	return map_id_up(&targ->uid_map, __kuid_val(kuid));
}
EXPORT_SYMBOL(from_kuid);

/**
 *	from_kuid_munged - Create a uid from a kuid user-namespace pair.
 *	@targ: The user namespace we want a uid in.
 *	@kuid: The kernel internal uid to start with.
 *
 *	Map @kuid into the user-namespace specified by @targ and
 *	return the resulting uid.
 *
 *	There is always a mapping into the initial user_namespace.
 *
 *	Unlike from_kuid, from_kuid_munged never fails and always
 *	returns a valid uid.  This makes from_kuid_munged appropriate
 *	for use in syscalls like stat and getuid where failing the
 *	system call and failing to provide a valid uid are not
 *	options.
 *
 *	If @kuid has no mapping in @targ overflowuid is returned.
 */
uid_t from_kuid_munged(struct user_namespace *targ, kuid_t kuid)
{
	uid_t uid;
	uid = from_kuid(targ, kuid);

	if (uid == (uid_t) -1)
		uid = overflowuid;
	return uid;
}
EXPORT_SYMBOL(from_kuid_munged);
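
/*
 * Example of the munged behaviour: if @kuid has no mapping in @targ,
 * from_kuid() returns (uid_t) -1, which a stat(2)-style caller could
 * not report, while from_kuid_munged() substitutes overflowuid
 * (65534 by default, tunable via /proc/sys/kernel/overflowuid), so
 * the system call can still return a valid uid.
 */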

/**
 *	make_kgid - Map a user-namespace gid pair into a kgid.
 *	@ns:  User namespace that the gid is in
 *	@gid: group identifier
 *
 *	Maps a user-namespace gid pair into a kernel internal kgid,
 *	and returns that kgid.
 *
 *	When there is no mapping defined for the user-namespace gid
 *	pair INVALID_GID is returned.  Callers are expected to test
 *	for and handle INVALID_GID being returned.  INVALID_GID may be
 *	tested for using gid_valid().
 */
kgid_t make_kgid(struct user_namespace *ns, gid_t gid)
{
	/* Map the gid to a global kernel gid */
	return KGIDT_INIT(map_id_down(&ns->gid_map, gid));
}
EXPORT_SYMBOL(make_kgid);

/**
 *	from_kgid - Create a gid from a kgid user-namespace pair.
 *	@targ: The user namespace we want a gid in.
 *	@kgid: The kernel internal gid to start with.
 *
 *	Map @kgid into the user-namespace specified by @targ and
 *	return the resulting gid.
 *
 *	There is always a mapping into the initial user_namespace.
 *
 *	If @kgid has no mapping in @targ (gid_t)-1 is returned.
 */
gid_t from_kgid(struct user_namespace *targ, kgid_t kgid)
{
	/* Map the gid from a global kernel gid */
	return map_id_up(&targ->gid_map, __kgid_val(kgid));
}
EXPORT_SYMBOL(from_kgid);

/**
 *	from_kgid_munged - Create a gid from a kgid user-namespace pair.
 *	@targ: The user namespace we want a gid in.
 *	@kgid: The kernel internal gid to start with.
 *
 *	Map @kgid into the user-namespace specified by @targ and
 *	return the resulting gid.
 *
 *	There is always a mapping into the initial user_namespace.
 *
 *	Unlike from_kgid, from_kgid_munged never fails and always
 *	returns a valid gid.  This makes from_kgid_munged appropriate
 *	for use in syscalls like stat and getgid where failing the
 *	system call and failing to provide a valid gid are not options.
 *
 *	If @kgid has no mapping in @targ overflowgid is returned.
 */
gid_t from_kgid_munged(struct user_namespace *targ, kgid_t kgid)
{
	gid_t gid;
	gid = from_kgid(targ, kgid);

	if (gid == (gid_t) -1)
		gid = overflowgid;
	return gid;
}
EXPORT_SYMBOL(from_kgid_munged);

static int uid_m_show(struct seq_file *seq, void *v)
{
	struct user_namespace *ns = seq->private;
	struct uid_gid_extent *extent = v;
	struct user_namespace *lower_ns;
	uid_t lower;

	lower_ns = current_user_ns();
	if ((lower_ns == ns) && lower_ns->parent)
		lower_ns = lower_ns->parent;

	lower = from_kuid(lower_ns, KUIDT_INIT(extent->lower_first));

	seq_printf(seq, "%10u %10u %10u\n",
		extent->first,
		lower,
		extent->count);

	return 0;
}

static int gid_m_show(struct seq_file *seq, void *v)
{
	struct user_namespace *ns = seq->private;
	struct uid_gid_extent *extent = v;
	struct user_namespace *lower_ns;
	gid_t lower;

	lower_ns = current_user_ns();
	if ((lower_ns == ns) && lower_ns->parent)
		lower_ns = lower_ns->parent;

	lower = from_kgid(lower_ns, KGIDT_INIT(extent->lower_first));

	seq_printf(seq, "%10u %10u %10u\n",
		extent->first,
		lower,
		extent->count);

	return 0;
}

static void *m_start(struct seq_file *seq, loff_t *ppos, struct uid_gid_map *map)
{
	struct uid_gid_extent *extent = NULL;
	loff_t pos = *ppos;

	if (pos < map->nr_extents)
		extent = &map->extent[pos];

	return extent;
}

static void *uid_m_start(struct seq_file *seq, loff_t *ppos)
{
	struct user_namespace *ns = seq->private;

	return m_start(seq, ppos, &ns->uid_map);
}

static void *gid_m_start(struct seq_file *seq, loff_t *ppos)
{
	struct user_namespace *ns = seq->private;

	return m_start(seq, ppos, &ns->gid_map);
}

static void *m_next(struct seq_file *seq, void *v, loff_t *pos)
{
	(*pos)++;
	return seq->op->start(seq, pos);
}

static void m_stop(struct seq_file *seq, void *v)
{
	return;
}

struct seq_operations proc_uid_seq_operations = {
	.start = uid_m_start,
	.stop = m_stop,
	.next = m_next,
	.show = uid_m_show,
};

struct seq_operations proc_gid_seq_operations = {
	.start = gid_m_start,
	.stop = m_stop,
	.next = m_next,
	.show = gid_m_show,
};
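
/*
 * These operations back /proc/<pid>/uid_map and /proc/<pid>/gid_map.
 * Each extent prints as "first lower count" in "%10u %10u %10u\n"
 * format; a hypothetical namespace mapping ids 0-999 onto parent ids
 * 100000-100999 reads back as:
 *
 *	         0     100000       1000
 */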

static DEFINE_MUTEX(id_map_mutex);

static ssize_t map_write(struct file *file, const char __user *buf,
			 size_t count, loff_t *ppos,
			 int cap_setid,
			 struct uid_gid_map *map,
			 struct uid_gid_map *parent_map)
{
	struct seq_file *seq = file->private_data;
	struct user_namespace *ns = seq->private;
	struct uid_gid_map new_map;
	unsigned idx;
	struct uid_gid_extent *extent, *last = NULL;
	unsigned long page = 0;
	char *kbuf, *pos, *next_line;
	ssize_t ret = -EINVAL;

	/*
	 * The id_map_mutex serializes all writes to any given map.
	 *
	 * Any map is only ever written once.
	 *
	 * An id map fits within 1 cache line on most architectures.
	 *
	 * On read nothing needs to be done unless you are on an
	 * architecture with a crazy cache coherency model like alpha.
	 *
	 * There is a one time data dependency between reading the
	 * count of the extents and the values of the extents.  The
	 * desired behavior is to see the values of the extents that
	 * were written before the count of the extents.
	 *
	 * To achieve this smp_wmb() is used to guarantee the write
	 * order and smp_read_barrier_depends() ensures that we
	 * don't have crazy architectures returning stale data.
	 */
	mutex_lock(&id_map_mutex);

	ret = -EPERM;
	/* Only allow one successful write to the map */
	if (map->nr_extents != 0)
		goto out;

	/* Require the appropriate privilege CAP_SETUID or CAP_SETGID
	 * over the user namespace in order to set the id mapping.
	 */
	if (!ns_capable(ns, cap_setid))
		goto out;

	/* Get a buffer */
	ret = -ENOMEM;
	page = __get_free_page(GFP_TEMPORARY);
	kbuf = (char *) page;
	if (!page)
		goto out;

	/* Only allow <= page size writes at the beginning of the file */
	ret = -EINVAL;
	if ((*ppos != 0) || (count >= PAGE_SIZE))
		goto out;

	/* Slurp in the user data */
	ret = -EFAULT;
	if (copy_from_user(kbuf, buf, count))
		goto out;
	kbuf[count] = '\0';

	/* Parse the user data */
	ret = -EINVAL;
	pos = kbuf;
	new_map.nr_extents = 0;
	for (; pos; pos = next_line) {
		extent = &new_map.extent[new_map.nr_extents];

		/* Find the end of line and ensure I don't look past it */
		next_line = strchr(pos, '\n');
		if (next_line) {
			*next_line = '\0';
			next_line++;
			if (*next_line == '\0')
				next_line = NULL;
		}

		pos = skip_spaces(pos);
		extent->first = simple_strtoul(pos, &pos, 10);
		if (!isspace(*pos))
			goto out;

		pos = skip_spaces(pos);
		extent->lower_first = simple_strtoul(pos, &pos, 10);
		if (!isspace(*pos))
			goto out;

		pos = skip_spaces(pos);
		extent->count = simple_strtoul(pos, &pos, 10);
		if (*pos && !isspace(*pos))
			goto out;

		/* Verify there is no trailing junk on the line */
		pos = skip_spaces(pos);
		if (*pos != '\0')
			goto out;

		/* Verify we have been given valid starting values */
		if ((extent->first == (u32) -1) ||
		    (extent->lower_first == (u32) -1))
			goto out;

		/* Verify count is not zero and does not cause the extent to wrap */
		if ((extent->first + extent->count) <= extent->first)
			goto out;
		if ((extent->lower_first + extent->count) <= extent->lower_first)
			goto out;

		/* For now only accept extents that are strictly in order */
		if (last &&
		    (((last->first + last->count) > extent->first) ||
		     ((last->lower_first + last->count) > extent->lower_first)))
			goto out;

		new_map.nr_extents++;
		last = extent;

		/* Fail if the file contains too many extents */
		if ((new_map.nr_extents == UID_GID_MAP_MAX_EXTENTS) &&
		    (next_line != NULL))
			goto out;
	}
	/* Be very certain the new map actually exists */
	if (new_map.nr_extents == 0)
		goto out;

	ret = -EPERM;
	/* Validate that the user is allowed to use the user ids being mapped to. */
	if (!new_idmap_permitted(ns, cap_setid, &new_map))
		goto out;

	/* Map the lower ids from the parent user namespace to the
	 * kernel global id space.
	 */
	for (idx = 0; idx < new_map.nr_extents; idx++) {
		u32 lower_first;
		extent = &new_map.extent[idx];

		lower_first = map_id_range_down(parent_map,
						extent->lower_first,
						extent->count);

		/* Fail if we cannot map the specified extent to
		 * the kernel global id space.
		 */
		if (lower_first == (u32) -1)
			goto out;

		extent->lower_first = lower_first;
	}

	/* Install the map */
	memcpy(map->extent, new_map.extent,
		new_map.nr_extents*sizeof(new_map.extent[0]));
	smp_wmb();
	map->nr_extents = new_map.nr_extents;

	*ppos = count;
	ret = count;
out:
	mutex_unlock(&id_map_mutex);
	if (page)
		free_page(page);
	return ret;
}
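
/*
 * Usage sketch (hypothetical ids): a writer with the required
 * capability installs the map exactly once by writing
 * "first lower count" lines, e.g. from a shell:
 *
 *	echo '0 100000 1000' > /proc/<child-pid>/uid_map
 *
 * Afterwards uid 0 in the child corresponds to uid 100000 in the
 * parent namespace, and any further write fails with -EPERM because
 * map->nr_extents is already non-zero.
 */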

ssize_t proc_uid_map_write(struct file *file, const char __user *buf, size_t size, loff_t *ppos)
{
	struct seq_file *seq = file->private_data;
	struct user_namespace *ns = seq->private;

	if (!ns->parent)
		return -EPERM;

	return map_write(file, buf, size, ppos, CAP_SETUID,
			 &ns->uid_map, &ns->parent->uid_map);
}

ssize_t proc_gid_map_write(struct file *file, const char __user *buf, size_t size, loff_t *ppos)
{
	struct seq_file *seq = file->private_data;
	struct user_namespace *ns = seq->private;

	if (!ns->parent)
		return -EPERM;

	return map_write(file, buf, size, ppos, CAP_SETGID,
			 &ns->gid_map, &ns->parent->gid_map);
}

static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
				struct uid_gid_map *new_map)
{
	/* Allow the specified ids if we have the appropriate capability
	 * (CAP_SETUID or CAP_SETGID) over the parent user namespace.
	 */
	if (ns_capable(ns->parent, cap_setid))
		return true;

	return false;
}
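
/*
 * In this revision only a writer with CAP_SETUID (or CAP_SETGID) over
 * the parent namespace may install a mapping; there is no unprivileged
 * "map your own id" case, so e.g. an unprivileged "0 1000 1" write to
 * a child's uid_map is rejected with -EPERM.
 */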

static __init int user_namespaces_init(void)
{
	user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC);
	return 0;
}
module_init(user_namespaces_init);