xref: /openbmc/linux/ipc/shm.c (revision 5bd8e16d)
1 /*
2  * linux/ipc/shm.c
3  * Copyright (C) 1992, 1993 Krishna Balasubramanian
4  *	 Many improvements/fixes by Bruno Haible.
5  * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
6  * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
7  *
8  * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
9  * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
10  * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
11  * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
12  * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com>
13  * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
14  * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com>
15  *
16  * support for audit of ipc object properties and permission changes
17  * Dustin Kirkland <dustin.kirkland@us.ibm.com>
18  *
19  * namespaces support
20  * OpenVZ, SWsoft Inc.
21  * Pavel Emelianov <xemul@openvz.org>
22  *
23  * Better ipc lock (kern_ipc_perm.lock) handling
24  * Davidlohr Bueso <davidlohr.bueso@hp.com>, June 2013.
25  */
26 
27 #include <linux/slab.h>
28 #include <linux/mm.h>
29 #include <linux/hugetlb.h>
30 #include <linux/shm.h>
31 #include <linux/init.h>
32 #include <linux/file.h>
33 #include <linux/mman.h>
34 #include <linux/shmem_fs.h>
35 #include <linux/security.h>
36 #include <linux/syscalls.h>
37 #include <linux/audit.h>
38 #include <linux/capability.h>
39 #include <linux/ptrace.h>
40 #include <linux/seq_file.h>
41 #include <linux/rwsem.h>
42 #include <linux/nsproxy.h>
43 #include <linux/mount.h>
44 #include <linux/ipc_namespace.h>
45 
46 #include <asm/uaccess.h>
47 
48 #include "util.h"
49 
50 struct shm_file_data {
51 	int id;
52 	struct ipc_namespace *ns;
53 	struct file *file;
54 	const struct vm_operations_struct *vm_ops;
55 };
56 
57 #define shm_file_data(file) (*((struct shm_file_data **)&(file)->private_data))
58 
59 static const struct file_operations shm_file_operations;
60 static const struct vm_operations_struct shm_vm_ops;
61 
62 #define shm_ids(ns)	((ns)->ids[IPC_SHM_IDS])
63 
64 #define shm_unlock(shp)			\
65 	ipc_unlock(&(shp)->shm_perm)
66 
67 static int newseg(struct ipc_namespace *, struct ipc_params *);
68 static void shm_open(struct vm_area_struct *vma);
69 static void shm_close(struct vm_area_struct *vma);
70 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp);
71 #ifdef CONFIG_PROC_FS
72 static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
73 #endif
74 
75 void shm_init_ns(struct ipc_namespace *ns)
76 {
77 	ns->shm_ctlmax = SHMMAX;
78 	ns->shm_ctlall = SHMALL;
79 	ns->shm_ctlmni = SHMMNI;
80 	ns->shm_rmid_forced = 0;
81 	ns->shm_tot = 0;
82 	ipc_init_ids(&shm_ids(ns));
83 }
84 
85 /*
86  * Called with shm_ids.rwsem (writer) and the shp structure locked.
87  * Only shm_ids.rwsem remains locked on exit.
88  */
89 static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
90 {
91 	struct shmid_kernel *shp;
92 	shp = container_of(ipcp, struct shmid_kernel, shm_perm);
93 
94 	if (shp->shm_nattch) {
95 		shp->shm_perm.mode |= SHM_DEST;
96 		/* Do not let it be found any more */
97 		shp->shm_perm.key = IPC_PRIVATE;
98 		shm_unlock(shp);
99 	} else
100 		shm_destroy(ns, shp);
101 }
102 
103 #ifdef CONFIG_IPC_NS
104 void shm_exit_ns(struct ipc_namespace *ns)
105 {
106 	free_ipcs(ns, &shm_ids(ns), do_shm_rmid);
107 	idr_destroy(&ns->ids[IPC_SHM_IDS].ipcs_idr);
108 }
109 #endif
110 
111 static int __init ipc_ns_init(void)
112 {
113 	shm_init_ns(&init_ipc_ns);
114 	return 0;
115 }
116 
117 pure_initcall(ipc_ns_init);
118 
119 void __init shm_init(void)
120 {
121 	ipc_init_proc_interface("sysvipc/shm",
122 #if BITS_PER_LONG <= 32
123 				"       key      shmid perms       size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime        rss       swap\n",
124 #else
125 				"       key      shmid perms                  size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime                   rss                  swap\n",
126 #endif
127 				IPC_SHM_IDS, sysvipc_shm_proc_show);
128 }
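/*
 * Usage sketch (userspace, not part of this file): one way the proc
 * interface registered above is typically consumed. The columns follow the
 * header string passed to ipc_init_proc_interface(). Error handling trimmed.
 *
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		char line[512];
 *		FILE *f = fopen("/proc/sysvipc/shm", "r");
 *
 *		if (!f)
 *			return 1;
 *		while (fgets(line, sizeof(line), f))
 *			fputs(line, stdout);	// one row per segment
 *		fclose(f);
 *		return 0;
 *	}
 */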
129 
130 static inline struct shmid_kernel *shm_obtain_object(struct ipc_namespace *ns, int id)
131 {
132 	struct kern_ipc_perm *ipcp = ipc_obtain_object(&shm_ids(ns), id);
133 
134 	if (IS_ERR(ipcp))
135 		return ERR_CAST(ipcp);
136 
137 	return container_of(ipcp, struct shmid_kernel, shm_perm);
138 }
139 
140 static inline struct shmid_kernel *shm_obtain_object_check(struct ipc_namespace *ns, int id)
141 {
142 	struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&shm_ids(ns), id);
143 
144 	if (IS_ERR(ipcp))
145 		return ERR_CAST(ipcp);
146 
147 	return container_of(ipcp, struct shmid_kernel, shm_perm);
148 }
149 
150 /*
151  * shm_lock_() routines are called in the paths where the rwsem
152  * is not necessarily held.
153  */
154 static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
155 {
156 	struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id);
157 
158 	if (IS_ERR(ipcp))
159 		return (struct shmid_kernel *)ipcp;
160 
161 	return container_of(ipcp, struct shmid_kernel, shm_perm);
162 }
163 
164 static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
165 {
166 	rcu_read_lock();
167 	ipc_lock_object(&ipcp->shm_perm);
168 }
169 
170 static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
171 {
172 	ipc_rmid(&shm_ids(ns), &s->shm_perm);
173 }
174 
175 
176 /* This is called by fork, once for every shm attach. */
177 static void shm_open(struct vm_area_struct *vma)
178 {
179 	struct file *file = vma->vm_file;
180 	struct shm_file_data *sfd = shm_file_data(file);
181 	struct shmid_kernel *shp;
182 
183 	shp = shm_lock(sfd->ns, sfd->id);
184 	BUG_ON(IS_ERR(shp));
185 	shp->shm_atim = get_seconds();
186 	shp->shm_lprid = task_tgid_vnr(current);
187 	shp->shm_nattch++;
188 	shm_unlock(shp);
189 }
190 
191 /*
192  * shm_destroy - free the struct shmid_kernel
193  *
194  * @ns: namespace
195  * @shp: struct to free
196  *
197  * It has to be called with shp and shm_ids.rwsem (writer) locked,
198  * but returns with shp unlocked and freed.
199  */
200 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
201 {
202 	ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
203 	shm_rmid(ns, shp);
204 	shm_unlock(shp);
205 	if (!is_file_hugepages(shp->shm_file))
206 		shmem_lock(shp->shm_file, 0, shp->mlock_user);
207 	else if (shp->mlock_user)
208 		user_shm_unlock(file_inode(shp->shm_file)->i_size,
209 						shp->mlock_user);
210 	fput(shp->shm_file);
211 	security_shm_free(shp);
212 	ipc_rcu_putref(shp);
213 }
214 
215 /*
216  * shm_may_destroy - identifies whether shm segment should be destroyed now
217  *
218  * Returns true if and only if there are no active users of the segment and
219  * one of the following is true:
220  *
221  * 1) shmctl(id, IPC_RMID, NULL) was called for this shp
222  *
223  * 2) sysctl kernel.shm_rmid_forced is set to 1.
224  */
225 static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
226 {
227 	return (shp->shm_nattch == 0) &&
228 	       (ns->shm_rmid_forced ||
229 		(shp->shm_perm.mode & SHM_DEST));
230 }
231 
232 /*
233  * remove the attach descriptor vma.
234  * free memory for segment if it is marked destroyed.
235  * The descriptor has already been removed from the current->mm->mmap list
236  * and will later be kfree()d.
237  */
238 static void shm_close(struct vm_area_struct *vma)
239 {
240 	struct file *file = vma->vm_file;
241 	struct shm_file_data *sfd = shm_file_data(file);
242 	struct shmid_kernel *shp;
243 	struct ipc_namespace *ns = sfd->ns;
244 
245 	down_write(&shm_ids(ns).rwsem);
246 	/* remove from the list of attaches of the shm segment */
247 	shp = shm_lock(ns, sfd->id);
248 	BUG_ON(IS_ERR(shp));
249 	shp->shm_lprid = task_tgid_vnr(current);
250 	shp->shm_dtim = get_seconds();
251 	shp->shm_nattch--;
252 	if (shm_may_destroy(ns, shp))
253 		shm_destroy(ns, shp);
254 	else
255 		shm_unlock(shp);
256 	up_write(&shm_ids(ns).rwsem);
257 }
258 
259 /* Called with ns->shm_ids(ns).rwsem locked */
260 static int shm_try_destroy_current(int id, void *p, void *data)
261 {
262 	struct ipc_namespace *ns = data;
263 	struct kern_ipc_perm *ipcp = p;
264 	struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);
265 
266 	if (shp->shm_creator != current)
267 		return 0;
268 
269 	/*
270 	 * Mark it as orphaned to destroy the segment when
271 	 * kernel.shm_rmid_forced is changed.
272 	 * It is a no-op if the following shm_may_destroy() returns true.
273 	 */
274 	shp->shm_creator = NULL;
275 
276 	/*
277 	 * Don't even try to destroy it.  If shm_rmid_forced=0 and IPC_RMID
278 	 * is not set, it shouldn't be deleted here.
279 	 */
280 	if (!ns->shm_rmid_forced)
281 		return 0;
282 
283 	if (shm_may_destroy(ns, shp)) {
284 		shm_lock_by_ptr(shp);
285 		shm_destroy(ns, shp);
286 	}
287 	return 0;
288 }
289 
290 /* Called with ns->shm_ids(ns).rwsem locked */
291 static int shm_try_destroy_orphaned(int id, void *p, void *data)
292 {
293 	struct ipc_namespace *ns = data;
294 	struct kern_ipc_perm *ipcp = p;
295 	struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);
296 
297 	/*
298 	 * We want to destroy segments that have no users and whose
299 	 * originating process has already exited.
300 	 *
301 	 * As shp->* are changed under rwsem, it's safe to skip shp locking.
302 	 */
303 	if (shp->shm_creator != NULL)
304 		return 0;
305 
306 	if (shm_may_destroy(ns, shp)) {
307 		shm_lock_by_ptr(shp);
308 		shm_destroy(ns, shp);
309 	}
310 	return 0;
311 }
312 
313 void shm_destroy_orphaned(struct ipc_namespace *ns)
314 {
315 	down_write(&shm_ids(ns).rwsem);
316 	if (shm_ids(ns).in_use)
317 		idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns);
318 	up_write(&shm_ids(ns).rwsem);
319 }
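/*
 * Usage sketch (userspace, not part of this file): shm_destroy_orphaned()
 * runs when the kernel.shm_rmid_forced sysctl is switched on (see
 * ipc/ipc_sysctl.c). A hedged example of triggering it, assuming sufficient
 * privilege:
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int fd = open("/proc/sys/kernel/shm_rmid_forced", O_WRONLY);
 *	if (fd >= 0) {
 *		write(fd, "1", 1);	// orphaned segments get reaped
 *		close(fd);
 *	}
 */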
320 
321 
322 void exit_shm(struct task_struct *task)
323 {
324 	struct ipc_namespace *ns = task->nsproxy->ipc_ns;
325 
326 	if (shm_ids(ns).in_use == 0)
327 		return;
328 
329 	/* Destroy all segments this task created that were never attached */
330 	down_write(&shm_ids(ns).rwsem);
331 	if (shm_ids(ns).in_use)
332 		idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns);
333 	up_write(&shm_ids(ns).rwsem);
334 }
335 
336 static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
337 {
338 	struct file *file = vma->vm_file;
339 	struct shm_file_data *sfd = shm_file_data(file);
340 
341 	return sfd->vm_ops->fault(vma, vmf);
342 }
343 
344 #ifdef CONFIG_NUMA
345 static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
346 {
347 	struct file *file = vma->vm_file;
348 	struct shm_file_data *sfd = shm_file_data(file);
349 	int err = 0;
350 	if (sfd->vm_ops->set_policy)
351 		err = sfd->vm_ops->set_policy(vma, new);
352 	return err;
353 }
354 
355 static struct mempolicy *shm_get_policy(struct vm_area_struct *vma,
356 					unsigned long addr)
357 {
358 	struct file *file = vma->vm_file;
359 	struct shm_file_data *sfd = shm_file_data(file);
360 	struct mempolicy *pol = NULL;
361 
362 	if (sfd->vm_ops->get_policy)
363 		pol = sfd->vm_ops->get_policy(vma, addr);
364 	else if (vma->vm_policy)
365 		pol = vma->vm_policy;
366 
367 	return pol;
368 }
369 #endif
370 
371 static int shm_mmap(struct file *file, struct vm_area_struct *vma)
372 {
373 	struct shm_file_data *sfd = shm_file_data(file);
374 	int ret;
375 
376 	ret = sfd->file->f_op->mmap(sfd->file, vma);
377 	if (ret != 0)
378 		return ret;
379 	sfd->vm_ops = vma->vm_ops;
380 #ifdef CONFIG_MMU
381 	BUG_ON(!sfd->vm_ops->fault);
382 #endif
383 	vma->vm_ops = &shm_vm_ops;
384 	shm_open(vma);
385 
386 	return ret;
387 }
388 
389 static int shm_release(struct inode *ino, struct file *file)
390 {
391 	struct shm_file_data *sfd = shm_file_data(file);
392 
393 	put_ipc_ns(sfd->ns);
394 	shm_file_data(file) = NULL;
395 	kfree(sfd);
396 	return 0;
397 }
398 
399 static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync)
400 {
401 	struct shm_file_data *sfd = shm_file_data(file);
402 
403 	if (!sfd->file->f_op->fsync)
404 		return -EINVAL;
405 	return sfd->file->f_op->fsync(sfd->file, start, end, datasync);
406 }
407 
408 static long shm_fallocate(struct file *file, int mode, loff_t offset,
409 			  loff_t len)
410 {
411 	struct shm_file_data *sfd = shm_file_data(file);
412 
413 	if (!sfd->file->f_op->fallocate)
414 		return -EOPNOTSUPP;
415 	return sfd->file->f_op->fallocate(file, mode, offset, len);
416 }
417 
418 static unsigned long shm_get_unmapped_area(struct file *file,
419 	unsigned long addr, unsigned long len, unsigned long pgoff,
420 	unsigned long flags)
421 {
422 	struct shm_file_data *sfd = shm_file_data(file);
423 	return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len,
424 						pgoff, flags);
425 }
426 
427 static const struct file_operations shm_file_operations = {
428 	.mmap		= shm_mmap,
429 	.fsync		= shm_fsync,
430 	.release	= shm_release,
431 #ifndef CONFIG_MMU
432 	.get_unmapped_area	= shm_get_unmapped_area,
433 #endif
434 	.llseek		= noop_llseek,
435 	.fallocate	= shm_fallocate,
436 };
437 
438 static const struct file_operations shm_file_operations_huge = {
439 	.mmap		= shm_mmap,
440 	.fsync		= shm_fsync,
441 	.release	= shm_release,
442 	.get_unmapped_area	= shm_get_unmapped_area,
443 	.llseek		= noop_llseek,
444 	.fallocate	= shm_fallocate,
445 };
446 
447 int is_file_shm_hugepages(struct file *file)
448 {
449 	return file->f_op == &shm_file_operations_huge;
450 }
451 
452 static const struct vm_operations_struct shm_vm_ops = {
453 	.open	= shm_open,	/* callback for a new vm-area open */
454 	.close	= shm_close,	/* callback for when the vm-area is released */
455 	.fault	= shm_fault,
456 #if defined(CONFIG_NUMA)
457 	.set_policy = shm_set_policy,
458 	.get_policy = shm_get_policy,
459 #endif
460 };
461 
462 /**
463  * newseg - Create a new shared memory segment
464  * @ns: namespace
465  * @params: ptr to the structure that contains key, size and shmflg
466  *
467  * Called with shm_ids.rwsem held as a writer.
468  */
469 
470 static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
471 {
472 	key_t key = params->key;
473 	int shmflg = params->flg;
474 	size_t size = params->u.size;
475 	int error;
476 	struct shmid_kernel *shp;
477 	size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
478 	struct file *file;
479 	char name[13];
480 	int id;
481 	vm_flags_t acctflag = 0;
482 
483 	if (size < SHMMIN || size > ns->shm_ctlmax)
484 		return -EINVAL;
485 
486 	if (ns->shm_tot + numpages > ns->shm_ctlall)
487 		return -ENOSPC;
488 
489 	shp = ipc_rcu_alloc(sizeof(*shp));
490 	if (!shp)
491 		return -ENOMEM;
492 
493 	shp->shm_perm.key = key;
494 	shp->shm_perm.mode = (shmflg & S_IRWXUGO);
495 	shp->mlock_user = NULL;
496 
497 	shp->shm_perm.security = NULL;
498 	error = security_shm_alloc(shp);
499 	if (error) {
500 		ipc_rcu_putref(shp);
501 		return error;
502 	}
503 
504 	sprintf(name, "SYSV%08x", key);
505 	if (shmflg & SHM_HUGETLB) {
506 		struct hstate *hs;
507 		size_t hugesize;
508 
509 		hs = hstate_sizelog((shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
510 		if (!hs) {
511 			error = -EINVAL;
512 			goto no_file;
513 		}
514 		hugesize = ALIGN(size, huge_page_size(hs));
515 
516 		/* hugetlb_file_setup applies strict accounting */
517 		if (shmflg & SHM_NORESERVE)
518 			acctflag = VM_NORESERVE;
519 		file = hugetlb_file_setup(name, hugesize, acctflag,
520 				  &shp->mlock_user, HUGETLB_SHMFS_INODE,
521 				(shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
522 	} else {
523 		/*
524 		 * Never skip accounting under OVERCOMMIT_NEVER, even if
525 		 * SHM_NORESERVE asks for it.
526 		 */
527 		if ((shmflg & SHM_NORESERVE) &&
528 				sysctl_overcommit_memory != OVERCOMMIT_NEVER)
529 			acctflag = VM_NORESERVE;
530 		file = shmem_file_setup(name, size, acctflag);
531 	}
532 	error = PTR_ERR(file);
533 	if (IS_ERR(file))
534 		goto no_file;
535 
536 	id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
537 	if (id < 0) {
538 		error = id;
539 		goto no_id;
540 	}
541 
542 	shp->shm_cprid = task_tgid_vnr(current);
543 	shp->shm_lprid = 0;
544 	shp->shm_atim = shp->shm_dtim = 0;
545 	shp->shm_ctim = get_seconds();
546 	shp->shm_segsz = size;
547 	shp->shm_nattch = 0;
548 	shp->shm_file = file;
549 	shp->shm_creator = current;
550 
551 	/*
552 	 * shmid gets reported as "inode#" in /proc/pid/maps.
553 	 * proc-ps tools use this. Changing this will break them.
554 	 */
555 	file_inode(file)->i_ino = shp->shm_perm.id;
556 
557 	ns->shm_tot += numpages;
558 	error = shp->shm_perm.id;
559 
560 	ipc_unlock_object(&shp->shm_perm);
561 	rcu_read_unlock();
562 	return error;
563 
564 no_id:
565 	if (is_file_hugepages(file) && shp->mlock_user)
566 		user_shm_unlock(size, shp->mlock_user);
567 	fput(file);
568 no_file:
569 	security_shm_free(shp);
570 	ipc_rcu_putref(shp);
571 	return error;
572 }
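/*
 * Usage sketch (userspace, not part of this file) of the SHM_HUGETLB branch
 * above: SHM_HUGE_2MB encodes log2(2MB) = 21 in the bits selected by
 * SHM_HUGE_MASK at SHM_HUGE_SHIFT, and hstate_sizelog() then picks the
 * matching hugepage pool. The macros may be missing from libc headers,
 * hence the fallback definitions below.
 *
 *	#include <sys/ipc.h>
 *	#include <sys/shm.h>
 *
 *	#ifndef SHM_HUGE_SHIFT
 *	#define SHM_HUGE_SHIFT	26	// from <uapi/linux/shm.h>
 *	#endif
 *	#define SHM_HUGE_2MB	(21 << SHM_HUGE_SHIFT)
 *
 *	int id = shmget(IPC_PRIVATE, 2 * 1024 * 1024,
 *			SHM_HUGETLB | SHM_HUGE_2MB | IPC_CREAT | 0600);
 */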
573 
574 /*
575  * Called with shm_ids.rwsem and ipcp locked.
576  */
577 static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg)
578 {
579 	struct shmid_kernel *shp;
580 
581 	shp = container_of(ipcp, struct shmid_kernel, shm_perm);
582 	return security_shm_associate(shp, shmflg);
583 }
584 
585 /*
586  * Called with shm_ids.rwsem and ipcp locked.
587  */
588 static inline int shm_more_checks(struct kern_ipc_perm *ipcp,
589 				struct ipc_params *params)
590 {
591 	struct shmid_kernel *shp;
592 
593 	shp = container_of(ipcp, struct shmid_kernel, shm_perm);
594 	if (shp->shm_segsz < params->u.size)
595 		return -EINVAL;
596 
597 	return 0;
598 }
599 
600 SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg)
601 {
602 	struct ipc_namespace *ns;
603 	struct ipc_ops shm_ops;
604 	struct ipc_params shm_params;
605 
606 	ns = current->nsproxy->ipc_ns;
607 
608 	shm_ops.getnew = newseg;
609 	shm_ops.associate = shm_security;
610 	shm_ops.more_checks = shm_more_checks;
611 
612 	shm_params.key = key;
613 	shm_params.flg = shmflg;
614 	shm_params.u.size = size;
615 
616 	return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
617 }
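/*
 * Usage sketch (userspace, not part of this file) of the syscall above:
 * IPC_PRIVATE always takes the newseg() path, while a named key is first
 * looked up through the associate/more_checks callbacks wired into ipcget()
 * here.
 *
 *	#include <sys/ipc.h>
 *	#include <sys/shm.h>
 *	#include <stdio.h>
 *
 *	int shmid = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
 *	if (shmid < 0)
 *		perror("shmget");
 */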
618 
619 static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
620 {
621 	switch (version) {
622 	case IPC_64:
623 		return copy_to_user(buf, in, sizeof(*in));
624 	case IPC_OLD:
625 	    {
626 		struct shmid_ds out;
627 
628 		memset(&out, 0, sizeof(out));
629 		ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm);
630 		out.shm_segsz	= in->shm_segsz;
631 		out.shm_atime	= in->shm_atime;
632 		out.shm_dtime	= in->shm_dtime;
633 		out.shm_ctime	= in->shm_ctime;
634 		out.shm_cpid	= in->shm_cpid;
635 		out.shm_lpid	= in->shm_lpid;
636 		out.shm_nattch	= in->shm_nattch;
637 
638 		return copy_to_user(buf, &out, sizeof(out));
639 	    }
640 	default:
641 		return -EINVAL;
642 	}
643 }
644 
645 static inline unsigned long
646 copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version)
647 {
648 	switch (version) {
649 	case IPC_64:
650 		if (copy_from_user(out, buf, sizeof(*out)))
651 			return -EFAULT;
652 		return 0;
653 	case IPC_OLD:
654 	    {
655 		struct shmid_ds tbuf_old;
656 
657 		if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
658 			return -EFAULT;
659 
660 		out->shm_perm.uid	= tbuf_old.shm_perm.uid;
661 		out->shm_perm.gid	= tbuf_old.shm_perm.gid;
662 		out->shm_perm.mode	= tbuf_old.shm_perm.mode;
663 
664 		return 0;
665 	    }
666 	default:
667 		return -EINVAL;
668 	}
669 }
670 
671 static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version)
672 {
673 	switch (version) {
674 	case IPC_64:
675 		return copy_to_user(buf, in, sizeof(*in));
676 	case IPC_OLD:
677 	    {
678 		struct shminfo out;
679 
680 		if (in->shmmax > INT_MAX)
681 			out.shmmax = INT_MAX;
682 		else
683 			out.shmmax = (int)in->shmmax;
684 
685 		out.shmmin	= in->shmmin;
686 		out.shmmni	= in->shmmni;
687 		out.shmseg	= in->shmseg;
688 		out.shmall	= in->shmall;
689 
690 		return copy_to_user(buf, &out, sizeof(out));
691 	    }
692 	default:
693 		return -EINVAL;
694 	}
695 }
696 
697 /*
698  * Calculate and add used RSS and swap pages of a shm.
699  * Called with shm_ids.rwsem held as a reader
700  */
701 static void shm_add_rss_swap(struct shmid_kernel *shp,
702 	unsigned long *rss_add, unsigned long *swp_add)
703 {
704 	struct inode *inode;
705 
706 	inode = file_inode(shp->shm_file);
707 
708 	if (is_file_hugepages(shp->shm_file)) {
709 		struct address_space *mapping = inode->i_mapping;
710 		struct hstate *h = hstate_file(shp->shm_file);
711 		*rss_add += pages_per_huge_page(h) * mapping->nrpages;
712 	} else {
713 #ifdef CONFIG_SHMEM
714 		struct shmem_inode_info *info = SHMEM_I(inode);
715 		spin_lock(&info->lock);
716 		*rss_add += inode->i_mapping->nrpages;
717 		*swp_add += info->swapped;
718 		spin_unlock(&info->lock);
719 #else
720 		*rss_add += inode->i_mapping->nrpages;
721 #endif
722 	}
723 }
724 
725 /*
726  * Called with shm_ids.rwsem held as a reader
727  */
728 static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
729 		unsigned long *swp)
730 {
731 	int next_id;
732 	int total, in_use;
733 
734 	*rss = 0;
735 	*swp = 0;
736 
737 	in_use = shm_ids(ns).in_use;
738 
739 	for (total = 0, next_id = 0; total < in_use; next_id++) {
740 		struct kern_ipc_perm *ipc;
741 		struct shmid_kernel *shp;
742 
743 		ipc = idr_find(&shm_ids(ns).ipcs_idr, next_id);
744 		if (ipc == NULL)
745 			continue;
746 		shp = container_of(ipc, struct shmid_kernel, shm_perm);
747 
748 		shm_add_rss_swap(shp, rss, swp);
749 
750 		total++;
751 	}
752 }
753 
754 /*
755  * This function handles some shmctl commands which require the rwsem
756  * to be held in write mode.
757  * NOTE: the caller must hold no locks; the rwsem is taken inside this function.
758  */
759 static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
760 		       struct shmid_ds __user *buf, int version)
761 {
762 	struct kern_ipc_perm *ipcp;
763 	struct shmid64_ds shmid64;
764 	struct shmid_kernel *shp;
765 	int err;
766 
767 	if (cmd == IPC_SET) {
768 		if (copy_shmid_from_user(&shmid64, buf, version))
769 			return -EFAULT;
770 	}
771 
772 	down_write(&shm_ids(ns).rwsem);
773 	rcu_read_lock();
774 
775 	ipcp = ipcctl_pre_down_nolock(ns, &shm_ids(ns), shmid, cmd,
776 				      &shmid64.shm_perm, 0);
777 	if (IS_ERR(ipcp)) {
778 		err = PTR_ERR(ipcp);
779 		goto out_unlock1;
780 	}
781 
782 	shp = container_of(ipcp, struct shmid_kernel, shm_perm);
783 
784 	err = security_shm_shmctl(shp, cmd);
785 	if (err)
786 		goto out_unlock1;
787 
788 	switch (cmd) {
789 	case IPC_RMID:
790 		ipc_lock_object(&shp->shm_perm);
791 		/* do_shm_rmid unlocks the ipc object and rcu */
792 		do_shm_rmid(ns, ipcp);
793 		goto out_up;
794 	case IPC_SET:
795 		ipc_lock_object(&shp->shm_perm);
796 		err = ipc_update_perm(&shmid64.shm_perm, ipcp);
797 		if (err)
798 			goto out_unlock0;
799 		shp->shm_ctim = get_seconds();
800 		break;
801 	default:
802 		err = -EINVAL;
803 		goto out_unlock1;
804 	}
805 
806 out_unlock0:
807 	ipc_unlock_object(&shp->shm_perm);
808 out_unlock1:
809 	rcu_read_unlock();
810 out_up:
811 	up_write(&shm_ids(ns).rwsem);
812 	return err;
813 }
814 
815 static int shmctl_nolock(struct ipc_namespace *ns, int shmid,
816 			 int cmd, int version, void __user *buf)
817 {
818 	int err;
819 	struct shmid_kernel *shp;
820 
821 	/* preliminary security checks for *_INFO */
822 	if (cmd == IPC_INFO || cmd == SHM_INFO) {
823 		err = security_shm_shmctl(NULL, cmd);
824 		if (err)
825 			return err;
826 	}
827 
828 	switch (cmd) {
829 	case IPC_INFO:
830 	{
831 		struct shminfo64 shminfo;
832 
833 		memset(&shminfo, 0, sizeof(shminfo));
834 		shminfo.shmmni = shminfo.shmseg = ns->shm_ctlmni;
835 		shminfo.shmmax = ns->shm_ctlmax;
836 		shminfo.shmall = ns->shm_ctlall;
837 
838 		shminfo.shmmin = SHMMIN;
839 		if (copy_shminfo_to_user(buf, &shminfo, version))
840 			return -EFAULT;
841 
842 		down_read(&shm_ids(ns).rwsem);
843 		err = ipc_get_maxid(&shm_ids(ns));
844 		up_read(&shm_ids(ns).rwsem);
845 
846 		if (err < 0)
847 			err = 0;
848 		goto out;
849 	}
850 	case SHM_INFO:
851 	{
852 		struct shm_info shm_info;
853 
854 		memset(&shm_info, 0, sizeof(shm_info));
855 		down_read(&shm_ids(ns).rwsem);
856 		shm_info.used_ids = shm_ids(ns).in_use;
857 		shm_get_stat(ns, &shm_info.shm_rss, &shm_info.shm_swp);
858 		shm_info.shm_tot = ns->shm_tot;
859 		shm_info.swap_attempts = 0;
860 		shm_info.swap_successes = 0;
861 		err = ipc_get_maxid(&shm_ids(ns));
862 		up_read(&shm_ids(ns).rwsem);
863 		if (copy_to_user(buf, &shm_info, sizeof(shm_info))) {
864 			err = -EFAULT;
865 			goto out;
866 		}
867 
868 		err = err < 0 ? 0 : err;
869 		goto out;
870 	}
871 	case SHM_STAT:
872 	case IPC_STAT:
873 	{
874 		struct shmid64_ds tbuf;
875 		int result;
876 
877 		rcu_read_lock();
878 		if (cmd == SHM_STAT) {
879 			shp = shm_obtain_object(ns, shmid);
880 			if (IS_ERR(shp)) {
881 				err = PTR_ERR(shp);
882 				goto out_unlock;
883 			}
884 			result = shp->shm_perm.id;
885 		} else {
886 			shp = shm_obtain_object_check(ns, shmid);
887 			if (IS_ERR(shp)) {
888 				err = PTR_ERR(shp);
889 				goto out_unlock;
890 			}
891 			result = 0;
892 		}
893 
894 		err = -EACCES;
895 		if (ipcperms(ns, &shp->shm_perm, S_IRUGO))
896 			goto out_unlock;
897 
898 		err = security_shm_shmctl(shp, cmd);
899 		if (err)
900 			goto out_unlock;
901 
902 		memset(&tbuf, 0, sizeof(tbuf));
903 		kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm);
904 		tbuf.shm_segsz	= shp->shm_segsz;
905 		tbuf.shm_atime	= shp->shm_atim;
906 		tbuf.shm_dtime	= shp->shm_dtim;
907 		tbuf.shm_ctime	= shp->shm_ctim;
908 		tbuf.shm_cpid	= shp->shm_cprid;
909 		tbuf.shm_lpid	= shp->shm_lprid;
910 		tbuf.shm_nattch	= shp->shm_nattch;
911 		rcu_read_unlock();
912 
913 		if (copy_shmid_to_user(buf, &tbuf, version))
914 			err = -EFAULT;
915 		else
916 			err = result;
917 		goto out;
918 	}
919 	default:
920 		return -EINVAL;
921 	}
922 
923 out_unlock:
924 	rcu_read_unlock();
925 out:
926 	return err;
927 }
928 
929 SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
930 {
931 	struct shmid_kernel *shp;
932 	int err, version;
933 	struct ipc_namespace *ns;
934 
935 	if (cmd < 0 || shmid < 0)
936 		return -EINVAL;
937 
938 	version = ipc_parse_version(&cmd);
939 	ns = current->nsproxy->ipc_ns;
940 
941 	switch (cmd) {
942 	case IPC_INFO:
943 	case SHM_INFO:
944 	case SHM_STAT:
945 	case IPC_STAT:
946 		return shmctl_nolock(ns, shmid, cmd, version, buf);
947 	case IPC_RMID:
948 	case IPC_SET:
949 		return shmctl_down(ns, shmid, cmd, buf, version);
950 	case SHM_LOCK:
951 	case SHM_UNLOCK:
952 	{
953 		struct file *shm_file;
954 
955 		rcu_read_lock();
956 		shp = shm_obtain_object_check(ns, shmid);
957 		if (IS_ERR(shp)) {
958 			err = PTR_ERR(shp);
959 			goto out_unlock1;
960 		}
961 
962 		audit_ipc_obj(&(shp->shm_perm));
963 		err = security_shm_shmctl(shp, cmd);
964 		if (err)
965 			goto out_unlock1;
966 
967 		ipc_lock_object(&shp->shm_perm);
968 		if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) {
969 			kuid_t euid = current_euid();
970 			err = -EPERM;
971 			if (!uid_eq(euid, shp->shm_perm.uid) &&
972 			    !uid_eq(euid, shp->shm_perm.cuid))
973 				goto out_unlock0;
974 			if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK))
975 				goto out_unlock0;
976 		}
977 
978 		shm_file = shp->shm_file;
979 		if (is_file_hugepages(shm_file))
980 			goto out_unlock0;
981 
982 		if (cmd == SHM_LOCK) {
983 			struct user_struct *user = current_user();
984 			err = shmem_lock(shm_file, 1, user);
985 			if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) {
986 				shp->shm_perm.mode |= SHM_LOCKED;
987 				shp->mlock_user = user;
988 			}
989 			goto out_unlock0;
990 		}
991 
992 		/* SHM_UNLOCK */
993 		if (!(shp->shm_perm.mode & SHM_LOCKED))
994 			goto out_unlock0;
995 		shmem_lock(shm_file, 0, shp->mlock_user);
996 		shp->shm_perm.mode &= ~SHM_LOCKED;
997 		shp->mlock_user = NULL;
998 		get_file(shm_file);
999 		ipc_unlock_object(&shp->shm_perm);
1000 		rcu_read_unlock();
1001 		shmem_unlock_mapping(shm_file->f_mapping);
1002 
1003 		fput(shm_file);
1004 		return err;
1005 	}
1006 	default:
1007 		return -EINVAL;
1008 	}
1009 
1010 out_unlock0:
1011 	ipc_unlock_object(&shp->shm_perm);
1012 out_unlock1:
1013 	rcu_read_unlock();
1014 	return err;
1015 }
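/*
 * Usage sketch (userspace, not part of this file) exercising the IPC_STAT
 * and IPC_RMID paths above. Note that IPC_RMID only marks a still-attached
 * segment SHM_DEST (see do_shm_rmid()); the segment is freed once
 * shm_nattch drops to zero.
 *
 *	#include <sys/ipc.h>
 *	#include <sys/shm.h>
 *
 *	struct shmid_ds ds;
 *	if (shmctl(shmid, IPC_STAT, &ds) == 0)
 *		shmctl(shmid, IPC_RMID, NULL);	// destroy (or mark SHM_DEST)
 */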
1016 
1017 /*
1018  * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
1019  *
1020  * NOTE! Despite the name, this is NOT a direct system call entrypoint. The
1021  * "raddr" thing points to kernel space, and there has to be a wrapper around
1022  * this.
1023  */
1024 long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
1025 	      unsigned long shmlba)
1026 {
1027 	struct shmid_kernel *shp;
1028 	unsigned long addr;
1029 	unsigned long size;
1030 	struct file *file;
1031 	int err;
1032 	unsigned long flags;
1033 	unsigned long prot;
1034 	int acc_mode;
1035 	struct ipc_namespace *ns;
1036 	struct shm_file_data *sfd;
1037 	struct path path;
1038 	fmode_t f_mode;
1039 	unsigned long populate = 0;
1040 
1041 	err = -EINVAL;
1042 	if (shmid < 0)
1043 		goto out;
1044 	else if ((addr = (ulong)shmaddr)) {
1045 		if (addr & (shmlba - 1)) {
1046 			if (shmflg & SHM_RND)
1047 				addr &= ~(shmlba - 1);	   /* round down */
1048 			else
1049 #ifndef __ARCH_FORCE_SHMLBA
1050 				if (addr & ~PAGE_MASK)
1051 #endif
1052 					goto out;
1053 		}
1054 		flags = MAP_SHARED | MAP_FIXED;
1055 	} else {
1056 		if ((shmflg & SHM_REMAP))
1057 			goto out;
1058 
1059 		flags = MAP_SHARED;
1060 	}
1061 
1062 	if (shmflg & SHM_RDONLY) {
1063 		prot = PROT_READ;
1064 		acc_mode = S_IRUGO;
1065 		f_mode = FMODE_READ;
1066 	} else {
1067 		prot = PROT_READ | PROT_WRITE;
1068 		acc_mode = S_IRUGO | S_IWUGO;
1069 		f_mode = FMODE_READ | FMODE_WRITE;
1070 	}
1071 	if (shmflg & SHM_EXEC) {
1072 		prot |= PROT_EXEC;
1073 		acc_mode |= S_IXUGO;
1074 	}
1075 
1076 	/*
1077 	 * We cannot rely on the fs check since SYSV IPC does have an
1078 	 * additional creator id...
1079 	 */
1080 	ns = current->nsproxy->ipc_ns;
1081 	rcu_read_lock();
1082 	shp = shm_obtain_object_check(ns, shmid);
1083 	if (IS_ERR(shp)) {
1084 		err = PTR_ERR(shp);
1085 		goto out_unlock;
1086 	}
1087 
1088 	err = -EACCES;
1089 	if (ipcperms(ns, &shp->shm_perm, acc_mode))
1090 		goto out_unlock;
1091 
1092 	err = security_shm_shmat(shp, shmaddr, shmflg);
1093 	if (err)
1094 		goto out_unlock;
1095 
1096 	ipc_lock_object(&shp->shm_perm);
1097 	path = shp->shm_file->f_path;
1098 	path_get(&path);
1099 	shp->shm_nattch++;
1100 	size = i_size_read(path.dentry->d_inode);
1101 	ipc_unlock_object(&shp->shm_perm);
1102 	rcu_read_unlock();
1103 
1104 	err = -ENOMEM;
1105 	sfd = kzalloc(sizeof(*sfd), GFP_KERNEL);
1106 	if (!sfd) {
1107 		path_put(&path);
1108 		goto out_nattch;
1109 	}
1110 
1111 	file = alloc_file(&path, f_mode,
1112 			  is_file_hugepages(shp->shm_file) ?
1113 				&shm_file_operations_huge :
1114 				&shm_file_operations);
1115 	err = PTR_ERR(file);
1116 	if (IS_ERR(file)) {
1117 		kfree(sfd);
1118 		path_put(&path);
1119 		goto out_nattch;
1120 	}
1121 
1122 	file->private_data = sfd;
1123 	file->f_mapping = shp->shm_file->f_mapping;
1124 	sfd->id = shp->shm_perm.id;
1125 	sfd->ns = get_ipc_ns(ns);
1126 	sfd->file = shp->shm_file;
1127 	sfd->vm_ops = NULL;
1128 
1129 	err = security_mmap_file(file, prot, flags);
1130 	if (err)
1131 		goto out_fput;
1132 
1133 	down_write(&current->mm->mmap_sem);
1134 	if (addr && !(shmflg & SHM_REMAP)) {
1135 		err = -EINVAL;
1136 		if (find_vma_intersection(current->mm, addr, addr + size))
1137 			goto invalid;
1138 		/*
1139 		 * If shm segment goes below stack, make sure there is some
1140 		 * space left for the stack to grow (at least 4 pages).
1141 		 */
1142 		if (addr < current->mm->start_stack &&
1143 		    addr > current->mm->start_stack - size - PAGE_SIZE * 5)
1144 			goto invalid;
1145 	}
1146 
1147 	addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate);
1148 	*raddr = addr;
1149 	err = 0;
1150 	if (IS_ERR_VALUE(addr))
1151 		err = (long)addr;
1152 invalid:
1153 	up_write(&current->mm->mmap_sem);
1154 	if (populate)
1155 		mm_populate(addr, populate);
1156 
1157 out_fput:
1158 	fput(file);
1159 
1160 out_nattch:
1161 	down_write(&shm_ids(ns).rwsem);
1162 	shp = shm_lock(ns, shmid);
1163 	BUG_ON(IS_ERR(shp));
1164 	shp->shm_nattch--;
1165 	if (shm_may_destroy(ns, shp))
1166 		shm_destroy(ns, shp);
1167 	else
1168 		shm_unlock(shp);
1169 	up_write(&shm_ids(ns).rwsem);
1170 	return err;
1171 
1172 out_unlock:
1173 	rcu_read_unlock();
1174 out:
1175 	return err;
1176 }
1177 
1178 SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg)
1179 {
1180 	unsigned long ret;
1181 	long err;
1182 
1183 	err = do_shmat(shmid, shmaddr, shmflg, &ret, SHMLBA);
1184 	if (err)
1185 		return err;
1186 	force_successful_syscall_return();
1187 	return (long)ret;
1188 }
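/*
 * Usage sketch (userspace, not part of this file): a NULL shmaddr lets
 * do_shmat() pick the mapping address, while SHM_RDONLY selects the
 * read-only prot/acc_mode/f_mode combination above.
 *
 *	#include <sys/shm.h>
 *	#include <stdio.h>
 *
 *	void *p = shmat(shmid, NULL, 0);
 *	if (p == (void *)-1)
 *		perror("shmat");
 */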
1189 
1190 /*
1191  * detach and kill segment if marked destroyed.
1192  * The work is done in shm_close.
1193  */
1194 SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
1195 {
1196 	struct mm_struct *mm = current->mm;
1197 	struct vm_area_struct *vma;
1198 	unsigned long addr = (unsigned long)shmaddr;
1199 	int retval = -EINVAL;
1200 #ifdef CONFIG_MMU
1201 	loff_t size = 0;
1202 	struct vm_area_struct *next;
1203 #endif
1204 
1205 	if (addr & ~PAGE_MASK)
1206 		return retval;
1207 
1208 	down_write(&mm->mmap_sem);
1209 
1210 	/*
1211 	 * This function tries to be smart and unmap shm segments that
1212 	 * were modified by partial mlock or munmap calls:
1213 	 * - It first determines the size of the shm segment that should be
1214 	 *   unmapped: It searches for a vma that is backed by shm and that
1215 	 *   started at address shmaddr. It records its size and then unmaps
1216 	 *   it.
1217 	 * - Then it unmaps all shm vmas that started at shmaddr and that
1218 	 *   are within the initially determined size.
1219 	 * Errors from do_munmap are ignored: the function only fails if
1220 	 * it's called with invalid parameters or if it's called to unmap
1221 	 * a part of a vma. Both calls in this function are for full vmas,
1222 	 * the parameters are directly copied from the vma itself and always
1223 	 * valid - therefore do_munmap cannot fail. (famous last words?)
1224 	 */
1225 	/*
1226 	 * If it had been mremap()'d, the starting address would not
1227 	 * match the usual checks anyway. So assume all vma's are
1228 	 * match the usual checks anyway. So assume all vmas are
1229 	 */
1230 	vma = find_vma(mm, addr);
1231 
1232 #ifdef CONFIG_MMU
1233 	while (vma) {
1234 		next = vma->vm_next;
1235 
1236 		/*
1237 		 * Check if the starting address would match, i.e. it's
1238 		 * a fragment created by mprotect() and/or munmap(), or it
1239 		 * otherwise starts at this address with no hassles.
1240 		 */
1241 		if ((vma->vm_ops == &shm_vm_ops) &&
1242 			(vma->vm_start - addr) / PAGE_SIZE == vma->vm_pgoff) {
1243 
1244 
1245 			size = file_inode(vma->vm_file)->i_size;
1246 			do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
1247 			/*
1248 			 * We discovered the size of the shm segment, so
1249 			 * break out of here and fall through to the next
1250 			 * loop that uses the size information to stop
1251 			 * searching for matching vmas.
1252 			 */
1253 			retval = 0;
1254 			vma = next;
1255 			break;
1256 		}
1257 		vma = next;
1258 	}
1259 
1260 	/*
1261 	 * We need look no further than the maximum address a fragment
1262 	 * could possibly have landed at. Also cast things to loff_t to
1263 	 * prevent overflows and make comparisons vs. equal-width types.
1264 	 */
1265 	size = PAGE_ALIGN(size);
1266 	while (vma && (loff_t)(vma->vm_end - addr) <= size) {
1267 		next = vma->vm_next;
1268 
1269 		/* finding a matching vma now does not alter retval */
1270 		if ((vma->vm_ops == &shm_vm_ops) &&
1271 			(vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff)
1272 			(vma->vm_start - addr) / PAGE_SIZE == vma->vm_pgoff)
1273 			do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
1274 		vma = next;
1275 	}
1276 
1277 #else /* CONFIG_MMU */
1278 	/* under NOMMU conditions, the exact address to be destroyed must be
1279 	 * given */
1280 	if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
1281 		do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
1282 		retval = 0;
1283 	}
1284 
1285 #endif
1286 
1287 	up_write(&mm->mmap_sem);
1288 	return retval;
1289 }
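/*
 * Usage sketch (userspace, not part of this file): shmdt() takes the exact
 * address returned by shmat(); dropping the last attach of a SHM_DEST (or
 * shm_rmid_forced) segment frees it via shm_close() -> shm_destroy().
 *
 *	#include <sys/shm.h>
 *	#include <stdio.h>
 *
 *	if (shmdt(p) != 0)
 *		perror("shmdt");
 */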
1290 
1291 #ifdef CONFIG_PROC_FS
1292 static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
1293 {
1294 	struct user_namespace *user_ns = seq_user_ns(s);
1295 	struct shmid_kernel *shp = it;
1296 	unsigned long rss = 0, swp = 0;
1297 
1298 	shm_add_rss_swap(shp, &rss, &swp);
1299 
1300 #if BITS_PER_LONG <= 32
1301 #define SIZE_SPEC "%10lu"
1302 #else
1303 #define SIZE_SPEC "%21lu"
1304 #endif
1305 
1306 	return seq_printf(s,
1307 			  "%10d %10d  %4o " SIZE_SPEC " %5u %5u  "
1308 			  "%5lu %5u %5u %5u %5u %10lu %10lu %10lu "
1309 			  SIZE_SPEC " " SIZE_SPEC "\n",
1310 			  shp->shm_perm.key,
1311 			  shp->shm_perm.id,
1312 			  shp->shm_perm.mode,
1313 			  shp->shm_segsz,
1314 			  shp->shm_cprid,
1315 			  shp->shm_lprid,
1316 			  shp->shm_nattch,
1317 			  from_kuid_munged(user_ns, shp->shm_perm.uid),
1318 			  from_kgid_munged(user_ns, shp->shm_perm.gid),
1319 			  from_kuid_munged(user_ns, shp->shm_perm.cuid),
1320 			  from_kgid_munged(user_ns, shp->shm_perm.cgid),
1321 			  shp->shm_atim,
1322 			  shp->shm_dtim,
1323 			  shp->shm_ctim,
1324 			  rss * PAGE_SIZE,
1325 			  swp * PAGE_SIZE);
1326 }
1327 #endif
1328