1 /* 2 * linux/ipc/shm.c 3 * Copyright (C) 1992, 1993 Krishna Balasubramanian 4 * Many improvements/fixes by Bruno Haible. 5 * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994. 6 * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli. 7 * 8 * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com> 9 * BIGMEM support, Andrea Arcangeli <andrea@suse.de> 10 * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr> 11 * HIGHMEM support, Ingo Molnar <mingo@redhat.com> 12 * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com> 13 * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com> 14 * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com> 15 * 16 * support for audit of ipc object properties and permission changes 17 * Dustin Kirkland <dustin.kirkland@us.ibm.com> 18 * 19 * namespaces support 20 * OpenVZ, SWsoft Inc. 21 * Pavel Emelianov <xemul@openvz.org> 22 * 23 * Better ipc lock (kern_ipc_perm.lock) handling 24 * Davidlohr Bueso <davidlohr.bueso@hp.com>, June 2013. 25 */ 26 27 #include <linux/slab.h> 28 #include <linux/mm.h> 29 #include <linux/hugetlb.h> 30 #include <linux/shm.h> 31 #include <linux/init.h> 32 #include <linux/file.h> 33 #include <linux/mman.h> 34 #include <linux/shmem_fs.h> 35 #include <linux/security.h> 36 #include <linux/syscalls.h> 37 #include <linux/audit.h> 38 #include <linux/capability.h> 39 #include <linux/ptrace.h> 40 #include <linux/seq_file.h> 41 #include <linux/rwsem.h> 42 #include <linux/nsproxy.h> 43 #include <linux/mount.h> 44 #include <linux/ipc_namespace.h> 45 46 #include <linux/uaccess.h> 47 48 #include "util.h" 49 50 struct shm_file_data { 51 int id; 52 struct ipc_namespace *ns; 53 struct file *file; 54 const struct vm_operations_struct *vm_ops; 55 }; 56 57 #define shm_file_data(file) (*((struct shm_file_data **)&(file)->private_data)) 58 59 static const struct file_operations shm_file_operations; 60 static const struct vm_operations_struct shm_vm_ops; 61 62 #define shm_ids(ns) ((ns)->ids[IPC_SHM_IDS]) 63 64 #define shm_unlock(shp) \ 65 ipc_unlock(&(shp)->shm_perm) 66 67 static int newseg(struct ipc_namespace *, struct ipc_params *); 68 static void shm_open(struct vm_area_struct *vma); 69 static void shm_close(struct vm_area_struct *vma); 70 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp); 71 #ifdef CONFIG_PROC_FS 72 static int sysvipc_shm_proc_show(struct seq_file *s, void *it); 73 #endif 74 75 int shm_init_ns(struct ipc_namespace *ns) 76 { 77 ns->shm_ctlmax = SHMMAX; 78 ns->shm_ctlall = SHMALL; 79 ns->shm_ctlmni = SHMMNI; 80 ns->shm_rmid_forced = 0; 81 ns->shm_tot = 0; 82 return ipc_init_ids(&shm_ids(ns)); 83 } 84 85 /* 86 * Called with shm_ids.rwsem (writer) and the shp structure locked. 87 * Only shm_ids.rwsem remains locked on exit. 88 */ 89 static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) 90 { 91 struct shmid_kernel *shp; 92 93 shp = container_of(ipcp, struct shmid_kernel, shm_perm); 94 95 if (shp->shm_nattch) { 96 shp->shm_perm.mode |= SHM_DEST; 97 /* Do not find it any more */ 98 ipc_set_key_private(&shm_ids(ns), &shp->shm_perm); 99 shm_unlock(shp); 100 } else 101 shm_destroy(ns, shp); 102 } 103 104 #ifdef CONFIG_IPC_NS 105 void shm_exit_ns(struct ipc_namespace *ns) 106 { 107 free_ipcs(ns, &shm_ids(ns), do_shm_rmid); 108 idr_destroy(&ns->ids[IPC_SHM_IDS].ipcs_idr); 109 rhashtable_destroy(&ns->ids[IPC_SHM_IDS].key_ht); 110 } 111 #endif 112 113 static int __init ipc_ns_init(void) 114 { 115 const int err = shm_init_ns(&init_ipc_ns); 116 WARN(err, "ipc: sysv shm_init_ns failed: %d\n", err); 117 return err; 118 } 119 120 pure_initcall(ipc_ns_init); 121 122 void __init shm_init(void) 123 { 124 ipc_init_proc_interface("sysvipc/shm", 125 #if BITS_PER_LONG <= 32 126 " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n", 127 #else 128 " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n", 129 #endif 130 IPC_SHM_IDS, sysvipc_shm_proc_show); 131 } 132 133 static inline struct shmid_kernel *shm_obtain_object(struct ipc_namespace *ns, int id) 134 { 135 struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&shm_ids(ns), id); 136 137 if (IS_ERR(ipcp)) 138 return ERR_CAST(ipcp); 139 140 return container_of(ipcp, struct shmid_kernel, shm_perm); 141 } 142 143 static inline struct shmid_kernel *shm_obtain_object_check(struct ipc_namespace *ns, int id) 144 { 145 struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&shm_ids(ns), id); 146 147 if (IS_ERR(ipcp)) 148 return ERR_CAST(ipcp); 149 150 return container_of(ipcp, struct shmid_kernel, shm_perm); 151 } 152 153 /* 154 * shm_lock_(check_) routines are called in the paths where the rwsem 155 * is not necessarily held. 156 */ 157 static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id) 158 { 159 struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id); 160 161 /* 162 * Callers of shm_lock() must validate the status of the returned ipc 163 * object pointer (as returned by ipc_lock()), and error out as 164 * appropriate. 165 */ 166 if (IS_ERR(ipcp)) 167 return (void *)ipcp; 168 return container_of(ipcp, struct shmid_kernel, shm_perm); 169 } 170 171 static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp) 172 { 173 rcu_read_lock(); 174 ipc_lock_object(&ipcp->shm_perm); 175 } 176 177 static void shm_rcu_free(struct rcu_head *head) 178 { 179 struct kern_ipc_perm *ptr = container_of(head, struct kern_ipc_perm, 180 rcu); 181 struct shmid_kernel *shp = container_of(ptr, struct shmid_kernel, 182 shm_perm); 183 security_shm_free(shp); 184 kvfree(shp); 185 } 186 187 static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s) 188 { 189 list_del(&s->shm_clist); 190 ipc_rmid(&shm_ids(ns), &s->shm_perm); 191 } 192 193 194 static int __shm_open(struct vm_area_struct *vma) 195 { 196 struct file *file = vma->vm_file; 197 struct shm_file_data *sfd = shm_file_data(file); 198 struct shmid_kernel *shp; 199 200 shp = shm_lock(sfd->ns, sfd->id); 201 202 if (IS_ERR(shp)) 203 return PTR_ERR(shp); 204 205 shp->shm_atim = ktime_get_real_seconds(); 206 shp->shm_lprid = task_tgid_vnr(current); 207 shp->shm_nattch++; 208 shm_unlock(shp); 209 return 0; 210 } 211 212 /* This is called by fork, once for every shm attach. */ 213 static void shm_open(struct vm_area_struct *vma) 214 { 215 int err = __shm_open(vma); 216 /* 217 * We raced in the idr lookup or with shm_destroy(). 218 * Either way, the ID is busted. 219 */ 220 WARN_ON_ONCE(err); 221 } 222 223 /* 224 * shm_destroy - free the struct shmid_kernel 225 * 226 * @ns: namespace 227 * @shp: struct to free 228 * 229 * It has to be called with shp and shm_ids.rwsem (writer) locked, 230 * but returns with shp unlocked and freed. 231 */ 232 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) 233 { 234 struct file *shm_file; 235 236 shm_file = shp->shm_file; 237 shp->shm_file = NULL; 238 ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; 239 shm_rmid(ns, shp); 240 shm_unlock(shp); 241 if (!is_file_hugepages(shm_file)) 242 shmem_lock(shm_file, 0, shp->mlock_user); 243 else if (shp->mlock_user) 244 user_shm_unlock(i_size_read(file_inode(shm_file)), 245 shp->mlock_user); 246 fput(shm_file); 247 ipc_rcu_putref(&shp->shm_perm, shm_rcu_free); 248 } 249 250 /* 251 * shm_may_destroy - identifies whether shm segment should be destroyed now 252 * 253 * Returns true if and only if there are no active users of the segment and 254 * one of the following is true: 255 * 256 * 1) shmctl(id, IPC_RMID, NULL) was called for this shp 257 * 258 * 2) sysctl kernel.shm_rmid_forced is set to 1. 259 */ 260 static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) 261 { 262 return (shp->shm_nattch == 0) && 263 (ns->shm_rmid_forced || 264 (shp->shm_perm.mode & SHM_DEST)); 265 } 266 267 /* 268 * remove the attach descriptor vma. 269 * free memory for segment if it is marked destroyed. 270 * The descriptor has already been removed from the current->mm->mmap list 271 * and will later be kfree()d. 272 */ 273 static void shm_close(struct vm_area_struct *vma) 274 { 275 struct file *file = vma->vm_file; 276 struct shm_file_data *sfd = shm_file_data(file); 277 struct shmid_kernel *shp; 278 struct ipc_namespace *ns = sfd->ns; 279 280 down_write(&shm_ids(ns).rwsem); 281 /* remove from the list of attaches of the shm segment */ 282 shp = shm_lock(ns, sfd->id); 283 284 /* 285 * We raced in the idr lookup or with shm_destroy(). 286 * Either way, the ID is busted. 287 */ 288 if (WARN_ON_ONCE(IS_ERR(shp))) 289 goto done; /* no-op */ 290 291 shp->shm_lprid = task_tgid_vnr(current); 292 shp->shm_dtim = ktime_get_real_seconds(); 293 shp->shm_nattch--; 294 if (shm_may_destroy(ns, shp)) 295 shm_destroy(ns, shp); 296 else 297 shm_unlock(shp); 298 done: 299 up_write(&shm_ids(ns).rwsem); 300 } 301 302 /* Called with ns->shm_ids(ns).rwsem locked */ 303 static int shm_try_destroy_orphaned(int id, void *p, void *data) 304 { 305 struct ipc_namespace *ns = data; 306 struct kern_ipc_perm *ipcp = p; 307 struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm); 308 309 /* 310 * We want to destroy segments without users and with already 311 * exit'ed originating process. 312 * 313 * As shp->* are changed under rwsem, it's safe to skip shp locking. 314 */ 315 if (shp->shm_creator != NULL) 316 return 0; 317 318 if (shm_may_destroy(ns, shp)) { 319 shm_lock_by_ptr(shp); 320 shm_destroy(ns, shp); 321 } 322 return 0; 323 } 324 325 void shm_destroy_orphaned(struct ipc_namespace *ns) 326 { 327 down_write(&shm_ids(ns).rwsem); 328 if (shm_ids(ns).in_use) 329 idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns); 330 up_write(&shm_ids(ns).rwsem); 331 } 332 333 /* Locking assumes this will only be called with task == current */ 334 void exit_shm(struct task_struct *task) 335 { 336 struct ipc_namespace *ns = task->nsproxy->ipc_ns; 337 struct shmid_kernel *shp, *n; 338 339 if (list_empty(&task->sysvshm.shm_clist)) 340 return; 341 342 /* 343 * If kernel.shm_rmid_forced is not set then only keep track of 344 * which shmids are orphaned, so that a later set of the sysctl 345 * can clean them up. 346 */ 347 if (!ns->shm_rmid_forced) { 348 down_read(&shm_ids(ns).rwsem); 349 list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist) 350 shp->shm_creator = NULL; 351 /* 352 * Only under read lock but we are only called on current 353 * so no entry on the list will be shared. 354 */ 355 list_del(&task->sysvshm.shm_clist); 356 up_read(&shm_ids(ns).rwsem); 357 return; 358 } 359 360 /* 361 * Destroy all already created segments, that were not yet mapped, 362 * and mark any mapped as orphan to cover the sysctl toggling. 363 * Destroy is skipped if shm_may_destroy() returns false. 364 */ 365 down_write(&shm_ids(ns).rwsem); 366 list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) { 367 shp->shm_creator = NULL; 368 369 if (shm_may_destroy(ns, shp)) { 370 shm_lock_by_ptr(shp); 371 shm_destroy(ns, shp); 372 } 373 } 374 375 /* Remove the list head from any segments still attached. */ 376 list_del(&task->sysvshm.shm_clist); 377 up_write(&shm_ids(ns).rwsem); 378 } 379 380 static int shm_fault(struct vm_fault *vmf) 381 { 382 struct file *file = vmf->vma->vm_file; 383 struct shm_file_data *sfd = shm_file_data(file); 384 385 return sfd->vm_ops->fault(vmf); 386 } 387 388 #ifdef CONFIG_NUMA 389 static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new) 390 { 391 struct file *file = vma->vm_file; 392 struct shm_file_data *sfd = shm_file_data(file); 393 int err = 0; 394 395 if (sfd->vm_ops->set_policy) 396 err = sfd->vm_ops->set_policy(vma, new); 397 return err; 398 } 399 400 static struct mempolicy *shm_get_policy(struct vm_area_struct *vma, 401 unsigned long addr) 402 { 403 struct file *file = vma->vm_file; 404 struct shm_file_data *sfd = shm_file_data(file); 405 struct mempolicy *pol = NULL; 406 407 if (sfd->vm_ops->get_policy) 408 pol = sfd->vm_ops->get_policy(vma, addr); 409 else if (vma->vm_policy) 410 pol = vma->vm_policy; 411 412 return pol; 413 } 414 #endif 415 416 static int shm_mmap(struct file *file, struct vm_area_struct *vma) 417 { 418 struct shm_file_data *sfd = shm_file_data(file); 419 int ret; 420 421 /* 422 * In case of remap_file_pages() emulation, the file can represent 423 * removed IPC ID: propogate shm_lock() error to caller. 424 */ 425 ret = __shm_open(vma); 426 if (ret) 427 return ret; 428 429 ret = call_mmap(sfd->file, vma); 430 if (ret) { 431 shm_close(vma); 432 return ret; 433 } 434 sfd->vm_ops = vma->vm_ops; 435 #ifdef CONFIG_MMU 436 WARN_ON(!sfd->vm_ops->fault); 437 #endif 438 vma->vm_ops = &shm_vm_ops; 439 return 0; 440 } 441 442 static int shm_release(struct inode *ino, struct file *file) 443 { 444 struct shm_file_data *sfd = shm_file_data(file); 445 446 put_ipc_ns(sfd->ns); 447 shm_file_data(file) = NULL; 448 kfree(sfd); 449 return 0; 450 } 451 452 static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync) 453 { 454 struct shm_file_data *sfd = shm_file_data(file); 455 456 if (!sfd->file->f_op->fsync) 457 return -EINVAL; 458 return sfd->file->f_op->fsync(sfd->file, start, end, datasync); 459 } 460 461 static long shm_fallocate(struct file *file, int mode, loff_t offset, 462 loff_t len) 463 { 464 struct shm_file_data *sfd = shm_file_data(file); 465 466 if (!sfd->file->f_op->fallocate) 467 return -EOPNOTSUPP; 468 return sfd->file->f_op->fallocate(file, mode, offset, len); 469 } 470 471 static unsigned long shm_get_unmapped_area(struct file *file, 472 unsigned long addr, unsigned long len, unsigned long pgoff, 473 unsigned long flags) 474 { 475 struct shm_file_data *sfd = shm_file_data(file); 476 477 return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len, 478 pgoff, flags); 479 } 480 481 static const struct file_operations shm_file_operations = { 482 .mmap = shm_mmap, 483 .fsync = shm_fsync, 484 .release = shm_release, 485 .get_unmapped_area = shm_get_unmapped_area, 486 .llseek = noop_llseek, 487 .fallocate = shm_fallocate, 488 }; 489 490 /* 491 * shm_file_operations_huge is now identical to shm_file_operations, 492 * but we keep it distinct for the sake of is_file_shm_hugepages(). 493 */ 494 static const struct file_operations shm_file_operations_huge = { 495 .mmap = shm_mmap, 496 .fsync = shm_fsync, 497 .release = shm_release, 498 .get_unmapped_area = shm_get_unmapped_area, 499 .llseek = noop_llseek, 500 .fallocate = shm_fallocate, 501 }; 502 503 bool is_file_shm_hugepages(struct file *file) 504 { 505 return file->f_op == &shm_file_operations_huge; 506 } 507 508 static const struct vm_operations_struct shm_vm_ops = { 509 .open = shm_open, /* callback for a new vm-area open */ 510 .close = shm_close, /* callback for when the vm-area is released */ 511 .fault = shm_fault, 512 #if defined(CONFIG_NUMA) 513 .set_policy = shm_set_policy, 514 .get_policy = shm_get_policy, 515 #endif 516 }; 517 518 /** 519 * newseg - Create a new shared memory segment 520 * @ns: namespace 521 * @params: ptr to the structure that contains key, size and shmflg 522 * 523 * Called with shm_ids.rwsem held as a writer. 524 */ 525 static int newseg(struct ipc_namespace *ns, struct ipc_params *params) 526 { 527 key_t key = params->key; 528 int shmflg = params->flg; 529 size_t size = params->u.size; 530 int error; 531 struct shmid_kernel *shp; 532 size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; 533 struct file *file; 534 char name[13]; 535 vm_flags_t acctflag = 0; 536 537 if (size < SHMMIN || size > ns->shm_ctlmax) 538 return -EINVAL; 539 540 if (numpages << PAGE_SHIFT < size) 541 return -ENOSPC; 542 543 if (ns->shm_tot + numpages < ns->shm_tot || 544 ns->shm_tot + numpages > ns->shm_ctlall) 545 return -ENOSPC; 546 547 shp = kvmalloc(sizeof(*shp), GFP_KERNEL); 548 if (unlikely(!shp)) 549 return -ENOMEM; 550 551 shp->shm_perm.key = key; 552 shp->shm_perm.mode = (shmflg & S_IRWXUGO); 553 shp->mlock_user = NULL; 554 555 shp->shm_perm.security = NULL; 556 error = security_shm_alloc(shp); 557 if (error) { 558 kvfree(shp); 559 return error; 560 } 561 562 sprintf(name, "SYSV%08x", key); 563 if (shmflg & SHM_HUGETLB) { 564 struct hstate *hs; 565 size_t hugesize; 566 567 hs = hstate_sizelog((shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK); 568 if (!hs) { 569 error = -EINVAL; 570 goto no_file; 571 } 572 hugesize = ALIGN(size, huge_page_size(hs)); 573 574 /* hugetlb_file_setup applies strict accounting */ 575 if (shmflg & SHM_NORESERVE) 576 acctflag = VM_NORESERVE; 577 file = hugetlb_file_setup(name, hugesize, acctflag, 578 &shp->mlock_user, HUGETLB_SHMFS_INODE, 579 (shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK); 580 } else { 581 /* 582 * Do not allow no accounting for OVERCOMMIT_NEVER, even 583 * if it's asked for. 584 */ 585 if ((shmflg & SHM_NORESERVE) && 586 sysctl_overcommit_memory != OVERCOMMIT_NEVER) 587 acctflag = VM_NORESERVE; 588 file = shmem_kernel_file_setup(name, size, acctflag); 589 } 590 error = PTR_ERR(file); 591 if (IS_ERR(file)) 592 goto no_file; 593 594 shp->shm_cprid = task_tgid_vnr(current); 595 shp->shm_lprid = 0; 596 shp->shm_atim = shp->shm_dtim = 0; 597 shp->shm_ctim = ktime_get_real_seconds(); 598 shp->shm_segsz = size; 599 shp->shm_nattch = 0; 600 shp->shm_file = file; 601 shp->shm_creator = current; 602 603 error = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni); 604 if (error < 0) 605 goto no_id; 606 607 list_add(&shp->shm_clist, ¤t->sysvshm.shm_clist); 608 609 /* 610 * shmid gets reported as "inode#" in /proc/pid/maps. 611 * proc-ps tools use this. Changing this will break them. 612 */ 613 file_inode(file)->i_ino = shp->shm_perm.id; 614 615 ns->shm_tot += numpages; 616 error = shp->shm_perm.id; 617 618 ipc_unlock_object(&shp->shm_perm); 619 rcu_read_unlock(); 620 return error; 621 622 no_id: 623 if (is_file_hugepages(file) && shp->mlock_user) 624 user_shm_unlock(size, shp->mlock_user); 625 fput(file); 626 no_file: 627 call_rcu(&shp->shm_perm.rcu, shm_rcu_free); 628 return error; 629 } 630 631 /* 632 * Called with shm_ids.rwsem and ipcp locked. 633 */ 634 static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg) 635 { 636 struct shmid_kernel *shp; 637 638 shp = container_of(ipcp, struct shmid_kernel, shm_perm); 639 return security_shm_associate(shp, shmflg); 640 } 641 642 /* 643 * Called with shm_ids.rwsem and ipcp locked. 644 */ 645 static inline int shm_more_checks(struct kern_ipc_perm *ipcp, 646 struct ipc_params *params) 647 { 648 struct shmid_kernel *shp; 649 650 shp = container_of(ipcp, struct shmid_kernel, shm_perm); 651 if (shp->shm_segsz < params->u.size) 652 return -EINVAL; 653 654 return 0; 655 } 656 657 SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg) 658 { 659 struct ipc_namespace *ns; 660 static const struct ipc_ops shm_ops = { 661 .getnew = newseg, 662 .associate = shm_security, 663 .more_checks = shm_more_checks, 664 }; 665 struct ipc_params shm_params; 666 667 ns = current->nsproxy->ipc_ns; 668 669 shm_params.key = key; 670 shm_params.flg = shmflg; 671 shm_params.u.size = size; 672 673 return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params); 674 } 675 676 static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version) 677 { 678 switch (version) { 679 case IPC_64: 680 return copy_to_user(buf, in, sizeof(*in)); 681 case IPC_OLD: 682 { 683 struct shmid_ds out; 684 685 memset(&out, 0, sizeof(out)); 686 ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm); 687 out.shm_segsz = in->shm_segsz; 688 out.shm_atime = in->shm_atime; 689 out.shm_dtime = in->shm_dtime; 690 out.shm_ctime = in->shm_ctime; 691 out.shm_cpid = in->shm_cpid; 692 out.shm_lpid = in->shm_lpid; 693 out.shm_nattch = in->shm_nattch; 694 695 return copy_to_user(buf, &out, sizeof(out)); 696 } 697 default: 698 return -EINVAL; 699 } 700 } 701 702 static inline unsigned long 703 copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version) 704 { 705 switch (version) { 706 case IPC_64: 707 if (copy_from_user(out, buf, sizeof(*out))) 708 return -EFAULT; 709 return 0; 710 case IPC_OLD: 711 { 712 struct shmid_ds tbuf_old; 713 714 if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) 715 return -EFAULT; 716 717 out->shm_perm.uid = tbuf_old.shm_perm.uid; 718 out->shm_perm.gid = tbuf_old.shm_perm.gid; 719 out->shm_perm.mode = tbuf_old.shm_perm.mode; 720 721 return 0; 722 } 723 default: 724 return -EINVAL; 725 } 726 } 727 728 static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version) 729 { 730 switch (version) { 731 case IPC_64: 732 return copy_to_user(buf, in, sizeof(*in)); 733 case IPC_OLD: 734 { 735 struct shminfo out; 736 737 if (in->shmmax > INT_MAX) 738 out.shmmax = INT_MAX; 739 else 740 out.shmmax = (int)in->shmmax; 741 742 out.shmmin = in->shmmin; 743 out.shmmni = in->shmmni; 744 out.shmseg = in->shmseg; 745 out.shmall = in->shmall; 746 747 return copy_to_user(buf, &out, sizeof(out)); 748 } 749 default: 750 return -EINVAL; 751 } 752 } 753 754 /* 755 * Calculate and add used RSS and swap pages of a shm. 756 * Called with shm_ids.rwsem held as a reader 757 */ 758 static void shm_add_rss_swap(struct shmid_kernel *shp, 759 unsigned long *rss_add, unsigned long *swp_add) 760 { 761 struct inode *inode; 762 763 inode = file_inode(shp->shm_file); 764 765 if (is_file_hugepages(shp->shm_file)) { 766 struct address_space *mapping = inode->i_mapping; 767 struct hstate *h = hstate_file(shp->shm_file); 768 *rss_add += pages_per_huge_page(h) * mapping->nrpages; 769 } else { 770 #ifdef CONFIG_SHMEM 771 struct shmem_inode_info *info = SHMEM_I(inode); 772 773 spin_lock_irq(&info->lock); 774 *rss_add += inode->i_mapping->nrpages; 775 *swp_add += info->swapped; 776 spin_unlock_irq(&info->lock); 777 #else 778 *rss_add += inode->i_mapping->nrpages; 779 #endif 780 } 781 } 782 783 /* 784 * Called with shm_ids.rwsem held as a reader 785 */ 786 static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss, 787 unsigned long *swp) 788 { 789 int next_id; 790 int total, in_use; 791 792 *rss = 0; 793 *swp = 0; 794 795 in_use = shm_ids(ns).in_use; 796 797 for (total = 0, next_id = 0; total < in_use; next_id++) { 798 struct kern_ipc_perm *ipc; 799 struct shmid_kernel *shp; 800 801 ipc = idr_find(&shm_ids(ns).ipcs_idr, next_id); 802 if (ipc == NULL) 803 continue; 804 shp = container_of(ipc, struct shmid_kernel, shm_perm); 805 806 shm_add_rss_swap(shp, rss, swp); 807 808 total++; 809 } 810 } 811 812 /* 813 * This function handles some shmctl commands which require the rwsem 814 * to be held in write mode. 815 * NOTE: no locks must be held, the rwsem is taken inside this function. 816 */ 817 static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd, 818 struct shmid64_ds *shmid64) 819 { 820 struct kern_ipc_perm *ipcp; 821 struct shmid_kernel *shp; 822 int err; 823 824 down_write(&shm_ids(ns).rwsem); 825 rcu_read_lock(); 826 827 ipcp = ipcctl_pre_down_nolock(ns, &shm_ids(ns), shmid, cmd, 828 &shmid64->shm_perm, 0); 829 if (IS_ERR(ipcp)) { 830 err = PTR_ERR(ipcp); 831 goto out_unlock1; 832 } 833 834 shp = container_of(ipcp, struct shmid_kernel, shm_perm); 835 836 err = security_shm_shmctl(shp, cmd); 837 if (err) 838 goto out_unlock1; 839 840 switch (cmd) { 841 case IPC_RMID: 842 ipc_lock_object(&shp->shm_perm); 843 /* do_shm_rmid unlocks the ipc object and rcu */ 844 do_shm_rmid(ns, ipcp); 845 goto out_up; 846 case IPC_SET: 847 ipc_lock_object(&shp->shm_perm); 848 err = ipc_update_perm(&shmid64->shm_perm, ipcp); 849 if (err) 850 goto out_unlock0; 851 shp->shm_ctim = ktime_get_real_seconds(); 852 break; 853 default: 854 err = -EINVAL; 855 goto out_unlock1; 856 } 857 858 out_unlock0: 859 ipc_unlock_object(&shp->shm_perm); 860 out_unlock1: 861 rcu_read_unlock(); 862 out_up: 863 up_write(&shm_ids(ns).rwsem); 864 return err; 865 } 866 867 static int shmctl_ipc_info(struct ipc_namespace *ns, 868 struct shminfo64 *shminfo) 869 { 870 int err = security_shm_shmctl(NULL, IPC_INFO); 871 if (!err) { 872 memset(shminfo, 0, sizeof(*shminfo)); 873 shminfo->shmmni = shminfo->shmseg = ns->shm_ctlmni; 874 shminfo->shmmax = ns->shm_ctlmax; 875 shminfo->shmall = ns->shm_ctlall; 876 shminfo->shmmin = SHMMIN; 877 down_read(&shm_ids(ns).rwsem); 878 err = ipc_get_maxid(&shm_ids(ns)); 879 up_read(&shm_ids(ns).rwsem); 880 if (err < 0) 881 err = 0; 882 } 883 return err; 884 } 885 886 static int shmctl_shm_info(struct ipc_namespace *ns, 887 struct shm_info *shm_info) 888 { 889 int err = security_shm_shmctl(NULL, SHM_INFO); 890 if (!err) { 891 memset(shm_info, 0, sizeof(*shm_info)); 892 down_read(&shm_ids(ns).rwsem); 893 shm_info->used_ids = shm_ids(ns).in_use; 894 shm_get_stat(ns, &shm_info->shm_rss, &shm_info->shm_swp); 895 shm_info->shm_tot = ns->shm_tot; 896 shm_info->swap_attempts = 0; 897 shm_info->swap_successes = 0; 898 err = ipc_get_maxid(&shm_ids(ns)); 899 up_read(&shm_ids(ns).rwsem); 900 if (err < 0) 901 err = 0; 902 } 903 return err; 904 } 905 906 static int shmctl_stat(struct ipc_namespace *ns, int shmid, 907 int cmd, struct shmid64_ds *tbuf) 908 { 909 struct shmid_kernel *shp; 910 int result; 911 int err; 912 913 rcu_read_lock(); 914 if (cmd == SHM_STAT) { 915 shp = shm_obtain_object(ns, shmid); 916 if (IS_ERR(shp)) { 917 err = PTR_ERR(shp); 918 goto out_unlock; 919 } 920 result = shp->shm_perm.id; 921 } else { 922 shp = shm_obtain_object_check(ns, shmid); 923 if (IS_ERR(shp)) { 924 err = PTR_ERR(shp); 925 goto out_unlock; 926 } 927 result = 0; 928 } 929 930 err = -EACCES; 931 if (ipcperms(ns, &shp->shm_perm, S_IRUGO)) 932 goto out_unlock; 933 934 err = security_shm_shmctl(shp, cmd); 935 if (err) 936 goto out_unlock; 937 938 memset(tbuf, 0, sizeof(*tbuf)); 939 kernel_to_ipc64_perm(&shp->shm_perm, &tbuf->shm_perm); 940 tbuf->shm_segsz = shp->shm_segsz; 941 tbuf->shm_atime = shp->shm_atim; 942 tbuf->shm_dtime = shp->shm_dtim; 943 tbuf->shm_ctime = shp->shm_ctim; 944 tbuf->shm_cpid = shp->shm_cprid; 945 tbuf->shm_lpid = shp->shm_lprid; 946 tbuf->shm_nattch = shp->shm_nattch; 947 rcu_read_unlock(); 948 return result; 949 950 out_unlock: 951 rcu_read_unlock(); 952 return err; 953 } 954 955 static int shmctl_do_lock(struct ipc_namespace *ns, int shmid, int cmd) 956 { 957 struct shmid_kernel *shp; 958 struct file *shm_file; 959 int err; 960 961 rcu_read_lock(); 962 shp = shm_obtain_object_check(ns, shmid); 963 if (IS_ERR(shp)) { 964 err = PTR_ERR(shp); 965 goto out_unlock1; 966 } 967 968 audit_ipc_obj(&(shp->shm_perm)); 969 err = security_shm_shmctl(shp, cmd); 970 if (err) 971 goto out_unlock1; 972 973 ipc_lock_object(&shp->shm_perm); 974 975 /* check if shm_destroy() is tearing down shp */ 976 if (!ipc_valid_object(&shp->shm_perm)) { 977 err = -EIDRM; 978 goto out_unlock0; 979 } 980 981 if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) { 982 kuid_t euid = current_euid(); 983 984 if (!uid_eq(euid, shp->shm_perm.uid) && 985 !uid_eq(euid, shp->shm_perm.cuid)) { 986 err = -EPERM; 987 goto out_unlock0; 988 } 989 if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) { 990 err = -EPERM; 991 goto out_unlock0; 992 } 993 } 994 995 shm_file = shp->shm_file; 996 if (is_file_hugepages(shm_file)) 997 goto out_unlock0; 998 999 if (cmd == SHM_LOCK) { 1000 struct user_struct *user = current_user(); 1001 1002 err = shmem_lock(shm_file, 1, user); 1003 if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) { 1004 shp->shm_perm.mode |= SHM_LOCKED; 1005 shp->mlock_user = user; 1006 } 1007 goto out_unlock0; 1008 } 1009 1010 /* SHM_UNLOCK */ 1011 if (!(shp->shm_perm.mode & SHM_LOCKED)) 1012 goto out_unlock0; 1013 shmem_lock(shm_file, 0, shp->mlock_user); 1014 shp->shm_perm.mode &= ~SHM_LOCKED; 1015 shp->mlock_user = NULL; 1016 get_file(shm_file); 1017 ipc_unlock_object(&shp->shm_perm); 1018 rcu_read_unlock(); 1019 shmem_unlock_mapping(shm_file->f_mapping); 1020 1021 fput(shm_file); 1022 return err; 1023 1024 out_unlock0: 1025 ipc_unlock_object(&shp->shm_perm); 1026 out_unlock1: 1027 rcu_read_unlock(); 1028 return err; 1029 } 1030 1031 SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) 1032 { 1033 int err, version; 1034 struct ipc_namespace *ns; 1035 struct shmid64_ds sem64; 1036 1037 if (cmd < 0 || shmid < 0) 1038 return -EINVAL; 1039 1040 version = ipc_parse_version(&cmd); 1041 ns = current->nsproxy->ipc_ns; 1042 1043 switch (cmd) { 1044 case IPC_INFO: { 1045 struct shminfo64 shminfo; 1046 err = shmctl_ipc_info(ns, &shminfo); 1047 if (err < 0) 1048 return err; 1049 if (copy_shminfo_to_user(buf, &shminfo, version)) 1050 err = -EFAULT; 1051 return err; 1052 } 1053 case SHM_INFO: { 1054 struct shm_info shm_info; 1055 err = shmctl_shm_info(ns, &shm_info); 1056 if (err < 0) 1057 return err; 1058 if (copy_to_user(buf, &shm_info, sizeof(shm_info))) 1059 err = -EFAULT; 1060 return err; 1061 } 1062 case SHM_STAT: 1063 case IPC_STAT: { 1064 err = shmctl_stat(ns, shmid, cmd, &sem64); 1065 if (err < 0) 1066 return err; 1067 if (copy_shmid_to_user(buf, &sem64, version)) 1068 err = -EFAULT; 1069 return err; 1070 } 1071 case IPC_SET: 1072 if (copy_shmid_from_user(&sem64, buf, version)) 1073 return -EFAULT; 1074 /* fallthru */ 1075 case IPC_RMID: 1076 return shmctl_down(ns, shmid, cmd, &sem64); 1077 case SHM_LOCK: 1078 case SHM_UNLOCK: 1079 return shmctl_do_lock(ns, shmid, cmd); 1080 default: 1081 return -EINVAL; 1082 } 1083 } 1084 1085 #ifdef CONFIG_COMPAT 1086 1087 struct compat_shmid_ds { 1088 struct compat_ipc_perm shm_perm; 1089 int shm_segsz; 1090 compat_time_t shm_atime; 1091 compat_time_t shm_dtime; 1092 compat_time_t shm_ctime; 1093 compat_ipc_pid_t shm_cpid; 1094 compat_ipc_pid_t shm_lpid; 1095 unsigned short shm_nattch; 1096 unsigned short shm_unused; 1097 compat_uptr_t shm_unused2; 1098 compat_uptr_t shm_unused3; 1099 }; 1100 1101 struct compat_shminfo64 { 1102 compat_ulong_t shmmax; 1103 compat_ulong_t shmmin; 1104 compat_ulong_t shmmni; 1105 compat_ulong_t shmseg; 1106 compat_ulong_t shmall; 1107 compat_ulong_t __unused1; 1108 compat_ulong_t __unused2; 1109 compat_ulong_t __unused3; 1110 compat_ulong_t __unused4; 1111 }; 1112 1113 struct compat_shm_info { 1114 compat_int_t used_ids; 1115 compat_ulong_t shm_tot, shm_rss, shm_swp; 1116 compat_ulong_t swap_attempts, swap_successes; 1117 }; 1118 1119 static int copy_compat_shminfo_to_user(void __user *buf, struct shminfo64 *in, 1120 int version) 1121 { 1122 if (in->shmmax > INT_MAX) 1123 in->shmmax = INT_MAX; 1124 if (version == IPC_64) { 1125 struct compat_shminfo64 info; 1126 memset(&info, 0, sizeof(info)); 1127 info.shmmax = in->shmmax; 1128 info.shmmin = in->shmmin; 1129 info.shmmni = in->shmmni; 1130 info.shmseg = in->shmseg; 1131 info.shmall = in->shmall; 1132 return copy_to_user(buf, &info, sizeof(info)); 1133 } else { 1134 struct shminfo info; 1135 memset(&info, 0, sizeof(info)); 1136 info.shmmax = in->shmmax; 1137 info.shmmin = in->shmmin; 1138 info.shmmni = in->shmmni; 1139 info.shmseg = in->shmseg; 1140 info.shmall = in->shmall; 1141 return copy_to_user(buf, &info, sizeof(info)); 1142 } 1143 } 1144 1145 static int put_compat_shm_info(struct shm_info *ip, 1146 struct compat_shm_info __user *uip) 1147 { 1148 struct compat_shm_info info; 1149 1150 memset(&info, 0, sizeof(info)); 1151 info.used_ids = ip->used_ids; 1152 info.shm_tot = ip->shm_tot; 1153 info.shm_rss = ip->shm_rss; 1154 info.shm_swp = ip->shm_swp; 1155 info.swap_attempts = ip->swap_attempts; 1156 info.swap_successes = ip->swap_successes; 1157 return copy_to_user(uip, &info, sizeof(info)); 1158 } 1159 1160 static int copy_compat_shmid_to_user(void __user *buf, struct shmid64_ds *in, 1161 int version) 1162 { 1163 if (version == IPC_64) { 1164 struct compat_shmid64_ds v; 1165 memset(&v, 0, sizeof(v)); 1166 to_compat_ipc64_perm(&v.shm_perm, &in->shm_perm); 1167 v.shm_atime = in->shm_atime; 1168 v.shm_dtime = in->shm_dtime; 1169 v.shm_ctime = in->shm_ctime; 1170 v.shm_segsz = in->shm_segsz; 1171 v.shm_nattch = in->shm_nattch; 1172 v.shm_cpid = in->shm_cpid; 1173 v.shm_lpid = in->shm_lpid; 1174 return copy_to_user(buf, &v, sizeof(v)); 1175 } else { 1176 struct compat_shmid_ds v; 1177 memset(&v, 0, sizeof(v)); 1178 to_compat_ipc_perm(&v.shm_perm, &in->shm_perm); 1179 v.shm_perm.key = in->shm_perm.key; 1180 v.shm_atime = in->shm_atime; 1181 v.shm_dtime = in->shm_dtime; 1182 v.shm_ctime = in->shm_ctime; 1183 v.shm_segsz = in->shm_segsz; 1184 v.shm_nattch = in->shm_nattch; 1185 v.shm_cpid = in->shm_cpid; 1186 v.shm_lpid = in->shm_lpid; 1187 return copy_to_user(buf, &v, sizeof(v)); 1188 } 1189 } 1190 1191 static int copy_compat_shmid_from_user(struct shmid64_ds *out, void __user *buf, 1192 int version) 1193 { 1194 memset(out, 0, sizeof(*out)); 1195 if (version == IPC_64) { 1196 struct compat_shmid64_ds *p = buf; 1197 return get_compat_ipc64_perm(&out->shm_perm, &p->shm_perm); 1198 } else { 1199 struct compat_shmid_ds *p = buf; 1200 return get_compat_ipc_perm(&out->shm_perm, &p->shm_perm); 1201 } 1202 } 1203 1204 COMPAT_SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, void __user *, uptr) 1205 { 1206 struct ipc_namespace *ns; 1207 struct shmid64_ds sem64; 1208 int version = compat_ipc_parse_version(&cmd); 1209 int err; 1210 1211 ns = current->nsproxy->ipc_ns; 1212 1213 if (cmd < 0 || shmid < 0) 1214 return -EINVAL; 1215 1216 switch (cmd) { 1217 case IPC_INFO: { 1218 struct shminfo64 shminfo; 1219 err = shmctl_ipc_info(ns, &shminfo); 1220 if (err < 0) 1221 return err; 1222 if (copy_compat_shminfo_to_user(uptr, &shminfo, version)) 1223 err = -EFAULT; 1224 return err; 1225 } 1226 case SHM_INFO: { 1227 struct shm_info shm_info; 1228 err = shmctl_shm_info(ns, &shm_info); 1229 if (err < 0) 1230 return err; 1231 if (put_compat_shm_info(&shm_info, uptr)) 1232 err = -EFAULT; 1233 return err; 1234 } 1235 case IPC_STAT: 1236 case SHM_STAT: 1237 err = shmctl_stat(ns, shmid, cmd, &sem64); 1238 if (err < 0) 1239 return err; 1240 if (copy_compat_shmid_to_user(uptr, &sem64, version)) 1241 err = -EFAULT; 1242 return err; 1243 1244 case IPC_SET: 1245 if (copy_compat_shmid_from_user(&sem64, uptr, version)) 1246 return -EFAULT; 1247 /* fallthru */ 1248 case IPC_RMID: 1249 return shmctl_down(ns, shmid, cmd, &sem64); 1250 case SHM_LOCK: 1251 case SHM_UNLOCK: 1252 return shmctl_do_lock(ns, shmid, cmd); 1253 break; 1254 default: 1255 return -EINVAL; 1256 } 1257 return err; 1258 } 1259 #endif 1260 1261 /* 1262 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists. 1263 * 1264 * NOTE! Despite the name, this is NOT a direct system call entrypoint. The 1265 * "raddr" thing points to kernel space, and there has to be a wrapper around 1266 * this. 1267 */ 1268 long do_shmat(int shmid, char __user *shmaddr, int shmflg, 1269 ulong *raddr, unsigned long shmlba) 1270 { 1271 struct shmid_kernel *shp; 1272 unsigned long addr = (unsigned long)shmaddr; 1273 unsigned long size; 1274 struct file *file; 1275 int err; 1276 unsigned long flags = MAP_SHARED; 1277 unsigned long prot; 1278 int acc_mode; 1279 struct ipc_namespace *ns; 1280 struct shm_file_data *sfd; 1281 struct path path; 1282 fmode_t f_mode; 1283 unsigned long populate = 0; 1284 1285 err = -EINVAL; 1286 if (shmid < 0) 1287 goto out; 1288 1289 if (addr) { 1290 if (addr & (shmlba - 1)) { 1291 /* 1292 * Round down to the nearest multiple of shmlba. 1293 * For sane do_mmap_pgoff() parameters, avoid 1294 * round downs that trigger nil-page and MAP_FIXED. 1295 */ 1296 if ((shmflg & SHM_RND) && addr >= shmlba) 1297 addr &= ~(shmlba - 1); 1298 else 1299 #ifndef __ARCH_FORCE_SHMLBA 1300 if (addr & ~PAGE_MASK) 1301 #endif 1302 goto out; 1303 } 1304 1305 flags |= MAP_FIXED; 1306 } else if ((shmflg & SHM_REMAP)) 1307 goto out; 1308 1309 if (shmflg & SHM_RDONLY) { 1310 prot = PROT_READ; 1311 acc_mode = S_IRUGO; 1312 f_mode = FMODE_READ; 1313 } else { 1314 prot = PROT_READ | PROT_WRITE; 1315 acc_mode = S_IRUGO | S_IWUGO; 1316 f_mode = FMODE_READ | FMODE_WRITE; 1317 } 1318 if (shmflg & SHM_EXEC) { 1319 prot |= PROT_EXEC; 1320 acc_mode |= S_IXUGO; 1321 } 1322 1323 /* 1324 * We cannot rely on the fs check since SYSV IPC does have an 1325 * additional creator id... 1326 */ 1327 ns = current->nsproxy->ipc_ns; 1328 rcu_read_lock(); 1329 shp = shm_obtain_object_check(ns, shmid); 1330 if (IS_ERR(shp)) { 1331 err = PTR_ERR(shp); 1332 goto out_unlock; 1333 } 1334 1335 err = -EACCES; 1336 if (ipcperms(ns, &shp->shm_perm, acc_mode)) 1337 goto out_unlock; 1338 1339 err = security_shm_shmat(shp, shmaddr, shmflg); 1340 if (err) 1341 goto out_unlock; 1342 1343 ipc_lock_object(&shp->shm_perm); 1344 1345 /* check if shm_destroy() is tearing down shp */ 1346 if (!ipc_valid_object(&shp->shm_perm)) { 1347 ipc_unlock_object(&shp->shm_perm); 1348 err = -EIDRM; 1349 goto out_unlock; 1350 } 1351 1352 path = shp->shm_file->f_path; 1353 path_get(&path); 1354 shp->shm_nattch++; 1355 size = i_size_read(d_inode(path.dentry)); 1356 ipc_unlock_object(&shp->shm_perm); 1357 rcu_read_unlock(); 1358 1359 err = -ENOMEM; 1360 sfd = kzalloc(sizeof(*sfd), GFP_KERNEL); 1361 if (!sfd) { 1362 path_put(&path); 1363 goto out_nattch; 1364 } 1365 1366 file = alloc_file(&path, f_mode, 1367 is_file_hugepages(shp->shm_file) ? 1368 &shm_file_operations_huge : 1369 &shm_file_operations); 1370 err = PTR_ERR(file); 1371 if (IS_ERR(file)) { 1372 kfree(sfd); 1373 path_put(&path); 1374 goto out_nattch; 1375 } 1376 1377 file->private_data = sfd; 1378 file->f_mapping = shp->shm_file->f_mapping; 1379 sfd->id = shp->shm_perm.id; 1380 sfd->ns = get_ipc_ns(ns); 1381 sfd->file = shp->shm_file; 1382 sfd->vm_ops = NULL; 1383 1384 err = security_mmap_file(file, prot, flags); 1385 if (err) 1386 goto out_fput; 1387 1388 if (down_write_killable(¤t->mm->mmap_sem)) { 1389 err = -EINTR; 1390 goto out_fput; 1391 } 1392 1393 if (addr && !(shmflg & SHM_REMAP)) { 1394 err = -EINVAL; 1395 if (addr + size < addr) 1396 goto invalid; 1397 1398 if (find_vma_intersection(current->mm, addr, addr + size)) 1399 goto invalid; 1400 } 1401 1402 addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate, NULL); 1403 *raddr = addr; 1404 err = 0; 1405 if (IS_ERR_VALUE(addr)) 1406 err = (long)addr; 1407 invalid: 1408 up_write(¤t->mm->mmap_sem); 1409 if (populate) 1410 mm_populate(addr, populate); 1411 1412 out_fput: 1413 fput(file); 1414 1415 out_nattch: 1416 down_write(&shm_ids(ns).rwsem); 1417 shp = shm_lock(ns, shmid); 1418 shp->shm_nattch--; 1419 if (shm_may_destroy(ns, shp)) 1420 shm_destroy(ns, shp); 1421 else 1422 shm_unlock(shp); 1423 up_write(&shm_ids(ns).rwsem); 1424 return err; 1425 1426 out_unlock: 1427 rcu_read_unlock(); 1428 out: 1429 return err; 1430 } 1431 1432 SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg) 1433 { 1434 unsigned long ret; 1435 long err; 1436 1437 err = do_shmat(shmid, shmaddr, shmflg, &ret, SHMLBA); 1438 if (err) 1439 return err; 1440 force_successful_syscall_return(); 1441 return (long)ret; 1442 } 1443 1444 #ifdef CONFIG_COMPAT 1445 1446 #ifndef COMPAT_SHMLBA 1447 #define COMPAT_SHMLBA SHMLBA 1448 #endif 1449 1450 COMPAT_SYSCALL_DEFINE3(shmat, int, shmid, compat_uptr_t, shmaddr, int, shmflg) 1451 { 1452 unsigned long ret; 1453 long err; 1454 1455 err = do_shmat(shmid, compat_ptr(shmaddr), shmflg, &ret, COMPAT_SHMLBA); 1456 if (err) 1457 return err; 1458 force_successful_syscall_return(); 1459 return (long)ret; 1460 } 1461 #endif 1462 1463 /* 1464 * detach and kill segment if marked destroyed. 1465 * The work is done in shm_close. 1466 */ 1467 SYSCALL_DEFINE1(shmdt, char __user *, shmaddr) 1468 { 1469 struct mm_struct *mm = current->mm; 1470 struct vm_area_struct *vma; 1471 unsigned long addr = (unsigned long)shmaddr; 1472 int retval = -EINVAL; 1473 #ifdef CONFIG_MMU 1474 loff_t size = 0; 1475 struct file *file; 1476 struct vm_area_struct *next; 1477 #endif 1478 1479 if (addr & ~PAGE_MASK) 1480 return retval; 1481 1482 if (down_write_killable(&mm->mmap_sem)) 1483 return -EINTR; 1484 1485 /* 1486 * This function tries to be smart and unmap shm segments that 1487 * were modified by partial mlock or munmap calls: 1488 * - It first determines the size of the shm segment that should be 1489 * unmapped: It searches for a vma that is backed by shm and that 1490 * started at address shmaddr. It records it's size and then unmaps 1491 * it. 1492 * - Then it unmaps all shm vmas that started at shmaddr and that 1493 * are within the initially determined size and that are from the 1494 * same shm segment from which we determined the size. 1495 * Errors from do_munmap are ignored: the function only fails if 1496 * it's called with invalid parameters or if it's called to unmap 1497 * a part of a vma. Both calls in this function are for full vmas, 1498 * the parameters are directly copied from the vma itself and always 1499 * valid - therefore do_munmap cannot fail. (famous last words?) 1500 */ 1501 /* 1502 * If it had been mremap()'d, the starting address would not 1503 * match the usual checks anyway. So assume all vma's are 1504 * above the starting address given. 1505 */ 1506 vma = find_vma(mm, addr); 1507 1508 #ifdef CONFIG_MMU 1509 while (vma) { 1510 next = vma->vm_next; 1511 1512 /* 1513 * Check if the starting address would match, i.e. it's 1514 * a fragment created by mprotect() and/or munmap(), or it 1515 * otherwise it starts at this address with no hassles. 1516 */ 1517 if ((vma->vm_ops == &shm_vm_ops) && 1518 (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) { 1519 1520 /* 1521 * Record the file of the shm segment being 1522 * unmapped. With mremap(), someone could place 1523 * page from another segment but with equal offsets 1524 * in the range we are unmapping. 1525 */ 1526 file = vma->vm_file; 1527 size = i_size_read(file_inode(vma->vm_file)); 1528 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL); 1529 /* 1530 * We discovered the size of the shm segment, so 1531 * break out of here and fall through to the next 1532 * loop that uses the size information to stop 1533 * searching for matching vma's. 1534 */ 1535 retval = 0; 1536 vma = next; 1537 break; 1538 } 1539 vma = next; 1540 } 1541 1542 /* 1543 * We need look no further than the maximum address a fragment 1544 * could possibly have landed at. Also cast things to loff_t to 1545 * prevent overflows and make comparisons vs. equal-width types. 1546 */ 1547 size = PAGE_ALIGN(size); 1548 while (vma && (loff_t)(vma->vm_end - addr) <= size) { 1549 next = vma->vm_next; 1550 1551 /* finding a matching vma now does not alter retval */ 1552 if ((vma->vm_ops == &shm_vm_ops) && 1553 ((vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) && 1554 (vma->vm_file == file)) 1555 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL); 1556 vma = next; 1557 } 1558 1559 #else /* CONFIG_MMU */ 1560 /* under NOMMU conditions, the exact address to be destroyed must be 1561 * given 1562 */ 1563 if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) { 1564 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL); 1565 retval = 0; 1566 } 1567 1568 #endif 1569 1570 up_write(&mm->mmap_sem); 1571 return retval; 1572 } 1573 1574 #ifdef CONFIG_PROC_FS 1575 static int sysvipc_shm_proc_show(struct seq_file *s, void *it) 1576 { 1577 struct user_namespace *user_ns = seq_user_ns(s); 1578 struct kern_ipc_perm *ipcp = it; 1579 struct shmid_kernel *shp; 1580 unsigned long rss = 0, swp = 0; 1581 1582 shp = container_of(ipcp, struct shmid_kernel, shm_perm); 1583 shm_add_rss_swap(shp, &rss, &swp); 1584 1585 #if BITS_PER_LONG <= 32 1586 #define SIZE_SPEC "%10lu" 1587 #else 1588 #define SIZE_SPEC "%21lu" 1589 #endif 1590 1591 seq_printf(s, 1592 "%10d %10d %4o " SIZE_SPEC " %5u %5u " 1593 "%5lu %5u %5u %5u %5u %10llu %10llu %10llu " 1594 SIZE_SPEC " " SIZE_SPEC "\n", 1595 shp->shm_perm.key, 1596 shp->shm_perm.id, 1597 shp->shm_perm.mode, 1598 shp->shm_segsz, 1599 shp->shm_cprid, 1600 shp->shm_lprid, 1601 shp->shm_nattch, 1602 from_kuid_munged(user_ns, shp->shm_perm.uid), 1603 from_kgid_munged(user_ns, shp->shm_perm.gid), 1604 from_kuid_munged(user_ns, shp->shm_perm.cuid), 1605 from_kgid_munged(user_ns, shp->shm_perm.cgid), 1606 shp->shm_atim, 1607 shp->shm_dtim, 1608 shp->shm_ctim, 1609 rss * PAGE_SIZE, 1610 swp * PAGE_SIZE); 1611 1612 return 0; 1613 } 1614 #endif 1615