1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * linux/ipc/shm.c 4 * Copyright (C) 1992, 1993 Krishna Balasubramanian 5 * Many improvements/fixes by Bruno Haible. 6 * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994. 7 * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli. 8 * 9 * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com> 10 * BIGMEM support, Andrea Arcangeli <andrea@suse.de> 11 * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr> 12 * HIGHMEM support, Ingo Molnar <mingo@redhat.com> 13 * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com> 14 * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com> 15 * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com> 16 * 17 * support for audit of ipc object properties and permission changes 18 * Dustin Kirkland <dustin.kirkland@us.ibm.com> 19 * 20 * namespaces support 21 * OpenVZ, SWsoft Inc. 22 * Pavel Emelianov <xemul@openvz.org> 23 * 24 * Better ipc lock (kern_ipc_perm.lock) handling 25 * Davidlohr Bueso <davidlohr.bueso@hp.com>, June 2013. 26 */ 27 28 #include <linux/slab.h> 29 #include <linux/mm.h> 30 #include <linux/hugetlb.h> 31 #include <linux/shm.h> 32 #include <linux/init.h> 33 #include <linux/file.h> 34 #include <linux/mman.h> 35 #include <linux/shmem_fs.h> 36 #include <linux/security.h> 37 #include <linux/syscalls.h> 38 #include <linux/audit.h> 39 #include <linux/capability.h> 40 #include <linux/ptrace.h> 41 #include <linux/seq_file.h> 42 #include <linux/rwsem.h> 43 #include <linux/nsproxy.h> 44 #include <linux/mount.h> 45 #include <linux/ipc_namespace.h> 46 47 #include <linux/uaccess.h> 48 49 #include "util.h" 50 51 struct shmid_kernel /* private to the kernel */ 52 { 53 struct kern_ipc_perm shm_perm; 54 struct file *shm_file; 55 unsigned long shm_nattch; 56 unsigned long shm_segsz; 57 time64_t shm_atim; 58 time64_t shm_dtim; 59 time64_t shm_ctim; 60 struct pid *shm_cprid; 61 struct pid *shm_lprid; 62 struct user_struct *mlock_user; 63 64 /* The task created the shm object. NULL if the task is dead. */ 65 struct task_struct *shm_creator; 66 struct list_head shm_clist; /* list by creator */ 67 } __randomize_layout; 68 69 /* shm_mode upper byte flags */ 70 #define SHM_DEST 01000 /* segment will be destroyed on last detach */ 71 #define SHM_LOCKED 02000 /* segment will not be swapped */ 72 73 struct shm_file_data { 74 int id; 75 struct ipc_namespace *ns; 76 struct file *file; 77 const struct vm_operations_struct *vm_ops; 78 }; 79 80 #define shm_file_data(file) (*((struct shm_file_data **)&(file)->private_data)) 81 82 static const struct file_operations shm_file_operations; 83 static const struct vm_operations_struct shm_vm_ops; 84 85 #define shm_ids(ns) ((ns)->ids[IPC_SHM_IDS]) 86 87 #define shm_unlock(shp) \ 88 ipc_unlock(&(shp)->shm_perm) 89 90 static int newseg(struct ipc_namespace *, struct ipc_params *); 91 static void shm_open(struct vm_area_struct *vma); 92 static void shm_close(struct vm_area_struct *vma); 93 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp); 94 #ifdef CONFIG_PROC_FS 95 static int sysvipc_shm_proc_show(struct seq_file *s, void *it); 96 #endif 97 98 int shm_init_ns(struct ipc_namespace *ns) 99 { 100 ns->shm_ctlmax = SHMMAX; 101 ns->shm_ctlall = SHMALL; 102 ns->shm_ctlmni = SHMMNI; 103 ns->shm_rmid_forced = 0; 104 ns->shm_tot = 0; 105 return ipc_init_ids(&shm_ids(ns)); 106 } 107 108 /* 109 * Called with shm_ids.rwsem (writer) and the shp structure locked. 110 * Only shm_ids.rwsem remains locked on exit. 111 */ 112 static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) 113 { 114 struct shmid_kernel *shp; 115 116 shp = container_of(ipcp, struct shmid_kernel, shm_perm); 117 118 if (shp->shm_nattch) { 119 shp->shm_perm.mode |= SHM_DEST; 120 /* Do not find it any more */ 121 ipc_set_key_private(&shm_ids(ns), &shp->shm_perm); 122 shm_unlock(shp); 123 } else 124 shm_destroy(ns, shp); 125 } 126 127 #ifdef CONFIG_IPC_NS 128 void shm_exit_ns(struct ipc_namespace *ns) 129 { 130 free_ipcs(ns, &shm_ids(ns), do_shm_rmid); 131 idr_destroy(&ns->ids[IPC_SHM_IDS].ipcs_idr); 132 rhashtable_destroy(&ns->ids[IPC_SHM_IDS].key_ht); 133 } 134 #endif 135 136 static int __init ipc_ns_init(void) 137 { 138 const int err = shm_init_ns(&init_ipc_ns); 139 WARN(err, "ipc: sysv shm_init_ns failed: %d\n", err); 140 return err; 141 } 142 143 pure_initcall(ipc_ns_init); 144 145 void __init shm_init(void) 146 { 147 ipc_init_proc_interface("sysvipc/shm", 148 #if BITS_PER_LONG <= 32 149 " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n", 150 #else 151 " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n", 152 #endif 153 IPC_SHM_IDS, sysvipc_shm_proc_show); 154 } 155 156 static inline struct shmid_kernel *shm_obtain_object(struct ipc_namespace *ns, int id) 157 { 158 struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&shm_ids(ns), id); 159 160 if (IS_ERR(ipcp)) 161 return ERR_CAST(ipcp); 162 163 return container_of(ipcp, struct shmid_kernel, shm_perm); 164 } 165 166 static inline struct shmid_kernel *shm_obtain_object_check(struct ipc_namespace *ns, int id) 167 { 168 struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&shm_ids(ns), id); 169 170 if (IS_ERR(ipcp)) 171 return ERR_CAST(ipcp); 172 173 return container_of(ipcp, struct shmid_kernel, shm_perm); 174 } 175 176 /* 177 * shm_lock_(check_) routines are called in the paths where the rwsem 178 * is not necessarily held. 179 */ 180 static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id) 181 { 182 struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id); 183 184 /* 185 * Callers of shm_lock() must validate the status of the returned ipc 186 * object pointer (as returned by ipc_lock()), and error out as 187 * appropriate. 188 */ 189 if (IS_ERR(ipcp)) 190 return (void *)ipcp; 191 return container_of(ipcp, struct shmid_kernel, shm_perm); 192 } 193 194 static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp) 195 { 196 rcu_read_lock(); 197 ipc_lock_object(&ipcp->shm_perm); 198 } 199 200 static void shm_rcu_free(struct rcu_head *head) 201 { 202 struct kern_ipc_perm *ptr = container_of(head, struct kern_ipc_perm, 203 rcu); 204 struct shmid_kernel *shp = container_of(ptr, struct shmid_kernel, 205 shm_perm); 206 security_shm_free(&shp->shm_perm); 207 kvfree(shp); 208 } 209 210 static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s) 211 { 212 list_del(&s->shm_clist); 213 ipc_rmid(&shm_ids(ns), &s->shm_perm); 214 } 215 216 217 static int __shm_open(struct vm_area_struct *vma) 218 { 219 struct file *file = vma->vm_file; 220 struct shm_file_data *sfd = shm_file_data(file); 221 struct shmid_kernel *shp; 222 223 shp = shm_lock(sfd->ns, sfd->id); 224 225 if (IS_ERR(shp)) 226 return PTR_ERR(shp); 227 228 shp->shm_atim = ktime_get_real_seconds(); 229 ipc_update_pid(&shp->shm_lprid, task_tgid(current)); 230 shp->shm_nattch++; 231 shm_unlock(shp); 232 return 0; 233 } 234 235 /* This is called by fork, once for every shm attach. */ 236 static void shm_open(struct vm_area_struct *vma) 237 { 238 int err = __shm_open(vma); 239 /* 240 * We raced in the idr lookup or with shm_destroy(). 241 * Either way, the ID is busted. 242 */ 243 WARN_ON_ONCE(err); 244 } 245 246 /* 247 * shm_destroy - free the struct shmid_kernel 248 * 249 * @ns: namespace 250 * @shp: struct to free 251 * 252 * It has to be called with shp and shm_ids.rwsem (writer) locked, 253 * but returns with shp unlocked and freed. 254 */ 255 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) 256 { 257 struct file *shm_file; 258 259 shm_file = shp->shm_file; 260 shp->shm_file = NULL; 261 ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; 262 shm_rmid(ns, shp); 263 shm_unlock(shp); 264 if (!is_file_hugepages(shm_file)) 265 shmem_lock(shm_file, 0, shp->mlock_user); 266 else if (shp->mlock_user) 267 user_shm_unlock(i_size_read(file_inode(shm_file)), 268 shp->mlock_user); 269 fput(shm_file); 270 ipc_update_pid(&shp->shm_cprid, NULL); 271 ipc_update_pid(&shp->shm_lprid, NULL); 272 ipc_rcu_putref(&shp->shm_perm, shm_rcu_free); 273 } 274 275 /* 276 * shm_may_destroy - identifies whether shm segment should be destroyed now 277 * 278 * Returns true if and only if there are no active users of the segment and 279 * one of the following is true: 280 * 281 * 1) shmctl(id, IPC_RMID, NULL) was called for this shp 282 * 283 * 2) sysctl kernel.shm_rmid_forced is set to 1. 284 */ 285 static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) 286 { 287 return (shp->shm_nattch == 0) && 288 (ns->shm_rmid_forced || 289 (shp->shm_perm.mode & SHM_DEST)); 290 } 291 292 /* 293 * remove the attach descriptor vma. 294 * free memory for segment if it is marked destroyed. 295 * The descriptor has already been removed from the current->mm->mmap list 296 * and will later be kfree()d. 297 */ 298 static void shm_close(struct vm_area_struct *vma) 299 { 300 struct file *file = vma->vm_file; 301 struct shm_file_data *sfd = shm_file_data(file); 302 struct shmid_kernel *shp; 303 struct ipc_namespace *ns = sfd->ns; 304 305 down_write(&shm_ids(ns).rwsem); 306 /* remove from the list of attaches of the shm segment */ 307 shp = shm_lock(ns, sfd->id); 308 309 /* 310 * We raced in the idr lookup or with shm_destroy(). 311 * Either way, the ID is busted. 312 */ 313 if (WARN_ON_ONCE(IS_ERR(shp))) 314 goto done; /* no-op */ 315 316 ipc_update_pid(&shp->shm_lprid, task_tgid(current)); 317 shp->shm_dtim = ktime_get_real_seconds(); 318 shp->shm_nattch--; 319 if (shm_may_destroy(ns, shp)) 320 shm_destroy(ns, shp); 321 else 322 shm_unlock(shp); 323 done: 324 up_write(&shm_ids(ns).rwsem); 325 } 326 327 /* Called with ns->shm_ids(ns).rwsem locked */ 328 static int shm_try_destroy_orphaned(int id, void *p, void *data) 329 { 330 struct ipc_namespace *ns = data; 331 struct kern_ipc_perm *ipcp = p; 332 struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm); 333 334 /* 335 * We want to destroy segments without users and with already 336 * exit'ed originating process. 337 * 338 * As shp->* are changed under rwsem, it's safe to skip shp locking. 339 */ 340 if (shp->shm_creator != NULL) 341 return 0; 342 343 if (shm_may_destroy(ns, shp)) { 344 shm_lock_by_ptr(shp); 345 shm_destroy(ns, shp); 346 } 347 return 0; 348 } 349 350 void shm_destroy_orphaned(struct ipc_namespace *ns) 351 { 352 down_write(&shm_ids(ns).rwsem); 353 if (shm_ids(ns).in_use) 354 idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns); 355 up_write(&shm_ids(ns).rwsem); 356 } 357 358 /* Locking assumes this will only be called with task == current */ 359 void exit_shm(struct task_struct *task) 360 { 361 struct ipc_namespace *ns = task->nsproxy->ipc_ns; 362 struct shmid_kernel *shp, *n; 363 364 if (list_empty(&task->sysvshm.shm_clist)) 365 return; 366 367 /* 368 * If kernel.shm_rmid_forced is not set then only keep track of 369 * which shmids are orphaned, so that a later set of the sysctl 370 * can clean them up. 371 */ 372 if (!ns->shm_rmid_forced) { 373 down_read(&shm_ids(ns).rwsem); 374 list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist) 375 shp->shm_creator = NULL; 376 /* 377 * Only under read lock but we are only called on current 378 * so no entry on the list will be shared. 379 */ 380 list_del(&task->sysvshm.shm_clist); 381 up_read(&shm_ids(ns).rwsem); 382 return; 383 } 384 385 /* 386 * Destroy all already created segments, that were not yet mapped, 387 * and mark any mapped as orphan to cover the sysctl toggling. 388 * Destroy is skipped if shm_may_destroy() returns false. 389 */ 390 down_write(&shm_ids(ns).rwsem); 391 list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) { 392 shp->shm_creator = NULL; 393 394 if (shm_may_destroy(ns, shp)) { 395 shm_lock_by_ptr(shp); 396 shm_destroy(ns, shp); 397 } 398 } 399 400 /* Remove the list head from any segments still attached. */ 401 list_del(&task->sysvshm.shm_clist); 402 up_write(&shm_ids(ns).rwsem); 403 } 404 405 static int shm_fault(struct vm_fault *vmf) 406 { 407 struct file *file = vmf->vma->vm_file; 408 struct shm_file_data *sfd = shm_file_data(file); 409 410 return sfd->vm_ops->fault(vmf); 411 } 412 413 static int shm_split(struct vm_area_struct *vma, unsigned long addr) 414 { 415 struct file *file = vma->vm_file; 416 struct shm_file_data *sfd = shm_file_data(file); 417 418 if (sfd->vm_ops->split) 419 return sfd->vm_ops->split(vma, addr); 420 421 return 0; 422 } 423 424 #ifdef CONFIG_NUMA 425 static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new) 426 { 427 struct file *file = vma->vm_file; 428 struct shm_file_data *sfd = shm_file_data(file); 429 int err = 0; 430 431 if (sfd->vm_ops->set_policy) 432 err = sfd->vm_ops->set_policy(vma, new); 433 return err; 434 } 435 436 static struct mempolicy *shm_get_policy(struct vm_area_struct *vma, 437 unsigned long addr) 438 { 439 struct file *file = vma->vm_file; 440 struct shm_file_data *sfd = shm_file_data(file); 441 struct mempolicy *pol = NULL; 442 443 if (sfd->vm_ops->get_policy) 444 pol = sfd->vm_ops->get_policy(vma, addr); 445 else if (vma->vm_policy) 446 pol = vma->vm_policy; 447 448 return pol; 449 } 450 #endif 451 452 static int shm_mmap(struct file *file, struct vm_area_struct *vma) 453 { 454 struct shm_file_data *sfd = shm_file_data(file); 455 int ret; 456 457 /* 458 * In case of remap_file_pages() emulation, the file can represent 459 * removed IPC ID: propogate shm_lock() error to caller. 460 */ 461 ret = __shm_open(vma); 462 if (ret) 463 return ret; 464 465 ret = call_mmap(sfd->file, vma); 466 if (ret) { 467 shm_close(vma); 468 return ret; 469 } 470 sfd->vm_ops = vma->vm_ops; 471 #ifdef CONFIG_MMU 472 WARN_ON(!sfd->vm_ops->fault); 473 #endif 474 vma->vm_ops = &shm_vm_ops; 475 return 0; 476 } 477 478 static int shm_release(struct inode *ino, struct file *file) 479 { 480 struct shm_file_data *sfd = shm_file_data(file); 481 482 put_ipc_ns(sfd->ns); 483 shm_file_data(file) = NULL; 484 kfree(sfd); 485 return 0; 486 } 487 488 static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync) 489 { 490 struct shm_file_data *sfd = shm_file_data(file); 491 492 if (!sfd->file->f_op->fsync) 493 return -EINVAL; 494 return sfd->file->f_op->fsync(sfd->file, start, end, datasync); 495 } 496 497 static long shm_fallocate(struct file *file, int mode, loff_t offset, 498 loff_t len) 499 { 500 struct shm_file_data *sfd = shm_file_data(file); 501 502 if (!sfd->file->f_op->fallocate) 503 return -EOPNOTSUPP; 504 return sfd->file->f_op->fallocate(file, mode, offset, len); 505 } 506 507 static unsigned long shm_get_unmapped_area(struct file *file, 508 unsigned long addr, unsigned long len, unsigned long pgoff, 509 unsigned long flags) 510 { 511 struct shm_file_data *sfd = shm_file_data(file); 512 513 return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len, 514 pgoff, flags); 515 } 516 517 static const struct file_operations shm_file_operations = { 518 .mmap = shm_mmap, 519 .fsync = shm_fsync, 520 .release = shm_release, 521 .get_unmapped_area = shm_get_unmapped_area, 522 .llseek = noop_llseek, 523 .fallocate = shm_fallocate, 524 }; 525 526 /* 527 * shm_file_operations_huge is now identical to shm_file_operations, 528 * but we keep it distinct for the sake of is_file_shm_hugepages(). 529 */ 530 static const struct file_operations shm_file_operations_huge = { 531 .mmap = shm_mmap, 532 .fsync = shm_fsync, 533 .release = shm_release, 534 .get_unmapped_area = shm_get_unmapped_area, 535 .llseek = noop_llseek, 536 .fallocate = shm_fallocate, 537 }; 538 539 bool is_file_shm_hugepages(struct file *file) 540 { 541 return file->f_op == &shm_file_operations_huge; 542 } 543 544 static const struct vm_operations_struct shm_vm_ops = { 545 .open = shm_open, /* callback for a new vm-area open */ 546 .close = shm_close, /* callback for when the vm-area is released */ 547 .fault = shm_fault, 548 .split = shm_split, 549 #if defined(CONFIG_NUMA) 550 .set_policy = shm_set_policy, 551 .get_policy = shm_get_policy, 552 #endif 553 }; 554 555 /** 556 * newseg - Create a new shared memory segment 557 * @ns: namespace 558 * @params: ptr to the structure that contains key, size and shmflg 559 * 560 * Called with shm_ids.rwsem held as a writer. 561 */ 562 static int newseg(struct ipc_namespace *ns, struct ipc_params *params) 563 { 564 key_t key = params->key; 565 int shmflg = params->flg; 566 size_t size = params->u.size; 567 int error; 568 struct shmid_kernel *shp; 569 size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; 570 struct file *file; 571 char name[13]; 572 vm_flags_t acctflag = 0; 573 574 if (size < SHMMIN || size > ns->shm_ctlmax) 575 return -EINVAL; 576 577 if (numpages << PAGE_SHIFT < size) 578 return -ENOSPC; 579 580 if (ns->shm_tot + numpages < ns->shm_tot || 581 ns->shm_tot + numpages > ns->shm_ctlall) 582 return -ENOSPC; 583 584 shp = kvmalloc(sizeof(*shp), GFP_KERNEL); 585 if (unlikely(!shp)) 586 return -ENOMEM; 587 588 shp->shm_perm.key = key; 589 shp->shm_perm.mode = (shmflg & S_IRWXUGO); 590 shp->mlock_user = NULL; 591 592 shp->shm_perm.security = NULL; 593 error = security_shm_alloc(&shp->shm_perm); 594 if (error) { 595 kvfree(shp); 596 return error; 597 } 598 599 sprintf(name, "SYSV%08x", key); 600 if (shmflg & SHM_HUGETLB) { 601 struct hstate *hs; 602 size_t hugesize; 603 604 hs = hstate_sizelog((shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK); 605 if (!hs) { 606 error = -EINVAL; 607 goto no_file; 608 } 609 hugesize = ALIGN(size, huge_page_size(hs)); 610 611 /* hugetlb_file_setup applies strict accounting */ 612 if (shmflg & SHM_NORESERVE) 613 acctflag = VM_NORESERVE; 614 file = hugetlb_file_setup(name, hugesize, acctflag, 615 &shp->mlock_user, HUGETLB_SHMFS_INODE, 616 (shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK); 617 } else { 618 /* 619 * Do not allow no accounting for OVERCOMMIT_NEVER, even 620 * if it's asked for. 621 */ 622 if ((shmflg & SHM_NORESERVE) && 623 sysctl_overcommit_memory != OVERCOMMIT_NEVER) 624 acctflag = VM_NORESERVE; 625 file = shmem_kernel_file_setup(name, size, acctflag); 626 } 627 error = PTR_ERR(file); 628 if (IS_ERR(file)) 629 goto no_file; 630 631 shp->shm_cprid = get_pid(task_tgid(current)); 632 shp->shm_lprid = NULL; 633 shp->shm_atim = shp->shm_dtim = 0; 634 shp->shm_ctim = ktime_get_real_seconds(); 635 shp->shm_segsz = size; 636 shp->shm_nattch = 0; 637 shp->shm_file = file; 638 shp->shm_creator = current; 639 640 /* ipc_addid() locks shp upon success. */ 641 error = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni); 642 if (error < 0) 643 goto no_id; 644 645 list_add(&shp->shm_clist, ¤t->sysvshm.shm_clist); 646 647 /* 648 * shmid gets reported as "inode#" in /proc/pid/maps. 649 * proc-ps tools use this. Changing this will break them. 650 */ 651 file_inode(file)->i_ino = shp->shm_perm.id; 652 653 ns->shm_tot += numpages; 654 error = shp->shm_perm.id; 655 656 ipc_unlock_object(&shp->shm_perm); 657 rcu_read_unlock(); 658 return error; 659 660 no_id: 661 ipc_update_pid(&shp->shm_cprid, NULL); 662 ipc_update_pid(&shp->shm_lprid, NULL); 663 if (is_file_hugepages(file) && shp->mlock_user) 664 user_shm_unlock(size, shp->mlock_user); 665 fput(file); 666 no_file: 667 call_rcu(&shp->shm_perm.rcu, shm_rcu_free); 668 return error; 669 } 670 671 /* 672 * Called with shm_ids.rwsem and ipcp locked. 673 */ 674 static inline int shm_more_checks(struct kern_ipc_perm *ipcp, 675 struct ipc_params *params) 676 { 677 struct shmid_kernel *shp; 678 679 shp = container_of(ipcp, struct shmid_kernel, shm_perm); 680 if (shp->shm_segsz < params->u.size) 681 return -EINVAL; 682 683 return 0; 684 } 685 686 long ksys_shmget(key_t key, size_t size, int shmflg) 687 { 688 struct ipc_namespace *ns; 689 static const struct ipc_ops shm_ops = { 690 .getnew = newseg, 691 .associate = security_shm_associate, 692 .more_checks = shm_more_checks, 693 }; 694 struct ipc_params shm_params; 695 696 ns = current->nsproxy->ipc_ns; 697 698 shm_params.key = key; 699 shm_params.flg = shmflg; 700 shm_params.u.size = size; 701 702 return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params); 703 } 704 705 SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg) 706 { 707 return ksys_shmget(key, size, shmflg); 708 } 709 710 static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version) 711 { 712 switch (version) { 713 case IPC_64: 714 return copy_to_user(buf, in, sizeof(*in)); 715 case IPC_OLD: 716 { 717 struct shmid_ds out; 718 719 memset(&out, 0, sizeof(out)); 720 ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm); 721 out.shm_segsz = in->shm_segsz; 722 out.shm_atime = in->shm_atime; 723 out.shm_dtime = in->shm_dtime; 724 out.shm_ctime = in->shm_ctime; 725 out.shm_cpid = in->shm_cpid; 726 out.shm_lpid = in->shm_lpid; 727 out.shm_nattch = in->shm_nattch; 728 729 return copy_to_user(buf, &out, sizeof(out)); 730 } 731 default: 732 return -EINVAL; 733 } 734 } 735 736 static inline unsigned long 737 copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version) 738 { 739 switch (version) { 740 case IPC_64: 741 if (copy_from_user(out, buf, sizeof(*out))) 742 return -EFAULT; 743 return 0; 744 case IPC_OLD: 745 { 746 struct shmid_ds tbuf_old; 747 748 if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) 749 return -EFAULT; 750 751 out->shm_perm.uid = tbuf_old.shm_perm.uid; 752 out->shm_perm.gid = tbuf_old.shm_perm.gid; 753 out->shm_perm.mode = tbuf_old.shm_perm.mode; 754 755 return 0; 756 } 757 default: 758 return -EINVAL; 759 } 760 } 761 762 static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version) 763 { 764 switch (version) { 765 case IPC_64: 766 return copy_to_user(buf, in, sizeof(*in)); 767 case IPC_OLD: 768 { 769 struct shminfo out; 770 771 if (in->shmmax > INT_MAX) 772 out.shmmax = INT_MAX; 773 else 774 out.shmmax = (int)in->shmmax; 775 776 out.shmmin = in->shmmin; 777 out.shmmni = in->shmmni; 778 out.shmseg = in->shmseg; 779 out.shmall = in->shmall; 780 781 return copy_to_user(buf, &out, sizeof(out)); 782 } 783 default: 784 return -EINVAL; 785 } 786 } 787 788 /* 789 * Calculate and add used RSS and swap pages of a shm. 790 * Called with shm_ids.rwsem held as a reader 791 */ 792 static void shm_add_rss_swap(struct shmid_kernel *shp, 793 unsigned long *rss_add, unsigned long *swp_add) 794 { 795 struct inode *inode; 796 797 inode = file_inode(shp->shm_file); 798 799 if (is_file_hugepages(shp->shm_file)) { 800 struct address_space *mapping = inode->i_mapping; 801 struct hstate *h = hstate_file(shp->shm_file); 802 *rss_add += pages_per_huge_page(h) * mapping->nrpages; 803 } else { 804 #ifdef CONFIG_SHMEM 805 struct shmem_inode_info *info = SHMEM_I(inode); 806 807 spin_lock_irq(&info->lock); 808 *rss_add += inode->i_mapping->nrpages; 809 *swp_add += info->swapped; 810 spin_unlock_irq(&info->lock); 811 #else 812 *rss_add += inode->i_mapping->nrpages; 813 #endif 814 } 815 } 816 817 /* 818 * Called with shm_ids.rwsem held as a reader 819 */ 820 static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss, 821 unsigned long *swp) 822 { 823 int next_id; 824 int total, in_use; 825 826 *rss = 0; 827 *swp = 0; 828 829 in_use = shm_ids(ns).in_use; 830 831 for (total = 0, next_id = 0; total < in_use; next_id++) { 832 struct kern_ipc_perm *ipc; 833 struct shmid_kernel *shp; 834 835 ipc = idr_find(&shm_ids(ns).ipcs_idr, next_id); 836 if (ipc == NULL) 837 continue; 838 shp = container_of(ipc, struct shmid_kernel, shm_perm); 839 840 shm_add_rss_swap(shp, rss, swp); 841 842 total++; 843 } 844 } 845 846 /* 847 * This function handles some shmctl commands which require the rwsem 848 * to be held in write mode. 849 * NOTE: no locks must be held, the rwsem is taken inside this function. 850 */ 851 static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd, 852 struct shmid64_ds *shmid64) 853 { 854 struct kern_ipc_perm *ipcp; 855 struct shmid_kernel *shp; 856 int err; 857 858 down_write(&shm_ids(ns).rwsem); 859 rcu_read_lock(); 860 861 ipcp = ipcctl_pre_down_nolock(ns, &shm_ids(ns), shmid, cmd, 862 &shmid64->shm_perm, 0); 863 if (IS_ERR(ipcp)) { 864 err = PTR_ERR(ipcp); 865 goto out_unlock1; 866 } 867 868 shp = container_of(ipcp, struct shmid_kernel, shm_perm); 869 870 err = security_shm_shmctl(&shp->shm_perm, cmd); 871 if (err) 872 goto out_unlock1; 873 874 switch (cmd) { 875 case IPC_RMID: 876 ipc_lock_object(&shp->shm_perm); 877 /* do_shm_rmid unlocks the ipc object and rcu */ 878 do_shm_rmid(ns, ipcp); 879 goto out_up; 880 case IPC_SET: 881 ipc_lock_object(&shp->shm_perm); 882 err = ipc_update_perm(&shmid64->shm_perm, ipcp); 883 if (err) 884 goto out_unlock0; 885 shp->shm_ctim = ktime_get_real_seconds(); 886 break; 887 default: 888 err = -EINVAL; 889 goto out_unlock1; 890 } 891 892 out_unlock0: 893 ipc_unlock_object(&shp->shm_perm); 894 out_unlock1: 895 rcu_read_unlock(); 896 out_up: 897 up_write(&shm_ids(ns).rwsem); 898 return err; 899 } 900 901 static int shmctl_ipc_info(struct ipc_namespace *ns, 902 struct shminfo64 *shminfo) 903 { 904 int err = security_shm_shmctl(NULL, IPC_INFO); 905 if (!err) { 906 memset(shminfo, 0, sizeof(*shminfo)); 907 shminfo->shmmni = shminfo->shmseg = ns->shm_ctlmni; 908 shminfo->shmmax = ns->shm_ctlmax; 909 shminfo->shmall = ns->shm_ctlall; 910 shminfo->shmmin = SHMMIN; 911 down_read(&shm_ids(ns).rwsem); 912 err = ipc_get_maxid(&shm_ids(ns)); 913 up_read(&shm_ids(ns).rwsem); 914 if (err < 0) 915 err = 0; 916 } 917 return err; 918 } 919 920 static int shmctl_shm_info(struct ipc_namespace *ns, 921 struct shm_info *shm_info) 922 { 923 int err = security_shm_shmctl(NULL, SHM_INFO); 924 if (!err) { 925 memset(shm_info, 0, sizeof(*shm_info)); 926 down_read(&shm_ids(ns).rwsem); 927 shm_info->used_ids = shm_ids(ns).in_use; 928 shm_get_stat(ns, &shm_info->shm_rss, &shm_info->shm_swp); 929 shm_info->shm_tot = ns->shm_tot; 930 shm_info->swap_attempts = 0; 931 shm_info->swap_successes = 0; 932 err = ipc_get_maxid(&shm_ids(ns)); 933 up_read(&shm_ids(ns).rwsem); 934 if (err < 0) 935 err = 0; 936 } 937 return err; 938 } 939 940 static int shmctl_stat(struct ipc_namespace *ns, int shmid, 941 int cmd, struct shmid64_ds *tbuf) 942 { 943 struct shmid_kernel *shp; 944 int id = 0; 945 int err; 946 947 memset(tbuf, 0, sizeof(*tbuf)); 948 949 rcu_read_lock(); 950 if (cmd == SHM_STAT || cmd == SHM_STAT_ANY) { 951 shp = shm_obtain_object(ns, shmid); 952 if (IS_ERR(shp)) { 953 err = PTR_ERR(shp); 954 goto out_unlock; 955 } 956 id = shp->shm_perm.id; 957 } else { /* IPC_STAT */ 958 shp = shm_obtain_object_check(ns, shmid); 959 if (IS_ERR(shp)) { 960 err = PTR_ERR(shp); 961 goto out_unlock; 962 } 963 } 964 965 /* 966 * Semantically SHM_STAT_ANY ought to be identical to 967 * that functionality provided by the /proc/sysvipc/ 968 * interface. As such, only audit these calls and 969 * do not do traditional S_IRUGO permission checks on 970 * the ipc object. 971 */ 972 if (cmd == SHM_STAT_ANY) 973 audit_ipc_obj(&shp->shm_perm); 974 else { 975 err = -EACCES; 976 if (ipcperms(ns, &shp->shm_perm, S_IRUGO)) 977 goto out_unlock; 978 } 979 980 err = security_shm_shmctl(&shp->shm_perm, cmd); 981 if (err) 982 goto out_unlock; 983 984 ipc_lock_object(&shp->shm_perm); 985 986 if (!ipc_valid_object(&shp->shm_perm)) { 987 ipc_unlock_object(&shp->shm_perm); 988 err = -EIDRM; 989 goto out_unlock; 990 } 991 992 kernel_to_ipc64_perm(&shp->shm_perm, &tbuf->shm_perm); 993 tbuf->shm_segsz = shp->shm_segsz; 994 tbuf->shm_atime = shp->shm_atim; 995 tbuf->shm_dtime = shp->shm_dtim; 996 tbuf->shm_ctime = shp->shm_ctim; 997 tbuf->shm_cpid = pid_vnr(shp->shm_cprid); 998 tbuf->shm_lpid = pid_vnr(shp->shm_lprid); 999 tbuf->shm_nattch = shp->shm_nattch; 1000 1001 ipc_unlock_object(&shp->shm_perm); 1002 rcu_read_unlock(); 1003 return id; 1004 1005 out_unlock: 1006 rcu_read_unlock(); 1007 return err; 1008 } 1009 1010 static int shmctl_do_lock(struct ipc_namespace *ns, int shmid, int cmd) 1011 { 1012 struct shmid_kernel *shp; 1013 struct file *shm_file; 1014 int err; 1015 1016 rcu_read_lock(); 1017 shp = shm_obtain_object_check(ns, shmid); 1018 if (IS_ERR(shp)) { 1019 err = PTR_ERR(shp); 1020 goto out_unlock1; 1021 } 1022 1023 audit_ipc_obj(&(shp->shm_perm)); 1024 err = security_shm_shmctl(&shp->shm_perm, cmd); 1025 if (err) 1026 goto out_unlock1; 1027 1028 ipc_lock_object(&shp->shm_perm); 1029 1030 /* check if shm_destroy() is tearing down shp */ 1031 if (!ipc_valid_object(&shp->shm_perm)) { 1032 err = -EIDRM; 1033 goto out_unlock0; 1034 } 1035 1036 if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) { 1037 kuid_t euid = current_euid(); 1038 1039 if (!uid_eq(euid, shp->shm_perm.uid) && 1040 !uid_eq(euid, shp->shm_perm.cuid)) { 1041 err = -EPERM; 1042 goto out_unlock0; 1043 } 1044 if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) { 1045 err = -EPERM; 1046 goto out_unlock0; 1047 } 1048 } 1049 1050 shm_file = shp->shm_file; 1051 if (is_file_hugepages(shm_file)) 1052 goto out_unlock0; 1053 1054 if (cmd == SHM_LOCK) { 1055 struct user_struct *user = current_user(); 1056 1057 err = shmem_lock(shm_file, 1, user); 1058 if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) { 1059 shp->shm_perm.mode |= SHM_LOCKED; 1060 shp->mlock_user = user; 1061 } 1062 goto out_unlock0; 1063 } 1064 1065 /* SHM_UNLOCK */ 1066 if (!(shp->shm_perm.mode & SHM_LOCKED)) 1067 goto out_unlock0; 1068 shmem_lock(shm_file, 0, shp->mlock_user); 1069 shp->shm_perm.mode &= ~SHM_LOCKED; 1070 shp->mlock_user = NULL; 1071 get_file(shm_file); 1072 ipc_unlock_object(&shp->shm_perm); 1073 rcu_read_unlock(); 1074 shmem_unlock_mapping(shm_file->f_mapping); 1075 1076 fput(shm_file); 1077 return err; 1078 1079 out_unlock0: 1080 ipc_unlock_object(&shp->shm_perm); 1081 out_unlock1: 1082 rcu_read_unlock(); 1083 return err; 1084 } 1085 1086 long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf) 1087 { 1088 int err, version; 1089 struct ipc_namespace *ns; 1090 struct shmid64_ds sem64; 1091 1092 if (cmd < 0 || shmid < 0) 1093 return -EINVAL; 1094 1095 version = ipc_parse_version(&cmd); 1096 ns = current->nsproxy->ipc_ns; 1097 1098 switch (cmd) { 1099 case IPC_INFO: { 1100 struct shminfo64 shminfo; 1101 err = shmctl_ipc_info(ns, &shminfo); 1102 if (err < 0) 1103 return err; 1104 if (copy_shminfo_to_user(buf, &shminfo, version)) 1105 err = -EFAULT; 1106 return err; 1107 } 1108 case SHM_INFO: { 1109 struct shm_info shm_info; 1110 err = shmctl_shm_info(ns, &shm_info); 1111 if (err < 0) 1112 return err; 1113 if (copy_to_user(buf, &shm_info, sizeof(shm_info))) 1114 err = -EFAULT; 1115 return err; 1116 } 1117 case SHM_STAT: 1118 case SHM_STAT_ANY: 1119 case IPC_STAT: { 1120 err = shmctl_stat(ns, shmid, cmd, &sem64); 1121 if (err < 0) 1122 return err; 1123 if (copy_shmid_to_user(buf, &sem64, version)) 1124 err = -EFAULT; 1125 return err; 1126 } 1127 case IPC_SET: 1128 if (copy_shmid_from_user(&sem64, buf, version)) 1129 return -EFAULT; 1130 /* fallthru */ 1131 case IPC_RMID: 1132 return shmctl_down(ns, shmid, cmd, &sem64); 1133 case SHM_LOCK: 1134 case SHM_UNLOCK: 1135 return shmctl_do_lock(ns, shmid, cmd); 1136 default: 1137 return -EINVAL; 1138 } 1139 } 1140 1141 SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) 1142 { 1143 return ksys_shmctl(shmid, cmd, buf); 1144 } 1145 1146 #ifdef CONFIG_COMPAT 1147 1148 struct compat_shmid_ds { 1149 struct compat_ipc_perm shm_perm; 1150 int shm_segsz; 1151 compat_time_t shm_atime; 1152 compat_time_t shm_dtime; 1153 compat_time_t shm_ctime; 1154 compat_ipc_pid_t shm_cpid; 1155 compat_ipc_pid_t shm_lpid; 1156 unsigned short shm_nattch; 1157 unsigned short shm_unused; 1158 compat_uptr_t shm_unused2; 1159 compat_uptr_t shm_unused3; 1160 }; 1161 1162 struct compat_shminfo64 { 1163 compat_ulong_t shmmax; 1164 compat_ulong_t shmmin; 1165 compat_ulong_t shmmni; 1166 compat_ulong_t shmseg; 1167 compat_ulong_t shmall; 1168 compat_ulong_t __unused1; 1169 compat_ulong_t __unused2; 1170 compat_ulong_t __unused3; 1171 compat_ulong_t __unused4; 1172 }; 1173 1174 struct compat_shm_info { 1175 compat_int_t used_ids; 1176 compat_ulong_t shm_tot, shm_rss, shm_swp; 1177 compat_ulong_t swap_attempts, swap_successes; 1178 }; 1179 1180 static int copy_compat_shminfo_to_user(void __user *buf, struct shminfo64 *in, 1181 int version) 1182 { 1183 if (in->shmmax > INT_MAX) 1184 in->shmmax = INT_MAX; 1185 if (version == IPC_64) { 1186 struct compat_shminfo64 info; 1187 memset(&info, 0, sizeof(info)); 1188 info.shmmax = in->shmmax; 1189 info.shmmin = in->shmmin; 1190 info.shmmni = in->shmmni; 1191 info.shmseg = in->shmseg; 1192 info.shmall = in->shmall; 1193 return copy_to_user(buf, &info, sizeof(info)); 1194 } else { 1195 struct shminfo info; 1196 memset(&info, 0, sizeof(info)); 1197 info.shmmax = in->shmmax; 1198 info.shmmin = in->shmmin; 1199 info.shmmni = in->shmmni; 1200 info.shmseg = in->shmseg; 1201 info.shmall = in->shmall; 1202 return copy_to_user(buf, &info, sizeof(info)); 1203 } 1204 } 1205 1206 static int put_compat_shm_info(struct shm_info *ip, 1207 struct compat_shm_info __user *uip) 1208 { 1209 struct compat_shm_info info; 1210 1211 memset(&info, 0, sizeof(info)); 1212 info.used_ids = ip->used_ids; 1213 info.shm_tot = ip->shm_tot; 1214 info.shm_rss = ip->shm_rss; 1215 info.shm_swp = ip->shm_swp; 1216 info.swap_attempts = ip->swap_attempts; 1217 info.swap_successes = ip->swap_successes; 1218 return copy_to_user(uip, &info, sizeof(info)); 1219 } 1220 1221 static int copy_compat_shmid_to_user(void __user *buf, struct shmid64_ds *in, 1222 int version) 1223 { 1224 if (version == IPC_64) { 1225 struct compat_shmid64_ds v; 1226 memset(&v, 0, sizeof(v)); 1227 to_compat_ipc64_perm(&v.shm_perm, &in->shm_perm); 1228 v.shm_atime = in->shm_atime; 1229 v.shm_dtime = in->shm_dtime; 1230 v.shm_ctime = in->shm_ctime; 1231 v.shm_segsz = in->shm_segsz; 1232 v.shm_nattch = in->shm_nattch; 1233 v.shm_cpid = in->shm_cpid; 1234 v.shm_lpid = in->shm_lpid; 1235 return copy_to_user(buf, &v, sizeof(v)); 1236 } else { 1237 struct compat_shmid_ds v; 1238 memset(&v, 0, sizeof(v)); 1239 to_compat_ipc_perm(&v.shm_perm, &in->shm_perm); 1240 v.shm_perm.key = in->shm_perm.key; 1241 v.shm_atime = in->shm_atime; 1242 v.shm_dtime = in->shm_dtime; 1243 v.shm_ctime = in->shm_ctime; 1244 v.shm_segsz = in->shm_segsz; 1245 v.shm_nattch = in->shm_nattch; 1246 v.shm_cpid = in->shm_cpid; 1247 v.shm_lpid = in->shm_lpid; 1248 return copy_to_user(buf, &v, sizeof(v)); 1249 } 1250 } 1251 1252 static int copy_compat_shmid_from_user(struct shmid64_ds *out, void __user *buf, 1253 int version) 1254 { 1255 memset(out, 0, sizeof(*out)); 1256 if (version == IPC_64) { 1257 struct compat_shmid64_ds __user *p = buf; 1258 return get_compat_ipc64_perm(&out->shm_perm, &p->shm_perm); 1259 } else { 1260 struct compat_shmid_ds __user *p = buf; 1261 return get_compat_ipc_perm(&out->shm_perm, &p->shm_perm); 1262 } 1263 } 1264 1265 long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr) 1266 { 1267 struct ipc_namespace *ns; 1268 struct shmid64_ds sem64; 1269 int version = compat_ipc_parse_version(&cmd); 1270 int err; 1271 1272 ns = current->nsproxy->ipc_ns; 1273 1274 if (cmd < 0 || shmid < 0) 1275 return -EINVAL; 1276 1277 switch (cmd) { 1278 case IPC_INFO: { 1279 struct shminfo64 shminfo; 1280 err = shmctl_ipc_info(ns, &shminfo); 1281 if (err < 0) 1282 return err; 1283 if (copy_compat_shminfo_to_user(uptr, &shminfo, version)) 1284 err = -EFAULT; 1285 return err; 1286 } 1287 case SHM_INFO: { 1288 struct shm_info shm_info; 1289 err = shmctl_shm_info(ns, &shm_info); 1290 if (err < 0) 1291 return err; 1292 if (put_compat_shm_info(&shm_info, uptr)) 1293 err = -EFAULT; 1294 return err; 1295 } 1296 case IPC_STAT: 1297 case SHM_STAT_ANY: 1298 case SHM_STAT: 1299 err = shmctl_stat(ns, shmid, cmd, &sem64); 1300 if (err < 0) 1301 return err; 1302 if (copy_compat_shmid_to_user(uptr, &sem64, version)) 1303 err = -EFAULT; 1304 return err; 1305 1306 case IPC_SET: 1307 if (copy_compat_shmid_from_user(&sem64, uptr, version)) 1308 return -EFAULT; 1309 /* fallthru */ 1310 case IPC_RMID: 1311 return shmctl_down(ns, shmid, cmd, &sem64); 1312 case SHM_LOCK: 1313 case SHM_UNLOCK: 1314 return shmctl_do_lock(ns, shmid, cmd); 1315 break; 1316 default: 1317 return -EINVAL; 1318 } 1319 return err; 1320 } 1321 1322 COMPAT_SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, void __user *, uptr) 1323 { 1324 return compat_ksys_shmctl(shmid, cmd, uptr); 1325 } 1326 #endif 1327 1328 /* 1329 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists. 1330 * 1331 * NOTE! Despite the name, this is NOT a direct system call entrypoint. The 1332 * "raddr" thing points to kernel space, and there has to be a wrapper around 1333 * this. 1334 */ 1335 long do_shmat(int shmid, char __user *shmaddr, int shmflg, 1336 ulong *raddr, unsigned long shmlba) 1337 { 1338 struct shmid_kernel *shp; 1339 unsigned long addr = (unsigned long)shmaddr; 1340 unsigned long size; 1341 struct file *file; 1342 int err; 1343 unsigned long flags = MAP_SHARED; 1344 unsigned long prot; 1345 int acc_mode; 1346 struct ipc_namespace *ns; 1347 struct shm_file_data *sfd; 1348 struct path path; 1349 fmode_t f_mode; 1350 unsigned long populate = 0; 1351 1352 err = -EINVAL; 1353 if (shmid < 0) 1354 goto out; 1355 1356 if (addr) { 1357 if (addr & (shmlba - 1)) { 1358 /* 1359 * Round down to the nearest multiple of shmlba. 1360 * For sane do_mmap_pgoff() parameters, avoid 1361 * round downs that trigger nil-page and MAP_FIXED. 1362 */ 1363 if ((shmflg & SHM_RND) && addr >= shmlba) 1364 addr &= ~(shmlba - 1); 1365 else 1366 #ifndef __ARCH_FORCE_SHMLBA 1367 if (addr & ~PAGE_MASK) 1368 #endif 1369 goto out; 1370 } 1371 1372 flags |= MAP_FIXED; 1373 } else if ((shmflg & SHM_REMAP)) 1374 goto out; 1375 1376 if (shmflg & SHM_RDONLY) { 1377 prot = PROT_READ; 1378 acc_mode = S_IRUGO; 1379 f_mode = FMODE_READ; 1380 } else { 1381 prot = PROT_READ | PROT_WRITE; 1382 acc_mode = S_IRUGO | S_IWUGO; 1383 f_mode = FMODE_READ | FMODE_WRITE; 1384 } 1385 if (shmflg & SHM_EXEC) { 1386 prot |= PROT_EXEC; 1387 acc_mode |= S_IXUGO; 1388 } 1389 1390 /* 1391 * We cannot rely on the fs check since SYSV IPC does have an 1392 * additional creator id... 1393 */ 1394 ns = current->nsproxy->ipc_ns; 1395 rcu_read_lock(); 1396 shp = shm_obtain_object_check(ns, shmid); 1397 if (IS_ERR(shp)) { 1398 err = PTR_ERR(shp); 1399 goto out_unlock; 1400 } 1401 1402 err = -EACCES; 1403 if (ipcperms(ns, &shp->shm_perm, acc_mode)) 1404 goto out_unlock; 1405 1406 err = security_shm_shmat(&shp->shm_perm, shmaddr, shmflg); 1407 if (err) 1408 goto out_unlock; 1409 1410 ipc_lock_object(&shp->shm_perm); 1411 1412 /* check if shm_destroy() is tearing down shp */ 1413 if (!ipc_valid_object(&shp->shm_perm)) { 1414 ipc_unlock_object(&shp->shm_perm); 1415 err = -EIDRM; 1416 goto out_unlock; 1417 } 1418 1419 path = shp->shm_file->f_path; 1420 path_get(&path); 1421 shp->shm_nattch++; 1422 size = i_size_read(d_inode(path.dentry)); 1423 ipc_unlock_object(&shp->shm_perm); 1424 rcu_read_unlock(); 1425 1426 err = -ENOMEM; 1427 sfd = kzalloc(sizeof(*sfd), GFP_KERNEL); 1428 if (!sfd) { 1429 path_put(&path); 1430 goto out_nattch; 1431 } 1432 1433 file = alloc_file(&path, f_mode, 1434 is_file_hugepages(shp->shm_file) ? 1435 &shm_file_operations_huge : 1436 &shm_file_operations); 1437 err = PTR_ERR(file); 1438 if (IS_ERR(file)) { 1439 kfree(sfd); 1440 path_put(&path); 1441 goto out_nattch; 1442 } 1443 1444 file->private_data = sfd; 1445 file->f_mapping = shp->shm_file->f_mapping; 1446 sfd->id = shp->shm_perm.id; 1447 sfd->ns = get_ipc_ns(ns); 1448 sfd->file = shp->shm_file; 1449 sfd->vm_ops = NULL; 1450 1451 err = security_mmap_file(file, prot, flags); 1452 if (err) 1453 goto out_fput; 1454 1455 if (down_write_killable(¤t->mm->mmap_sem)) { 1456 err = -EINTR; 1457 goto out_fput; 1458 } 1459 1460 if (addr && !(shmflg & SHM_REMAP)) { 1461 err = -EINVAL; 1462 if (addr + size < addr) 1463 goto invalid; 1464 1465 if (find_vma_intersection(current->mm, addr, addr + size)) 1466 goto invalid; 1467 } 1468 1469 addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate, NULL); 1470 *raddr = addr; 1471 err = 0; 1472 if (IS_ERR_VALUE(addr)) 1473 err = (long)addr; 1474 invalid: 1475 up_write(¤t->mm->mmap_sem); 1476 if (populate) 1477 mm_populate(addr, populate); 1478 1479 out_fput: 1480 fput(file); 1481 1482 out_nattch: 1483 down_write(&shm_ids(ns).rwsem); 1484 shp = shm_lock(ns, shmid); 1485 shp->shm_nattch--; 1486 if (shm_may_destroy(ns, shp)) 1487 shm_destroy(ns, shp); 1488 else 1489 shm_unlock(shp); 1490 up_write(&shm_ids(ns).rwsem); 1491 return err; 1492 1493 out_unlock: 1494 rcu_read_unlock(); 1495 out: 1496 return err; 1497 } 1498 1499 SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg) 1500 { 1501 unsigned long ret; 1502 long err; 1503 1504 err = do_shmat(shmid, shmaddr, shmflg, &ret, SHMLBA); 1505 if (err) 1506 return err; 1507 force_successful_syscall_return(); 1508 return (long)ret; 1509 } 1510 1511 #ifdef CONFIG_COMPAT 1512 1513 #ifndef COMPAT_SHMLBA 1514 #define COMPAT_SHMLBA SHMLBA 1515 #endif 1516 1517 COMPAT_SYSCALL_DEFINE3(shmat, int, shmid, compat_uptr_t, shmaddr, int, shmflg) 1518 { 1519 unsigned long ret; 1520 long err; 1521 1522 err = do_shmat(shmid, compat_ptr(shmaddr), shmflg, &ret, COMPAT_SHMLBA); 1523 if (err) 1524 return err; 1525 force_successful_syscall_return(); 1526 return (long)ret; 1527 } 1528 #endif 1529 1530 /* 1531 * detach and kill segment if marked destroyed. 1532 * The work is done in shm_close. 1533 */ 1534 long ksys_shmdt(char __user *shmaddr) 1535 { 1536 struct mm_struct *mm = current->mm; 1537 struct vm_area_struct *vma; 1538 unsigned long addr = (unsigned long)shmaddr; 1539 int retval = -EINVAL; 1540 #ifdef CONFIG_MMU 1541 loff_t size = 0; 1542 struct file *file; 1543 struct vm_area_struct *next; 1544 #endif 1545 1546 if (addr & ~PAGE_MASK) 1547 return retval; 1548 1549 if (down_write_killable(&mm->mmap_sem)) 1550 return -EINTR; 1551 1552 /* 1553 * This function tries to be smart and unmap shm segments that 1554 * were modified by partial mlock or munmap calls: 1555 * - It first determines the size of the shm segment that should be 1556 * unmapped: It searches for a vma that is backed by shm and that 1557 * started at address shmaddr. It records it's size and then unmaps 1558 * it. 1559 * - Then it unmaps all shm vmas that started at shmaddr and that 1560 * are within the initially determined size and that are from the 1561 * same shm segment from which we determined the size. 1562 * Errors from do_munmap are ignored: the function only fails if 1563 * it's called with invalid parameters or if it's called to unmap 1564 * a part of a vma. Both calls in this function are for full vmas, 1565 * the parameters are directly copied from the vma itself and always 1566 * valid - therefore do_munmap cannot fail. (famous last words?) 1567 */ 1568 /* 1569 * If it had been mremap()'d, the starting address would not 1570 * match the usual checks anyway. So assume all vma's are 1571 * above the starting address given. 1572 */ 1573 vma = find_vma(mm, addr); 1574 1575 #ifdef CONFIG_MMU 1576 while (vma) { 1577 next = vma->vm_next; 1578 1579 /* 1580 * Check if the starting address would match, i.e. it's 1581 * a fragment created by mprotect() and/or munmap(), or it 1582 * otherwise it starts at this address with no hassles. 1583 */ 1584 if ((vma->vm_ops == &shm_vm_ops) && 1585 (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) { 1586 1587 /* 1588 * Record the file of the shm segment being 1589 * unmapped. With mremap(), someone could place 1590 * page from another segment but with equal offsets 1591 * in the range we are unmapping. 1592 */ 1593 file = vma->vm_file; 1594 size = i_size_read(file_inode(vma->vm_file)); 1595 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL); 1596 /* 1597 * We discovered the size of the shm segment, so 1598 * break out of here and fall through to the next 1599 * loop that uses the size information to stop 1600 * searching for matching vma's. 1601 */ 1602 retval = 0; 1603 vma = next; 1604 break; 1605 } 1606 vma = next; 1607 } 1608 1609 /* 1610 * We need look no further than the maximum address a fragment 1611 * could possibly have landed at. Also cast things to loff_t to 1612 * prevent overflows and make comparisons vs. equal-width types. 1613 */ 1614 size = PAGE_ALIGN(size); 1615 while (vma && (loff_t)(vma->vm_end - addr) <= size) { 1616 next = vma->vm_next; 1617 1618 /* finding a matching vma now does not alter retval */ 1619 if ((vma->vm_ops == &shm_vm_ops) && 1620 ((vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) && 1621 (vma->vm_file == file)) 1622 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL); 1623 vma = next; 1624 } 1625 1626 #else /* CONFIG_MMU */ 1627 /* under NOMMU conditions, the exact address to be destroyed must be 1628 * given 1629 */ 1630 if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) { 1631 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL); 1632 retval = 0; 1633 } 1634 1635 #endif 1636 1637 up_write(&mm->mmap_sem); 1638 return retval; 1639 } 1640 1641 SYSCALL_DEFINE1(shmdt, char __user *, shmaddr) 1642 { 1643 return ksys_shmdt(shmaddr); 1644 } 1645 1646 #ifdef CONFIG_PROC_FS 1647 static int sysvipc_shm_proc_show(struct seq_file *s, void *it) 1648 { 1649 struct pid_namespace *pid_ns = ipc_seq_pid_ns(s); 1650 struct user_namespace *user_ns = seq_user_ns(s); 1651 struct kern_ipc_perm *ipcp = it; 1652 struct shmid_kernel *shp; 1653 unsigned long rss = 0, swp = 0; 1654 1655 shp = container_of(ipcp, struct shmid_kernel, shm_perm); 1656 shm_add_rss_swap(shp, &rss, &swp); 1657 1658 #if BITS_PER_LONG <= 32 1659 #define SIZE_SPEC "%10lu" 1660 #else 1661 #define SIZE_SPEC "%21lu" 1662 #endif 1663 1664 seq_printf(s, 1665 "%10d %10d %4o " SIZE_SPEC " %5u %5u " 1666 "%5lu %5u %5u %5u %5u %10llu %10llu %10llu " 1667 SIZE_SPEC " " SIZE_SPEC "\n", 1668 shp->shm_perm.key, 1669 shp->shm_perm.id, 1670 shp->shm_perm.mode, 1671 shp->shm_segsz, 1672 pid_nr_ns(shp->shm_cprid, pid_ns), 1673 pid_nr_ns(shp->shm_lprid, pid_ns), 1674 shp->shm_nattch, 1675 from_kuid_munged(user_ns, shp->shm_perm.uid), 1676 from_kgid_munged(user_ns, shp->shm_perm.gid), 1677 from_kuid_munged(user_ns, shp->shm_perm.cuid), 1678 from_kgid_munged(user_ns, shp->shm_perm.cgid), 1679 shp->shm_atim, 1680 shp->shm_dtim, 1681 shp->shm_ctim, 1682 rss * PAGE_SIZE, 1683 swp * PAGE_SIZE); 1684 1685 return 0; 1686 } 1687 #endif 1688