1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * linux/ipc/shm.c 4 * Copyright (C) 1992, 1993 Krishna Balasubramanian 5 * Many improvements/fixes by Bruno Haible. 6 * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994. 7 * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli. 8 * 9 * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com> 10 * BIGMEM support, Andrea Arcangeli <andrea@suse.de> 11 * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr> 12 * HIGHMEM support, Ingo Molnar <mingo@redhat.com> 13 * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com> 14 * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com> 15 * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com> 16 * 17 * support for audit of ipc object properties and permission changes 18 * Dustin Kirkland <dustin.kirkland@us.ibm.com> 19 * 20 * namespaces support 21 * OpenVZ, SWsoft Inc. 22 * Pavel Emelianov <xemul@openvz.org> 23 * 24 * Better ipc lock (kern_ipc_perm.lock) handling 25 * Davidlohr Bueso <davidlohr.bueso@hp.com>, June 2013. 26 */ 27 28 #include <linux/slab.h> 29 #include <linux/mm.h> 30 #include <linux/hugetlb.h> 31 #include <linux/shm.h> 32 #include <linux/init.h> 33 #include <linux/file.h> 34 #include <linux/mman.h> 35 #include <linux/shmem_fs.h> 36 #include <linux/security.h> 37 #include <linux/syscalls.h> 38 #include <linux/audit.h> 39 #include <linux/capability.h> 40 #include <linux/ptrace.h> 41 #include <linux/seq_file.h> 42 #include <linux/rwsem.h> 43 #include <linux/nsproxy.h> 44 #include <linux/mount.h> 45 #include <linux/ipc_namespace.h> 46 #include <linux/rhashtable.h> 47 48 #include <linux/uaccess.h> 49 50 #include "util.h" 51 52 struct shmid_kernel /* private to the kernel */ 53 { 54 struct kern_ipc_perm shm_perm; 55 struct file *shm_file; 56 unsigned long shm_nattch; 57 unsigned long shm_segsz; 58 time64_t shm_atim; 59 time64_t shm_dtim; 60 time64_t shm_ctim; 61 struct pid *shm_cprid; 62 struct pid *shm_lprid; 63 struct ucounts *mlock_ucounts; 64 65 /* The task created the shm object. NULL if the task is dead. */ 66 struct task_struct *shm_creator; 67 struct list_head shm_clist; /* list by creator */ 68 } __randomize_layout; 69 70 /* shm_mode upper byte flags */ 71 #define SHM_DEST 01000 /* segment will be destroyed on last detach */ 72 #define SHM_LOCKED 02000 /* segment will not be swapped */ 73 74 struct shm_file_data { 75 int id; 76 struct ipc_namespace *ns; 77 struct file *file; 78 const struct vm_operations_struct *vm_ops; 79 }; 80 81 #define shm_file_data(file) (*((struct shm_file_data **)&(file)->private_data)) 82 83 static const struct file_operations shm_file_operations; 84 static const struct vm_operations_struct shm_vm_ops; 85 86 #define shm_ids(ns) ((ns)->ids[IPC_SHM_IDS]) 87 88 #define shm_unlock(shp) \ 89 ipc_unlock(&(shp)->shm_perm) 90 91 static int newseg(struct ipc_namespace *, struct ipc_params *); 92 static void shm_open(struct vm_area_struct *vma); 93 static void shm_close(struct vm_area_struct *vma); 94 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp); 95 #ifdef CONFIG_PROC_FS 96 static int sysvipc_shm_proc_show(struct seq_file *s, void *it); 97 #endif 98 99 void shm_init_ns(struct ipc_namespace *ns) 100 { 101 ns->shm_ctlmax = SHMMAX; 102 ns->shm_ctlall = SHMALL; 103 ns->shm_ctlmni = SHMMNI; 104 ns->shm_rmid_forced = 0; 105 ns->shm_tot = 0; 106 ipc_init_ids(&shm_ids(ns)); 107 } 108 109 /* 110 * Called with shm_ids.rwsem (writer) and the shp structure locked. 111 * Only shm_ids.rwsem remains locked on exit. 112 */ 113 static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) 114 { 115 struct shmid_kernel *shp; 116 117 shp = container_of(ipcp, struct shmid_kernel, shm_perm); 118 119 if (shp->shm_nattch) { 120 shp->shm_perm.mode |= SHM_DEST; 121 /* Do not find it any more */ 122 ipc_set_key_private(&shm_ids(ns), &shp->shm_perm); 123 shm_unlock(shp); 124 } else 125 shm_destroy(ns, shp); 126 } 127 128 #ifdef CONFIG_IPC_NS 129 void shm_exit_ns(struct ipc_namespace *ns) 130 { 131 free_ipcs(ns, &shm_ids(ns), do_shm_rmid); 132 idr_destroy(&ns->ids[IPC_SHM_IDS].ipcs_idr); 133 rhashtable_destroy(&ns->ids[IPC_SHM_IDS].key_ht); 134 } 135 #endif 136 137 static int __init ipc_ns_init(void) 138 { 139 shm_init_ns(&init_ipc_ns); 140 return 0; 141 } 142 143 pure_initcall(ipc_ns_init); 144 145 void __init shm_init(void) 146 { 147 ipc_init_proc_interface("sysvipc/shm", 148 #if BITS_PER_LONG <= 32 149 " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n", 150 #else 151 " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n", 152 #endif 153 IPC_SHM_IDS, sysvipc_shm_proc_show); 154 } 155 156 static inline struct shmid_kernel *shm_obtain_object(struct ipc_namespace *ns, int id) 157 { 158 struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&shm_ids(ns), id); 159 160 if (IS_ERR(ipcp)) 161 return ERR_CAST(ipcp); 162 163 return container_of(ipcp, struct shmid_kernel, shm_perm); 164 } 165 166 static inline struct shmid_kernel *shm_obtain_object_check(struct ipc_namespace *ns, int id) 167 { 168 struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&shm_ids(ns), id); 169 170 if (IS_ERR(ipcp)) 171 return ERR_CAST(ipcp); 172 173 return container_of(ipcp, struct shmid_kernel, shm_perm); 174 } 175 176 /* 177 * shm_lock_(check_) routines are called in the paths where the rwsem 178 * is not necessarily held. 179 */ 180 static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id) 181 { 182 struct kern_ipc_perm *ipcp; 183 184 rcu_read_lock(); 185 ipcp = ipc_obtain_object_idr(&shm_ids(ns), id); 186 if (IS_ERR(ipcp)) 187 goto err; 188 189 ipc_lock_object(ipcp); 190 /* 191 * ipc_rmid() may have already freed the ID while ipc_lock_object() 192 * was spinning: here verify that the structure is still valid. 193 * Upon races with RMID, return -EIDRM, thus indicating that 194 * the ID points to a removed identifier. 195 */ 196 if (ipc_valid_object(ipcp)) { 197 /* return a locked ipc object upon success */ 198 return container_of(ipcp, struct shmid_kernel, shm_perm); 199 } 200 201 ipc_unlock_object(ipcp); 202 ipcp = ERR_PTR(-EIDRM); 203 err: 204 rcu_read_unlock(); 205 /* 206 * Callers of shm_lock() must validate the status of the returned ipc 207 * object pointer and error out as appropriate. 208 */ 209 return ERR_CAST(ipcp); 210 } 211 212 static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp) 213 { 214 rcu_read_lock(); 215 ipc_lock_object(&ipcp->shm_perm); 216 } 217 218 static void shm_rcu_free(struct rcu_head *head) 219 { 220 struct kern_ipc_perm *ptr = container_of(head, struct kern_ipc_perm, 221 rcu); 222 struct shmid_kernel *shp = container_of(ptr, struct shmid_kernel, 223 shm_perm); 224 security_shm_free(&shp->shm_perm); 225 kfree(shp); 226 } 227 228 static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s) 229 { 230 list_del(&s->shm_clist); 231 ipc_rmid(&shm_ids(ns), &s->shm_perm); 232 } 233 234 235 static int __shm_open(struct vm_area_struct *vma) 236 { 237 struct file *file = vma->vm_file; 238 struct shm_file_data *sfd = shm_file_data(file); 239 struct shmid_kernel *shp; 240 241 shp = shm_lock(sfd->ns, sfd->id); 242 243 if (IS_ERR(shp)) 244 return PTR_ERR(shp); 245 246 if (shp->shm_file != sfd->file) { 247 /* ID was reused */ 248 shm_unlock(shp); 249 return -EINVAL; 250 } 251 252 shp->shm_atim = ktime_get_real_seconds(); 253 ipc_update_pid(&shp->shm_lprid, task_tgid(current)); 254 shp->shm_nattch++; 255 shm_unlock(shp); 256 return 0; 257 } 258 259 /* This is called by fork, once for every shm attach. */ 260 static void shm_open(struct vm_area_struct *vma) 261 { 262 int err = __shm_open(vma); 263 /* 264 * We raced in the idr lookup or with shm_destroy(). 265 * Either way, the ID is busted. 266 */ 267 WARN_ON_ONCE(err); 268 } 269 270 /* 271 * shm_destroy - free the struct shmid_kernel 272 * 273 * @ns: namespace 274 * @shp: struct to free 275 * 276 * It has to be called with shp and shm_ids.rwsem (writer) locked, 277 * but returns with shp unlocked and freed. 278 */ 279 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) 280 { 281 struct file *shm_file; 282 283 shm_file = shp->shm_file; 284 shp->shm_file = NULL; 285 ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; 286 shm_rmid(ns, shp); 287 shm_unlock(shp); 288 if (!is_file_hugepages(shm_file)) 289 shmem_lock(shm_file, 0, shp->mlock_ucounts); 290 fput(shm_file); 291 ipc_update_pid(&shp->shm_cprid, NULL); 292 ipc_update_pid(&shp->shm_lprid, NULL); 293 ipc_rcu_putref(&shp->shm_perm, shm_rcu_free); 294 } 295 296 /* 297 * shm_may_destroy - identifies whether shm segment should be destroyed now 298 * 299 * Returns true if and only if there are no active users of the segment and 300 * one of the following is true: 301 * 302 * 1) shmctl(id, IPC_RMID, NULL) was called for this shp 303 * 304 * 2) sysctl kernel.shm_rmid_forced is set to 1. 305 */ 306 static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) 307 { 308 return (shp->shm_nattch == 0) && 309 (ns->shm_rmid_forced || 310 (shp->shm_perm.mode & SHM_DEST)); 311 } 312 313 /* 314 * remove the attach descriptor vma. 315 * free memory for segment if it is marked destroyed. 316 * The descriptor has already been removed from the current->mm->mmap list 317 * and will later be kfree()d. 318 */ 319 static void shm_close(struct vm_area_struct *vma) 320 { 321 struct file *file = vma->vm_file; 322 struct shm_file_data *sfd = shm_file_data(file); 323 struct shmid_kernel *shp; 324 struct ipc_namespace *ns = sfd->ns; 325 326 down_write(&shm_ids(ns).rwsem); 327 /* remove from the list of attaches of the shm segment */ 328 shp = shm_lock(ns, sfd->id); 329 330 /* 331 * We raced in the idr lookup or with shm_destroy(). 332 * Either way, the ID is busted. 333 */ 334 if (WARN_ON_ONCE(IS_ERR(shp))) 335 goto done; /* no-op */ 336 337 ipc_update_pid(&shp->shm_lprid, task_tgid(current)); 338 shp->shm_dtim = ktime_get_real_seconds(); 339 shp->shm_nattch--; 340 if (shm_may_destroy(ns, shp)) 341 shm_destroy(ns, shp); 342 else 343 shm_unlock(shp); 344 done: 345 up_write(&shm_ids(ns).rwsem); 346 } 347 348 /* Called with ns->shm_ids(ns).rwsem locked */ 349 static int shm_try_destroy_orphaned(int id, void *p, void *data) 350 { 351 struct ipc_namespace *ns = data; 352 struct kern_ipc_perm *ipcp = p; 353 struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm); 354 355 /* 356 * We want to destroy segments without users and with already 357 * exit'ed originating process. 358 * 359 * As shp->* are changed under rwsem, it's safe to skip shp locking. 360 */ 361 if (shp->shm_creator != NULL) 362 return 0; 363 364 if (shm_may_destroy(ns, shp)) { 365 shm_lock_by_ptr(shp); 366 shm_destroy(ns, shp); 367 } 368 return 0; 369 } 370 371 void shm_destroy_orphaned(struct ipc_namespace *ns) 372 { 373 down_write(&shm_ids(ns).rwsem); 374 if (shm_ids(ns).in_use) 375 idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns); 376 up_write(&shm_ids(ns).rwsem); 377 } 378 379 /* Locking assumes this will only be called with task == current */ 380 void exit_shm(struct task_struct *task) 381 { 382 struct ipc_namespace *ns = task->nsproxy->ipc_ns; 383 struct shmid_kernel *shp, *n; 384 385 if (list_empty(&task->sysvshm.shm_clist)) 386 return; 387 388 /* 389 * If kernel.shm_rmid_forced is not set then only keep track of 390 * which shmids are orphaned, so that a later set of the sysctl 391 * can clean them up. 392 */ 393 if (!ns->shm_rmid_forced) { 394 down_read(&shm_ids(ns).rwsem); 395 list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist) 396 shp->shm_creator = NULL; 397 /* 398 * Only under read lock but we are only called on current 399 * so no entry on the list will be shared. 400 */ 401 list_del(&task->sysvshm.shm_clist); 402 up_read(&shm_ids(ns).rwsem); 403 return; 404 } 405 406 /* 407 * Destroy all already created segments, that were not yet mapped, 408 * and mark any mapped as orphan to cover the sysctl toggling. 409 * Destroy is skipped if shm_may_destroy() returns false. 410 */ 411 down_write(&shm_ids(ns).rwsem); 412 list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) { 413 shp->shm_creator = NULL; 414 415 if (shm_may_destroy(ns, shp)) { 416 shm_lock_by_ptr(shp); 417 shm_destroy(ns, shp); 418 } 419 } 420 421 /* Remove the list head from any segments still attached. */ 422 list_del(&task->sysvshm.shm_clist); 423 up_write(&shm_ids(ns).rwsem); 424 } 425 426 static vm_fault_t shm_fault(struct vm_fault *vmf) 427 { 428 struct file *file = vmf->vma->vm_file; 429 struct shm_file_data *sfd = shm_file_data(file); 430 431 return sfd->vm_ops->fault(vmf); 432 } 433 434 static int shm_may_split(struct vm_area_struct *vma, unsigned long addr) 435 { 436 struct file *file = vma->vm_file; 437 struct shm_file_data *sfd = shm_file_data(file); 438 439 if (sfd->vm_ops->may_split) 440 return sfd->vm_ops->may_split(vma, addr); 441 442 return 0; 443 } 444 445 static unsigned long shm_pagesize(struct vm_area_struct *vma) 446 { 447 struct file *file = vma->vm_file; 448 struct shm_file_data *sfd = shm_file_data(file); 449 450 if (sfd->vm_ops->pagesize) 451 return sfd->vm_ops->pagesize(vma); 452 453 return PAGE_SIZE; 454 } 455 456 #ifdef CONFIG_NUMA 457 static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new) 458 { 459 struct file *file = vma->vm_file; 460 struct shm_file_data *sfd = shm_file_data(file); 461 int err = 0; 462 463 if (sfd->vm_ops->set_policy) 464 err = sfd->vm_ops->set_policy(vma, new); 465 return err; 466 } 467 468 static struct mempolicy *shm_get_policy(struct vm_area_struct *vma, 469 unsigned long addr) 470 { 471 struct file *file = vma->vm_file; 472 struct shm_file_data *sfd = shm_file_data(file); 473 struct mempolicy *pol = NULL; 474 475 if (sfd->vm_ops->get_policy) 476 pol = sfd->vm_ops->get_policy(vma, addr); 477 else if (vma->vm_policy) 478 pol = vma->vm_policy; 479 480 return pol; 481 } 482 #endif 483 484 static int shm_mmap(struct file *file, struct vm_area_struct *vma) 485 { 486 struct shm_file_data *sfd = shm_file_data(file); 487 int ret; 488 489 /* 490 * In case of remap_file_pages() emulation, the file can represent an 491 * IPC ID that was removed, and possibly even reused by another shm 492 * segment already. Propagate this case as an error to caller. 493 */ 494 ret = __shm_open(vma); 495 if (ret) 496 return ret; 497 498 ret = call_mmap(sfd->file, vma); 499 if (ret) { 500 shm_close(vma); 501 return ret; 502 } 503 sfd->vm_ops = vma->vm_ops; 504 #ifdef CONFIG_MMU 505 WARN_ON(!sfd->vm_ops->fault); 506 #endif 507 vma->vm_ops = &shm_vm_ops; 508 return 0; 509 } 510 511 static int shm_release(struct inode *ino, struct file *file) 512 { 513 struct shm_file_data *sfd = shm_file_data(file); 514 515 put_ipc_ns(sfd->ns); 516 fput(sfd->file); 517 shm_file_data(file) = NULL; 518 kfree(sfd); 519 return 0; 520 } 521 522 static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync) 523 { 524 struct shm_file_data *sfd = shm_file_data(file); 525 526 if (!sfd->file->f_op->fsync) 527 return -EINVAL; 528 return sfd->file->f_op->fsync(sfd->file, start, end, datasync); 529 } 530 531 static long shm_fallocate(struct file *file, int mode, loff_t offset, 532 loff_t len) 533 { 534 struct shm_file_data *sfd = shm_file_data(file); 535 536 if (!sfd->file->f_op->fallocate) 537 return -EOPNOTSUPP; 538 return sfd->file->f_op->fallocate(file, mode, offset, len); 539 } 540 541 static unsigned long shm_get_unmapped_area(struct file *file, 542 unsigned long addr, unsigned long len, unsigned long pgoff, 543 unsigned long flags) 544 { 545 struct shm_file_data *sfd = shm_file_data(file); 546 547 return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len, 548 pgoff, flags); 549 } 550 551 static const struct file_operations shm_file_operations = { 552 .mmap = shm_mmap, 553 .fsync = shm_fsync, 554 .release = shm_release, 555 .get_unmapped_area = shm_get_unmapped_area, 556 .llseek = noop_llseek, 557 .fallocate = shm_fallocate, 558 }; 559 560 /* 561 * shm_file_operations_huge is now identical to shm_file_operations, 562 * but we keep it distinct for the sake of is_file_shm_hugepages(). 563 */ 564 static const struct file_operations shm_file_operations_huge = { 565 .mmap = shm_mmap, 566 .fsync = shm_fsync, 567 .release = shm_release, 568 .get_unmapped_area = shm_get_unmapped_area, 569 .llseek = noop_llseek, 570 .fallocate = shm_fallocate, 571 }; 572 573 bool is_file_shm_hugepages(struct file *file) 574 { 575 return file->f_op == &shm_file_operations_huge; 576 } 577 578 static const struct vm_operations_struct shm_vm_ops = { 579 .open = shm_open, /* callback for a new vm-area open */ 580 .close = shm_close, /* callback for when the vm-area is released */ 581 .fault = shm_fault, 582 .may_split = shm_may_split, 583 .pagesize = shm_pagesize, 584 #if defined(CONFIG_NUMA) 585 .set_policy = shm_set_policy, 586 .get_policy = shm_get_policy, 587 #endif 588 }; 589 590 /** 591 * newseg - Create a new shared memory segment 592 * @ns: namespace 593 * @params: ptr to the structure that contains key, size and shmflg 594 * 595 * Called with shm_ids.rwsem held as a writer. 596 */ 597 static int newseg(struct ipc_namespace *ns, struct ipc_params *params) 598 { 599 key_t key = params->key; 600 int shmflg = params->flg; 601 size_t size = params->u.size; 602 int error; 603 struct shmid_kernel *shp; 604 size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; 605 struct file *file; 606 char name[13]; 607 vm_flags_t acctflag = 0; 608 609 if (size < SHMMIN || size > ns->shm_ctlmax) 610 return -EINVAL; 611 612 if (numpages << PAGE_SHIFT < size) 613 return -ENOSPC; 614 615 if (ns->shm_tot + numpages < ns->shm_tot || 616 ns->shm_tot + numpages > ns->shm_ctlall) 617 return -ENOSPC; 618 619 shp = kmalloc(sizeof(*shp), GFP_KERNEL_ACCOUNT); 620 if (unlikely(!shp)) 621 return -ENOMEM; 622 623 shp->shm_perm.key = key; 624 shp->shm_perm.mode = (shmflg & S_IRWXUGO); 625 shp->mlock_ucounts = NULL; 626 627 shp->shm_perm.security = NULL; 628 error = security_shm_alloc(&shp->shm_perm); 629 if (error) { 630 kfree(shp); 631 return error; 632 } 633 634 sprintf(name, "SYSV%08x", key); 635 if (shmflg & SHM_HUGETLB) { 636 struct hstate *hs; 637 size_t hugesize; 638 639 hs = hstate_sizelog((shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK); 640 if (!hs) { 641 error = -EINVAL; 642 goto no_file; 643 } 644 hugesize = ALIGN(size, huge_page_size(hs)); 645 646 /* hugetlb_file_setup applies strict accounting */ 647 if (shmflg & SHM_NORESERVE) 648 acctflag = VM_NORESERVE; 649 file = hugetlb_file_setup(name, hugesize, acctflag, 650 HUGETLB_SHMFS_INODE, (shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK); 651 } else { 652 /* 653 * Do not allow no accounting for OVERCOMMIT_NEVER, even 654 * if it's asked for. 655 */ 656 if ((shmflg & SHM_NORESERVE) && 657 sysctl_overcommit_memory != OVERCOMMIT_NEVER) 658 acctflag = VM_NORESERVE; 659 file = shmem_kernel_file_setup(name, size, acctflag); 660 } 661 error = PTR_ERR(file); 662 if (IS_ERR(file)) 663 goto no_file; 664 665 shp->shm_cprid = get_pid(task_tgid(current)); 666 shp->shm_lprid = NULL; 667 shp->shm_atim = shp->shm_dtim = 0; 668 shp->shm_ctim = ktime_get_real_seconds(); 669 shp->shm_segsz = size; 670 shp->shm_nattch = 0; 671 shp->shm_file = file; 672 shp->shm_creator = current; 673 674 /* ipc_addid() locks shp upon success. */ 675 error = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni); 676 if (error < 0) 677 goto no_id; 678 679 list_add(&shp->shm_clist, ¤t->sysvshm.shm_clist); 680 681 /* 682 * shmid gets reported as "inode#" in /proc/pid/maps. 683 * proc-ps tools use this. Changing this will break them. 684 */ 685 file_inode(file)->i_ino = shp->shm_perm.id; 686 687 ns->shm_tot += numpages; 688 error = shp->shm_perm.id; 689 690 ipc_unlock_object(&shp->shm_perm); 691 rcu_read_unlock(); 692 return error; 693 694 no_id: 695 ipc_update_pid(&shp->shm_cprid, NULL); 696 ipc_update_pid(&shp->shm_lprid, NULL); 697 fput(file); 698 ipc_rcu_putref(&shp->shm_perm, shm_rcu_free); 699 return error; 700 no_file: 701 call_rcu(&shp->shm_perm.rcu, shm_rcu_free); 702 return error; 703 } 704 705 /* 706 * Called with shm_ids.rwsem and ipcp locked. 707 */ 708 static int shm_more_checks(struct kern_ipc_perm *ipcp, struct ipc_params *params) 709 { 710 struct shmid_kernel *shp; 711 712 shp = container_of(ipcp, struct shmid_kernel, shm_perm); 713 if (shp->shm_segsz < params->u.size) 714 return -EINVAL; 715 716 return 0; 717 } 718 719 long ksys_shmget(key_t key, size_t size, int shmflg) 720 { 721 struct ipc_namespace *ns; 722 static const struct ipc_ops shm_ops = { 723 .getnew = newseg, 724 .associate = security_shm_associate, 725 .more_checks = shm_more_checks, 726 }; 727 struct ipc_params shm_params; 728 729 ns = current->nsproxy->ipc_ns; 730 731 shm_params.key = key; 732 shm_params.flg = shmflg; 733 shm_params.u.size = size; 734 735 return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params); 736 } 737 738 SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg) 739 { 740 return ksys_shmget(key, size, shmflg); 741 } 742 743 static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version) 744 { 745 switch (version) { 746 case IPC_64: 747 return copy_to_user(buf, in, sizeof(*in)); 748 case IPC_OLD: 749 { 750 struct shmid_ds out; 751 752 memset(&out, 0, sizeof(out)); 753 ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm); 754 out.shm_segsz = in->shm_segsz; 755 out.shm_atime = in->shm_atime; 756 out.shm_dtime = in->shm_dtime; 757 out.shm_ctime = in->shm_ctime; 758 out.shm_cpid = in->shm_cpid; 759 out.shm_lpid = in->shm_lpid; 760 out.shm_nattch = in->shm_nattch; 761 762 return copy_to_user(buf, &out, sizeof(out)); 763 } 764 default: 765 return -EINVAL; 766 } 767 } 768 769 static inline unsigned long 770 copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version) 771 { 772 switch (version) { 773 case IPC_64: 774 if (copy_from_user(out, buf, sizeof(*out))) 775 return -EFAULT; 776 return 0; 777 case IPC_OLD: 778 { 779 struct shmid_ds tbuf_old; 780 781 if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) 782 return -EFAULT; 783 784 out->shm_perm.uid = tbuf_old.shm_perm.uid; 785 out->shm_perm.gid = tbuf_old.shm_perm.gid; 786 out->shm_perm.mode = tbuf_old.shm_perm.mode; 787 788 return 0; 789 } 790 default: 791 return -EINVAL; 792 } 793 } 794 795 static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version) 796 { 797 switch (version) { 798 case IPC_64: 799 return copy_to_user(buf, in, sizeof(*in)); 800 case IPC_OLD: 801 { 802 struct shminfo out; 803 804 if (in->shmmax > INT_MAX) 805 out.shmmax = INT_MAX; 806 else 807 out.shmmax = (int)in->shmmax; 808 809 out.shmmin = in->shmmin; 810 out.shmmni = in->shmmni; 811 out.shmseg = in->shmseg; 812 out.shmall = in->shmall; 813 814 return copy_to_user(buf, &out, sizeof(out)); 815 } 816 default: 817 return -EINVAL; 818 } 819 } 820 821 /* 822 * Calculate and add used RSS and swap pages of a shm. 823 * Called with shm_ids.rwsem held as a reader 824 */ 825 static void shm_add_rss_swap(struct shmid_kernel *shp, 826 unsigned long *rss_add, unsigned long *swp_add) 827 { 828 struct inode *inode; 829 830 inode = file_inode(shp->shm_file); 831 832 if (is_file_hugepages(shp->shm_file)) { 833 struct address_space *mapping = inode->i_mapping; 834 struct hstate *h = hstate_file(shp->shm_file); 835 *rss_add += pages_per_huge_page(h) * mapping->nrpages; 836 } else { 837 #ifdef CONFIG_SHMEM 838 struct shmem_inode_info *info = SHMEM_I(inode); 839 840 spin_lock_irq(&info->lock); 841 *rss_add += inode->i_mapping->nrpages; 842 *swp_add += info->swapped; 843 spin_unlock_irq(&info->lock); 844 #else 845 *rss_add += inode->i_mapping->nrpages; 846 #endif 847 } 848 } 849 850 /* 851 * Called with shm_ids.rwsem held as a reader 852 */ 853 static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss, 854 unsigned long *swp) 855 { 856 int next_id; 857 int total, in_use; 858 859 *rss = 0; 860 *swp = 0; 861 862 in_use = shm_ids(ns).in_use; 863 864 for (total = 0, next_id = 0; total < in_use; next_id++) { 865 struct kern_ipc_perm *ipc; 866 struct shmid_kernel *shp; 867 868 ipc = idr_find(&shm_ids(ns).ipcs_idr, next_id); 869 if (ipc == NULL) 870 continue; 871 shp = container_of(ipc, struct shmid_kernel, shm_perm); 872 873 shm_add_rss_swap(shp, rss, swp); 874 875 total++; 876 } 877 } 878 879 /* 880 * This function handles some shmctl commands which require the rwsem 881 * to be held in write mode. 882 * NOTE: no locks must be held, the rwsem is taken inside this function. 883 */ 884 static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd, 885 struct shmid64_ds *shmid64) 886 { 887 struct kern_ipc_perm *ipcp; 888 struct shmid_kernel *shp; 889 int err; 890 891 down_write(&shm_ids(ns).rwsem); 892 rcu_read_lock(); 893 894 ipcp = ipcctl_obtain_check(ns, &shm_ids(ns), shmid, cmd, 895 &shmid64->shm_perm, 0); 896 if (IS_ERR(ipcp)) { 897 err = PTR_ERR(ipcp); 898 goto out_unlock1; 899 } 900 901 shp = container_of(ipcp, struct shmid_kernel, shm_perm); 902 903 err = security_shm_shmctl(&shp->shm_perm, cmd); 904 if (err) 905 goto out_unlock1; 906 907 switch (cmd) { 908 case IPC_RMID: 909 ipc_lock_object(&shp->shm_perm); 910 /* do_shm_rmid unlocks the ipc object and rcu */ 911 do_shm_rmid(ns, ipcp); 912 goto out_up; 913 case IPC_SET: 914 ipc_lock_object(&shp->shm_perm); 915 err = ipc_update_perm(&shmid64->shm_perm, ipcp); 916 if (err) 917 goto out_unlock0; 918 shp->shm_ctim = ktime_get_real_seconds(); 919 break; 920 default: 921 err = -EINVAL; 922 goto out_unlock1; 923 } 924 925 out_unlock0: 926 ipc_unlock_object(&shp->shm_perm); 927 out_unlock1: 928 rcu_read_unlock(); 929 out_up: 930 up_write(&shm_ids(ns).rwsem); 931 return err; 932 } 933 934 static int shmctl_ipc_info(struct ipc_namespace *ns, 935 struct shminfo64 *shminfo) 936 { 937 int err = security_shm_shmctl(NULL, IPC_INFO); 938 if (!err) { 939 memset(shminfo, 0, sizeof(*shminfo)); 940 shminfo->shmmni = shminfo->shmseg = ns->shm_ctlmni; 941 shminfo->shmmax = ns->shm_ctlmax; 942 shminfo->shmall = ns->shm_ctlall; 943 shminfo->shmmin = SHMMIN; 944 down_read(&shm_ids(ns).rwsem); 945 err = ipc_get_maxidx(&shm_ids(ns)); 946 up_read(&shm_ids(ns).rwsem); 947 if (err < 0) 948 err = 0; 949 } 950 return err; 951 } 952 953 static int shmctl_shm_info(struct ipc_namespace *ns, 954 struct shm_info *shm_info) 955 { 956 int err = security_shm_shmctl(NULL, SHM_INFO); 957 if (!err) { 958 memset(shm_info, 0, sizeof(*shm_info)); 959 down_read(&shm_ids(ns).rwsem); 960 shm_info->used_ids = shm_ids(ns).in_use; 961 shm_get_stat(ns, &shm_info->shm_rss, &shm_info->shm_swp); 962 shm_info->shm_tot = ns->shm_tot; 963 shm_info->swap_attempts = 0; 964 shm_info->swap_successes = 0; 965 err = ipc_get_maxidx(&shm_ids(ns)); 966 up_read(&shm_ids(ns).rwsem); 967 if (err < 0) 968 err = 0; 969 } 970 return err; 971 } 972 973 static int shmctl_stat(struct ipc_namespace *ns, int shmid, 974 int cmd, struct shmid64_ds *tbuf) 975 { 976 struct shmid_kernel *shp; 977 int err; 978 979 memset(tbuf, 0, sizeof(*tbuf)); 980 981 rcu_read_lock(); 982 if (cmd == SHM_STAT || cmd == SHM_STAT_ANY) { 983 shp = shm_obtain_object(ns, shmid); 984 if (IS_ERR(shp)) { 985 err = PTR_ERR(shp); 986 goto out_unlock; 987 } 988 } else { /* IPC_STAT */ 989 shp = shm_obtain_object_check(ns, shmid); 990 if (IS_ERR(shp)) { 991 err = PTR_ERR(shp); 992 goto out_unlock; 993 } 994 } 995 996 /* 997 * Semantically SHM_STAT_ANY ought to be identical to 998 * that functionality provided by the /proc/sysvipc/ 999 * interface. As such, only audit these calls and 1000 * do not do traditional S_IRUGO permission checks on 1001 * the ipc object. 1002 */ 1003 if (cmd == SHM_STAT_ANY) 1004 audit_ipc_obj(&shp->shm_perm); 1005 else { 1006 err = -EACCES; 1007 if (ipcperms(ns, &shp->shm_perm, S_IRUGO)) 1008 goto out_unlock; 1009 } 1010 1011 err = security_shm_shmctl(&shp->shm_perm, cmd); 1012 if (err) 1013 goto out_unlock; 1014 1015 ipc_lock_object(&shp->shm_perm); 1016 1017 if (!ipc_valid_object(&shp->shm_perm)) { 1018 ipc_unlock_object(&shp->shm_perm); 1019 err = -EIDRM; 1020 goto out_unlock; 1021 } 1022 1023 kernel_to_ipc64_perm(&shp->shm_perm, &tbuf->shm_perm); 1024 tbuf->shm_segsz = shp->shm_segsz; 1025 tbuf->shm_atime = shp->shm_atim; 1026 tbuf->shm_dtime = shp->shm_dtim; 1027 tbuf->shm_ctime = shp->shm_ctim; 1028 #ifndef CONFIG_64BIT 1029 tbuf->shm_atime_high = shp->shm_atim >> 32; 1030 tbuf->shm_dtime_high = shp->shm_dtim >> 32; 1031 tbuf->shm_ctime_high = shp->shm_ctim >> 32; 1032 #endif 1033 tbuf->shm_cpid = pid_vnr(shp->shm_cprid); 1034 tbuf->shm_lpid = pid_vnr(shp->shm_lprid); 1035 tbuf->shm_nattch = shp->shm_nattch; 1036 1037 if (cmd == IPC_STAT) { 1038 /* 1039 * As defined in SUS: 1040 * Return 0 on success 1041 */ 1042 err = 0; 1043 } else { 1044 /* 1045 * SHM_STAT and SHM_STAT_ANY (both Linux specific) 1046 * Return the full id, including the sequence number 1047 */ 1048 err = shp->shm_perm.id; 1049 } 1050 1051 ipc_unlock_object(&shp->shm_perm); 1052 out_unlock: 1053 rcu_read_unlock(); 1054 return err; 1055 } 1056 1057 static int shmctl_do_lock(struct ipc_namespace *ns, int shmid, int cmd) 1058 { 1059 struct shmid_kernel *shp; 1060 struct file *shm_file; 1061 int err; 1062 1063 rcu_read_lock(); 1064 shp = shm_obtain_object_check(ns, shmid); 1065 if (IS_ERR(shp)) { 1066 err = PTR_ERR(shp); 1067 goto out_unlock1; 1068 } 1069 1070 audit_ipc_obj(&(shp->shm_perm)); 1071 err = security_shm_shmctl(&shp->shm_perm, cmd); 1072 if (err) 1073 goto out_unlock1; 1074 1075 ipc_lock_object(&shp->shm_perm); 1076 1077 /* check if shm_destroy() is tearing down shp */ 1078 if (!ipc_valid_object(&shp->shm_perm)) { 1079 err = -EIDRM; 1080 goto out_unlock0; 1081 } 1082 1083 if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) { 1084 kuid_t euid = current_euid(); 1085 1086 if (!uid_eq(euid, shp->shm_perm.uid) && 1087 !uid_eq(euid, shp->shm_perm.cuid)) { 1088 err = -EPERM; 1089 goto out_unlock0; 1090 } 1091 if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) { 1092 err = -EPERM; 1093 goto out_unlock0; 1094 } 1095 } 1096 1097 shm_file = shp->shm_file; 1098 if (is_file_hugepages(shm_file)) 1099 goto out_unlock0; 1100 1101 if (cmd == SHM_LOCK) { 1102 struct ucounts *ucounts = current_ucounts(); 1103 1104 err = shmem_lock(shm_file, 1, ucounts); 1105 if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) { 1106 shp->shm_perm.mode |= SHM_LOCKED; 1107 shp->mlock_ucounts = ucounts; 1108 } 1109 goto out_unlock0; 1110 } 1111 1112 /* SHM_UNLOCK */ 1113 if (!(shp->shm_perm.mode & SHM_LOCKED)) 1114 goto out_unlock0; 1115 shmem_lock(shm_file, 0, shp->mlock_ucounts); 1116 shp->shm_perm.mode &= ~SHM_LOCKED; 1117 shp->mlock_ucounts = NULL; 1118 get_file(shm_file); 1119 ipc_unlock_object(&shp->shm_perm); 1120 rcu_read_unlock(); 1121 shmem_unlock_mapping(shm_file->f_mapping); 1122 1123 fput(shm_file); 1124 return err; 1125 1126 out_unlock0: 1127 ipc_unlock_object(&shp->shm_perm); 1128 out_unlock1: 1129 rcu_read_unlock(); 1130 return err; 1131 } 1132 1133 static long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf, int version) 1134 { 1135 int err; 1136 struct ipc_namespace *ns; 1137 struct shmid64_ds sem64; 1138 1139 if (cmd < 0 || shmid < 0) 1140 return -EINVAL; 1141 1142 ns = current->nsproxy->ipc_ns; 1143 1144 switch (cmd) { 1145 case IPC_INFO: { 1146 struct shminfo64 shminfo; 1147 err = shmctl_ipc_info(ns, &shminfo); 1148 if (err < 0) 1149 return err; 1150 if (copy_shminfo_to_user(buf, &shminfo, version)) 1151 err = -EFAULT; 1152 return err; 1153 } 1154 case SHM_INFO: { 1155 struct shm_info shm_info; 1156 err = shmctl_shm_info(ns, &shm_info); 1157 if (err < 0) 1158 return err; 1159 if (copy_to_user(buf, &shm_info, sizeof(shm_info))) 1160 err = -EFAULT; 1161 return err; 1162 } 1163 case SHM_STAT: 1164 case SHM_STAT_ANY: 1165 case IPC_STAT: { 1166 err = shmctl_stat(ns, shmid, cmd, &sem64); 1167 if (err < 0) 1168 return err; 1169 if (copy_shmid_to_user(buf, &sem64, version)) 1170 err = -EFAULT; 1171 return err; 1172 } 1173 case IPC_SET: 1174 if (copy_shmid_from_user(&sem64, buf, version)) 1175 return -EFAULT; 1176 fallthrough; 1177 case IPC_RMID: 1178 return shmctl_down(ns, shmid, cmd, &sem64); 1179 case SHM_LOCK: 1180 case SHM_UNLOCK: 1181 return shmctl_do_lock(ns, shmid, cmd); 1182 default: 1183 return -EINVAL; 1184 } 1185 } 1186 1187 SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) 1188 { 1189 return ksys_shmctl(shmid, cmd, buf, IPC_64); 1190 } 1191 1192 #ifdef CONFIG_ARCH_WANT_IPC_PARSE_VERSION 1193 long ksys_old_shmctl(int shmid, int cmd, struct shmid_ds __user *buf) 1194 { 1195 int version = ipc_parse_version(&cmd); 1196 1197 return ksys_shmctl(shmid, cmd, buf, version); 1198 } 1199 1200 SYSCALL_DEFINE3(old_shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) 1201 { 1202 return ksys_old_shmctl(shmid, cmd, buf); 1203 } 1204 #endif 1205 1206 #ifdef CONFIG_COMPAT 1207 1208 struct compat_shmid_ds { 1209 struct compat_ipc_perm shm_perm; 1210 int shm_segsz; 1211 old_time32_t shm_atime; 1212 old_time32_t shm_dtime; 1213 old_time32_t shm_ctime; 1214 compat_ipc_pid_t shm_cpid; 1215 compat_ipc_pid_t shm_lpid; 1216 unsigned short shm_nattch; 1217 unsigned short shm_unused; 1218 compat_uptr_t shm_unused2; 1219 compat_uptr_t shm_unused3; 1220 }; 1221 1222 struct compat_shminfo64 { 1223 compat_ulong_t shmmax; 1224 compat_ulong_t shmmin; 1225 compat_ulong_t shmmni; 1226 compat_ulong_t shmseg; 1227 compat_ulong_t shmall; 1228 compat_ulong_t __unused1; 1229 compat_ulong_t __unused2; 1230 compat_ulong_t __unused3; 1231 compat_ulong_t __unused4; 1232 }; 1233 1234 struct compat_shm_info { 1235 compat_int_t used_ids; 1236 compat_ulong_t shm_tot, shm_rss, shm_swp; 1237 compat_ulong_t swap_attempts, swap_successes; 1238 }; 1239 1240 static int copy_compat_shminfo_to_user(void __user *buf, struct shminfo64 *in, 1241 int version) 1242 { 1243 if (in->shmmax > INT_MAX) 1244 in->shmmax = INT_MAX; 1245 if (version == IPC_64) { 1246 struct compat_shminfo64 info; 1247 memset(&info, 0, sizeof(info)); 1248 info.shmmax = in->shmmax; 1249 info.shmmin = in->shmmin; 1250 info.shmmni = in->shmmni; 1251 info.shmseg = in->shmseg; 1252 info.shmall = in->shmall; 1253 return copy_to_user(buf, &info, sizeof(info)); 1254 } else { 1255 struct shminfo info; 1256 memset(&info, 0, sizeof(info)); 1257 info.shmmax = in->shmmax; 1258 info.shmmin = in->shmmin; 1259 info.shmmni = in->shmmni; 1260 info.shmseg = in->shmseg; 1261 info.shmall = in->shmall; 1262 return copy_to_user(buf, &info, sizeof(info)); 1263 } 1264 } 1265 1266 static int put_compat_shm_info(struct shm_info *ip, 1267 struct compat_shm_info __user *uip) 1268 { 1269 struct compat_shm_info info; 1270 1271 memset(&info, 0, sizeof(info)); 1272 info.used_ids = ip->used_ids; 1273 info.shm_tot = ip->shm_tot; 1274 info.shm_rss = ip->shm_rss; 1275 info.shm_swp = ip->shm_swp; 1276 info.swap_attempts = ip->swap_attempts; 1277 info.swap_successes = ip->swap_successes; 1278 return copy_to_user(uip, &info, sizeof(info)); 1279 } 1280 1281 static int copy_compat_shmid_to_user(void __user *buf, struct shmid64_ds *in, 1282 int version) 1283 { 1284 if (version == IPC_64) { 1285 struct compat_shmid64_ds v; 1286 memset(&v, 0, sizeof(v)); 1287 to_compat_ipc64_perm(&v.shm_perm, &in->shm_perm); 1288 v.shm_atime = lower_32_bits(in->shm_atime); 1289 v.shm_atime_high = upper_32_bits(in->shm_atime); 1290 v.shm_dtime = lower_32_bits(in->shm_dtime); 1291 v.shm_dtime_high = upper_32_bits(in->shm_dtime); 1292 v.shm_ctime = lower_32_bits(in->shm_ctime); 1293 v.shm_ctime_high = upper_32_bits(in->shm_ctime); 1294 v.shm_segsz = in->shm_segsz; 1295 v.shm_nattch = in->shm_nattch; 1296 v.shm_cpid = in->shm_cpid; 1297 v.shm_lpid = in->shm_lpid; 1298 return copy_to_user(buf, &v, sizeof(v)); 1299 } else { 1300 struct compat_shmid_ds v; 1301 memset(&v, 0, sizeof(v)); 1302 to_compat_ipc_perm(&v.shm_perm, &in->shm_perm); 1303 v.shm_perm.key = in->shm_perm.key; 1304 v.shm_atime = in->shm_atime; 1305 v.shm_dtime = in->shm_dtime; 1306 v.shm_ctime = in->shm_ctime; 1307 v.shm_segsz = in->shm_segsz; 1308 v.shm_nattch = in->shm_nattch; 1309 v.shm_cpid = in->shm_cpid; 1310 v.shm_lpid = in->shm_lpid; 1311 return copy_to_user(buf, &v, sizeof(v)); 1312 } 1313 } 1314 1315 static int copy_compat_shmid_from_user(struct shmid64_ds *out, void __user *buf, 1316 int version) 1317 { 1318 memset(out, 0, sizeof(*out)); 1319 if (version == IPC_64) { 1320 struct compat_shmid64_ds __user *p = buf; 1321 return get_compat_ipc64_perm(&out->shm_perm, &p->shm_perm); 1322 } else { 1323 struct compat_shmid_ds __user *p = buf; 1324 return get_compat_ipc_perm(&out->shm_perm, &p->shm_perm); 1325 } 1326 } 1327 1328 static long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr, int version) 1329 { 1330 struct ipc_namespace *ns; 1331 struct shmid64_ds sem64; 1332 int err; 1333 1334 ns = current->nsproxy->ipc_ns; 1335 1336 if (cmd < 0 || shmid < 0) 1337 return -EINVAL; 1338 1339 switch (cmd) { 1340 case IPC_INFO: { 1341 struct shminfo64 shminfo; 1342 err = shmctl_ipc_info(ns, &shminfo); 1343 if (err < 0) 1344 return err; 1345 if (copy_compat_shminfo_to_user(uptr, &shminfo, version)) 1346 err = -EFAULT; 1347 return err; 1348 } 1349 case SHM_INFO: { 1350 struct shm_info shm_info; 1351 err = shmctl_shm_info(ns, &shm_info); 1352 if (err < 0) 1353 return err; 1354 if (put_compat_shm_info(&shm_info, uptr)) 1355 err = -EFAULT; 1356 return err; 1357 } 1358 case IPC_STAT: 1359 case SHM_STAT_ANY: 1360 case SHM_STAT: 1361 err = shmctl_stat(ns, shmid, cmd, &sem64); 1362 if (err < 0) 1363 return err; 1364 if (copy_compat_shmid_to_user(uptr, &sem64, version)) 1365 err = -EFAULT; 1366 return err; 1367 1368 case IPC_SET: 1369 if (copy_compat_shmid_from_user(&sem64, uptr, version)) 1370 return -EFAULT; 1371 fallthrough; 1372 case IPC_RMID: 1373 return shmctl_down(ns, shmid, cmd, &sem64); 1374 case SHM_LOCK: 1375 case SHM_UNLOCK: 1376 return shmctl_do_lock(ns, shmid, cmd); 1377 default: 1378 return -EINVAL; 1379 } 1380 return err; 1381 } 1382 1383 COMPAT_SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, void __user *, uptr) 1384 { 1385 return compat_ksys_shmctl(shmid, cmd, uptr, IPC_64); 1386 } 1387 1388 #ifdef CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION 1389 long compat_ksys_old_shmctl(int shmid, int cmd, void __user *uptr) 1390 { 1391 int version = compat_ipc_parse_version(&cmd); 1392 1393 return compat_ksys_shmctl(shmid, cmd, uptr, version); 1394 } 1395 1396 COMPAT_SYSCALL_DEFINE3(old_shmctl, int, shmid, int, cmd, void __user *, uptr) 1397 { 1398 return compat_ksys_old_shmctl(shmid, cmd, uptr); 1399 } 1400 #endif 1401 #endif 1402 1403 /* 1404 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists. 1405 * 1406 * NOTE! Despite the name, this is NOT a direct system call entrypoint. The 1407 * "raddr" thing points to kernel space, and there has to be a wrapper around 1408 * this. 1409 */ 1410 long do_shmat(int shmid, char __user *shmaddr, int shmflg, 1411 ulong *raddr, unsigned long shmlba) 1412 { 1413 struct shmid_kernel *shp; 1414 unsigned long addr = (unsigned long)shmaddr; 1415 unsigned long size; 1416 struct file *file, *base; 1417 int err; 1418 unsigned long flags = MAP_SHARED; 1419 unsigned long prot; 1420 int acc_mode; 1421 struct ipc_namespace *ns; 1422 struct shm_file_data *sfd; 1423 int f_flags; 1424 unsigned long populate = 0; 1425 1426 err = -EINVAL; 1427 if (shmid < 0) 1428 goto out; 1429 1430 if (addr) { 1431 if (addr & (shmlba - 1)) { 1432 if (shmflg & SHM_RND) { 1433 addr &= ~(shmlba - 1); /* round down */ 1434 1435 /* 1436 * Ensure that the round-down is non-nil 1437 * when remapping. This can happen for 1438 * cases when addr < shmlba. 1439 */ 1440 if (!addr && (shmflg & SHM_REMAP)) 1441 goto out; 1442 } else 1443 #ifndef __ARCH_FORCE_SHMLBA 1444 if (addr & ~PAGE_MASK) 1445 #endif 1446 goto out; 1447 } 1448 1449 flags |= MAP_FIXED; 1450 } else if ((shmflg & SHM_REMAP)) 1451 goto out; 1452 1453 if (shmflg & SHM_RDONLY) { 1454 prot = PROT_READ; 1455 acc_mode = S_IRUGO; 1456 f_flags = O_RDONLY; 1457 } else { 1458 prot = PROT_READ | PROT_WRITE; 1459 acc_mode = S_IRUGO | S_IWUGO; 1460 f_flags = O_RDWR; 1461 } 1462 if (shmflg & SHM_EXEC) { 1463 prot |= PROT_EXEC; 1464 acc_mode |= S_IXUGO; 1465 } 1466 1467 /* 1468 * We cannot rely on the fs check since SYSV IPC does have an 1469 * additional creator id... 1470 */ 1471 ns = current->nsproxy->ipc_ns; 1472 rcu_read_lock(); 1473 shp = shm_obtain_object_check(ns, shmid); 1474 if (IS_ERR(shp)) { 1475 err = PTR_ERR(shp); 1476 goto out_unlock; 1477 } 1478 1479 err = -EACCES; 1480 if (ipcperms(ns, &shp->shm_perm, acc_mode)) 1481 goto out_unlock; 1482 1483 err = security_shm_shmat(&shp->shm_perm, shmaddr, shmflg); 1484 if (err) 1485 goto out_unlock; 1486 1487 ipc_lock_object(&shp->shm_perm); 1488 1489 /* check if shm_destroy() is tearing down shp */ 1490 if (!ipc_valid_object(&shp->shm_perm)) { 1491 ipc_unlock_object(&shp->shm_perm); 1492 err = -EIDRM; 1493 goto out_unlock; 1494 } 1495 1496 /* 1497 * We need to take a reference to the real shm file to prevent the 1498 * pointer from becoming stale in cases where the lifetime of the outer 1499 * file extends beyond that of the shm segment. It's not usually 1500 * possible, but it can happen during remap_file_pages() emulation as 1501 * that unmaps the memory, then does ->mmap() via file reference only. 1502 * We'll deny the ->mmap() if the shm segment was since removed, but to 1503 * detect shm ID reuse we need to compare the file pointers. 1504 */ 1505 base = get_file(shp->shm_file); 1506 shp->shm_nattch++; 1507 size = i_size_read(file_inode(base)); 1508 ipc_unlock_object(&shp->shm_perm); 1509 rcu_read_unlock(); 1510 1511 err = -ENOMEM; 1512 sfd = kzalloc(sizeof(*sfd), GFP_KERNEL); 1513 if (!sfd) { 1514 fput(base); 1515 goto out_nattch; 1516 } 1517 1518 file = alloc_file_clone(base, f_flags, 1519 is_file_hugepages(base) ? 1520 &shm_file_operations_huge : 1521 &shm_file_operations); 1522 err = PTR_ERR(file); 1523 if (IS_ERR(file)) { 1524 kfree(sfd); 1525 fput(base); 1526 goto out_nattch; 1527 } 1528 1529 sfd->id = shp->shm_perm.id; 1530 sfd->ns = get_ipc_ns(ns); 1531 sfd->file = base; 1532 sfd->vm_ops = NULL; 1533 file->private_data = sfd; 1534 1535 err = security_mmap_file(file, prot, flags); 1536 if (err) 1537 goto out_fput; 1538 1539 if (mmap_write_lock_killable(current->mm)) { 1540 err = -EINTR; 1541 goto out_fput; 1542 } 1543 1544 if (addr && !(shmflg & SHM_REMAP)) { 1545 err = -EINVAL; 1546 if (addr + size < addr) 1547 goto invalid; 1548 1549 if (find_vma_intersection(current->mm, addr, addr + size)) 1550 goto invalid; 1551 } 1552 1553 addr = do_mmap(file, addr, size, prot, flags, 0, &populate, NULL); 1554 *raddr = addr; 1555 err = 0; 1556 if (IS_ERR_VALUE(addr)) 1557 err = (long)addr; 1558 invalid: 1559 mmap_write_unlock(current->mm); 1560 if (populate) 1561 mm_populate(addr, populate); 1562 1563 out_fput: 1564 fput(file); 1565 1566 out_nattch: 1567 down_write(&shm_ids(ns).rwsem); 1568 shp = shm_lock(ns, shmid); 1569 shp->shm_nattch--; 1570 if (shm_may_destroy(ns, shp)) 1571 shm_destroy(ns, shp); 1572 else 1573 shm_unlock(shp); 1574 up_write(&shm_ids(ns).rwsem); 1575 return err; 1576 1577 out_unlock: 1578 rcu_read_unlock(); 1579 out: 1580 return err; 1581 } 1582 1583 SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg) 1584 { 1585 unsigned long ret; 1586 long err; 1587 1588 err = do_shmat(shmid, shmaddr, shmflg, &ret, SHMLBA); 1589 if (err) 1590 return err; 1591 force_successful_syscall_return(); 1592 return (long)ret; 1593 } 1594 1595 #ifdef CONFIG_COMPAT 1596 1597 #ifndef COMPAT_SHMLBA 1598 #define COMPAT_SHMLBA SHMLBA 1599 #endif 1600 1601 COMPAT_SYSCALL_DEFINE3(shmat, int, shmid, compat_uptr_t, shmaddr, int, shmflg) 1602 { 1603 unsigned long ret; 1604 long err; 1605 1606 err = do_shmat(shmid, compat_ptr(shmaddr), shmflg, &ret, COMPAT_SHMLBA); 1607 if (err) 1608 return err; 1609 force_successful_syscall_return(); 1610 return (long)ret; 1611 } 1612 #endif 1613 1614 /* 1615 * detach and kill segment if marked destroyed. 1616 * The work is done in shm_close. 1617 */ 1618 long ksys_shmdt(char __user *shmaddr) 1619 { 1620 struct mm_struct *mm = current->mm; 1621 struct vm_area_struct *vma; 1622 unsigned long addr = (unsigned long)shmaddr; 1623 int retval = -EINVAL; 1624 #ifdef CONFIG_MMU 1625 loff_t size = 0; 1626 struct file *file; 1627 struct vm_area_struct *next; 1628 #endif 1629 1630 if (addr & ~PAGE_MASK) 1631 return retval; 1632 1633 if (mmap_write_lock_killable(mm)) 1634 return -EINTR; 1635 1636 /* 1637 * This function tries to be smart and unmap shm segments that 1638 * were modified by partial mlock or munmap calls: 1639 * - It first determines the size of the shm segment that should be 1640 * unmapped: It searches for a vma that is backed by shm and that 1641 * started at address shmaddr. It records it's size and then unmaps 1642 * it. 1643 * - Then it unmaps all shm vmas that started at shmaddr and that 1644 * are within the initially determined size and that are from the 1645 * same shm segment from which we determined the size. 1646 * Errors from do_munmap are ignored: the function only fails if 1647 * it's called with invalid parameters or if it's called to unmap 1648 * a part of a vma. Both calls in this function are for full vmas, 1649 * the parameters are directly copied from the vma itself and always 1650 * valid - therefore do_munmap cannot fail. (famous last words?) 1651 */ 1652 /* 1653 * If it had been mremap()'d, the starting address would not 1654 * match the usual checks anyway. So assume all vma's are 1655 * above the starting address given. 1656 */ 1657 vma = find_vma(mm, addr); 1658 1659 #ifdef CONFIG_MMU 1660 while (vma) { 1661 next = vma->vm_next; 1662 1663 /* 1664 * Check if the starting address would match, i.e. it's 1665 * a fragment created by mprotect() and/or munmap(), or it 1666 * otherwise it starts at this address with no hassles. 1667 */ 1668 if ((vma->vm_ops == &shm_vm_ops) && 1669 (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) { 1670 1671 /* 1672 * Record the file of the shm segment being 1673 * unmapped. With mremap(), someone could place 1674 * page from another segment but with equal offsets 1675 * in the range we are unmapping. 1676 */ 1677 file = vma->vm_file; 1678 size = i_size_read(file_inode(vma->vm_file)); 1679 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL); 1680 /* 1681 * We discovered the size of the shm segment, so 1682 * break out of here and fall through to the next 1683 * loop that uses the size information to stop 1684 * searching for matching vma's. 1685 */ 1686 retval = 0; 1687 vma = next; 1688 break; 1689 } 1690 vma = next; 1691 } 1692 1693 /* 1694 * We need look no further than the maximum address a fragment 1695 * could possibly have landed at. Also cast things to loff_t to 1696 * prevent overflows and make comparisons vs. equal-width types. 1697 */ 1698 size = PAGE_ALIGN(size); 1699 while (vma && (loff_t)(vma->vm_end - addr) <= size) { 1700 next = vma->vm_next; 1701 1702 /* finding a matching vma now does not alter retval */ 1703 if ((vma->vm_ops == &shm_vm_ops) && 1704 ((vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) && 1705 (vma->vm_file == file)) 1706 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL); 1707 vma = next; 1708 } 1709 1710 #else /* CONFIG_MMU */ 1711 /* under NOMMU conditions, the exact address to be destroyed must be 1712 * given 1713 */ 1714 if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) { 1715 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL); 1716 retval = 0; 1717 } 1718 1719 #endif 1720 1721 mmap_write_unlock(mm); 1722 return retval; 1723 } 1724 1725 SYSCALL_DEFINE1(shmdt, char __user *, shmaddr) 1726 { 1727 return ksys_shmdt(shmaddr); 1728 } 1729 1730 #ifdef CONFIG_PROC_FS 1731 static int sysvipc_shm_proc_show(struct seq_file *s, void *it) 1732 { 1733 struct pid_namespace *pid_ns = ipc_seq_pid_ns(s); 1734 struct user_namespace *user_ns = seq_user_ns(s); 1735 struct kern_ipc_perm *ipcp = it; 1736 struct shmid_kernel *shp; 1737 unsigned long rss = 0, swp = 0; 1738 1739 shp = container_of(ipcp, struct shmid_kernel, shm_perm); 1740 shm_add_rss_swap(shp, &rss, &swp); 1741 1742 #if BITS_PER_LONG <= 32 1743 #define SIZE_SPEC "%10lu" 1744 #else 1745 #define SIZE_SPEC "%21lu" 1746 #endif 1747 1748 seq_printf(s, 1749 "%10d %10d %4o " SIZE_SPEC " %5u %5u " 1750 "%5lu %5u %5u %5u %5u %10llu %10llu %10llu " 1751 SIZE_SPEC " " SIZE_SPEC "\n", 1752 shp->shm_perm.key, 1753 shp->shm_perm.id, 1754 shp->shm_perm.mode, 1755 shp->shm_segsz, 1756 pid_nr_ns(shp->shm_cprid, pid_ns), 1757 pid_nr_ns(shp->shm_lprid, pid_ns), 1758 shp->shm_nattch, 1759 from_kuid_munged(user_ns, shp->shm_perm.uid), 1760 from_kgid_munged(user_ns, shp->shm_perm.gid), 1761 from_kuid_munged(user_ns, shp->shm_perm.cuid), 1762 from_kgid_munged(user_ns, shp->shm_perm.cgid), 1763 shp->shm_atim, 1764 shp->shm_dtim, 1765 shp->shm_ctim, 1766 rss * PAGE_SIZE, 1767 swp * PAGE_SIZE); 1768 1769 return 0; 1770 } 1771 #endif 1772