/*
 * linux/ipc/msg.c
 * Copyright (C) 1992 Krishna Balasubramanian
 *
 * Removed all the remaining kerneld mess
 * Catch the -EFAULT stuff properly
 * Use GFP_KERNEL for messages as in 1.2
 * Fixed up the unchecked user space derefs
 * Copyright (C) 1998 Alan Cox & Andi Kleen
 *
 * /proc/sysvipc/msg support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 *
 * mostly rewritten, threaded and wake-one semantics added
 * MSGMAX limit removed, sysctl's added
 * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
 *
 * support for audit of ipc object properties and permission changes
 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
 *
 * namespaces support
 * OpenVZ, SWsoft Inc.
 * Pavel Emelianov <xemul@openvz.org>
 */

#include <linux/capability.h>
#include <linux/msg.h>
#include <linux/spinlock.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/proc_fs.h>
#include <linux/list.h>
#include <linux/security.h>
#include <linux/sched.h>
#include <linux/sched/wake_q.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/seq_file.h>
#include <linux/rwsem.h>
#include <linux/nsproxy.h>
#include <linux/ipc_namespace.h>

#include <asm/current.h>
#include <linux/uaccess.h>
#include "util.h"

/*
 * Overview: this file implements the msgget(), msgctl(), msgsnd() and
 * msgrcv() system calls. Tasks that have to sleep are tracked on the
 * per-queue q_receivers / q_senders lists through the two structures
 * below, both of which live on the sleeping task's stack (see do_msgrcv()
 * and do_msgsnd()).
 */

/* one msg_receiver structure for each sleeping receiver */
struct msg_receiver {
	struct list_head	r_list;		/* link in msq->q_receivers */
	struct task_struct	*r_tsk;		/* the sleeping task */

	int			r_mode;		/* SEARCH_* mode from convert_mode() */
	long			r_msgtype;	/* type argument matching r_mode */
	long			r_maxsize;	/* largest m_ts accepted; INT_MAX with MSG_NOERROR */

	/*
	 * Result slot. Starts out as ERR_PTR(-EAGAIN); pipelined_send() and
	 * expunge_all() overwrite it with a message or an error pointer.
	 */
	struct msg_msg		*r_msg;
};

/* one msg_sender for each sleeping sender */
struct msg_sender {
	struct list_head	list;	/* link in msq->q_senders; next == NULL once killed by ss_wakeup() */
	struct task_struct	*tsk;	/* the sleeping task */
	size_t			msgsz;	/* size of the message it wants to queue */
};

/* Message selection modes, produced by convert_mode() and used by testmsg() */
#define SEARCH_ANY		1	/* any message matches */
#define SEARCH_EQUAL		2	/* m_type == type */
#define SEARCH_NOTEQUAL		3	/* m_type != type */
#define SEARCH_LESSEQUAL	4	/* m_type <= type */
#define SEARCH_NUMBER		5	/* MSG_COPY: select by position, see find_msg() */

/* The message-queue ipc_ids of namespace @ns */
#define msg_ids(ns)	((ns)->ids[IPC_MSG_IDS])

/*
 * Look up a queue with ipc_obtain_object_idr() and convert the result to
 * the enclosing struct msg_queue. An error pointer from the lookup is
 * passed through unchanged (re-typed with ERR_CAST).
 * Used by MSG_STAT, where the caller's msqid is an index rather than a
 * queue id.
 */
static inline struct msg_queue *msq_obtain_object(struct ipc_namespace *ns, int id)
{
	struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&msg_ids(ns), id);

	if (IS_ERR(ipcp))
		return ERR_CAST(ipcp);

	return container_of(ipcp, struct msg_queue, q_perm);
}

/*
 * Same as msq_obtain_object(), but the lookup goes through
 * ipc_obtain_object_check(). All callers in this file hold
 * rcu_read_lock() around the call.
 */
static inline struct msg_queue *msq_obtain_object_check(struct ipc_namespace *ns,
							int id)
{
	struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&msg_ids(ns), id);

	if (IS_ERR(ipcp))
		return ERR_CAST(ipcp);

	return container_of(ipcp, struct msg_queue, q_perm);
}

/* Remove queue @s from the namespace's message-queue ids via ipc_rmid(). */
static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s)
{
	ipc_rmid(&msg_ids(ns), &s->q_perm);
}

/*
 * Release callback handed to ipc_rcu_putref(): drops the queue's security
 * state with security_msg_queue_free() and then frees the object through
 * ipc_rcu_free().
 */
static void msg_rcu_free(struct rcu_head *head)
{
	struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
	struct msg_queue *msq = ipc_rcu_to_struct(p);

	security_msg_queue_free(msq);
	ipc_rcu_free(head);
}

/**
 * newque - Create a new msg queue
 * @ns: namespace
 * @params: ptr to the structure that contains the key and msgflg
 *
 * Called with msg_ids.rwsem held (writer)
 *
 * Returns the id of the new queue (msq->q_perm.id) on success, -ENOMEM if
 * the allocation fails, or the error reported by security_msg_queue_alloc()
 * or ipc_addid().
 */
static int newque(struct ipc_namespace *ns, struct ipc_params *params)
{
	struct msg_queue *msq;
	int id, retval;
	key_t key = params->key;
	int msgflg = params->flg;

	msq = ipc_rcu_alloc(sizeof(*msq));
	if (!msq)
		return -ENOMEM;

	msq->q_perm.mode = msgflg & S_IRWXUGO;
	msq->q_perm.key = key;

	msq->q_perm.security = NULL;
	retval = security_msg_queue_alloc(msq);
	if (retval) {
		/*
		 * Released with plain ipc_rcu_free rather than msg_rcu_free,
		 * i.e. without calling security_msg_queue_free().
		 * NOTE(review): presumably because the security allocation
		 * failed and there is nothing to free - confirm.
		 */
		ipc_rcu_putref(msq, ipc_rcu_free);
		return retval;
	}

	msq->q_stime = msq->q_rtime = 0;
	msq->q_ctime = get_seconds();
	msq->q_cbytes = msq->q_qnum = 0;
	msq->q_qbytes = ns->msg_ctlmnb;
	msq->q_lspid = msq->q_lrpid = 0;
	INIT_LIST_HEAD(&msq->q_messages);
	INIT_LIST_HEAD(&msq->q_receivers);
	INIT_LIST_HEAD(&msq->q_senders);

	/* ipc_addid() locks msq upon success. */
	id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
	if (id < 0) {
		ipc_rcu_putref(msq, msg_rcu_free);
		return id;
	}

	/*
	 * Drop the object lock taken by ipc_addid().
	 * NOTE(review): the rcu_read_unlock() has no matching lock in this
	 * function, so ipc_addid() presumably also returns inside an RCU
	 * read-side section on success - confirm in util.c.
	 */
	ipc_unlock_object(&msq->q_perm);
	rcu_read_unlock();

	return msq->q_perm.id;
}

/*
 * True if a message of @msgsz bytes can be added to @msq: the byte total
 * must stay within q_qbytes, and the message count after the addition is
 * also bounded by q_qbytes.
 */
static inline bool msg_fits_inqueue(struct msg_queue *msq, size_t msgsz)
{
	return msgsz + msq->q_cbytes <= msq->q_qbytes &&
		1 + msq->q_qnum <= msq->q_qbytes;
}

/*
 * Register the current task as a blocked sender of a @msgsz byte message:
 * fill in @mss, mark the task TASK_INTERRUPTIBLE and append @mss to
 * msq->q_senders. The caller (do_msgsnd()) then drops the queue lock and
 * calls schedule().
 */
static inline void ss_add(struct msg_queue *msq,
			  struct msg_sender *mss, size_t msgsz)
{
	mss->tsk = current;
	mss->msgsz = msgsz;
	__set_current_state(TASK_INTERRUPTIBLE);
	list_add_tail(&mss->list, &msq->q_senders);
}

/*
 * Unlink @mss from the sender list, unless ss_wakeup(kill == true) has
 * already marked it dead by clearing list.next.
 */
static inline void ss_del(struct msg_sender *mss)
{
	if (mss->list.next)
		list_del(&mss->list);
}

/*
 * Queue wakeups for blocked senders on @wake_q.
 *
 * @kill == true (queue removal, see freeque()): every sender is woken and
 * its list.next is cleared so that a later ss_del() leaves the entry alone.
 *
 * @kill == false: only senders whose message now fits are woken; the
 * others are rotated to the tail of the list, and the walk stops once the
 * first rotated sender is seen again.
 */
static void ss_wakeup(struct msg_queue *msq,
		      struct wake_q_head *wake_q, bool kill)
{
	struct msg_sender *mss, *t;
	struct task_struct *stop_tsk = NULL;
	struct list_head *h = &msq->q_senders;

	list_for_each_entry_safe(mss, t, h, list) {
		if (kill)
			mss->list.next = NULL;

		/*
		 * Stop at the first task we don't wakeup,
		 * we've already iterated the original
		 * sender queue.
		 */
		else if (stop_tsk == mss->tsk)
			break;
		/*
		 * We are not in an EIDRM scenario here, therefore
		 * verify that we really need to wakeup the task.
		 * To maintain current semantics and wakeup order,
		 * move the sender to the tail on behalf of the
		 * blocked task.
		 */
		else if (!msg_fits_inqueue(msq, mss->msgsz)) {
			if (!stop_tsk)
				stop_tsk = mss->tsk;

			list_move_tail(&mss->list, &msq->q_senders);
			continue;
		}

		wake_q_add(wake_q, mss->tsk);
	}
}

/*
 * Queue a wakeup for every sleeping receiver of @msq and publish
 * ERR_PTR(@res) as its result. The receivers are not unlinked from
 * q_receivers here. Called with -EIDRM from freeque() and with -EAGAIN
 * from the IPC_SET path of msgctl_down().
 */
static void expunge_all(struct msg_queue *msq, int res,
			struct wake_q_head *wake_q)
{
	struct msg_receiver *msr, *t;

	list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
		wake_q_add(wake_q, msr->r_tsk);
		WRITE_ONCE(msr->r_msg, ERR_PTR(res));
	}
}

/*
 * freeque() wakes up waiters on the sender and receiver waiting queue,
 * removes the message queue from message queue ID IDR, and cleans up all the
 * messages associated with this queue.
 *
 * msg_ids.rwsem (writer) and the spinlock for this message queue are held
 * before freeque() is called. msg_ids.rwsem remains locked on exit.
 *
 * The object lock and the RCU read-side section are both released in here;
 * the queued wakeups are only issued after the object lock has been dropped.
 */
static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
{
	struct msg_msg *msg, *t;
	struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
	DEFINE_WAKE_Q(wake_q);

	expunge_all(msq, -EIDRM, &wake_q);
	ss_wakeup(msq, &wake_q, true);
	msg_rmid(ns, msq);
	ipc_unlock_object(&msq->q_perm);
	wake_up_q(&wake_q);
	rcu_read_unlock();

	/* Free the still-queued messages and fix up the namespace counters. */
	list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
		atomic_dec(&ns->msg_hdrs);
		free_msg(msg);
	}
	atomic_sub(msq->q_cbytes, &ns->msg_bytes);
	ipc_rcu_putref(msq, msg_rcu_free);
}

/*
 * Called with msg_ids.rwsem and ipcp locked.
 *
 * "associate" hook of msg_ops (see msgget): forwards to
 * security_msg_queue_associate() for the queue embedding @ipcp.
 */
static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg)
{
	struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);

	return security_msg_queue_associate(msq, msgflg);
}

/*
 * msgget(2): hand @key and @msgflg to ipcget() for the caller's ipc
 * namespace, with newque() as the creation hook and msg_security() as the
 * associate hook. The return value is whatever ipcget() returns.
 */
SYSCALL_DEFINE2(msgget, key_t, key, int, msgflg)
{
	struct ipc_namespace *ns;
	static const struct ipc_ops msg_ops = {
		.getnew = newque,
		.associate = msg_security,
	};
	struct ipc_params msg_params;

	ns = current->nsproxy->ipc_ns;

	msg_params.key = key;
	msg_params.flg = msgflg;

	return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params);
}

/*
 * Copy queue status @in to user space in the layout selected by @version.
 *
 * IPC_64 copies the msqid64_ds as is. IPC_OLD converts to struct msqid_ds:
 * msg_cbytes, msg_qnum and msg_qbytes are clamped to USHRT_MAX, while the
 * unclamped byte values are stored in msg_lcbytes and msg_lqbytes.
 *
 * Returns 0 on success and a non-zero value otherwise. Note the return type
 * is unsigned, so the -EINVAL of the default case is not a usable errno;
 * the caller in this file only tests for non-zero.
 */
static inline unsigned long
copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version)
{
	switch (version) {
	case IPC_64:
		return copy_to_user(buf, in, sizeof(*in));
	case IPC_OLD:
	{
		struct msqid_ds out;

		/* Zero everything first so no stack contents reach user space. */
		memset(&out, 0, sizeof(out));

		ipc64_perm_to_ipc_perm(&in->msg_perm, &out.msg_perm);

		out.msg_stime		= in->msg_stime;
		out.msg_rtime		= in->msg_rtime;
		out.msg_ctime		= in->msg_ctime;

		if (in->msg_cbytes > USHRT_MAX)
			out.msg_cbytes	= USHRT_MAX;
		else
			out.msg_cbytes	= in->msg_cbytes;
		out.msg_lcbytes		= in->msg_cbytes;

		if (in->msg_qnum > USHRT_MAX)
			out.msg_qnum	= USHRT_MAX;
		else
			out.msg_qnum	= in->msg_qnum;

		if (in->msg_qbytes > USHRT_MAX)
			out.msg_qbytes	= USHRT_MAX;
		else
			out.msg_qbytes	= in->msg_qbytes;
		out.msg_lqbytes		= in->msg_qbytes;

		out.msg_lspid		= in->msg_lspid;
		out.msg_lrpid		= in->msg_lrpid;

		return copy_to_user(buf, &out, sizeof(out));
	}
	default:
		return -EINVAL;
	}
}

/*
 * Read the IPC_SET argument from user space into @out.
 *
 * IPC_64 copies the whole structure. IPC_OLD only fills msg_perm.uid,
 * msg_perm.gid, msg_perm.mode and msg_qbytes; every other field of @out is
 * left untouched. For IPC_OLD a msg_qbytes of 0 means "use msg_lqbytes".
 *
 * Returns 0 on success and a non-zero value otherwise (unsigned return
 * type; msgctl_down() maps any non-zero result to -EFAULT).
 */
static inline unsigned long
copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version)
{
	switch (version) {
	case IPC_64:
		if (copy_from_user(out, buf, sizeof(*out)))
			return -EFAULT;
		return 0;
	case IPC_OLD:
	{
		struct msqid_ds tbuf_old;

		if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
			return -EFAULT;

		out->msg_perm.uid	= tbuf_old.msg_perm.uid;
		out->msg_perm.gid	= tbuf_old.msg_perm.gid;
		out->msg_perm.mode	= tbuf_old.msg_perm.mode;

		if (tbuf_old.msg_qbytes == 0)
			out->msg_qbytes	= tbuf_old.msg_lqbytes;
		else
			out->msg_qbytes	= tbuf_old.msg_qbytes;

		return 0;
	}
	default:
		return -EINVAL;
	}
}

/*
 * This function handles some msgctl commands which require the rwsem
 * to be held in write mode.
 * NOTE: no locks must be held, the rwsem is taken inside this function.
 *
 * Handles IPC_RMID and IPC_SET; any other @cmd yields -EINVAL.
 *
 * Exit labels:
 *   out_unlock0 - object lock, RCU and rwsem are held
 *   out_unlock1 - RCU and rwsem are held
 *   out_up      - only the rwsem is held
 */
static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
		       struct msqid_ds __user *buf, int version)
{
	struct kern_ipc_perm *ipcp;
	struct msqid64_ds uninitialized_var(msqid64);
	struct msg_queue *msq;
	int err;

	/* The user copy is done before any lock is taken. */
	if (cmd == IPC_SET) {
		if (copy_msqid_from_user(&msqid64, buf, version))
			return -EFAULT;
	}

	down_write(&msg_ids(ns).rwsem);
	rcu_read_lock();

	/*
	 * msqid64 is only filled in for IPC_SET, yet its fields are passed
	 * here for IPC_RMID as well.
	 * NOTE(review): presumably ipcctl_pre_down_nolock() ignores them
	 * unless cmd is IPC_SET - confirm in util.c.
	 */
	ipcp = ipcctl_pre_down_nolock(ns, &msg_ids(ns), msqid, cmd,
				      &msqid64.msg_perm, msqid64.msg_qbytes);
	if (IS_ERR(ipcp)) {
		err = PTR_ERR(ipcp);
		goto out_unlock1;
	}

	msq = container_of(ipcp, struct msg_queue, q_perm);

	err = security_msg_queue_msgctl(msq, cmd);
	if (err)
		goto out_unlock1;

	switch (cmd) {
	case IPC_RMID:
		ipc_lock_object(&msq->q_perm);
		/* freeque unlocks the ipc object and rcu */
		freeque(ns, ipcp);
		/* err is 0 here: the security hook above succeeded. */
		goto out_up;
	case IPC_SET:
	{
		DEFINE_WAKE_Q(wake_q);

		/* Raising the limit above msg_ctlmnb needs CAP_SYS_RESOURCE. */
		if (msqid64.msg_qbytes > ns->msg_ctlmnb &&
		    !capable(CAP_SYS_RESOURCE)) {
			err = -EPERM;
			goto out_unlock1;
		}

		ipc_lock_object(&msq->q_perm);
		err = ipc_update_perm(&msqid64.msg_perm, ipcp);
		if (err)
			goto out_unlock0;

		msq->q_qbytes = msqid64.msg_qbytes;

		msq->q_ctime = get_seconds();
		/*
		 * Sleeping receivers might be excluded by
		 * stricter permissions.
		 */
		expunge_all(msq, -EAGAIN, &wake_q);
		/*
		 * Sleeping senders might be able to send
		 * due to a larger queue size.
		 */
		ss_wakeup(msq, &wake_q, false);
		ipc_unlock_object(&msq->q_perm);
		/* Issue the wakeups only after the object lock is dropped. */
		wake_up_q(&wake_q);

		goto out_unlock1;
	}
	default:
		err = -EINVAL;
		goto out_unlock1;
	}

out_unlock0:
	ipc_unlock_object(&msq->q_perm);
out_unlock1:
	rcu_read_unlock();
out_up:
	up_write(&msg_ids(ns).rwsem);
	return err;
}

/*
 * Handle the read-only msgctl commands.
 *
 * IPC_INFO / MSG_INFO: fill a struct msginfo and copy it to @buf; returns
 * the highest id in use (ipc_get_maxid(), clamped to 0 when negative).
 *
 * IPC_STAT / MSG_STAT: copy the queue status to @buf; returns 0 for
 * IPC_STAT and the queue id for MSG_STAT.
 *
 * A NULL @buf or a failed user copy gives -EFAULT, an unknown @cmd gives
 * -EINVAL, and lookup, permission or security failures return their own
 * error code.
 */
static int msgctl_nolock(struct ipc_namespace *ns, int msqid,
			 int cmd, int version, void __user *buf)
{
	int err;
	struct msg_queue *msq;

	switch (cmd) {
	case IPC_INFO:
	case MSG_INFO:
	{
		struct msginfo msginfo;
		int max_id;

		if (!buf)
			return -EFAULT;

		err = security_msg_queue_msgctl(NULL, cmd);
		if (err)
			return err;

		/*
		 * We must not return kernel stack data.
		 * due to padding, it's not enough
		 * to set all member fields.
		 */
		memset(&msginfo, 0, sizeof(msginfo));
		msginfo.msgmni = ns->msg_ctlmni;
		msginfo.msgmax = ns->msg_ctlmax;
		msginfo.msgmnb = ns->msg_ctlmnb;
		msginfo.msgssz = MSGSSZ;
		msginfo.msgseg = MSGSEG;
		down_read(&msg_ids(ns).rwsem);
		if (cmd == MSG_INFO) {
			/*
			 * MSG_INFO reuses three fields for live counters:
			 * queues in use, message headers and message bytes.
			 */
			msginfo.msgpool = msg_ids(ns).in_use;
			msginfo.msgmap = atomic_read(&ns->msg_hdrs);
			msginfo.msgtql = atomic_read(&ns->msg_bytes);
		} else {
			msginfo.msgmap = MSGMAP;
			msginfo.msgpool = MSGPOOL;
			msginfo.msgtql = MSGTQL;
		}
		max_id = ipc_get_maxid(&msg_ids(ns));
		up_read(&msg_ids(ns).rwsem);
		if (copy_to_user(buf, &msginfo, sizeof(struct msginfo)))
			return -EFAULT;
		return (max_id < 0) ? 0 : max_id;
	}

	case MSG_STAT:
	case IPC_STAT:
	{
		struct msqid64_ds tbuf;
		int success_return;

		if (!buf)
			return -EFAULT;

		memset(&tbuf, 0, sizeof(tbuf));

		rcu_read_lock();
		if (cmd == MSG_STAT) {
			/* msqid is an index here; report the real id back */
			msq = msq_obtain_object(ns, msqid);
			if (IS_ERR(msq)) {
				err = PTR_ERR(msq);
				goto out_unlock;
			}
			success_return = msq->q_perm.id;
		} else {
			msq = msq_obtain_object_check(ns, msqid);
			if (IS_ERR(msq)) {
				err = PTR_ERR(msq);
				goto out_unlock;
			}
			success_return = 0;
		}

		err = -EACCES;
		if (ipcperms(ns, &msq->q_perm, S_IRUGO))
			goto out_unlock;

		err = security_msg_queue_msgctl(msq, cmd);
		if (err)
			goto out_unlock;

		/*
		 * Snapshot the queue under rcu_read_lock() only; the object
		 * lock is not taken here.
		 */
		kernel_to_ipc64_perm(&msq->q_perm, &tbuf.msg_perm);
		tbuf.msg_stime  = msq->q_stime;
		tbuf.msg_rtime  = msq->q_rtime;
		tbuf.msg_ctime  = msq->q_ctime;
		tbuf.msg_cbytes = msq->q_cbytes;
		tbuf.msg_qnum   = msq->q_qnum;
		tbuf.msg_qbytes = msq->q_qbytes;
		tbuf.msg_lspid  = msq->q_lspid;
		tbuf.msg_lrpid  = msq->q_lrpid;
		rcu_read_unlock();

		/* The user copy happens outside the RCU read-side section. */
		if (copy_msqid_to_user(buf, &tbuf, version))
			return -EFAULT;
		return success_return;
	}

	default:
		return -EINVAL;
	}

	/* Not reached: every case above returns or jumps to out_unlock. */
	return err;
out_unlock:
	rcu_read_unlock();
	return err;
}

/*
 * msgctl(2): reject negative arguments, split the IPC version out of @cmd
 * with ipc_parse_version() and dispatch to msgctl_nolock() (info/stat
 * commands) or msgctl_down() (IPC_SET, IPC_RMID). Unknown commands return
 * -EINVAL.
 */
SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf)
{
	int version;
	struct ipc_namespace *ns;

	if (msqid < 0 || cmd < 0)
		return -EINVAL;

	version = ipc_parse_version(&cmd);
	ns = current->nsproxy->ipc_ns;

	switch (cmd) {
	case IPC_INFO:
	case MSG_INFO:
	case MSG_STAT:	/* msqid is an index rather than a msg queue id */
	case IPC_STAT:
		return msgctl_nolock(ns, msqid, cmd, version, buf);
	case IPC_SET:
	case IPC_RMID:
		return msgctl_down(ns, msqid, cmd, buf, version);
	default:
		return  -EINVAL;
	}
}

/*
 * Return 1 if @msg satisfies the (@type, @mode) selection, 0 otherwise.
 * SEARCH_ANY and SEARCH_NUMBER accept every message; an unknown @mode
 * matches nothing.
 */
static int testmsg(struct msg_msg *msg, long type, int mode)
{
	switch (mode) {
	case SEARCH_ANY:
	case SEARCH_NUMBER:
		return 1;
	case SEARCH_LESSEQUAL:
		if (msg->m_type <= type)
			return 1;
		break;
	case SEARCH_EQUAL:
		if (msg->m_type == type)
			return 1;
		break;
	case SEARCH_NOTEQUAL:
		if (msg->m_type != type)
			return 1;
		break;
	}
	return 0;
}

/*
 * Try to hand @msg directly to a sleeping receiver instead of queueing it.
 *
 * Receivers are examined in list order. One that matches (testmsg() and the
 * security hook) is unlinked from q_receivers; if its buffer is too small
 * it is woken with -E2BIG and the search continues, otherwise it receives
 * @msg and the queue's q_lrpid / q_rtime are updated.
 *
 * Returns 1 if @msg was delivered, 0 if the caller has to enqueue it.
 * Wakeups are only queued on @wake_q; the caller issues them.
 */
static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg,
				 struct wake_q_head *wake_q)
{
	struct msg_receiver *msr, *t;

	list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
		if (testmsg(msg, msr->r_msgtype, msr->r_mode) &&
		    !security_msg_queue_msgrcv(msq, msg, msr->r_tsk,
					       msr->r_msgtype, msr->r_mode)) {

			list_del(&msr->r_list);
			if (msr->r_maxsize < msg->m_ts) {
				wake_q_add(wake_q, msr->r_tsk);
				WRITE_ONCE(msr->r_msg, ERR_PTR(-E2BIG));
			} else {
				/*
				 * NOTE(review): this records task_pid_vnr()
				 * of the receiver, whereas do_msgrcv() records
				 * task_tgid_vnr(current) - confirm the
				 * difference is intended.
				 */
				msq->q_lrpid = task_pid_vnr(msr->r_tsk);
				msq->q_rtime = get_seconds();

				wake_q_add(wake_q, msr->r_tsk);
				WRITE_ONCE(msr->r_msg, msg);
				return 1;
			}
		}
	}

	return 0;
}

/*
 * do_msgsnd - send one message to queue @msqid
 * @msqid:  queue id, must be >= 0
 * @mtype:  message type, must be >= 1
 * @mtext:  user pointer to the payload
 * @msgsz:  payload size; must not exceed ns->msg_ctlmax
 * @msgflg: IPC_NOWAIT turns "queue full" into -EAGAIN instead of sleeping;
 *          the flags are also passed to the security hook
 *
 * The payload is copied in with load_msg() before any lock is taken. While
 * the message does not fit, the task sleeps interruptibly on q_senders and
 * re-runs all checks after each wakeup. Once it fits, the message is either
 * handed straight to a waiting receiver (pipelined_send()) or appended to
 * q_messages.
 *
 * Returns 0 on success or a negative error code; on failure the loaded
 * message is freed.
 */
long do_msgsnd(int msqid, long mtype, void __user *mtext,
		size_t msgsz, int msgflg)
{
	struct msg_queue *msq;
	struct msg_msg *msg;
	int err;
	struct ipc_namespace *ns;
	DEFINE_WAKE_Q(wake_q);

	ns = current->nsproxy->ipc_ns;

	if (msgsz > ns->msg_ctlmax || (long) msgsz < 0 || msqid < 0)
		return -EINVAL;
	if (mtype < 1)
		return -EINVAL;

	msg = load_msg(mtext, msgsz);
	if (IS_ERR(msg))
		return PTR_ERR(msg);

	msg->m_type = mtype;
	msg->m_ts = msgsz;

	rcu_read_lock();
	msq = msq_obtain_object_check(ns, msqid);
	if (IS_ERR(msq)) {
		err = PTR_ERR(msq);
		goto out_unlock1;
	}

	ipc_lock_object(&msq->q_perm);

	for (;;) {
		struct msg_sender s;

		err = -EACCES;
		if (ipcperms(ns, &msq->q_perm, S_IWUGO))
			goto out_unlock0;

		/* raced with RMID? */
		if (!ipc_valid_object(&msq->q_perm)) {
			err = -EIDRM;
			goto out_unlock0;
		}

		err = security_msg_queue_msgsnd(msq, msg, msgflg);
		if (err)
			goto out_unlock0;

		if (msg_fits_inqueue(msq, msgsz))
			break;

		/* queue full, wait: */
		if (msgflg & IPC_NOWAIT) {
			err = -EAGAIN;
			goto out_unlock0;
		}

		/* enqueue the sender and prepare to block */
		ss_add(msq, &s, msgsz);

		/*
		 * Hold a reference across the sleep: both the object lock
		 * and the RCU read-side section are dropped below.
		 */
		if (!ipc_rcu_getref(msq)) {
			err = -EIDRM;
			goto out_unlock0;
		}

		ipc_unlock_object(&msq->q_perm);
		rcu_read_unlock();
		schedule();

		rcu_read_lock();
		ipc_lock_object(&msq->q_perm);

		ipc_rcu_putref(msq, msg_rcu_free);
		/* raced with RMID? */
		if (!ipc_valid_object(&msq->q_perm)) {
			err = -EIDRM;
			goto out_unlock0;
		}
		/* No-op if ss_wakeup(kill) already cleared s.list.next. */
		ss_del(&s);

		if (signal_pending(current)) {
			err = -ERESTARTNOHAND;
			goto out_unlock0;
		}

	}

	msq->q_lspid = task_tgid_vnr(current);
	msq->q_stime = get_seconds();

	if (!pipelined_send(msq, msg, &wake_q)) {
		/* no one is waiting for this message, enqueue it */
		list_add_tail(&msg->m_list, &msq->q_messages);
		msq->q_cbytes += msgsz;
		msq->q_qnum++;
		atomic_add(msgsz, &ns->msg_bytes);
		atomic_inc(&ns->msg_hdrs);
	}

	err = 0;
	/* The message now belongs to the queue or to a receiver. */
	msg = NULL;

out_unlock0:
	ipc_unlock_object(&msq->q_perm);
	wake_up_q(&wake_q);
out_unlock1:
	rcu_read_unlock();
	if (msg != NULL)
		free_msg(msg);
	return err;
}

/*
 * msgsnd(2): fetch the message type from the user's msgbuf and pass the
 * rest to do_msgsnd(). Returns -EFAULT if the type cannot be read.
 */
SYSCALL_DEFINE4(msgsnd, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
		int, msgflg)
{
	long mtype;

	if (get_user(mtype, &msgp->mtype))
		return -EFAULT;
	return do_msgsnd(msqid, mtype, msgp->mtext, msgsz, msgflg);
}

/*
 * Translate the msgrcv() arguments into a SEARCH_* mode. A negative
 * *msgtyp is replaced by its absolute value (LONG_MAX for LONG_MIN).
 * MSG_COPY takes precedence over everything else; MSG_EXCEPT is only
 * looked at for a positive *msgtyp.
 */
static inline int convert_mode(long *msgtyp, int msgflg)
{
	if (msgflg & MSG_COPY)
		return SEARCH_NUMBER;
	/*
	 *  find message of correct type.
	 *  msgtyp = 0 => get first.
	 *  msgtyp > 0 => get first message of matching type.
	 *  msgtyp < 0 => get message with least type must be <= abs(msgtyp).
	 */
	if (*msgtyp == 0)
		return SEARCH_ANY;
	if (*msgtyp < 0) {
		if (*msgtyp == LONG_MIN) /* -LONG_MIN is undefined */
			*msgtyp = LONG_MAX;
		else
			*msgtyp = -*msgtyp;
		return SEARCH_LESSEQUAL;
	}
	if (msgflg & MSG_EXCEPT)
		return SEARCH_NOTEQUAL;
	return SEARCH_EQUAL;
}

/*
 * Message handler used by the msgrcv() syscall: store the message type and
 * at most @bufsz payload bytes into the user's struct msgbuf at @dest.
 * Returns the number of payload bytes stored, or -EFAULT.
 */
static long do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz)
{
	struct msgbuf __user *msgp = dest;
	size_t msgsz;

	if (put_user(msg->m_type, &msgp->mtype))
		return -EFAULT;

	/* Truncate to the caller's buffer size. */
	msgsz = (bufsz > msg->m_ts) ? msg->m_ts : bufsz;
	if (store_msg(msgp->mtext, msg, msgsz))
		return -EFAULT;
	return msgsz;
}

#ifdef CONFIG_CHECKPOINT_RESTORE
/*
 * This function creates new kernel message structure, large enough to store
 * bufsz message bytes.
 *
 * Returns the new message, or the error pointer from load_msg().
 */
static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz)
{
	struct msg_msg *copy;

	/*
	 * Create dummy message to copy real message to.
	 */
	copy = load_msg(buf, bufsz);
	if (!IS_ERR(copy))
		copy->m_ts = bufsz;
	return copy;
}

/* Free a message obtained from prepare_copy(); NULL is accepted. */
static inline void free_copy(struct msg_msg *copy)
{
	if (copy)
		free_msg(copy);
}
#else
/* Without CONFIG_CHECKPOINT_RESTORE, MSG_COPY fails with -ENOSYS. */
static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz)
{
	return ERR_PTR(-ENOSYS);
}

static inline void free_copy(struct msg_msg *copy)
{
}
#endif

/*
 * Pick the message of @msq selected by (*@msgtyp, @mode). Only messages
 * that pass both testmsg() and the security hook are considered.
 *
 * SEARCH_LESSEQUAL: find the message with the lowest type. Each candidate
 * lowers *@msgtyp to its type minus one, so only strictly smaller types
 * match afterwards; a message of type 1 is returned at once, since
 * do_msgsnd() rejects types below 1. Note that *@msgtyp is modified.
 *
 * SEARCH_NUMBER: *@msgtyp is a zero-based position among the considered
 * messages.
 *
 * All other modes return the first match.
 *
 * Returns the message, or ERR_PTR(-EAGAIN) if nothing was selected.
 */
static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode)
{
	struct msg_msg *msg, *found = NULL;
	long count = 0;

	list_for_each_entry(msg, &msq->q_messages, m_list) {
		if (testmsg(msg, *msgtyp, mode) &&
		    !security_msg_queue_msgrcv(msq, msg, current,
					       *msgtyp, mode)) {
			if (mode == SEARCH_LESSEQUAL && msg->m_type != 1) {
				*msgtyp = msg->m_type - 1;
				found = msg;
			} else if (mode == SEARCH_NUMBER) {
				if (*msgtyp == count)
					return msg;
			} else
				return msg;
			count++;
		}
	}

	return found ?: ERR_PTR(-EAGAIN);
}

/*
 * do_msgrcv - receive one message from queue @msqid
 * @msqid:       queue id, must be >= 0
 * @buf:         user buffer, passed on to @msg_handler
 * @bufsz:       size of the payload area
 * @msgtyp:      type selector, see convert_mode()
 * @msgflg:      IPC_NOWAIT, MSG_NOERROR, MSG_EXCEPT, MSG_COPY
 * @msg_handler: copies the selected message to @buf and returns the number
 *               of bytes delivered or a negative error
 *
 * MSG_COPY requires IPC_NOWAIT and excludes MSG_EXCEPT; it returns a copy
 * of the selected message and leaves the queue unchanged.
 *
 * Without IPC_NOWAIT the task sleeps interruptibly on q_receivers until a
 * sender or expunge_all() stores a result in msr_d.r_msg.
 *
 * Returns the value of @msg_handler on success or a negative error code.
 */
long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgflg,
	       long (*msg_handler)(void __user *, struct msg_msg *, size_t))
{
	int mode;
	struct msg_queue *msq;
	struct ipc_namespace *ns;
	struct msg_msg *msg, *copy = NULL;
	DEFINE_WAKE_Q(wake_q);

	ns = current->nsproxy->ipc_ns;

	if (msqid < 0 || (long) bufsz < 0)
		return -EINVAL;

	if (msgflg & MSG_COPY) {
		if ((msgflg & MSG_EXCEPT) || !(msgflg & IPC_NOWAIT))
			return -EINVAL;
		copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax));
		if (IS_ERR(copy))
			return PTR_ERR(copy);
	}
	mode = convert_mode(&msgtyp, msgflg);

	rcu_read_lock();
	msq = msq_obtain_object_check(ns, msqid);
	if (IS_ERR(msq)) {
		rcu_read_unlock();
		free_copy(copy);
		return PTR_ERR(msq);
	}

	for (;;) {
		struct msg_receiver msr_d;

		/* The permission check runs before the object lock is taken. */
		msg = ERR_PTR(-EACCES);
		if (ipcperms(ns, &msq->q_perm, S_IRUGO))
			goto out_unlock1;

		ipc_lock_object(&msq->q_perm);

		/* raced with RMID? */
		if (!ipc_valid_object(&msq->q_perm)) {
			msg = ERR_PTR(-EIDRM);
			goto out_unlock0;
		}

		msg = find_msg(msq, &msgtyp, mode);
		if (!IS_ERR(msg)) {
			/*
			 * Found a suitable message.
			 * Unlink it from the queue.
			 */
			if ((bufsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) {
				msg = ERR_PTR(-E2BIG);
				goto out_unlock0;
			}
			/*
			 * If we are copying, then do not unlink message and do
			 * not update queue parameters.
			 *
			 * NOTE(review): msg is replaced by the result of
			 * copy_msg(), so the free_msg() at the end presumably
			 * releases the copy and not the queued message -
			 * confirm that copy_msg() returns @copy on success.
			 */
			if (msgflg & MSG_COPY) {
				msg = copy_msg(msg, copy);
				goto out_unlock0;
			}

			list_del(&msg->m_list);
			msq->q_qnum--;
			msq->q_rtime = get_seconds();
			msq->q_lrpid = task_tgid_vnr(current);
			msq->q_cbytes -= msg->m_ts;
			atomic_sub(msg->m_ts, &ns->msg_bytes);
			atomic_dec(&ns->msg_hdrs);
			/* Space was freed: blocked senders may fit now. */
			ss_wakeup(msq, &wake_q, false);

			goto out_unlock0;
		}

		/* No message waiting. Wait for a message */
		if (msgflg & IPC_NOWAIT) {
			msg = ERR_PTR(-ENOMSG);
			goto out_unlock0;
		}

		list_add_tail(&msr_d.r_list, &msq->q_receivers);
		msr_d.r_tsk = current;
		msr_d.r_msgtype = msgtyp;
		msr_d.r_mode = mode;
		if (msgflg & MSG_NOERROR)
			msr_d.r_maxsize = INT_MAX;
		else
			msr_d.r_maxsize = bufsz;
		msr_d.r_msg = ERR_PTR(-EAGAIN);
		__set_current_state(TASK_INTERRUPTIBLE);

		ipc_unlock_object(&msq->q_perm);
		rcu_read_unlock();
		schedule();

		/*
		 * Lockless receive, part 1:
		 * We don't hold a reference to the queue and getting a
		 * reference would defeat the idea of a lockless operation,
		 * thus the code relies on rcu to guarantee the existence of
		 * msq:
		 * Prior to destruction, expunge_all(-EIDRM) changes r_msg.
		 * Thus if r_msg is -EAGAIN, then the queue is not yet
		 * destroyed.
		 */
		rcu_read_lock();

		/*
		 * Lockless receive, part 2:
		 * The work in pipelined_send() and expunge_all():
		 * - Set pointer to message
		 * - Queue the receiver task for later wakeup
		 * - Wake up the process after the lock is dropped.
		 *
		 * Should the process wake up before this wakeup (due to a
		 * signal) it will either see the message and continue ...
		 */
		msg = READ_ONCE(msr_d.r_msg);
		if (msg != ERR_PTR(-EAGAIN))
			goto out_unlock1;

		 /*
		  * ... or see -EAGAIN, acquire the lock to check the message
		  * again.
		  */
		ipc_lock_object(&msq->q_perm);

		msg = msr_d.r_msg;
		if (msg != ERR_PTR(-EAGAIN))
			goto out_unlock0;

		/* Still nothing delivered: take ourselves off the list. */
		list_del(&msr_d.r_list);
		if (signal_pending(current)) {
			msg = ERR_PTR(-ERESTARTNOHAND);
			goto out_unlock0;
		}

		/* Retry from the top; the loop re-takes the object lock. */
		ipc_unlock_object(&msq->q_perm);
	}

out_unlock0:
	ipc_unlock_object(&msq->q_perm);
	wake_up_q(&wake_q);
out_unlock1:
	rcu_read_unlock();
	if (IS_ERR(msg)) {
		free_copy(copy);
		return PTR_ERR(msg);
	}

	/*
	 * bufsz is reused for the handler's result; a negative error from
	 * the handler passes through the size_t and is returned as long.
	 */
	bufsz = msg_handler(buf, msg, bufsz);
	free_msg(msg);

	return bufsz;
}

/* msgrcv(2): do_msgrcv() with do_msg_fill() as the message handler. */
SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
		long, msgtyp, int, msgflg)
{
	return do_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg, do_msg_fill);
}


/*
 * Initialise the message-queue state of namespace @ns: default limits
 * (MSGMAX, MSGMNB, MSGMNI), zeroed byte/header counters and an empty id set.
 */
void msg_init_ns(struct ipc_namespace *ns)
{
	ns->msg_ctlmax = MSGMAX;
	ns->msg_ctlmnb = MSGMNB;
	ns->msg_ctlmni = MSGMNI;

	atomic_set(&ns->msg_bytes, 0);
	atomic_set(&ns->msg_hdrs, 0);
	ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
}

#ifdef CONFIG_IPC_NS
/*
 * Tear down the message queues of namespace @ns: free_ipcs() is given
 * freeque() as the per-object destructor, then the idr is destroyed.
 */
void msg_exit_ns(struct ipc_namespace *ns)
{
	free_ipcs(ns, &msg_ids(ns), freeque);
	idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr);
}
#endif

#ifdef CONFIG_PROC_FS
/*
 * Print one line of /proc/sysvipc/msg for the queue passed in @it.
 * Uids and gids are translated into the user namespace of the reader
 * (seq_user_ns()). Field order matches the header string registered in
 * msg_init().
 */
static int sysvipc_msg_proc_show(struct seq_file *s, void *it)
{
	struct user_namespace *user_ns = seq_user_ns(s);
	struct msg_queue *msq = it;

	seq_printf(s,
		   "%10d %10d %4o %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n",
		   msq->q_perm.key,
		   msq->q_perm.id,
		   msq->q_perm.mode,
		   msq->q_cbytes,
		   msq->q_qnum,
		   msq->q_lspid,
		   msq->q_lrpid,
		   from_kuid_munged(user_ns, msq->q_perm.uid),
		   from_kgid_munged(user_ns, msq->q_perm.gid),
		   from_kuid_munged(user_ns, msq->q_perm.cuid),
		   from_kgid_munged(user_ns, msq->q_perm.cgid),
		   msq->q_stime,
		   msq->q_rtime,
		   msq->q_ctime);

	return 0;
}
#endif

/*
 * Boot-time setup: initialise the message-queue state of the initial ipc
 * namespace and register the "sysvipc/msg" proc interface.
 */
void __init msg_init(void)
{
	msg_init_ns(&init_ipc_ns);

	/*
	 * NOTE(review): the header below uses single spaces between column
	 * names, while sysvipc_msg_proc_show() prints fixed-width fields
	 * (%10d, %5u, ...), so header and data columns will not line up -
	 * confirm the intended header spacing.
	 */
	ipc_init_proc_interface("sysvipc/msg",
				" key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n",
				IPC_MSG_IDS, sysvipc_msg_proc_show);
}