1 /* 2 * linux/ipc/msg.c 3 * Copyright (C) 1992 Krishna Balasubramanian 4 * 5 * Removed all the remaining kerneld mess 6 * Catch the -EFAULT stuff properly 7 * Use GFP_KERNEL for messages as in 1.2 8 * Fixed up the unchecked user space derefs 9 * Copyright (C) 1998 Alan Cox & Andi Kleen 10 * 11 * /proc/sysvipc/msg support (c) 1999 Dragos Acostachioaie <dragos@iname.com> 12 * 13 * mostly rewritten, threaded and wake-one semantics added 14 * MSGMAX limit removed, sysctl's added 15 * (c) 1999 Manfred Spraul <manfred@colorfullife.com> 16 * 17 * support for audit of ipc object properties and permission changes 18 * Dustin Kirkland <dustin.kirkland@us.ibm.com> 19 * 20 * namespaces support 21 * OpenVZ, SWsoft Inc. 22 * Pavel Emelianov <xemul@openvz.org> 23 */ 24 25 #include <linux/capability.h> 26 #include <linux/slab.h> 27 #include <linux/msg.h> 28 #include <linux/spinlock.h> 29 #include <linux/init.h> 30 #include <linux/mm.h> 31 #include <linux/proc_fs.h> 32 #include <linux/list.h> 33 #include <linux/security.h> 34 #include <linux/sched.h> 35 #include <linux/syscalls.h> 36 #include <linux/audit.h> 37 #include <linux/seq_file.h> 38 #include <linux/rwsem.h> 39 #include <linux/nsproxy.h> 40 #include <linux/ipc_namespace.h> 41 42 #include <asm/current.h> 43 #include <asm/uaccess.h> 44 #include "util.h" 45 46 /* 47 * one msg_receiver structure for each sleeping receiver: 48 */ 49 struct msg_receiver { 50 struct list_head r_list; 51 struct task_struct *r_tsk; 52 53 int r_mode; 54 long r_msgtype; 55 long r_maxsize; 56 57 struct msg_msg *volatile r_msg; 58 }; 59 60 /* one msg_sender for each sleeping sender */ 61 struct msg_sender { 62 struct list_head list; 63 struct task_struct *tsk; 64 }; 65 66 #define SEARCH_ANY 1 67 #define SEARCH_EQUAL 2 68 #define SEARCH_NOTEQUAL 3 69 #define SEARCH_LESSEQUAL 4 70 71 #define msg_ids(ns) ((ns)->ids[IPC_MSG_IDS]) 72 73 #define msg_unlock(msq) ipc_unlock(&(msq)->q_perm) 74 75 static void freeque(struct ipc_namespace *, struct kern_ipc_perm *); 76 static int newque(struct ipc_namespace *, struct ipc_params *); 77 #ifdef CONFIG_PROC_FS 78 static int sysvipc_msg_proc_show(struct seq_file *s, void *it); 79 #endif 80 81 /* 82 * Scale msgmni with the available lowmem size: the memory dedicated to msg 83 * queues should occupy at most 1/MSG_MEM_SCALE of lowmem. 84 * Also take into account the number of nsproxies created so far. 85 * This should be done staying within the (MSGMNI , IPCMNI/nr_ipc_ns) range. 86 */ 87 void recompute_msgmni(struct ipc_namespace *ns) 88 { 89 struct sysinfo i; 90 unsigned long allowed; 91 int nb_ns; 92 93 si_meminfo(&i); 94 allowed = (((i.totalram - i.totalhigh) / MSG_MEM_SCALE) * i.mem_unit) 95 / MSGMNB; 96 nb_ns = atomic_read(&nr_ipc_ns); 97 allowed /= nb_ns; 98 99 if (allowed < MSGMNI) { 100 ns->msg_ctlmni = MSGMNI; 101 return; 102 } 103 104 if (allowed > IPCMNI / nb_ns) { 105 ns->msg_ctlmni = IPCMNI / nb_ns; 106 return; 107 } 108 109 ns->msg_ctlmni = allowed; 110 } 111 112 void msg_init_ns(struct ipc_namespace *ns) 113 { 114 ns->msg_ctlmax = MSGMAX; 115 ns->msg_ctlmnb = MSGMNB; 116 117 recompute_msgmni(ns); 118 119 atomic_set(&ns->msg_bytes, 0); 120 atomic_set(&ns->msg_hdrs, 0); 121 ipc_init_ids(&ns->ids[IPC_MSG_IDS]); 122 } 123 124 #ifdef CONFIG_IPC_NS 125 void msg_exit_ns(struct ipc_namespace *ns) 126 { 127 free_ipcs(ns, &msg_ids(ns), freeque); 128 } 129 #endif 130 131 void __init msg_init(void) 132 { 133 msg_init_ns(&init_ipc_ns); 134 135 printk(KERN_INFO "msgmni has been set to %d\n", 136 init_ipc_ns.msg_ctlmni); 137 138 ipc_init_proc_interface("sysvipc/msg", 139 " key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n", 140 IPC_MSG_IDS, sysvipc_msg_proc_show); 141 } 142 143 /* 144 * msg_lock_(check_) routines are called in the paths where the rw_mutex 145 * is not held. 146 */ 147 static inline struct msg_queue *msg_lock(struct ipc_namespace *ns, int id) 148 { 149 struct kern_ipc_perm *ipcp = ipc_lock(&msg_ids(ns), id); 150 151 if (IS_ERR(ipcp)) 152 return (struct msg_queue *)ipcp; 153 154 return container_of(ipcp, struct msg_queue, q_perm); 155 } 156 157 static inline struct msg_queue *msg_lock_check(struct ipc_namespace *ns, 158 int id) 159 { 160 struct kern_ipc_perm *ipcp = ipc_lock_check(&msg_ids(ns), id); 161 162 if (IS_ERR(ipcp)) 163 return (struct msg_queue *)ipcp; 164 165 return container_of(ipcp, struct msg_queue, q_perm); 166 } 167 168 static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s) 169 { 170 ipc_rmid(&msg_ids(ns), &s->q_perm); 171 } 172 173 /** 174 * newque - Create a new msg queue 175 * @ns: namespace 176 * @params: ptr to the structure that contains the key and msgflg 177 * 178 * Called with msg_ids.rw_mutex held (writer) 179 */ 180 static int newque(struct ipc_namespace *ns, struct ipc_params *params) 181 { 182 struct msg_queue *msq; 183 int id, retval; 184 key_t key = params->key; 185 int msgflg = params->flg; 186 187 msq = ipc_rcu_alloc(sizeof(*msq)); 188 if (!msq) 189 return -ENOMEM; 190 191 msq->q_perm.mode = msgflg & S_IRWXUGO; 192 msq->q_perm.key = key; 193 194 msq->q_perm.security = NULL; 195 retval = security_msg_queue_alloc(msq); 196 if (retval) { 197 ipc_rcu_putref(msq); 198 return retval; 199 } 200 201 /* 202 * ipc_addid() locks msq 203 */ 204 id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni); 205 if (id < 0) { 206 security_msg_queue_free(msq); 207 ipc_rcu_putref(msq); 208 return id; 209 } 210 211 msq->q_stime = msq->q_rtime = 0; 212 msq->q_ctime = get_seconds(); 213 msq->q_cbytes = msq->q_qnum = 0; 214 msq->q_qbytes = ns->msg_ctlmnb; 215 msq->q_lspid = msq->q_lrpid = 0; 216 INIT_LIST_HEAD(&msq->q_messages); 217 INIT_LIST_HEAD(&msq->q_receivers); 218 INIT_LIST_HEAD(&msq->q_senders); 219 220 msg_unlock(msq); 221 222 return msq->q_perm.id; 223 } 224 225 static inline void ss_add(struct msg_queue *msq, struct msg_sender *mss) 226 { 227 mss->tsk = current; 228 current->state = TASK_INTERRUPTIBLE; 229 list_add_tail(&mss->list, &msq->q_senders); 230 } 231 232 static inline void ss_del(struct msg_sender *mss) 233 { 234 if (mss->list.next != NULL) 235 list_del(&mss->list); 236 } 237 238 static void ss_wakeup(struct list_head *h, int kill) 239 { 240 struct list_head *tmp; 241 242 tmp = h->next; 243 while (tmp != h) { 244 struct msg_sender *mss; 245 246 mss = list_entry(tmp, struct msg_sender, list); 247 tmp = tmp->next; 248 if (kill) 249 mss->list.next = NULL; 250 wake_up_process(mss->tsk); 251 } 252 } 253 254 static void expunge_all(struct msg_queue *msq, int res) 255 { 256 struct list_head *tmp; 257 258 tmp = msq->q_receivers.next; 259 while (tmp != &msq->q_receivers) { 260 struct msg_receiver *msr; 261 262 msr = list_entry(tmp, struct msg_receiver, r_list); 263 tmp = tmp->next; 264 msr->r_msg = NULL; 265 wake_up_process(msr->r_tsk); 266 smp_mb(); 267 msr->r_msg = ERR_PTR(res); 268 } 269 } 270 271 /* 272 * freeque() wakes up waiters on the sender and receiver waiting queue, 273 * removes the message queue from message queue ID IDR, and cleans up all the 274 * messages associated with this queue. 275 * 276 * msg_ids.rw_mutex (writer) and the spinlock for this message queue are held 277 * before freeque() is called. msg_ids.rw_mutex remains locked on exit. 278 */ 279 static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) 280 { 281 struct list_head *tmp; 282 struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm); 283 284 expunge_all(msq, -EIDRM); 285 ss_wakeup(&msq->q_senders, 1); 286 msg_rmid(ns, msq); 287 msg_unlock(msq); 288 289 tmp = msq->q_messages.next; 290 while (tmp != &msq->q_messages) { 291 struct msg_msg *msg = list_entry(tmp, struct msg_msg, m_list); 292 293 tmp = tmp->next; 294 atomic_dec(&ns->msg_hdrs); 295 free_msg(msg); 296 } 297 atomic_sub(msq->q_cbytes, &ns->msg_bytes); 298 security_msg_queue_free(msq); 299 ipc_rcu_putref(msq); 300 } 301 302 /* 303 * Called with msg_ids.rw_mutex and ipcp locked. 304 */ 305 static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg) 306 { 307 struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm); 308 309 return security_msg_queue_associate(msq, msgflg); 310 } 311 312 SYSCALL_DEFINE2(msgget, key_t, key, int, msgflg) 313 { 314 struct ipc_namespace *ns; 315 struct ipc_ops msg_ops; 316 struct ipc_params msg_params; 317 318 ns = current->nsproxy->ipc_ns; 319 320 msg_ops.getnew = newque; 321 msg_ops.associate = msg_security; 322 msg_ops.more_checks = NULL; 323 324 msg_params.key = key; 325 msg_params.flg = msgflg; 326 327 return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params); 328 } 329 330 static inline unsigned long 331 copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version) 332 { 333 switch(version) { 334 case IPC_64: 335 return copy_to_user(buf, in, sizeof(*in)); 336 case IPC_OLD: 337 { 338 struct msqid_ds out; 339 340 memset(&out, 0, sizeof(out)); 341 342 ipc64_perm_to_ipc_perm(&in->msg_perm, &out.msg_perm); 343 344 out.msg_stime = in->msg_stime; 345 out.msg_rtime = in->msg_rtime; 346 out.msg_ctime = in->msg_ctime; 347 348 if (in->msg_cbytes > USHORT_MAX) 349 out.msg_cbytes = USHORT_MAX; 350 else 351 out.msg_cbytes = in->msg_cbytes; 352 out.msg_lcbytes = in->msg_cbytes; 353 354 if (in->msg_qnum > USHORT_MAX) 355 out.msg_qnum = USHORT_MAX; 356 else 357 out.msg_qnum = in->msg_qnum; 358 359 if (in->msg_qbytes > USHORT_MAX) 360 out.msg_qbytes = USHORT_MAX; 361 else 362 out.msg_qbytes = in->msg_qbytes; 363 out.msg_lqbytes = in->msg_qbytes; 364 365 out.msg_lspid = in->msg_lspid; 366 out.msg_lrpid = in->msg_lrpid; 367 368 return copy_to_user(buf, &out, sizeof(out)); 369 } 370 default: 371 return -EINVAL; 372 } 373 } 374 375 static inline unsigned long 376 copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version) 377 { 378 switch(version) { 379 case IPC_64: 380 if (copy_from_user(out, buf, sizeof(*out))) 381 return -EFAULT; 382 return 0; 383 case IPC_OLD: 384 { 385 struct msqid_ds tbuf_old; 386 387 if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) 388 return -EFAULT; 389 390 out->msg_perm.uid = tbuf_old.msg_perm.uid; 391 out->msg_perm.gid = tbuf_old.msg_perm.gid; 392 out->msg_perm.mode = tbuf_old.msg_perm.mode; 393 394 if (tbuf_old.msg_qbytes == 0) 395 out->msg_qbytes = tbuf_old.msg_lqbytes; 396 else 397 out->msg_qbytes = tbuf_old.msg_qbytes; 398 399 return 0; 400 } 401 default: 402 return -EINVAL; 403 } 404 } 405 406 /* 407 * This function handles some msgctl commands which require the rw_mutex 408 * to be held in write mode. 409 * NOTE: no locks must be held, the rw_mutex is taken inside this function. 410 */ 411 static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, 412 struct msqid_ds __user *buf, int version) 413 { 414 struct kern_ipc_perm *ipcp; 415 struct msqid64_ds msqid64; 416 struct msg_queue *msq; 417 int err; 418 419 if (cmd == IPC_SET) { 420 if (copy_msqid_from_user(&msqid64, buf, version)) 421 return -EFAULT; 422 } 423 424 ipcp = ipcctl_pre_down(&msg_ids(ns), msqid, cmd, 425 &msqid64.msg_perm, msqid64.msg_qbytes); 426 if (IS_ERR(ipcp)) 427 return PTR_ERR(ipcp); 428 429 msq = container_of(ipcp, struct msg_queue, q_perm); 430 431 err = security_msg_queue_msgctl(msq, cmd); 432 if (err) 433 goto out_unlock; 434 435 switch (cmd) { 436 case IPC_RMID: 437 freeque(ns, ipcp); 438 goto out_up; 439 case IPC_SET: 440 if (msqid64.msg_qbytes > ns->msg_ctlmnb && 441 !capable(CAP_SYS_RESOURCE)) { 442 err = -EPERM; 443 goto out_unlock; 444 } 445 446 msq->q_qbytes = msqid64.msg_qbytes; 447 448 ipc_update_perm(&msqid64.msg_perm, ipcp); 449 msq->q_ctime = get_seconds(); 450 /* sleeping receivers might be excluded by 451 * stricter permissions. 452 */ 453 expunge_all(msq, -EAGAIN); 454 /* sleeping senders might be able to send 455 * due to a larger queue size. 456 */ 457 ss_wakeup(&msq->q_senders, 0); 458 break; 459 default: 460 err = -EINVAL; 461 } 462 out_unlock: 463 msg_unlock(msq); 464 out_up: 465 up_write(&msg_ids(ns).rw_mutex); 466 return err; 467 } 468 469 SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) 470 { 471 struct msg_queue *msq; 472 int err, version; 473 struct ipc_namespace *ns; 474 475 if (msqid < 0 || cmd < 0) 476 return -EINVAL; 477 478 version = ipc_parse_version(&cmd); 479 ns = current->nsproxy->ipc_ns; 480 481 switch (cmd) { 482 case IPC_INFO: 483 case MSG_INFO: 484 { 485 struct msginfo msginfo; 486 int max_id; 487 488 if (!buf) 489 return -EFAULT; 490 /* 491 * We must not return kernel stack data. 492 * due to padding, it's not enough 493 * to set all member fields. 494 */ 495 err = security_msg_queue_msgctl(NULL, cmd); 496 if (err) 497 return err; 498 499 memset(&msginfo, 0, sizeof(msginfo)); 500 msginfo.msgmni = ns->msg_ctlmni; 501 msginfo.msgmax = ns->msg_ctlmax; 502 msginfo.msgmnb = ns->msg_ctlmnb; 503 msginfo.msgssz = MSGSSZ; 504 msginfo.msgseg = MSGSEG; 505 down_read(&msg_ids(ns).rw_mutex); 506 if (cmd == MSG_INFO) { 507 msginfo.msgpool = msg_ids(ns).in_use; 508 msginfo.msgmap = atomic_read(&ns->msg_hdrs); 509 msginfo.msgtql = atomic_read(&ns->msg_bytes); 510 } else { 511 msginfo.msgmap = MSGMAP; 512 msginfo.msgpool = MSGPOOL; 513 msginfo.msgtql = MSGTQL; 514 } 515 max_id = ipc_get_maxid(&msg_ids(ns)); 516 up_read(&msg_ids(ns).rw_mutex); 517 if (copy_to_user(buf, &msginfo, sizeof(struct msginfo))) 518 return -EFAULT; 519 return (max_id < 0) ? 0 : max_id; 520 } 521 case MSG_STAT: /* msqid is an index rather than a msg queue id */ 522 case IPC_STAT: 523 { 524 struct msqid64_ds tbuf; 525 int success_return; 526 527 if (!buf) 528 return -EFAULT; 529 530 if (cmd == MSG_STAT) { 531 msq = msg_lock(ns, msqid); 532 if (IS_ERR(msq)) 533 return PTR_ERR(msq); 534 success_return = msq->q_perm.id; 535 } else { 536 msq = msg_lock_check(ns, msqid); 537 if (IS_ERR(msq)) 538 return PTR_ERR(msq); 539 success_return = 0; 540 } 541 err = -EACCES; 542 if (ipcperms(&msq->q_perm, S_IRUGO)) 543 goto out_unlock; 544 545 err = security_msg_queue_msgctl(msq, cmd); 546 if (err) 547 goto out_unlock; 548 549 memset(&tbuf, 0, sizeof(tbuf)); 550 551 kernel_to_ipc64_perm(&msq->q_perm, &tbuf.msg_perm); 552 tbuf.msg_stime = msq->q_stime; 553 tbuf.msg_rtime = msq->q_rtime; 554 tbuf.msg_ctime = msq->q_ctime; 555 tbuf.msg_cbytes = msq->q_cbytes; 556 tbuf.msg_qnum = msq->q_qnum; 557 tbuf.msg_qbytes = msq->q_qbytes; 558 tbuf.msg_lspid = msq->q_lspid; 559 tbuf.msg_lrpid = msq->q_lrpid; 560 msg_unlock(msq); 561 if (copy_msqid_to_user(buf, &tbuf, version)) 562 return -EFAULT; 563 return success_return; 564 } 565 case IPC_SET: 566 case IPC_RMID: 567 err = msgctl_down(ns, msqid, cmd, buf, version); 568 return err; 569 default: 570 return -EINVAL; 571 } 572 573 out_unlock: 574 msg_unlock(msq); 575 return err; 576 } 577 578 static int testmsg(struct msg_msg *msg, long type, int mode) 579 { 580 switch(mode) 581 { 582 case SEARCH_ANY: 583 return 1; 584 case SEARCH_LESSEQUAL: 585 if (msg->m_type <=type) 586 return 1; 587 break; 588 case SEARCH_EQUAL: 589 if (msg->m_type == type) 590 return 1; 591 break; 592 case SEARCH_NOTEQUAL: 593 if (msg->m_type != type) 594 return 1; 595 break; 596 } 597 return 0; 598 } 599 600 static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg) 601 { 602 struct list_head *tmp; 603 604 tmp = msq->q_receivers.next; 605 while (tmp != &msq->q_receivers) { 606 struct msg_receiver *msr; 607 608 msr = list_entry(tmp, struct msg_receiver, r_list); 609 tmp = tmp->next; 610 if (testmsg(msg, msr->r_msgtype, msr->r_mode) && 611 !security_msg_queue_msgrcv(msq, msg, msr->r_tsk, 612 msr->r_msgtype, msr->r_mode)) { 613 614 list_del(&msr->r_list); 615 if (msr->r_maxsize < msg->m_ts) { 616 msr->r_msg = NULL; 617 wake_up_process(msr->r_tsk); 618 smp_mb(); 619 msr->r_msg = ERR_PTR(-E2BIG); 620 } else { 621 msr->r_msg = NULL; 622 msq->q_lrpid = task_pid_vnr(msr->r_tsk); 623 msq->q_rtime = get_seconds(); 624 wake_up_process(msr->r_tsk); 625 smp_mb(); 626 msr->r_msg = msg; 627 628 return 1; 629 } 630 } 631 } 632 return 0; 633 } 634 635 long do_msgsnd(int msqid, long mtype, void __user *mtext, 636 size_t msgsz, int msgflg) 637 { 638 struct msg_queue *msq; 639 struct msg_msg *msg; 640 int err; 641 struct ipc_namespace *ns; 642 643 ns = current->nsproxy->ipc_ns; 644 645 if (msgsz > ns->msg_ctlmax || (long) msgsz < 0 || msqid < 0) 646 return -EINVAL; 647 if (mtype < 1) 648 return -EINVAL; 649 650 msg = load_msg(mtext, msgsz); 651 if (IS_ERR(msg)) 652 return PTR_ERR(msg); 653 654 msg->m_type = mtype; 655 msg->m_ts = msgsz; 656 657 msq = msg_lock_check(ns, msqid); 658 if (IS_ERR(msq)) { 659 err = PTR_ERR(msq); 660 goto out_free; 661 } 662 663 for (;;) { 664 struct msg_sender s; 665 666 err = -EACCES; 667 if (ipcperms(&msq->q_perm, S_IWUGO)) 668 goto out_unlock_free; 669 670 err = security_msg_queue_msgsnd(msq, msg, msgflg); 671 if (err) 672 goto out_unlock_free; 673 674 if (msgsz + msq->q_cbytes <= msq->q_qbytes && 675 1 + msq->q_qnum <= msq->q_qbytes) { 676 break; 677 } 678 679 /* queue full, wait: */ 680 if (msgflg & IPC_NOWAIT) { 681 err = -EAGAIN; 682 goto out_unlock_free; 683 } 684 ss_add(msq, &s); 685 ipc_rcu_getref(msq); 686 msg_unlock(msq); 687 schedule(); 688 689 ipc_lock_by_ptr(&msq->q_perm); 690 ipc_rcu_putref(msq); 691 if (msq->q_perm.deleted) { 692 err = -EIDRM; 693 goto out_unlock_free; 694 } 695 ss_del(&s); 696 697 if (signal_pending(current)) { 698 err = -ERESTARTNOHAND; 699 goto out_unlock_free; 700 } 701 } 702 703 msq->q_lspid = task_tgid_vnr(current); 704 msq->q_stime = get_seconds(); 705 706 if (!pipelined_send(msq, msg)) { 707 /* noone is waiting for this message, enqueue it */ 708 list_add_tail(&msg->m_list, &msq->q_messages); 709 msq->q_cbytes += msgsz; 710 msq->q_qnum++; 711 atomic_add(msgsz, &ns->msg_bytes); 712 atomic_inc(&ns->msg_hdrs); 713 } 714 715 err = 0; 716 msg = NULL; 717 718 out_unlock_free: 719 msg_unlock(msq); 720 out_free: 721 if (msg != NULL) 722 free_msg(msg); 723 return err; 724 } 725 726 SYSCALL_DEFINE4(msgsnd, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz, 727 int, msgflg) 728 { 729 long mtype; 730 731 if (get_user(mtype, &msgp->mtype)) 732 return -EFAULT; 733 return do_msgsnd(msqid, mtype, msgp->mtext, msgsz, msgflg); 734 } 735 736 static inline int convert_mode(long *msgtyp, int msgflg) 737 { 738 /* 739 * find message of correct type. 740 * msgtyp = 0 => get first. 741 * msgtyp > 0 => get first message of matching type. 742 * msgtyp < 0 => get message with least type must be < abs(msgtype). 743 */ 744 if (*msgtyp == 0) 745 return SEARCH_ANY; 746 if (*msgtyp < 0) { 747 *msgtyp = -*msgtyp; 748 return SEARCH_LESSEQUAL; 749 } 750 if (msgflg & MSG_EXCEPT) 751 return SEARCH_NOTEQUAL; 752 return SEARCH_EQUAL; 753 } 754 755 long do_msgrcv(int msqid, long *pmtype, void __user *mtext, 756 size_t msgsz, long msgtyp, int msgflg) 757 { 758 struct msg_queue *msq; 759 struct msg_msg *msg; 760 int mode; 761 struct ipc_namespace *ns; 762 763 if (msqid < 0 || (long) msgsz < 0) 764 return -EINVAL; 765 mode = convert_mode(&msgtyp, msgflg); 766 ns = current->nsproxy->ipc_ns; 767 768 msq = msg_lock_check(ns, msqid); 769 if (IS_ERR(msq)) 770 return PTR_ERR(msq); 771 772 for (;;) { 773 struct msg_receiver msr_d; 774 struct list_head *tmp; 775 776 msg = ERR_PTR(-EACCES); 777 if (ipcperms(&msq->q_perm, S_IRUGO)) 778 goto out_unlock; 779 780 msg = ERR_PTR(-EAGAIN); 781 tmp = msq->q_messages.next; 782 while (tmp != &msq->q_messages) { 783 struct msg_msg *walk_msg; 784 785 walk_msg = list_entry(tmp, struct msg_msg, m_list); 786 if (testmsg(walk_msg, msgtyp, mode) && 787 !security_msg_queue_msgrcv(msq, walk_msg, current, 788 msgtyp, mode)) { 789 790 msg = walk_msg; 791 if (mode == SEARCH_LESSEQUAL && 792 walk_msg->m_type != 1) { 793 msg = walk_msg; 794 msgtyp = walk_msg->m_type - 1; 795 } else { 796 msg = walk_msg; 797 break; 798 } 799 } 800 tmp = tmp->next; 801 } 802 if (!IS_ERR(msg)) { 803 /* 804 * Found a suitable message. 805 * Unlink it from the queue. 806 */ 807 if ((msgsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) { 808 msg = ERR_PTR(-E2BIG); 809 goto out_unlock; 810 } 811 list_del(&msg->m_list); 812 msq->q_qnum--; 813 msq->q_rtime = get_seconds(); 814 msq->q_lrpid = task_tgid_vnr(current); 815 msq->q_cbytes -= msg->m_ts; 816 atomic_sub(msg->m_ts, &ns->msg_bytes); 817 atomic_dec(&ns->msg_hdrs); 818 ss_wakeup(&msq->q_senders, 0); 819 msg_unlock(msq); 820 break; 821 } 822 /* No message waiting. Wait for a message */ 823 if (msgflg & IPC_NOWAIT) { 824 msg = ERR_PTR(-ENOMSG); 825 goto out_unlock; 826 } 827 list_add_tail(&msr_d.r_list, &msq->q_receivers); 828 msr_d.r_tsk = current; 829 msr_d.r_msgtype = msgtyp; 830 msr_d.r_mode = mode; 831 if (msgflg & MSG_NOERROR) 832 msr_d.r_maxsize = INT_MAX; 833 else 834 msr_d.r_maxsize = msgsz; 835 msr_d.r_msg = ERR_PTR(-EAGAIN); 836 current->state = TASK_INTERRUPTIBLE; 837 msg_unlock(msq); 838 839 schedule(); 840 841 /* Lockless receive, part 1: 842 * Disable preemption. We don't hold a reference to the queue 843 * and getting a reference would defeat the idea of a lockless 844 * operation, thus the code relies on rcu to guarantee the 845 * existance of msq: 846 * Prior to destruction, expunge_all(-EIRDM) changes r_msg. 847 * Thus if r_msg is -EAGAIN, then the queue not yet destroyed. 848 * rcu_read_lock() prevents preemption between reading r_msg 849 * and the spin_lock() inside ipc_lock_by_ptr(). 850 */ 851 rcu_read_lock(); 852 853 /* Lockless receive, part 2: 854 * Wait until pipelined_send or expunge_all are outside of 855 * wake_up_process(). There is a race with exit(), see 856 * ipc/mqueue.c for the details. 857 */ 858 msg = (struct msg_msg*)msr_d.r_msg; 859 while (msg == NULL) { 860 cpu_relax(); 861 msg = (struct msg_msg *)msr_d.r_msg; 862 } 863 864 /* Lockless receive, part 3: 865 * If there is a message or an error then accept it without 866 * locking. 867 */ 868 if (msg != ERR_PTR(-EAGAIN)) { 869 rcu_read_unlock(); 870 break; 871 } 872 873 /* Lockless receive, part 3: 874 * Acquire the queue spinlock. 875 */ 876 ipc_lock_by_ptr(&msq->q_perm); 877 rcu_read_unlock(); 878 879 /* Lockless receive, part 4: 880 * Repeat test after acquiring the spinlock. 881 */ 882 msg = (struct msg_msg*)msr_d.r_msg; 883 if (msg != ERR_PTR(-EAGAIN)) 884 goto out_unlock; 885 886 list_del(&msr_d.r_list); 887 if (signal_pending(current)) { 888 msg = ERR_PTR(-ERESTARTNOHAND); 889 out_unlock: 890 msg_unlock(msq); 891 break; 892 } 893 } 894 if (IS_ERR(msg)) 895 return PTR_ERR(msg); 896 897 msgsz = (msgsz > msg->m_ts) ? msg->m_ts : msgsz; 898 *pmtype = msg->m_type; 899 if (store_msg(mtext, msg, msgsz)) 900 msgsz = -EFAULT; 901 902 free_msg(msg); 903 904 return msgsz; 905 } 906 907 SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz, 908 long, msgtyp, int, msgflg) 909 { 910 long err, mtype; 911 912 err = do_msgrcv(msqid, &mtype, msgp->mtext, msgsz, msgtyp, msgflg); 913 if (err < 0) 914 goto out; 915 916 if (put_user(mtype, &msgp->mtype)) 917 err = -EFAULT; 918 out: 919 return err; 920 } 921 922 #ifdef CONFIG_PROC_FS 923 static int sysvipc_msg_proc_show(struct seq_file *s, void *it) 924 { 925 struct msg_queue *msq = it; 926 927 return seq_printf(s, 928 "%10d %10d %4o %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n", 929 msq->q_perm.key, 930 msq->q_perm.id, 931 msq->q_perm.mode, 932 msq->q_cbytes, 933 msq->q_qnum, 934 msq->q_lspid, 935 msq->q_lrpid, 936 msq->q_perm.uid, 937 msq->q_perm.gid, 938 msq->q_perm.cuid, 939 msq->q_perm.cgid, 940 msq->q_stime, 941 msq->q_rtime, 942 msq->q_ctime); 943 } 944 #endif 945