1 /* 2 * linux/ipc/msg.c 3 * Copyright (C) 1992 Krishna Balasubramanian 4 * 5 * Removed all the remaining kerneld mess 6 * Catch the -EFAULT stuff properly 7 * Use GFP_KERNEL for messages as in 1.2 8 * Fixed up the unchecked user space derefs 9 * Copyright (C) 1998 Alan Cox & Andi Kleen 10 * 11 * /proc/sysvipc/msg support (c) 1999 Dragos Acostachioaie <dragos@iname.com> 12 * 13 * mostly rewritten, threaded and wake-one semantics added 14 * MSGMAX limit removed, sysctl's added 15 * (c) 1999 Manfred Spraul <manfred@colorfullife.com> 16 * 17 * support for audit of ipc object properties and permission changes 18 * Dustin Kirkland <dustin.kirkland@us.ibm.com> 19 * 20 * namespaces support 21 * OpenVZ, SWsoft Inc. 22 * Pavel Emelianov <xemul@openvz.org> 23 */ 24 25 #include <linux/capability.h> 26 #include <linux/msg.h> 27 #include <linux/spinlock.h> 28 #include <linux/init.h> 29 #include <linux/mm.h> 30 #include <linux/proc_fs.h> 31 #include <linux/list.h> 32 #include <linux/security.h> 33 #include <linux/sched.h> 34 #include <linux/syscalls.h> 35 #include <linux/audit.h> 36 #include <linux/seq_file.h> 37 #include <linux/rwsem.h> 38 #include <linux/nsproxy.h> 39 #include <linux/ipc_namespace.h> 40 41 #include <asm/current.h> 42 #include <asm/uaccess.h> 43 #include "util.h" 44 45 /* 46 * one msg_receiver structure for each sleeping receiver: 47 */ 48 struct msg_receiver { 49 struct list_head r_list; 50 struct task_struct *r_tsk; 51 52 int r_mode; 53 long r_msgtype; 54 long r_maxsize; 55 56 struct msg_msg *volatile r_msg; 57 }; 58 59 /* one msg_sender for each sleeping sender */ 60 struct msg_sender { 61 struct list_head list; 62 struct task_struct *tsk; 63 }; 64 65 #define SEARCH_ANY 1 66 #define SEARCH_EQUAL 2 67 #define SEARCH_NOTEQUAL 3 68 #define SEARCH_LESSEQUAL 4 69 70 #define msg_ids(ns) ((ns)->ids[IPC_MSG_IDS]) 71 72 #define msg_unlock(msq) ipc_unlock(&(msq)->q_perm) 73 74 static void freeque(struct ipc_namespace *, struct kern_ipc_perm *); 75 static int newque(struct ipc_namespace *, struct ipc_params *); 76 #ifdef CONFIG_PROC_FS 77 static int sysvipc_msg_proc_show(struct seq_file *s, void *it); 78 #endif 79 80 /* 81 * Scale msgmni with the available lowmem size: the memory dedicated to msg 82 * queues should occupy at most 1/MSG_MEM_SCALE of lowmem. 83 * Also take into account the number of nsproxies created so far. 84 * This should be done staying within the (MSGMNI , IPCMNI/nr_ipc_ns) range. 85 */ 86 void recompute_msgmni(struct ipc_namespace *ns) 87 { 88 struct sysinfo i; 89 unsigned long allowed; 90 int nb_ns; 91 92 si_meminfo(&i); 93 allowed = (((i.totalram - i.totalhigh) / MSG_MEM_SCALE) * i.mem_unit) 94 / MSGMNB; 95 nb_ns = atomic_read(&nr_ipc_ns); 96 allowed /= nb_ns; 97 98 if (allowed < MSGMNI) { 99 ns->msg_ctlmni = MSGMNI; 100 return; 101 } 102 103 if (allowed > IPCMNI / nb_ns) { 104 ns->msg_ctlmni = IPCMNI / nb_ns; 105 return; 106 } 107 108 ns->msg_ctlmni = allowed; 109 } 110 111 void msg_init_ns(struct ipc_namespace *ns) 112 { 113 ns->msg_ctlmax = MSGMAX; 114 ns->msg_ctlmnb = MSGMNB; 115 116 recompute_msgmni(ns); 117 118 atomic_set(&ns->msg_bytes, 0); 119 atomic_set(&ns->msg_hdrs, 0); 120 ipc_init_ids(&ns->ids[IPC_MSG_IDS]); 121 } 122 123 #ifdef CONFIG_IPC_NS 124 void msg_exit_ns(struct ipc_namespace *ns) 125 { 126 free_ipcs(ns, &msg_ids(ns), freeque); 127 idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr); 128 } 129 #endif 130 131 void __init msg_init(void) 132 { 133 msg_init_ns(&init_ipc_ns); 134 135 printk(KERN_INFO "msgmni has been set to %d\n", 136 init_ipc_ns.msg_ctlmni); 137 138 ipc_init_proc_interface("sysvipc/msg", 139 " key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n", 140 IPC_MSG_IDS, sysvipc_msg_proc_show); 141 } 142 143 /* 144 * msg_lock_(check_) routines are called in the paths where the rw_mutex 145 * is not held. 146 */ 147 static inline struct msg_queue *msg_lock(struct ipc_namespace *ns, int id) 148 { 149 struct kern_ipc_perm *ipcp = ipc_lock(&msg_ids(ns), id); 150 151 if (IS_ERR(ipcp)) 152 return (struct msg_queue *)ipcp; 153 154 return container_of(ipcp, struct msg_queue, q_perm); 155 } 156 157 static inline struct msg_queue *msg_lock_check(struct ipc_namespace *ns, 158 int id) 159 { 160 struct kern_ipc_perm *ipcp = ipc_lock_check(&msg_ids(ns), id); 161 162 if (IS_ERR(ipcp)) 163 return (struct msg_queue *)ipcp; 164 165 return container_of(ipcp, struct msg_queue, q_perm); 166 } 167 168 static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s) 169 { 170 ipc_rmid(&msg_ids(ns), &s->q_perm); 171 } 172 173 /** 174 * newque - Create a new msg queue 175 * @ns: namespace 176 * @params: ptr to the structure that contains the key and msgflg 177 * 178 * Called with msg_ids.rw_mutex held (writer) 179 */ 180 static int newque(struct ipc_namespace *ns, struct ipc_params *params) 181 { 182 struct msg_queue *msq; 183 int id, retval; 184 key_t key = params->key; 185 int msgflg = params->flg; 186 187 msq = ipc_rcu_alloc(sizeof(*msq)); 188 if (!msq) 189 return -ENOMEM; 190 191 msq->q_perm.mode = msgflg & S_IRWXUGO; 192 msq->q_perm.key = key; 193 194 msq->q_perm.security = NULL; 195 retval = security_msg_queue_alloc(msq); 196 if (retval) { 197 ipc_rcu_putref(msq); 198 return retval; 199 } 200 201 /* 202 * ipc_addid() locks msq 203 */ 204 id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni); 205 if (id < 0) { 206 security_msg_queue_free(msq); 207 ipc_rcu_putref(msq); 208 return id; 209 } 210 211 msq->q_stime = msq->q_rtime = 0; 212 msq->q_ctime = get_seconds(); 213 msq->q_cbytes = msq->q_qnum = 0; 214 msq->q_qbytes = ns->msg_ctlmnb; 215 msq->q_lspid = msq->q_lrpid = 0; 216 INIT_LIST_HEAD(&msq->q_messages); 217 INIT_LIST_HEAD(&msq->q_receivers); 218 INIT_LIST_HEAD(&msq->q_senders); 219 220 msg_unlock(msq); 221 222 return msq->q_perm.id; 223 } 224 225 static inline void ss_add(struct msg_queue *msq, struct msg_sender *mss) 226 { 227 mss->tsk = current; 228 current->state = TASK_INTERRUPTIBLE; 229 list_add_tail(&mss->list, &msq->q_senders); 230 } 231 232 static inline void ss_del(struct msg_sender *mss) 233 { 234 if (mss->list.next != NULL) 235 list_del(&mss->list); 236 } 237 238 static void ss_wakeup(struct list_head *h, int kill) 239 { 240 struct list_head *tmp; 241 242 tmp = h->next; 243 while (tmp != h) { 244 struct msg_sender *mss; 245 246 mss = list_entry(tmp, struct msg_sender, list); 247 tmp = tmp->next; 248 if (kill) 249 mss->list.next = NULL; 250 wake_up_process(mss->tsk); 251 } 252 } 253 254 static void expunge_all(struct msg_queue *msq, int res) 255 { 256 struct list_head *tmp; 257 258 tmp = msq->q_receivers.next; 259 while (tmp != &msq->q_receivers) { 260 struct msg_receiver *msr; 261 262 msr = list_entry(tmp, struct msg_receiver, r_list); 263 tmp = tmp->next; 264 msr->r_msg = NULL; 265 wake_up_process(msr->r_tsk); 266 smp_mb(); 267 msr->r_msg = ERR_PTR(res); 268 } 269 } 270 271 /* 272 * freeque() wakes up waiters on the sender and receiver waiting queue, 273 * removes the message queue from message queue ID IDR, and cleans up all the 274 * messages associated with this queue. 275 * 276 * msg_ids.rw_mutex (writer) and the spinlock for this message queue are held 277 * before freeque() is called. msg_ids.rw_mutex remains locked on exit. 278 */ 279 static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) 280 { 281 struct list_head *tmp; 282 struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm); 283 284 expunge_all(msq, -EIDRM); 285 ss_wakeup(&msq->q_senders, 1); 286 msg_rmid(ns, msq); 287 msg_unlock(msq); 288 289 tmp = msq->q_messages.next; 290 while (tmp != &msq->q_messages) { 291 struct msg_msg *msg = list_entry(tmp, struct msg_msg, m_list); 292 293 tmp = tmp->next; 294 atomic_dec(&ns->msg_hdrs); 295 free_msg(msg); 296 } 297 atomic_sub(msq->q_cbytes, &ns->msg_bytes); 298 security_msg_queue_free(msq); 299 ipc_rcu_putref(msq); 300 } 301 302 /* 303 * Called with msg_ids.rw_mutex and ipcp locked. 304 */ 305 static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg) 306 { 307 struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm); 308 309 return security_msg_queue_associate(msq, msgflg); 310 } 311 312 SYSCALL_DEFINE2(msgget, key_t, key, int, msgflg) 313 { 314 struct ipc_namespace *ns; 315 struct ipc_ops msg_ops; 316 struct ipc_params msg_params; 317 318 ns = current->nsproxy->ipc_ns; 319 320 msg_ops.getnew = newque; 321 msg_ops.associate = msg_security; 322 msg_ops.more_checks = NULL; 323 324 msg_params.key = key; 325 msg_params.flg = msgflg; 326 327 return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params); 328 } 329 330 static inline unsigned long 331 copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version) 332 { 333 switch(version) { 334 case IPC_64: 335 return copy_to_user(buf, in, sizeof(*in)); 336 case IPC_OLD: 337 { 338 struct msqid_ds out; 339 340 memset(&out, 0, sizeof(out)); 341 342 ipc64_perm_to_ipc_perm(&in->msg_perm, &out.msg_perm); 343 344 out.msg_stime = in->msg_stime; 345 out.msg_rtime = in->msg_rtime; 346 out.msg_ctime = in->msg_ctime; 347 348 if (in->msg_cbytes > USHRT_MAX) 349 out.msg_cbytes = USHRT_MAX; 350 else 351 out.msg_cbytes = in->msg_cbytes; 352 out.msg_lcbytes = in->msg_cbytes; 353 354 if (in->msg_qnum > USHRT_MAX) 355 out.msg_qnum = USHRT_MAX; 356 else 357 out.msg_qnum = in->msg_qnum; 358 359 if (in->msg_qbytes > USHRT_MAX) 360 out.msg_qbytes = USHRT_MAX; 361 else 362 out.msg_qbytes = in->msg_qbytes; 363 out.msg_lqbytes = in->msg_qbytes; 364 365 out.msg_lspid = in->msg_lspid; 366 out.msg_lrpid = in->msg_lrpid; 367 368 return copy_to_user(buf, &out, sizeof(out)); 369 } 370 default: 371 return -EINVAL; 372 } 373 } 374 375 static inline unsigned long 376 copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version) 377 { 378 switch(version) { 379 case IPC_64: 380 if (copy_from_user(out, buf, sizeof(*out))) 381 return -EFAULT; 382 return 0; 383 case IPC_OLD: 384 { 385 struct msqid_ds tbuf_old; 386 387 if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) 388 return -EFAULT; 389 390 out->msg_perm.uid = tbuf_old.msg_perm.uid; 391 out->msg_perm.gid = tbuf_old.msg_perm.gid; 392 out->msg_perm.mode = tbuf_old.msg_perm.mode; 393 394 if (tbuf_old.msg_qbytes == 0) 395 out->msg_qbytes = tbuf_old.msg_lqbytes; 396 else 397 out->msg_qbytes = tbuf_old.msg_qbytes; 398 399 return 0; 400 } 401 default: 402 return -EINVAL; 403 } 404 } 405 406 /* 407 * This function handles some msgctl commands which require the rw_mutex 408 * to be held in write mode. 409 * NOTE: no locks must be held, the rw_mutex is taken inside this function. 410 */ 411 static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, 412 struct msqid_ds __user *buf, int version) 413 { 414 struct kern_ipc_perm *ipcp; 415 struct msqid64_ds uninitialized_var(msqid64); 416 struct msg_queue *msq; 417 int err; 418 419 if (cmd == IPC_SET) { 420 if (copy_msqid_from_user(&msqid64, buf, version)) 421 return -EFAULT; 422 } 423 424 ipcp = ipcctl_pre_down(ns, &msg_ids(ns), msqid, cmd, 425 &msqid64.msg_perm, msqid64.msg_qbytes); 426 if (IS_ERR(ipcp)) 427 return PTR_ERR(ipcp); 428 429 msq = container_of(ipcp, struct msg_queue, q_perm); 430 431 err = security_msg_queue_msgctl(msq, cmd); 432 if (err) 433 goto out_unlock; 434 435 switch (cmd) { 436 case IPC_RMID: 437 freeque(ns, ipcp); 438 goto out_up; 439 case IPC_SET: 440 if (msqid64.msg_qbytes > ns->msg_ctlmnb && 441 !capable(CAP_SYS_RESOURCE)) { 442 err = -EPERM; 443 goto out_unlock; 444 } 445 446 err = ipc_update_perm(&msqid64.msg_perm, ipcp); 447 if (err) 448 goto out_unlock; 449 450 msq->q_qbytes = msqid64.msg_qbytes; 451 452 msq->q_ctime = get_seconds(); 453 /* sleeping receivers might be excluded by 454 * stricter permissions. 455 */ 456 expunge_all(msq, -EAGAIN); 457 /* sleeping senders might be able to send 458 * due to a larger queue size. 459 */ 460 ss_wakeup(&msq->q_senders, 0); 461 break; 462 default: 463 err = -EINVAL; 464 } 465 out_unlock: 466 msg_unlock(msq); 467 out_up: 468 up_write(&msg_ids(ns).rw_mutex); 469 return err; 470 } 471 472 SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) 473 { 474 struct msg_queue *msq; 475 int err, version; 476 struct ipc_namespace *ns; 477 478 if (msqid < 0 || cmd < 0) 479 return -EINVAL; 480 481 version = ipc_parse_version(&cmd); 482 ns = current->nsproxy->ipc_ns; 483 484 switch (cmd) { 485 case IPC_INFO: 486 case MSG_INFO: 487 { 488 struct msginfo msginfo; 489 int max_id; 490 491 if (!buf) 492 return -EFAULT; 493 /* 494 * We must not return kernel stack data. 495 * due to padding, it's not enough 496 * to set all member fields. 497 */ 498 err = security_msg_queue_msgctl(NULL, cmd); 499 if (err) 500 return err; 501 502 memset(&msginfo, 0, sizeof(msginfo)); 503 msginfo.msgmni = ns->msg_ctlmni; 504 msginfo.msgmax = ns->msg_ctlmax; 505 msginfo.msgmnb = ns->msg_ctlmnb; 506 msginfo.msgssz = MSGSSZ; 507 msginfo.msgseg = MSGSEG; 508 down_read(&msg_ids(ns).rw_mutex); 509 if (cmd == MSG_INFO) { 510 msginfo.msgpool = msg_ids(ns).in_use; 511 msginfo.msgmap = atomic_read(&ns->msg_hdrs); 512 msginfo.msgtql = atomic_read(&ns->msg_bytes); 513 } else { 514 msginfo.msgmap = MSGMAP; 515 msginfo.msgpool = MSGPOOL; 516 msginfo.msgtql = MSGTQL; 517 } 518 max_id = ipc_get_maxid(&msg_ids(ns)); 519 up_read(&msg_ids(ns).rw_mutex); 520 if (copy_to_user(buf, &msginfo, sizeof(struct msginfo))) 521 return -EFAULT; 522 return (max_id < 0) ? 0 : max_id; 523 } 524 case MSG_STAT: /* msqid is an index rather than a msg queue id */ 525 case IPC_STAT: 526 { 527 struct msqid64_ds tbuf; 528 int success_return; 529 530 if (!buf) 531 return -EFAULT; 532 533 if (cmd == MSG_STAT) { 534 msq = msg_lock(ns, msqid); 535 if (IS_ERR(msq)) 536 return PTR_ERR(msq); 537 success_return = msq->q_perm.id; 538 } else { 539 msq = msg_lock_check(ns, msqid); 540 if (IS_ERR(msq)) 541 return PTR_ERR(msq); 542 success_return = 0; 543 } 544 err = -EACCES; 545 if (ipcperms(ns, &msq->q_perm, S_IRUGO)) 546 goto out_unlock; 547 548 err = security_msg_queue_msgctl(msq, cmd); 549 if (err) 550 goto out_unlock; 551 552 memset(&tbuf, 0, sizeof(tbuf)); 553 554 kernel_to_ipc64_perm(&msq->q_perm, &tbuf.msg_perm); 555 tbuf.msg_stime = msq->q_stime; 556 tbuf.msg_rtime = msq->q_rtime; 557 tbuf.msg_ctime = msq->q_ctime; 558 tbuf.msg_cbytes = msq->q_cbytes; 559 tbuf.msg_qnum = msq->q_qnum; 560 tbuf.msg_qbytes = msq->q_qbytes; 561 tbuf.msg_lspid = msq->q_lspid; 562 tbuf.msg_lrpid = msq->q_lrpid; 563 msg_unlock(msq); 564 if (copy_msqid_to_user(buf, &tbuf, version)) 565 return -EFAULT; 566 return success_return; 567 } 568 case IPC_SET: 569 case IPC_RMID: 570 err = msgctl_down(ns, msqid, cmd, buf, version); 571 return err; 572 default: 573 return -EINVAL; 574 } 575 576 out_unlock: 577 msg_unlock(msq); 578 return err; 579 } 580 581 static int testmsg(struct msg_msg *msg, long type, int mode) 582 { 583 switch(mode) 584 { 585 case SEARCH_ANY: 586 return 1; 587 case SEARCH_LESSEQUAL: 588 if (msg->m_type <=type) 589 return 1; 590 break; 591 case SEARCH_EQUAL: 592 if (msg->m_type == type) 593 return 1; 594 break; 595 case SEARCH_NOTEQUAL: 596 if (msg->m_type != type) 597 return 1; 598 break; 599 } 600 return 0; 601 } 602 603 static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg) 604 { 605 struct list_head *tmp; 606 607 tmp = msq->q_receivers.next; 608 while (tmp != &msq->q_receivers) { 609 struct msg_receiver *msr; 610 611 msr = list_entry(tmp, struct msg_receiver, r_list); 612 tmp = tmp->next; 613 if (testmsg(msg, msr->r_msgtype, msr->r_mode) && 614 !security_msg_queue_msgrcv(msq, msg, msr->r_tsk, 615 msr->r_msgtype, msr->r_mode)) { 616 617 list_del(&msr->r_list); 618 if (msr->r_maxsize < msg->m_ts) { 619 msr->r_msg = NULL; 620 wake_up_process(msr->r_tsk); 621 smp_mb(); 622 msr->r_msg = ERR_PTR(-E2BIG); 623 } else { 624 msr->r_msg = NULL; 625 msq->q_lrpid = task_pid_vnr(msr->r_tsk); 626 msq->q_rtime = get_seconds(); 627 wake_up_process(msr->r_tsk); 628 smp_mb(); 629 msr->r_msg = msg; 630 631 return 1; 632 } 633 } 634 } 635 return 0; 636 } 637 638 long do_msgsnd(int msqid, long mtype, void __user *mtext, 639 size_t msgsz, int msgflg) 640 { 641 struct msg_queue *msq; 642 struct msg_msg *msg; 643 int err; 644 struct ipc_namespace *ns; 645 646 ns = current->nsproxy->ipc_ns; 647 648 if (msgsz > ns->msg_ctlmax || (long) msgsz < 0 || msqid < 0) 649 return -EINVAL; 650 if (mtype < 1) 651 return -EINVAL; 652 653 msg = load_msg(mtext, msgsz); 654 if (IS_ERR(msg)) 655 return PTR_ERR(msg); 656 657 msg->m_type = mtype; 658 msg->m_ts = msgsz; 659 660 msq = msg_lock_check(ns, msqid); 661 if (IS_ERR(msq)) { 662 err = PTR_ERR(msq); 663 goto out_free; 664 } 665 666 for (;;) { 667 struct msg_sender s; 668 669 err = -EACCES; 670 if (ipcperms(ns, &msq->q_perm, S_IWUGO)) 671 goto out_unlock_free; 672 673 err = security_msg_queue_msgsnd(msq, msg, msgflg); 674 if (err) 675 goto out_unlock_free; 676 677 if (msgsz + msq->q_cbytes <= msq->q_qbytes && 678 1 + msq->q_qnum <= msq->q_qbytes) { 679 break; 680 } 681 682 /* queue full, wait: */ 683 if (msgflg & IPC_NOWAIT) { 684 err = -EAGAIN; 685 goto out_unlock_free; 686 } 687 ss_add(msq, &s); 688 ipc_rcu_getref(msq); 689 msg_unlock(msq); 690 schedule(); 691 692 ipc_lock_by_ptr(&msq->q_perm); 693 ipc_rcu_putref(msq); 694 if (msq->q_perm.deleted) { 695 err = -EIDRM; 696 goto out_unlock_free; 697 } 698 ss_del(&s); 699 700 if (signal_pending(current)) { 701 err = -ERESTARTNOHAND; 702 goto out_unlock_free; 703 } 704 } 705 706 msq->q_lspid = task_tgid_vnr(current); 707 msq->q_stime = get_seconds(); 708 709 if (!pipelined_send(msq, msg)) { 710 /* no one is waiting for this message, enqueue it */ 711 list_add_tail(&msg->m_list, &msq->q_messages); 712 msq->q_cbytes += msgsz; 713 msq->q_qnum++; 714 atomic_add(msgsz, &ns->msg_bytes); 715 atomic_inc(&ns->msg_hdrs); 716 } 717 718 err = 0; 719 msg = NULL; 720 721 out_unlock_free: 722 msg_unlock(msq); 723 out_free: 724 if (msg != NULL) 725 free_msg(msg); 726 return err; 727 } 728 729 SYSCALL_DEFINE4(msgsnd, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz, 730 int, msgflg) 731 { 732 long mtype; 733 734 if (get_user(mtype, &msgp->mtype)) 735 return -EFAULT; 736 return do_msgsnd(msqid, mtype, msgp->mtext, msgsz, msgflg); 737 } 738 739 static inline int convert_mode(long *msgtyp, int msgflg) 740 { 741 /* 742 * find message of correct type. 743 * msgtyp = 0 => get first. 744 * msgtyp > 0 => get first message of matching type. 745 * msgtyp < 0 => get message with least type must be < abs(msgtype). 746 */ 747 if (*msgtyp == 0) 748 return SEARCH_ANY; 749 if (*msgtyp < 0) { 750 *msgtyp = -*msgtyp; 751 return SEARCH_LESSEQUAL; 752 } 753 if (msgflg & MSG_EXCEPT) 754 return SEARCH_NOTEQUAL; 755 return SEARCH_EQUAL; 756 } 757 758 static long do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz) 759 { 760 struct msgbuf __user *msgp = dest; 761 size_t msgsz; 762 763 if (put_user(msg->m_type, &msgp->mtype)) 764 return -EFAULT; 765 766 msgsz = (bufsz > msg->m_ts) ? msg->m_ts : bufsz; 767 if (store_msg(msgp->mtext, msg, msgsz)) 768 return -EFAULT; 769 return msgsz; 770 } 771 772 #ifdef CONFIG_CHECKPOINT_RESTORE 773 /* 774 * This function creates new kernel message structure, large enough to store 775 * bufsz message bytes. 776 */ 777 static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz, 778 int msgflg, long *msgtyp, 779 unsigned long *copy_number) 780 { 781 struct msg_msg *copy; 782 783 *copy_number = *msgtyp; 784 *msgtyp = 0; 785 /* 786 * Create dummy message to copy real message to. 787 */ 788 copy = load_msg(buf, bufsz); 789 if (!IS_ERR(copy)) 790 copy->m_ts = bufsz; 791 return copy; 792 } 793 794 static inline void free_copy(struct msg_msg *copy) 795 { 796 if (copy) 797 free_msg(copy); 798 } 799 #else 800 static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz, 801 int msgflg, long *msgtyp, 802 unsigned long *copy_number) 803 { 804 return ERR_PTR(-ENOSYS); 805 } 806 807 static inline void free_copy(struct msg_msg *copy) 808 { 809 } 810 #endif 811 812 long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, 813 int msgflg, 814 long (*msg_handler)(void __user *, struct msg_msg *, size_t)) 815 { 816 struct msg_queue *msq; 817 struct msg_msg *msg; 818 int mode; 819 struct ipc_namespace *ns; 820 struct msg_msg *copy = NULL; 821 unsigned long copy_number = 0; 822 823 if (msqid < 0 || (long) bufsz < 0) 824 return -EINVAL; 825 if (msgflg & MSG_COPY) { 826 copy = prepare_copy(buf, bufsz, msgflg, &msgtyp, ©_number); 827 if (IS_ERR(copy)) 828 return PTR_ERR(copy); 829 } 830 mode = convert_mode(&msgtyp, msgflg); 831 ns = current->nsproxy->ipc_ns; 832 833 msq = msg_lock_check(ns, msqid); 834 if (IS_ERR(msq)) { 835 free_copy(copy); 836 return PTR_ERR(msq); 837 } 838 839 for (;;) { 840 struct msg_receiver msr_d; 841 struct list_head *tmp; 842 long msg_counter = 0; 843 844 msg = ERR_PTR(-EACCES); 845 if (ipcperms(ns, &msq->q_perm, S_IRUGO)) 846 goto out_unlock; 847 848 msg = ERR_PTR(-EAGAIN); 849 tmp = msq->q_messages.next; 850 while (tmp != &msq->q_messages) { 851 struct msg_msg *walk_msg; 852 853 walk_msg = list_entry(tmp, struct msg_msg, m_list); 854 if (testmsg(walk_msg, msgtyp, mode) && 855 !security_msg_queue_msgrcv(msq, walk_msg, current, 856 msgtyp, mode)) { 857 858 msg = walk_msg; 859 if (mode == SEARCH_LESSEQUAL && 860 walk_msg->m_type != 1) { 861 msgtyp = walk_msg->m_type - 1; 862 } else if (msgflg & MSG_COPY) { 863 if (copy_number == msg_counter) { 864 /* 865 * Found requested message. 866 * Copy it. 867 */ 868 msg = copy_msg(msg, copy); 869 if (IS_ERR(msg)) 870 goto out_unlock; 871 break; 872 } 873 } else 874 break; 875 msg_counter++; 876 } 877 tmp = tmp->next; 878 } 879 if (!IS_ERR(msg)) { 880 /* 881 * Found a suitable message. 882 * Unlink it from the queue. 883 */ 884 if ((bufsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) { 885 msg = ERR_PTR(-E2BIG); 886 goto out_unlock; 887 } 888 /* 889 * If we are copying, then do not unlink message and do 890 * not update queue parameters. 891 */ 892 if (msgflg & MSG_COPY) 893 goto out_unlock; 894 list_del(&msg->m_list); 895 msq->q_qnum--; 896 msq->q_rtime = get_seconds(); 897 msq->q_lrpid = task_tgid_vnr(current); 898 msq->q_cbytes -= msg->m_ts; 899 atomic_sub(msg->m_ts, &ns->msg_bytes); 900 atomic_dec(&ns->msg_hdrs); 901 ss_wakeup(&msq->q_senders, 0); 902 msg_unlock(msq); 903 break; 904 } 905 /* No message waiting. Wait for a message */ 906 if (msgflg & IPC_NOWAIT) { 907 msg = ERR_PTR(-ENOMSG); 908 goto out_unlock; 909 } 910 list_add_tail(&msr_d.r_list, &msq->q_receivers); 911 msr_d.r_tsk = current; 912 msr_d.r_msgtype = msgtyp; 913 msr_d.r_mode = mode; 914 if (msgflg & MSG_NOERROR) 915 msr_d.r_maxsize = INT_MAX; 916 else 917 msr_d.r_maxsize = bufsz; 918 msr_d.r_msg = ERR_PTR(-EAGAIN); 919 current->state = TASK_INTERRUPTIBLE; 920 msg_unlock(msq); 921 922 schedule(); 923 924 /* Lockless receive, part 1: 925 * Disable preemption. We don't hold a reference to the queue 926 * and getting a reference would defeat the idea of a lockless 927 * operation, thus the code relies on rcu to guarantee the 928 * existence of msq: 929 * Prior to destruction, expunge_all(-EIRDM) changes r_msg. 930 * Thus if r_msg is -EAGAIN, then the queue not yet destroyed. 931 * rcu_read_lock() prevents preemption between reading r_msg 932 * and the spin_lock() inside ipc_lock_by_ptr(). 933 */ 934 rcu_read_lock(); 935 936 /* Lockless receive, part 2: 937 * Wait until pipelined_send or expunge_all are outside of 938 * wake_up_process(). There is a race with exit(), see 939 * ipc/mqueue.c for the details. 940 */ 941 msg = (struct msg_msg*)msr_d.r_msg; 942 while (msg == NULL) { 943 cpu_relax(); 944 msg = (struct msg_msg *)msr_d.r_msg; 945 } 946 947 /* Lockless receive, part 3: 948 * If there is a message or an error then accept it without 949 * locking. 950 */ 951 if (msg != ERR_PTR(-EAGAIN)) { 952 rcu_read_unlock(); 953 break; 954 } 955 956 /* Lockless receive, part 3: 957 * Acquire the queue spinlock. 958 */ 959 ipc_lock_by_ptr(&msq->q_perm); 960 rcu_read_unlock(); 961 962 /* Lockless receive, part 4: 963 * Repeat test after acquiring the spinlock. 964 */ 965 msg = (struct msg_msg*)msr_d.r_msg; 966 if (msg != ERR_PTR(-EAGAIN)) 967 goto out_unlock; 968 969 list_del(&msr_d.r_list); 970 if (signal_pending(current)) { 971 msg = ERR_PTR(-ERESTARTNOHAND); 972 out_unlock: 973 msg_unlock(msq); 974 break; 975 } 976 } 977 if (IS_ERR(msg)) { 978 free_copy(copy); 979 return PTR_ERR(msg); 980 } 981 982 bufsz = msg_handler(buf, msg, bufsz); 983 free_msg(msg); 984 985 return bufsz; 986 } 987 988 SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz, 989 long, msgtyp, int, msgflg) 990 { 991 return do_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg, do_msg_fill); 992 } 993 994 #ifdef CONFIG_PROC_FS 995 static int sysvipc_msg_proc_show(struct seq_file *s, void *it) 996 { 997 struct user_namespace *user_ns = seq_user_ns(s); 998 struct msg_queue *msq = it; 999 1000 return seq_printf(s, 1001 "%10d %10d %4o %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n", 1002 msq->q_perm.key, 1003 msq->q_perm.id, 1004 msq->q_perm.mode, 1005 msq->q_cbytes, 1006 msq->q_qnum, 1007 msq->q_lspid, 1008 msq->q_lrpid, 1009 from_kuid_munged(user_ns, msq->q_perm.uid), 1010 from_kgid_munged(user_ns, msq->q_perm.gid), 1011 from_kuid_munged(user_ns, msq->q_perm.cuid), 1012 from_kgid_munged(user_ns, msq->q_perm.cgid), 1013 msq->q_stime, 1014 msq->q_rtime, 1015 msq->q_ctime); 1016 } 1017 #endif 1018