/*
 * linux/ipc/msg.c
 * Copyright (C) 1992 Krishna Balasubramanian
 *
 * Removed all the remaining kerneld mess
 * Catch the -EFAULT stuff properly
 * Use GFP_KERNEL for messages as in 1.2
 * Fixed up the unchecked user space derefs
 * Copyright (C) 1998 Alan Cox & Andi Kleen
 *
 * /proc/sysvipc/msg support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 *
 * mostly rewritten, threaded and wake-one semantics added
 * MSGMAX limit removed, sysctl's added
 * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
 *
 * support for audit of ipc object properties and permission changes
 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
 *
 * namespaces support
 * OpenVZ, SWsoft Inc.
 * Pavel Emelianov <xemul@openvz.org>
 */

#include <linux/capability.h>
#include <linux/msg.h>
#include <linux/spinlock.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/proc_fs.h>
#include <linux/list.h>
#include <linux/security.h>
#include <linux/sched.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/seq_file.h>
#include <linux/rwsem.h>
#include <linux/nsproxy.h>
#include <linux/ipc_namespace.h>

#include <asm/current.h>
#include <asm/uaccess.h>
#include "util.h"

/*
 * one msg_receiver structure for each sleeping receiver:
 */
struct msg_receiver {
	struct list_head	r_list;
	struct task_struct	*r_tsk;

	int			r_mode;
	long			r_msgtype;
	long			r_maxsize;

	struct msg_msg		*volatile r_msg;
};

/* one msg_sender for each sleeping sender */
struct msg_sender {
	struct list_head	list;
	struct task_struct	*tsk;
};

#define SEARCH_ANY		1
#define SEARCH_EQUAL		2
#define SEARCH_NOTEQUAL		3
#define SEARCH_LESSEQUAL	4
#define SEARCH_NUMBER		5

#define msg_ids(ns)	((ns)->ids[IPC_MSG_IDS])

#define msg_unlock(msq)	ipc_unlock(&(msq)->q_perm)

static void freeque(struct ipc_namespace *, struct kern_ipc_perm *);
static int newque(struct ipc_namespace *, struct ipc_params *);
#ifdef CONFIG_PROC_FS
static int sysvipc_msg_proc_show(struct seq_file *s, void *it);
#endif

/*
 * Scale msgmni with the available lowmem size: the memory dedicated to msg
 * queues should occupy at most 1/MSG_MEM_SCALE of lowmem.
 * Also take into account the number of nsproxies created so far.
 * This should be done staying within the (MSGMNI , IPCMNI/nr_ipc_ns) range.
 */
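/*
 * Illustrative sizing only (assuming the usual MSG_MEM_SCALE of 32 and
 * MSGMNB of 16384 bytes): with 2 GiB of lowmem and a single ipc namespace,
 * allowed = (2 GiB / 32) / 16384 = 4096 queues, which is then clamped
 * into the [MSGMNI, IPCMNI / nr_ipc_ns] interval below.
 */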
void recompute_msgmni(struct ipc_namespace *ns)
{
	struct sysinfo i;
	unsigned long allowed;
	int nb_ns;

	si_meminfo(&i);
	allowed = (((i.totalram - i.totalhigh) / MSG_MEM_SCALE) * i.mem_unit)
		/ MSGMNB;
	nb_ns = atomic_read(&nr_ipc_ns);
	allowed /= nb_ns;

	if (allowed < MSGMNI) {
		ns->msg_ctlmni = MSGMNI;
		return;
	}

	if (allowed > IPCMNI / nb_ns) {
		ns->msg_ctlmni = IPCMNI / nb_ns;
		return;
	}

	ns->msg_ctlmni = allowed;
}

void msg_init_ns(struct ipc_namespace *ns)
{
	ns->msg_ctlmax = MSGMAX;
	ns->msg_ctlmnb = MSGMNB;

	recompute_msgmni(ns);

	atomic_set(&ns->msg_bytes, 0);
	atomic_set(&ns->msg_hdrs, 0);
	ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
}

#ifdef CONFIG_IPC_NS
void msg_exit_ns(struct ipc_namespace *ns)
{
	free_ipcs(ns, &msg_ids(ns), freeque);
	idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr);
}
#endif

void __init msg_init(void)
{
	msg_init_ns(&init_ipc_ns);

	printk(KERN_INFO "msgmni has been set to %d\n",
		init_ipc_ns.msg_ctlmni);

	ipc_init_proc_interface("sysvipc/msg",
				"       key      msqid perms      cbytes       qnum lspid lrpid   uid   gid  cuid  cgid      stime      rtime      ctime\n",
				IPC_MSG_IDS, sysvipc_msg_proc_show);
}

/*
 * msg_lock_(check_) routines are called in the paths where the rw_mutex
 * is not held.
 */
static inline struct msg_queue *msg_lock(struct ipc_namespace *ns, int id)
{
	struct kern_ipc_perm *ipcp = ipc_lock(&msg_ids(ns), id);

	if (IS_ERR(ipcp))
		return (struct msg_queue *)ipcp;

	return container_of(ipcp, struct msg_queue, q_perm);
}

static inline struct msg_queue *msg_lock_check(struct ipc_namespace *ns,
						int id)
{
	struct kern_ipc_perm *ipcp = ipc_lock_check(&msg_ids(ns), id);

	if (IS_ERR(ipcp))
		return (struct msg_queue *)ipcp;

	return container_of(ipcp, struct msg_queue, q_perm);
}

static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s)
{
	ipc_rmid(&msg_ids(ns), &s->q_perm);
}

/**
 * newque - Create a new msg queue
 * @ns: namespace
 * @params: ptr to the structure that contains the key and msgflg
 *
 * Called with msg_ids.rw_mutex held (writer)
 */
static int newque(struct ipc_namespace *ns, struct ipc_params *params)
{
	struct msg_queue *msq;
	int id, retval;
	key_t key = params->key;
	int msgflg = params->flg;

	msq = ipc_rcu_alloc(sizeof(*msq));
	if (!msq)
		return -ENOMEM;

	msq->q_perm.mode = msgflg & S_IRWXUGO;
	msq->q_perm.key = key;

	msq->q_perm.security = NULL;
	retval = security_msg_queue_alloc(msq);
	if (retval) {
		ipc_rcu_putref(msq);
		return retval;
	}

	/*
	 * ipc_addid() locks msq
	 */
	id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
	if (id < 0) {
		security_msg_queue_free(msq);
		ipc_rcu_putref(msq);
		return id;
	}

	msq->q_stime = msq->q_rtime = 0;
	msq->q_ctime = get_seconds();
	msq->q_cbytes = msq->q_qnum = 0;
	msq->q_qbytes = ns->msg_ctlmnb;
	msq->q_lspid = msq->q_lrpid = 0;
	INIT_LIST_HEAD(&msq->q_messages);
	INIT_LIST_HEAD(&msq->q_receivers);
	INIT_LIST_HEAD(&msq->q_senders);

	msg_unlock(msq);

	return msq->q_perm.id;
}

static inline void ss_add(struct msg_queue *msq, struct msg_sender *mss)
{
	mss->tsk = current;
	current->state = TASK_INTERRUPTIBLE;
	list_add_tail(&mss->list, &msq->q_senders);
}

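/*
 * Note: ss_wakeup(..., kill=1) marks dead entries by setting list.next to
 * NULL rather than unlinking them, which is why ss_del() checks that
 * pointer before calling list_del().
 */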
static inline void ss_del(struct msg_sender *mss)
{
	if (mss->list.next != NULL)
		list_del(&mss->list);
}

static void ss_wakeup(struct list_head *h, int kill)
{
	struct msg_sender *mss, *t;

	list_for_each_entry_safe(mss, t, h, list) {
		if (kill)
			mss->list.next = NULL;
		wake_up_process(mss->tsk);
	}
}

static void expunge_all(struct msg_queue *msq, int res)
{
	struct msg_receiver *msr, *t;

	list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
		msr->r_msg = NULL;
		wake_up_process(msr->r_tsk);
		smp_mb();
		msr->r_msg = ERR_PTR(res);
	}
}

/*
 * freeque() wakes up waiters on the sender and receiver waiting queue,
 * removes the message queue from message queue ID IDR, and cleans up all the
 * messages associated with this queue.
 *
 * msg_ids.rw_mutex (writer) and the spinlock for this message queue are held
 * before freeque() is called. msg_ids.rw_mutex remains locked on exit.
 */
static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
{
	struct msg_msg *msg, *t;
	struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);

	expunge_all(msq, -EIDRM);
	ss_wakeup(&msq->q_senders, 1);
	msg_rmid(ns, msq);
	msg_unlock(msq);

	list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
		atomic_dec(&ns->msg_hdrs);
		free_msg(msg);
	}
	atomic_sub(msq->q_cbytes, &ns->msg_bytes);
	security_msg_queue_free(msq);
	ipc_rcu_putref(msq);
}

/*
 * Called with msg_ids.rw_mutex and ipcp locked.
 */
static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg)
{
	struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);

	return security_msg_queue_associate(msq, msgflg);
}

SYSCALL_DEFINE2(msgget, key_t, key, int, msgflg)
{
	struct ipc_namespace *ns;
	struct ipc_ops msg_ops;
	struct ipc_params msg_params;

	ns = current->nsproxy->ipc_ns;

	msg_ops.getnew = newque;
	msg_ops.associate = msg_security;
	msg_ops.more_checks = NULL;

	msg_params.key = key;
	msg_params.flg = msgflg;

	return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params);
}

static inline unsigned long
copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version)
{
	switch(version) {
	case IPC_64:
		return copy_to_user(buf, in, sizeof(*in));
	case IPC_OLD:
	{
		struct msqid_ds out;

		memset(&out, 0, sizeof(out));

		ipc64_perm_to_ipc_perm(&in->msg_perm, &out.msg_perm);

		out.msg_stime = in->msg_stime;
		out.msg_rtime = in->msg_rtime;
		out.msg_ctime = in->msg_ctime;

		if (in->msg_cbytes > USHRT_MAX)
			out.msg_cbytes = USHRT_MAX;
		else
			out.msg_cbytes = in->msg_cbytes;
		out.msg_lcbytes = in->msg_cbytes;

		if (in->msg_qnum > USHRT_MAX)
			out.msg_qnum = USHRT_MAX;
		else
			out.msg_qnum = in->msg_qnum;

		if (in->msg_qbytes > USHRT_MAX)
			out.msg_qbytes = USHRT_MAX;
		else
			out.msg_qbytes = in->msg_qbytes;
		out.msg_lqbytes = in->msg_qbytes;

		out.msg_lspid = in->msg_lspid;
		out.msg_lrpid = in->msg_lrpid;

		return copy_to_user(buf, &out, sizeof(out));
	}
	default:
		return -EINVAL;
	}
}

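/*
 * The old (IPC_OLD) msqid_ds ABI only has short-sized counters: the
 * helper above truncates cbytes/qnum/qbytes to USHRT_MAX while exporting
 * the untruncated values through the msg_l* fields, and the helper below
 * treats a user-supplied msg_qbytes of 0 as "take the full value from
 * msg_lqbytes instead".
 */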
static inline unsigned long
copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version)
{
	switch(version) {
	case IPC_64:
		if (copy_from_user(out, buf, sizeof(*out)))
			return -EFAULT;
		return 0;
	case IPC_OLD:
	{
		struct msqid_ds tbuf_old;

		if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
			return -EFAULT;

		out->msg_perm.uid = tbuf_old.msg_perm.uid;
		out->msg_perm.gid = tbuf_old.msg_perm.gid;
		out->msg_perm.mode = tbuf_old.msg_perm.mode;

		if (tbuf_old.msg_qbytes == 0)
			out->msg_qbytes = tbuf_old.msg_lqbytes;
		else
			out->msg_qbytes = tbuf_old.msg_qbytes;

		return 0;
	}
	default:
		return -EINVAL;
	}
}

/*
 * This function handles some msgctl commands which require the rw_mutex
 * to be held in write mode.
 * NOTE: the caller must hold no locks; the rw_mutex is taken inside this
 * function.
 */
static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
		       struct msqid_ds __user *buf, int version)
{
	struct kern_ipc_perm *ipcp;
	struct msqid64_ds uninitialized_var(msqid64);
	struct msg_queue *msq;
	int err;

	if (cmd == IPC_SET) {
		if (copy_msqid_from_user(&msqid64, buf, version))
			return -EFAULT;
	}

	ipcp = ipcctl_pre_down(ns, &msg_ids(ns), msqid, cmd,
			       &msqid64.msg_perm, msqid64.msg_qbytes);
	if (IS_ERR(ipcp))
		return PTR_ERR(ipcp);

	msq = container_of(ipcp, struct msg_queue, q_perm);

	err = security_msg_queue_msgctl(msq, cmd);
	if (err)
		goto out_unlock;

	switch (cmd) {
	case IPC_RMID:
		freeque(ns, ipcp);
		goto out_up;
	case IPC_SET:
		if (msqid64.msg_qbytes > ns->msg_ctlmnb &&
		    !capable(CAP_SYS_RESOURCE)) {
			err = -EPERM;
			goto out_unlock;
		}

		err = ipc_update_perm(&msqid64.msg_perm, ipcp);
		if (err)
			goto out_unlock;

		msq->q_qbytes = msqid64.msg_qbytes;

		msq->q_ctime = get_seconds();
		/* sleeping receivers might be excluded by
		 * stricter permissions.
		 */
		expunge_all(msq, -EAGAIN);
		/* sleeping senders might be able to send
		 * due to a larger queue size.
		 */
		ss_wakeup(&msq->q_senders, 0);
		break;
	default:
		err = -EINVAL;
	}

out_unlock:
	msg_unlock(msq);
out_up:
	up_write(&msg_ids(ns).rw_mutex);
	return err;
}

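/*
 * msgctl() dispatch: IPC_SET and IPC_RMID are funnelled through
 * msgctl_down() above (they need the rw_mutex in write mode), while the
 * read-only commands (IPC_INFO, MSG_INFO, IPC_STAT, MSG_STAT) are handled
 * inline below under the read-side rw_mutex or the per-queue lock.
 */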
SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf)
{
	struct msg_queue *msq;
	int err, version;
	struct ipc_namespace *ns;

	if (msqid < 0 || cmd < 0)
		return -EINVAL;

	version = ipc_parse_version(&cmd);
	ns = current->nsproxy->ipc_ns;

	switch (cmd) {
	case IPC_INFO:
	case MSG_INFO:
	{
		struct msginfo msginfo;
		int max_id;

		if (!buf)
			return -EFAULT;
		/*
		 * We must not return kernel stack data.
		 * Due to padding, it's not enough
		 * to set all member fields.
		 */
		err = security_msg_queue_msgctl(NULL, cmd);
		if (err)
			return err;

		memset(&msginfo, 0, sizeof(msginfo));
		msginfo.msgmni = ns->msg_ctlmni;
		msginfo.msgmax = ns->msg_ctlmax;
		msginfo.msgmnb = ns->msg_ctlmnb;
		msginfo.msgssz = MSGSSZ;
		msginfo.msgseg = MSGSEG;
		down_read(&msg_ids(ns).rw_mutex);
		if (cmd == MSG_INFO) {
			msginfo.msgpool = msg_ids(ns).in_use;
			msginfo.msgmap = atomic_read(&ns->msg_hdrs);
			msginfo.msgtql = atomic_read(&ns->msg_bytes);
		} else {
			msginfo.msgmap = MSGMAP;
			msginfo.msgpool = MSGPOOL;
			msginfo.msgtql = MSGTQL;
		}
		max_id = ipc_get_maxid(&msg_ids(ns));
		up_read(&msg_ids(ns).rw_mutex);
		if (copy_to_user(buf, &msginfo, sizeof(struct msginfo)))
			return -EFAULT;
		return (max_id < 0) ? 0 : max_id;
	}
	case MSG_STAT:	/* msqid is an index rather than a msg queue id */
	case IPC_STAT:
	{
		struct msqid64_ds tbuf;
		int success_return;

		if (!buf)
			return -EFAULT;

		if (cmd == MSG_STAT) {
			msq = msg_lock(ns, msqid);
			if (IS_ERR(msq))
				return PTR_ERR(msq);
			success_return = msq->q_perm.id;
		} else {
			msq = msg_lock_check(ns, msqid);
			if (IS_ERR(msq))
				return PTR_ERR(msq);
			success_return = 0;
		}
		err = -EACCES;
		if (ipcperms(ns, &msq->q_perm, S_IRUGO))
			goto out_unlock;

		err = security_msg_queue_msgctl(msq, cmd);
		if (err)
			goto out_unlock;

		memset(&tbuf, 0, sizeof(tbuf));

		kernel_to_ipc64_perm(&msq->q_perm, &tbuf.msg_perm);
		tbuf.msg_stime = msq->q_stime;
		tbuf.msg_rtime = msq->q_rtime;
		tbuf.msg_ctime = msq->q_ctime;
		tbuf.msg_cbytes = msq->q_cbytes;
		tbuf.msg_qnum = msq->q_qnum;
		tbuf.msg_qbytes = msq->q_qbytes;
		tbuf.msg_lspid = msq->q_lspid;
		tbuf.msg_lrpid = msq->q_lrpid;
		msg_unlock(msq);
		if (copy_msqid_to_user(buf, &tbuf, version))
			return -EFAULT;
		return success_return;
	}
	case IPC_SET:
	case IPC_RMID:
		err = msgctl_down(ns, msqid, cmd, buf, version);
		return err;
	default:
		return -EINVAL;
	}

out_unlock:
	msg_unlock(msq);
	return err;
}

static int testmsg(struct msg_msg *msg, long type, int mode)
{
	switch(mode)
	{
		case SEARCH_ANY:
		case SEARCH_NUMBER:
			return 1;
		case SEARCH_LESSEQUAL:
			if (msg->m_type <= type)
				return 1;
			break;
		case SEARCH_EQUAL:
			if (msg->m_type == type)
				return 1;
			break;
		case SEARCH_NOTEQUAL:
			if (msg->m_type != type)
				return 1;
			break;
	}
	return 0;
}

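/*
 * Hand a freshly sent message directly to a sleeping receiver, bypassing
 * the queue. The handoff protocol mirrors expunge_all(): r_msg is first
 * set to NULL, the receiver is woken, and only after the smp_mb() does
 * r_msg get its final value (the message, or an ERR_PTR). The lockless
 * receive path in do_msgrcv() spins while r_msg is still NULL, so it
 * never observes a half-completed wakeup.
 */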
static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg)
{
	struct msg_receiver *msr, *t;

	list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
		if (testmsg(msg, msr->r_msgtype, msr->r_mode) &&
		    !security_msg_queue_msgrcv(msq, msg, msr->r_tsk,
					       msr->r_msgtype, msr->r_mode)) {

			list_del(&msr->r_list);
			if (msr->r_maxsize < msg->m_ts) {
				msr->r_msg = NULL;
				wake_up_process(msr->r_tsk);
				smp_mb();
				msr->r_msg = ERR_PTR(-E2BIG);
			} else {
				msr->r_msg = NULL;
				msq->q_lrpid = task_pid_vnr(msr->r_tsk);
				msq->q_rtime = get_seconds();
				wake_up_process(msr->r_tsk);
				smp_mb();
				msr->r_msg = msg;

				return 1;
			}
		}
	}
	return 0;
}

long do_msgsnd(int msqid, long mtype, void __user *mtext,
		size_t msgsz, int msgflg)
{
	struct msg_queue *msq;
	struct msg_msg *msg;
	int err;
	struct ipc_namespace *ns;

	ns = current->nsproxy->ipc_ns;

	if (msgsz > ns->msg_ctlmax || (long) msgsz < 0 || msqid < 0)
		return -EINVAL;
	if (mtype < 1)
		return -EINVAL;

	msg = load_msg(mtext, msgsz);
	if (IS_ERR(msg))
		return PTR_ERR(msg);

	msg->m_type = mtype;
	msg->m_ts = msgsz;

	msq = msg_lock_check(ns, msqid);
	if (IS_ERR(msq)) {
		err = PTR_ERR(msq);
		goto out_free;
	}

	for (;;) {
		struct msg_sender s;

		err = -EACCES;
		if (ipcperms(ns, &msq->q_perm, S_IWUGO))
			goto out_unlock_free;

		err = security_msg_queue_msgsnd(msq, msg, msgflg);
		if (err)
			goto out_unlock_free;

		if (msgsz + msq->q_cbytes <= msq->q_qbytes &&
				1 + msq->q_qnum <= msq->q_qbytes) {
			break;
		}

		/* queue full, wait: */
		if (msgflg & IPC_NOWAIT) {
			err = -EAGAIN;
			goto out_unlock_free;
		}
		ss_add(msq, &s);

		if (!ipc_rcu_getref(msq)) {
			err = -EIDRM;
			goto out_unlock_free;
		}

		msg_unlock(msq);
		schedule();

		ipc_lock_by_ptr(&msq->q_perm);
		ipc_rcu_putref(msq);
		if (msq->q_perm.deleted) {
			err = -EIDRM;
			goto out_unlock_free;
		}
		ss_del(&s);

		if (signal_pending(current)) {
			err = -ERESTARTNOHAND;
			goto out_unlock_free;
		}
	}

	msq->q_lspid = task_tgid_vnr(current);
	msq->q_stime = get_seconds();

	if (!pipelined_send(msq, msg)) {
		/* no one is waiting for this message, enqueue it */
		list_add_tail(&msg->m_list, &msq->q_messages);
		msq->q_cbytes += msgsz;
		msq->q_qnum++;
		atomic_add(msgsz, &ns->msg_bytes);
		atomic_inc(&ns->msg_hdrs);
	}

	err = 0;
	msg = NULL;

out_unlock_free:
	msg_unlock(msq);
out_free:
	if (msg != NULL)
		free_msg(msg);
	return err;
}

SYSCALL_DEFINE4(msgsnd, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
		int, msgflg)
{
	long mtype;

	if (get_user(mtype, &msgp->mtype))
		return -EFAULT;
	return do_msgsnd(msqid, mtype, msgp->mtext, msgsz, msgflg);
}

static inline int convert_mode(long *msgtyp, int msgflg)
{
	if (msgflg & MSG_COPY)
		return SEARCH_NUMBER;
	/*
	 * find message of correct type.
	 * msgtyp = 0 => get first.
	 * msgtyp > 0 => get first message of matching type.
	 * msgtyp < 0 => get message with lowest type which is <= abs(msgtyp).
	 */
	if (*msgtyp == 0)
		return SEARCH_ANY;
	if (*msgtyp < 0) {
		*msgtyp = -*msgtyp;
		return SEARCH_LESSEQUAL;
	}
	if (msgflg & MSG_EXCEPT)
		return SEARCH_NOTEQUAL;
	return SEARCH_EQUAL;
}

static long do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz)
{
	struct msgbuf __user *msgp = dest;
	size_t msgsz;

	if (put_user(msg->m_type, &msgp->mtype))
		return -EFAULT;

	msgsz = (bufsz > msg->m_ts) ? msg->m_ts : bufsz;
	if (store_msg(msgp->mtext, msg, msgsz))
		return -EFAULT;
	return msgsz;
}

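/*
 * MSG_COPY support (used by checkpoint/restore): instead of dequeueing,
 * msgrcv(..., MSG_COPY) treats msgtyp as a 0-based position in the queue
 * (SEARCH_NUMBER above) and returns a copy of that message, leaving the
 * queue itself untouched. The helpers below allocate and free the buffer
 * that the selected message is copied into.
 */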
#ifdef CONFIG_CHECKPOINT_RESTORE
/*
 * This function creates a new kernel message structure, large enough to store
 * bufsz message bytes.
 */
static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz)
{
	struct msg_msg *copy;

	/*
	 * Create dummy message to copy real message to.
	 */
	copy = load_msg(buf, bufsz);
	if (!IS_ERR(copy))
		copy->m_ts = bufsz;
	return copy;
}

static inline void free_copy(struct msg_msg *copy)
{
	if (copy)
		free_msg(copy);
}
#else
static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz)
{
	return ERR_PTR(-ENOSYS);
}

static inline void free_copy(struct msg_msg *copy)
{
}
#endif

static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode)
{
	struct msg_msg *msg;
	long count = 0;

	list_for_each_entry(msg, &msq->q_messages, m_list) {
		if (testmsg(msg, *msgtyp, mode) &&
		    !security_msg_queue_msgrcv(msq, msg, current,
					       *msgtyp, mode)) {
			if (mode == SEARCH_LESSEQUAL && msg->m_type != 1) {
				*msgtyp = msg->m_type - 1;
			} else if (mode == SEARCH_NUMBER) {
				if (*msgtyp == count)
					return msg;
			} else
				return msg;
			count++;
		}
	}

	return ERR_PTR(-EAGAIN);
}


long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
	       int msgflg,
	       long (*msg_handler)(void __user *, struct msg_msg *, size_t))
{
	struct msg_queue *msq;
	struct msg_msg *msg;
	int mode;
	struct ipc_namespace *ns;
	struct msg_msg *copy = NULL;

	ns = current->nsproxy->ipc_ns;

	if (msqid < 0 || (long) bufsz < 0)
		return -EINVAL;
	if (msgflg & MSG_COPY) {
		copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax));
		if (IS_ERR(copy))
			return PTR_ERR(copy);
	}
	mode = convert_mode(&msgtyp, msgflg);

	msq = msg_lock_check(ns, msqid);
	if (IS_ERR(msq)) {
		free_copy(copy);
		return PTR_ERR(msq);
	}

	for (;;) {
		struct msg_receiver msr_d;

		msg = ERR_PTR(-EACCES);
		if (ipcperms(ns, &msq->q_perm, S_IRUGO))
			goto out_unlock;

		msg = find_msg(msq, &msgtyp, mode);

		if (!IS_ERR(msg)) {
			/*
			 * Found a suitable message.
			 * Unlink it from the queue.
			 */
			if ((bufsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) {
				msg = ERR_PTR(-E2BIG);
				goto out_unlock;
			}
			/*
			 * If we are copying, then do not unlink message and do
			 * not update queue parameters.
			 */
			if (msgflg & MSG_COPY) {
				msg = copy_msg(msg, copy);
				goto out_unlock;
			}
			list_del(&msg->m_list);
			msq->q_qnum--;
			msq->q_rtime = get_seconds();
			msq->q_lrpid = task_tgid_vnr(current);
			msq->q_cbytes -= msg->m_ts;
			atomic_sub(msg->m_ts, &ns->msg_bytes);
			atomic_dec(&ns->msg_hdrs);
			ss_wakeup(&msq->q_senders, 0);
			msg_unlock(msq);
			break;
		}
		/* No message waiting. Wait for a message */
		if (msgflg & IPC_NOWAIT) {
			msg = ERR_PTR(-ENOMSG);
			goto out_unlock;
		}
		list_add_tail(&msr_d.r_list, &msq->q_receivers);
		msr_d.r_tsk = current;
		msr_d.r_msgtype = msgtyp;
		msr_d.r_mode = mode;
		if (msgflg & MSG_NOERROR)
			msr_d.r_maxsize = INT_MAX;
		else
			msr_d.r_maxsize = bufsz;
		msr_d.r_msg = ERR_PTR(-EAGAIN);
		current->state = TASK_INTERRUPTIBLE;
		msg_unlock(msq);

		schedule();

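		/*
		 * We may have been woken by pipelined_send() or expunge_all()
		 * (which already filled in r_msg for us), or simply by a
		 * signal; the steps below work that out without taking the
		 * queue lock unless we really have to.
		 */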
		/* Lockless receive, part 1:
		 * Disable preemption. We don't hold a reference to the queue
		 * and getting a reference would defeat the idea of a lockless
		 * operation, thus the code relies on rcu to guarantee the
		 * existence of msq:
		 * Prior to destruction, expunge_all(-EIDRM) changes r_msg.
		 * Thus if r_msg is -EAGAIN, then the queue is not yet destroyed.
		 * rcu_read_lock() prevents preemption between reading r_msg
		 * and the spin_lock() inside ipc_lock_by_ptr().
		 */
		rcu_read_lock();

		/* Lockless receive, part 2:
		 * Wait until pipelined_send or expunge_all are outside of
		 * wake_up_process(). There is a race with exit(), see
		 * ipc/mqueue.c for the details.
		 */
		msg = (struct msg_msg *)msr_d.r_msg;
		while (msg == NULL) {
			cpu_relax();
			msg = (struct msg_msg *)msr_d.r_msg;
		}

		/* Lockless receive, part 3:
		 * If there is a message or an error then accept it without
		 * locking.
		 */
		if (msg != ERR_PTR(-EAGAIN)) {
			rcu_read_unlock();
			break;
		}

		/* Lockless receive, part 3:
		 * Acquire the queue spinlock.
		 */
		ipc_lock_by_ptr(&msq->q_perm);
		rcu_read_unlock();

		/* Lockless receive, part 4:
		 * Repeat test after acquiring the spinlock.
		 */
		msg = (struct msg_msg *)msr_d.r_msg;
		if (msg != ERR_PTR(-EAGAIN))
			goto out_unlock;

		list_del(&msr_d.r_list);
		if (signal_pending(current)) {
			msg = ERR_PTR(-ERESTARTNOHAND);
out_unlock:
			msg_unlock(msq);
			break;
		}
	}
	if (IS_ERR(msg)) {
		free_copy(copy);
		return PTR_ERR(msg);
	}

	bufsz = msg_handler(buf, msg, bufsz);
	free_msg(msg);

	return bufsz;
}

SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
		long, msgtyp, int, msgflg)
{
	return do_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg, do_msg_fill);
}

#ifdef CONFIG_PROC_FS
static int sysvipc_msg_proc_show(struct seq_file *s, void *it)
{
	struct user_namespace *user_ns = seq_user_ns(s);
	struct msg_queue *msq = it;

	return seq_printf(s,
			"%10d %10d  %4o  %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n",
			msq->q_perm.key,
			msq->q_perm.id,
			msq->q_perm.mode,
			msq->q_cbytes,
			msq->q_qnum,
			msq->q_lspid,
			msq->q_lrpid,
			from_kuid_munged(user_ns, msq->q_perm.uid),
			from_kgid_munged(user_ns, msq->q_perm.gid),
			from_kuid_munged(user_ns, msq->q_perm.cuid),
			from_kgid_munged(user_ns, msq->q_perm.cgid),
			msq->q_stime,
			msq->q_rtime,
			msq->q_ctime);
}
#endif