1 /* 2 * linux/ipc/msg.c 3 * Copyright (C) 1992 Krishna Balasubramanian 4 * 5 * Removed all the remaining kerneld mess 6 * Catch the -EFAULT stuff properly 7 * Use GFP_KERNEL for messages as in 1.2 8 * Fixed up the unchecked user space derefs 9 * Copyright (C) 1998 Alan Cox & Andi Kleen 10 * 11 * /proc/sysvipc/msg support (c) 1999 Dragos Acostachioaie <dragos@iname.com> 12 * 13 * mostly rewritten, threaded and wake-one semantics added 14 * MSGMAX limit removed, sysctl's added 15 * (c) 1999 Manfred Spraul <manfred@colorfullife.com> 16 * 17 * support for audit of ipc object properties and permission changes 18 * Dustin Kirkland <dustin.kirkland@us.ibm.com> 19 * 20 * namespaces support 21 * OpenVZ, SWsoft Inc. 22 * Pavel Emelianov <xemul@openvz.org> 23 */ 24 25 #include <linux/capability.h> 26 #include <linux/msg.h> 27 #include <linux/spinlock.h> 28 #include <linux/init.h> 29 #include <linux/mm.h> 30 #include <linux/proc_fs.h> 31 #include <linux/list.h> 32 #include <linux/security.h> 33 #include <linux/sched.h> 34 #include <linux/syscalls.h> 35 #include <linux/audit.h> 36 #include <linux/seq_file.h> 37 #include <linux/rwsem.h> 38 #include <linux/nsproxy.h> 39 #include <linux/ipc_namespace.h> 40 41 #include <asm/current.h> 42 #include <asm/uaccess.h> 43 #include "util.h" 44 45 /* 46 * one msg_receiver structure for each sleeping receiver: 47 */ 48 struct msg_receiver { 49 struct list_head r_list; 50 struct task_struct *r_tsk; 51 52 int r_mode; 53 long r_msgtype; 54 long r_maxsize; 55 56 struct msg_msg *volatile r_msg; 57 }; 58 59 /* one msg_sender for each sleeping sender */ 60 struct msg_sender { 61 struct list_head list; 62 struct task_struct *tsk; 63 }; 64 65 #define SEARCH_ANY 1 66 #define SEARCH_EQUAL 2 67 #define SEARCH_NOTEQUAL 3 68 #define SEARCH_LESSEQUAL 4 69 #define SEARCH_NUMBER 5 70 71 #define msg_ids(ns) ((ns)->ids[IPC_MSG_IDS]) 72 73 #define msg_unlock(msq) ipc_unlock(&(msq)->q_perm) 74 75 static void freeque(struct ipc_namespace *, struct kern_ipc_perm *); 76 static int newque(struct ipc_namespace *, struct ipc_params *); 77 #ifdef CONFIG_PROC_FS 78 static int sysvipc_msg_proc_show(struct seq_file *s, void *it); 79 #endif 80 81 /* 82 * Scale msgmni with the available lowmem size: the memory dedicated to msg 83 * queues should occupy at most 1/MSG_MEM_SCALE of lowmem. 84 * Also take into account the number of nsproxies created so far. 85 * This should be done staying within the (MSGMNI , IPCMNI/nr_ipc_ns) range. 86 */ 87 void recompute_msgmni(struct ipc_namespace *ns) 88 { 89 struct sysinfo i; 90 unsigned long allowed; 91 int nb_ns; 92 93 si_meminfo(&i); 94 allowed = (((i.totalram - i.totalhigh) / MSG_MEM_SCALE) * i.mem_unit) 95 / MSGMNB; 96 nb_ns = atomic_read(&nr_ipc_ns); 97 allowed /= nb_ns; 98 99 if (allowed < MSGMNI) { 100 ns->msg_ctlmni = MSGMNI; 101 return; 102 } 103 104 if (allowed > IPCMNI / nb_ns) { 105 ns->msg_ctlmni = IPCMNI / nb_ns; 106 return; 107 } 108 109 ns->msg_ctlmni = allowed; 110 } 111 112 void msg_init_ns(struct ipc_namespace *ns) 113 { 114 ns->msg_ctlmax = MSGMAX; 115 ns->msg_ctlmnb = MSGMNB; 116 117 recompute_msgmni(ns); 118 119 atomic_set(&ns->msg_bytes, 0); 120 atomic_set(&ns->msg_hdrs, 0); 121 ipc_init_ids(&ns->ids[IPC_MSG_IDS]); 122 } 123 124 #ifdef CONFIG_IPC_NS 125 void msg_exit_ns(struct ipc_namespace *ns) 126 { 127 free_ipcs(ns, &msg_ids(ns), freeque); 128 idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr); 129 } 130 #endif 131 132 void __init msg_init(void) 133 { 134 msg_init_ns(&init_ipc_ns); 135 136 printk(KERN_INFO "msgmni has been set to %d\n", 137 init_ipc_ns.msg_ctlmni); 138 139 ipc_init_proc_interface("sysvipc/msg", 140 " key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n", 141 IPC_MSG_IDS, sysvipc_msg_proc_show); 142 } 143 144 static inline struct msg_queue *msq_obtain_object(struct ipc_namespace *ns, int id) 145 { 146 struct kern_ipc_perm *ipcp = ipc_obtain_object(&msg_ids(ns), id); 147 148 if (IS_ERR(ipcp)) 149 return ERR_CAST(ipcp); 150 151 return container_of(ipcp, struct msg_queue, q_perm); 152 } 153 154 static inline struct msg_queue *msq_obtain_object_check(struct ipc_namespace *ns, 155 int id) 156 { 157 struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&msg_ids(ns), id); 158 159 if (IS_ERR(ipcp)) 160 return ERR_CAST(ipcp); 161 162 return container_of(ipcp, struct msg_queue, q_perm); 163 } 164 165 static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s) 166 { 167 ipc_rmid(&msg_ids(ns), &s->q_perm); 168 } 169 170 /** 171 * newque - Create a new msg queue 172 * @ns: namespace 173 * @params: ptr to the structure that contains the key and msgflg 174 * 175 * Called with msg_ids.rw_mutex held (writer) 176 */ 177 static int newque(struct ipc_namespace *ns, struct ipc_params *params) 178 { 179 struct msg_queue *msq; 180 int id, retval; 181 key_t key = params->key; 182 int msgflg = params->flg; 183 184 msq = ipc_rcu_alloc(sizeof(*msq)); 185 if (!msq) 186 return -ENOMEM; 187 188 msq->q_perm.mode = msgflg & S_IRWXUGO; 189 msq->q_perm.key = key; 190 191 msq->q_perm.security = NULL; 192 retval = security_msg_queue_alloc(msq); 193 if (retval) { 194 ipc_rcu_putref(msq); 195 return retval; 196 } 197 198 /* ipc_addid() locks msq upon success. */ 199 id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni); 200 if (id < 0) { 201 security_msg_queue_free(msq); 202 ipc_rcu_putref(msq); 203 return id; 204 } 205 206 msq->q_stime = msq->q_rtime = 0; 207 msq->q_ctime = get_seconds(); 208 msq->q_cbytes = msq->q_qnum = 0; 209 msq->q_qbytes = ns->msg_ctlmnb; 210 msq->q_lspid = msq->q_lrpid = 0; 211 INIT_LIST_HEAD(&msq->q_messages); 212 INIT_LIST_HEAD(&msq->q_receivers); 213 INIT_LIST_HEAD(&msq->q_senders); 214 215 ipc_unlock_object(&msq->q_perm); 216 rcu_read_unlock(); 217 218 return msq->q_perm.id; 219 } 220 221 static inline void ss_add(struct msg_queue *msq, struct msg_sender *mss) 222 { 223 mss->tsk = current; 224 current->state = TASK_INTERRUPTIBLE; 225 list_add_tail(&mss->list, &msq->q_senders); 226 } 227 228 static inline void ss_del(struct msg_sender *mss) 229 { 230 if (mss->list.next != NULL) 231 list_del(&mss->list); 232 } 233 234 static void ss_wakeup(struct list_head *h, int kill) 235 { 236 struct msg_sender *mss, *t; 237 238 list_for_each_entry_safe(mss, t, h, list) { 239 if (kill) 240 mss->list.next = NULL; 241 wake_up_process(mss->tsk); 242 } 243 } 244 245 static void expunge_all(struct msg_queue *msq, int res) 246 { 247 struct msg_receiver *msr, *t; 248 249 list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) { 250 msr->r_msg = NULL; 251 wake_up_process(msr->r_tsk); 252 smp_mb(); 253 msr->r_msg = ERR_PTR(res); 254 } 255 } 256 257 /* 258 * freeque() wakes up waiters on the sender and receiver waiting queue, 259 * removes the message queue from message queue ID IDR, and cleans up all the 260 * messages associated with this queue. 261 * 262 * msg_ids.rw_mutex (writer) and the spinlock for this message queue are held 263 * before freeque() is called. msg_ids.rw_mutex remains locked on exit. 264 */ 265 static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) 266 { 267 struct msg_msg *msg, *t; 268 struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm); 269 270 expunge_all(msq, -EIDRM); 271 ss_wakeup(&msq->q_senders, 1); 272 msg_rmid(ns, msq); 273 msg_unlock(msq); 274 275 list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) { 276 atomic_dec(&ns->msg_hdrs); 277 free_msg(msg); 278 } 279 atomic_sub(msq->q_cbytes, &ns->msg_bytes); 280 security_msg_queue_free(msq); 281 ipc_rcu_putref(msq); 282 } 283 284 /* 285 * Called with msg_ids.rw_mutex and ipcp locked. 286 */ 287 static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg) 288 { 289 struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm); 290 291 return security_msg_queue_associate(msq, msgflg); 292 } 293 294 SYSCALL_DEFINE2(msgget, key_t, key, int, msgflg) 295 { 296 struct ipc_namespace *ns; 297 struct ipc_ops msg_ops; 298 struct ipc_params msg_params; 299 300 ns = current->nsproxy->ipc_ns; 301 302 msg_ops.getnew = newque; 303 msg_ops.associate = msg_security; 304 msg_ops.more_checks = NULL; 305 306 msg_params.key = key; 307 msg_params.flg = msgflg; 308 309 return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params); 310 } 311 312 static inline unsigned long 313 copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version) 314 { 315 switch(version) { 316 case IPC_64: 317 return copy_to_user(buf, in, sizeof(*in)); 318 case IPC_OLD: 319 { 320 struct msqid_ds out; 321 322 memset(&out, 0, sizeof(out)); 323 324 ipc64_perm_to_ipc_perm(&in->msg_perm, &out.msg_perm); 325 326 out.msg_stime = in->msg_stime; 327 out.msg_rtime = in->msg_rtime; 328 out.msg_ctime = in->msg_ctime; 329 330 if (in->msg_cbytes > USHRT_MAX) 331 out.msg_cbytes = USHRT_MAX; 332 else 333 out.msg_cbytes = in->msg_cbytes; 334 out.msg_lcbytes = in->msg_cbytes; 335 336 if (in->msg_qnum > USHRT_MAX) 337 out.msg_qnum = USHRT_MAX; 338 else 339 out.msg_qnum = in->msg_qnum; 340 341 if (in->msg_qbytes > USHRT_MAX) 342 out.msg_qbytes = USHRT_MAX; 343 else 344 out.msg_qbytes = in->msg_qbytes; 345 out.msg_lqbytes = in->msg_qbytes; 346 347 out.msg_lspid = in->msg_lspid; 348 out.msg_lrpid = in->msg_lrpid; 349 350 return copy_to_user(buf, &out, sizeof(out)); 351 } 352 default: 353 return -EINVAL; 354 } 355 } 356 357 static inline unsigned long 358 copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version) 359 { 360 switch(version) { 361 case IPC_64: 362 if (copy_from_user(out, buf, sizeof(*out))) 363 return -EFAULT; 364 return 0; 365 case IPC_OLD: 366 { 367 struct msqid_ds tbuf_old; 368 369 if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) 370 return -EFAULT; 371 372 out->msg_perm.uid = tbuf_old.msg_perm.uid; 373 out->msg_perm.gid = tbuf_old.msg_perm.gid; 374 out->msg_perm.mode = tbuf_old.msg_perm.mode; 375 376 if (tbuf_old.msg_qbytes == 0) 377 out->msg_qbytes = tbuf_old.msg_lqbytes; 378 else 379 out->msg_qbytes = tbuf_old.msg_qbytes; 380 381 return 0; 382 } 383 default: 384 return -EINVAL; 385 } 386 } 387 388 /* 389 * This function handles some msgctl commands which require the rw_mutex 390 * to be held in write mode. 391 * NOTE: no locks must be held, the rw_mutex is taken inside this function. 392 */ 393 static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, 394 struct msqid_ds __user *buf, int version) 395 { 396 struct kern_ipc_perm *ipcp; 397 struct msqid64_ds uninitialized_var(msqid64); 398 struct msg_queue *msq; 399 int err; 400 401 if (cmd == IPC_SET) { 402 if (copy_msqid_from_user(&msqid64, buf, version)) 403 return -EFAULT; 404 } 405 406 down_write(&msg_ids(ns).rw_mutex); 407 rcu_read_lock(); 408 409 ipcp = ipcctl_pre_down_nolock(ns, &msg_ids(ns), msqid, cmd, 410 &msqid64.msg_perm, msqid64.msg_qbytes); 411 if (IS_ERR(ipcp)) { 412 err = PTR_ERR(ipcp); 413 goto out_unlock1; 414 } 415 416 msq = container_of(ipcp, struct msg_queue, q_perm); 417 418 err = security_msg_queue_msgctl(msq, cmd); 419 if (err) 420 goto out_unlock1; 421 422 switch (cmd) { 423 case IPC_RMID: 424 ipc_lock_object(&msq->q_perm); 425 /* freeque unlocks the ipc object and rcu */ 426 freeque(ns, ipcp); 427 goto out_up; 428 case IPC_SET: 429 if (msqid64.msg_qbytes > ns->msg_ctlmnb && 430 !capable(CAP_SYS_RESOURCE)) { 431 err = -EPERM; 432 goto out_unlock1; 433 } 434 435 ipc_lock_object(&msq->q_perm); 436 err = ipc_update_perm(&msqid64.msg_perm, ipcp); 437 if (err) 438 goto out_unlock0; 439 440 msq->q_qbytes = msqid64.msg_qbytes; 441 442 msq->q_ctime = get_seconds(); 443 /* sleeping receivers might be excluded by 444 * stricter permissions. 445 */ 446 expunge_all(msq, -EAGAIN); 447 /* sleeping senders might be able to send 448 * due to a larger queue size. 449 */ 450 ss_wakeup(&msq->q_senders, 0); 451 break; 452 default: 453 err = -EINVAL; 454 goto out_unlock1; 455 } 456 457 out_unlock0: 458 ipc_unlock_object(&msq->q_perm); 459 out_unlock1: 460 rcu_read_unlock(); 461 out_up: 462 up_write(&msg_ids(ns).rw_mutex); 463 return err; 464 } 465 466 static int msgctl_nolock(struct ipc_namespace *ns, int msqid, 467 int cmd, int version, void __user *buf) 468 { 469 int err; 470 struct msg_queue *msq; 471 472 switch (cmd) { 473 case IPC_INFO: 474 case MSG_INFO: 475 { 476 struct msginfo msginfo; 477 int max_id; 478 479 if (!buf) 480 return -EFAULT; 481 482 /* 483 * We must not return kernel stack data. 484 * due to padding, it's not enough 485 * to set all member fields. 486 */ 487 err = security_msg_queue_msgctl(NULL, cmd); 488 if (err) 489 return err; 490 491 memset(&msginfo, 0, sizeof(msginfo)); 492 msginfo.msgmni = ns->msg_ctlmni; 493 msginfo.msgmax = ns->msg_ctlmax; 494 msginfo.msgmnb = ns->msg_ctlmnb; 495 msginfo.msgssz = MSGSSZ; 496 msginfo.msgseg = MSGSEG; 497 down_read(&msg_ids(ns).rw_mutex); 498 if (cmd == MSG_INFO) { 499 msginfo.msgpool = msg_ids(ns).in_use; 500 msginfo.msgmap = atomic_read(&ns->msg_hdrs); 501 msginfo.msgtql = atomic_read(&ns->msg_bytes); 502 } else { 503 msginfo.msgmap = MSGMAP; 504 msginfo.msgpool = MSGPOOL; 505 msginfo.msgtql = MSGTQL; 506 } 507 max_id = ipc_get_maxid(&msg_ids(ns)); 508 up_read(&msg_ids(ns).rw_mutex); 509 if (copy_to_user(buf, &msginfo, sizeof(struct msginfo))) 510 return -EFAULT; 511 return (max_id < 0) ? 0 : max_id; 512 } 513 514 case MSG_STAT: 515 case IPC_STAT: 516 { 517 struct msqid64_ds tbuf; 518 int success_return; 519 520 if (!buf) 521 return -EFAULT; 522 523 memset(&tbuf, 0, sizeof(tbuf)); 524 525 rcu_read_lock(); 526 if (cmd == MSG_STAT) { 527 msq = msq_obtain_object(ns, msqid); 528 if (IS_ERR(msq)) { 529 err = PTR_ERR(msq); 530 goto out_unlock; 531 } 532 success_return = msq->q_perm.id; 533 } else { 534 msq = msq_obtain_object_check(ns, msqid); 535 if (IS_ERR(msq)) { 536 err = PTR_ERR(msq); 537 goto out_unlock; 538 } 539 success_return = 0; 540 } 541 542 err = -EACCES; 543 if (ipcperms(ns, &msq->q_perm, S_IRUGO)) 544 goto out_unlock; 545 546 err = security_msg_queue_msgctl(msq, cmd); 547 if (err) 548 goto out_unlock; 549 550 kernel_to_ipc64_perm(&msq->q_perm, &tbuf.msg_perm); 551 tbuf.msg_stime = msq->q_stime; 552 tbuf.msg_rtime = msq->q_rtime; 553 tbuf.msg_ctime = msq->q_ctime; 554 tbuf.msg_cbytes = msq->q_cbytes; 555 tbuf.msg_qnum = msq->q_qnum; 556 tbuf.msg_qbytes = msq->q_qbytes; 557 tbuf.msg_lspid = msq->q_lspid; 558 tbuf.msg_lrpid = msq->q_lrpid; 559 rcu_read_unlock(); 560 561 if (copy_msqid_to_user(buf, &tbuf, version)) 562 return -EFAULT; 563 return success_return; 564 } 565 566 default: 567 return -EINVAL; 568 } 569 570 return err; 571 out_unlock: 572 rcu_read_unlock(); 573 return err; 574 } 575 576 SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) 577 { 578 int version; 579 struct ipc_namespace *ns; 580 581 if (msqid < 0 || cmd < 0) 582 return -EINVAL; 583 584 version = ipc_parse_version(&cmd); 585 ns = current->nsproxy->ipc_ns; 586 587 switch (cmd) { 588 case IPC_INFO: 589 case MSG_INFO: 590 case MSG_STAT: /* msqid is an index rather than a msg queue id */ 591 case IPC_STAT: 592 return msgctl_nolock(ns, msqid, cmd, version, buf); 593 case IPC_SET: 594 case IPC_RMID: 595 return msgctl_down(ns, msqid, cmd, buf, version); 596 default: 597 return -EINVAL; 598 } 599 } 600 601 static int testmsg(struct msg_msg *msg, long type, int mode) 602 { 603 switch(mode) 604 { 605 case SEARCH_ANY: 606 case SEARCH_NUMBER: 607 return 1; 608 case SEARCH_LESSEQUAL: 609 if (msg->m_type <=type) 610 return 1; 611 break; 612 case SEARCH_EQUAL: 613 if (msg->m_type == type) 614 return 1; 615 break; 616 case SEARCH_NOTEQUAL: 617 if (msg->m_type != type) 618 return 1; 619 break; 620 } 621 return 0; 622 } 623 624 static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg) 625 { 626 struct msg_receiver *msr, *t; 627 628 list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) { 629 if (testmsg(msg, msr->r_msgtype, msr->r_mode) && 630 !security_msg_queue_msgrcv(msq, msg, msr->r_tsk, 631 msr->r_msgtype, msr->r_mode)) { 632 633 list_del(&msr->r_list); 634 if (msr->r_maxsize < msg->m_ts) { 635 msr->r_msg = NULL; 636 wake_up_process(msr->r_tsk); 637 smp_mb(); 638 msr->r_msg = ERR_PTR(-E2BIG); 639 } else { 640 msr->r_msg = NULL; 641 msq->q_lrpid = task_pid_vnr(msr->r_tsk); 642 msq->q_rtime = get_seconds(); 643 wake_up_process(msr->r_tsk); 644 smp_mb(); 645 msr->r_msg = msg; 646 647 return 1; 648 } 649 } 650 } 651 return 0; 652 } 653 654 long do_msgsnd(int msqid, long mtype, void __user *mtext, 655 size_t msgsz, int msgflg) 656 { 657 struct msg_queue *msq; 658 struct msg_msg *msg; 659 int err; 660 struct ipc_namespace *ns; 661 662 ns = current->nsproxy->ipc_ns; 663 664 if (msgsz > ns->msg_ctlmax || (long) msgsz < 0 || msqid < 0) 665 return -EINVAL; 666 if (mtype < 1) 667 return -EINVAL; 668 669 msg = load_msg(mtext, msgsz); 670 if (IS_ERR(msg)) 671 return PTR_ERR(msg); 672 673 msg->m_type = mtype; 674 msg->m_ts = msgsz; 675 676 rcu_read_lock(); 677 msq = msq_obtain_object_check(ns, msqid); 678 if (IS_ERR(msq)) { 679 err = PTR_ERR(msq); 680 goto out_unlock1; 681 } 682 683 for (;;) { 684 struct msg_sender s; 685 686 err = -EACCES; 687 if (ipcperms(ns, &msq->q_perm, S_IWUGO)) 688 goto out_unlock1; 689 690 err = security_msg_queue_msgsnd(msq, msg, msgflg); 691 if (err) 692 goto out_unlock1; 693 694 if (msgsz + msq->q_cbytes <= msq->q_qbytes && 695 1 + msq->q_qnum <= msq->q_qbytes) { 696 break; 697 } 698 699 /* queue full, wait: */ 700 if (msgflg & IPC_NOWAIT) { 701 err = -EAGAIN; 702 goto out_unlock1; 703 } 704 705 ipc_lock_object(&msq->q_perm); 706 ss_add(msq, &s); 707 708 if (!ipc_rcu_getref(msq)) { 709 err = -EIDRM; 710 goto out_unlock0; 711 } 712 713 ipc_unlock_object(&msq->q_perm); 714 rcu_read_unlock(); 715 schedule(); 716 717 rcu_read_lock(); 718 ipc_lock_object(&msq->q_perm); 719 720 ipc_rcu_putref(msq); 721 if (msq->q_perm.deleted) { 722 err = -EIDRM; 723 goto out_unlock0; 724 } 725 726 ss_del(&s); 727 728 if (signal_pending(current)) { 729 err = -ERESTARTNOHAND; 730 goto out_unlock0; 731 } 732 733 ipc_unlock_object(&msq->q_perm); 734 } 735 736 ipc_lock_object(&msq->q_perm); 737 msq->q_lspid = task_tgid_vnr(current); 738 msq->q_stime = get_seconds(); 739 740 if (!pipelined_send(msq, msg)) { 741 /* no one is waiting for this message, enqueue it */ 742 list_add_tail(&msg->m_list, &msq->q_messages); 743 msq->q_cbytes += msgsz; 744 msq->q_qnum++; 745 atomic_add(msgsz, &ns->msg_bytes); 746 atomic_inc(&ns->msg_hdrs); 747 } 748 749 err = 0; 750 msg = NULL; 751 752 out_unlock0: 753 ipc_unlock_object(&msq->q_perm); 754 out_unlock1: 755 rcu_read_unlock(); 756 if (msg != NULL) 757 free_msg(msg); 758 return err; 759 } 760 761 SYSCALL_DEFINE4(msgsnd, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz, 762 int, msgflg) 763 { 764 long mtype; 765 766 if (get_user(mtype, &msgp->mtype)) 767 return -EFAULT; 768 return do_msgsnd(msqid, mtype, msgp->mtext, msgsz, msgflg); 769 } 770 771 static inline int convert_mode(long *msgtyp, int msgflg) 772 { 773 if (msgflg & MSG_COPY) 774 return SEARCH_NUMBER; 775 /* 776 * find message of correct type. 777 * msgtyp = 0 => get first. 778 * msgtyp > 0 => get first message of matching type. 779 * msgtyp < 0 => get message with least type must be < abs(msgtype). 780 */ 781 if (*msgtyp == 0) 782 return SEARCH_ANY; 783 if (*msgtyp < 0) { 784 *msgtyp = -*msgtyp; 785 return SEARCH_LESSEQUAL; 786 } 787 if (msgflg & MSG_EXCEPT) 788 return SEARCH_NOTEQUAL; 789 return SEARCH_EQUAL; 790 } 791 792 static long do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz) 793 { 794 struct msgbuf __user *msgp = dest; 795 size_t msgsz; 796 797 if (put_user(msg->m_type, &msgp->mtype)) 798 return -EFAULT; 799 800 msgsz = (bufsz > msg->m_ts) ? msg->m_ts : bufsz; 801 if (store_msg(msgp->mtext, msg, msgsz)) 802 return -EFAULT; 803 return msgsz; 804 } 805 806 #ifdef CONFIG_CHECKPOINT_RESTORE 807 /* 808 * This function creates new kernel message structure, large enough to store 809 * bufsz message bytes. 810 */ 811 static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz) 812 { 813 struct msg_msg *copy; 814 815 /* 816 * Create dummy message to copy real message to. 817 */ 818 copy = load_msg(buf, bufsz); 819 if (!IS_ERR(copy)) 820 copy->m_ts = bufsz; 821 return copy; 822 } 823 824 static inline void free_copy(struct msg_msg *copy) 825 { 826 if (copy) 827 free_msg(copy); 828 } 829 #else 830 static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz) 831 { 832 return ERR_PTR(-ENOSYS); 833 } 834 835 static inline void free_copy(struct msg_msg *copy) 836 { 837 } 838 #endif 839 840 static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode) 841 { 842 struct msg_msg *msg; 843 long count = 0; 844 845 list_for_each_entry(msg, &msq->q_messages, m_list) { 846 if (testmsg(msg, *msgtyp, mode) && 847 !security_msg_queue_msgrcv(msq, msg, current, 848 *msgtyp, mode)) { 849 if (mode == SEARCH_LESSEQUAL && msg->m_type != 1) { 850 *msgtyp = msg->m_type - 1; 851 } else if (mode == SEARCH_NUMBER) { 852 if (*msgtyp == count) 853 return msg; 854 } else 855 return msg; 856 count++; 857 } 858 } 859 860 return ERR_PTR(-EAGAIN); 861 } 862 863 long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgflg, 864 long (*msg_handler)(void __user *, struct msg_msg *, size_t)) 865 { 866 int mode; 867 struct msg_queue *msq; 868 struct ipc_namespace *ns; 869 struct msg_msg *msg, *copy = NULL; 870 871 ns = current->nsproxy->ipc_ns; 872 873 if (msqid < 0 || (long) bufsz < 0) 874 return -EINVAL; 875 876 if (msgflg & MSG_COPY) { 877 copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax)); 878 if (IS_ERR(copy)) 879 return PTR_ERR(copy); 880 } 881 mode = convert_mode(&msgtyp, msgflg); 882 883 rcu_read_lock(); 884 msq = msq_obtain_object_check(ns, msqid); 885 if (IS_ERR(msq)) { 886 rcu_read_unlock(); 887 free_copy(copy); 888 return PTR_ERR(msq); 889 } 890 891 for (;;) { 892 struct msg_receiver msr_d; 893 894 msg = ERR_PTR(-EACCES); 895 if (ipcperms(ns, &msq->q_perm, S_IRUGO)) 896 goto out_unlock1; 897 898 ipc_lock_object(&msq->q_perm); 899 msg = find_msg(msq, &msgtyp, mode); 900 if (!IS_ERR(msg)) { 901 /* 902 * Found a suitable message. 903 * Unlink it from the queue. 904 */ 905 if ((bufsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) { 906 msg = ERR_PTR(-E2BIG); 907 goto out_unlock0; 908 } 909 /* 910 * If we are copying, then do not unlink message and do 911 * not update queue parameters. 912 */ 913 if (msgflg & MSG_COPY) { 914 msg = copy_msg(msg, copy); 915 goto out_unlock0; 916 } 917 918 list_del(&msg->m_list); 919 msq->q_qnum--; 920 msq->q_rtime = get_seconds(); 921 msq->q_lrpid = task_tgid_vnr(current); 922 msq->q_cbytes -= msg->m_ts; 923 atomic_sub(msg->m_ts, &ns->msg_bytes); 924 atomic_dec(&ns->msg_hdrs); 925 ss_wakeup(&msq->q_senders, 0); 926 927 goto out_unlock0; 928 } 929 930 /* No message waiting. Wait for a message */ 931 if (msgflg & IPC_NOWAIT) { 932 msg = ERR_PTR(-ENOMSG); 933 goto out_unlock0; 934 } 935 936 list_add_tail(&msr_d.r_list, &msq->q_receivers); 937 msr_d.r_tsk = current; 938 msr_d.r_msgtype = msgtyp; 939 msr_d.r_mode = mode; 940 if (msgflg & MSG_NOERROR) 941 msr_d.r_maxsize = INT_MAX; 942 else 943 msr_d.r_maxsize = bufsz; 944 msr_d.r_msg = ERR_PTR(-EAGAIN); 945 current->state = TASK_INTERRUPTIBLE; 946 947 ipc_unlock_object(&msq->q_perm); 948 rcu_read_unlock(); 949 schedule(); 950 951 /* Lockless receive, part 1: 952 * Disable preemption. We don't hold a reference to the queue 953 * and getting a reference would defeat the idea of a lockless 954 * operation, thus the code relies on rcu to guarantee the 955 * existence of msq: 956 * Prior to destruction, expunge_all(-EIRDM) changes r_msg. 957 * Thus if r_msg is -EAGAIN, then the queue not yet destroyed. 958 * rcu_read_lock() prevents preemption between reading r_msg 959 * and acquiring the q_perm.lock in ipc_lock_object(). 960 */ 961 rcu_read_lock(); 962 963 /* Lockless receive, part 2: 964 * Wait until pipelined_send or expunge_all are outside of 965 * wake_up_process(). There is a race with exit(), see 966 * ipc/mqueue.c for the details. 967 */ 968 msg = (struct msg_msg*)msr_d.r_msg; 969 while (msg == NULL) { 970 cpu_relax(); 971 msg = (struct msg_msg *)msr_d.r_msg; 972 } 973 974 /* Lockless receive, part 3: 975 * If there is a message or an error then accept it without 976 * locking. 977 */ 978 if (msg != ERR_PTR(-EAGAIN)) 979 goto out_unlock1; 980 981 /* Lockless receive, part 3: 982 * Acquire the queue spinlock. 983 */ 984 ipc_lock_object(&msq->q_perm); 985 986 /* Lockless receive, part 4: 987 * Repeat test after acquiring the spinlock. 988 */ 989 msg = (struct msg_msg*)msr_d.r_msg; 990 if (msg != ERR_PTR(-EAGAIN)) 991 goto out_unlock0; 992 993 list_del(&msr_d.r_list); 994 if (signal_pending(current)) { 995 msg = ERR_PTR(-ERESTARTNOHAND); 996 goto out_unlock0; 997 } 998 999 ipc_unlock_object(&msq->q_perm); 1000 } 1001 1002 out_unlock0: 1003 ipc_unlock_object(&msq->q_perm); 1004 out_unlock1: 1005 rcu_read_unlock(); 1006 if (IS_ERR(msg)) { 1007 free_copy(copy); 1008 return PTR_ERR(msg); 1009 } 1010 1011 bufsz = msg_handler(buf, msg, bufsz); 1012 free_msg(msg); 1013 1014 return bufsz; 1015 } 1016 1017 SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz, 1018 long, msgtyp, int, msgflg) 1019 { 1020 return do_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg, do_msg_fill); 1021 } 1022 1023 #ifdef CONFIG_PROC_FS 1024 static int sysvipc_msg_proc_show(struct seq_file *s, void *it) 1025 { 1026 struct user_namespace *user_ns = seq_user_ns(s); 1027 struct msg_queue *msq = it; 1028 1029 return seq_printf(s, 1030 "%10d %10d %4o %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n", 1031 msq->q_perm.key, 1032 msq->q_perm.id, 1033 msq->q_perm.mode, 1034 msq->q_cbytes, 1035 msq->q_qnum, 1036 msq->q_lspid, 1037 msq->q_lrpid, 1038 from_kuid_munged(user_ns, msq->q_perm.uid), 1039 from_kgid_munged(user_ns, msq->q_perm.gid), 1040 from_kuid_munged(user_ns, msq->q_perm.cuid), 1041 from_kgid_munged(user_ns, msq->q_perm.cgid), 1042 msq->q_stime, 1043 msq->q_rtime, 1044 msq->q_ctime); 1045 } 1046 #endif 1047