1 /* 2 * linux/ipc/msg.c 3 * Copyright (C) 1992 Krishna Balasubramanian 4 * 5 * Removed all the remaining kerneld mess 6 * Catch the -EFAULT stuff properly 7 * Use GFP_KERNEL for messages as in 1.2 8 * Fixed up the unchecked user space derefs 9 * Copyright (C) 1998 Alan Cox & Andi Kleen 10 * 11 * /proc/sysvipc/msg support (c) 1999 Dragos Acostachioaie <dragos@iname.com> 12 * 13 * mostly rewritten, threaded and wake-one semantics added 14 * MSGMAX limit removed, sysctl's added 15 * (c) 1999 Manfred Spraul <manfred@colorfullife.com> 16 * 17 * support for audit of ipc object properties and permission changes 18 * Dustin Kirkland <dustin.kirkland@us.ibm.com> 19 * 20 * namespaces support 21 * OpenVZ, SWsoft Inc. 22 * Pavel Emelianov <xemul@openvz.org> 23 */ 24 25 #include <linux/capability.h> 26 #include <linux/msg.h> 27 #include <linux/spinlock.h> 28 #include <linux/init.h> 29 #include <linux/mm.h> 30 #include <linux/proc_fs.h> 31 #include <linux/list.h> 32 #include <linux/security.h> 33 #include <linux/sched.h> 34 #include <linux/syscalls.h> 35 #include <linux/audit.h> 36 #include <linux/seq_file.h> 37 #include <linux/rwsem.h> 38 #include <linux/nsproxy.h> 39 #include <linux/ipc_namespace.h> 40 41 #include <asm/current.h> 42 #include <asm/uaccess.h> 43 #include "util.h" 44 45 /* 46 * one msg_receiver structure for each sleeping receiver: 47 */ 48 struct msg_receiver { 49 struct list_head r_list; 50 struct task_struct *r_tsk; 51 52 int r_mode; 53 long r_msgtype; 54 long r_maxsize; 55 56 struct msg_msg *volatile r_msg; 57 }; 58 59 /* one msg_sender for each sleeping sender */ 60 struct msg_sender { 61 struct list_head list; 62 struct task_struct *tsk; 63 }; 64 65 #define SEARCH_ANY 1 66 #define SEARCH_EQUAL 2 67 #define SEARCH_NOTEQUAL 3 68 #define SEARCH_LESSEQUAL 4 69 #define SEARCH_NUMBER 5 70 71 #define msg_ids(ns) ((ns)->ids[IPC_MSG_IDS]) 72 73 static void freeque(struct ipc_namespace *, struct kern_ipc_perm *); 74 static int newque(struct ipc_namespace *, struct ipc_params *); 75 #ifdef CONFIG_PROC_FS 76 static int sysvipc_msg_proc_show(struct seq_file *s, void *it); 77 #endif 78 79 /* 80 * Scale msgmni with the available lowmem size: the memory dedicated to msg 81 * queues should occupy at most 1/MSG_MEM_SCALE of lowmem. 82 * Also take into account the number of nsproxies created so far. 83 * This should be done staying within the (MSGMNI , IPCMNI/nr_ipc_ns) range. 84 */ 85 void recompute_msgmni(struct ipc_namespace *ns) 86 { 87 struct sysinfo i; 88 unsigned long allowed; 89 int nb_ns; 90 91 si_meminfo(&i); 92 allowed = (((i.totalram - i.totalhigh) / MSG_MEM_SCALE) * i.mem_unit) 93 / MSGMNB; 94 nb_ns = atomic_read(&nr_ipc_ns); 95 allowed /= nb_ns; 96 97 if (allowed < MSGMNI) { 98 ns->msg_ctlmni = MSGMNI; 99 return; 100 } 101 102 if (allowed > IPCMNI / nb_ns) { 103 ns->msg_ctlmni = IPCMNI / nb_ns; 104 return; 105 } 106 107 ns->msg_ctlmni = allowed; 108 } 109 110 void msg_init_ns(struct ipc_namespace *ns) 111 { 112 ns->msg_ctlmax = MSGMAX; 113 ns->msg_ctlmnb = MSGMNB; 114 115 recompute_msgmni(ns); 116 117 atomic_set(&ns->msg_bytes, 0); 118 atomic_set(&ns->msg_hdrs, 0); 119 ipc_init_ids(&ns->ids[IPC_MSG_IDS]); 120 } 121 122 #ifdef CONFIG_IPC_NS 123 void msg_exit_ns(struct ipc_namespace *ns) 124 { 125 free_ipcs(ns, &msg_ids(ns), freeque); 126 idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr); 127 } 128 #endif 129 130 void __init msg_init(void) 131 { 132 msg_init_ns(&init_ipc_ns); 133 134 printk(KERN_INFO "msgmni has been set to %d\n", 135 init_ipc_ns.msg_ctlmni); 136 137 ipc_init_proc_interface("sysvipc/msg", 138 " key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n", 139 IPC_MSG_IDS, sysvipc_msg_proc_show); 140 } 141 142 static inline struct msg_queue *msq_obtain_object(struct ipc_namespace *ns, int id) 143 { 144 struct kern_ipc_perm *ipcp = ipc_obtain_object(&msg_ids(ns), id); 145 146 if (IS_ERR(ipcp)) 147 return ERR_CAST(ipcp); 148 149 return container_of(ipcp, struct msg_queue, q_perm); 150 } 151 152 static inline struct msg_queue *msq_obtain_object_check(struct ipc_namespace *ns, 153 int id) 154 { 155 struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&msg_ids(ns), id); 156 157 if (IS_ERR(ipcp)) 158 return ERR_CAST(ipcp); 159 160 return container_of(ipcp, struct msg_queue, q_perm); 161 } 162 163 static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s) 164 { 165 ipc_rmid(&msg_ids(ns), &s->q_perm); 166 } 167 168 static void msg_rcu_free(struct rcu_head *head) 169 { 170 struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu); 171 struct msg_queue *msq = ipc_rcu_to_struct(p); 172 173 security_msg_queue_free(msq); 174 ipc_rcu_free(head); 175 } 176 177 /** 178 * newque - Create a new msg queue 179 * @ns: namespace 180 * @params: ptr to the structure that contains the key and msgflg 181 * 182 * Called with msg_ids.rwsem held (writer) 183 */ 184 static int newque(struct ipc_namespace *ns, struct ipc_params *params) 185 { 186 struct msg_queue *msq; 187 int id, retval; 188 key_t key = params->key; 189 int msgflg = params->flg; 190 191 msq = ipc_rcu_alloc(sizeof(*msq)); 192 if (!msq) 193 return -ENOMEM; 194 195 msq->q_perm.mode = msgflg & S_IRWXUGO; 196 msq->q_perm.key = key; 197 198 msq->q_perm.security = NULL; 199 retval = security_msg_queue_alloc(msq); 200 if (retval) { 201 ipc_rcu_putref(msq, ipc_rcu_free); 202 return retval; 203 } 204 205 /* ipc_addid() locks msq upon success. */ 206 id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni); 207 if (id < 0) { 208 ipc_rcu_putref(msq, msg_rcu_free); 209 return id; 210 } 211 212 msq->q_stime = msq->q_rtime = 0; 213 msq->q_ctime = get_seconds(); 214 msq->q_cbytes = msq->q_qnum = 0; 215 msq->q_qbytes = ns->msg_ctlmnb; 216 msq->q_lspid = msq->q_lrpid = 0; 217 INIT_LIST_HEAD(&msq->q_messages); 218 INIT_LIST_HEAD(&msq->q_receivers); 219 INIT_LIST_HEAD(&msq->q_senders); 220 221 ipc_unlock_object(&msq->q_perm); 222 rcu_read_unlock(); 223 224 return msq->q_perm.id; 225 } 226 227 static inline void ss_add(struct msg_queue *msq, struct msg_sender *mss) 228 { 229 mss->tsk = current; 230 current->state = TASK_INTERRUPTIBLE; 231 list_add_tail(&mss->list, &msq->q_senders); 232 } 233 234 static inline void ss_del(struct msg_sender *mss) 235 { 236 if (mss->list.next != NULL) 237 list_del(&mss->list); 238 } 239 240 static void ss_wakeup(struct list_head *h, int kill) 241 { 242 struct msg_sender *mss, *t; 243 244 list_for_each_entry_safe(mss, t, h, list) { 245 if (kill) 246 mss->list.next = NULL; 247 wake_up_process(mss->tsk); 248 } 249 } 250 251 static void expunge_all(struct msg_queue *msq, int res) 252 { 253 struct msg_receiver *msr, *t; 254 255 list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) { 256 msr->r_msg = NULL; 257 wake_up_process(msr->r_tsk); 258 smp_mb(); 259 msr->r_msg = ERR_PTR(res); 260 } 261 } 262 263 /* 264 * freeque() wakes up waiters on the sender and receiver waiting queue, 265 * removes the message queue from message queue ID IDR, and cleans up all the 266 * messages associated with this queue. 267 * 268 * msg_ids.rwsem (writer) and the spinlock for this message queue are held 269 * before freeque() is called. msg_ids.rwsem remains locked on exit. 270 */ 271 static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) 272 { 273 struct msg_msg *msg, *t; 274 struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm); 275 276 expunge_all(msq, -EIDRM); 277 ss_wakeup(&msq->q_senders, 1); 278 msg_rmid(ns, msq); 279 ipc_unlock_object(&msq->q_perm); 280 rcu_read_unlock(); 281 282 list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) { 283 atomic_dec(&ns->msg_hdrs); 284 free_msg(msg); 285 } 286 atomic_sub(msq->q_cbytes, &ns->msg_bytes); 287 ipc_rcu_putref(msq, msg_rcu_free); 288 } 289 290 /* 291 * Called with msg_ids.rwsem and ipcp locked. 292 */ 293 static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg) 294 { 295 struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm); 296 297 return security_msg_queue_associate(msq, msgflg); 298 } 299 300 SYSCALL_DEFINE2(msgget, key_t, key, int, msgflg) 301 { 302 struct ipc_namespace *ns; 303 struct ipc_ops msg_ops; 304 struct ipc_params msg_params; 305 306 ns = current->nsproxy->ipc_ns; 307 308 msg_ops.getnew = newque; 309 msg_ops.associate = msg_security; 310 msg_ops.more_checks = NULL; 311 312 msg_params.key = key; 313 msg_params.flg = msgflg; 314 315 return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params); 316 } 317 318 static inline unsigned long 319 copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version) 320 { 321 switch(version) { 322 case IPC_64: 323 return copy_to_user(buf, in, sizeof(*in)); 324 case IPC_OLD: 325 { 326 struct msqid_ds out; 327 328 memset(&out, 0, sizeof(out)); 329 330 ipc64_perm_to_ipc_perm(&in->msg_perm, &out.msg_perm); 331 332 out.msg_stime = in->msg_stime; 333 out.msg_rtime = in->msg_rtime; 334 out.msg_ctime = in->msg_ctime; 335 336 if (in->msg_cbytes > USHRT_MAX) 337 out.msg_cbytes = USHRT_MAX; 338 else 339 out.msg_cbytes = in->msg_cbytes; 340 out.msg_lcbytes = in->msg_cbytes; 341 342 if (in->msg_qnum > USHRT_MAX) 343 out.msg_qnum = USHRT_MAX; 344 else 345 out.msg_qnum = in->msg_qnum; 346 347 if (in->msg_qbytes > USHRT_MAX) 348 out.msg_qbytes = USHRT_MAX; 349 else 350 out.msg_qbytes = in->msg_qbytes; 351 out.msg_lqbytes = in->msg_qbytes; 352 353 out.msg_lspid = in->msg_lspid; 354 out.msg_lrpid = in->msg_lrpid; 355 356 return copy_to_user(buf, &out, sizeof(out)); 357 } 358 default: 359 return -EINVAL; 360 } 361 } 362 363 static inline unsigned long 364 copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version) 365 { 366 switch(version) { 367 case IPC_64: 368 if (copy_from_user(out, buf, sizeof(*out))) 369 return -EFAULT; 370 return 0; 371 case IPC_OLD: 372 { 373 struct msqid_ds tbuf_old; 374 375 if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) 376 return -EFAULT; 377 378 out->msg_perm.uid = tbuf_old.msg_perm.uid; 379 out->msg_perm.gid = tbuf_old.msg_perm.gid; 380 out->msg_perm.mode = tbuf_old.msg_perm.mode; 381 382 if (tbuf_old.msg_qbytes == 0) 383 out->msg_qbytes = tbuf_old.msg_lqbytes; 384 else 385 out->msg_qbytes = tbuf_old.msg_qbytes; 386 387 return 0; 388 } 389 default: 390 return -EINVAL; 391 } 392 } 393 394 /* 395 * This function handles some msgctl commands which require the rwsem 396 * to be held in write mode. 397 * NOTE: no locks must be held, the rwsem is taken inside this function. 398 */ 399 static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, 400 struct msqid_ds __user *buf, int version) 401 { 402 struct kern_ipc_perm *ipcp; 403 struct msqid64_ds uninitialized_var(msqid64); 404 struct msg_queue *msq; 405 int err; 406 407 if (cmd == IPC_SET) { 408 if (copy_msqid_from_user(&msqid64, buf, version)) 409 return -EFAULT; 410 } 411 412 down_write(&msg_ids(ns).rwsem); 413 rcu_read_lock(); 414 415 ipcp = ipcctl_pre_down_nolock(ns, &msg_ids(ns), msqid, cmd, 416 &msqid64.msg_perm, msqid64.msg_qbytes); 417 if (IS_ERR(ipcp)) { 418 err = PTR_ERR(ipcp); 419 goto out_unlock1; 420 } 421 422 msq = container_of(ipcp, struct msg_queue, q_perm); 423 424 err = security_msg_queue_msgctl(msq, cmd); 425 if (err) 426 goto out_unlock1; 427 428 switch (cmd) { 429 case IPC_RMID: 430 ipc_lock_object(&msq->q_perm); 431 /* freeque unlocks the ipc object and rcu */ 432 freeque(ns, ipcp); 433 goto out_up; 434 case IPC_SET: 435 if (msqid64.msg_qbytes > ns->msg_ctlmnb && 436 !capable(CAP_SYS_RESOURCE)) { 437 err = -EPERM; 438 goto out_unlock1; 439 } 440 441 ipc_lock_object(&msq->q_perm); 442 err = ipc_update_perm(&msqid64.msg_perm, ipcp); 443 if (err) 444 goto out_unlock0; 445 446 msq->q_qbytes = msqid64.msg_qbytes; 447 448 msq->q_ctime = get_seconds(); 449 /* sleeping receivers might be excluded by 450 * stricter permissions. 451 */ 452 expunge_all(msq, -EAGAIN); 453 /* sleeping senders might be able to send 454 * due to a larger queue size. 455 */ 456 ss_wakeup(&msq->q_senders, 0); 457 break; 458 default: 459 err = -EINVAL; 460 goto out_unlock1; 461 } 462 463 out_unlock0: 464 ipc_unlock_object(&msq->q_perm); 465 out_unlock1: 466 rcu_read_unlock(); 467 out_up: 468 up_write(&msg_ids(ns).rwsem); 469 return err; 470 } 471 472 static int msgctl_nolock(struct ipc_namespace *ns, int msqid, 473 int cmd, int version, void __user *buf) 474 { 475 int err; 476 struct msg_queue *msq; 477 478 switch (cmd) { 479 case IPC_INFO: 480 case MSG_INFO: 481 { 482 struct msginfo msginfo; 483 int max_id; 484 485 if (!buf) 486 return -EFAULT; 487 488 /* 489 * We must not return kernel stack data. 490 * due to padding, it's not enough 491 * to set all member fields. 492 */ 493 err = security_msg_queue_msgctl(NULL, cmd); 494 if (err) 495 return err; 496 497 memset(&msginfo, 0, sizeof(msginfo)); 498 msginfo.msgmni = ns->msg_ctlmni; 499 msginfo.msgmax = ns->msg_ctlmax; 500 msginfo.msgmnb = ns->msg_ctlmnb; 501 msginfo.msgssz = MSGSSZ; 502 msginfo.msgseg = MSGSEG; 503 down_read(&msg_ids(ns).rwsem); 504 if (cmd == MSG_INFO) { 505 msginfo.msgpool = msg_ids(ns).in_use; 506 msginfo.msgmap = atomic_read(&ns->msg_hdrs); 507 msginfo.msgtql = atomic_read(&ns->msg_bytes); 508 } else { 509 msginfo.msgmap = MSGMAP; 510 msginfo.msgpool = MSGPOOL; 511 msginfo.msgtql = MSGTQL; 512 } 513 max_id = ipc_get_maxid(&msg_ids(ns)); 514 up_read(&msg_ids(ns).rwsem); 515 if (copy_to_user(buf, &msginfo, sizeof(struct msginfo))) 516 return -EFAULT; 517 return (max_id < 0) ? 0 : max_id; 518 } 519 520 case MSG_STAT: 521 case IPC_STAT: 522 { 523 struct msqid64_ds tbuf; 524 int success_return; 525 526 if (!buf) 527 return -EFAULT; 528 529 memset(&tbuf, 0, sizeof(tbuf)); 530 531 rcu_read_lock(); 532 if (cmd == MSG_STAT) { 533 msq = msq_obtain_object(ns, msqid); 534 if (IS_ERR(msq)) { 535 err = PTR_ERR(msq); 536 goto out_unlock; 537 } 538 success_return = msq->q_perm.id; 539 } else { 540 msq = msq_obtain_object_check(ns, msqid); 541 if (IS_ERR(msq)) { 542 err = PTR_ERR(msq); 543 goto out_unlock; 544 } 545 success_return = 0; 546 } 547 548 err = -EACCES; 549 if (ipcperms(ns, &msq->q_perm, S_IRUGO)) 550 goto out_unlock; 551 552 err = security_msg_queue_msgctl(msq, cmd); 553 if (err) 554 goto out_unlock; 555 556 kernel_to_ipc64_perm(&msq->q_perm, &tbuf.msg_perm); 557 tbuf.msg_stime = msq->q_stime; 558 tbuf.msg_rtime = msq->q_rtime; 559 tbuf.msg_ctime = msq->q_ctime; 560 tbuf.msg_cbytes = msq->q_cbytes; 561 tbuf.msg_qnum = msq->q_qnum; 562 tbuf.msg_qbytes = msq->q_qbytes; 563 tbuf.msg_lspid = msq->q_lspid; 564 tbuf.msg_lrpid = msq->q_lrpid; 565 rcu_read_unlock(); 566 567 if (copy_msqid_to_user(buf, &tbuf, version)) 568 return -EFAULT; 569 return success_return; 570 } 571 572 default: 573 return -EINVAL; 574 } 575 576 return err; 577 out_unlock: 578 rcu_read_unlock(); 579 return err; 580 } 581 582 SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) 583 { 584 int version; 585 struct ipc_namespace *ns; 586 587 if (msqid < 0 || cmd < 0) 588 return -EINVAL; 589 590 version = ipc_parse_version(&cmd); 591 ns = current->nsproxy->ipc_ns; 592 593 switch (cmd) { 594 case IPC_INFO: 595 case MSG_INFO: 596 case MSG_STAT: /* msqid is an index rather than a msg queue id */ 597 case IPC_STAT: 598 return msgctl_nolock(ns, msqid, cmd, version, buf); 599 case IPC_SET: 600 case IPC_RMID: 601 return msgctl_down(ns, msqid, cmd, buf, version); 602 default: 603 return -EINVAL; 604 } 605 } 606 607 static int testmsg(struct msg_msg *msg, long type, int mode) 608 { 609 switch(mode) 610 { 611 case SEARCH_ANY: 612 case SEARCH_NUMBER: 613 return 1; 614 case SEARCH_LESSEQUAL: 615 if (msg->m_type <=type) 616 return 1; 617 break; 618 case SEARCH_EQUAL: 619 if (msg->m_type == type) 620 return 1; 621 break; 622 case SEARCH_NOTEQUAL: 623 if (msg->m_type != type) 624 return 1; 625 break; 626 } 627 return 0; 628 } 629 630 static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg) 631 { 632 struct msg_receiver *msr, *t; 633 634 list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) { 635 if (testmsg(msg, msr->r_msgtype, msr->r_mode) && 636 !security_msg_queue_msgrcv(msq, msg, msr->r_tsk, 637 msr->r_msgtype, msr->r_mode)) { 638 639 list_del(&msr->r_list); 640 if (msr->r_maxsize < msg->m_ts) { 641 msr->r_msg = NULL; 642 wake_up_process(msr->r_tsk); 643 smp_mb(); 644 msr->r_msg = ERR_PTR(-E2BIG); 645 } else { 646 msr->r_msg = NULL; 647 msq->q_lrpid = task_pid_vnr(msr->r_tsk); 648 msq->q_rtime = get_seconds(); 649 wake_up_process(msr->r_tsk); 650 smp_mb(); 651 msr->r_msg = msg; 652 653 return 1; 654 } 655 } 656 } 657 return 0; 658 } 659 660 long do_msgsnd(int msqid, long mtype, void __user *mtext, 661 size_t msgsz, int msgflg) 662 { 663 struct msg_queue *msq; 664 struct msg_msg *msg; 665 int err; 666 struct ipc_namespace *ns; 667 668 ns = current->nsproxy->ipc_ns; 669 670 if (msgsz > ns->msg_ctlmax || (long) msgsz < 0 || msqid < 0) 671 return -EINVAL; 672 if (mtype < 1) 673 return -EINVAL; 674 675 msg = load_msg(mtext, msgsz); 676 if (IS_ERR(msg)) 677 return PTR_ERR(msg); 678 679 msg->m_type = mtype; 680 msg->m_ts = msgsz; 681 682 rcu_read_lock(); 683 msq = msq_obtain_object_check(ns, msqid); 684 if (IS_ERR(msq)) { 685 err = PTR_ERR(msq); 686 goto out_unlock1; 687 } 688 689 ipc_lock_object(&msq->q_perm); 690 691 for (;;) { 692 struct msg_sender s; 693 694 err = -EACCES; 695 if (ipcperms(ns, &msq->q_perm, S_IWUGO)) 696 goto out_unlock0; 697 698 /* raced with RMID? */ 699 if (msq->q_perm.deleted) { 700 err = -EIDRM; 701 goto out_unlock0; 702 } 703 704 err = security_msg_queue_msgsnd(msq, msg, msgflg); 705 if (err) 706 goto out_unlock0; 707 708 if (msgsz + msq->q_cbytes <= msq->q_qbytes && 709 1 + msq->q_qnum <= msq->q_qbytes) { 710 break; 711 } 712 713 /* queue full, wait: */ 714 if (msgflg & IPC_NOWAIT) { 715 err = -EAGAIN; 716 goto out_unlock0; 717 } 718 719 ss_add(msq, &s); 720 721 if (!ipc_rcu_getref(msq)) { 722 err = -EIDRM; 723 goto out_unlock0; 724 } 725 726 ipc_unlock_object(&msq->q_perm); 727 rcu_read_unlock(); 728 schedule(); 729 730 rcu_read_lock(); 731 ipc_lock_object(&msq->q_perm); 732 733 ipc_rcu_putref(msq, ipc_rcu_free); 734 if (msq->q_perm.deleted) { 735 err = -EIDRM; 736 goto out_unlock0; 737 } 738 739 ss_del(&s); 740 741 if (signal_pending(current)) { 742 err = -ERESTARTNOHAND; 743 goto out_unlock0; 744 } 745 746 } 747 msq->q_lspid = task_tgid_vnr(current); 748 msq->q_stime = get_seconds(); 749 750 if (!pipelined_send(msq, msg)) { 751 /* no one is waiting for this message, enqueue it */ 752 list_add_tail(&msg->m_list, &msq->q_messages); 753 msq->q_cbytes += msgsz; 754 msq->q_qnum++; 755 atomic_add(msgsz, &ns->msg_bytes); 756 atomic_inc(&ns->msg_hdrs); 757 } 758 759 err = 0; 760 msg = NULL; 761 762 out_unlock0: 763 ipc_unlock_object(&msq->q_perm); 764 out_unlock1: 765 rcu_read_unlock(); 766 if (msg != NULL) 767 free_msg(msg); 768 return err; 769 } 770 771 SYSCALL_DEFINE4(msgsnd, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz, 772 int, msgflg) 773 { 774 long mtype; 775 776 if (get_user(mtype, &msgp->mtype)) 777 return -EFAULT; 778 return do_msgsnd(msqid, mtype, msgp->mtext, msgsz, msgflg); 779 } 780 781 static inline int convert_mode(long *msgtyp, int msgflg) 782 { 783 if (msgflg & MSG_COPY) 784 return SEARCH_NUMBER; 785 /* 786 * find message of correct type. 787 * msgtyp = 0 => get first. 788 * msgtyp > 0 => get first message of matching type. 789 * msgtyp < 0 => get message with least type must be < abs(msgtype). 790 */ 791 if (*msgtyp == 0) 792 return SEARCH_ANY; 793 if (*msgtyp < 0) { 794 *msgtyp = -*msgtyp; 795 return SEARCH_LESSEQUAL; 796 } 797 if (msgflg & MSG_EXCEPT) 798 return SEARCH_NOTEQUAL; 799 return SEARCH_EQUAL; 800 } 801 802 static long do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz) 803 { 804 struct msgbuf __user *msgp = dest; 805 size_t msgsz; 806 807 if (put_user(msg->m_type, &msgp->mtype)) 808 return -EFAULT; 809 810 msgsz = (bufsz > msg->m_ts) ? msg->m_ts : bufsz; 811 if (store_msg(msgp->mtext, msg, msgsz)) 812 return -EFAULT; 813 return msgsz; 814 } 815 816 #ifdef CONFIG_CHECKPOINT_RESTORE 817 /* 818 * This function creates new kernel message structure, large enough to store 819 * bufsz message bytes. 820 */ 821 static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz) 822 { 823 struct msg_msg *copy; 824 825 /* 826 * Create dummy message to copy real message to. 827 */ 828 copy = load_msg(buf, bufsz); 829 if (!IS_ERR(copy)) 830 copy->m_ts = bufsz; 831 return copy; 832 } 833 834 static inline void free_copy(struct msg_msg *copy) 835 { 836 if (copy) 837 free_msg(copy); 838 } 839 #else 840 static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz) 841 { 842 return ERR_PTR(-ENOSYS); 843 } 844 845 static inline void free_copy(struct msg_msg *copy) 846 { 847 } 848 #endif 849 850 static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode) 851 { 852 struct msg_msg *msg, *found = NULL; 853 long count = 0; 854 855 list_for_each_entry(msg, &msq->q_messages, m_list) { 856 if (testmsg(msg, *msgtyp, mode) && 857 !security_msg_queue_msgrcv(msq, msg, current, 858 *msgtyp, mode)) { 859 if (mode == SEARCH_LESSEQUAL && msg->m_type != 1) { 860 *msgtyp = msg->m_type - 1; 861 found = msg; 862 } else if (mode == SEARCH_NUMBER) { 863 if (*msgtyp == count) 864 return msg; 865 } else 866 return msg; 867 count++; 868 } 869 } 870 871 return found ?: ERR_PTR(-EAGAIN); 872 } 873 874 long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgflg, 875 long (*msg_handler)(void __user *, struct msg_msg *, size_t)) 876 { 877 int mode; 878 struct msg_queue *msq; 879 struct ipc_namespace *ns; 880 struct msg_msg *msg, *copy = NULL; 881 882 ns = current->nsproxy->ipc_ns; 883 884 if (msqid < 0 || (long) bufsz < 0) 885 return -EINVAL; 886 887 if (msgflg & MSG_COPY) { 888 copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax)); 889 if (IS_ERR(copy)) 890 return PTR_ERR(copy); 891 } 892 mode = convert_mode(&msgtyp, msgflg); 893 894 rcu_read_lock(); 895 msq = msq_obtain_object_check(ns, msqid); 896 if (IS_ERR(msq)) { 897 rcu_read_unlock(); 898 free_copy(copy); 899 return PTR_ERR(msq); 900 } 901 902 for (;;) { 903 struct msg_receiver msr_d; 904 905 msg = ERR_PTR(-EACCES); 906 if (ipcperms(ns, &msq->q_perm, S_IRUGO)) 907 goto out_unlock1; 908 909 ipc_lock_object(&msq->q_perm); 910 911 /* raced with RMID? */ 912 if (msq->q_perm.deleted) { 913 msg = ERR_PTR(-EIDRM); 914 goto out_unlock0; 915 } 916 917 msg = find_msg(msq, &msgtyp, mode); 918 if (!IS_ERR(msg)) { 919 /* 920 * Found a suitable message. 921 * Unlink it from the queue. 922 */ 923 if ((bufsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) { 924 msg = ERR_PTR(-E2BIG); 925 goto out_unlock0; 926 } 927 /* 928 * If we are copying, then do not unlink message and do 929 * not update queue parameters. 930 */ 931 if (msgflg & MSG_COPY) { 932 msg = copy_msg(msg, copy); 933 goto out_unlock0; 934 } 935 936 list_del(&msg->m_list); 937 msq->q_qnum--; 938 msq->q_rtime = get_seconds(); 939 msq->q_lrpid = task_tgid_vnr(current); 940 msq->q_cbytes -= msg->m_ts; 941 atomic_sub(msg->m_ts, &ns->msg_bytes); 942 atomic_dec(&ns->msg_hdrs); 943 ss_wakeup(&msq->q_senders, 0); 944 945 goto out_unlock0; 946 } 947 948 /* No message waiting. Wait for a message */ 949 if (msgflg & IPC_NOWAIT) { 950 msg = ERR_PTR(-ENOMSG); 951 goto out_unlock0; 952 } 953 954 list_add_tail(&msr_d.r_list, &msq->q_receivers); 955 msr_d.r_tsk = current; 956 msr_d.r_msgtype = msgtyp; 957 msr_d.r_mode = mode; 958 if (msgflg & MSG_NOERROR) 959 msr_d.r_maxsize = INT_MAX; 960 else 961 msr_d.r_maxsize = bufsz; 962 msr_d.r_msg = ERR_PTR(-EAGAIN); 963 current->state = TASK_INTERRUPTIBLE; 964 965 ipc_unlock_object(&msq->q_perm); 966 rcu_read_unlock(); 967 schedule(); 968 969 /* Lockless receive, part 1: 970 * Disable preemption. We don't hold a reference to the queue 971 * and getting a reference would defeat the idea of a lockless 972 * operation, thus the code relies on rcu to guarantee the 973 * existence of msq: 974 * Prior to destruction, expunge_all(-EIRDM) changes r_msg. 975 * Thus if r_msg is -EAGAIN, then the queue not yet destroyed. 976 * rcu_read_lock() prevents preemption between reading r_msg 977 * and acquiring the q_perm.lock in ipc_lock_object(). 978 */ 979 rcu_read_lock(); 980 981 /* Lockless receive, part 2: 982 * Wait until pipelined_send or expunge_all are outside of 983 * wake_up_process(). There is a race with exit(), see 984 * ipc/mqueue.c for the details. 985 */ 986 msg = (struct msg_msg*)msr_d.r_msg; 987 while (msg == NULL) { 988 cpu_relax(); 989 msg = (struct msg_msg *)msr_d.r_msg; 990 } 991 992 /* Lockless receive, part 3: 993 * If there is a message or an error then accept it without 994 * locking. 995 */ 996 if (msg != ERR_PTR(-EAGAIN)) 997 goto out_unlock1; 998 999 /* Lockless receive, part 3: 1000 * Acquire the queue spinlock. 1001 */ 1002 ipc_lock_object(&msq->q_perm); 1003 1004 /* Lockless receive, part 4: 1005 * Repeat test after acquiring the spinlock. 1006 */ 1007 msg = (struct msg_msg*)msr_d.r_msg; 1008 if (msg != ERR_PTR(-EAGAIN)) 1009 goto out_unlock0; 1010 1011 list_del(&msr_d.r_list); 1012 if (signal_pending(current)) { 1013 msg = ERR_PTR(-ERESTARTNOHAND); 1014 goto out_unlock0; 1015 } 1016 1017 ipc_unlock_object(&msq->q_perm); 1018 } 1019 1020 out_unlock0: 1021 ipc_unlock_object(&msq->q_perm); 1022 out_unlock1: 1023 rcu_read_unlock(); 1024 if (IS_ERR(msg)) { 1025 free_copy(copy); 1026 return PTR_ERR(msg); 1027 } 1028 1029 bufsz = msg_handler(buf, msg, bufsz); 1030 free_msg(msg); 1031 1032 return bufsz; 1033 } 1034 1035 SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz, 1036 long, msgtyp, int, msgflg) 1037 { 1038 return do_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg, do_msg_fill); 1039 } 1040 1041 #ifdef CONFIG_PROC_FS 1042 static int sysvipc_msg_proc_show(struct seq_file *s, void *it) 1043 { 1044 struct user_namespace *user_ns = seq_user_ns(s); 1045 struct msg_queue *msq = it; 1046 1047 return seq_printf(s, 1048 "%10d %10d %4o %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n", 1049 msq->q_perm.key, 1050 msq->q_perm.id, 1051 msq->q_perm.mode, 1052 msq->q_cbytes, 1053 msq->q_qnum, 1054 msq->q_lspid, 1055 msq->q_lrpid, 1056 from_kuid_munged(user_ns, msq->q_perm.uid), 1057 from_kgid_munged(user_ns, msq->q_perm.gid), 1058 from_kuid_munged(user_ns, msq->q_perm.cuid), 1059 from_kgid_munged(user_ns, msq->q_perm.cgid), 1060 msq->q_stime, 1061 msq->q_rtime, 1062 msq->q_ctime); 1063 } 1064 #endif 1065