1 /* 2 * linux/ipc/msg.c 3 * Copyright (C) 1992 Krishna Balasubramanian 4 * 5 * Removed all the remaining kerneld mess 6 * Catch the -EFAULT stuff properly 7 * Use GFP_KERNEL for messages as in 1.2 8 * Fixed up the unchecked user space derefs 9 * Copyright (C) 1998 Alan Cox & Andi Kleen 10 * 11 * /proc/sysvipc/msg support (c) 1999 Dragos Acostachioaie <dragos@iname.com> 12 * 13 * mostly rewritten, threaded and wake-one semantics added 14 * MSGMAX limit removed, sysctl's added 15 * (c) 1999 Manfred Spraul <manfred@colorfullife.com> 16 * 17 * support for audit of ipc object properties and permission changes 18 * Dustin Kirkland <dustin.kirkland@us.ibm.com> 19 * 20 * namespaces support 21 * OpenVZ, SWsoft Inc. 22 * Pavel Emelianov <xemul@openvz.org> 23 */ 24 25 #include <linux/capability.h> 26 #include <linux/msg.h> 27 #include <linux/spinlock.h> 28 #include <linux/init.h> 29 #include <linux/mm.h> 30 #include <linux/proc_fs.h> 31 #include <linux/list.h> 32 #include <linux/security.h> 33 #include <linux/sched.h> 34 #include <linux/syscalls.h> 35 #include <linux/audit.h> 36 #include <linux/seq_file.h> 37 #include <linux/rwsem.h> 38 #include <linux/nsproxy.h> 39 #include <linux/ipc_namespace.h> 40 41 #include <asm/current.h> 42 #include <asm/uaccess.h> 43 #include "util.h" 44 45 /* 46 * one msg_receiver structure for each sleeping receiver: 47 */ 48 struct msg_receiver { 49 struct list_head r_list; 50 struct task_struct *r_tsk; 51 52 int r_mode; 53 long r_msgtype; 54 long r_maxsize; 55 56 struct msg_msg *volatile r_msg; 57 }; 58 59 /* one msg_sender for each sleeping sender */ 60 struct msg_sender { 61 struct list_head list; 62 struct task_struct *tsk; 63 }; 64 65 #define SEARCH_ANY 1 66 #define SEARCH_EQUAL 2 67 #define SEARCH_NOTEQUAL 3 68 #define SEARCH_LESSEQUAL 4 69 #define SEARCH_NUMBER 5 70 71 #define msg_ids(ns) ((ns)->ids[IPC_MSG_IDS]) 72 73 static void freeque(struct ipc_namespace *, struct kern_ipc_perm *); 74 static int newque(struct ipc_namespace *, struct ipc_params *); 75 #ifdef CONFIG_PROC_FS 76 static int sysvipc_msg_proc_show(struct seq_file *s, void *it); 77 #endif 78 79 /* 80 * Scale msgmni with the available lowmem size: the memory dedicated to msg 81 * queues should occupy at most 1/MSG_MEM_SCALE of lowmem. 82 * Also take into account the number of nsproxies created so far. 83 * This should be done staying within the (MSGMNI , IPCMNI/nr_ipc_ns) range. 84 */ 85 void recompute_msgmni(struct ipc_namespace *ns) 86 { 87 struct sysinfo i; 88 unsigned long allowed; 89 int nb_ns; 90 91 si_meminfo(&i); 92 allowed = (((i.totalram - i.totalhigh) / MSG_MEM_SCALE) * i.mem_unit) 93 / MSGMNB; 94 nb_ns = atomic_read(&nr_ipc_ns); 95 allowed /= nb_ns; 96 97 if (allowed < MSGMNI) { 98 ns->msg_ctlmni = MSGMNI; 99 return; 100 } 101 102 if (allowed > IPCMNI / nb_ns) { 103 ns->msg_ctlmni = IPCMNI / nb_ns; 104 return; 105 } 106 107 ns->msg_ctlmni = allowed; 108 } 109 110 void msg_init_ns(struct ipc_namespace *ns) 111 { 112 ns->msg_ctlmax = MSGMAX; 113 ns->msg_ctlmnb = MSGMNB; 114 115 recompute_msgmni(ns); 116 117 atomic_set(&ns->msg_bytes, 0); 118 atomic_set(&ns->msg_hdrs, 0); 119 ipc_init_ids(&ns->ids[IPC_MSG_IDS]); 120 } 121 122 #ifdef CONFIG_IPC_NS 123 void msg_exit_ns(struct ipc_namespace *ns) 124 { 125 free_ipcs(ns, &msg_ids(ns), freeque); 126 idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr); 127 } 128 #endif 129 130 void __init msg_init(void) 131 { 132 msg_init_ns(&init_ipc_ns); 133 134 printk(KERN_INFO "msgmni has been set to %d\n", 135 init_ipc_ns.msg_ctlmni); 136 137 ipc_init_proc_interface("sysvipc/msg", 138 " key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n", 139 IPC_MSG_IDS, sysvipc_msg_proc_show); 140 } 141 142 static inline struct msg_queue *msq_obtain_object(struct ipc_namespace *ns, int id) 143 { 144 struct kern_ipc_perm *ipcp = ipc_obtain_object(&msg_ids(ns), id); 145 146 if (IS_ERR(ipcp)) 147 return ERR_CAST(ipcp); 148 149 return container_of(ipcp, struct msg_queue, q_perm); 150 } 151 152 static inline struct msg_queue *msq_obtain_object_check(struct ipc_namespace *ns, 153 int id) 154 { 155 struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&msg_ids(ns), id); 156 157 if (IS_ERR(ipcp)) 158 return ERR_CAST(ipcp); 159 160 return container_of(ipcp, struct msg_queue, q_perm); 161 } 162 163 static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s) 164 { 165 ipc_rmid(&msg_ids(ns), &s->q_perm); 166 } 167 168 static void msg_rcu_free(struct rcu_head *head) 169 { 170 struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu); 171 struct msg_queue *msq = ipc_rcu_to_struct(p); 172 173 security_msg_queue_free(msq); 174 ipc_rcu_free(head); 175 } 176 177 /** 178 * newque - Create a new msg queue 179 * @ns: namespace 180 * @params: ptr to the structure that contains the key and msgflg 181 * 182 * Called with msg_ids.rwsem held (writer) 183 */ 184 static int newque(struct ipc_namespace *ns, struct ipc_params *params) 185 { 186 struct msg_queue *msq; 187 int id, retval; 188 key_t key = params->key; 189 int msgflg = params->flg; 190 191 msq = ipc_rcu_alloc(sizeof(*msq)); 192 if (!msq) 193 return -ENOMEM; 194 195 msq->q_perm.mode = msgflg & S_IRWXUGO; 196 msq->q_perm.key = key; 197 198 msq->q_perm.security = NULL; 199 retval = security_msg_queue_alloc(msq); 200 if (retval) { 201 ipc_rcu_putref(msq, ipc_rcu_free); 202 return retval; 203 } 204 205 /* ipc_addid() locks msq upon success. */ 206 id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni); 207 if (id < 0) { 208 ipc_rcu_putref(msq, msg_rcu_free); 209 return id; 210 } 211 212 msq->q_stime = msq->q_rtime = 0; 213 msq->q_ctime = get_seconds(); 214 msq->q_cbytes = msq->q_qnum = 0; 215 msq->q_qbytes = ns->msg_ctlmnb; 216 msq->q_lspid = msq->q_lrpid = 0; 217 INIT_LIST_HEAD(&msq->q_messages); 218 INIT_LIST_HEAD(&msq->q_receivers); 219 INIT_LIST_HEAD(&msq->q_senders); 220 221 ipc_unlock_object(&msq->q_perm); 222 rcu_read_unlock(); 223 224 return msq->q_perm.id; 225 } 226 227 static inline void ss_add(struct msg_queue *msq, struct msg_sender *mss) 228 { 229 mss->tsk = current; 230 current->state = TASK_INTERRUPTIBLE; 231 list_add_tail(&mss->list, &msq->q_senders); 232 } 233 234 static inline void ss_del(struct msg_sender *mss) 235 { 236 if (mss->list.next != NULL) 237 list_del(&mss->list); 238 } 239 240 static void ss_wakeup(struct list_head *h, int kill) 241 { 242 struct msg_sender *mss, *t; 243 244 list_for_each_entry_safe(mss, t, h, list) { 245 if (kill) 246 mss->list.next = NULL; 247 wake_up_process(mss->tsk); 248 } 249 } 250 251 static void expunge_all(struct msg_queue *msq, int res) 252 { 253 struct msg_receiver *msr, *t; 254 255 list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) { 256 msr->r_msg = NULL; /* initialize expunge ordering */ 257 wake_up_process(msr->r_tsk); 258 /* 259 * Ensure that the wakeup is visible before setting r_msg as 260 * the receiving end depends on it: either spinning on a nil, 261 * or dealing with -EAGAIN cases. See lockless receive part 1 262 * and 2 in do_msgrcv(). 263 */ 264 smp_mb(); 265 msr->r_msg = ERR_PTR(res); 266 } 267 } 268 269 /* 270 * freeque() wakes up waiters on the sender and receiver waiting queue, 271 * removes the message queue from message queue ID IDR, and cleans up all the 272 * messages associated with this queue. 273 * 274 * msg_ids.rwsem (writer) and the spinlock for this message queue are held 275 * before freeque() is called. msg_ids.rwsem remains locked on exit. 276 */ 277 static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) 278 { 279 struct msg_msg *msg, *t; 280 struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm); 281 282 expunge_all(msq, -EIDRM); 283 ss_wakeup(&msq->q_senders, 1); 284 msg_rmid(ns, msq); 285 ipc_unlock_object(&msq->q_perm); 286 rcu_read_unlock(); 287 288 list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) { 289 atomic_dec(&ns->msg_hdrs); 290 free_msg(msg); 291 } 292 atomic_sub(msq->q_cbytes, &ns->msg_bytes); 293 ipc_rcu_putref(msq, msg_rcu_free); 294 } 295 296 /* 297 * Called with msg_ids.rwsem and ipcp locked. 298 */ 299 static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg) 300 { 301 struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm); 302 303 return security_msg_queue_associate(msq, msgflg); 304 } 305 306 SYSCALL_DEFINE2(msgget, key_t, key, int, msgflg) 307 { 308 struct ipc_namespace *ns; 309 struct ipc_ops msg_ops; 310 struct ipc_params msg_params; 311 312 ns = current->nsproxy->ipc_ns; 313 314 msg_ops.getnew = newque; 315 msg_ops.associate = msg_security; 316 msg_ops.more_checks = NULL; 317 318 msg_params.key = key; 319 msg_params.flg = msgflg; 320 321 return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params); 322 } 323 324 static inline unsigned long 325 copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version) 326 { 327 switch (version) { 328 case IPC_64: 329 return copy_to_user(buf, in, sizeof(*in)); 330 case IPC_OLD: 331 { 332 struct msqid_ds out; 333 334 memset(&out, 0, sizeof(out)); 335 336 ipc64_perm_to_ipc_perm(&in->msg_perm, &out.msg_perm); 337 338 out.msg_stime = in->msg_stime; 339 out.msg_rtime = in->msg_rtime; 340 out.msg_ctime = in->msg_ctime; 341 342 if (in->msg_cbytes > USHRT_MAX) 343 out.msg_cbytes = USHRT_MAX; 344 else 345 out.msg_cbytes = in->msg_cbytes; 346 out.msg_lcbytes = in->msg_cbytes; 347 348 if (in->msg_qnum > USHRT_MAX) 349 out.msg_qnum = USHRT_MAX; 350 else 351 out.msg_qnum = in->msg_qnum; 352 353 if (in->msg_qbytes > USHRT_MAX) 354 out.msg_qbytes = USHRT_MAX; 355 else 356 out.msg_qbytes = in->msg_qbytes; 357 out.msg_lqbytes = in->msg_qbytes; 358 359 out.msg_lspid = in->msg_lspid; 360 out.msg_lrpid = in->msg_lrpid; 361 362 return copy_to_user(buf, &out, sizeof(out)); 363 } 364 default: 365 return -EINVAL; 366 } 367 } 368 369 static inline unsigned long 370 copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version) 371 { 372 switch (version) { 373 case IPC_64: 374 if (copy_from_user(out, buf, sizeof(*out))) 375 return -EFAULT; 376 return 0; 377 case IPC_OLD: 378 { 379 struct msqid_ds tbuf_old; 380 381 if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) 382 return -EFAULT; 383 384 out->msg_perm.uid = tbuf_old.msg_perm.uid; 385 out->msg_perm.gid = tbuf_old.msg_perm.gid; 386 out->msg_perm.mode = tbuf_old.msg_perm.mode; 387 388 if (tbuf_old.msg_qbytes == 0) 389 out->msg_qbytes = tbuf_old.msg_lqbytes; 390 else 391 out->msg_qbytes = tbuf_old.msg_qbytes; 392 393 return 0; 394 } 395 default: 396 return -EINVAL; 397 } 398 } 399 400 /* 401 * This function handles some msgctl commands which require the rwsem 402 * to be held in write mode. 403 * NOTE: no locks must be held, the rwsem is taken inside this function. 404 */ 405 static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, 406 struct msqid_ds __user *buf, int version) 407 { 408 struct kern_ipc_perm *ipcp; 409 struct msqid64_ds uninitialized_var(msqid64); 410 struct msg_queue *msq; 411 int err; 412 413 if (cmd == IPC_SET) { 414 if (copy_msqid_from_user(&msqid64, buf, version)) 415 return -EFAULT; 416 } 417 418 down_write(&msg_ids(ns).rwsem); 419 rcu_read_lock(); 420 421 ipcp = ipcctl_pre_down_nolock(ns, &msg_ids(ns), msqid, cmd, 422 &msqid64.msg_perm, msqid64.msg_qbytes); 423 if (IS_ERR(ipcp)) { 424 err = PTR_ERR(ipcp); 425 goto out_unlock1; 426 } 427 428 msq = container_of(ipcp, struct msg_queue, q_perm); 429 430 err = security_msg_queue_msgctl(msq, cmd); 431 if (err) 432 goto out_unlock1; 433 434 switch (cmd) { 435 case IPC_RMID: 436 ipc_lock_object(&msq->q_perm); 437 /* freeque unlocks the ipc object and rcu */ 438 freeque(ns, ipcp); 439 goto out_up; 440 case IPC_SET: 441 if (msqid64.msg_qbytes > ns->msg_ctlmnb && 442 !capable(CAP_SYS_RESOURCE)) { 443 err = -EPERM; 444 goto out_unlock1; 445 } 446 447 ipc_lock_object(&msq->q_perm); 448 err = ipc_update_perm(&msqid64.msg_perm, ipcp); 449 if (err) 450 goto out_unlock0; 451 452 msq->q_qbytes = msqid64.msg_qbytes; 453 454 msq->q_ctime = get_seconds(); 455 /* sleeping receivers might be excluded by 456 * stricter permissions. 457 */ 458 expunge_all(msq, -EAGAIN); 459 /* sleeping senders might be able to send 460 * due to a larger queue size. 461 */ 462 ss_wakeup(&msq->q_senders, 0); 463 break; 464 default: 465 err = -EINVAL; 466 goto out_unlock1; 467 } 468 469 out_unlock0: 470 ipc_unlock_object(&msq->q_perm); 471 out_unlock1: 472 rcu_read_unlock(); 473 out_up: 474 up_write(&msg_ids(ns).rwsem); 475 return err; 476 } 477 478 static int msgctl_nolock(struct ipc_namespace *ns, int msqid, 479 int cmd, int version, void __user *buf) 480 { 481 int err; 482 struct msg_queue *msq; 483 484 switch (cmd) { 485 case IPC_INFO: 486 case MSG_INFO: 487 { 488 struct msginfo msginfo; 489 int max_id; 490 491 if (!buf) 492 return -EFAULT; 493 494 /* 495 * We must not return kernel stack data. 496 * due to padding, it's not enough 497 * to set all member fields. 498 */ 499 err = security_msg_queue_msgctl(NULL, cmd); 500 if (err) 501 return err; 502 503 memset(&msginfo, 0, sizeof(msginfo)); 504 msginfo.msgmni = ns->msg_ctlmni; 505 msginfo.msgmax = ns->msg_ctlmax; 506 msginfo.msgmnb = ns->msg_ctlmnb; 507 msginfo.msgssz = MSGSSZ; 508 msginfo.msgseg = MSGSEG; 509 down_read(&msg_ids(ns).rwsem); 510 if (cmd == MSG_INFO) { 511 msginfo.msgpool = msg_ids(ns).in_use; 512 msginfo.msgmap = atomic_read(&ns->msg_hdrs); 513 msginfo.msgtql = atomic_read(&ns->msg_bytes); 514 } else { 515 msginfo.msgmap = MSGMAP; 516 msginfo.msgpool = MSGPOOL; 517 msginfo.msgtql = MSGTQL; 518 } 519 max_id = ipc_get_maxid(&msg_ids(ns)); 520 up_read(&msg_ids(ns).rwsem); 521 if (copy_to_user(buf, &msginfo, sizeof(struct msginfo))) 522 return -EFAULT; 523 return (max_id < 0) ? 0 : max_id; 524 } 525 526 case MSG_STAT: 527 case IPC_STAT: 528 { 529 struct msqid64_ds tbuf; 530 int success_return; 531 532 if (!buf) 533 return -EFAULT; 534 535 memset(&tbuf, 0, sizeof(tbuf)); 536 537 rcu_read_lock(); 538 if (cmd == MSG_STAT) { 539 msq = msq_obtain_object(ns, msqid); 540 if (IS_ERR(msq)) { 541 err = PTR_ERR(msq); 542 goto out_unlock; 543 } 544 success_return = msq->q_perm.id; 545 } else { 546 msq = msq_obtain_object_check(ns, msqid); 547 if (IS_ERR(msq)) { 548 err = PTR_ERR(msq); 549 goto out_unlock; 550 } 551 success_return = 0; 552 } 553 554 err = -EACCES; 555 if (ipcperms(ns, &msq->q_perm, S_IRUGO)) 556 goto out_unlock; 557 558 err = security_msg_queue_msgctl(msq, cmd); 559 if (err) 560 goto out_unlock; 561 562 kernel_to_ipc64_perm(&msq->q_perm, &tbuf.msg_perm); 563 tbuf.msg_stime = msq->q_stime; 564 tbuf.msg_rtime = msq->q_rtime; 565 tbuf.msg_ctime = msq->q_ctime; 566 tbuf.msg_cbytes = msq->q_cbytes; 567 tbuf.msg_qnum = msq->q_qnum; 568 tbuf.msg_qbytes = msq->q_qbytes; 569 tbuf.msg_lspid = msq->q_lspid; 570 tbuf.msg_lrpid = msq->q_lrpid; 571 rcu_read_unlock(); 572 573 if (copy_msqid_to_user(buf, &tbuf, version)) 574 return -EFAULT; 575 return success_return; 576 } 577 578 default: 579 return -EINVAL; 580 } 581 582 return err; 583 out_unlock: 584 rcu_read_unlock(); 585 return err; 586 } 587 588 SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) 589 { 590 int version; 591 struct ipc_namespace *ns; 592 593 if (msqid < 0 || cmd < 0) 594 return -EINVAL; 595 596 version = ipc_parse_version(&cmd); 597 ns = current->nsproxy->ipc_ns; 598 599 switch (cmd) { 600 case IPC_INFO: 601 case MSG_INFO: 602 case MSG_STAT: /* msqid is an index rather than a msg queue id */ 603 case IPC_STAT: 604 return msgctl_nolock(ns, msqid, cmd, version, buf); 605 case IPC_SET: 606 case IPC_RMID: 607 return msgctl_down(ns, msqid, cmd, buf, version); 608 default: 609 return -EINVAL; 610 } 611 } 612 613 static int testmsg(struct msg_msg *msg, long type, int mode) 614 { 615 switch (mode) 616 { 617 case SEARCH_ANY: 618 case SEARCH_NUMBER: 619 return 1; 620 case SEARCH_LESSEQUAL: 621 if (msg->m_type <= type) 622 return 1; 623 break; 624 case SEARCH_EQUAL: 625 if (msg->m_type == type) 626 return 1; 627 break; 628 case SEARCH_NOTEQUAL: 629 if (msg->m_type != type) 630 return 1; 631 break; 632 } 633 return 0; 634 } 635 636 static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg) 637 { 638 struct msg_receiver *msr, *t; 639 640 list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) { 641 if (testmsg(msg, msr->r_msgtype, msr->r_mode) && 642 !security_msg_queue_msgrcv(msq, msg, msr->r_tsk, 643 msr->r_msgtype, msr->r_mode)) { 644 645 list_del(&msr->r_list); 646 if (msr->r_maxsize < msg->m_ts) { 647 /* initialize pipelined send ordering */ 648 msr->r_msg = NULL; 649 wake_up_process(msr->r_tsk); 650 smp_mb(); /* see barrier comment below */ 651 msr->r_msg = ERR_PTR(-E2BIG); 652 } else { 653 msr->r_msg = NULL; 654 msq->q_lrpid = task_pid_vnr(msr->r_tsk); 655 msq->q_rtime = get_seconds(); 656 wake_up_process(msr->r_tsk); 657 /* 658 * Ensure that the wakeup is visible before 659 * setting r_msg, as the receiving end depends 660 * on it. See lockless receive part 1 and 2 in 661 * do_msgrcv(). 662 */ 663 smp_mb(); 664 msr->r_msg = msg; 665 666 return 1; 667 } 668 } 669 } 670 671 return 0; 672 } 673 674 long do_msgsnd(int msqid, long mtype, void __user *mtext, 675 size_t msgsz, int msgflg) 676 { 677 struct msg_queue *msq; 678 struct msg_msg *msg; 679 int err; 680 struct ipc_namespace *ns; 681 682 ns = current->nsproxy->ipc_ns; 683 684 if (msgsz > ns->msg_ctlmax || (long) msgsz < 0 || msqid < 0) 685 return -EINVAL; 686 if (mtype < 1) 687 return -EINVAL; 688 689 msg = load_msg(mtext, msgsz); 690 if (IS_ERR(msg)) 691 return PTR_ERR(msg); 692 693 msg->m_type = mtype; 694 msg->m_ts = msgsz; 695 696 rcu_read_lock(); 697 msq = msq_obtain_object_check(ns, msqid); 698 if (IS_ERR(msq)) { 699 err = PTR_ERR(msq); 700 goto out_unlock1; 701 } 702 703 ipc_lock_object(&msq->q_perm); 704 705 for (;;) { 706 struct msg_sender s; 707 708 err = -EACCES; 709 if (ipcperms(ns, &msq->q_perm, S_IWUGO)) 710 goto out_unlock0; 711 712 /* raced with RMID? */ 713 if (!ipc_valid_object(&msq->q_perm)) { 714 err = -EIDRM; 715 goto out_unlock0; 716 } 717 718 err = security_msg_queue_msgsnd(msq, msg, msgflg); 719 if (err) 720 goto out_unlock0; 721 722 if (msgsz + msq->q_cbytes <= msq->q_qbytes && 723 1 + msq->q_qnum <= msq->q_qbytes) { 724 break; 725 } 726 727 /* queue full, wait: */ 728 if (msgflg & IPC_NOWAIT) { 729 err = -EAGAIN; 730 goto out_unlock0; 731 } 732 733 /* enqueue the sender and prepare to block */ 734 ss_add(msq, &s); 735 736 if (!ipc_rcu_getref(msq)) { 737 err = -EIDRM; 738 goto out_unlock0; 739 } 740 741 ipc_unlock_object(&msq->q_perm); 742 rcu_read_unlock(); 743 schedule(); 744 745 rcu_read_lock(); 746 ipc_lock_object(&msq->q_perm); 747 748 ipc_rcu_putref(msq, ipc_rcu_free); 749 /* raced with RMID? */ 750 if (!ipc_valid_object(&msq->q_perm)) { 751 err = -EIDRM; 752 goto out_unlock0; 753 } 754 755 ss_del(&s); 756 757 if (signal_pending(current)) { 758 err = -ERESTARTNOHAND; 759 goto out_unlock0; 760 } 761 762 } 763 msq->q_lspid = task_tgid_vnr(current); 764 msq->q_stime = get_seconds(); 765 766 if (!pipelined_send(msq, msg)) { 767 /* no one is waiting for this message, enqueue it */ 768 list_add_tail(&msg->m_list, &msq->q_messages); 769 msq->q_cbytes += msgsz; 770 msq->q_qnum++; 771 atomic_add(msgsz, &ns->msg_bytes); 772 atomic_inc(&ns->msg_hdrs); 773 } 774 775 err = 0; 776 msg = NULL; 777 778 out_unlock0: 779 ipc_unlock_object(&msq->q_perm); 780 out_unlock1: 781 rcu_read_unlock(); 782 if (msg != NULL) 783 free_msg(msg); 784 return err; 785 } 786 787 SYSCALL_DEFINE4(msgsnd, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz, 788 int, msgflg) 789 { 790 long mtype; 791 792 if (get_user(mtype, &msgp->mtype)) 793 return -EFAULT; 794 return do_msgsnd(msqid, mtype, msgp->mtext, msgsz, msgflg); 795 } 796 797 static inline int convert_mode(long *msgtyp, int msgflg) 798 { 799 if (msgflg & MSG_COPY) 800 return SEARCH_NUMBER; 801 /* 802 * find message of correct type. 803 * msgtyp = 0 => get first. 804 * msgtyp > 0 => get first message of matching type. 805 * msgtyp < 0 => get message with least type must be < abs(msgtype). 806 */ 807 if (*msgtyp == 0) 808 return SEARCH_ANY; 809 if (*msgtyp < 0) { 810 *msgtyp = -*msgtyp; 811 return SEARCH_LESSEQUAL; 812 } 813 if (msgflg & MSG_EXCEPT) 814 return SEARCH_NOTEQUAL; 815 return SEARCH_EQUAL; 816 } 817 818 static long do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz) 819 { 820 struct msgbuf __user *msgp = dest; 821 size_t msgsz; 822 823 if (put_user(msg->m_type, &msgp->mtype)) 824 return -EFAULT; 825 826 msgsz = (bufsz > msg->m_ts) ? msg->m_ts : bufsz; 827 if (store_msg(msgp->mtext, msg, msgsz)) 828 return -EFAULT; 829 return msgsz; 830 } 831 832 #ifdef CONFIG_CHECKPOINT_RESTORE 833 /* 834 * This function creates new kernel message structure, large enough to store 835 * bufsz message bytes. 836 */ 837 static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz) 838 { 839 struct msg_msg *copy; 840 841 /* 842 * Create dummy message to copy real message to. 843 */ 844 copy = load_msg(buf, bufsz); 845 if (!IS_ERR(copy)) 846 copy->m_ts = bufsz; 847 return copy; 848 } 849 850 static inline void free_copy(struct msg_msg *copy) 851 { 852 if (copy) 853 free_msg(copy); 854 } 855 #else 856 static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz) 857 { 858 return ERR_PTR(-ENOSYS); 859 } 860 861 static inline void free_copy(struct msg_msg *copy) 862 { 863 } 864 #endif 865 866 static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode) 867 { 868 struct msg_msg *msg, *found = NULL; 869 long count = 0; 870 871 list_for_each_entry(msg, &msq->q_messages, m_list) { 872 if (testmsg(msg, *msgtyp, mode) && 873 !security_msg_queue_msgrcv(msq, msg, current, 874 *msgtyp, mode)) { 875 if (mode == SEARCH_LESSEQUAL && msg->m_type != 1) { 876 *msgtyp = msg->m_type - 1; 877 found = msg; 878 } else if (mode == SEARCH_NUMBER) { 879 if (*msgtyp == count) 880 return msg; 881 } else 882 return msg; 883 count++; 884 } 885 } 886 887 return found ?: ERR_PTR(-EAGAIN); 888 } 889 890 long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgflg, 891 long (*msg_handler)(void __user *, struct msg_msg *, size_t)) 892 { 893 int mode; 894 struct msg_queue *msq; 895 struct ipc_namespace *ns; 896 struct msg_msg *msg, *copy = NULL; 897 898 ns = current->nsproxy->ipc_ns; 899 900 if (msqid < 0 || (long) bufsz < 0) 901 return -EINVAL; 902 903 if (msgflg & MSG_COPY) { 904 copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax)); 905 if (IS_ERR(copy)) 906 return PTR_ERR(copy); 907 } 908 mode = convert_mode(&msgtyp, msgflg); 909 910 rcu_read_lock(); 911 msq = msq_obtain_object_check(ns, msqid); 912 if (IS_ERR(msq)) { 913 rcu_read_unlock(); 914 free_copy(copy); 915 return PTR_ERR(msq); 916 } 917 918 for (;;) { 919 struct msg_receiver msr_d; 920 921 msg = ERR_PTR(-EACCES); 922 if (ipcperms(ns, &msq->q_perm, S_IRUGO)) 923 goto out_unlock1; 924 925 ipc_lock_object(&msq->q_perm); 926 927 /* raced with RMID? */ 928 if (!ipc_valid_object(&msq->q_perm)) { 929 msg = ERR_PTR(-EIDRM); 930 goto out_unlock0; 931 } 932 933 msg = find_msg(msq, &msgtyp, mode); 934 if (!IS_ERR(msg)) { 935 /* 936 * Found a suitable message. 937 * Unlink it from the queue. 938 */ 939 if ((bufsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) { 940 msg = ERR_PTR(-E2BIG); 941 goto out_unlock0; 942 } 943 /* 944 * If we are copying, then do not unlink message and do 945 * not update queue parameters. 946 */ 947 if (msgflg & MSG_COPY) { 948 msg = copy_msg(msg, copy); 949 goto out_unlock0; 950 } 951 952 list_del(&msg->m_list); 953 msq->q_qnum--; 954 msq->q_rtime = get_seconds(); 955 msq->q_lrpid = task_tgid_vnr(current); 956 msq->q_cbytes -= msg->m_ts; 957 atomic_sub(msg->m_ts, &ns->msg_bytes); 958 atomic_dec(&ns->msg_hdrs); 959 ss_wakeup(&msq->q_senders, 0); 960 961 goto out_unlock0; 962 } 963 964 /* No message waiting. Wait for a message */ 965 if (msgflg & IPC_NOWAIT) { 966 msg = ERR_PTR(-ENOMSG); 967 goto out_unlock0; 968 } 969 970 list_add_tail(&msr_d.r_list, &msq->q_receivers); 971 msr_d.r_tsk = current; 972 msr_d.r_msgtype = msgtyp; 973 msr_d.r_mode = mode; 974 if (msgflg & MSG_NOERROR) 975 msr_d.r_maxsize = INT_MAX; 976 else 977 msr_d.r_maxsize = bufsz; 978 msr_d.r_msg = ERR_PTR(-EAGAIN); 979 current->state = TASK_INTERRUPTIBLE; 980 981 ipc_unlock_object(&msq->q_perm); 982 rcu_read_unlock(); 983 schedule(); 984 985 /* Lockless receive, part 1: 986 * Disable preemption. We don't hold a reference to the queue 987 * and getting a reference would defeat the idea of a lockless 988 * operation, thus the code relies on rcu to guarantee the 989 * existence of msq: 990 * Prior to destruction, expunge_all(-EIRDM) changes r_msg. 991 * Thus if r_msg is -EAGAIN, then the queue not yet destroyed. 992 * rcu_read_lock() prevents preemption between reading r_msg 993 * and acquiring the q_perm.lock in ipc_lock_object(). 994 */ 995 rcu_read_lock(); 996 997 /* Lockless receive, part 2: 998 * Wait until pipelined_send or expunge_all are outside of 999 * wake_up_process(). There is a race with exit(), see 1000 * ipc/mqueue.c for the details. 1001 */ 1002 msg = (struct msg_msg *)msr_d.r_msg; 1003 while (msg == NULL) { 1004 cpu_relax(); 1005 msg = (struct msg_msg *)msr_d.r_msg; 1006 } 1007 1008 /* Lockless receive, part 3: 1009 * If there is a message or an error then accept it without 1010 * locking. 1011 */ 1012 if (msg != ERR_PTR(-EAGAIN)) 1013 goto out_unlock1; 1014 1015 /* Lockless receive, part 3: 1016 * Acquire the queue spinlock. 1017 */ 1018 ipc_lock_object(&msq->q_perm); 1019 1020 /* Lockless receive, part 4: 1021 * Repeat test after acquiring the spinlock. 1022 */ 1023 msg = (struct msg_msg *)msr_d.r_msg; 1024 if (msg != ERR_PTR(-EAGAIN)) 1025 goto out_unlock0; 1026 1027 list_del(&msr_d.r_list); 1028 if (signal_pending(current)) { 1029 msg = ERR_PTR(-ERESTARTNOHAND); 1030 goto out_unlock0; 1031 } 1032 1033 ipc_unlock_object(&msq->q_perm); 1034 } 1035 1036 out_unlock0: 1037 ipc_unlock_object(&msq->q_perm); 1038 out_unlock1: 1039 rcu_read_unlock(); 1040 if (IS_ERR(msg)) { 1041 free_copy(copy); 1042 return PTR_ERR(msg); 1043 } 1044 1045 bufsz = msg_handler(buf, msg, bufsz); 1046 free_msg(msg); 1047 1048 return bufsz; 1049 } 1050 1051 SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz, 1052 long, msgtyp, int, msgflg) 1053 { 1054 return do_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg, do_msg_fill); 1055 } 1056 1057 #ifdef CONFIG_PROC_FS 1058 static int sysvipc_msg_proc_show(struct seq_file *s, void *it) 1059 { 1060 struct user_namespace *user_ns = seq_user_ns(s); 1061 struct msg_queue *msq = it; 1062 1063 return seq_printf(s, 1064 "%10d %10d %4o %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n", 1065 msq->q_perm.key, 1066 msq->q_perm.id, 1067 msq->q_perm.mode, 1068 msq->q_cbytes, 1069 msq->q_qnum, 1070 msq->q_lspid, 1071 msq->q_lrpid, 1072 from_kuid_munged(user_ns, msq->q_perm.uid), 1073 from_kgid_munged(user_ns, msq->q_perm.gid), 1074 from_kuid_munged(user_ns, msq->q_perm.cuid), 1075 from_kgid_munged(user_ns, msq->q_perm.cgid), 1076 msq->q_stime, 1077 msq->q_rtime, 1078 msq->q_ctime); 1079 } 1080 #endif 1081