/*
 * linux/ipc/msg.c
 * Copyright (C) 1992 Krishna Balasubramanian
 *
 * Removed all the remaining kerneld mess
 * Catch the -EFAULT stuff properly
 * Use GFP_KERNEL for messages as in 1.2
 * Fixed up the unchecked user space derefs
 * Copyright (C) 1998 Alan Cox & Andi Kleen
 *
 * /proc/sysvipc/msg support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 *
 * mostly rewritten, threaded and wake-one semantics added
 * MSGMAX limit removed, sysctl's added
 * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
 *
 * support for audit of ipc object properties and permission changes
 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
 *
 * namespaces support
 * OpenVZ, SWsoft Inc.
 * Pavel Emelianov <xemul@openvz.org>
 */

#include <linux/capability.h>
#include <linux/msg.h>
#include <linux/spinlock.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/proc_fs.h>
#include <linux/list.h>
#include <linux/security.h>
#include <linux/sched.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/seq_file.h>
#include <linux/rwsem.h>
#include <linux/nsproxy.h>
#include <linux/ipc_namespace.h>

#include <asm/current.h>
#include <asm/uaccess.h>
#include "util.h"

/*
 * one msg_receiver structure for each sleeping receiver:
 */
struct msg_receiver {
	struct list_head	r_list;
	struct task_struct	*r_tsk;

	int			r_mode;
	long			r_msgtype;
	long			r_maxsize;

	struct msg_msg		*volatile r_msg;
};

/* one msg_sender for each sleeping sender */
struct msg_sender {
	struct list_head	list;
	struct task_struct	*tsk;
};

#define SEARCH_ANY		1
#define SEARCH_EQUAL		2
#define SEARCH_NOTEQUAL		3
#define SEARCH_LESSEQUAL	4
#define SEARCH_NUMBER		5

#define msg_ids(ns)	((ns)->ids[IPC_MSG_IDS])

static void freeque(struct ipc_namespace *, struct kern_ipc_perm *);
static int newque(struct ipc_namespace *, struct ipc_params *);
#ifdef CONFIG_PROC_FS
static int sysvipc_msg_proc_show(struct seq_file *s, void *it);
#endif

/*
 * Scale msgmni with the available lowmem size: the memory dedicated to msg
 * queues should occupy at most 1/MSG_MEM_SCALE of lowmem.
 * Also take into account the number of nsproxies created so far.
 * This should be done staying within the (MSGMNI, IPCMNI/nr_ipc_ns) range.
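 *
 * A worked example of the arithmetic below (a sketch only; the real
 * constants come from the headers): assuming MSG_MEM_SCALE == 32 and
 * MSGMNB == 16384, a system with 1 GiB of lowmem and a single ipc
 * namespace gets allowed = (2^30 / 32) / 16384 = 2048 queues, which is
 * then clamped into the range above.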
 */
void recompute_msgmni(struct ipc_namespace *ns)
{
	struct sysinfo i;
	unsigned long allowed;
	int nb_ns;

	si_meminfo(&i);
	allowed = (((i.totalram - i.totalhigh) / MSG_MEM_SCALE) * i.mem_unit)
		/ MSGMNB;
	nb_ns = atomic_read(&nr_ipc_ns);
	allowed /= nb_ns;

	if (allowed < MSGMNI) {
		ns->msg_ctlmni = MSGMNI;
		return;
	}

	if (allowed > IPCMNI / nb_ns) {
		ns->msg_ctlmni = IPCMNI / nb_ns;
		return;
	}

	ns->msg_ctlmni = allowed;
}

void msg_init_ns(struct ipc_namespace *ns)
{
	ns->msg_ctlmax = MSGMAX;
	ns->msg_ctlmnb = MSGMNB;

	recompute_msgmni(ns);

	atomic_set(&ns->msg_bytes, 0);
	atomic_set(&ns->msg_hdrs, 0);
	ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
}

#ifdef CONFIG_IPC_NS
void msg_exit_ns(struct ipc_namespace *ns)
{
	free_ipcs(ns, &msg_ids(ns), freeque);
	idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr);
}
#endif

void __init msg_init(void)
{
	msg_init_ns(&init_ipc_ns);

	printk(KERN_INFO "msgmni has been set to %d\n",
		init_ipc_ns.msg_ctlmni);

	ipc_init_proc_interface("sysvipc/msg",
				"       key      msqid perms      cbytes       qnum lspid lrpid   uid   gid  cuid  cgid      stime      rtime      ctime\n",
				IPC_MSG_IDS, sysvipc_msg_proc_show);
}

static inline struct msg_queue *msq_obtain_object(struct ipc_namespace *ns, int id)
{
	struct kern_ipc_perm *ipcp = ipc_obtain_object(&msg_ids(ns), id);

	if (IS_ERR(ipcp))
		return ERR_CAST(ipcp);

	return container_of(ipcp, struct msg_queue, q_perm);
}

static inline struct msg_queue *msq_obtain_object_check(struct ipc_namespace *ns,
							int id)
{
	struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&msg_ids(ns), id);

	if (IS_ERR(ipcp))
		return ERR_CAST(ipcp);

	return container_of(ipcp, struct msg_queue, q_perm);
}

static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s)
{
	ipc_rmid(&msg_ids(ns), &s->q_perm);
}
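/*
 * Illustrative userspace counterpart of the creation path below (a sketch,
 * not kernel code): the msgget() syscall further down routes key lookups
 * through ipcget(), which calls newque() only when a new queue has to be
 * created, e.g.:
 *
 *	key_t key = ftok("/some/path", 'q');
 *	int id = msgget(key, IPC_CREAT | 0600);
 */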
/**
 * newque - Create a new msg queue
 * @ns: namespace
 * @params: ptr to the structure that contains the key and msgflg
 *
 * Called with msg_ids.rwsem held (writer)
 */
static int newque(struct ipc_namespace *ns, struct ipc_params *params)
{
	struct msg_queue *msq;
	int id, retval;
	key_t key = params->key;
	int msgflg = params->flg;

	msq = ipc_rcu_alloc(sizeof(*msq));
	if (!msq)
		return -ENOMEM;

	msq->q_perm.mode = msgflg & S_IRWXUGO;
	msq->q_perm.key = key;

	msq->q_perm.security = NULL;
	retval = security_msg_queue_alloc(msq);
	if (retval) {
		ipc_rcu_putref(msq);
		return retval;
	}

	/* ipc_addid() locks msq upon success. */
	id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
	if (id < 0) {
		security_msg_queue_free(msq);
		ipc_rcu_putref(msq);
		return id;
	}

	msq->q_stime = msq->q_rtime = 0;
	msq->q_ctime = get_seconds();
	msq->q_cbytes = msq->q_qnum = 0;
	msq->q_qbytes = ns->msg_ctlmnb;
	msq->q_lspid = msq->q_lrpid = 0;
	INIT_LIST_HEAD(&msq->q_messages);
	INIT_LIST_HEAD(&msq->q_receivers);
	INIT_LIST_HEAD(&msq->q_senders);

	ipc_unlock_object(&msq->q_perm);
	rcu_read_unlock();

	return msq->q_perm.id;
}

static inline void ss_add(struct msg_queue *msq, struct msg_sender *mss)
{
	mss->tsk = current;
	current->state = TASK_INTERRUPTIBLE;
	list_add_tail(&mss->list, &msq->q_senders);
}

static inline void ss_del(struct msg_sender *mss)
{
	if (mss->list.next != NULL)
		list_del(&mss->list);
}

/*
 * Wake every sleeping sender on @h; with @kill set, also mark each entry
 * as already removed by clearing list.next, which ss_del() checks before
 * unlinking.
 */
static void ss_wakeup(struct list_head *h, int kill)
{
	struct msg_sender *mss, *t;

	list_for_each_entry_safe(mss, t, h, list) {
		if (kill)
			mss->list.next = NULL;
		wake_up_process(mss->tsk);
	}
}
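/*
 * A hedged note on the wakeup handshake used below and in pipelined_send()
 * (inferred from this file's lockless receive comments): r_msg is first set
 * to NULL, the receiver is woken, and only afterwards is the final value
 * (here an ERR_PTR) stored. A woken receiver busy-waits while r_msg is
 * still NULL, and the smp_mb() orders the wake-up before the final store,
 * so the receiver cannot miss it.
 */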
static void expunge_all(struct msg_queue *msq, int res)
{
	struct msg_receiver *msr, *t;

	list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
		msr->r_msg = NULL;
		wake_up_process(msr->r_tsk);
		smp_mb();
		msr->r_msg = ERR_PTR(res);
	}
}

/*
 * freeque() wakes up waiters on the sender and receiver waiting queue,
 * removes the message queue from message queue ID IDR, and cleans up all the
 * messages associated with this queue.
 *
 * msg_ids.rwsem (writer) and the spinlock for this message queue are held
 * before freeque() is called. msg_ids.rwsem remains locked on exit.
 */
static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
{
	struct msg_msg *msg, *t;
	struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);

	expunge_all(msq, -EIDRM);
	ss_wakeup(&msq->q_senders, 1);
	msg_rmid(ns, msq);
	ipc_unlock_object(&msq->q_perm);
	rcu_read_unlock();

	list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
		atomic_dec(&ns->msg_hdrs);
		free_msg(msg);
	}
	atomic_sub(msq->q_cbytes, &ns->msg_bytes);
	security_msg_queue_free(msq);
	ipc_rcu_putref(msq);
}

/*
 * Called with msg_ids.rwsem and ipcp locked.
 */
static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg)
{
	struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);

	return security_msg_queue_associate(msq, msgflg);
}

SYSCALL_DEFINE2(msgget, key_t, key, int, msgflg)
{
	struct ipc_namespace *ns;
	struct ipc_ops msg_ops;
	struct ipc_params msg_params;

	ns = current->nsproxy->ipc_ns;

	msg_ops.getnew = newque;
	msg_ops.associate = msg_security;
	msg_ops.more_checks = NULL;

	msg_params.key = key;
	msg_params.flg = msgflg;

	return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params);
}

static inline unsigned long
copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version)
{
	switch (version) {
	case IPC_64:
		return copy_to_user(buf, in, sizeof(*in));
	case IPC_OLD:
	{
		struct msqid_ds out;

		memset(&out, 0, sizeof(out));

		ipc64_perm_to_ipc_perm(&in->msg_perm, &out.msg_perm);

		out.msg_stime = in->msg_stime;
		out.msg_rtime = in->msg_rtime;
		out.msg_ctime = in->msg_ctime;

		if (in->msg_cbytes > USHRT_MAX)
			out.msg_cbytes = USHRT_MAX;
		else
			out.msg_cbytes = in->msg_cbytes;
		out.msg_lcbytes = in->msg_cbytes;

		if (in->msg_qnum > USHRT_MAX)
			out.msg_qnum = USHRT_MAX;
		else
			out.msg_qnum = in->msg_qnum;

		if (in->msg_qbytes > USHRT_MAX)
			out.msg_qbytes = USHRT_MAX;
		else
			out.msg_qbytes = in->msg_qbytes;
		out.msg_lqbytes = in->msg_qbytes;

		out.msg_lspid = in->msg_lspid;
		out.msg_lrpid = in->msg_lrpid;

		return copy_to_user(buf, &out, sizeof(out));
	}
	default:
		return -EINVAL;
	}
}

static inline unsigned long
copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version)
{
	switch (version) {
	case IPC_64:
		if (copy_from_user(out, buf, sizeof(*out)))
			return -EFAULT;
		return 0;
	case IPC_OLD:
	{
		struct msqid_ds tbuf_old;

		if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
			return -EFAULT;

		out->msg_perm.uid = tbuf_old.msg_perm.uid;
		out->msg_perm.gid = tbuf_old.msg_perm.gid;
		out->msg_perm.mode = tbuf_old.msg_perm.mode;

		if (tbuf_old.msg_qbytes == 0)
			out->msg_qbytes = tbuf_old.msg_lqbytes;
		else
			out->msg_qbytes = tbuf_old.msg_qbytes;

		return 0;
	}
	default:
		return -EINVAL;
	}
}
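/*
 * Illustrative userspace counterpart of the IPC_SET path below (a sketch,
 * not kernel code); raising msg_qbytes above msg_ctlmnb needs
 * CAP_SYS_RESOURCE, as checked in msgctl_down():
 *
 *	struct msqid_ds ds;
 *
 *	msgctl(id, IPC_STAT, &ds);
 *	ds.msg_qbytes = 32768;
 *	msgctl(id, IPC_SET, &ds);
 */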
/*
 * This function handles some msgctl commands which require the rwsem
 * to be held in write mode.
 * NOTE: no locks must be held, the rwsem is taken inside this function.
 */
static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
		       struct msqid_ds __user *buf, int version)
{
	struct kern_ipc_perm *ipcp;
	struct msqid64_ds uninitialized_var(msqid64);
	struct msg_queue *msq;
	int err;

	if (cmd == IPC_SET) {
		if (copy_msqid_from_user(&msqid64, buf, version))
			return -EFAULT;
	}

	down_write(&msg_ids(ns).rwsem);
	rcu_read_lock();

	ipcp = ipcctl_pre_down_nolock(ns, &msg_ids(ns), msqid, cmd,
				      &msqid64.msg_perm, msqid64.msg_qbytes);
	if (IS_ERR(ipcp)) {
		err = PTR_ERR(ipcp);
		goto out_unlock1;
	}

	msq = container_of(ipcp, struct msg_queue, q_perm);

	err = security_msg_queue_msgctl(msq, cmd);
	if (err)
		goto out_unlock1;

	switch (cmd) {
	case IPC_RMID:
		ipc_lock_object(&msq->q_perm);
		/* freeque unlocks the ipc object and rcu */
		freeque(ns, ipcp);
		goto out_up;
	case IPC_SET:
		if (msqid64.msg_qbytes > ns->msg_ctlmnb &&
		    !capable(CAP_SYS_RESOURCE)) {
			err = -EPERM;
			goto out_unlock1;
		}

		ipc_lock_object(&msq->q_perm);
		err = ipc_update_perm(&msqid64.msg_perm, ipcp);
		if (err)
			goto out_unlock0;

		msq->q_qbytes = msqid64.msg_qbytes;

		msq->q_ctime = get_seconds();
		/*
		 * Sleeping receivers might be excluded by
		 * stricter permissions.
		 */
		expunge_all(msq, -EAGAIN);
		/*
		 * Sleeping senders might be able to send
		 * due to a larger queue size.
		 */
		ss_wakeup(&msq->q_senders, 0);
		break;
	default:
		err = -EINVAL;
		goto out_unlock1;
	}

out_unlock0:
	ipc_unlock_object(&msq->q_perm);
out_unlock1:
	rcu_read_unlock();
out_up:
	up_write(&msg_ids(ns).rwsem);
	return err;
}
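/*
 * Illustrative userspace pattern for the MSG_INFO/MSG_STAT pair handled
 * below (a sketch of how tools like ipcs(1) enumerate queues; for MSG_STAT
 * the msqid argument is an index, and MSG_INFO returns the highest index
 * in use):
 *
 *	struct msginfo info;
 *	struct msqid_ds ds;
 *	int i, max;
 *
 *	max = msgctl(0, MSG_INFO, (struct msqid_ds *)&info);
 *	for (i = 0; i <= max; i++)
 *		if (msgctl(i, MSG_STAT, &ds) >= 0)
 *			... the return value is the queue's real id ...
 */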
static int msgctl_nolock(struct ipc_namespace *ns, int msqid,
			 int cmd, int version, void __user *buf)
{
	int err;
	struct msg_queue *msq;

	switch (cmd) {
	case IPC_INFO:
	case MSG_INFO:
	{
		struct msginfo msginfo;
		int max_id;

		if (!buf)
			return -EFAULT;

		/*
		 * We must not return kernel stack data:
		 * due to padding, it is not enough
		 * to set all member fields.
		 */
		err = security_msg_queue_msgctl(NULL, cmd);
		if (err)
			return err;

		memset(&msginfo, 0, sizeof(msginfo));
		msginfo.msgmni = ns->msg_ctlmni;
		msginfo.msgmax = ns->msg_ctlmax;
		msginfo.msgmnb = ns->msg_ctlmnb;
		msginfo.msgssz = MSGSSZ;
		msginfo.msgseg = MSGSEG;
		down_read(&msg_ids(ns).rwsem);
		if (cmd == MSG_INFO) {
			msginfo.msgpool = msg_ids(ns).in_use;
			msginfo.msgmap = atomic_read(&ns->msg_hdrs);
			msginfo.msgtql = atomic_read(&ns->msg_bytes);
		} else {
			msginfo.msgmap = MSGMAP;
			msginfo.msgpool = MSGPOOL;
			msginfo.msgtql = MSGTQL;
		}
		max_id = ipc_get_maxid(&msg_ids(ns));
		up_read(&msg_ids(ns).rwsem);
		if (copy_to_user(buf, &msginfo, sizeof(struct msginfo)))
			return -EFAULT;
		return (max_id < 0) ? 0 : max_id;
	}

	case MSG_STAT:
	case IPC_STAT:
	{
		struct msqid64_ds tbuf;
		int success_return;

		if (!buf)
			return -EFAULT;

		memset(&tbuf, 0, sizeof(tbuf));

		rcu_read_lock();
		if (cmd == MSG_STAT) {
			msq = msq_obtain_object(ns, msqid);
			if (IS_ERR(msq)) {
				err = PTR_ERR(msq);
				goto out_unlock;
			}
			success_return = msq->q_perm.id;
		} else {
			msq = msq_obtain_object_check(ns, msqid);
			if (IS_ERR(msq)) {
				err = PTR_ERR(msq);
				goto out_unlock;
			}
			success_return = 0;
		}

		err = -EACCES;
		if (ipcperms(ns, &msq->q_perm, S_IRUGO))
			goto out_unlock;

		err = security_msg_queue_msgctl(msq, cmd);
		if (err)
			goto out_unlock;

		kernel_to_ipc64_perm(&msq->q_perm, &tbuf.msg_perm);
		tbuf.msg_stime = msq->q_stime;
		tbuf.msg_rtime = msq->q_rtime;
		tbuf.msg_ctime = msq->q_ctime;
		tbuf.msg_cbytes = msq->q_cbytes;
		tbuf.msg_qnum = msq->q_qnum;
		tbuf.msg_qbytes = msq->q_qbytes;
		tbuf.msg_lspid = msq->q_lspid;
		tbuf.msg_lrpid = msq->q_lrpid;
		rcu_read_unlock();

		if (copy_msqid_to_user(buf, &tbuf, version))
			return -EFAULT;
		return success_return;
	}

	default:
		return -EINVAL;
	}

	return err;
out_unlock:
	rcu_read_unlock();
	return err;
}

SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf)
{
	int version;
	struct ipc_namespace *ns;

	if (msqid < 0 || cmd < 0)
		return -EINVAL;

	version = ipc_parse_version(&cmd);
	ns = current->nsproxy->ipc_ns;

	switch (cmd) {
	case IPC_INFO:
	case MSG_INFO:
	case MSG_STAT:	/* msqid is an index rather than a msg queue id */
	case IPC_STAT:
		return msgctl_nolock(ns, msqid, cmd, version, buf);
	case IPC_SET:
	case IPC_RMID:
		return msgctl_down(ns, msqid, cmd, buf, version);
	default:
		return -EINVAL;
	}
}

static int testmsg(struct msg_msg *msg, long type, int mode)
{
	switch (mode) {
	case SEARCH_ANY:
	case SEARCH_NUMBER:
		return 1;
	case SEARCH_LESSEQUAL:
		if (msg->m_type <= type)
			return 1;
		break;
	case SEARCH_EQUAL:
		if (msg->m_type == type)
			return 1;
		break;
	case SEARCH_NOTEQUAL:
		if (msg->m_type != type)
			return 1;
		break;
	}
	return 0;
}
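/*
 * pipelined_send() below hands a message straight to a matching sleeping
 * receiver, bypassing the queue entirely; this matches the "wake-one"
 * delivery mentioned in the header comment. It returns 1 when the message
 * has been consumed by a receiver and 0 when the caller must enqueue it
 * instead.
 */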
static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg)
{
	struct msg_receiver *msr, *t;

	list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
		if (testmsg(msg, msr->r_msgtype, msr->r_mode) &&
		    !security_msg_queue_msgrcv(msq, msg, msr->r_tsk,
					       msr->r_msgtype, msr->r_mode)) {

			list_del(&msr->r_list);
			if (msr->r_maxsize < msg->m_ts) {
				msr->r_msg = NULL;
				wake_up_process(msr->r_tsk);
				smp_mb();
				msr->r_msg = ERR_PTR(-E2BIG);
			} else {
				msr->r_msg = NULL;
				msq->q_lrpid = task_pid_vnr(msr->r_tsk);
				msq->q_rtime = get_seconds();
				wake_up_process(msr->r_tsk);
				smp_mb();
				msr->r_msg = msg;

				return 1;
			}
		}
	}
	return 0;
}

long do_msgsnd(int msqid, long mtype, void __user *mtext,
		size_t msgsz, int msgflg)
{
	struct msg_queue *msq;
	struct msg_msg *msg;
	int err;
	struct ipc_namespace *ns;

	ns = current->nsproxy->ipc_ns;

	if (msgsz > ns->msg_ctlmax || (long) msgsz < 0 || msqid < 0)
		return -EINVAL;
	if (mtype < 1)
		return -EINVAL;

	msg = load_msg(mtext, msgsz);
	if (IS_ERR(msg))
		return PTR_ERR(msg);

	msg->m_type = mtype;
	msg->m_ts = msgsz;

	rcu_read_lock();
	msq = msq_obtain_object_check(ns, msqid);
	if (IS_ERR(msq)) {
		err = PTR_ERR(msq);
		goto out_unlock1;
	}

	ipc_lock_object(&msq->q_perm);

	for (;;) {
		struct msg_sender s;

		err = -EACCES;
		if (ipcperms(ns, &msq->q_perm, S_IWUGO))
			goto out_unlock0;

		err = security_msg_queue_msgsnd(msq, msg, msgflg);
		if (err)
			goto out_unlock0;

		if (msgsz + msq->q_cbytes <= msq->q_qbytes &&
		    1 + msq->q_qnum <= msq->q_qbytes) {
			break;
		}

		/* queue full, wait: */
		if (msgflg & IPC_NOWAIT) {
			err = -EAGAIN;
			goto out_unlock0;
		}

		ss_add(msq, &s);

		if (!ipc_rcu_getref(msq)) {
			err = -EIDRM;
			goto out_unlock0;
		}

		ipc_unlock_object(&msq->q_perm);
		rcu_read_unlock();
		schedule();

		rcu_read_lock();
		ipc_lock_object(&msq->q_perm);

		ipc_rcu_putref(msq);
		if (msq->q_perm.deleted) {
			err = -EIDRM;
			goto out_unlock0;
		}

		ss_del(&s);

		if (signal_pending(current)) {
			err = -ERESTARTNOHAND;
			goto out_unlock0;
		}
	}

	msq->q_lspid = task_tgid_vnr(current);
	msq->q_stime = get_seconds();

	if (!pipelined_send(msq, msg)) {
		/* no one is waiting for this message, enqueue it */
		list_add_tail(&msg->m_list, &msq->q_messages);
		msq->q_cbytes += msgsz;
		msq->q_qnum++;
		atomic_add(msgsz, &ns->msg_bytes);
		atomic_inc(&ns->msg_hdrs);
	}

	err = 0;
	msg = NULL;

out_unlock0:
	ipc_unlock_object(&msq->q_perm);
out_unlock1:
	rcu_read_unlock();
	if (msg != NULL)
		free_msg(msg);
	return err;
}

SYSCALL_DEFINE4(msgsnd, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
		int, msgflg)
{
	long mtype;

	if (get_user(mtype, &msgp->mtype))
		return -EFAULT;
	return do_msgsnd(msqid, mtype, msgp->mtext, msgsz, msgflg);
}

static inline int convert_mode(long *msgtyp, int msgflg)
{
	if (msgflg & MSG_COPY)
		return SEARCH_NUMBER;
	/*
	 * find message of correct type.
	 * msgtyp = 0 => get first.
	 * msgtyp > 0 => get first message of matching type.
	 * msgtyp < 0 => get the message with the lowest type that is
	 *		 <= abs(msgtyp).
	 */
	if (*msgtyp == 0)
		return SEARCH_ANY;
	if (*msgtyp < 0) {
		*msgtyp = -*msgtyp;
		return SEARCH_LESSEQUAL;
	}
	if (msgflg & MSG_EXCEPT)
		return SEARCH_NOTEQUAL;
	return SEARCH_EQUAL;
}

static long do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz)
{
	struct msgbuf __user *msgp = dest;
	size_t msgsz;

	if (put_user(msg->m_type, &msgp->mtype))
		return -EFAULT;

	msgsz = (bufsz > msg->m_ts) ? msg->m_ts : bufsz;
	if (store_msg(msgp->mtext, msg, msgsz))
		return -EFAULT;
	return msgsz;
}
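/*
 * MSG_COPY support (guarded by CONFIG_CHECKPOINT_RESTORE below): with
 * MSG_COPY set, convert_mode() returns SEARCH_NUMBER, msgtyp is treated as
 * a position in the queue rather than a message type, and do_msgrcv()
 * copies the selected message out without unlinking it, leaving the queue
 * intact.
 */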
#ifdef CONFIG_CHECKPOINT_RESTORE
/*
 * This function creates a new kernel message structure, large enough to store
 * bufsz message bytes.
 */
static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz)
{
	struct msg_msg *copy;

	/*
	 * Create dummy message to copy real message to.
	 */
	copy = load_msg(buf, bufsz);
	if (!IS_ERR(copy))
		copy->m_ts = bufsz;
	return copy;
}

static inline void free_copy(struct msg_msg *copy)
{
	if (copy)
		free_msg(copy);
}
#else
static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz)
{
	return ERR_PTR(-ENOSYS);
}

static inline void free_copy(struct msg_msg *copy)
{
}
#endif
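/*
 * Note on the scan in find_msg() below: for SEARCH_LESSEQUAL, every match
 * tightens *msgtyp to m_type - 1, so the walk converges on the lowest
 * message type on the queue that is within the original bound; a message
 * of type 1 is returned immediately, since no lower type can exist. For
 * SEARCH_NUMBER, *msgtyp is a position among the matching messages.
 */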
static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode)
{
	struct msg_msg *msg, *found = NULL;
	long count = 0;

	list_for_each_entry(msg, &msq->q_messages, m_list) {
		if (testmsg(msg, *msgtyp, mode) &&
		    !security_msg_queue_msgrcv(msq, msg, current,
					       *msgtyp, mode)) {
			if (mode == SEARCH_LESSEQUAL && msg->m_type != 1) {
				*msgtyp = msg->m_type - 1;
				found = msg;
			} else if (mode == SEARCH_NUMBER) {
				if (*msgtyp == count)
					return msg;
			} else
				return msg;
			count++;
		}
	}

	return found ?: ERR_PTR(-EAGAIN);
}

long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgflg,
	       long (*msg_handler)(void __user *, struct msg_msg *, size_t))
{
	int mode;
	struct msg_queue *msq;
	struct ipc_namespace *ns;
	struct msg_msg *msg, *copy = NULL;

	ns = current->nsproxy->ipc_ns;

	if (msqid < 0 || (long) bufsz < 0)
		return -EINVAL;

	if (msgflg & MSG_COPY) {
		copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax));
		if (IS_ERR(copy))
			return PTR_ERR(copy);
	}
	mode = convert_mode(&msgtyp, msgflg);

	rcu_read_lock();
	msq = msq_obtain_object_check(ns, msqid);
	if (IS_ERR(msq)) {
		rcu_read_unlock();
		free_copy(copy);
		return PTR_ERR(msq);
	}

	for (;;) {
		struct msg_receiver msr_d;

		msg = ERR_PTR(-EACCES);
		if (ipcperms(ns, &msq->q_perm, S_IRUGO))
			goto out_unlock1;

		ipc_lock_object(&msq->q_perm);
		msg = find_msg(msq, &msgtyp, mode);
		if (!IS_ERR(msg)) {
			/*
			 * Found a suitable message.
			 * Unlink it from the queue.
			 */
			if ((bufsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) {
				msg = ERR_PTR(-E2BIG);
				goto out_unlock0;
			}
			/*
			 * If we are copying, then do not unlink message and do
			 * not update queue parameters.
			 */
			if (msgflg & MSG_COPY) {
				msg = copy_msg(msg, copy);
				goto out_unlock0;
			}

			list_del(&msg->m_list);
			msq->q_qnum--;
			msq->q_rtime = get_seconds();
			msq->q_lrpid = task_tgid_vnr(current);
			msq->q_cbytes -= msg->m_ts;
			atomic_sub(msg->m_ts, &ns->msg_bytes);
			atomic_dec(&ns->msg_hdrs);
			ss_wakeup(&msq->q_senders, 0);

			goto out_unlock0;
		}

		/* No message waiting. Wait for a message */
		if (msgflg & IPC_NOWAIT) {
			msg = ERR_PTR(-ENOMSG);
			goto out_unlock0;
		}

		list_add_tail(&msr_d.r_list, &msq->q_receivers);
		msr_d.r_tsk = current;
		msr_d.r_msgtype = msgtyp;
		msr_d.r_mode = mode;
		if (msgflg & MSG_NOERROR)
			msr_d.r_maxsize = INT_MAX;
		else
			msr_d.r_maxsize = bufsz;
		msr_d.r_msg = ERR_PTR(-EAGAIN);
		current->state = TASK_INTERRUPTIBLE;

		ipc_unlock_object(&msq->q_perm);
		rcu_read_unlock();
		schedule();

		/*
		 * Lockless receive, part 1:
		 * Disable preemption. We don't hold a reference to the queue
		 * and getting a reference would defeat the idea of a lockless
		 * operation, thus the code relies on rcu to guarantee the
		 * existence of msq:
		 * Prior to destruction, expunge_all(-EIDRM) changes r_msg.
		 * Thus if r_msg is -EAGAIN, the queue is not yet destroyed.
		 * rcu_read_lock() prevents preemption between reading r_msg
		 * and acquiring the q_perm.lock in ipc_lock_object().
		 */
		rcu_read_lock();

		/*
		 * Lockless receive, part 2:
		 * Wait until pipelined_send or expunge_all are outside of
		 * wake_up_process(). There is a race with exit(), see
		 * ipc/mqueue.c for the details.
		 */
		msg = (struct msg_msg *)msr_d.r_msg;
		while (msg == NULL) {
			cpu_relax();
			msg = (struct msg_msg *)msr_d.r_msg;
		}

		/*
		 * Lockless receive, part 3:
		 * If there is a message or an error then accept it without
		 * locking.
		 */
		if (msg != ERR_PTR(-EAGAIN))
			goto out_unlock1;

		/*
		 * Lockless receive, part 4:
		 * Acquire the queue spinlock.
		 */
		ipc_lock_object(&msq->q_perm);

		/*
		 * Lockless receive, part 5:
		 * Repeat the test after acquiring the spinlock.
		 */
		msg = (struct msg_msg *)msr_d.r_msg;
		if (msg != ERR_PTR(-EAGAIN))
			goto out_unlock0;

		list_del(&msr_d.r_list);
		if (signal_pending(current)) {
			msg = ERR_PTR(-ERESTARTNOHAND);
			goto out_unlock0;
		}

		ipc_unlock_object(&msq->q_perm);
	}

out_unlock0:
	ipc_unlock_object(&msq->q_perm);
out_unlock1:
	rcu_read_unlock();
	if (IS_ERR(msg)) {
		free_copy(copy);
		return PTR_ERR(msg);
	}

	bufsz = msg_handler(buf, msg, bufsz);
	free_msg(msg);

	return bufsz;
}

SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
		long, msgtyp, int, msgflg)
{
	return do_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg, do_msg_fill);
}

#ifdef CONFIG_PROC_FS
static int sysvipc_msg_proc_show(struct seq_file *s, void *it)
{
	struct user_namespace *user_ns = seq_user_ns(s);
	struct msg_queue *msq = it;

	return seq_printf(s,
			"%10d %10d  %4o  %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n",
			msq->q_perm.key,
			msq->q_perm.id,
			msq->q_perm.mode,
			msq->q_cbytes,
			msq->q_qnum,
			msq->q_lspid,
			msq->q_lrpid,
			from_kuid_munged(user_ns, msq->q_perm.uid),
			from_kgid_munged(user_ns, msq->q_perm.gid),
			from_kuid_munged(user_ns, msq->q_perm.cuid),
			from_kgid_munged(user_ns, msq->q_perm.cgid),
			msq->q_stime,
			msq->q_rtime,
			msq->q_ctime);
}
#endif