1 /* 2 * linux/ipc/msg.c 3 * Copyright (C) 1992 Krishna Balasubramanian 4 * 5 * Removed all the remaining kerneld mess 6 * Catch the -EFAULT stuff properly 7 * Use GFP_KERNEL for messages as in 1.2 8 * Fixed up the unchecked user space derefs 9 * Copyright (C) 1998 Alan Cox & Andi Kleen 10 * 11 * /proc/sysvipc/msg support (c) 1999 Dragos Acostachioaie <dragos@iname.com> 12 * 13 * mostly rewritten, threaded and wake-one semantics added 14 * MSGMAX limit removed, sysctl's added 15 * (c) 1999 Manfred Spraul <manfred@colorfullife.com> 16 * 17 * support for audit of ipc object properties and permission changes 18 * Dustin Kirkland <dustin.kirkland@us.ibm.com> 19 * 20 * namespaces support 21 * OpenVZ, SWsoft Inc. 22 * Pavel Emelianov <xemul@openvz.org> 23 */ 24 25 #include <linux/capability.h> 26 #include <linux/msg.h> 27 #include <linux/spinlock.h> 28 #include <linux/init.h> 29 #include <linux/mm.h> 30 #include <linux/proc_fs.h> 31 #include <linux/list.h> 32 #include <linux/security.h> 33 #include <linux/sched.h> 34 #include <linux/syscalls.h> 35 #include <linux/audit.h> 36 #include <linux/seq_file.h> 37 #include <linux/rwsem.h> 38 #include <linux/nsproxy.h> 39 #include <linux/ipc_namespace.h> 40 41 #include <asm/current.h> 42 #include <asm/uaccess.h> 43 #include "util.h" 44 45 /* 46 * one msg_receiver structure for each sleeping receiver: 47 */ 48 struct msg_receiver { 49 struct list_head r_list; 50 struct task_struct *r_tsk; 51 52 int r_mode; 53 long r_msgtype; 54 long r_maxsize; 55 56 struct msg_msg *volatile r_msg; 57 }; 58 59 /* one msg_sender for each sleeping sender */ 60 struct msg_sender { 61 struct list_head list; 62 struct task_struct *tsk; 63 }; 64 65 #define SEARCH_ANY 1 66 #define SEARCH_EQUAL 2 67 #define SEARCH_NOTEQUAL 3 68 #define SEARCH_LESSEQUAL 4 69 #define SEARCH_NUMBER 5 70 71 #define msg_ids(ns) ((ns)->ids[IPC_MSG_IDS]) 72 73 static void freeque(struct ipc_namespace *, struct kern_ipc_perm *); 74 static int newque(struct ipc_namespace *, struct ipc_params *); 75 #ifdef CONFIG_PROC_FS 76 static int sysvipc_msg_proc_show(struct seq_file *s, void *it); 77 #endif 78 79 /* 80 * Scale msgmni with the available lowmem size: the memory dedicated to msg 81 * queues should occupy at most 1/MSG_MEM_SCALE of lowmem. 82 * Also take into account the number of nsproxies created so far. 83 * This should be done staying within the (MSGMNI , IPCMNI/nr_ipc_ns) range. 84 */ 85 void recompute_msgmni(struct ipc_namespace *ns) 86 { 87 struct sysinfo i; 88 unsigned long allowed; 89 int nb_ns; 90 91 si_meminfo(&i); 92 allowed = (((i.totalram - i.totalhigh) / MSG_MEM_SCALE) * i.mem_unit) 93 / MSGMNB; 94 nb_ns = atomic_read(&nr_ipc_ns); 95 allowed /= nb_ns; 96 97 if (allowed < MSGMNI) { 98 ns->msg_ctlmni = MSGMNI; 99 return; 100 } 101 102 if (allowed > IPCMNI / nb_ns) { 103 ns->msg_ctlmni = IPCMNI / nb_ns; 104 return; 105 } 106 107 ns->msg_ctlmni = allowed; 108 } 109 110 void msg_init_ns(struct ipc_namespace *ns) 111 { 112 ns->msg_ctlmax = MSGMAX; 113 ns->msg_ctlmnb = MSGMNB; 114 115 recompute_msgmni(ns); 116 117 atomic_set(&ns->msg_bytes, 0); 118 atomic_set(&ns->msg_hdrs, 0); 119 ipc_init_ids(&ns->ids[IPC_MSG_IDS]); 120 } 121 122 #ifdef CONFIG_IPC_NS 123 void msg_exit_ns(struct ipc_namespace *ns) 124 { 125 free_ipcs(ns, &msg_ids(ns), freeque); 126 idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr); 127 } 128 #endif 129 130 void __init msg_init(void) 131 { 132 msg_init_ns(&init_ipc_ns); 133 134 printk(KERN_INFO "msgmni has been set to %d\n", 135 init_ipc_ns.msg_ctlmni); 136 137 ipc_init_proc_interface("sysvipc/msg", 138 " key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n", 139 IPC_MSG_IDS, sysvipc_msg_proc_show); 140 } 141 142 static inline struct msg_queue *msq_obtain_object(struct ipc_namespace *ns, int id) 143 { 144 struct kern_ipc_perm *ipcp = ipc_obtain_object(&msg_ids(ns), id); 145 146 if (IS_ERR(ipcp)) 147 return ERR_CAST(ipcp); 148 149 return container_of(ipcp, struct msg_queue, q_perm); 150 } 151 152 static inline struct msg_queue *msq_obtain_object_check(struct ipc_namespace *ns, 153 int id) 154 { 155 struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&msg_ids(ns), id); 156 157 if (IS_ERR(ipcp)) 158 return ERR_CAST(ipcp); 159 160 return container_of(ipcp, struct msg_queue, q_perm); 161 } 162 163 static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s) 164 { 165 ipc_rmid(&msg_ids(ns), &s->q_perm); 166 } 167 168 static void msg_rcu_free(struct rcu_head *head) 169 { 170 struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu); 171 struct msg_queue *msq = ipc_rcu_to_struct(p); 172 173 security_msg_queue_free(msq); 174 ipc_rcu_free(head); 175 } 176 177 /** 178 * newque - Create a new msg queue 179 * @ns: namespace 180 * @params: ptr to the structure that contains the key and msgflg 181 * 182 * Called with msg_ids.rwsem held (writer) 183 */ 184 static int newque(struct ipc_namespace *ns, struct ipc_params *params) 185 { 186 struct msg_queue *msq; 187 int id, retval; 188 key_t key = params->key; 189 int msgflg = params->flg; 190 191 msq = ipc_rcu_alloc(sizeof(*msq)); 192 if (!msq) 193 return -ENOMEM; 194 195 msq->q_perm.mode = msgflg & S_IRWXUGO; 196 msq->q_perm.key = key; 197 198 msq->q_perm.security = NULL; 199 retval = security_msg_queue_alloc(msq); 200 if (retval) { 201 ipc_rcu_putref(msq, ipc_rcu_free); 202 return retval; 203 } 204 205 /* ipc_addid() locks msq upon success. */ 206 id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni); 207 if (id < 0) { 208 ipc_rcu_putref(msq, msg_rcu_free); 209 return id; 210 } 211 212 msq->q_stime = msq->q_rtime = 0; 213 msq->q_ctime = get_seconds(); 214 msq->q_cbytes = msq->q_qnum = 0; 215 msq->q_qbytes = ns->msg_ctlmnb; 216 msq->q_lspid = msq->q_lrpid = 0; 217 INIT_LIST_HEAD(&msq->q_messages); 218 INIT_LIST_HEAD(&msq->q_receivers); 219 INIT_LIST_HEAD(&msq->q_senders); 220 221 ipc_unlock_object(&msq->q_perm); 222 rcu_read_unlock(); 223 224 return msq->q_perm.id; 225 } 226 227 static inline void ss_add(struct msg_queue *msq, struct msg_sender *mss) 228 { 229 mss->tsk = current; 230 current->state = TASK_INTERRUPTIBLE; 231 list_add_tail(&mss->list, &msq->q_senders); 232 } 233 234 static inline void ss_del(struct msg_sender *mss) 235 { 236 if (mss->list.next != NULL) 237 list_del(&mss->list); 238 } 239 240 static void ss_wakeup(struct list_head *h, int kill) 241 { 242 struct msg_sender *mss, *t; 243 244 list_for_each_entry_safe(mss, t, h, list) { 245 if (kill) 246 mss->list.next = NULL; 247 wake_up_process(mss->tsk); 248 } 249 } 250 251 static void expunge_all(struct msg_queue *msq, int res) 252 { 253 struct msg_receiver *msr, *t; 254 255 list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) { 256 msr->r_msg = NULL; 257 wake_up_process(msr->r_tsk); 258 smp_mb(); 259 msr->r_msg = ERR_PTR(res); 260 } 261 } 262 263 /* 264 * freeque() wakes up waiters on the sender and receiver waiting queue, 265 * removes the message queue from message queue ID IDR, and cleans up all the 266 * messages associated with this queue. 267 * 268 * msg_ids.rwsem (writer) and the spinlock for this message queue are held 269 * before freeque() is called. msg_ids.rwsem remains locked on exit. 270 */ 271 static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) 272 { 273 struct msg_msg *msg, *t; 274 struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm); 275 276 expunge_all(msq, -EIDRM); 277 ss_wakeup(&msq->q_senders, 1); 278 msg_rmid(ns, msq); 279 ipc_unlock_object(&msq->q_perm); 280 rcu_read_unlock(); 281 282 list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) { 283 atomic_dec(&ns->msg_hdrs); 284 free_msg(msg); 285 } 286 atomic_sub(msq->q_cbytes, &ns->msg_bytes); 287 ipc_rcu_putref(msq, msg_rcu_free); 288 } 289 290 /* 291 * Called with msg_ids.rwsem and ipcp locked. 292 */ 293 static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg) 294 { 295 struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm); 296 297 return security_msg_queue_associate(msq, msgflg); 298 } 299 300 SYSCALL_DEFINE2(msgget, key_t, key, int, msgflg) 301 { 302 struct ipc_namespace *ns; 303 struct ipc_ops msg_ops; 304 struct ipc_params msg_params; 305 306 ns = current->nsproxy->ipc_ns; 307 308 msg_ops.getnew = newque; 309 msg_ops.associate = msg_security; 310 msg_ops.more_checks = NULL; 311 312 msg_params.key = key; 313 msg_params.flg = msgflg; 314 315 return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params); 316 } 317 318 static inline unsigned long 319 copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version) 320 { 321 switch(version) { 322 case IPC_64: 323 return copy_to_user(buf, in, sizeof(*in)); 324 case IPC_OLD: 325 { 326 struct msqid_ds out; 327 328 memset(&out, 0, sizeof(out)); 329 330 ipc64_perm_to_ipc_perm(&in->msg_perm, &out.msg_perm); 331 332 out.msg_stime = in->msg_stime; 333 out.msg_rtime = in->msg_rtime; 334 out.msg_ctime = in->msg_ctime; 335 336 if (in->msg_cbytes > USHRT_MAX) 337 out.msg_cbytes = USHRT_MAX; 338 else 339 out.msg_cbytes = in->msg_cbytes; 340 out.msg_lcbytes = in->msg_cbytes; 341 342 if (in->msg_qnum > USHRT_MAX) 343 out.msg_qnum = USHRT_MAX; 344 else 345 out.msg_qnum = in->msg_qnum; 346 347 if (in->msg_qbytes > USHRT_MAX) 348 out.msg_qbytes = USHRT_MAX; 349 else 350 out.msg_qbytes = in->msg_qbytes; 351 out.msg_lqbytes = in->msg_qbytes; 352 353 out.msg_lspid = in->msg_lspid; 354 out.msg_lrpid = in->msg_lrpid; 355 356 return copy_to_user(buf, &out, sizeof(out)); 357 } 358 default: 359 return -EINVAL; 360 } 361 } 362 363 static inline unsigned long 364 copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version) 365 { 366 switch(version) { 367 case IPC_64: 368 if (copy_from_user(out, buf, sizeof(*out))) 369 return -EFAULT; 370 return 0; 371 case IPC_OLD: 372 { 373 struct msqid_ds tbuf_old; 374 375 if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) 376 return -EFAULT; 377 378 out->msg_perm.uid = tbuf_old.msg_perm.uid; 379 out->msg_perm.gid = tbuf_old.msg_perm.gid; 380 out->msg_perm.mode = tbuf_old.msg_perm.mode; 381 382 if (tbuf_old.msg_qbytes == 0) 383 out->msg_qbytes = tbuf_old.msg_lqbytes; 384 else 385 out->msg_qbytes = tbuf_old.msg_qbytes; 386 387 return 0; 388 } 389 default: 390 return -EINVAL; 391 } 392 } 393 394 /* 395 * This function handles some msgctl commands which require the rwsem 396 * to be held in write mode. 397 * NOTE: no locks must be held, the rwsem is taken inside this function. 398 */ 399 static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, 400 struct msqid_ds __user *buf, int version) 401 { 402 struct kern_ipc_perm *ipcp; 403 struct msqid64_ds uninitialized_var(msqid64); 404 struct msg_queue *msq; 405 int err; 406 407 if (cmd == IPC_SET) { 408 if (copy_msqid_from_user(&msqid64, buf, version)) 409 return -EFAULT; 410 } 411 412 down_write(&msg_ids(ns).rwsem); 413 rcu_read_lock(); 414 415 ipcp = ipcctl_pre_down_nolock(ns, &msg_ids(ns), msqid, cmd, 416 &msqid64.msg_perm, msqid64.msg_qbytes); 417 if (IS_ERR(ipcp)) { 418 err = PTR_ERR(ipcp); 419 goto out_unlock1; 420 } 421 422 msq = container_of(ipcp, struct msg_queue, q_perm); 423 424 err = security_msg_queue_msgctl(msq, cmd); 425 if (err) 426 goto out_unlock1; 427 428 switch (cmd) { 429 case IPC_RMID: 430 ipc_lock_object(&msq->q_perm); 431 /* freeque unlocks the ipc object and rcu */ 432 freeque(ns, ipcp); 433 goto out_up; 434 case IPC_SET: 435 if (msqid64.msg_qbytes > ns->msg_ctlmnb && 436 !capable(CAP_SYS_RESOURCE)) { 437 err = -EPERM; 438 goto out_unlock1; 439 } 440 441 ipc_lock_object(&msq->q_perm); 442 err = ipc_update_perm(&msqid64.msg_perm, ipcp); 443 if (err) 444 goto out_unlock0; 445 446 msq->q_qbytes = msqid64.msg_qbytes; 447 448 msq->q_ctime = get_seconds(); 449 /* sleeping receivers might be excluded by 450 * stricter permissions. 451 */ 452 expunge_all(msq, -EAGAIN); 453 /* sleeping senders might be able to send 454 * due to a larger queue size. 455 */ 456 ss_wakeup(&msq->q_senders, 0); 457 break; 458 default: 459 err = -EINVAL; 460 goto out_unlock1; 461 } 462 463 out_unlock0: 464 ipc_unlock_object(&msq->q_perm); 465 out_unlock1: 466 rcu_read_unlock(); 467 out_up: 468 up_write(&msg_ids(ns).rwsem); 469 return err; 470 } 471 472 static int msgctl_nolock(struct ipc_namespace *ns, int msqid, 473 int cmd, int version, void __user *buf) 474 { 475 int err; 476 struct msg_queue *msq; 477 478 switch (cmd) { 479 case IPC_INFO: 480 case MSG_INFO: 481 { 482 struct msginfo msginfo; 483 int max_id; 484 485 if (!buf) 486 return -EFAULT; 487 488 /* 489 * We must not return kernel stack data. 490 * due to padding, it's not enough 491 * to set all member fields. 492 */ 493 err = security_msg_queue_msgctl(NULL, cmd); 494 if (err) 495 return err; 496 497 memset(&msginfo, 0, sizeof(msginfo)); 498 msginfo.msgmni = ns->msg_ctlmni; 499 msginfo.msgmax = ns->msg_ctlmax; 500 msginfo.msgmnb = ns->msg_ctlmnb; 501 msginfo.msgssz = MSGSSZ; 502 msginfo.msgseg = MSGSEG; 503 down_read(&msg_ids(ns).rwsem); 504 if (cmd == MSG_INFO) { 505 msginfo.msgpool = msg_ids(ns).in_use; 506 msginfo.msgmap = atomic_read(&ns->msg_hdrs); 507 msginfo.msgtql = atomic_read(&ns->msg_bytes); 508 } else { 509 msginfo.msgmap = MSGMAP; 510 msginfo.msgpool = MSGPOOL; 511 msginfo.msgtql = MSGTQL; 512 } 513 max_id = ipc_get_maxid(&msg_ids(ns)); 514 up_read(&msg_ids(ns).rwsem); 515 if (copy_to_user(buf, &msginfo, sizeof(struct msginfo))) 516 return -EFAULT; 517 return (max_id < 0) ? 0 : max_id; 518 } 519 520 case MSG_STAT: 521 case IPC_STAT: 522 { 523 struct msqid64_ds tbuf; 524 int success_return; 525 526 if (!buf) 527 return -EFAULT; 528 529 memset(&tbuf, 0, sizeof(tbuf)); 530 531 rcu_read_lock(); 532 if (cmd == MSG_STAT) { 533 msq = msq_obtain_object(ns, msqid); 534 if (IS_ERR(msq)) { 535 err = PTR_ERR(msq); 536 goto out_unlock; 537 } 538 success_return = msq->q_perm.id; 539 } else { 540 msq = msq_obtain_object_check(ns, msqid); 541 if (IS_ERR(msq)) { 542 err = PTR_ERR(msq); 543 goto out_unlock; 544 } 545 success_return = 0; 546 } 547 548 err = -EACCES; 549 if (ipcperms(ns, &msq->q_perm, S_IRUGO)) 550 goto out_unlock; 551 552 err = security_msg_queue_msgctl(msq, cmd); 553 if (err) 554 goto out_unlock; 555 556 kernel_to_ipc64_perm(&msq->q_perm, &tbuf.msg_perm); 557 tbuf.msg_stime = msq->q_stime; 558 tbuf.msg_rtime = msq->q_rtime; 559 tbuf.msg_ctime = msq->q_ctime; 560 tbuf.msg_cbytes = msq->q_cbytes; 561 tbuf.msg_qnum = msq->q_qnum; 562 tbuf.msg_qbytes = msq->q_qbytes; 563 tbuf.msg_lspid = msq->q_lspid; 564 tbuf.msg_lrpid = msq->q_lrpid; 565 rcu_read_unlock(); 566 567 if (copy_msqid_to_user(buf, &tbuf, version)) 568 return -EFAULT; 569 return success_return; 570 } 571 572 default: 573 return -EINVAL; 574 } 575 576 return err; 577 out_unlock: 578 rcu_read_unlock(); 579 return err; 580 } 581 582 SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) 583 { 584 int version; 585 struct ipc_namespace *ns; 586 587 if (msqid < 0 || cmd < 0) 588 return -EINVAL; 589 590 version = ipc_parse_version(&cmd); 591 ns = current->nsproxy->ipc_ns; 592 593 switch (cmd) { 594 case IPC_INFO: 595 case MSG_INFO: 596 case MSG_STAT: /* msqid is an index rather than a msg queue id */ 597 case IPC_STAT: 598 return msgctl_nolock(ns, msqid, cmd, version, buf); 599 case IPC_SET: 600 case IPC_RMID: 601 return msgctl_down(ns, msqid, cmd, buf, version); 602 default: 603 return -EINVAL; 604 } 605 } 606 607 static int testmsg(struct msg_msg *msg, long type, int mode) 608 { 609 switch(mode) 610 { 611 case SEARCH_ANY: 612 case SEARCH_NUMBER: 613 return 1; 614 case SEARCH_LESSEQUAL: 615 if (msg->m_type <=type) 616 return 1; 617 break; 618 case SEARCH_EQUAL: 619 if (msg->m_type == type) 620 return 1; 621 break; 622 case SEARCH_NOTEQUAL: 623 if (msg->m_type != type) 624 return 1; 625 break; 626 } 627 return 0; 628 } 629 630 static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg) 631 { 632 struct msg_receiver *msr, *t; 633 634 list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) { 635 if (testmsg(msg, msr->r_msgtype, msr->r_mode) && 636 !security_msg_queue_msgrcv(msq, msg, msr->r_tsk, 637 msr->r_msgtype, msr->r_mode)) { 638 639 list_del(&msr->r_list); 640 if (msr->r_maxsize < msg->m_ts) { 641 msr->r_msg = NULL; 642 wake_up_process(msr->r_tsk); 643 smp_mb(); 644 msr->r_msg = ERR_PTR(-E2BIG); 645 } else { 646 msr->r_msg = NULL; 647 msq->q_lrpid = task_pid_vnr(msr->r_tsk); 648 msq->q_rtime = get_seconds(); 649 wake_up_process(msr->r_tsk); 650 smp_mb(); 651 msr->r_msg = msg; 652 653 return 1; 654 } 655 } 656 } 657 return 0; 658 } 659 660 long do_msgsnd(int msqid, long mtype, void __user *mtext, 661 size_t msgsz, int msgflg) 662 { 663 struct msg_queue *msq; 664 struct msg_msg *msg; 665 int err; 666 struct ipc_namespace *ns; 667 668 ns = current->nsproxy->ipc_ns; 669 670 if (msgsz > ns->msg_ctlmax || (long) msgsz < 0 || msqid < 0) 671 return -EINVAL; 672 if (mtype < 1) 673 return -EINVAL; 674 675 msg = load_msg(mtext, msgsz); 676 if (IS_ERR(msg)) 677 return PTR_ERR(msg); 678 679 msg->m_type = mtype; 680 msg->m_ts = msgsz; 681 682 rcu_read_lock(); 683 msq = msq_obtain_object_check(ns, msqid); 684 if (IS_ERR(msq)) { 685 err = PTR_ERR(msq); 686 goto out_unlock1; 687 } 688 689 ipc_lock_object(&msq->q_perm); 690 691 for (;;) { 692 struct msg_sender s; 693 694 err = -EACCES; 695 if (ipcperms(ns, &msq->q_perm, S_IWUGO)) 696 goto out_unlock0; 697 698 err = security_msg_queue_msgsnd(msq, msg, msgflg); 699 if (err) 700 goto out_unlock0; 701 702 if (msgsz + msq->q_cbytes <= msq->q_qbytes && 703 1 + msq->q_qnum <= msq->q_qbytes) { 704 break; 705 } 706 707 /* queue full, wait: */ 708 if (msgflg & IPC_NOWAIT) { 709 err = -EAGAIN; 710 goto out_unlock0; 711 } 712 713 ss_add(msq, &s); 714 715 if (!ipc_rcu_getref(msq)) { 716 err = -EIDRM; 717 goto out_unlock0; 718 } 719 720 ipc_unlock_object(&msq->q_perm); 721 rcu_read_unlock(); 722 schedule(); 723 724 rcu_read_lock(); 725 ipc_lock_object(&msq->q_perm); 726 727 ipc_rcu_putref(msq, ipc_rcu_free); 728 if (msq->q_perm.deleted) { 729 err = -EIDRM; 730 goto out_unlock0; 731 } 732 733 ss_del(&s); 734 735 if (signal_pending(current)) { 736 err = -ERESTARTNOHAND; 737 goto out_unlock0; 738 } 739 740 } 741 msq->q_lspid = task_tgid_vnr(current); 742 msq->q_stime = get_seconds(); 743 744 if (!pipelined_send(msq, msg)) { 745 /* no one is waiting for this message, enqueue it */ 746 list_add_tail(&msg->m_list, &msq->q_messages); 747 msq->q_cbytes += msgsz; 748 msq->q_qnum++; 749 atomic_add(msgsz, &ns->msg_bytes); 750 atomic_inc(&ns->msg_hdrs); 751 } 752 753 err = 0; 754 msg = NULL; 755 756 out_unlock0: 757 ipc_unlock_object(&msq->q_perm); 758 out_unlock1: 759 rcu_read_unlock(); 760 if (msg != NULL) 761 free_msg(msg); 762 return err; 763 } 764 765 SYSCALL_DEFINE4(msgsnd, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz, 766 int, msgflg) 767 { 768 long mtype; 769 770 if (get_user(mtype, &msgp->mtype)) 771 return -EFAULT; 772 return do_msgsnd(msqid, mtype, msgp->mtext, msgsz, msgflg); 773 } 774 775 static inline int convert_mode(long *msgtyp, int msgflg) 776 { 777 if (msgflg & MSG_COPY) 778 return SEARCH_NUMBER; 779 /* 780 * find message of correct type. 781 * msgtyp = 0 => get first. 782 * msgtyp > 0 => get first message of matching type. 783 * msgtyp < 0 => get message with least type must be < abs(msgtype). 784 */ 785 if (*msgtyp == 0) 786 return SEARCH_ANY; 787 if (*msgtyp < 0) { 788 *msgtyp = -*msgtyp; 789 return SEARCH_LESSEQUAL; 790 } 791 if (msgflg & MSG_EXCEPT) 792 return SEARCH_NOTEQUAL; 793 return SEARCH_EQUAL; 794 } 795 796 static long do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz) 797 { 798 struct msgbuf __user *msgp = dest; 799 size_t msgsz; 800 801 if (put_user(msg->m_type, &msgp->mtype)) 802 return -EFAULT; 803 804 msgsz = (bufsz > msg->m_ts) ? msg->m_ts : bufsz; 805 if (store_msg(msgp->mtext, msg, msgsz)) 806 return -EFAULT; 807 return msgsz; 808 } 809 810 #ifdef CONFIG_CHECKPOINT_RESTORE 811 /* 812 * This function creates new kernel message structure, large enough to store 813 * bufsz message bytes. 814 */ 815 static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz) 816 { 817 struct msg_msg *copy; 818 819 /* 820 * Create dummy message to copy real message to. 821 */ 822 copy = load_msg(buf, bufsz); 823 if (!IS_ERR(copy)) 824 copy->m_ts = bufsz; 825 return copy; 826 } 827 828 static inline void free_copy(struct msg_msg *copy) 829 { 830 if (copy) 831 free_msg(copy); 832 } 833 #else 834 static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz) 835 { 836 return ERR_PTR(-ENOSYS); 837 } 838 839 static inline void free_copy(struct msg_msg *copy) 840 { 841 } 842 #endif 843 844 static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode) 845 { 846 struct msg_msg *msg, *found = NULL; 847 long count = 0; 848 849 list_for_each_entry(msg, &msq->q_messages, m_list) { 850 if (testmsg(msg, *msgtyp, mode) && 851 !security_msg_queue_msgrcv(msq, msg, current, 852 *msgtyp, mode)) { 853 if (mode == SEARCH_LESSEQUAL && msg->m_type != 1) { 854 *msgtyp = msg->m_type - 1; 855 found = msg; 856 } else if (mode == SEARCH_NUMBER) { 857 if (*msgtyp == count) 858 return msg; 859 } else 860 return msg; 861 count++; 862 } 863 } 864 865 return found ?: ERR_PTR(-EAGAIN); 866 } 867 868 long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgflg, 869 long (*msg_handler)(void __user *, struct msg_msg *, size_t)) 870 { 871 int mode; 872 struct msg_queue *msq; 873 struct ipc_namespace *ns; 874 struct msg_msg *msg, *copy = NULL; 875 876 ns = current->nsproxy->ipc_ns; 877 878 if (msqid < 0 || (long) bufsz < 0) 879 return -EINVAL; 880 881 if (msgflg & MSG_COPY) { 882 copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax)); 883 if (IS_ERR(copy)) 884 return PTR_ERR(copy); 885 } 886 mode = convert_mode(&msgtyp, msgflg); 887 888 rcu_read_lock(); 889 msq = msq_obtain_object_check(ns, msqid); 890 if (IS_ERR(msq)) { 891 rcu_read_unlock(); 892 free_copy(copy); 893 return PTR_ERR(msq); 894 } 895 896 for (;;) { 897 struct msg_receiver msr_d; 898 899 msg = ERR_PTR(-EACCES); 900 if (ipcperms(ns, &msq->q_perm, S_IRUGO)) 901 goto out_unlock1; 902 903 ipc_lock_object(&msq->q_perm); 904 msg = find_msg(msq, &msgtyp, mode); 905 if (!IS_ERR(msg)) { 906 /* 907 * Found a suitable message. 908 * Unlink it from the queue. 909 */ 910 if ((bufsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) { 911 msg = ERR_PTR(-E2BIG); 912 goto out_unlock0; 913 } 914 /* 915 * If we are copying, then do not unlink message and do 916 * not update queue parameters. 917 */ 918 if (msgflg & MSG_COPY) { 919 msg = copy_msg(msg, copy); 920 goto out_unlock0; 921 } 922 923 list_del(&msg->m_list); 924 msq->q_qnum--; 925 msq->q_rtime = get_seconds(); 926 msq->q_lrpid = task_tgid_vnr(current); 927 msq->q_cbytes -= msg->m_ts; 928 atomic_sub(msg->m_ts, &ns->msg_bytes); 929 atomic_dec(&ns->msg_hdrs); 930 ss_wakeup(&msq->q_senders, 0); 931 932 goto out_unlock0; 933 } 934 935 /* No message waiting. Wait for a message */ 936 if (msgflg & IPC_NOWAIT) { 937 msg = ERR_PTR(-ENOMSG); 938 goto out_unlock0; 939 } 940 941 list_add_tail(&msr_d.r_list, &msq->q_receivers); 942 msr_d.r_tsk = current; 943 msr_d.r_msgtype = msgtyp; 944 msr_d.r_mode = mode; 945 if (msgflg & MSG_NOERROR) 946 msr_d.r_maxsize = INT_MAX; 947 else 948 msr_d.r_maxsize = bufsz; 949 msr_d.r_msg = ERR_PTR(-EAGAIN); 950 current->state = TASK_INTERRUPTIBLE; 951 952 ipc_unlock_object(&msq->q_perm); 953 rcu_read_unlock(); 954 schedule(); 955 956 /* Lockless receive, part 1: 957 * Disable preemption. We don't hold a reference to the queue 958 * and getting a reference would defeat the idea of a lockless 959 * operation, thus the code relies on rcu to guarantee the 960 * existence of msq: 961 * Prior to destruction, expunge_all(-EIRDM) changes r_msg. 962 * Thus if r_msg is -EAGAIN, then the queue not yet destroyed. 963 * rcu_read_lock() prevents preemption between reading r_msg 964 * and acquiring the q_perm.lock in ipc_lock_object(). 965 */ 966 rcu_read_lock(); 967 968 /* Lockless receive, part 2: 969 * Wait until pipelined_send or expunge_all are outside of 970 * wake_up_process(). There is a race with exit(), see 971 * ipc/mqueue.c for the details. 972 */ 973 msg = (struct msg_msg*)msr_d.r_msg; 974 while (msg == NULL) { 975 cpu_relax(); 976 msg = (struct msg_msg *)msr_d.r_msg; 977 } 978 979 /* Lockless receive, part 3: 980 * If there is a message or an error then accept it without 981 * locking. 982 */ 983 if (msg != ERR_PTR(-EAGAIN)) 984 goto out_unlock1; 985 986 /* Lockless receive, part 3: 987 * Acquire the queue spinlock. 988 */ 989 ipc_lock_object(&msq->q_perm); 990 991 /* Lockless receive, part 4: 992 * Repeat test after acquiring the spinlock. 993 */ 994 msg = (struct msg_msg*)msr_d.r_msg; 995 if (msg != ERR_PTR(-EAGAIN)) 996 goto out_unlock0; 997 998 list_del(&msr_d.r_list); 999 if (signal_pending(current)) { 1000 msg = ERR_PTR(-ERESTARTNOHAND); 1001 goto out_unlock0; 1002 } 1003 1004 ipc_unlock_object(&msq->q_perm); 1005 } 1006 1007 out_unlock0: 1008 ipc_unlock_object(&msq->q_perm); 1009 out_unlock1: 1010 rcu_read_unlock(); 1011 if (IS_ERR(msg)) { 1012 free_copy(copy); 1013 return PTR_ERR(msg); 1014 } 1015 1016 bufsz = msg_handler(buf, msg, bufsz); 1017 free_msg(msg); 1018 1019 return bufsz; 1020 } 1021 1022 SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz, 1023 long, msgtyp, int, msgflg) 1024 { 1025 return do_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg, do_msg_fill); 1026 } 1027 1028 #ifdef CONFIG_PROC_FS 1029 static int sysvipc_msg_proc_show(struct seq_file *s, void *it) 1030 { 1031 struct user_namespace *user_ns = seq_user_ns(s); 1032 struct msg_queue *msq = it; 1033 1034 return seq_printf(s, 1035 "%10d %10d %4o %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n", 1036 msq->q_perm.key, 1037 msq->q_perm.id, 1038 msq->q_perm.mode, 1039 msq->q_cbytes, 1040 msq->q_qnum, 1041 msq->q_lspid, 1042 msq->q_lrpid, 1043 from_kuid_munged(user_ns, msq->q_perm.uid), 1044 from_kgid_munged(user_ns, msq->q_perm.gid), 1045 from_kuid_munged(user_ns, msq->q_perm.cuid), 1046 from_kgid_munged(user_ns, msq->q_perm.cgid), 1047 msq->q_stime, 1048 msq->q_rtime, 1049 msq->q_ctime); 1050 } 1051 #endif 1052