/*
 * POSIX message queues filesystem for Linux.
 *
 * Copyright (C) 2003,2004  Krzysztof Benedyczak  (golbi@mat.uni.torun.pl)
 *                          Michal Wronski        (michal.wronski@gmail.com)
 *
 * Spinlocks:               Mohamed Abbas         (abbas.mohamed@intel.com)
 * Lockless receive & send, fd based notify:
 *                          Manfred Spraul        (manfred@colorfullife.com)
 *
 * Audit:                   George Wilson         (ltcgcw@us.ibm.com)
 *
 * This file is released under the GPL.
 */

#include <linux/capability.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/mount.h>
#include <linux/fs_context.h>
#include <linux/namei.h>
#include <linux/sysctl.h>
#include <linux/poll.h>
#include <linux/mqueue.h>
#include <linux/msg.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/netlink.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/signal.h>
#include <linux/mutex.h>
#include <linux/nsproxy.h>
#include <linux/pid.h>
#include <linux/ipc_namespace.h>
#include <linux/user_namespace.h>
#include <linux/slab.h>
#include <linux/sched/wake_q.h>
#include <linux/sched/signal.h>
#include <linux/sched/user.h>

#include <net/sock.h>
#include "util.h"

struct mqueue_fs_context {
	struct ipc_namespace	*ipc_ns;
};

#define MQUEUE_MAGIC	0x19800202
#define DIRENT_SIZE	20
#define FILENT_SIZE	80

#define SEND		0
#define RECV		1

#define STATE_NONE	0
#define STATE_READY	1

struct posix_msg_tree_node {
	struct rb_node		rb_node;
	struct list_head	msg_list;
	int			priority;
};

struct ext_wait_queue {		/* queue of sleeping tasks */
	struct task_struct *task;
	struct list_head list;
	struct msg_msg *msg;	/* ptr of loaded message */
	int state;		/* one of STATE_* values */
};

struct mqueue_inode_info {
	spinlock_t lock;
	struct inode vfs_inode;
	wait_queue_head_t wait_q;

	struct rb_root msg_tree;
	struct rb_node *msg_tree_rightmost;
	struct posix_msg_tree_node *node_cache;
	struct mq_attr attr;

	struct sigevent notify;
	struct pid *notify_owner;
	struct user_namespace *notify_user_ns;
	struct user_struct *user;	/* user who created, for accounting */
	struct sock *notify_sock;
	struct sk_buff *notify_cookie;

	/* for tasks waiting for free space and messages, respectively */
	struct ext_wait_queue e_wait_q[2];

	unsigned long qsize; /* size of queue in memory (sum of all msgs) */
};

static struct file_system_type mqueue_fs_type;
static const struct inode_operations mqueue_dir_inode_operations;
static const struct file_operations mqueue_file_operations;
static const struct super_operations mqueue_super_ops;
static const struct fs_context_operations mqueue_fs_context_ops;
static void remove_notification(struct mqueue_inode_info *info);

static struct kmem_cache *mqueue_inode_cachep;

static struct ctl_table_header *mq_sysctl_table;

static inline struct mqueue_inode_info *MQUEUE_I(struct inode *inode)
{
	return container_of(inode, struct mqueue_inode_info, vfs_inode);
}

/*
 * This routine should be called with the mq_lock held.
 */
static inline struct ipc_namespace *__get_ns_from_inode(struct inode *inode)
{
	return get_ipc_ns(inode->i_sb->s_fs_info);
}

static struct ipc_namespace *get_ns_from_inode(struct inode *inode)
{
	struct ipc_namespace *ns;

	spin_lock(&mq_lock);
	ns = __get_ns_from_inode(inode);
	spin_unlock(&mq_lock);
	return ns;
}

/* Auxiliary functions to manipulate messages' list */
static int msg_insert(struct msg_msg *msg, struct mqueue_inode_info *info)
{
	struct rb_node **p, *parent = NULL;
	struct posix_msg_tree_node *leaf;
	bool rightmost = true;

	p = &info->msg_tree.rb_node;
	while (*p) {
		parent = *p;
		leaf = rb_entry(parent, struct posix_msg_tree_node, rb_node);

		if (likely(leaf->priority == msg->m_type))
			goto insert_msg;
		else if (msg->m_type < leaf->priority) {
			p = &(*p)->rb_left;
			rightmost = false;
		} else
			p = &(*p)->rb_right;
	}
	if (info->node_cache) {
		leaf = info->node_cache;
		info->node_cache = NULL;
	} else {
		leaf = kmalloc(sizeof(*leaf), GFP_ATOMIC);
		if (!leaf)
			return -ENOMEM;
		INIT_LIST_HEAD(&leaf->msg_list);
	}
	leaf->priority = msg->m_type;

	if (rightmost)
		info->msg_tree_rightmost = &leaf->rb_node;

	rb_link_node(&leaf->rb_node, parent, p);
	rb_insert_color(&leaf->rb_node, &info->msg_tree);
insert_msg:
	info->attr.mq_curmsgs++;
	info->qsize += msg->m_ts;
	list_add_tail(&msg->m_list, &leaf->msg_list);
	return 0;
}

static inline void msg_tree_erase(struct posix_msg_tree_node *leaf,
				  struct mqueue_inode_info *info)
{
	struct rb_node *node = &leaf->rb_node;

	if (info->msg_tree_rightmost == node)
		info->msg_tree_rightmost = rb_prev(node);

	rb_erase(node, &info->msg_tree);
	if (info->node_cache) {
		kfree(leaf);
	} else {
		info->node_cache = leaf;
	}
}

static inline struct msg_msg *msg_get(struct mqueue_inode_info *info)
{
	struct rb_node *parent = NULL;
	struct posix_msg_tree_node *leaf;
	struct msg_msg *msg;

try_again:
	/*
	 * During insert, low priorities go to the left and high to the
	 * right.  On receive, we want the highest priorities first, so
	 * walk all the way to the right.
	 */
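	/*
	 * Illustrative example (values invented for the description, not
	 * taken from this file): if messages arrive with priorities 3, 1
	 * and 5, the priority-5 leaf becomes the rightmost node, so
	 * receives drain the priorities in the order 5, 3, 1, while
	 * messages of equal priority come back FIFO from that leaf's
	 * msg_list.
	 */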
	parent = info->msg_tree_rightmost;
	if (!parent) {
		if (info->attr.mq_curmsgs) {
			pr_warn_once("Inconsistency in POSIX message queue, "
				     "no tree element, but supposedly messages "
				     "should exist!\n");
			info->attr.mq_curmsgs = 0;
		}
		return NULL;
	}
	leaf = rb_entry(parent, struct posix_msg_tree_node, rb_node);
	if (unlikely(list_empty(&leaf->msg_list))) {
		pr_warn_once("Inconsistency in POSIX message queue, "
			     "empty leaf node but we haven't implemented "
			     "lazy leaf delete!\n");
		msg_tree_erase(leaf, info);
		goto try_again;
	} else {
		msg = list_first_entry(&leaf->msg_list,
				       struct msg_msg, m_list);
		list_del(&msg->m_list);
		if (list_empty(&leaf->msg_list)) {
			msg_tree_erase(leaf, info);
		}
	}
	info->attr.mq_curmsgs--;
	info->qsize -= msg->m_ts;
	return msg;
}

static struct inode *mqueue_get_inode(struct super_block *sb,
		struct ipc_namespace *ipc_ns, umode_t mode,
		struct mq_attr *attr)
{
	struct user_struct *u = current_user();
	struct inode *inode;
	int ret = -ENOMEM;

	inode = new_inode(sb);
	if (!inode)
		goto err;

	inode->i_ino = get_next_ino();
	inode->i_mode = mode;
	inode->i_uid = current_fsuid();
	inode->i_gid = current_fsgid();
	inode->i_mtime = inode->i_ctime = inode->i_atime = current_time(inode);

	if (S_ISREG(mode)) {
		struct mqueue_inode_info *info;
		unsigned long mq_bytes, mq_treesize;

		inode->i_fop = &mqueue_file_operations;
		inode->i_size = FILENT_SIZE;
		/* mqueue specific info */
		info = MQUEUE_I(inode);
		spin_lock_init(&info->lock);
		init_waitqueue_head(&info->wait_q);
		INIT_LIST_HEAD(&info->e_wait_q[0].list);
		INIT_LIST_HEAD(&info->e_wait_q[1].list);
		info->notify_owner = NULL;
		info->notify_user_ns = NULL;
		info->qsize = 0;
		info->user = NULL;	/* set when all is ok */
		info->msg_tree = RB_ROOT;
		info->msg_tree_rightmost = NULL;
		info->node_cache = NULL;
		memset(&info->attr, 0, sizeof(info->attr));
		info->attr.mq_maxmsg = min(ipc_ns->mq_msg_max,
					   ipc_ns->mq_msg_default);
		info->attr.mq_msgsize = min(ipc_ns->mq_msgsize_max,
					    ipc_ns->mq_msgsize_default);
		if (attr) {
			info->attr.mq_maxmsg = attr->mq_maxmsg;
			info->attr.mq_msgsize = attr->mq_msgsize;
		}
		/*
		 * We used to allocate a static array of pointers and account
		 * the size of that array as well as one msg_msg struct per
		 * possible message into the queue size. That's no longer
		 * accurate as the queue is now an rbtree and will grow and
		 * shrink depending on usage patterns.  We can, however, still
		 * account one msg_msg struct per message, but the nodes are
		 * allocated depending on priority usage, and most programs
		 * only use one, or a handful, of priorities.  However, since
		 * this is pinned memory, we need to assume worst case, so
		 * that means the min(mq_maxmsg, max_priorities) * struct
		 * posix_msg_tree_node.
		 */
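		/*
		 * Worked example of the accounting below (numbers are only
		 * illustrative, not defaults defined here): with
		 * mq_maxmsg = 10 and mq_msgsize = 8192,
		 *   mq_treesize = 10 * sizeof(struct msg_msg) +
		 *                 min(10, MQ_PRIO_MAX) *
		 *                 sizeof(struct posix_msg_tree_node)
		 *   mq_bytes    = 10 * 8192 + mq_treesize
		 * and mq_bytes is what gets charged against the user's
		 * RLIMIT_MSGQUEUE.
		 */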

		ret = -EINVAL;
		if (info->attr.mq_maxmsg <= 0 || info->attr.mq_msgsize <= 0)
			goto out_inode;
		if (capable(CAP_SYS_RESOURCE)) {
			if (info->attr.mq_maxmsg > HARD_MSGMAX ||
			    info->attr.mq_msgsize > HARD_MSGSIZEMAX)
				goto out_inode;
		} else {
			if (info->attr.mq_maxmsg > ipc_ns->mq_msg_max ||
			    info->attr.mq_msgsize > ipc_ns->mq_msgsize_max)
				goto out_inode;
		}
		ret = -EOVERFLOW;
		/* check for overflow */
		if (info->attr.mq_msgsize > ULONG_MAX/info->attr.mq_maxmsg)
			goto out_inode;
		mq_treesize = info->attr.mq_maxmsg * sizeof(struct msg_msg) +
			min_t(unsigned int, info->attr.mq_maxmsg, MQ_PRIO_MAX) *
			sizeof(struct posix_msg_tree_node);
		mq_bytes = info->attr.mq_maxmsg * info->attr.mq_msgsize;
		if (mq_bytes + mq_treesize < mq_bytes)
			goto out_inode;
		mq_bytes += mq_treesize;
		spin_lock(&mq_lock);
		if (u->mq_bytes + mq_bytes < u->mq_bytes ||
		    u->mq_bytes + mq_bytes > rlimit(RLIMIT_MSGQUEUE)) {
			spin_unlock(&mq_lock);
			/* mqueue_evict_inode() releases info->messages */
			ret = -EMFILE;
			goto out_inode;
		}
		u->mq_bytes += mq_bytes;
		spin_unlock(&mq_lock);

		/* all is ok */
		info->user = get_uid(u);
	} else if (S_ISDIR(mode)) {
		inc_nlink(inode);
		/* Some things misbehave if size == 0 on a directory */
		inode->i_size = 2 * DIRENT_SIZE;
		inode->i_op = &mqueue_dir_inode_operations;
		inode->i_fop = &simple_dir_operations;
	}

	return inode;
out_inode:
	iput(inode);
err:
	return ERR_PTR(ret);
}

static int mqueue_fill_super(struct super_block *sb, struct fs_context *fc)
{
	struct inode *inode;
	struct ipc_namespace *ns = sb->s_fs_info;

	sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV;
	sb->s_blocksize = PAGE_SIZE;
	sb->s_blocksize_bits = PAGE_SHIFT;
	sb->s_magic = MQUEUE_MAGIC;
	sb->s_op = &mqueue_super_ops;

	inode = mqueue_get_inode(sb, ns, S_IFDIR | S_ISVTX | S_IRWXUGO, NULL);
	if (IS_ERR(inode))
		return PTR_ERR(inode);

	sb->s_root = d_make_root(inode);
	if (!sb->s_root)
		return -ENOMEM;
	return 0;
}

static int mqueue_get_tree(struct fs_context *fc)
{
	struct mqueue_fs_context *ctx = fc->fs_private;

	fc->s_fs_info = ctx->ipc_ns;
	return vfs_get_super(fc, vfs_get_keyed_super, mqueue_fill_super);
}

static void mqueue_fs_context_free(struct fs_context *fc)
{
	struct mqueue_fs_context *ctx = fc->fs_private;

	put_ipc_ns(ctx->ipc_ns);
	kfree(ctx);
}

static int mqueue_init_fs_context(struct fs_context *fc)
{
	struct mqueue_fs_context *ctx;

	ctx = kzalloc(sizeof(struct mqueue_fs_context), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	ctx->ipc_ns = get_ipc_ns(current->nsproxy->ipc_ns);
	put_user_ns(fc->user_ns);
	fc->user_ns = get_user_ns(ctx->ipc_ns->user_ns);
	fc->fs_private = ctx;
	fc->ops = &mqueue_fs_context_ops;
	return 0;
}

static struct vfsmount *mq_create_mount(struct ipc_namespace *ns)
{
	struct mqueue_fs_context *ctx;
	struct fs_context *fc;
	struct vfsmount *mnt;

	fc = fs_context_for_mount(&mqueue_fs_type, SB_KERNMOUNT);
	if (IS_ERR(fc))
		return ERR_CAST(fc);

	ctx = fc->fs_private;
	put_ipc_ns(ctx->ipc_ns);
	ctx->ipc_ns = get_ipc_ns(ns);
	put_user_ns(fc->user_ns);
	fc->user_ns = get_user_ns(ctx->ipc_ns->user_ns);

	mnt = fc_mount(fc);
	put_fs_context(fc);
	return mnt;
}

static void init_once(void *foo)
{
	struct mqueue_inode_info *p = (struct mqueue_inode_info *) foo;

	inode_init_once(&p->vfs_inode);
}

static struct inode *mqueue_alloc_inode(struct super_block *sb)
{
	struct mqueue_inode_info *ei;

	ei = kmem_cache_alloc(mqueue_inode_cachep, GFP_KERNEL);
	if (!ei)
		return NULL;
	return &ei->vfs_inode;
}

static void mqueue_free_inode(struct inode *inode)
{
	kmem_cache_free(mqueue_inode_cachep, MQUEUE_I(inode));
}

static void mqueue_evict_inode(struct inode *inode)
{
	struct mqueue_inode_info *info;
	struct user_struct *user;
	struct ipc_namespace *ipc_ns;
	struct msg_msg *msg, *nmsg;
	LIST_HEAD(tmp_msg);

	clear_inode(inode);

	if (S_ISDIR(inode->i_mode))
		return;

	ipc_ns = get_ns_from_inode(inode);
	info = MQUEUE_I(inode);
	spin_lock(&info->lock);
	while ((msg = msg_get(info)) != NULL)
		list_add_tail(&msg->m_list, &tmp_msg);
	kfree(info->node_cache);
	spin_unlock(&info->lock);

	list_for_each_entry_safe(msg, nmsg, &tmp_msg, m_list) {
		list_del(&msg->m_list);
		free_msg(msg);
	}

	user = info->user;
	if (user) {
		unsigned long mq_bytes, mq_treesize;

		/* Total amount of bytes accounted for the mqueue */
		mq_treesize = info->attr.mq_maxmsg * sizeof(struct msg_msg) +
			min_t(unsigned int, info->attr.mq_maxmsg, MQ_PRIO_MAX) *
			sizeof(struct posix_msg_tree_node);

		mq_bytes = mq_treesize + (info->attr.mq_maxmsg *
					  info->attr.mq_msgsize);

		spin_lock(&mq_lock);
		user->mq_bytes -= mq_bytes;
		/*
		 * get_ns_from_inode() ensures that the
		 * (ipc_ns = sb->s_fs_info) is either a valid ipc_ns
		 * to which we now hold a reference, or it is NULL.
		 * We can't put it here under mq_lock, though.
		 */
		if (ipc_ns)
			ipc_ns->mq_queues_count--;
		spin_unlock(&mq_lock);
		free_uid(user);
	}
	if (ipc_ns)
		put_ipc_ns(ipc_ns);
}

static int mqueue_create_attr(struct dentry *dentry, umode_t mode, void *arg)
{
	struct inode *dir = dentry->d_parent->d_inode;
	struct inode *inode;
	struct mq_attr *attr = arg;
	int error;
	struct ipc_namespace *ipc_ns;

	spin_lock(&mq_lock);
	ipc_ns = __get_ns_from_inode(dir);
	if (!ipc_ns) {
		error = -EACCES;
		goto out_unlock;
	}

	if (ipc_ns->mq_queues_count >= ipc_ns->mq_queues_max &&
	    !capable(CAP_SYS_RESOURCE)) {
		error = -ENOSPC;
		goto out_unlock;
	}
	ipc_ns->mq_queues_count++;
	spin_unlock(&mq_lock);

	inode = mqueue_get_inode(dir->i_sb, ipc_ns, mode, attr);
	if (IS_ERR(inode)) {
		error = PTR_ERR(inode);
		spin_lock(&mq_lock);
		ipc_ns->mq_queues_count--;
		goto out_unlock;
	}

	put_ipc_ns(ipc_ns);
	dir->i_size += DIRENT_SIZE;
	dir->i_ctime = dir->i_mtime = dir->i_atime = current_time(dir);

	d_instantiate(dentry, inode);
	dget(dentry);
	return 0;
out_unlock:
	spin_unlock(&mq_lock);
	if (ipc_ns)
		put_ipc_ns(ipc_ns);
	return error;
}

static int mqueue_create(struct inode *dir, struct dentry *dentry,
			 umode_t mode, bool excl)
{
	return mqueue_create_attr(dentry, mode, NULL);
}

static int mqueue_unlink(struct inode *dir, struct dentry *dentry)
{
	struct inode *inode = d_inode(dentry);

	dir->i_ctime = dir->i_mtime = dir->i_atime = current_time(dir);
	dir->i_size -= DIRENT_SIZE;
	drop_nlink(inode);
	dput(dentry);
	return 0;
}

/*
 * This routine handles a read() on a queue file.
 * To avoid having to implement some sort of mq_receive() here, we only
 * allow reading the queue size and notification info (the only values
 * that are interesting from the user's point of view and aren't
 * accessible through the standard message-queue calls).
 */
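/*
 * Example of the resulting output (illustrative values only; the actual
 * numbers depend on the queue state and follow the snprintf() format below):
 *
 *	QSIZE:129        NOTIFY:2     SIGNO:0     NOTIFY_PID:8260
 */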
static ssize_t mqueue_read_file(struct file *filp, char __user *u_data,
				size_t count, loff_t *off)
{
	struct mqueue_inode_info *info = MQUEUE_I(file_inode(filp));
	char buffer[FILENT_SIZE];
	ssize_t ret;

	spin_lock(&info->lock);
	snprintf(buffer, sizeof(buffer),
		 "QSIZE:%-10lu NOTIFY:%-5d SIGNO:%-5d NOTIFY_PID:%-6d\n",
		 info->qsize,
		 info->notify_owner ? info->notify.sigev_notify : 0,
		 (info->notify_owner &&
		  info->notify.sigev_notify == SIGEV_SIGNAL) ?
			info->notify.sigev_signo : 0,
		 pid_vnr(info->notify_owner));
	spin_unlock(&info->lock);
	buffer[sizeof(buffer)-1] = '\0';

	ret = simple_read_from_buffer(u_data, count, off, buffer,
				      strlen(buffer));
	if (ret <= 0)
		return ret;

	file_inode(filp)->i_atime = file_inode(filp)->i_ctime = current_time(file_inode(filp));
	return ret;
}

static int mqueue_flush_file(struct file *filp, fl_owner_t id)
{
	struct mqueue_inode_info *info = MQUEUE_I(file_inode(filp));

	spin_lock(&info->lock);
	if (task_tgid(current) == info->notify_owner)
		remove_notification(info);

	spin_unlock(&info->lock);
	return 0;
}

static __poll_t mqueue_poll_file(struct file *filp, struct poll_table_struct *poll_tab)
{
	struct mqueue_inode_info *info = MQUEUE_I(file_inode(filp));
	__poll_t retval = 0;

	poll_wait(filp, &info->wait_q, poll_tab);

	spin_lock(&info->lock);
	if (info->attr.mq_curmsgs)
		retval = EPOLLIN | EPOLLRDNORM;

	if (info->attr.mq_curmsgs < info->attr.mq_maxmsg)
		retval |= EPOLLOUT | EPOLLWRNORM;
	spin_unlock(&info->lock);

	return retval;
}

/* Adds current to info->e_wait_q[sr] before element with smaller prio */
static void wq_add(struct mqueue_inode_info *info, int sr,
		   struct ext_wait_queue *ewp)
{
	struct ext_wait_queue *walk;

	list_for_each_entry(walk, &info->e_wait_q[sr].list, list) {
		if (walk->task->prio <= current->prio) {
			list_add_tail(&ewp->list, &walk->list);
			return;
		}
	}
	list_add_tail(&ewp->list, &info->e_wait_q[sr].list);
}

/*
 * Puts current task to sleep. Caller must hold queue lock. After return
 * lock isn't held.
 * sr: SEND or RECV
 */
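/*
 * Reading aid for the double STATE_READY test in the loop below (this only
 * restates what the code does): a pipelined sender/receiver sets STATE_READY
 * while still holding info->lock and calls wake_up_q() only after dropping
 * it, so a task that wakes up first checks the state locklessly.  If it woke
 * for another reason (timeout or signal), it retakes info->lock and checks
 * again, because a hand-off may have completed in the meantime; only if the
 * state is still not READY does it remove itself from the wait list and
 * return -ETIMEDOUT or -ERESTARTSYS.
 */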
static int wq_sleep(struct mqueue_inode_info *info, int sr,
		    ktime_t *timeout, struct ext_wait_queue *ewp)
	__releases(&info->lock)
{
	int retval;
	signed long time;

	wq_add(info, sr, ewp);

	for (;;) {
		__set_current_state(TASK_INTERRUPTIBLE);

		spin_unlock(&info->lock);
		time = schedule_hrtimeout_range_clock(timeout, 0,
			HRTIMER_MODE_ABS, CLOCK_REALTIME);

		if (ewp->state == STATE_READY) {
			retval = 0;
			goto out;
		}
		spin_lock(&info->lock);
		if (ewp->state == STATE_READY) {
			retval = 0;
			goto out_unlock;
		}
		if (signal_pending(current)) {
			retval = -ERESTARTSYS;
			break;
		}
		if (time == 0) {
			retval = -ETIMEDOUT;
			break;
		}
	}
	list_del(&ewp->list);
out_unlock:
	spin_unlock(&info->lock);
out:
	return retval;
}

/*
 * Returns the waiting task that should be serviced first, or NULL if none
 * exists.
 */
static struct ext_wait_queue *wq_get_first_waiter(
		struct mqueue_inode_info *info, int sr)
{
	struct list_head *ptr;

	ptr = info->e_wait_q[sr].list.prev;
	if (ptr == &info->e_wait_q[sr].list)
		return NULL;
	return list_entry(ptr, struct ext_wait_queue, list);
}

static inline void set_cookie(struct sk_buff *skb, char code)
{
	((char *)skb->data)[NOTIFY_COOKIE_LEN-1] = code;
}

/*
 * The next function only exists to split up an overly long
 * sys_mq_timedsend().
 */
static void __do_notify(struct mqueue_inode_info *info)
{
	/* notification
	 * invoked when there is a registered process, no process is waiting
	 * synchronously for a message AND the state of the queue changed
	 * from empty to not empty. Here we are sure that no one is waiting
	 * synchronously. */
	if (info->notify_owner &&
	    info->attr.mq_curmsgs == 1) {
		struct kernel_siginfo sig_i;
		switch (info->notify.sigev_notify) {
		case SIGEV_NONE:
			break;
		case SIGEV_SIGNAL:
			/* sends signal */

			clear_siginfo(&sig_i);
			sig_i.si_signo = info->notify.sigev_signo;
			sig_i.si_errno = 0;
			sig_i.si_code = SI_MESGQ;
			sig_i.si_value = info->notify.sigev_value;
			/* map current pid/uid into info->owner's namespaces */
			rcu_read_lock();
			sig_i.si_pid = task_tgid_nr_ns(current,
						ns_of_pid(info->notify_owner));
			sig_i.si_uid = from_kuid_munged(info->notify_user_ns, current_uid());
			rcu_read_unlock();

			kill_pid_info(info->notify.sigev_signo,
				      &sig_i, info->notify_owner);
			break;
		case SIGEV_THREAD:
			set_cookie(info->notify_cookie, NOTIFY_WOKENUP);
			netlink_sendskb(info->notify_sock, info->notify_cookie);
			break;
		}
		/* after notification unregisters process */
		put_pid(info->notify_owner);
		put_user_ns(info->notify_user_ns);
		info->notify_owner = NULL;
		info->notify_user_ns = NULL;
	}
	wake_up(&info->wait_q);
}

static int prepare_timeout(const struct __kernel_timespec __user *u_abs_timeout,
			   struct timespec64 *ts)
{
	if (get_timespec64(ts, u_abs_timeout))
		return -EFAULT;
	if (!timespec64_valid(ts))
		return -EINVAL;
	return 0;
}

static void remove_notification(struct mqueue_inode_info *info)
{
	if (info->notify_owner != NULL &&
	    info->notify.sigev_notify == SIGEV_THREAD) {
		set_cookie(info->notify_cookie, NOTIFY_REMOVED);
		netlink_sendskb(info->notify_sock, info->notify_cookie);
	}
	put_pid(info->notify_owner);
	put_user_ns(info->notify_user_ns);
	info->notify_owner = NULL;
	info->notify_user_ns = NULL;
}

static int prepare_open(struct dentry *dentry, int oflag, int ro,
			umode_t mode, struct filename *name,
			struct mq_attr *attr)
{
	static const int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE,
						  MAY_READ | MAY_WRITE };
	int acc;

	if (d_really_is_negative(dentry)) {
		if (!(oflag & O_CREAT))
			return -ENOENT;
		if (ro)
			return ro;
		audit_inode_parent_hidden(name, dentry->d_parent);
		return vfs_mkobj(dentry, mode & ~current_umask(),
				 mqueue_create_attr, attr);
	}
	/* it already existed */
	audit_inode(name, dentry, 0);
	if ((oflag & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
		return -EEXIST;
	if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY))
		return -EINVAL;
	acc = oflag2acc[oflag & O_ACCMODE];
	return inode_permission(d_inode(dentry), acc);
}

static int do_mq_open(const char __user *u_name, int oflag, umode_t mode,
		      struct mq_attr *attr)
{
	struct vfsmount *mnt = current->nsproxy->ipc_ns->mq_mnt;
	struct dentry *root = mnt->mnt_root;
	struct filename *name;
	struct path path;
	int fd, error;
	int ro;

	audit_mq_open(oflag, mode, attr);

	if (IS_ERR(name = getname(u_name)))
		return PTR_ERR(name);

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0)
		goto out_putname;

	ro = mnt_want_write(mnt);	/* we'll drop it in any case */
	inode_lock(d_inode(root));
	path.dentry = lookup_one_len(name->name, root, strlen(name->name));
	if (IS_ERR(path.dentry)) {
		error = PTR_ERR(path.dentry);
		goto out_putfd;
	}
	path.mnt = mntget(mnt);
	error = prepare_open(path.dentry, oflag, ro, mode, name, attr);
	if (!error) {
		struct file *file = dentry_open(&path, oflag, current_cred());
		if (!IS_ERR(file))
			fd_install(fd, file);
		else
			error = PTR_ERR(file);
	}
	path_put(&path);
out_putfd:
	if (error) {
		put_unused_fd(fd);
		fd = error;
	}
	inode_unlock(d_inode(root));
	if (!ro)
		mnt_drop_write(mnt);
out_putname:
	putname(name);
	return fd;
}

SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode,
		struct mq_attr __user *, u_attr)
{
	struct mq_attr attr;
	if (u_attr && copy_from_user(&attr, u_attr, sizeof(struct mq_attr)))
		return -EFAULT;

	return do_mq_open(u_name, oflag, mode, u_attr ? &attr : NULL);
}

SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
{
	int err;
	struct filename *name;
	struct dentry *dentry;
	struct inode *inode = NULL;
	struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
	struct vfsmount *mnt = ipc_ns->mq_mnt;

	name = getname(u_name);
	if (IS_ERR(name))
		return PTR_ERR(name);

	audit_inode_parent_hidden(name, mnt->mnt_root);
	err = mnt_want_write(mnt);
	if (err)
		goto out_name;
	inode_lock_nested(d_inode(mnt->mnt_root), I_MUTEX_PARENT);
	dentry = lookup_one_len(name->name, mnt->mnt_root,
				strlen(name->name));
	if (IS_ERR(dentry)) {
		err = PTR_ERR(dentry);
		goto out_unlock;
	}

	inode = d_inode(dentry);
	if (!inode) {
		err = -ENOENT;
	} else {
		ihold(inode);
		err = vfs_unlink(d_inode(dentry->d_parent), dentry, NULL);
	}
	dput(dentry);

out_unlock:
	inode_unlock(d_inode(mnt->mnt_root));
	if (inode)
		iput(inode);
	mnt_drop_write(mnt);
out_name:
	putname(name);

	return err;
}

/* Pipelined send and receive functions.
 *
 * If a receiver finds no waiting message, then it registers itself in the
 * list of waiting receivers. A sender checks that list before adding the new
 * message into the message array. If there is a waiting receiver, then it
 * bypasses the message array and directly hands the message over to the
 * receiver. The receiver accepts the message and returns without grabbing the
 * queue spinlock:
 *
 * - Set pointer to message.
 * - Queue the receiver task for later wakeup (without the info->lock).
 * - Update its state to STATE_READY. Now the receiver can continue.
 * - Wake up the process after the lock is dropped. Should the process wake up
 *   before this wakeup (due to a timeout or a signal) it will either see
 *   STATE_READY and continue or acquire the lock to check the state again.
 *
 * The same algorithm is used for senders.
 */
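/*
 * Reading aid (restating the flow below, nothing new): do_mq_timedsend()
 * takes the pipelined path only when wq_get_first_waiter(info, RECV) finds a
 * sleeping receiver; otherwise the message goes through msg_insert() and
 * __do_notify() (which fires a registered notification only when the queue
 * goes from empty to non-empty).  Symmetrically, do_mq_timedreceive() calls
 * pipelined_receive() after msg_get(), so that one blocked sender can slot
 * its message into the place that was just freed.
 */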
/* pipelined_send() - send a message directly to the task waiting in
 * sys_mq_timedreceive() (without inserting message into a queue).
 */
static inline void pipelined_send(struct wake_q_head *wake_q,
				  struct mqueue_inode_info *info,
				  struct msg_msg *message,
				  struct ext_wait_queue *receiver)
{
	receiver->msg = message;
	list_del(&receiver->list);
	wake_q_add(wake_q, receiver->task);
	/*
	 * Rely on the implicit cmpxchg barrier from wake_q_add such
	 * that we can ensure that updating receiver->state is the last
	 * write operation: As once set, the receiver can continue,
	 * and if we don't have the reference count from the wake_q,
	 * yet, at that point we can later have a use-after-free
	 * condition and bogus wakeup.
	 */
	receiver->state = STATE_READY;
}

/* pipelined_receive() - if there is a task waiting in sys_mq_timedsend(),
 * take its message and put it into the queue (we have one free place for
 * sure). */
static inline void pipelined_receive(struct wake_q_head *wake_q,
				     struct mqueue_inode_info *info)
{
	struct ext_wait_queue *sender = wq_get_first_waiter(info, SEND);

	if (!sender) {
		/* for poll */
		wake_up_interruptible(&info->wait_q);
		return;
	}
	if (msg_insert(sender->msg, info))
		return;

	list_del(&sender->list);
	wake_q_add(wake_q, sender->task);
	sender->state = STATE_READY;
}

static int do_mq_timedsend(mqd_t mqdes, const char __user *u_msg_ptr,
		size_t msg_len, unsigned int msg_prio,
		struct timespec64 *ts)
{
	struct fd f;
	struct inode *inode;
	struct ext_wait_queue wait;
	struct ext_wait_queue *receiver;
	struct msg_msg *msg_ptr;
	struct mqueue_inode_info *info;
	ktime_t expires, *timeout = NULL;
	struct posix_msg_tree_node *new_leaf = NULL;
	int ret = 0;
	DEFINE_WAKE_Q(wake_q);

	if (unlikely(msg_prio >= (unsigned long) MQ_PRIO_MAX))
		return -EINVAL;

	if (ts) {
		expires = timespec64_to_ktime(*ts);
		timeout = &expires;
	}

	audit_mq_sendrecv(mqdes, msg_len, msg_prio, ts);

	f = fdget(mqdes);
	if (unlikely(!f.file)) {
		ret = -EBADF;
		goto out;
	}

	inode = file_inode(f.file);
	if (unlikely(f.file->f_op != &mqueue_file_operations)) {
		ret = -EBADF;
		goto out_fput;
	}
	info = MQUEUE_I(inode);
	audit_file(f.file);

	if (unlikely(!(f.file->f_mode & FMODE_WRITE))) {
		ret = -EBADF;
		goto out_fput;
	}

	if (unlikely(msg_len > info->attr.mq_msgsize)) {
		ret = -EMSGSIZE;
		goto out_fput;
	}

	/* First try to allocate memory, before doing anything with
	 * existing queues. */
	msg_ptr = load_msg(u_msg_ptr, msg_len);
	if (IS_ERR(msg_ptr)) {
		ret = PTR_ERR(msg_ptr);
		goto out_fput;
	}
	msg_ptr->m_ts = msg_len;
	msg_ptr->m_type = msg_prio;

	/*
	 * msg_insert really wants us to have a valid, spare node struct so
	 * it doesn't have to kmalloc a GFP_ATOMIC allocation, but it will
	 * fall back to that if necessary.
	 */
	if (!info->node_cache)
		new_leaf = kmalloc(sizeof(*new_leaf), GFP_KERNEL);

	spin_lock(&info->lock);

	if (!info->node_cache && new_leaf) {
		/* Save our speculative allocation into the cache */
		INIT_LIST_HEAD(&new_leaf->msg_list);
		info->node_cache = new_leaf;
		new_leaf = NULL;
	} else {
		kfree(new_leaf);
	}

	if (info->attr.mq_curmsgs == info->attr.mq_maxmsg) {
		if (f.file->f_flags & O_NONBLOCK) {
			ret = -EAGAIN;
		} else {
			wait.task = current;
			wait.msg = (void *) msg_ptr;
			wait.state = STATE_NONE;
			ret = wq_sleep(info, SEND, timeout, &wait);
			/*
			 * wq_sleep must be called with info->lock held, and
			 * returns with the lock released
			 */
			goto out_free;
		}
	} else {
		receiver = wq_get_first_waiter(info, RECV);
		if (receiver) {
			pipelined_send(&wake_q, info, msg_ptr, receiver);
		} else {
			/* adds message to the queue */
			ret = msg_insert(msg_ptr, info);
			if (ret)
				goto out_unlock;
			__do_notify(info);
		}
		inode->i_atime = inode->i_mtime = inode->i_ctime =
				current_time(inode);
	}
out_unlock:
	spin_unlock(&info->lock);
	wake_up_q(&wake_q);
out_free:
	if (ret)
		free_msg(msg_ptr);
out_fput:
	fdput(f);
out:
	return ret;
}

static int do_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr,
		size_t msg_len, unsigned int __user *u_msg_prio,
		struct timespec64 *ts)
{
	ssize_t ret;
	struct msg_msg *msg_ptr;
	struct fd f;
	struct inode *inode;
	struct mqueue_inode_info *info;
	struct ext_wait_queue wait;
	ktime_t expires, *timeout = NULL;
	struct posix_msg_tree_node *new_leaf = NULL;

	if (ts) {
		expires = timespec64_to_ktime(*ts);
		timeout = &expires;
	}

	audit_mq_sendrecv(mqdes, msg_len, 0, ts);

	f = fdget(mqdes);
	if (unlikely(!f.file)) {
		ret = -EBADF;
		goto out;
	}

	inode = file_inode(f.file);
	if (unlikely(f.file->f_op != &mqueue_file_operations)) {
		ret = -EBADF;
		goto out_fput;
	}
	info = MQUEUE_I(inode);
	audit_file(f.file);

	if (unlikely(!(f.file->f_mode & FMODE_READ))) {
		ret = -EBADF;
		goto out_fput;
	}

	/* checks if buffer is big enough */
	if (unlikely(msg_len < info->attr.mq_msgsize)) {
		ret = -EMSGSIZE;
		goto out_fput;
	}

	/*
	 * msg_insert really wants us to have a valid, spare node struct so
	 * it doesn't have to kmalloc a GFP_ATOMIC allocation, but it will
	 * fall back to that if necessary.
	 */
	if (!info->node_cache)
		new_leaf = kmalloc(sizeof(*new_leaf), GFP_KERNEL);

	spin_lock(&info->lock);

	if (!info->node_cache && new_leaf) {
		/* Save our speculative allocation into the cache */
		INIT_LIST_HEAD(&new_leaf->msg_list);
		info->node_cache = new_leaf;
	} else {
		kfree(new_leaf);
	}

	if (info->attr.mq_curmsgs == 0) {
		if (f.file->f_flags & O_NONBLOCK) {
			spin_unlock(&info->lock);
			ret = -EAGAIN;
		} else {
			wait.task = current;
			wait.state = STATE_NONE;
			ret = wq_sleep(info, RECV, timeout, &wait);
			msg_ptr = wait.msg;
		}
	} else {
		DEFINE_WAKE_Q(wake_q);

		msg_ptr = msg_get(info);

		inode->i_atime = inode->i_mtime = inode->i_ctime =
				current_time(inode);

		/* There is now free space in queue. */
		pipelined_receive(&wake_q, info);
		spin_unlock(&info->lock);
		wake_up_q(&wake_q);
		ret = 0;
	}
	if (ret == 0) {
		ret = msg_ptr->m_ts;

		if ((u_msg_prio && put_user(msg_ptr->m_type, u_msg_prio)) ||
			store_msg(u_msg_ptr, msg_ptr, msg_ptr->m_ts)) {
			ret = -EFAULT;
		}
		free_msg(msg_ptr);
	}
out_fput:
	fdput(f);
out:
	return ret;
}

SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
		size_t, msg_len, unsigned int, msg_prio,
		const struct __kernel_timespec __user *, u_abs_timeout)
{
	struct timespec64 ts, *p = NULL;
	if (u_abs_timeout) {
		int res = prepare_timeout(u_abs_timeout, &ts);
		if (res)
			return res;
		p = &ts;
	}
	return do_mq_timedsend(mqdes, u_msg_ptr, msg_len, msg_prio, p);
}

SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
		size_t, msg_len, unsigned int __user *, u_msg_prio,
		const struct __kernel_timespec __user *, u_abs_timeout)
{
	struct timespec64 ts, *p = NULL;
	if (u_abs_timeout) {
		int res = prepare_timeout(u_abs_timeout, &ts);
		if (res)
			return res;
		p = &ts;
	}
	return do_mq_timedreceive(mqdes, u_msg_ptr, msg_len, u_msg_prio, p);
}

/*
 * Note: the case where the user asks us to deregister (with a NULL pointer)
 * but is not the current owner of the notification is silently discarded.
 * This behaviour is not explicitly defined by POSIX.
 */
static int do_mq_notify(mqd_t mqdes, const struct sigevent *notification)
{
	int ret;
	struct fd f;
	struct sock *sock;
	struct inode *inode;
	struct mqueue_inode_info *info;
	struct sk_buff *nc;

	audit_mq_notify(mqdes, notification);

	nc = NULL;
	sock = NULL;
	if (notification != NULL) {
		if (unlikely(notification->sigev_notify != SIGEV_NONE &&
			     notification->sigev_notify != SIGEV_SIGNAL &&
			     notification->sigev_notify != SIGEV_THREAD))
			return -EINVAL;
		if (notification->sigev_notify == SIGEV_SIGNAL &&
			!valid_signal(notification->sigev_signo)) {
			return -EINVAL;
		}
		if (notification->sigev_notify == SIGEV_THREAD) {
			long timeo;

			/* create the notify skb */
			nc = alloc_skb(NOTIFY_COOKIE_LEN, GFP_KERNEL);
			if (!nc) {
				ret = -ENOMEM;
				goto out;
			}
			if (copy_from_user(nc->data,
					notification->sigev_value.sival_ptr,
					NOTIFY_COOKIE_LEN)) {
				ret = -EFAULT;
				goto out;
			}

			/* TODO: add a header? */
			skb_put(nc, NOTIFY_COOKIE_LEN);
			/* and attach it to the socket */
retry:
			f = fdget(notification->sigev_signo);
			if (!f.file) {
				ret = -EBADF;
				goto out;
			}
			sock = netlink_getsockbyfilp(f.file);
			fdput(f);
			if (IS_ERR(sock)) {
				ret = PTR_ERR(sock);
				sock = NULL;
				goto out;
			}

			timeo = MAX_SCHEDULE_TIMEOUT;
			ret = netlink_attachskb(sock, nc, &timeo, NULL);
			if (ret == 1) {
				sock = NULL;
				goto retry;
			}
			if (ret) {
				sock = NULL;
				nc = NULL;
				goto out;
			}
		}
	}

	f = fdget(mqdes);
	if (!f.file) {
		ret = -EBADF;
		goto out;
	}

	inode = file_inode(f.file);
	if (unlikely(f.file->f_op != &mqueue_file_operations)) {
		ret = -EBADF;
		goto out_fput;
	}
	info = MQUEUE_I(inode);

	ret = 0;
	spin_lock(&info->lock);
	if (notification == NULL) {
		if (info->notify_owner == task_tgid(current)) {
			remove_notification(info);
			inode->i_atime = inode->i_ctime = current_time(inode);
		}
	} else if (info->notify_owner != NULL) {
		ret = -EBUSY;
	} else {
		switch (notification->sigev_notify) {
		case SIGEV_NONE:
			info->notify.sigev_notify = SIGEV_NONE;
			break;
		case SIGEV_THREAD:
			info->notify_sock = sock;
			info->notify_cookie = nc;
			sock = NULL;
			nc = NULL;
			info->notify.sigev_notify = SIGEV_THREAD;
			break;
		case SIGEV_SIGNAL:
			info->notify.sigev_signo = notification->sigev_signo;
			info->notify.sigev_value = notification->sigev_value;
			info->notify.sigev_notify = SIGEV_SIGNAL;
			break;
		}

		info->notify_owner = get_pid(task_tgid(current));
		info->notify_user_ns = get_user_ns(current_user_ns());
		inode->i_atime = inode->i_ctime = current_time(inode);
	}
	spin_unlock(&info->lock);
out_fput:
	fdput(f);
out:
	if (sock)
		netlink_detachskb(sock, nc);
	else if (nc)
		dev_kfree_skb(nc);

	return ret;
}

SYSCALL_DEFINE2(mq_notify, mqd_t, mqdes,
		const struct sigevent __user *, u_notification)
{
	struct sigevent n, *p = NULL;
	if (u_notification) {
		if (copy_from_user(&n, u_notification, sizeof(struct sigevent)))
			return -EFAULT;
		p = &n;
	}
	return do_mq_notify(mqdes, p);
}

static int do_mq_getsetattr(int mqdes, struct mq_attr *new, struct mq_attr *old)
{
	struct fd f;
	struct inode *inode;
	struct mqueue_inode_info *info;

	if (new && (new->mq_flags & (~O_NONBLOCK)))
		return -EINVAL;

	f = fdget(mqdes);
	if (!f.file)
		return -EBADF;

	if (unlikely(f.file->f_op != &mqueue_file_operations)) {
		fdput(f);
		return -EBADF;
	}

	inode = file_inode(f.file);
	info = MQUEUE_I(inode);

	spin_lock(&info->lock);

	if (old) {
		*old = info->attr;
		old->mq_flags = f.file->f_flags & O_NONBLOCK;
	}
	if (new) {
		audit_mq_getsetattr(mqdes, new);
		spin_lock(&f.file->f_lock);
		if (new->mq_flags & O_NONBLOCK)
			f.file->f_flags |= O_NONBLOCK;
		else
			f.file->f_flags &= ~O_NONBLOCK;
		spin_unlock(&f.file->f_lock);

		inode->i_atime = inode->i_ctime = current_time(inode);
	}

	spin_unlock(&info->lock);
	fdput(f);
	return 0;
}

SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes,
		const struct mq_attr __user *, u_mqstat,
		struct mq_attr __user *, u_omqstat)
{
	int ret;
	struct mq_attr mqstat, omqstat;
	struct mq_attr *new = NULL, *old = NULL;

	if (u_mqstat) {
		new = &mqstat;
		if (copy_from_user(new, u_mqstat, sizeof(struct mq_attr)))
			return -EFAULT;
	}
	if (u_omqstat)
		old = &omqstat;

	ret = do_mq_getsetattr(mqdes, new, old);
	if (ret || !old)
		return ret;

	if (copy_to_user(u_omqstat, old, sizeof(struct mq_attr)))
		return -EFAULT;
	return 0;
}

#ifdef CONFIG_COMPAT

struct compat_mq_attr {
	compat_long_t mq_flags;      /* message queue flags */
	compat_long_t mq_maxmsg;     /* maximum number of messages */
	compat_long_t mq_msgsize;    /* maximum message size */
	compat_long_t mq_curmsgs;    /* number of messages currently queued */
	compat_long_t __reserved[4]; /* ignored for input, zeroed for output */
};

static inline int get_compat_mq_attr(struct mq_attr *attr,
			const struct compat_mq_attr __user *uattr)
{
	struct compat_mq_attr v;

	if (copy_from_user(&v, uattr, sizeof(*uattr)))
		return -EFAULT;

	memset(attr, 0, sizeof(*attr));
	attr->mq_flags = v.mq_flags;
	attr->mq_maxmsg = v.mq_maxmsg;
	attr->mq_msgsize = v.mq_msgsize;
	attr->mq_curmsgs = v.mq_curmsgs;
	return 0;
}

static inline int put_compat_mq_attr(const struct mq_attr *attr,
			struct compat_mq_attr __user *uattr)
{
	struct compat_mq_attr v;

	memset(&v, 0, sizeof(v));
	v.mq_flags = attr->mq_flags;
	v.mq_maxmsg = attr->mq_maxmsg;
	v.mq_msgsize = attr->mq_msgsize;
	v.mq_curmsgs = attr->mq_curmsgs;
	if (copy_to_user(uattr, &v, sizeof(*uattr)))
		return -EFAULT;
	return 0;
}

COMPAT_SYSCALL_DEFINE4(mq_open, const char __user *, u_name,
		       int, oflag, compat_mode_t, mode,
		       struct compat_mq_attr __user *, u_attr)
{
	struct mq_attr attr, *p = NULL;
	if (u_attr && oflag & O_CREAT) {
		p = &attr;
		if (get_compat_mq_attr(&attr, u_attr))
			return -EFAULT;
	}
	return do_mq_open(u_name, oflag, mode, p);
}

COMPAT_SYSCALL_DEFINE2(mq_notify, mqd_t, mqdes,
		       const struct compat_sigevent __user *, u_notification)
{
	struct sigevent n, *p = NULL;
	if (u_notification) {
		if (get_compat_sigevent(&n, u_notification))
			return -EFAULT;
		if (n.sigev_notify == SIGEV_THREAD)
			n.sigev_value.sival_ptr = compat_ptr(n.sigev_value.sival_int);
		p = &n;
	}
	return do_mq_notify(mqdes, p);
}

COMPAT_SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes,
		       const struct compat_mq_attr __user *, u_mqstat,
		       struct compat_mq_attr __user *, u_omqstat)
{
	int ret;
	struct mq_attr mqstat, omqstat;
	struct mq_attr *new = NULL, *old = NULL;

	if (u_mqstat) {
		new = &mqstat;
		if (get_compat_mq_attr(new, u_mqstat))
			return -EFAULT;
	}
	if (u_omqstat)
		old = &omqstat;

	ret = do_mq_getsetattr(mqdes, new, old);
	if (ret || !old)
		return ret;

	if (put_compat_mq_attr(old, u_omqstat))
		return -EFAULT;
	return 0;
}
#endif

#ifdef CONFIG_COMPAT_32BIT_TIME
static int compat_prepare_timeout(const struct old_timespec32 __user *p,
				  struct timespec64 *ts)
{
	if (get_old_timespec32(ts, p))
		return -EFAULT;
	if (!timespec64_valid(ts))
		return -EINVAL;
	return 0;
}

SYSCALL_DEFINE5(mq_timedsend_time32, mqd_t, mqdes,
		const char __user *, u_msg_ptr,
		unsigned int, msg_len, unsigned int, msg_prio,
		const struct old_timespec32 __user *, u_abs_timeout)
{
	struct timespec64 ts, *p = NULL;
	if (u_abs_timeout) {
		int res = compat_prepare_timeout(u_abs_timeout, &ts);
		if (res)
			return res;
		p = &ts;
	}
	return do_mq_timedsend(mqdes, u_msg_ptr, msg_len, msg_prio, p);
}

SYSCALL_DEFINE5(mq_timedreceive_time32, mqd_t, mqdes,
		char __user *, u_msg_ptr,
		unsigned int, msg_len, unsigned int __user *, u_msg_prio,
		const struct old_timespec32 __user *, u_abs_timeout)
{
	struct timespec64 ts, *p = NULL;
	if (u_abs_timeout) {
		int res = compat_prepare_timeout(u_abs_timeout, &ts);
		if (res)
			return res;
		p = &ts;
	}
	return do_mq_timedreceive(mqdes, u_msg_ptr, msg_len, u_msg_prio, p);
}
#endif

static const struct inode_operations mqueue_dir_inode_operations = {
	.lookup = simple_lookup,
	.create = mqueue_create,
	.unlink = mqueue_unlink,
};

static const struct file_operations mqueue_file_operations = {
	.flush = mqueue_flush_file,
	.poll = mqueue_poll_file,
	.read = mqueue_read_file,
	.llseek = default_llseek,
};

static const struct super_operations mqueue_super_ops = {
	.alloc_inode = mqueue_alloc_inode,
	.free_inode = mqueue_free_inode,
	.evict_inode = mqueue_evict_inode,
	.statfs = simple_statfs,
};

static const struct fs_context_operations mqueue_fs_context_ops = {
	.free		= mqueue_fs_context_free,
	.get_tree	= mqueue_get_tree,
};

static struct file_system_type mqueue_fs_type = {
	.name			= "mqueue",
	.init_fs_context	= mqueue_init_fs_context,
	.kill_sb		= kill_litter_super,
	.fs_flags		= FS_USERNS_MOUNT,
};

int mq_init_ns(struct ipc_namespace *ns)
{
	struct vfsmount *m;

	ns->mq_queues_count = 0;
	ns->mq_queues_max = DFLT_QUEUESMAX;
	ns->mq_msg_max = DFLT_MSGMAX;
	ns->mq_msgsize_max = DFLT_MSGSIZEMAX;
	ns->mq_msg_default = DFLT_MSG;
	ns->mq_msgsize_default = DFLT_MSGSIZE;

	m = mq_create_mount(ns);
	if (IS_ERR(m))
		return PTR_ERR(m);
	ns->mq_mnt = m;
	return 0;
}

void mq_clear_sbinfo(struct ipc_namespace *ns)
{
	ns->mq_mnt->mnt_sb->s_fs_info = NULL;
}

void mq_put_mnt(struct ipc_namespace *ns)
{
	kern_unmount(ns->mq_mnt);
}

static int __init init_mqueue_fs(void)
{
	int error;

	mqueue_inode_cachep = kmem_cache_create("mqueue_inode_cache",
				sizeof(struct mqueue_inode_info), 0,
				SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT, init_once);
	if (mqueue_inode_cachep == NULL)
		return -ENOMEM;

	/* ignore failures - they are not fatal */
	mq_sysctl_table = mq_register_sysctl_table();

	error = register_filesystem(&mqueue_fs_type);
	if (error)
		goto out_sysctl;

	spin_lock_init(&mq_lock);

	error = mq_init_ns(&init_ipc_ns);
	if (error)
		goto out_filesystem;

	return 0;

out_filesystem:
	unregister_filesystem(&mqueue_fs_type);
out_sysctl:
	if (mq_sysctl_table)
		unregister_sysctl_table(mq_sysctl_table);
	kmem_cache_destroy(mqueue_inode_cachep);
	return error;
}

device_initcall(init_mqueue_fs);