/*
 * POSIX message queues filesystem for Linux.
 *
 * Copyright (C) 2003,2004  Krzysztof Benedyczak    (golbi@mat.uni.torun.pl)
 *                          Michal Wronski          (michal.wronski@gmail.com)
 *
 * Spinlocks:               Mohamed Abbas           (abbas.mohamed@intel.com)
 * Lockless receive & send, fd based notify:
 *                          Manfred Spraul          (manfred@colorfullife.com)
 *
 * Audit:                   George Wilson           (ltcgcw@us.ibm.com)
 *
 * This file is released under the GPL.
 */

#include <linux/capability.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/mount.h>
#include <linux/fs_context.h>
#include <linux/namei.h>
#include <linux/sysctl.h>
#include <linux/poll.h>
#include <linux/mqueue.h>
#include <linux/msg.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/netlink.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/signal.h>
#include <linux/mutex.h>
#include <linux/nsproxy.h>
#include <linux/pid.h>
#include <linux/ipc_namespace.h>
#include <linux/user_namespace.h>
#include <linux/slab.h>
#include <linux/sched/wake_q.h>
#include <linux/sched/signal.h>
#include <linux/sched/user.h>

#include <net/sock.h>
#include "util.h"

struct mqueue_fs_context {
        struct ipc_namespace    *ipc_ns;
        bool                     newns; /* Set if newly created ipc namespace */
};

#define MQUEUE_MAGIC    0x19800202
#define DIRENT_SIZE     20
#define FILENT_SIZE     80

#define SEND            0
#define RECV            1

#define STATE_NONE      0
#define STATE_READY     1

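/*
 * Messages are kept in a priority-sorted rbtree (info->msg_tree): one
 * posix_msg_tree_node per priority in use, each holding a FIFO list of the
 * messages queued at that priority (see msg_insert()/msg_get() below).
 */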
struct posix_msg_tree_node {
        struct rb_node          rb_node;
        struct list_head        msg_list;
        int                     priority;
};

/*
 * Locking:
 *
 * Accesses to a message queue are synchronized by acquiring info->lock.
 *
 * There are two notable exceptions:
 * - The actual wakeup of a sleeping task is performed using the wake_q
 *   framework. info->lock is already released when wake_up_q is called.
 * - The exit codepaths after sleeping check ext_wait_queue->state without
 *   any locks. If it is STATE_READY, then the syscall is completed without
 *   acquiring info->lock.
 *
 * MQ_BARRIER:
 * To achieve proper release/acquire memory barrier pairing, the state is set
 * to STATE_READY with smp_store_release(), and it is read with READ_ONCE
 * followed by smp_acquire__after_ctrl_dep(). In addition, wake_q_add_safe()
 * is used.
 *
 * This prevents the following races:
 *
 * 1) With the simple wake_q_add(), the task could be gone already before
 *    the increase of the reference happens
 * Thread A
 *                              Thread B
 * WRITE_ONCE(wait.state, STATE_NONE);
 * schedule_hrtimeout()
 *                              wake_q_add(A)
 *                              if (cmpxchg()) // success
 *                                 ->state = STATE_READY (reordered)
 * <timeout returns>
 * if (wait.state == STATE_READY) return;
 * sysret to user space
 * sys_exit()
 *                              get_task_struct() // UaF
 *
 * Solution: Use wake_q_add_safe() and perform the get_task_struct() before
 * the smp_store_release() that does ->state = STATE_READY.
 *
 * 2) Without proper _release/_acquire barriers, the woken up task
 *    could read stale data
 *
 * Thread A
 *                              Thread B
 * do_mq_timedreceive
 * WRITE_ONCE(wait.state, STATE_NONE);
 * schedule_hrtimeout()
 *                              state = STATE_READY;
 * <timeout returns>
 * if (wait.state == STATE_READY) return;
 * msg_ptr = wait.msg;          // Access to stale data!
 *                              receiver->msg = message; (reordered)
 *
 * Solution: use _release and _acquire barriers.
 *
 * 3) There is intentionally no barrier when setting current->state
 *    to TASK_INTERRUPTIBLE: spin_unlock(&info->lock) provides the
 *    release memory barrier, and the wakeup is triggered when holding
 *    info->lock, i.e. spin_lock(&info->lock) provided a pairing
 *    acquire memory barrier.
 */

struct ext_wait_queue {         /* queue of sleeping tasks */
        struct task_struct *task;
        struct list_head list;
        struct msg_msg *msg;    /* ptr of loaded message */
        int state;              /* one of STATE_* values */
};

struct mqueue_inode_info {
        spinlock_t lock;
        struct inode vfs_inode;
        wait_queue_head_t wait_q;

        struct rb_root msg_tree;
        struct rb_node *msg_tree_rightmost;
        struct posix_msg_tree_node *node_cache;
        struct mq_attr attr;

        struct sigevent notify;
        struct pid *notify_owner;
        u32 notify_self_exec_id;
        struct user_namespace *notify_user_ns;
        struct ucounts *ucounts;        /* user who created, for accounting */
        struct sock *notify_sock;
        struct sk_buff *notify_cookie;

        /* for tasks waiting for free space and messages, respectively */
        struct ext_wait_queue e_wait_q[2];

        unsigned long qsize; /* size of queue in memory (sum of all msgs) */
};

static struct file_system_type mqueue_fs_type;
static const struct inode_operations mqueue_dir_inode_operations;
static const struct file_operations mqueue_file_operations;
static const struct super_operations mqueue_super_ops;
static const struct fs_context_operations mqueue_fs_context_ops;
static void remove_notification(struct mqueue_inode_info *info);

static struct kmem_cache *mqueue_inode_cachep;

static inline struct mqueue_inode_info *MQUEUE_I(struct inode *inode)
{
        return container_of(inode, struct mqueue_inode_info, vfs_inode);
}

/*
 * This routine should be called with the mq_lock held.
 */
static inline struct ipc_namespace *__get_ns_from_inode(struct inode *inode)
{
        return get_ipc_ns(inode->i_sb->s_fs_info);
}

static struct ipc_namespace *get_ns_from_inode(struct inode *inode)
{
        struct ipc_namespace *ns;

        spin_lock(&mq_lock);
        ns = __get_ns_from_inode(inode);
        spin_unlock(&mq_lock);
        return ns;
}

/* Auxiliary functions to manipulate messages' list */
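/*
 * msg_insert() - queue @msg on the tree node that matches its priority,
 * creating the per-priority node when none exists yet (reusing
 * info->node_cache where possible to avoid a GFP_ATOMIC allocation).
 * Called with info->lock held; returns 0 or -ENOMEM.
 */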
static int msg_insert(struct msg_msg *msg, struct mqueue_inode_info *info)
{
        struct rb_node **p, *parent = NULL;
        struct posix_msg_tree_node *leaf;
        bool rightmost = true;

        p = &info->msg_tree.rb_node;
        while (*p) {
                parent = *p;
                leaf = rb_entry(parent, struct posix_msg_tree_node, rb_node);

                if (likely(leaf->priority == msg->m_type))
                        goto insert_msg;
                else if (msg->m_type < leaf->priority) {
                        p = &(*p)->rb_left;
                        rightmost = false;
                } else
                        p = &(*p)->rb_right;
        }
        if (info->node_cache) {
                leaf = info->node_cache;
                info->node_cache = NULL;
        } else {
                leaf = kmalloc(sizeof(*leaf), GFP_ATOMIC);
                if (!leaf)
                        return -ENOMEM;
                INIT_LIST_HEAD(&leaf->msg_list);
        }
        leaf->priority = msg->m_type;

        if (rightmost)
                info->msg_tree_rightmost = &leaf->rb_node;

        rb_link_node(&leaf->rb_node, parent, p);
        rb_insert_color(&leaf->rb_node, &info->msg_tree);
insert_msg:
        info->attr.mq_curmsgs++;
        info->qsize += msg->m_ts;
        list_add_tail(&msg->m_list, &leaf->msg_list);
        return 0;
}

static inline void msg_tree_erase(struct posix_msg_tree_node *leaf,
                                  struct mqueue_inode_info *info)
{
        struct rb_node *node = &leaf->rb_node;

        if (info->msg_tree_rightmost == node)
                info->msg_tree_rightmost = rb_prev(node);

        rb_erase(node, &info->msg_tree);
        if (info->node_cache)
                kfree(leaf);
        else
                info->node_cache = leaf;
}

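/*
 * msg_get() - dequeue the highest-priority message (FIFO order within a
 * priority), using the cached rightmost tree node to avoid a full walk.
 * Updates mq_curmsgs and qsize; called with info->lock held. Returns NULL
 * if the queue is empty.
 */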
static inline struct msg_msg *msg_get(struct mqueue_inode_info *info)
{
        struct rb_node *parent = NULL;
        struct posix_msg_tree_node *leaf;
        struct msg_msg *msg;

try_again:
        /*
         * During insert, low priorities go to the left and high to the
         * right.  On receive, we want the highest priorities first, so
         * walk all the way to the right.
         */
        parent = info->msg_tree_rightmost;
        if (!parent) {
                if (info->attr.mq_curmsgs) {
                        pr_warn_once("Inconsistency in POSIX message queue, "
                                     "no tree element, but supposedly messages "
                                     "should exist!\n");
                        info->attr.mq_curmsgs = 0;
                }
                return NULL;
        }
        leaf = rb_entry(parent, struct posix_msg_tree_node, rb_node);
        if (unlikely(list_empty(&leaf->msg_list))) {
                pr_warn_once("Inconsistency in POSIX message queue, "
                             "empty leaf node but we haven't implemented "
                             "lazy leaf delete!\n");
                msg_tree_erase(leaf, info);
                goto try_again;
        } else {
                msg = list_first_entry(&leaf->msg_list,
                                       struct msg_msg, m_list);
                list_del(&msg->m_list);
                if (list_empty(&leaf->msg_list)) {
                        msg_tree_erase(leaf, info);
                }
        }
        info->attr.mq_curmsgs--;
        info->qsize -= msg->m_ts;
        return msg;
}

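/*
 * mqueue_get_inode() - allocate and initialize an inode for the mqueue
 * filesystem. For regular files (the queues themselves) this sets up the
 * embedded mqueue_inode_info, validates and applies the requested
 * attributes, and charges the worst-case queue footprint against the
 * creator's RLIMIT_MSGQUEUE via the ucounts infrastructure.
 */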
static struct inode *mqueue_get_inode(struct super_block *sb,
                struct ipc_namespace *ipc_ns, umode_t mode,
                struct mq_attr *attr)
{
        struct inode *inode;
        int ret = -ENOMEM;

        inode = new_inode(sb);
        if (!inode)
                goto err;

        inode->i_ino = get_next_ino();
        inode->i_mode = mode;
        inode->i_uid = current_fsuid();
        inode->i_gid = current_fsgid();
        inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);

        if (S_ISREG(mode)) {
                struct mqueue_inode_info *info;
                unsigned long mq_bytes, mq_treesize;

                inode->i_fop = &mqueue_file_operations;
                inode->i_size = FILENT_SIZE;
                /* mqueue specific info */
                info = MQUEUE_I(inode);
                spin_lock_init(&info->lock);
                init_waitqueue_head(&info->wait_q);
                INIT_LIST_HEAD(&info->e_wait_q[0].list);
                INIT_LIST_HEAD(&info->e_wait_q[1].list);
                info->notify_owner = NULL;
                info->notify_user_ns = NULL;
                info->qsize = 0;
                info->ucounts = NULL;   /* set when all is ok */
                info->msg_tree = RB_ROOT;
                info->msg_tree_rightmost = NULL;
                info->node_cache = NULL;
                memset(&info->attr, 0, sizeof(info->attr));
                info->attr.mq_maxmsg = min(ipc_ns->mq_msg_max,
                                           ipc_ns->mq_msg_default);
                info->attr.mq_msgsize = min(ipc_ns->mq_msgsize_max,
                                            ipc_ns->mq_msgsize_default);
                if (attr) {
                        info->attr.mq_maxmsg = attr->mq_maxmsg;
                        info->attr.mq_msgsize = attr->mq_msgsize;
                }
                /*
                 * We used to allocate a static array of pointers and account
                 * the size of that array as well as one msg_msg struct per
                 * possible message into the queue size. That's no longer
                 * accurate as the queue is now an rbtree and will grow and
                 * shrink depending on usage patterns.  We can, however, still
                 * account one msg_msg struct per message, but the nodes are
                 * allocated depending on priority usage, and most programs
                 * only use one, or a handful, of priorities.  However, since
                 * this is pinned memory, we need to assume worst case, so
                 * that means the min(mq_maxmsg, max_priorities) * struct
                 * posix_msg_tree_node.
                 */

                ret = -EINVAL;
                if (info->attr.mq_maxmsg <= 0 || info->attr.mq_msgsize <= 0)
                        goto out_inode;
                if (capable(CAP_SYS_RESOURCE)) {
                        if (info->attr.mq_maxmsg > HARD_MSGMAX ||
                            info->attr.mq_msgsize > HARD_MSGSIZEMAX)
                                goto out_inode;
                } else {
                        if (info->attr.mq_maxmsg > ipc_ns->mq_msg_max ||
                            info->attr.mq_msgsize > ipc_ns->mq_msgsize_max)
                                goto out_inode;
                }
                ret = -EOVERFLOW;
                /* check for overflow */
                if (info->attr.mq_msgsize > ULONG_MAX/info->attr.mq_maxmsg)
                        goto out_inode;
                mq_treesize = info->attr.mq_maxmsg * sizeof(struct msg_msg) +
                        min_t(unsigned int, info->attr.mq_maxmsg, MQ_PRIO_MAX) *
                        sizeof(struct posix_msg_tree_node);
                mq_bytes = info->attr.mq_maxmsg * info->attr.mq_msgsize;
                if (mq_bytes + mq_treesize < mq_bytes)
                        goto out_inode;
                mq_bytes += mq_treesize;
                info->ucounts = get_ucounts(current_ucounts());
                if (info->ucounts) {
                        long msgqueue;

                        spin_lock(&mq_lock);
                        msgqueue = inc_rlimit_ucounts(info->ucounts, UCOUNT_RLIMIT_MSGQUEUE, mq_bytes);
                        if (msgqueue == LONG_MAX || msgqueue > rlimit(RLIMIT_MSGQUEUE)) {
                                dec_rlimit_ucounts(info->ucounts, UCOUNT_RLIMIT_MSGQUEUE, mq_bytes);
                                spin_unlock(&mq_lock);
                                put_ucounts(info->ucounts);
                                info->ucounts = NULL;
                                /* mqueue_evict_inode() releases info->messages */
                                ret = -EMFILE;
                                goto out_inode;
                        }
                        spin_unlock(&mq_lock);
                }
        } else if (S_ISDIR(mode)) {
                inc_nlink(inode);
                /* Some things misbehave if size == 0 on a directory */
                inode->i_size = 2 * DIRENT_SIZE;
                inode->i_op = &mqueue_dir_inode_operations;
                inode->i_fop = &simple_dir_operations;
        }

        return inode;
out_inode:
        iput(inode);
err:
        return ERR_PTR(ret);
}

static int mqueue_fill_super(struct super_block *sb, struct fs_context *fc)
{
        struct inode *inode;
        struct ipc_namespace *ns = sb->s_fs_info;

        sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV;
        sb->s_blocksize = PAGE_SIZE;
        sb->s_blocksize_bits = PAGE_SHIFT;
        sb->s_magic = MQUEUE_MAGIC;
        sb->s_op = &mqueue_super_ops;

        inode = mqueue_get_inode(sb, ns, S_IFDIR | S_ISVTX | S_IRWXUGO, NULL);
        if (IS_ERR(inode))
                return PTR_ERR(inode);

        sb->s_root = d_make_root(inode);
        if (!sb->s_root)
                return -ENOMEM;
        return 0;
}

static int mqueue_get_tree(struct fs_context *fc)
{
        struct mqueue_fs_context *ctx = fc->fs_private;

        /*
         * With a newly created ipc namespace, we don't need to do a search
         * for an ipc namespace match, but we still need to set s_fs_info.
         */
        if (ctx->newns) {
                fc->s_fs_info = ctx->ipc_ns;
                return get_tree_nodev(fc, mqueue_fill_super);
        }
        return get_tree_keyed(fc, mqueue_fill_super, ctx->ipc_ns);
}

static void mqueue_fs_context_free(struct fs_context *fc)
{
        struct mqueue_fs_context *ctx = fc->fs_private;

        put_ipc_ns(ctx->ipc_ns);
        kfree(ctx);
}

static int mqueue_init_fs_context(struct fs_context *fc)
{
        struct mqueue_fs_context *ctx;

        ctx = kzalloc(sizeof(struct mqueue_fs_context), GFP_KERNEL);
        if (!ctx)
                return -ENOMEM;

        ctx->ipc_ns = get_ipc_ns(current->nsproxy->ipc_ns);
        put_user_ns(fc->user_ns);
        fc->user_ns = get_user_ns(ctx->ipc_ns->user_ns);
        fc->fs_private = ctx;
        fc->ops = &mqueue_fs_context_ops;
        return 0;
}

/*
 * mq_init_ns() is currently the only caller of mq_create_mount().
 * So the ns parameter is always a newly created ipc namespace.
 */
static struct vfsmount *mq_create_mount(struct ipc_namespace *ns)
{
        struct mqueue_fs_context *ctx;
        struct fs_context *fc;
        struct vfsmount *mnt;

        fc = fs_context_for_mount(&mqueue_fs_type, SB_KERNMOUNT);
        if (IS_ERR(fc))
                return ERR_CAST(fc);

        ctx = fc->fs_private;
        ctx->newns = true;
        put_ipc_ns(ctx->ipc_ns);
        ctx->ipc_ns = get_ipc_ns(ns);
        put_user_ns(fc->user_ns);
        fc->user_ns = get_user_ns(ctx->ipc_ns->user_ns);

        mnt = fc_mount(fc);
        put_fs_context(fc);
        return mnt;
}

static void init_once(void *foo)
{
        struct mqueue_inode_info *p = foo;

        inode_init_once(&p->vfs_inode);
}

static struct inode *mqueue_alloc_inode(struct super_block *sb)
{
        struct mqueue_inode_info *ei;

        ei = alloc_inode_sb(sb, mqueue_inode_cachep, GFP_KERNEL);
        if (!ei)
                return NULL;
        return &ei->vfs_inode;
}

static void mqueue_free_inode(struct inode *inode)
{
        kmem_cache_free(mqueue_inode_cachep, MQUEUE_I(inode));
}

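/*
 * mqueue_evict_inode() - final teardown of a queue inode. Pending messages
 * are moved to a private list under info->lock and freed afterwards without
 * the lock held; the RLIMIT_MSGQUEUE charge taken in mqueue_get_inode() and
 * the per-namespace queue count are then released.
 */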
static void mqueue_evict_inode(struct inode *inode)
{
        struct mqueue_inode_info *info;
        struct ipc_namespace *ipc_ns;
        struct msg_msg *msg, *nmsg;
        LIST_HEAD(tmp_msg);

        clear_inode(inode);

        if (S_ISDIR(inode->i_mode))
                return;

        ipc_ns = get_ns_from_inode(inode);
        info = MQUEUE_I(inode);
        spin_lock(&info->lock);
        while ((msg = msg_get(info)) != NULL)
                list_add_tail(&msg->m_list, &tmp_msg);
        kfree(info->node_cache);
        spin_unlock(&info->lock);

        list_for_each_entry_safe(msg, nmsg, &tmp_msg, m_list) {
                list_del(&msg->m_list);
                free_msg(msg);
        }

        if (info->ucounts) {
                unsigned long mq_bytes, mq_treesize;

                /* Total amount of bytes accounted for the mqueue */
                mq_treesize = info->attr.mq_maxmsg * sizeof(struct msg_msg) +
                        min_t(unsigned int, info->attr.mq_maxmsg, MQ_PRIO_MAX) *
                        sizeof(struct posix_msg_tree_node);

                mq_bytes = mq_treesize + (info->attr.mq_maxmsg *
                                          info->attr.mq_msgsize);

                spin_lock(&mq_lock);
                dec_rlimit_ucounts(info->ucounts, UCOUNT_RLIMIT_MSGQUEUE, mq_bytes);
                /*
                 * get_ns_from_inode() ensures that the
                 * (ipc_ns = sb->s_fs_info) is either a valid ipc_ns
                 * to which we now hold a reference, or it is NULL.
                 * We can't put it here under mq_lock, though.
                 */
                if (ipc_ns)
                        ipc_ns->mq_queues_count--;
                spin_unlock(&mq_lock);
                put_ucounts(info->ucounts);
                info->ucounts = NULL;
        }
        if (ipc_ns)
                put_ipc_ns(ipc_ns);
}

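/*
 * mqueue_create_attr() - create a new queue inode in the parent directory.
 * Used directly and as the vfs_mkobj() callback from prepare_open(); it
 * enforces the per-namespace queue limit (mq_queues_max) under mq_lock
 * before allocating the inode via mqueue_get_inode().
 */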
static int mqueue_create_attr(struct dentry *dentry, umode_t mode, void *arg)
{
        struct inode *dir = dentry->d_parent->d_inode;
        struct inode *inode;
        struct mq_attr *attr = arg;
        int error;
        struct ipc_namespace *ipc_ns;

        spin_lock(&mq_lock);
        ipc_ns = __get_ns_from_inode(dir);
        if (!ipc_ns) {
                error = -EACCES;
                goto out_unlock;
        }

        if (ipc_ns->mq_queues_count >= ipc_ns->mq_queues_max &&
            !capable(CAP_SYS_RESOURCE)) {
                error = -ENOSPC;
                goto out_unlock;
        }
        ipc_ns->mq_queues_count++;
        spin_unlock(&mq_lock);

        inode = mqueue_get_inode(dir->i_sb, ipc_ns, mode, attr);
        if (IS_ERR(inode)) {
                error = PTR_ERR(inode);
                spin_lock(&mq_lock);
                ipc_ns->mq_queues_count--;
                goto out_unlock;
        }

        put_ipc_ns(ipc_ns);
        dir->i_size += DIRENT_SIZE;
        dir->i_mtime = dir->i_atime = inode_set_ctime_current(dir);

        d_instantiate(dentry, inode);
        dget(dentry);
        return 0;
out_unlock:
        spin_unlock(&mq_lock);
        if (ipc_ns)
                put_ipc_ns(ipc_ns);
        return error;
}

static int mqueue_create(struct mnt_idmap *idmap, struct inode *dir,
                         struct dentry *dentry, umode_t mode, bool excl)
{
        return mqueue_create_attr(dentry, mode, NULL);
}

static int mqueue_unlink(struct inode *dir, struct dentry *dentry)
{
        struct inode *inode = d_inode(dentry);

        dir->i_mtime = dir->i_atime = inode_set_ctime_current(dir);
        dir->i_size -= DIRENT_SIZE;
        drop_nlink(inode);
        dput(dentry);
        return 0;
}

/*
 * Handler for read() on a queue file.  Rather than implementing some form
 * of mq_receive() here, we only expose the queue size and notification
 * info: the values that are interesting from the user's point of view and
 * that are not accessible through the standard mq_* routines.
 */
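/*
 * The returned buffer is a single line; with no notification registered it
 * looks roughly like this (values are illustrative):
 *
 *      QSIZE:128        NOTIFY:0     SIGNO:0     NOTIFY_PID:0
 */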
static ssize_t mqueue_read_file(struct file *filp, char __user *u_data,
                                size_t count, loff_t *off)
{
        struct inode *inode = file_inode(filp);
        struct mqueue_inode_info *info = MQUEUE_I(inode);
        char buffer[FILENT_SIZE];
        ssize_t ret;

        spin_lock(&info->lock);
        snprintf(buffer, sizeof(buffer),
                 "QSIZE:%-10lu NOTIFY:%-5d SIGNO:%-5d NOTIFY_PID:%-6d\n",
                 info->qsize,
                 info->notify_owner ? info->notify.sigev_notify : 0,
                 (info->notify_owner &&
                  info->notify.sigev_notify == SIGEV_SIGNAL) ?
                        info->notify.sigev_signo : 0,
                 pid_vnr(info->notify_owner));
        spin_unlock(&info->lock);
        buffer[sizeof(buffer)-1] = '\0';

        ret = simple_read_from_buffer(u_data, count, off, buffer,
                                      strlen(buffer));
        if (ret <= 0)
                return ret;

        inode->i_atime = inode_set_ctime_current(inode);
        return ret;
}

static int mqueue_flush_file(struct file *filp, fl_owner_t id)
{
        struct mqueue_inode_info *info = MQUEUE_I(file_inode(filp));

        spin_lock(&info->lock);
        if (task_tgid(current) == info->notify_owner)
                remove_notification(info);

        spin_unlock(&info->lock);
        return 0;
}

static __poll_t mqueue_poll_file(struct file *filp, struct poll_table_struct *poll_tab)
{
        struct mqueue_inode_info *info = MQUEUE_I(file_inode(filp));
        __poll_t retval = 0;

        poll_wait(filp, &info->wait_q, poll_tab);

        spin_lock(&info->lock);
        if (info->attr.mq_curmsgs)
                retval = EPOLLIN | EPOLLRDNORM;

        if (info->attr.mq_curmsgs < info->attr.mq_maxmsg)
                retval |= EPOLLOUT | EPOLLWRNORM;
        spin_unlock(&info->lock);

        return retval;
}

/* Adds current to info->e_wait_q[sr] before the first element with a smaller prio */
static void wq_add(struct mqueue_inode_info *info, int sr,
                   struct ext_wait_queue *ewp)
{
        struct ext_wait_queue *walk;

        list_for_each_entry(walk, &info->e_wait_q[sr].list, list) {
                if (walk->task->prio <= current->prio) {
                        list_add_tail(&ewp->list, &walk->list);
                        return;
                }
        }
        list_add_tail(&ewp->list, &info->e_wait_q[sr].list);
}

/*
 * Puts the current task to sleep.  The caller must hold the queue lock;
 * the lock is not held on return.
 * sr: SEND or RECV
 */
static int wq_sleep(struct mqueue_inode_info *info, int sr,
                    ktime_t *timeout, struct ext_wait_queue *ewp)
        __releases(&info->lock)
{
        int retval;
        signed long time;

        wq_add(info, sr, ewp);

        for (;;) {
                /* memory barrier not required, we hold info->lock */
                __set_current_state(TASK_INTERRUPTIBLE);

                spin_unlock(&info->lock);
                time = schedule_hrtimeout_range_clock(timeout, 0,
                        HRTIMER_MODE_ABS, CLOCK_REALTIME);

                if (READ_ONCE(ewp->state) == STATE_READY) {
                        /* see MQ_BARRIER for purpose/pairing */
                        smp_acquire__after_ctrl_dep();
                        retval = 0;
                        goto out;
                }
                spin_lock(&info->lock);

                /* we hold info->lock, so no memory barrier required */
                if (READ_ONCE(ewp->state) == STATE_READY) {
                        retval = 0;
                        goto out_unlock;
                }
                if (signal_pending(current)) {
                        retval = -ERESTARTSYS;
                        break;
                }
                if (time == 0) {
                        retval = -ETIMEDOUT;
                        break;
                }
        }
        list_del(&ewp->list);
out_unlock:
        spin_unlock(&info->lock);
out:
        return retval;
}

/*
 * Returns the waiting task that should be serviced first, or NULL if none
 * exists.
 */
static struct ext_wait_queue *wq_get_first_waiter(
                struct mqueue_inode_info *info, int sr)
{
        struct list_head *ptr;

        ptr = info->e_wait_q[sr].list.prev;
        if (ptr == &info->e_wait_q[sr].list)
                return NULL;
        return list_entry(ptr, struct ext_wait_queue, list);
}


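/*
 * SIGEV_THREAD notifications are delivered over a netlink socket registered
 * by userspace together with a preallocated skb (the "cookie").
 * set_cookie() writes the notification code (NOTIFY_WOKENUP when the
 * notification fires, NOTIFY_REMOVED when it is torn down) into the last
 * byte of that skb before it is handed back via netlink_sendskb().
 */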
static inline void set_cookie(struct sk_buff *skb, char code)
{
        ((char *)skb->data)[NOTIFY_COOKIE_LEN-1] = code;
}

/*
 * The next function is split out only to keep sys_mq_timedsend() from
 * growing too long.
 */
static void __do_notify(struct mqueue_inode_info *info)
{
        /*
         * The notification is invoked when a process has registered for it,
         * there is no process waiting synchronously for a message, and the
         * queue has just changed from empty to non-empty.  At this point we
         * are sure that no one is waiting synchronously.
         */
        if (info->notify_owner &&
            info->attr.mq_curmsgs == 1) {
                switch (info->notify.sigev_notify) {
                case SIGEV_NONE:
                        break;
                case SIGEV_SIGNAL: {
                        struct kernel_siginfo sig_i;
                        struct task_struct *task;

                        /* do_mq_notify() accepts sigev_signo == 0, why?? */
                        if (!info->notify.sigev_signo)
                                break;

                        clear_siginfo(&sig_i);
                        sig_i.si_signo = info->notify.sigev_signo;
                        sig_i.si_errno = 0;
                        sig_i.si_code = SI_MESGQ;
                        sig_i.si_value = info->notify.sigev_value;
                        rcu_read_lock();
                        /* map current pid/uid into info->owner's namespaces */
                        sig_i.si_pid = task_tgid_nr_ns(current,
                                                ns_of_pid(info->notify_owner));
                        sig_i.si_uid = from_kuid_munged(info->notify_user_ns,
                                                current_uid());
                        /*
                         * We can't use kill_pid_info(), this signal should
                         * bypass check_kill_permission(). It is from kernel
                         * but si_fromuser() can't know this.
                         * We do check the self_exec_id, to avoid sending
                         * signals to programs that don't expect them.
                         */
                        task = pid_task(info->notify_owner, PIDTYPE_TGID);
                        if (task && task->self_exec_id ==
                                                info->notify_self_exec_id) {
                                do_send_sig_info(info->notify.sigev_signo,
                                                &sig_i, task, PIDTYPE_TGID);
                        }
                        rcu_read_unlock();
                        break;
                }
                case SIGEV_THREAD:
                        set_cookie(info->notify_cookie, NOTIFY_WOKENUP);
                        netlink_sendskb(info->notify_sock, info->notify_cookie);
                        break;
                }
                /* a notification is one-shot: unregister the process after it fires */
                put_pid(info->notify_owner);
                put_user_ns(info->notify_user_ns);
                info->notify_owner = NULL;
                info->notify_user_ns = NULL;
        }
        wake_up(&info->wait_q);
}

static int prepare_timeout(const struct __kernel_timespec __user *u_abs_timeout,
                           struct timespec64 *ts)
{
        if (get_timespec64(ts, u_abs_timeout))
                return -EFAULT;
        if (!timespec64_valid(ts))
                return -EINVAL;
        return 0;
}

static void remove_notification(struct mqueue_inode_info *info)
{
        if (info->notify_owner != NULL &&
            info->notify.sigev_notify == SIGEV_THREAD) {
                set_cookie(info->notify_cookie, NOTIFY_REMOVED);
                netlink_sendskb(info->notify_sock, info->notify_cookie);
        }
        put_pid(info->notify_owner);
        put_user_ns(info->notify_user_ns);
        info->notify_owner = NULL;
        info->notify_user_ns = NULL;
}

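/*
 * prepare_open() - called from do_mq_open() with the parent directory
 * locked. If the dentry is negative and O_CREAT was given, the queue is
 * created via vfs_mkobj(mqueue_create_attr); otherwise O_CREAT|O_EXCL is
 * rejected with -EEXIST and the requested access mode is checked against
 * the inode permissions.
 */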
static int prepare_open(struct dentry *dentry, int oflag, int ro,
                        umode_t mode, struct filename *name,
                        struct mq_attr *attr)
{
        static const int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE,
                                                  MAY_READ | MAY_WRITE };
        int acc;

        if (d_really_is_negative(dentry)) {
                if (!(oflag & O_CREAT))
                        return -ENOENT;
                if (ro)
                        return ro;
                audit_inode_parent_hidden(name, dentry->d_parent);
                return vfs_mkobj(dentry, mode & ~current_umask(),
                                 mqueue_create_attr, attr);
        }
        /* it already existed */
        audit_inode(name, dentry, 0);
        if ((oflag & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
                return -EEXIST;
        if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY))
                return -EINVAL;
        acc = oflag2acc[oflag & O_ACCMODE];
        return inode_permission(&nop_mnt_idmap, d_inode(dentry), acc);
}

static int do_mq_open(const char __user *u_name, int oflag, umode_t mode,
                      struct mq_attr *attr)
{
        struct vfsmount *mnt = current->nsproxy->ipc_ns->mq_mnt;
        struct dentry *root = mnt->mnt_root;
        struct filename *name;
        struct path path;
        int fd, error;
        int ro;

        audit_mq_open(oflag, mode, attr);

        if (IS_ERR(name = getname(u_name)))
                return PTR_ERR(name);

        fd = get_unused_fd_flags(O_CLOEXEC);
        if (fd < 0)
                goto out_putname;

        ro = mnt_want_write(mnt);       /* we'll drop it in any case */
        inode_lock(d_inode(root));
        path.dentry = lookup_one_len(name->name, root, strlen(name->name));
        if (IS_ERR(path.dentry)) {
                error = PTR_ERR(path.dentry);
                goto out_putfd;
        }
        path.mnt = mntget(mnt);
        error = prepare_open(path.dentry, oflag, ro, mode, name, attr);
        if (!error) {
                struct file *file = dentry_open(&path, oflag, current_cred());
                if (!IS_ERR(file))
                        fd_install(fd, file);
                else
                        error = PTR_ERR(file);
        }
        path_put(&path);
out_putfd:
        if (error) {
                put_unused_fd(fd);
                fd = error;
        }
        inode_unlock(d_inode(root));
        if (!ro)
                mnt_drop_write(mnt);
out_putname:
        putname(name);
        return fd;
}

SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode,
                struct mq_attr __user *, u_attr)
{
        struct mq_attr attr;
        if (u_attr && copy_from_user(&attr, u_attr, sizeof(struct mq_attr)))
                return -EFAULT;

        return do_mq_open(u_name, oflag, mode, u_attr ? &attr : NULL);
}

SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
{
        int err;
        struct filename *name;
        struct dentry *dentry;
        struct inode *inode = NULL;
        struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
        struct vfsmount *mnt = ipc_ns->mq_mnt;

        name = getname(u_name);
        if (IS_ERR(name))
                return PTR_ERR(name);

        audit_inode_parent_hidden(name, mnt->mnt_root);
        err = mnt_want_write(mnt);
        if (err)
                goto out_name;
        inode_lock_nested(d_inode(mnt->mnt_root), I_MUTEX_PARENT);
        dentry = lookup_one_len(name->name, mnt->mnt_root,
                                strlen(name->name));
        if (IS_ERR(dentry)) {
                err = PTR_ERR(dentry);
                goto out_unlock;
        }

        inode = d_inode(dentry);
        if (!inode) {
                err = -ENOENT;
        } else {
                ihold(inode);
                err = vfs_unlink(&nop_mnt_idmap, d_inode(dentry->d_parent),
                                 dentry, NULL);
        }
        dput(dentry);

out_unlock:
        inode_unlock(d_inode(mnt->mnt_root));
        iput(inode);
        mnt_drop_write(mnt);
out_name:
        putname(name);

        return err;
}

/* Pipelined send and receive functions.
 *
 * If a receiver finds no waiting message, then it registers itself in the
 * list of waiting receivers. A sender checks that list before adding the new
 * message into the message array. If there is a waiting receiver, then it
 * bypasses the message array and directly hands the message over to the
 * receiver. The receiver accepts the message and returns without grabbing the
 * queue spinlock:
 *
 * - Set pointer to message.
 * - Queue the receiver task for later wakeup (without the info->lock).
 * - Update its state to STATE_READY. Now the receiver can continue.
 * - Wake up the process after the lock is dropped. Should the process wake up
 *   before this wakeup (due to a timeout or a signal) it will either see
 *   STATE_READY and continue or acquire the lock to check the state again.
 *
 * The same algorithm is used for senders.
 */

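/*
 * __pipelined_op() - common tail of the pipelined hand-over: take the
 * waiter off the wait list, grab a task reference, publish STATE_READY
 * with release semantics, and defer the actual wakeup to wake_up_q() after
 * info->lock has been dropped (see the MQ_BARRIER comment above).
 */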
static inline void __pipelined_op(struct wake_q_head *wake_q,
                                  struct mqueue_inode_info *info,
                                  struct ext_wait_queue *this)
{
        struct task_struct *task;

        list_del(&this->list);
        task = get_task_struct(this->task);

        /* see MQ_BARRIER for purpose/pairing */
        smp_store_release(&this->state, STATE_READY);
        wake_q_add_safe(wake_q, task);
}

/* pipelined_send() - send a message directly to the task waiting in
 * sys_mq_timedreceive() (without inserting the message into a queue).
 */
static inline void pipelined_send(struct wake_q_head *wake_q,
                                  struct mqueue_inode_info *info,
                                  struct msg_msg *message,
                                  struct ext_wait_queue *receiver)
{
        receiver->msg = message;
        __pipelined_op(wake_q, info, receiver);
}

/* pipelined_receive() - if a task is waiting in sys_mq_timedsend(), take
 * its message and insert it into the queue (we are guaranteed to have one
 * free slot). */
static inline void pipelined_receive(struct wake_q_head *wake_q,
                                     struct mqueue_inode_info *info)
{
        struct ext_wait_queue *sender = wq_get_first_waiter(info, SEND);

        if (!sender) {
                /* for poll */
                wake_up_interruptible(&info->wait_q);
                return;
        }
        if (msg_insert(sender->msg, info))
                return;

        __pipelined_op(wake_q, info, sender);
}

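/*
 * do_mq_timedsend() - common implementation behind the mq_timedsend()
 * family of syscalls. If a receiver is already blocked on the queue, the
 * message is handed over directly via pipelined_send(); otherwise it is
 * inserted into the priority tree, with the caller sleeping in wq_sleep()
 * when the queue is full (unless the descriptor is non-blocking), bounded
 * by the optional absolute CLOCK_REALTIME timeout.
 */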
do_mq_timedsend(mqd_t mqdes,const char __user * u_msg_ptr,size_t msg_len,unsigned int msg_prio,struct timespec64 * ts)10610d060606SAl Viro static int do_mq_timedsend(mqd_t mqdes, const char __user *u_msg_ptr,
10620d060606SAl Viro size_t msg_len, unsigned int msg_prio,
1063b9047726SDeepa Dinamani struct timespec64 *ts)
10641da177e4SLinus Torvalds {
10652903ff01SAl Viro struct fd f;
10661da177e4SLinus Torvalds struct inode *inode;
10671da177e4SLinus Torvalds struct ext_wait_queue wait;
10681da177e4SLinus Torvalds struct ext_wait_queue *receiver;
10691da177e4SLinus Torvalds struct msg_msg *msg_ptr;
10701da177e4SLinus Torvalds struct mqueue_inode_info *info;
10719ca7d8e6SCarsten Emde ktime_t expires, *timeout = NULL;
1072ce2d52ccSDoug Ledford struct posix_msg_tree_node *new_leaf = NULL;
10732903ff01SAl Viro int ret = 0;
1074194a6b5bSWaiman Long DEFINE_WAKE_Q(wake_q);
10751da177e4SLinus Torvalds
10761da177e4SLinus Torvalds if (unlikely(msg_prio >= (unsigned long) MQ_PRIO_MAX))
10771da177e4SLinus Torvalds return -EINVAL;
10781da177e4SLinus Torvalds
10790d060606SAl Viro if (ts) {
1080b9047726SDeepa Dinamani expires = timespec64_to_ktime(*ts);
10810d060606SAl Viro timeout = &expires;
10820d060606SAl Viro }
10830d060606SAl Viro
10840d060606SAl Viro audit_mq_sendrecv(mqdes, msg_len, msg_prio, ts);
10851da177e4SLinus Torvalds
10862903ff01SAl Viro f = fdget(mqdes);
10872903ff01SAl Viro if (unlikely(!f.file)) {
10888d8ffefaSAndré Goddard Rosa ret = -EBADF;
10891da177e4SLinus Torvalds goto out;
10908d8ffefaSAndré Goddard Rosa }
10911da177e4SLinus Torvalds
1092496ad9aaSAl Viro inode = file_inode(f.file);
10932903ff01SAl Viro if (unlikely(f.file->f_op != &mqueue_file_operations)) {
10948d8ffefaSAndré Goddard Rosa ret = -EBADF;
10951da177e4SLinus Torvalds goto out_fput;
10968d8ffefaSAndré Goddard Rosa }
10971da177e4SLinus Torvalds info = MQUEUE_I(inode);
10989f45f5bfSAl Viro audit_file(f.file);
10991da177e4SLinus Torvalds
11002903ff01SAl Viro if (unlikely(!(f.file->f_mode & FMODE_WRITE))) {
11018d8ffefaSAndré Goddard Rosa ret = -EBADF;
11021da177e4SLinus Torvalds goto out_fput;
11038d8ffefaSAndré Goddard Rosa }
11041da177e4SLinus Torvalds
11051da177e4SLinus Torvalds if (unlikely(msg_len > info->attr.mq_msgsize)) {
11061da177e4SLinus Torvalds ret = -EMSGSIZE;
11071da177e4SLinus Torvalds goto out_fput;
11081da177e4SLinus Torvalds }
11091da177e4SLinus Torvalds
11101da177e4SLinus Torvalds /* First try to allocate memory, before doing anything with
11111da177e4SLinus Torvalds * existing queues. */
11121da177e4SLinus Torvalds msg_ptr = load_msg(u_msg_ptr, msg_len);
11131da177e4SLinus Torvalds if (IS_ERR(msg_ptr)) {
11141da177e4SLinus Torvalds ret = PTR_ERR(msg_ptr);
11151da177e4SLinus Torvalds goto out_fput;
11161da177e4SLinus Torvalds }
11171da177e4SLinus Torvalds msg_ptr->m_ts = msg_len;
11181da177e4SLinus Torvalds msg_ptr->m_type = msg_prio;
11191da177e4SLinus Torvalds
1120ce2d52ccSDoug Ledford /*
1121ce2d52ccSDoug Ledford * msg_insert really wants us to have a valid, spare node struct so
1122ce2d52ccSDoug Ledford * it doesn't have to kmalloc a GFP_ATOMIC allocation, but it will
1123ce2d52ccSDoug Ledford * fall back to that if necessary.
1124ce2d52ccSDoug Ledford */
1125ce2d52ccSDoug Ledford if (!info->node_cache)
1126ce2d52ccSDoug Ledford new_leaf = kmalloc(sizeof(*new_leaf), GFP_KERNEL);
1127ce2d52ccSDoug Ledford
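/*
 * The speculative allocation above must happen before spin_lock() below:
 * kmalloc(GFP_KERNEL) may sleep, which is not allowed while holding a
 * spinlock, and allocating under the lock would force msg_insert() into
 * the less reliable GFP_ATOMIC fallback.
 */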
11281da177e4SLinus Torvalds spin_lock(&info->lock);
11291da177e4SLinus Torvalds
1130ce2d52ccSDoug Ledford if (!info->node_cache && new_leaf) {
1131ce2d52ccSDoug Ledford /* Save our speculative allocation into the cache */
1132ce2d52ccSDoug Ledford INIT_LIST_HEAD(&new_leaf->msg_list);
1133ce2d52ccSDoug Ledford info->node_cache = new_leaf;
1134ce2d52ccSDoug Ledford new_leaf = NULL;
1135ce2d52ccSDoug Ledford } else {
1136ce2d52ccSDoug Ledford kfree(new_leaf);
1137ce2d52ccSDoug Ledford }
1138ce2d52ccSDoug Ledford
11391da177e4SLinus Torvalds if (info->attr.mq_curmsgs == info->attr.mq_maxmsg) {
11402903ff01SAl Viro if (f.file->f_flags & O_NONBLOCK) {
11411da177e4SLinus Torvalds ret = -EAGAIN;
11421da177e4SLinus Torvalds } else {
11431da177e4SLinus Torvalds wait.task = current;
11441da177e4SLinus Torvalds wait.msg = (void *) msg_ptr;
1145c5b2cbdbSManfred Spraul
1146c5b2cbdbSManfred Spraul /* memory barrier not required, we hold info->lock */
1147c5b2cbdbSManfred Spraul WRITE_ONCE(wait.state, STATE_NONE);
11481da177e4SLinus Torvalds ret = wq_sleep(info, SEND, timeout, &wait);
1149ce2d52ccSDoug Ledford /*
1150ce2d52ccSDoug Ledford * wq_sleep must be called with info->lock held, and
1151ce2d52ccSDoug Ledford * returns with the lock released
1152ce2d52ccSDoug Ledford */
1153ce2d52ccSDoug Ledford goto out_free;
11541da177e4SLinus Torvalds }
11551da177e4SLinus Torvalds } else {
11561da177e4SLinus Torvalds receiver = wq_get_first_waiter(info, RECV);
11571da177e4SLinus Torvalds if (receiver) {
1158fa6004adSDavidlohr Bueso pipelined_send(&wake_q, info, msg_ptr, receiver);
11591da177e4SLinus Torvalds } else {
11601da177e4SLinus Torvalds /* adds message to the queue */
1161ce2d52ccSDoug Ledford ret = msg_insert(msg_ptr, info);
1162ce2d52ccSDoug Ledford if (ret)
1163ce2d52ccSDoug Ledford goto out_unlock;
11641da177e4SLinus Torvalds __do_notify(info);
11651da177e4SLinus Torvalds }
1166*783904f5SJeff Layton inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
11671da177e4SLinus Torvalds }
1168ce2d52ccSDoug Ledford out_unlock:
1169ce2d52ccSDoug Ledford spin_unlock(&info->lock);
1170fa6004adSDavidlohr Bueso wake_up_q(&wake_q);
1171ce2d52ccSDoug Ledford out_free:
1172ce2d52ccSDoug Ledford if (ret)
1173ce2d52ccSDoug Ledford free_msg(msg_ptr);
11741da177e4SLinus Torvalds out_fput:
11752903ff01SAl Viro fdput(f);
11761da177e4SLinus Torvalds out:
11771da177e4SLinus Torvalds return ret;
11781da177e4SLinus Torvalds }
11791da177e4SLinus Torvalds
11800d060606SAl Viro static int do_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr,
11810d060606SAl Viro size_t msg_len, unsigned int __user *u_msg_prio,
1182b9047726SDeepa Dinamani struct timespec64 *ts)
11831da177e4SLinus Torvalds {
11841da177e4SLinus Torvalds ssize_t ret;
11851da177e4SLinus Torvalds struct msg_msg *msg_ptr;
11862903ff01SAl Viro struct fd f;
11871da177e4SLinus Torvalds struct inode *inode;
11881da177e4SLinus Torvalds struct mqueue_inode_info *info;
11891da177e4SLinus Torvalds struct ext_wait_queue wait;
11909ca7d8e6SCarsten Emde ktime_t expires, *timeout = NULL;
1191ce2d52ccSDoug Ledford struct posix_msg_tree_node *new_leaf = NULL;
11921da177e4SLinus Torvalds
11930d060606SAl Viro if (ts) {
1194b9047726SDeepa Dinamani expires = timespec64_to_ktime(*ts);
11959ca7d8e6SCarsten Emde timeout = &expires;
1196c32c8af4SAl Viro }
119720ca73bcSGeorge C. Wilson
11980d060606SAl Viro audit_mq_sendrecv(mqdes, msg_len, 0, ts);
11991da177e4SLinus Torvalds
12002903ff01SAl Viro f = fdget(mqdes);
12012903ff01SAl Viro if (unlikely(!f.file)) {
12028d8ffefaSAndré Goddard Rosa ret = -EBADF;
12031da177e4SLinus Torvalds goto out;
12048d8ffefaSAndré Goddard Rosa }
12051da177e4SLinus Torvalds
1206496ad9aaSAl Viro inode = file_inode(f.file);
12072903ff01SAl Viro if (unlikely(f.file->f_op != &mqueue_file_operations)) {
12088d8ffefaSAndré Goddard Rosa ret = -EBADF;
12091da177e4SLinus Torvalds goto out_fput;
12108d8ffefaSAndré Goddard Rosa }
12111da177e4SLinus Torvalds info = MQUEUE_I(inode);
12129f45f5bfSAl Viro audit_file(f.file);
12131da177e4SLinus Torvalds
12142903ff01SAl Viro if (unlikely(!(f.file->f_mode & FMODE_READ))) {
12158d8ffefaSAndré Goddard Rosa ret = -EBADF;
12161da177e4SLinus Torvalds goto out_fput;
12178d8ffefaSAndré Goddard Rosa }
12181da177e4SLinus Torvalds
12191da177e4SLinus Torvalds /* check that the receive buffer is at least mq_msgsize bytes */
12201da177e4SLinus Torvalds if (unlikely(msg_len < info->attr.mq_msgsize)) {
12211da177e4SLinus Torvalds ret = -EMSGSIZE;
12221da177e4SLinus Torvalds goto out_fput;
12231da177e4SLinus Torvalds }
12241da177e4SLinus Torvalds
1225ce2d52ccSDoug Ledford /*
1226ce2d52ccSDoug Ledford * msg_insert really wants us to have a valid, spare node struct so
1227ce2d52ccSDoug Ledford * it doesn't have to kmalloc a GFP_ATOMIC allocation, but it will
1228ce2d52ccSDoug Ledford * fall back to that if necessary.
1229ce2d52ccSDoug Ledford */
1230ce2d52ccSDoug Ledford if (!info->node_cache)
1231ce2d52ccSDoug Ledford new_leaf = kmalloc(sizeof(*new_leaf), GFP_KERNEL);
1232ce2d52ccSDoug Ledford
12331da177e4SLinus Torvalds spin_lock(&info->lock);
1234ce2d52ccSDoug Ledford
1235ce2d52ccSDoug Ledford if (!info->node_cache && new_leaf) {
1236ce2d52ccSDoug Ledford /* Save our speculative allocation into the cache */
1237ce2d52ccSDoug Ledford INIT_LIST_HEAD(&new_leaf->msg_list);
1238ce2d52ccSDoug Ledford info->node_cache = new_leaf;
1239ce2d52ccSDoug Ledford } else {
1240ce2d52ccSDoug Ledford kfree(new_leaf);
1241ce2d52ccSDoug Ledford }
1242ce2d52ccSDoug Ledford
12431da177e4SLinus Torvalds if (info->attr.mq_curmsgs == 0) {
12442903ff01SAl Viro if (f.file->f_flags & O_NONBLOCK) {
12451da177e4SLinus Torvalds spin_unlock(&info->lock);
12461da177e4SLinus Torvalds ret = -EAGAIN;
12471da177e4SLinus Torvalds } else {
12481da177e4SLinus Torvalds wait.task = current;
1249c5b2cbdbSManfred Spraul
1250c5b2cbdbSManfred Spraul /* memory barrier not required, we hold info->lock */
1251c5b2cbdbSManfred Spraul WRITE_ONCE(wait.state, STATE_NONE);
12521da177e4SLinus Torvalds ret = wq_sleep(info, RECV, timeout, &wait);
12531da177e4SLinus Torvalds msg_ptr = wait.msg;
12541da177e4SLinus Torvalds }
12551da177e4SLinus Torvalds } else {
1256194a6b5bSWaiman Long DEFINE_WAKE_Q(wake_q);
1257fa6004adSDavidlohr Bueso
12581da177e4SLinus Torvalds msg_ptr = msg_get(info);
12591da177e4SLinus Torvalds
1260*783904f5SJeff Layton inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
12611da177e4SLinus Torvalds
12621da177e4SLinus Torvalds /* There is now free space in the queue. */
1263fa6004adSDavidlohr Bueso pipelined_receive(&wake_q, info);
12641da177e4SLinus Torvalds spin_unlock(&info->lock);
1265fa6004adSDavidlohr Bueso wake_up_q(&wake_q);
12661da177e4SLinus Torvalds ret = 0;
12671da177e4SLinus Torvalds }
12681da177e4SLinus Torvalds if (ret == 0) {
12691da177e4SLinus Torvalds ret = msg_ptr->m_ts;
12701da177e4SLinus Torvalds
12711da177e4SLinus Torvalds if ((u_msg_prio && put_user(msg_ptr->m_type, u_msg_prio)) ||
12721da177e4SLinus Torvalds store_msg(u_msg_ptr, msg_ptr, msg_ptr->m_ts)) {
12731da177e4SLinus Torvalds ret = -EFAULT;
12741da177e4SLinus Torvalds }
12751da177e4SLinus Torvalds free_msg(msg_ptr);
12761da177e4SLinus Torvalds }
12771da177e4SLinus Torvalds out_fput:
12782903ff01SAl Viro fdput(f);
12791da177e4SLinus Torvalds out:
12801da177e4SLinus Torvalds return ret;
12811da177e4SLinus Torvalds }
12821da177e4SLinus Torvalds
12830d060606SAl Viro SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
12840d060606SAl Viro size_t, msg_len, unsigned int, msg_prio,
128521fc538dSArnd Bergmann const struct __kernel_timespec __user *, u_abs_timeout)
12860d060606SAl Viro {
1287b9047726SDeepa Dinamani struct timespec64 ts, *p = NULL;
12880d060606SAl Viro if (u_abs_timeout) {
12890d060606SAl Viro int res = prepare_timeout(u_abs_timeout, &ts);
12900d060606SAl Viro if (res)
12910d060606SAl Viro return res;
12920d060606SAl Viro p = &ts;
12930d060606SAl Viro }
12940d060606SAl Viro return do_mq_timedsend(mqdes, u_msg_ptr, msg_len, msg_prio, p);
12950d060606SAl Viro }
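/*
 * Userspace view of the send path, for reference.  This is an illustrative
 * sketch only, using the POSIX wrappers (glibc/librt, link with -lrt where
 * needed); the helper name, queue name, attributes and the 5 second timeout
 * are made up for illustration.  The timeout passed to mq_timedsend() is an
 * absolute CLOCK_REALTIME time; EAGAIN is returned if the queue is full and
 * O_NONBLOCK is set, ETIMEDOUT once the timeout expires.
 *
 *	#include <fcntl.h>
 *	#include <mqueue.h>
 *	#include <time.h>
 *
 *	int send_example(void)
 *	{
 *		struct mq_attr attr = { .mq_maxmsg = 10, .mq_msgsize = 128 };
 *		struct timespec abs;
 *		mqd_t mq;
 *
 *		mq = mq_open("/example", O_CREAT | O_WRONLY, 0600, &attr);
 *		if (mq == (mqd_t)-1)
 *			return -1;
 *
 *		clock_gettime(CLOCK_REALTIME, &abs);
 *		abs.tv_sec += 5;
 *
 *		if (mq_timedsend(mq, "hello", 5, 1, &abs) == -1) {
 *			mq_close(mq);
 *			return -1;
 *		}
 *		return mq_close(mq);
 *	}
 */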
12960d060606SAl Viro
12970d060606SAl Viro SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
12980d060606SAl Viro size_t, msg_len, unsigned int __user *, u_msg_prio,
129921fc538dSArnd Bergmann const struct __kernel_timespec __user *, u_abs_timeout)
13000d060606SAl Viro {
1301b9047726SDeepa Dinamani struct timespec64 ts, *p = NULL;
13020d060606SAl Viro if (u_abs_timeout) {
13030d060606SAl Viro int res = prepare_timeout(u_abs_timeout, &ts);
13040d060606SAl Viro if (res)
13050d060606SAl Viro return res;
13060d060606SAl Viro p = &ts;
13070d060606SAl Viro }
13080d060606SAl Viro return do_mq_timedreceive(mqdes, u_msg_ptr, msg_len, u_msg_prio, p);
13090d060606SAl Viro }
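/*
 * Receive-side counterpart, again only an illustrative userspace sketch.
 * Note that the buffer handed to mq_timedreceive() must be at least
 * mq_msgsize bytes, matching the -EMSGSIZE check in do_mq_timedreceive()
 * above; on success the return value is the length of the received message,
 * and messages are delivered highest priority first (FIFO within a priority).
 *
 *	#include <mqueue.h>
 *	#include <time.h>
 *
 *	ssize_t recv_example(mqd_t mq, char *buf, size_t buflen)
 *	{
 *		struct timespec abs;
 *		unsigned int prio;
 *
 *		clock_gettime(CLOCK_REALTIME, &abs);
 *		abs.tv_sec += 5;
 *
 *		return mq_timedreceive(mq, buf, buflen, &prio, &abs);
 *	}
 */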
13100d060606SAl Viro
13111da177e4SLinus Torvalds /*
13121da177e4SLinus Torvalds * Note: if the caller asks to deregister (by passing a NULL notification)
13131da177e4SLinus Torvalds * but is not the current owner of the notification, the request is
13141da177e4SLinus Torvalds * silently ignored.  POSIX does not explicitly define this case.
13151da177e4SLinus Torvalds */
13160d060606SAl Viro static int do_mq_notify(mqd_t mqdes, const struct sigevent *notification)
13171da177e4SLinus Torvalds {
13182903ff01SAl Viro int ret;
13192903ff01SAl Viro struct fd f;
13201da177e4SLinus Torvalds struct sock *sock;
13211da177e4SLinus Torvalds struct inode *inode;
13221da177e4SLinus Torvalds struct mqueue_inode_info *info;
13231da177e4SLinus Torvalds struct sk_buff *nc;
13241da177e4SLinus Torvalds
13250d060606SAl Viro audit_mq_notify(mqdes, notification);
132620ca73bcSGeorge C. Wilson
13271da177e4SLinus Torvalds nc = NULL;
13281da177e4SLinus Torvalds sock = NULL;
13290d060606SAl Viro if (notification != NULL) {
13300d060606SAl Viro if (unlikely(notification->sigev_notify != SIGEV_NONE &&
13310d060606SAl Viro notification->sigev_notify != SIGEV_SIGNAL &&
13320d060606SAl Viro notification->sigev_notify != SIGEV_THREAD))
13331da177e4SLinus Torvalds return -EINVAL;
13340d060606SAl Viro if (notification->sigev_notify == SIGEV_SIGNAL &&
13350d060606SAl Viro !valid_signal(notification->sigev_signo)) {
13361da177e4SLinus Torvalds return -EINVAL;
13371da177e4SLinus Torvalds }
13380d060606SAl Viro if (notification->sigev_notify == SIGEV_THREAD) {
1339c3d8d1e3SPatrick McHardy long timeo;
1340c3d8d1e3SPatrick McHardy
13411da177e4SLinus Torvalds /* create the notify skb */
13421da177e4SLinus Torvalds nc = alloc_skb(NOTIFY_COOKIE_LEN, GFP_KERNEL);
1343c231740dSMarkus Elfring if (!nc)
1344c231740dSMarkus Elfring return -ENOMEM;
1345c231740dSMarkus Elfring
13461da177e4SLinus Torvalds if (copy_from_user(nc->data,
13470d060606SAl Viro notification->sigev_value.sival_ptr,
13481da177e4SLinus Torvalds NOTIFY_COOKIE_LEN)) {
13498d8ffefaSAndré Goddard Rosa ret = -EFAULT;
1350c231740dSMarkus Elfring goto free_skb;
13511da177e4SLinus Torvalds }
13521da177e4SLinus Torvalds
13531da177e4SLinus Torvalds /* TODO: add a header? */
13541da177e4SLinus Torvalds skb_put(nc, NOTIFY_COOKIE_LEN);
13551da177e4SLinus Torvalds /* and attach it to the socket */
13561da177e4SLinus Torvalds retry:
13570d060606SAl Viro f = fdget(notification->sigev_signo);
13582903ff01SAl Viro if (!f.file) {
13591da177e4SLinus Torvalds ret = -EBADF;
13601da177e4SLinus Torvalds goto out;
13618d8ffefaSAndré Goddard Rosa }
13622903ff01SAl Viro sock = netlink_getsockbyfilp(f.file);
13632903ff01SAl Viro fdput(f);
13641da177e4SLinus Torvalds if (IS_ERR(sock)) {
13651da177e4SLinus Torvalds ret = PTR_ERR(sock);
1366c231740dSMarkus Elfring goto free_skb;
13671da177e4SLinus Torvalds }
13681da177e4SLinus Torvalds
1369c3d8d1e3SPatrick McHardy timeo = MAX_SCHEDULE_TIMEOUT;
13709457afeeSDenis V. Lunev ret = netlink_attachskb(sock, nc, &timeo, NULL);
1371f991af3dSCong Wang if (ret == 1) {
1372f991af3dSCong Wang sock = NULL;
13731da177e4SLinus Torvalds goto retry;
1374f991af3dSCong Wang }
1375c231740dSMarkus Elfring if (ret)
1376c231740dSMarkus Elfring return ret;
13771da177e4SLinus Torvalds }
13781da177e4SLinus Torvalds }
13791da177e4SLinus Torvalds
13802903ff01SAl Viro f = fdget(mqdes);
13812903ff01SAl Viro if (!f.file) {
13828d8ffefaSAndré Goddard Rosa ret = -EBADF;
13831da177e4SLinus Torvalds goto out;
13848d8ffefaSAndré Goddard Rosa }
13851da177e4SLinus Torvalds
1386496ad9aaSAl Viro inode = file_inode(f.file);
13872903ff01SAl Viro if (unlikely(f.file->f_op != &mqueue_file_operations)) {
13888d8ffefaSAndré Goddard Rosa ret = -EBADF;
13891da177e4SLinus Torvalds goto out_fput;
13908d8ffefaSAndré Goddard Rosa }
13911da177e4SLinus Torvalds info = MQUEUE_I(inode);
13921da177e4SLinus Torvalds
13931da177e4SLinus Torvalds ret = 0;
13941da177e4SLinus Torvalds spin_lock(&info->lock);
13950d060606SAl Viro if (notification == NULL) {
1396a03fcb73SCedric Le Goater if (info->notify_owner == task_tgid(current)) {
13971da177e4SLinus Torvalds remove_notification(info);
1398*783904f5SJeff Layton inode->i_atime = inode_set_ctime_current(inode);
13991da177e4SLinus Torvalds }
1400a03fcb73SCedric Le Goater } else if (info->notify_owner != NULL) {
14011da177e4SLinus Torvalds ret = -EBUSY;
14021da177e4SLinus Torvalds } else {
14030d060606SAl Viro switch (notification->sigev_notify) {
14041da177e4SLinus Torvalds case SIGEV_NONE:
14051da177e4SLinus Torvalds info->notify.sigev_notify = SIGEV_NONE;
14061da177e4SLinus Torvalds break;
14071da177e4SLinus Torvalds case SIGEV_THREAD:
14081da177e4SLinus Torvalds info->notify_sock = sock;
14091da177e4SLinus Torvalds info->notify_cookie = nc;
14101da177e4SLinus Torvalds sock = NULL;
14111da177e4SLinus Torvalds nc = NULL;
14121da177e4SLinus Torvalds info->notify.sigev_notify = SIGEV_THREAD;
14131da177e4SLinus Torvalds break;
14141da177e4SLinus Torvalds case SIGEV_SIGNAL:
14150d060606SAl Viro info->notify.sigev_signo = notification->sigev_signo;
14160d060606SAl Viro info->notify.sigev_value = notification->sigev_value;
14171da177e4SLinus Torvalds info->notify.sigev_notify = SIGEV_SIGNAL;
1418b5f20061SOleg Nesterov info->notify_self_exec_id = current->self_exec_id;
14191da177e4SLinus Torvalds break;
14201da177e4SLinus Torvalds }
1421a03fcb73SCedric Le Goater
1422a03fcb73SCedric Le Goater info->notify_owner = get_pid(task_tgid(current));
14236f9ac6d9SEric W. Biederman info->notify_user_ns = get_user_ns(current_user_ns());
1424*783904f5SJeff Layton inode->i_atime = inode_set_ctime_current(inode);
14251da177e4SLinus Torvalds }
14261da177e4SLinus Torvalds spin_unlock(&info->lock);
14271da177e4SLinus Torvalds out_fput:
14282903ff01SAl Viro fdput(f);
14291da177e4SLinus Torvalds out:
14303ab08fe2SDavidlohr Bueso if (sock)
14311da177e4SLinus Torvalds netlink_detachskb(sock, nc);
143297b0b1adSMarkus Elfring else
1433c231740dSMarkus Elfring free_skb:
14341da177e4SLinus Torvalds dev_kfree_skb(nc);
14353ab08fe2SDavidlohr Bueso
14361da177e4SLinus Torvalds return ret;
14371da177e4SLinus Torvalds }
14381da177e4SLinus Torvalds
14390d060606SAl Viro SYSCALL_DEFINE2(mq_notify, mqd_t, mqdes,
14400d060606SAl Viro const struct sigevent __user *, u_notification)
14411da177e4SLinus Torvalds {
14420d060606SAl Viro struct sigevent n, *p = NULL;
14430d060606SAl Viro if (u_notification) {
14440d060606SAl Viro if (copy_from_user(&n, u_notification, sizeof(struct sigevent)))
14450d060606SAl Viro return -EFAULT;
14460d060606SAl Viro p = &n;
14470d060606SAl Viro }
14480d060606SAl Viro return do_mq_notify(mqdes, p);
14490d060606SAl Viro }
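/*
 * Illustrative userspace registration for SIGEV_SIGNAL (a sketch; the helper
 * name and signal number are arbitrary).  The notification is one-shot: once
 * it has fired, the owner must call mq_notify() again (typically after
 * draining the queue) to re-arm it.  For SIGEV_THREAD the C library creates
 * the helper thread itself and talks to the kernel through the netlink
 * cookie path handled in do_mq_notify() above.
 *
 *	#include <mqueue.h>
 *	#include <signal.h>
 *
 *	int arm_notify(mqd_t mq)
 *	{
 *		struct sigevent sev = {
 *			.sigev_notify = SIGEV_SIGNAL,
 *			.sigev_signo  = SIGUSR1,
 *		};
 *
 *		return mq_notify(mq, &sev);
 *	}
 */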
14500d060606SAl Viro
14510d060606SAl Viro static int do_mq_getsetattr(int mqdes, struct mq_attr *new, struct mq_attr *old)
14520d060606SAl Viro {
14532903ff01SAl Viro struct fd f;
14541da177e4SLinus Torvalds struct inode *inode;
14551da177e4SLinus Torvalds struct mqueue_inode_info *info;
14561da177e4SLinus Torvalds
14570d060606SAl Viro if (new && (new->mq_flags & (~O_NONBLOCK)))
14581da177e4SLinus Torvalds return -EINVAL;
14591da177e4SLinus Torvalds
14602903ff01SAl Viro f = fdget(mqdes);
14610d060606SAl Viro if (!f.file)
14620d060606SAl Viro return -EBADF;
14630d060606SAl Viro
14640d060606SAl Viro if (unlikely(f.file->f_op != &mqueue_file_operations)) {
14650d060606SAl Viro fdput(f);
14660d060606SAl Viro return -EBADF;
14678d8ffefaSAndré Goddard Rosa }
14681da177e4SLinus Torvalds
1469496ad9aaSAl Viro inode = file_inode(f.file);
14701da177e4SLinus Torvalds info = MQUEUE_I(inode);
14711da177e4SLinus Torvalds
14721da177e4SLinus Torvalds spin_lock(&info->lock);
14731da177e4SLinus Torvalds
14740d060606SAl Viro if (old) {
14750d060606SAl Viro *old = info->attr;
14760d060606SAl Viro old->mq_flags = f.file->f_flags & O_NONBLOCK;
14770d060606SAl Viro }
14780d060606SAl Viro if (new) {
14790d060606SAl Viro audit_mq_getsetattr(mqdes, new);
14802903ff01SAl Viro spin_lock(&f.file->f_lock);
14810d060606SAl Viro if (new->mq_flags & O_NONBLOCK)
14822903ff01SAl Viro f.file->f_flags |= O_NONBLOCK;
14831da177e4SLinus Torvalds else
14842903ff01SAl Viro f.file->f_flags &= ~O_NONBLOCK;
14852903ff01SAl Viro spin_unlock(&f.file->f_lock);
14861da177e4SLinus Torvalds
1487*783904f5SJeff Layton inode->i_atime = inode_set_ctime_current(inode);
14881da177e4SLinus Torvalds }
14891da177e4SLinus Torvalds
14901da177e4SLinus Torvalds spin_unlock(&info->lock);
14912903ff01SAl Viro fdput(f);
14920d060606SAl Viro return 0;
14931da177e4SLinus Torvalds }
14941da177e4SLinus Torvalds
14950d060606SAl Viro SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes,
14960d060606SAl Viro const struct mq_attr __user *, u_mqstat,
14970d060606SAl Viro struct mq_attr __user *, u_omqstat)
14980d060606SAl Viro {
14990d060606SAl Viro int ret;
15000d060606SAl Viro struct mq_attr mqstat, omqstat;
15010d060606SAl Viro struct mq_attr *new = NULL, *old = NULL;
15020d060606SAl Viro
15030d060606SAl Viro if (u_mqstat) {
15040d060606SAl Viro new = &mqstat;
15050d060606SAl Viro if (copy_from_user(new, u_mqstat, sizeof(struct mq_attr)))
15060d060606SAl Viro return -EFAULT;
15070d060606SAl Viro }
15080d060606SAl Viro if (u_omqstat)
15090d060606SAl Viro old = &omqstat;
15100d060606SAl Viro
15110d060606SAl Viro ret = do_mq_getsetattr(mqdes, new, old);
15120d060606SAl Viro if (ret || !old)
15130d060606SAl Viro return ret;
15140d060606SAl Viro
15150d060606SAl Viro if (copy_to_user(u_omqstat, old, sizeof(struct mq_attr)))
15160d060606SAl Viro return -EFAULT;
15170d060606SAl Viro return 0;
15180d060606SAl Viro }
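/*
 * do_mq_getsetattr() backs both mq_getattr() and mq_setattr() in userspace.
 * Only O_NONBLOCK may be changed on an open queue (any other bit in mq_flags
 * is rejected with -EINVAL above); mq_maxmsg and mq_msgsize are fixed when
 * the queue is created.  Illustrative sketch (helper name made up):
 *
 *	#include <fcntl.h>
 *	#include <mqueue.h>
 *
 *	int set_nonblocking(mqd_t mq)
 *	{
 *		struct mq_attr attr;
 *
 *		if (mq_getattr(mq, &attr) == -1)
 *			return -1;
 *
 *		attr.mq_flags |= O_NONBLOCK;
 *		return mq_setattr(mq, &attr, NULL);
 *	}
 */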
15190d060606SAl Viro
15200d060606SAl Viro #ifdef CONFIG_COMPAT
15210d060606SAl Viro
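/*
 * A 32-bit task on a 64-bit kernel lays out struct mq_attr with 32-bit
 * longs, so the attributes are translated through this compat mirror of
 * the structure instead of being copied directly.
 */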
15220d060606SAl Viro struct compat_mq_attr {
15230d060606SAl Viro compat_long_t mq_flags; /* message queue flags */
15240d060606SAl Viro compat_long_t mq_maxmsg; /* maximum number of messages */
15250d060606SAl Viro compat_long_t mq_msgsize; /* maximum message size */
15260d060606SAl Viro compat_long_t mq_curmsgs; /* number of messages currently queued */
15270d060606SAl Viro compat_long_t __reserved[4]; /* ignored for input, zeroed for output */
15280d060606SAl Viro };
15290d060606SAl Viro
15300d060606SAl Viro static inline int get_compat_mq_attr(struct mq_attr *attr,
15310d060606SAl Viro const struct compat_mq_attr __user *uattr)
15320d060606SAl Viro {
15330d060606SAl Viro struct compat_mq_attr v;
15340d060606SAl Viro
15350d060606SAl Viro if (copy_from_user(&v, uattr, sizeof(*uattr)))
15360d060606SAl Viro return -EFAULT;
15370d060606SAl Viro
15380d060606SAl Viro memset(attr, 0, sizeof(*attr));
15390d060606SAl Viro attr->mq_flags = v.mq_flags;
15400d060606SAl Viro attr->mq_maxmsg = v.mq_maxmsg;
15410d060606SAl Viro attr->mq_msgsize = v.mq_msgsize;
15420d060606SAl Viro attr->mq_curmsgs = v.mq_curmsgs;
15430d060606SAl Viro return 0;
15440d060606SAl Viro }
15450d060606SAl Viro
15460d060606SAl Viro static inline int put_compat_mq_attr(const struct mq_attr *attr,
15470d060606SAl Viro struct compat_mq_attr __user *uattr)
15480d060606SAl Viro {
15490d060606SAl Viro struct compat_mq_attr v;
15500d060606SAl Viro
15510d060606SAl Viro memset(&v, 0, sizeof(v));
15520d060606SAl Viro v.mq_flags = attr->mq_flags;
15530d060606SAl Viro v.mq_maxmsg = attr->mq_maxmsg;
15540d060606SAl Viro v.mq_msgsize = attr->mq_msgsize;
15550d060606SAl Viro v.mq_curmsgs = attr->mq_curmsgs;
15560d060606SAl Viro if (copy_to_user(uattr, &v, sizeof(*uattr)))
15570d060606SAl Viro return -EFAULT;
15580d060606SAl Viro return 0;
15590d060606SAl Viro }
15600d060606SAl Viro
15610d060606SAl Viro COMPAT_SYSCALL_DEFINE4(mq_open, const char __user *, u_name,
15620d060606SAl Viro int, oflag, compat_mode_t, mode,
15630d060606SAl Viro struct compat_mq_attr __user *, u_attr)
15640d060606SAl Viro {
15650d060606SAl Viro struct mq_attr attr, *p = NULL;
15660d060606SAl Viro if (u_attr && oflag & O_CREAT) {
15670d060606SAl Viro p = &attr;
15680d060606SAl Viro if (get_compat_mq_attr(&attr, u_attr))
15690d060606SAl Viro return -EFAULT;
15700d060606SAl Viro }
15710d060606SAl Viro return do_mq_open(u_name, oflag, mode, p);
15720d060606SAl Viro }
15730d060606SAl Viro
1574b0d17578SArnd Bergmann COMPAT_SYSCALL_DEFINE2(mq_notify, mqd_t, mqdes,
1575b0d17578SArnd Bergmann const struct compat_sigevent __user *, u_notification)
1576b0d17578SArnd Bergmann {
1577b0d17578SArnd Bergmann struct sigevent n, *p = NULL;
1578b0d17578SArnd Bergmann if (u_notification) {
1579b0d17578SArnd Bergmann if (get_compat_sigevent(&n, u_notification))
1580b0d17578SArnd Bergmann return -EFAULT;
1581b0d17578SArnd Bergmann if (n.sigev_notify == SIGEV_THREAD)
1582b0d17578SArnd Bergmann n.sigev_value.sival_ptr = compat_ptr(n.sigev_value.sival_int);
1583b0d17578SArnd Bergmann p = &n;
1584b0d17578SArnd Bergmann }
1585b0d17578SArnd Bergmann return do_mq_notify(mqdes, p);
1586b0d17578SArnd Bergmann }
1587b0d17578SArnd Bergmann
1588b0d17578SArnd Bergmann COMPAT_SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes,
1589b0d17578SArnd Bergmann const struct compat_mq_attr __user *, u_mqstat,
1590b0d17578SArnd Bergmann struct compat_mq_attr __user *, u_omqstat)
1591b0d17578SArnd Bergmann {
1592b0d17578SArnd Bergmann int ret;
1593b0d17578SArnd Bergmann struct mq_attr mqstat, omqstat;
1594b0d17578SArnd Bergmann struct mq_attr *new = NULL, *old = NULL;
1595b0d17578SArnd Bergmann
1596b0d17578SArnd Bergmann if (u_mqstat) {
1597b0d17578SArnd Bergmann new = &mqstat;
1598b0d17578SArnd Bergmann if (get_compat_mq_attr(new, u_mqstat))
1599b0d17578SArnd Bergmann return -EFAULT;
1600b0d17578SArnd Bergmann }
1601b0d17578SArnd Bergmann if (u_omqstat)
1602b0d17578SArnd Bergmann old = &omqstat;
1603b0d17578SArnd Bergmann
1604b0d17578SArnd Bergmann ret = do_mq_getsetattr(mqdes, new, old);
1605b0d17578SArnd Bergmann if (ret || !old)
1606b0d17578SArnd Bergmann return ret;
1607b0d17578SArnd Bergmann
1608b0d17578SArnd Bergmann if (put_compat_mq_attr(old, u_omqstat))
1609b0d17578SArnd Bergmann return -EFAULT;
1610b0d17578SArnd Bergmann return 0;
1611b0d17578SArnd Bergmann }
1612b0d17578SArnd Bergmann #endif
1613b0d17578SArnd Bergmann
1614b0d17578SArnd Bergmann #ifdef CONFIG_COMPAT_32BIT_TIME
16159afc5eeeSArnd Bergmann static int compat_prepare_timeout(const struct old_timespec32 __user *p,
1616b9047726SDeepa Dinamani struct timespec64 *ts)
16170d060606SAl Viro {
16189afc5eeeSArnd Bergmann if (get_old_timespec32(ts, p))
16190d060606SAl Viro return -EFAULT;
1620b9047726SDeepa Dinamani if (!timespec64_valid(ts))
16210d060606SAl Viro return -EINVAL;
16220d060606SAl Viro return 0;
16230d060606SAl Viro }
16240d060606SAl Viro
16258dabe724SArnd Bergmann SYSCALL_DEFINE5(mq_timedsend_time32, mqd_t, mqdes,
16260d060606SAl Viro const char __user *, u_msg_ptr,
16278dabe724SArnd Bergmann unsigned int, msg_len, unsigned int, msg_prio,
16289afc5eeeSArnd Bergmann const struct old_timespec32 __user *, u_abs_timeout)
16290d060606SAl Viro {
1630b9047726SDeepa Dinamani struct timespec64 ts, *p = NULL;
16310d060606SAl Viro if (u_abs_timeout) {
16320d060606SAl Viro int res = compat_prepare_timeout(u_abs_timeout, &ts);
16330d060606SAl Viro if (res)
16340d060606SAl Viro return res;
16350d060606SAl Viro p = &ts;
16360d060606SAl Viro }
16370d060606SAl Viro return do_mq_timedsend(mqdes, u_msg_ptr, msg_len, msg_prio, p);
16380d060606SAl Viro }
16390d060606SAl Viro
16408dabe724SArnd Bergmann SYSCALL_DEFINE5(mq_timedreceive_time32, mqd_t, mqdes,
16410d060606SAl Viro char __user *, u_msg_ptr,
16428dabe724SArnd Bergmann unsigned int, msg_len, unsigned int __user *, u_msg_prio,
16439afc5eeeSArnd Bergmann const struct old_timespec32 __user *, u_abs_timeout)
16440d060606SAl Viro {
1645b9047726SDeepa Dinamani struct timespec64 ts, *p = NULL;
16460d060606SAl Viro if (u_abs_timeout) {
16470d060606SAl Viro int res = compat_prepare_timeout(u_abs_timeout, &ts);
16480d060606SAl Viro if (res)
16490d060606SAl Viro return res;
16500d060606SAl Viro p = &ts;
16510d060606SAl Viro }
16520d060606SAl Viro return do_mq_timedreceive(mqdes, u_msg_ptr, msg_len, u_msg_prio, p);
16530d060606SAl Viro }
16540d060606SAl Viro #endif
16550d060606SAl Viro
165692e1d5beSArjan van de Ven static const struct inode_operations mqueue_dir_inode_operations = {
16571da177e4SLinus Torvalds .lookup = simple_lookup,
16581da177e4SLinus Torvalds .create = mqueue_create,
16591da177e4SLinus Torvalds .unlink = mqueue_unlink,
16601da177e4SLinus Torvalds };
16611da177e4SLinus Torvalds
16629a32144eSArjan van de Ven static const struct file_operations mqueue_file_operations = {
16631da177e4SLinus Torvalds .flush = mqueue_flush_file,
16641da177e4SLinus Torvalds .poll = mqueue_poll_file,
16651da177e4SLinus Torvalds .read = mqueue_read_file,
16666038f373SArnd Bergmann .llseek = default_llseek,
16671da177e4SLinus Torvalds };
16681da177e4SLinus Torvalds
1669b87221deSAlexey Dobriyan static const struct super_operations mqueue_super_ops = {
16701da177e4SLinus Torvalds .alloc_inode = mqueue_alloc_inode,
1671015d7956SAl Viro .free_inode = mqueue_free_inode,
16726d8af64cSAl Viro .evict_inode = mqueue_evict_inode,
16731da177e4SLinus Torvalds .statfs = simple_statfs,
16741da177e4SLinus Torvalds };
16751da177e4SLinus Torvalds
1676935c6912SDavid Howells static const struct fs_context_operations mqueue_fs_context_ops = {
1677935c6912SDavid Howells .free = mqueue_fs_context_free,
1678935c6912SDavid Howells .get_tree = mqueue_get_tree,
1679935c6912SDavid Howells };
1680935c6912SDavid Howells
16811da177e4SLinus Torvalds static struct file_system_type mqueue_fs_type = {
16821da177e4SLinus Torvalds .name = "mqueue",
1683935c6912SDavid Howells .init_fs_context = mqueue_init_fs_context,
16841da177e4SLinus Torvalds .kill_sb = kill_litter_super,
1685bc1b69edSGao feng .fs_flags = FS_USERNS_MOUNT,
16861da177e4SLinus Torvalds };
16871da177e4SLinus Torvalds
16887eafd7c7SSerge E. Hallyn int mq_init_ns(struct ipc_namespace *ns)
16897eafd7c7SSerge E. Hallyn {
1690935c6912SDavid Howells struct vfsmount *m;
1691935c6912SDavid Howells
16927eafd7c7SSerge E. Hallyn ns->mq_queues_count = 0;
16937eafd7c7SSerge E. Hallyn ns->mq_queues_max = DFLT_QUEUESMAX;
16947eafd7c7SSerge E. Hallyn ns->mq_msg_max = DFLT_MSGMAX;
16957eafd7c7SSerge E. Hallyn ns->mq_msgsize_max = DFLT_MSGSIZEMAX;
1696cef0184cSKOSAKI Motohiro ns->mq_msg_default = DFLT_MSG;
1697cef0184cSKOSAKI Motohiro ns->mq_msgsize_default = DFLT_MSGSIZE;
169836735a6aSAl Viro
1699935c6912SDavid Howells m = mq_create_mount(ns);
1700935c6912SDavid Howells if (IS_ERR(m))
1701935c6912SDavid Howells return PTR_ERR(m);
1702935c6912SDavid Howells ns->mq_mnt = m;
17037eafd7c7SSerge E. Hallyn return 0;
17047eafd7c7SSerge E. Hallyn }
17057eafd7c7SSerge E. Hallyn
17067eafd7c7SSerge E. Hallyn void mq_clear_sbinfo(struct ipc_namespace *ns)
17077eafd7c7SSerge E. Hallyn {
17087eafd7c7SSerge E. Hallyn ns->mq_mnt->mnt_sb->s_fs_info = NULL;
17097eafd7c7SSerge E. Hallyn }
17107eafd7c7SSerge E. Hallyn
17111da177e4SLinus Torvalds static int __init init_mqueue_fs(void)
17121da177e4SLinus Torvalds {
17131da177e4SLinus Torvalds int error;
17141da177e4SLinus Torvalds
17151da177e4SLinus Torvalds mqueue_inode_cachep = kmem_cache_create("mqueue_inode_cache",
17161da177e4SLinus Torvalds sizeof(struct mqueue_inode_info), 0,
17175d097056SVladimir Davydov SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT, init_once);
17181da177e4SLinus Torvalds if (mqueue_inode_cachep == NULL)
17191da177e4SLinus Torvalds return -ENOMEM;
17201da177e4SLinus Torvalds
1721dc55e35fSAlexey Gladkov if (!setup_mq_sysctls(&init_ipc_ns)) {
1722dc55e35fSAlexey Gladkov pr_warn("sysctl registration failed\n");
172312b677f2SZhengchao Shao error = -ENOMEM;
172412b677f2SZhengchao Shao goto out_kmem;
1725dc55e35fSAlexey Gladkov }
17261da177e4SLinus Torvalds
17271da177e4SLinus Torvalds error = register_filesystem(&mqueue_fs_type);
17281da177e4SLinus Torvalds if (error)
17291da177e4SLinus Torvalds goto out_sysctl;
17301da177e4SLinus Torvalds
17317eafd7c7SSerge E. Hallyn spin_lock_init(&mq_lock);
17327eafd7c7SSerge E. Hallyn
17336f686574SAl Viro error = mq_init_ns(&init_ipc_ns);
17346f686574SAl Viro if (error)
17351da177e4SLinus Torvalds goto out_filesystem;
17361da177e4SLinus Torvalds
17371da177e4SLinus Torvalds return 0;
17381da177e4SLinus Torvalds
17391da177e4SLinus Torvalds out_filesystem:
17401da177e4SLinus Torvalds unregister_filesystem(&mqueue_fs_type);
17411da177e4SLinus Torvalds out_sysctl:
1742c579d60fSHangyu Hua retire_mq_sysctls(&init_ipc_ns);
174312b677f2SZhengchao Shao out_kmem:
174412b677f2SZhengchao Shao kmem_cache_destroy(mqueue_inode_cachep);
17451da177e4SLinus Torvalds return error;
17461da177e4SLinus Torvalds }
17471da177e4SLinus Torvalds
17486d08a256SDavidlohr Bueso device_initcall(init_mqueue_fs);
1749