xref: /openbmc/linux/fs/eventfd.c (revision 9d56dd3b083a3bec56e9da35ce07baca81030b03)
1 /*
2  *  fs/eventfd.c
3  *
4  *  Copyright (C) 2007  Davide Libenzi <davidel@xmailserver.org>
5  *
6  */
7 
8 #include <linux/file.h>
9 #include <linux/poll.h>
10 #include <linux/init.h>
11 #include <linux/fs.h>
12 #include <linux/sched.h>
13 #include <linux/kernel.h>
14 #include <linux/list.h>
15 #include <linux/spinlock.h>
16 #include <linux/anon_inodes.h>
17 #include <linux/syscalls.h>
18 #include <linux/module.h>
19 #include <linux/kref.h>
20 #include <linux/eventfd.h>
21 
22 struct eventfd_ctx {
23 	struct kref kref;
24 	wait_queue_head_t wqh;
25 	/*
26 	 * Every time that a write(2) is performed on an eventfd, the
27 	 * value of the __u64 being written is added to "count" and a
28 	 * wakeup is performed on "wqh". A read(2) will return the "count"
29 	 * value to userspace, and will reset "count" to zero. The kernel
30 	 * side eventfd_signal() also, adds to the "count" counter and
31 	 * issue a wakeup.
32 	 */
33 	__u64 count;
34 	unsigned int flags;
35 };
36 
37 /**
38  * eventfd_signal - Adds @n to the eventfd counter.
39  * @ctx: [in] Pointer to the eventfd context.
40  * @n: [in] Value of the counter to be added to the eventfd internal counter.
41  *          The value cannot be negative.
42  *
43  * This function is supposed to be called by the kernel in paths that do not
44  * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX
45  * value, and we signal this as overflow condition by returining a POLLERR
46  * to poll(2).
47  *
48  * Returns @n in case of success, a non-negative number lower than @n in case
49  * of overflow, or the following error codes:
50  *
51  * -EINVAL    : The value of @n is negative.
52  */
53 int eventfd_signal(struct eventfd_ctx *ctx, int n)
54 {
55 	unsigned long flags;
56 
57 	if (n < 0)
58 		return -EINVAL;
59 	spin_lock_irqsave(&ctx->wqh.lock, flags);
60 	if (ULLONG_MAX - ctx->count < n)
61 		n = (int) (ULLONG_MAX - ctx->count);
62 	ctx->count += n;
63 	if (waitqueue_active(&ctx->wqh))
64 		wake_up_locked_poll(&ctx->wqh, POLLIN);
65 	spin_unlock_irqrestore(&ctx->wqh.lock, flags);
66 
67 	return n;
68 }
69 EXPORT_SYMBOL_GPL(eventfd_signal);
70 
/*
 * Hands the memory of an eventfd context back to the allocator. Used both by
 * the kref release path and by the creation error path.
 */
static void eventfd_free_ctx(struct eventfd_ctx *ctx)
{
	kfree(ctx);
}
75 
76 static void eventfd_free(struct kref *kref)
77 {
78 	struct eventfd_ctx *ctx = container_of(kref, struct eventfd_ctx, kref);
79 
80 	eventfd_free_ctx(ctx);
81 }
82 
83 /**
84  * eventfd_ctx_get - Acquires a reference to the internal eventfd context.
85  * @ctx: [in] Pointer to the eventfd context.
86  *
87  * Returns: In case of success, returns a pointer to the eventfd context.
88  */
89 struct eventfd_ctx *eventfd_ctx_get(struct eventfd_ctx *ctx)
90 {
91 	kref_get(&ctx->kref);
92 	return ctx;
93 }
94 EXPORT_SYMBOL_GPL(eventfd_ctx_get);
95 
96 /**
97  * eventfd_ctx_put - Releases a reference to the internal eventfd context.
98  * @ctx: [in] Pointer to eventfd context.
99  *
100  * The eventfd context reference must have been previously acquired either
101  * with eventfd_ctx_get() or eventfd_ctx_fdget()).
102  */
103 void eventfd_ctx_put(struct eventfd_ctx *ctx)
104 {
105 	kref_put(&ctx->kref, eventfd_free);
106 }
107 EXPORT_SYMBOL_GPL(eventfd_ctx_put);
108 
109 static int eventfd_release(struct inode *inode, struct file *file)
110 {
111 	struct eventfd_ctx *ctx = file->private_data;
112 
113 	wake_up_poll(&ctx->wqh, POLLHUP);
114 	eventfd_ctx_put(ctx);
115 	return 0;
116 }
117 
118 static unsigned int eventfd_poll(struct file *file, poll_table *wait)
119 {
120 	struct eventfd_ctx *ctx = file->private_data;
121 	unsigned int events = 0;
122 	unsigned long flags;
123 
124 	poll_wait(file, &ctx->wqh, wait);
125 
126 	spin_lock_irqsave(&ctx->wqh.lock, flags);
127 	if (ctx->count > 0)
128 		events |= POLLIN;
129 	if (ctx->count == ULLONG_MAX)
130 		events |= POLLERR;
131 	if (ULLONG_MAX - 1 > ctx->count)
132 		events |= POLLOUT;
133 	spin_unlock_irqrestore(&ctx->wqh.lock, flags);
134 
135 	return events;
136 }
137 
138 static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
139 			    loff_t *ppos)
140 {
141 	struct eventfd_ctx *ctx = file->private_data;
142 	ssize_t res;
143 	__u64 ucnt = 0;
144 	DECLARE_WAITQUEUE(wait, current);
145 
146 	if (count < sizeof(ucnt))
147 		return -EINVAL;
148 	spin_lock_irq(&ctx->wqh.lock);
149 	res = -EAGAIN;
150 	if (ctx->count > 0)
151 		res = sizeof(ucnt);
152 	else if (!(file->f_flags & O_NONBLOCK)) {
153 		__add_wait_queue(&ctx->wqh, &wait);
154 		for (res = 0;;) {
155 			set_current_state(TASK_INTERRUPTIBLE);
156 			if (ctx->count > 0) {
157 				res = sizeof(ucnt);
158 				break;
159 			}
160 			if (signal_pending(current)) {
161 				res = -ERESTARTSYS;
162 				break;
163 			}
164 			spin_unlock_irq(&ctx->wqh.lock);
165 			schedule();
166 			spin_lock_irq(&ctx->wqh.lock);
167 		}
168 		__remove_wait_queue(&ctx->wqh, &wait);
169 		__set_current_state(TASK_RUNNING);
170 	}
171 	if (likely(res > 0)) {
172 		ucnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
173 		ctx->count -= ucnt;
174 		if (waitqueue_active(&ctx->wqh))
175 			wake_up_locked_poll(&ctx->wqh, POLLOUT);
176 	}
177 	spin_unlock_irq(&ctx->wqh.lock);
178 	if (res > 0 && put_user(ucnt, (__u64 __user *) buf))
179 		return -EFAULT;
180 
181 	return res;
182 }
183 
184 static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count,
185 			     loff_t *ppos)
186 {
187 	struct eventfd_ctx *ctx = file->private_data;
188 	ssize_t res;
189 	__u64 ucnt;
190 	DECLARE_WAITQUEUE(wait, current);
191 
192 	if (count < sizeof(ucnt))
193 		return -EINVAL;
194 	if (copy_from_user(&ucnt, buf, sizeof(ucnt)))
195 		return -EFAULT;
196 	if (ucnt == ULLONG_MAX)
197 		return -EINVAL;
198 	spin_lock_irq(&ctx->wqh.lock);
199 	res = -EAGAIN;
200 	if (ULLONG_MAX - ctx->count > ucnt)
201 		res = sizeof(ucnt);
202 	else if (!(file->f_flags & O_NONBLOCK)) {
203 		__add_wait_queue(&ctx->wqh, &wait);
204 		for (res = 0;;) {
205 			set_current_state(TASK_INTERRUPTIBLE);
206 			if (ULLONG_MAX - ctx->count > ucnt) {
207 				res = sizeof(ucnt);
208 				break;
209 			}
210 			if (signal_pending(current)) {
211 				res = -ERESTARTSYS;
212 				break;
213 			}
214 			spin_unlock_irq(&ctx->wqh.lock);
215 			schedule();
216 			spin_lock_irq(&ctx->wqh.lock);
217 		}
218 		__remove_wait_queue(&ctx->wqh, &wait);
219 		__set_current_state(TASK_RUNNING);
220 	}
221 	if (likely(res > 0)) {
222 		ctx->count += ucnt;
223 		if (waitqueue_active(&ctx->wqh))
224 			wake_up_locked_poll(&ctx->wqh, POLLIN);
225 	}
226 	spin_unlock_irq(&ctx->wqh.lock);
227 
228 	return res;
229 }
230 
231 static const struct file_operations eventfd_fops = {
232 	.release	= eventfd_release,
233 	.poll		= eventfd_poll,
234 	.read		= eventfd_read,
235 	.write		= eventfd_write,
236 };
237 
238 /**
239  * eventfd_fget - Acquire a reference of an eventfd file descriptor.
240  * @fd: [in] Eventfd file descriptor.
241  *
242  * Returns a pointer to the eventfd file structure in case of success, or the
243  * following error pointer:
244  *
245  * -EBADF    : Invalid @fd file descriptor.
246  * -EINVAL   : The @fd file descriptor is not an eventfd file.
247  */
248 struct file *eventfd_fget(int fd)
249 {
250 	struct file *file;
251 
252 	file = fget(fd);
253 	if (!file)
254 		return ERR_PTR(-EBADF);
255 	if (file->f_op != &eventfd_fops) {
256 		fput(file);
257 		return ERR_PTR(-EINVAL);
258 	}
259 
260 	return file;
261 }
262 EXPORT_SYMBOL_GPL(eventfd_fget);
263 
264 /**
265  * eventfd_ctx_fdget - Acquires a reference to the internal eventfd context.
266  * @fd: [in] Eventfd file descriptor.
267  *
268  * Returns a pointer to the internal eventfd context, otherwise the error
269  * pointers returned by the following functions:
270  *
271  * eventfd_fget
272  */
273 struct eventfd_ctx *eventfd_ctx_fdget(int fd)
274 {
275 	struct file *file;
276 	struct eventfd_ctx *ctx;
277 
278 	file = eventfd_fget(fd);
279 	if (IS_ERR(file))
280 		return (struct eventfd_ctx *) file;
281 	ctx = eventfd_ctx_get(file->private_data);
282 	fput(file);
283 
284 	return ctx;
285 }
286 EXPORT_SYMBOL_GPL(eventfd_ctx_fdget);
287 
288 /**
289  * eventfd_ctx_fileget - Acquires a reference to the internal eventfd context.
290  * @file: [in] Eventfd file pointer.
291  *
292  * Returns a pointer to the internal eventfd context, otherwise the error
293  * pointer:
294  *
295  * -EINVAL   : The @fd file descriptor is not an eventfd file.
296  */
297 struct eventfd_ctx *eventfd_ctx_fileget(struct file *file)
298 {
299 	if (file->f_op != &eventfd_fops)
300 		return ERR_PTR(-EINVAL);
301 
302 	return eventfd_ctx_get(file->private_data);
303 }
304 EXPORT_SYMBOL_GPL(eventfd_ctx_fileget);
305 
306 /**
307  * eventfd_file_create - Creates an eventfd file pointer.
308  * @count: Initial eventfd counter value.
309  * @flags: Flags for the eventfd file.
310  *
311  * This function creates an eventfd file pointer, w/out installing it into
312  * the fd table. This is useful when the eventfd file is used during the
313  * initialization of data structures that require extra setup after the eventfd
314  * creation. So the eventfd creation is split into the file pointer creation
315  * phase, and the file descriptor installation phase.
316  * In this way races with userspace closing the newly installed file descriptor
317  * can be avoided.
318  * Returns an eventfd file pointer, or a proper error pointer.
319  */
320 struct file *eventfd_file_create(unsigned int count, int flags)
321 {
322 	struct file *file;
323 	struct eventfd_ctx *ctx;
324 
325 	/* Check the EFD_* constants for consistency.  */
326 	BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC);
327 	BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK);
328 
329 	if (flags & ~EFD_FLAGS_SET)
330 		return ERR_PTR(-EINVAL);
331 
332 	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
333 	if (!ctx)
334 		return ERR_PTR(-ENOMEM);
335 
336 	kref_init(&ctx->kref);
337 	init_waitqueue_head(&ctx->wqh);
338 	ctx->count = count;
339 	ctx->flags = flags;
340 
341 	file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx,
342 				  O_RDWR | (flags & EFD_SHARED_FCNTL_FLAGS));
343 	if (IS_ERR(file))
344 		eventfd_free_ctx(ctx);
345 
346 	return file;
347 }
348 
349 SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
350 {
351 	int fd, error;
352 	struct file *file;
353 
354 	error = get_unused_fd_flags(flags & EFD_SHARED_FCNTL_FLAGS);
355 	if (error < 0)
356 		return error;
357 	fd = error;
358 
359 	file = eventfd_file_create(count, flags);
360 	if (IS_ERR(file)) {
361 		error = PTR_ERR(file);
362 		goto err_put_unused_fd;
363 	}
364 	fd_install(fd, file);
365 
366 	return fd;
367 
368 err_put_unused_fd:
369 	put_unused_fd(fd);
370 
371 	return error;
372 }
373 
374 SYSCALL_DEFINE1(eventfd, unsigned int, count)
375 {
376 	return sys_eventfd2(count, 0);
377 }
378 
379