xref: /openbmc/linux/fs/fuse/dev.c (revision 1d3d752b471d2a3a1d5e4fe177e5e7d52abb4e4c)
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2005  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 #include "fuse_i.h"
10 
11 #include <linux/init.h>
12 #include <linux/module.h>
13 #include <linux/poll.h>
14 #include <linux/uio.h>
15 #include <linux/miscdevice.h>
16 #include <linux/pagemap.h>
17 #include <linux/file.h>
18 #include <linux/slab.h>
19 
20 MODULE_ALIAS_MISCDEV(FUSE_MINOR);
21 
22 static kmem_cache_t *fuse_req_cachep;
23 
24 static inline struct fuse_conn *fuse_get_conn(struct file *file)
25 {
26 	struct fuse_conn *fc;
27 	spin_lock(&fuse_lock);
28 	fc = file->private_data;
29 	if (fc && !fc->mounted)
30 		fc = NULL;
31 	spin_unlock(&fuse_lock);
32 	return fc;
33 }
34 
35 static inline void fuse_request_init(struct fuse_req *req)
36 {
37 	memset(req, 0, sizeof(*req));
38 	INIT_LIST_HEAD(&req->list);
39 	init_waitqueue_head(&req->waitq);
40 	atomic_set(&req->count, 1);
41 }
42 
43 struct fuse_req *fuse_request_alloc(void)
44 {
45 	struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, SLAB_KERNEL);
46 	if (req)
47 		fuse_request_init(req);
48 	return req;
49 }
50 
51 void fuse_request_free(struct fuse_req *req)
52 {
53 	kmem_cache_free(fuse_req_cachep, req);
54 }
55 
56 static inline void block_sigs(sigset_t *oldset)
57 {
58 	sigset_t mask;
59 
60 	siginitsetinv(&mask, sigmask(SIGKILL));
61 	sigprocmask(SIG_BLOCK, &mask, oldset);
62 }
63 
64 static inline void restore_sigs(sigset_t *oldset)
65 {
66 	sigprocmask(SIG_SETMASK, oldset, NULL);
67 }
68 
69 void fuse_reset_request(struct fuse_req *req)
70 {
71 	int preallocated = req->preallocated;
72 	BUG_ON(atomic_read(&req->count) != 1);
73 	fuse_request_init(req);
74 	req->preallocated = preallocated;
75 }
76 
77 static void __fuse_get_request(struct fuse_req *req)
78 {
79 	atomic_inc(&req->count);
80 }
81 
82 /* Must be called with a refcount greater than 1 */
83 static void __fuse_put_request(struct fuse_req *req)
84 {
85 	BUG_ON(atomic_read(&req->count) < 2);
86 	atomic_dec(&req->count);
87 }
88 
89 static struct fuse_req *do_get_request(struct fuse_conn *fc)
90 {
91 	struct fuse_req *req;
92 
93 	spin_lock(&fuse_lock);
94 	BUG_ON(list_empty(&fc->unused_list));
95 	req = list_entry(fc->unused_list.next, struct fuse_req, list);
96 	list_del_init(&req->list);
97 	spin_unlock(&fuse_lock);
98 	fuse_request_init(req);
99 	req->preallocated = 1;
100 	req->in.h.uid = current->fsuid;
101 	req->in.h.gid = current->fsgid;
102 	req->in.h.pid = current->pid;
103 	return req;
104 }
105 
106 /* This can return NULL, but only if it was interrupted by a SIGKILL */
107 struct fuse_req *fuse_get_request(struct fuse_conn *fc)
108 {
109 	int intr;
110 	sigset_t oldset;
111 
112 	block_sigs(&oldset);
113 	intr = down_interruptible(&fc->outstanding_sem);
114 	restore_sigs(&oldset);
115 	return intr ? NULL : do_get_request(fc);
116 }
117 
118 static void fuse_putback_request(struct fuse_conn *fc, struct fuse_req *req)
119 {
120 	spin_lock(&fuse_lock);
121 	if (req->preallocated)
122 		list_add(&req->list, &fc->unused_list);
123 	else
124 		fuse_request_free(req);
125 
126 	/* If we are in debt, decrease that first */
127 	if (fc->outstanding_debt)
128 		fc->outstanding_debt--;
129 	else
130 		up(&fc->outstanding_sem);
131 	spin_unlock(&fuse_lock);
132 }
133 
134 void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
135 {
136 	if (atomic_dec_and_test(&req->count))
137 		fuse_putback_request(fc, req);
138 }
139 
140 void fuse_release_background(struct fuse_req *req)
141 {
142 	iput(req->inode);
143 	iput(req->inode2);
144 	if (req->file)
145 		fput(req->file);
146 	spin_lock(&fuse_lock);
147 	list_del(&req->bg_entry);
148 	spin_unlock(&fuse_lock);
149 }
150 
151 /*
152  * This function is called when a request is finished.  Either a reply
153  * has arrived or it was interrupted (and not yet sent) or some error
154  * occurred during communication with userspace, or the device file was
155  * closed.  It decreases the reference count for the request.  In the case
156  * of a background request, the references to the stored objects are
157  * released.  The requester thread is woken up (if still waiting), and
158  * finally the request is either freed or put on the unused_list.
159  *
160  * Called with fuse_lock, unlocks it
161  */
162 static void request_end(struct fuse_conn *fc, struct fuse_req *req)
163 {
164 	int putback;
165 	req->finished = 1;
166 	putback = atomic_dec_and_test(&req->count);
167 	spin_unlock(&fuse_lock);
168 	if (req->background) {
169 		down_read(&fc->sbput_sem);
170 		if (fc->mounted)
171 			fuse_release_background(req);
172 		up_read(&fc->sbput_sem);
173 	}
174 	wake_up(&req->waitq);
175 	if (req->in.h.opcode == FUSE_INIT) {
176 		int i;
177 
178 		if (req->misc.init_in_out.major != FUSE_KERNEL_VERSION)
179 			fc->conn_error = 1;
180 
181 		fc->minor = req->misc.init_in_out.minor;
182 
183 		/* After the INIT reply is received, other requests can go
184 		   out.  So do (FUSE_MAX_OUTSTANDING - 1) up()s on
185 		   outstanding_sem.  The last up() is done in
186 		   fuse_putback_request() */
187 		for (i = 1; i < FUSE_MAX_OUTSTANDING; i++)
188 			up(&fc->outstanding_sem);
189 	} else if (req->in.h.opcode == FUSE_RELEASE && req->inode == NULL) {
190 		/* Special case for failed iget in CREATE */
191 		u64 nodeid = req->in.h.nodeid;
192 		__fuse_get_request(req);
193 		fuse_reset_request(req);
194 		fuse_send_forget(fc, req, nodeid, 1);
195 		putback = 0;
196 	}
197 	if (putback)
198 		fuse_putback_request(fc, req);
199 }
200 
201 /*
202  * Unfortunately, request interruption does not just solve the deadlock
203  * problem; it also causes problems of its own.  These stem from the fact
204  * that an interrupted request continues to be processed in userspace,
205  * while all the locks and object references (inode and file) held
206  * during the operation are released.
207  *
208  * Releasing the locks is exactly why the request needs to be interrupted
209  * in the first place, so there's not a lot that can be done about this,
210  * except introducing additional locking in userspace.
211  *
212  * More importantly, inode and file references must be kept until
213  * userspace has replied; otherwise FORGET and RELEASE could be sent
214  * while the inode/file is still in use by the filesystem.
215  *
216  * For this reason the concept of a "background" request is introduced.
217  * An interrupted request is backgrounded if it has already been sent
218  * to userspace.  Backgrounding involves getting an extra reference to
219  * the inode(s) and file used in the request, and adding the request to
220  * the fc->background list.  When a reply is received for a background
221  * request, the object references are released, and the request is
222  * removed from the list.  If the filesystem is unmounted while there
223  * are still background requests, the list is walked and references
224  * are released as if a reply had been received (see the sketch below).
225  *
226  * There's one more use for a background request.  The RELEASE message is
227  * always sent as background, since it doesn't return an error or
228  * data.
229  */
230 static void background_request(struct fuse_conn *fc, struct fuse_req *req)
231 {
232 	req->background = 1;
233 	list_add(&req->bg_entry, &fc->background);
234 	if (req->inode)
235 		req->inode = igrab(req->inode);
236 	if (req->inode2)
237 		req->inode2 = igrab(req->inode2);
238 	if (req->file)
239 		get_file(req->file);
240 }
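
/*
 * A minimal sketch of the unmount-time step described above: walking
 * fc->background and dropping the references as if a reply had been
 * received.  The real walk lives in the superblock teardown path
 * (inode.c), not in this file; the helper below is hypothetical and
 * assumes the caller holds fc->sbput_sem for writing so request_end()
 * cannot race with it.  Illustration only, hence the #if 0.
 */
#if 0
static void release_all_background(struct fuse_conn *fc)
{
	/* fuse_release_background() takes fuse_lock itself to unlink the
	   request, so the list is drained without holding the lock here */
	while (!list_empty(&fc->background))
		fuse_release_background(list_entry(fc->background.next,
						   struct fuse_req, bg_entry));
}
#endif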
241 
242 /* Called with fuse_lock held.  Releases, and then reacquires it. */
243 static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
244 {
245 	sigset_t oldset;
246 
247 	spin_unlock(&fuse_lock);
248 	block_sigs(&oldset);
249 	wait_event_interruptible(req->waitq, req->finished);
250 	restore_sigs(&oldset);
251 	spin_lock(&fuse_lock);
252 	if (req->finished)
253 		return;
254 
255 	req->out.h.error = -EINTR;
256 	req->interrupted = 1;
257 	if (req->locked) {
258 		/* This is an uninterruptible sleep, because data is
259 		   being copied to/from the buffers of req.  While the
260 		   request is locked, there mustn't be any filesystem
261 		   operation (e.g. a page fault), since that could lead
262 		   to deadlock */
263 		spin_unlock(&fuse_lock);
264 		wait_event(req->waitq, !req->locked);
265 		spin_lock(&fuse_lock);
266 	}
267 	if (!req->sent && !list_empty(&req->list)) {
268 		list_del(&req->list);
269 		__fuse_put_request(req);
270 	} else if (!req->finished && req->sent)
271 		background_request(fc, req);
272 }
273 
274 static unsigned len_args(unsigned numargs, struct fuse_arg *args)
275 {
276 	unsigned nbytes = 0;
277 	unsigned i;
278 
279 	for (i = 0; i < numargs; i++)
280 		nbytes += args[i].size;
281 
282 	return nbytes;
283 }
284 
285 static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
286 {
287 	fc->reqctr++;
288 	/* zero is special */
289 	if (fc->reqctr == 0)
290 		fc->reqctr = 1;
291 	req->in.h.unique = fc->reqctr;
292 	req->in.h.len = sizeof(struct fuse_in_header) +
293 		len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
294 	if (!req->preallocated) {
295 		/* If the request is not preallocated (either FORGET or
296 		   RELEASE), still decrease outstanding_sem, so the
297 		   user can't open an infinite number of files while not
298 		   processing the RELEASE requests.  However, for
299 		   efficiency do it without blocking: if down()
300 		   would block, just increase the debt instead */
301 		if (down_trylock(&fc->outstanding_sem))
302 			fc->outstanding_debt++;
303 	}
304 	list_add_tail(&req->list, &fc->pending);
305 	wake_up(&fc->waitq);
306 }
307 
308 /*
309  * This can only be interrupted by a SIGKILL
310  */
311 void request_send(struct fuse_conn *fc, struct fuse_req *req)
312 {
313 	req->isreply = 1;
314 	spin_lock(&fuse_lock);
315 	if (!fc->connected)
316 		req->out.h.error = -ENOTCONN;
317 	else if (fc->conn_error)
318 		req->out.h.error = -ECONNREFUSED;
319 	else {
320 		queue_request(fc, req);
321 		/* acquire extra reference, since request is still needed
322 		   after request_end() */
323 		__fuse_get_request(req);
324 
325 		request_wait_answer(fc, req);
326 	}
327 	spin_unlock(&fuse_lock);
328 }
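
/*
 * A minimal sketch of how callers in dir.c/file.c are expected to drive
 * this interface: take a request, fill in the input/output arguments,
 * send it, read the error from the reply header, and drop the reference.
 * The GETATTR details are an approximation for illustration only (#if 0).
 */
#if 0
static int example_getattr(struct fuse_conn *fc, struct inode *inode,
			   struct fuse_attr_out *outarg)
{
	int err;
	struct fuse_req *req = fuse_get_request(fc);
	if (!req)
		return -EINTR;			/* interrupted by SIGKILL */

	req->in.h.opcode = FUSE_GETATTR;
	req->in.h.nodeid = get_node_id(inode);
	req->out.numargs = 1;
	req->out.args[0].size = sizeof(*outarg);
	req->out.args[0].value = outarg;
	request_send(fc, req);			/* blocks until userspace replies */
	err = req->out.h.error;
	fuse_put_request(fc, req);
	return err;
}
#endif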
329 
330 static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
331 {
332 	spin_lock(&fuse_lock);
333 	if (fc->connected) {
334 		queue_request(fc, req);
335 		spin_unlock(&fuse_lock);
336 	} else {
337 		req->out.h.error = -ENOTCONN;
338 		request_end(fc, req);
339 	}
340 }
341 
342 void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
343 {
344 	req->isreply = 0;
345 	request_send_nowait(fc, req);
346 }
347 
348 void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
349 {
350 	req->isreply = 1;
351 	spin_lock(&fuse_lock);
352 	background_request(fc, req);
353 	spin_unlock(&fuse_lock);
354 	request_send_nowait(fc, req);
355 }
356 
357 void fuse_send_init(struct fuse_conn *fc)
358 {
359 	/* This is called from fuse_read_super() so there's guaranteed
360 	   to be a request available */
361 	struct fuse_req *req = do_get_request(fc);
362 	struct fuse_init_in_out *arg = &req->misc.init_in_out;
363 	arg->major = FUSE_KERNEL_VERSION;
364 	arg->minor = FUSE_KERNEL_MINOR_VERSION;
365 	req->in.h.opcode = FUSE_INIT;
366 	req->in.numargs = 1;
367 	req->in.args[0].size = sizeof(*arg);
368 	req->in.args[0].value = arg;
369 	req->out.numargs = 1;
370 	req->out.args[0].size = sizeof(*arg);
371 	req->out.args[0].value = arg;
372 	request_send_background(fc, req);
373 }
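
/*
 * The INIT handshake seen from userspace, as a rough sketch: the first
 * request the daemon reads carries opcode FUSE_INIT and a struct
 * fuse_init_in_out body; the daemon replies with the version it
 * implements, and only once request_end() has processed that reply (see
 * above) are further requests let out.  Userspace code, simplified,
 * illustration only (#if 0).
 */
#if 0
/* userspace side, not kernel code */
#include <linux/fuse.h>
#include <string.h>
#include <unistd.h>

static int answer_init(int fuse_fd, const struct fuse_in_header *in)
{
	struct {
		struct fuse_out_header oh;
		struct fuse_init_in_out arg;
	} reply;

	memset(&reply, 0, sizeof(reply));
	reply.oh.unique = in->unique;		/* copied from the request */
	reply.oh.len = sizeof(reply);
	reply.arg.major = FUSE_KERNEL_VERSION;	/* must match the kernel's major */
	reply.arg.minor = FUSE_KERNEL_MINOR_VERSION;
	return write(fuse_fd, &reply, sizeof(reply)) == sizeof(reply) ? 0 : -1;
}
#endif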
374 
375 /*
376  * Lock the request.  Up to the next unlock_request() there mustn't be
377  * anything that could cause a page fault.  If the request was already
378  * interrupted, bail out.
379  */
380 static inline int lock_request(struct fuse_req *req)
381 {
382 	int err = 0;
383 	if (req) {
384 		spin_lock(&fuse_lock);
385 		if (req->interrupted)
386 			err = -ENOENT;
387 		else
388 			req->locked = 1;
389 		spin_unlock(&fuse_lock);
390 	}
391 	return err;
392 }
393 
394 /*
395  * Unlock the request.  If it was interrupted while it was locked, the
396  * requester thread is currently waiting for it to be unlocked, so
397  * wake it up.
398  */
399 static inline void unlock_request(struct fuse_req *req)
400 {
401 	if (req) {
402 		spin_lock(&fuse_lock);
403 		req->locked = 0;
404 		if (req->interrupted)
405 			wake_up(&req->waitq);
406 		spin_unlock(&fuse_lock);
407 	}
408 }
409 
410 struct fuse_copy_state {
411 	int write;
412 	struct fuse_req *req;
413 	const struct iovec *iov;
414 	unsigned long nr_segs;
415 	unsigned long seglen;
416 	unsigned long addr;
417 	struct page *pg;
418 	void *mapaddr;
419 	void *buf;
420 	unsigned len;
421 };
422 
423 static void fuse_copy_init(struct fuse_copy_state *cs, int write,
424 			   struct fuse_req *req, const struct iovec *iov,
425 			   unsigned long nr_segs)
426 {
427 	memset(cs, 0, sizeof(*cs));
428 	cs->write = write;
429 	cs->req = req;
430 	cs->iov = iov;
431 	cs->nr_segs = nr_segs;
432 }
433 
434 /* Unmap and put previous page of userspace buffer */
435 static inline void fuse_copy_finish(struct fuse_copy_state *cs)
436 {
437 	if (cs->mapaddr) {
438 		kunmap_atomic(cs->mapaddr, KM_USER0);
439 		if (cs->write) {
440 			flush_dcache_page(cs->pg);
441 			set_page_dirty_lock(cs->pg);
442 		}
443 		put_page(cs->pg);
444 		cs->mapaddr = NULL;
445 	}
446 }
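
/*
 * The copy helpers below form a small state machine over the caller's
 * iovec.  A condensed fragment of the intended calling sequence, mirroring
 * what fuse_dev_readv() does further down (the variables are that
 * function's); illustration only (#if 0):
 */
#if 0
	struct fuse_copy_state cs;

	fuse_copy_init(&cs, 1, req, iov, nr_segs);	/* 1 = copy towards userspace */
	err = fuse_copy_one(&cs, &in->h, sizeof(in->h));	/* fixed-size header */
	if (!err)
		err = fuse_copy_args(&cs, in->numargs, in->argpages,
				     (struct fuse_arg *) in->args, 0);	/* remaining args/pages */
	fuse_copy_finish(&cs);				/* unmap and release the last page */
#endif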
447 
448 /*
449  * Get another pageful of the userspace buffer, map it into kernel
450  * address space, and lock the request
451  */
452 static int fuse_copy_fill(struct fuse_copy_state *cs)
453 {
454 	unsigned long offset;
455 	int err;
456 
457 	unlock_request(cs->req);
458 	fuse_copy_finish(cs);
459 	if (!cs->seglen) {
460 		BUG_ON(!cs->nr_segs);
461 		cs->seglen = cs->iov[0].iov_len;
462 		cs->addr = (unsigned long) cs->iov[0].iov_base;
463 		cs->iov++;
464 		cs->nr_segs--;
465 	}
466 	down_read(&current->mm->mmap_sem);
467 	err = get_user_pages(current, current->mm, cs->addr, 1, cs->write, 0,
468 			     &cs->pg, NULL);
469 	up_read(&current->mm->mmap_sem);
470 	if (err < 0)
471 		return err;
472 	BUG_ON(err != 1);
473 	offset = cs->addr % PAGE_SIZE;
474 	cs->mapaddr = kmap_atomic(cs->pg, KM_USER0);
475 	cs->buf = cs->mapaddr + offset;
476 	cs->len = min(PAGE_SIZE - offset, cs->seglen);
477 	cs->seglen -= cs->len;
478 	cs->addr += cs->len;
479 
480 	return lock_request(cs->req);
481 }
482 
483 /* Do as much copying to/from the userspace buffer as we can */
484 static inline int fuse_copy_do(struct fuse_copy_state *cs, void **val,
485 			       unsigned *size)
486 {
487 	unsigned ncpy = min(*size, cs->len);
488 	if (val) {
489 		if (cs->write)
490 			memcpy(cs->buf, *val, ncpy);
491 		else
492 			memcpy(*val, cs->buf, ncpy);
493 		*val += ncpy;
494 	}
495 	*size -= ncpy;
496 	cs->len -= ncpy;
497 	cs->buf += ncpy;
498 	return ncpy;
499 }
500 
501 /*
502  * Copy a page in the request to/from the userspace buffer.  Must be
503  * done atomically
504  */
505 static inline int fuse_copy_page(struct fuse_copy_state *cs, struct page *page,
506 				 unsigned offset, unsigned count, int zeroing)
507 {
508 	if (page && zeroing && count < PAGE_SIZE) {
509 		void *mapaddr = kmap_atomic(page, KM_USER1);
510 		memset(mapaddr, 0, PAGE_SIZE);
511 		kunmap_atomic(mapaddr, KM_USER1);
512 	}
513 	while (count) {
514 		int err;
515 		if (!cs->len && (err = fuse_copy_fill(cs)))
516 			return err;
517 		if (page) {
518 			void *mapaddr = kmap_atomic(page, KM_USER1);
519 			void *buf = mapaddr + offset;
520 			offset += fuse_copy_do(cs, &buf, &count);
521 			kunmap_atomic(mapaddr, KM_USER1);
522 		} else
523 			offset += fuse_copy_do(cs, NULL, &count);
524 	}
525 	if (page && !cs->write)
526 		flush_dcache_page(page);
527 	return 0;
528 }
529 
530 /* Copy pages in the request to/from userspace buffer */
531 static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
532 			   int zeroing)
533 {
534 	unsigned i;
535 	struct fuse_req *req = cs->req;
536 	unsigned offset = req->page_offset;
537 	unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);
538 
539 	for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
540 		struct page *page = req->pages[i];
541 		int err = fuse_copy_page(cs, page, offset, count, zeroing);
542 		if (err)
543 			return err;
544 
545 		nbytes -= count;
546 		count = min(nbytes, (unsigned) PAGE_SIZE);
547 		offset = 0;
548 	}
549 	return 0;
550 }
551 
552 /* Copy a single argument in the request to/from userspace buffer */
553 static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
554 {
555 	while (size) {
556 		int err;
557 		if (!cs->len && (err = fuse_copy_fill(cs)))
558 			return err;
559 		fuse_copy_do(cs, &val, &size);
560 	}
561 	return 0;
562 }
563 
564 /* Copy request arguments to/from userspace buffer */
565 static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
566 			  unsigned argpages, struct fuse_arg *args,
567 			  int zeroing)
568 {
569 	int err = 0;
570 	unsigned i;
571 
572 	for (i = 0; !err && i < numargs; i++)  {
573 		struct fuse_arg *arg = &args[i];
574 		if (i == numargs - 1 && argpages)
575 			err = fuse_copy_pages(cs, arg->size, zeroing);
576 		else
577 			err = fuse_copy_one(cs, arg->value, arg->size);
578 	}
579 	return err;
580 }
581 
582 /* Wait until a request is available on the pending list */
583 static void request_wait(struct fuse_conn *fc)
584 {
585 	DECLARE_WAITQUEUE(wait, current);
586 
587 	add_wait_queue_exclusive(&fc->waitq, &wait);
588 	while (fc->mounted && list_empty(&fc->pending)) {
589 		set_current_state(TASK_INTERRUPTIBLE);
590 		if (signal_pending(current))
591 			break;
592 
593 		spin_unlock(&fuse_lock);
594 		schedule();
595 		spin_lock(&fuse_lock);
596 	}
597 	set_current_state(TASK_RUNNING);
598 	remove_wait_queue(&fc->waitq, &wait);
599 }
600 
601 /*
602  * Read a single request into the userspace filesystem's buffer.  This
603  * function waits until a request is available, then removes it from
604  * the pending list and copies the request data to the userspace buffer.
605  * If no reply is needed (FORGET), or the request has been interrupted, or
606  * there was an error during the copying, then it is finished by calling
607  * request_end().  Otherwise add it to the processing list and set
608  * the 'sent' flag.
609  */
610 static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
611 			      unsigned long nr_segs, loff_t *off)
612 {
613 	int err;
614 	struct fuse_conn *fc;
615 	struct fuse_req *req;
616 	struct fuse_in *in;
617 	struct fuse_copy_state cs;
618 	unsigned reqsize;
619 
620  restart:
621 	spin_lock(&fuse_lock);
622 	fc = file->private_data;
623 	err = -EPERM;
624 	if (!fc)
625 		goto err_unlock;
626 	request_wait(fc);
627 	err = -ENODEV;
628 	if (!fc->mounted)
629 		goto err_unlock;
630 	err = -ERESTARTSYS;
631 	if (list_empty(&fc->pending))
632 		goto err_unlock;
633 
634 	req = list_entry(fc->pending.next, struct fuse_req, list);
635 	list_del_init(&req->list);
636 
637 	in = &req->in;
638 	reqsize = in->h.len;
639 	/* If the request is too large, reply with an error and restart the read */
640 	if (iov_length(iov, nr_segs) < reqsize) {
641 		req->out.h.error = -EIO;
642 		/* SETXATTR is special, since it may carry arbitrarily large data */
643 		if (in->h.opcode == FUSE_SETXATTR)
644 			req->out.h.error = -E2BIG;
645 		request_end(fc, req);
646 		goto restart;
647 	}
648 	spin_unlock(&fuse_lock);
649 	fuse_copy_init(&cs, 1, req, iov, nr_segs);
650 	err = fuse_copy_one(&cs, &in->h, sizeof(in->h));
651 	if (!err)
652 		err = fuse_copy_args(&cs, in->numargs, in->argpages,
653 				     (struct fuse_arg *) in->args, 0);
654 	fuse_copy_finish(&cs);
655 	spin_lock(&fuse_lock);
656 	req->locked = 0;
657 	if (!err && req->interrupted)
658 		err = -ENOENT;
659 	if (err) {
660 		if (!req->interrupted)
661 			req->out.h.error = -EIO;
662 		request_end(fc, req);
663 		return err;
664 	}
665 	if (!req->isreply)
666 		request_end(fc, req);
667 	else {
668 		req->sent = 1;
669 		list_add_tail(&req->list, &fc->processing);
670 		spin_unlock(&fuse_lock);
671 	}
672 	return reqsize;
673 
674  err_unlock:
675 	spin_unlock(&fuse_lock);
676 	return err;
677 }
678 
679 static ssize_t fuse_dev_read(struct file *file, char __user *buf,
680 			     size_t nbytes, loff_t *off)
681 {
682 	struct iovec iov;
683 	iov.iov_len = nbytes;
684 	iov.iov_base = buf;
685 	return fuse_dev_readv(file, &iov, 1, off);
686 }
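
/*
 * Roughly what the userspace side of fuse_dev_readv() looks like: the
 * daemon issues one read() per request and receives a struct
 * fuse_in_header followed by the opcode-specific arguments.  A simplified
 * sketch (buffer size and error handling are approximations, not the
 * libfuse implementation); illustration only (#if 0).
 */
#if 0
/* userspace side, not kernel code */
#include <linux/fuse.h>
#include <stdio.h>
#include <unistd.h>

static int read_one_request(int fuse_fd)
{
	char buf[65536];	/* must be large enough for a whole request */
	const struct fuse_in_header *in = (const struct fuse_in_header *) buf;
	ssize_t n = read(fuse_fd, buf, sizeof(buf));

	if (n < (ssize_t) sizeof(*in))
		return -1;	/* error or truncated request */
	printf("opcode %u unique %llu nodeid %llu len %u\n",
	       in->opcode, (unsigned long long) in->unique,
	       (unsigned long long) in->nodeid, in->len);
	/* the opcode-specific arguments start at buf + sizeof(*in) */
	return 0;
}
#endif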
687 
688 /* Look up a request on the processing list by its unique ID */
689 static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
690 {
691 	struct list_head *entry;
692 
693 	list_for_each(entry, &fc->processing) {
694 		struct fuse_req *req;
695 		req = list_entry(entry, struct fuse_req, list);
696 		if (req->in.h.unique == unique)
697 			return req;
698 	}
699 	return NULL;
700 }
701 
702 static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
703 			 unsigned nbytes)
704 {
705 	unsigned reqsize = sizeof(struct fuse_out_header);
706 
707 	if (out->h.error)
708 		return nbytes != reqsize ? -EINVAL : 0;
709 
710 	reqsize += len_args(out->numargs, out->args);
711 
712 	if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
713 		return -EINVAL;
714 	else if (reqsize > nbytes) {
715 		struct fuse_arg *lastarg = &out->args[out->numargs-1];
716 		unsigned diffsize = reqsize - nbytes;
717 		if (diffsize > lastarg->size)
718 			return -EINVAL;
719 		lastarg->size -= diffsize;
720 	}
721 	return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
722 			      out->page_zeroing);
723 }
724 
725 /*
726  * Write a single reply to a request.  First the header is copied from
727  * the write buffer.  The request is then looked up on the processing
728  * list by the unique ID found in the header.  If found, it is removed
729  * from the list and the rest of the buffer is copied into the request.
730  * The request is finished by calling request_end().
731  */
732 static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
733 			       unsigned long nr_segs, loff_t *off)
734 {
735 	int err;
736 	unsigned nbytes = iov_length(iov, nr_segs);
737 	struct fuse_req *req;
738 	struct fuse_out_header oh;
739 	struct fuse_copy_state cs;
740 	struct fuse_conn *fc = fuse_get_conn(file);
741 	if (!fc)
742 		return -ENODEV;
743 
744 	fuse_copy_init(&cs, 0, NULL, iov, nr_segs);
745 	if (nbytes < sizeof(struct fuse_out_header))
746 		return -EINVAL;
747 
748 	err = fuse_copy_one(&cs, &oh, sizeof(oh));
749 	if (err)
750 		goto err_finish;
751 	err = -EINVAL;
752 	if (!oh.unique || oh.error <= -1000 || oh.error > 0 ||
753 	    oh.len != nbytes)
754 		goto err_finish;
755 
756 	spin_lock(&fuse_lock);
757 	req = request_find(fc, oh.unique);
758 	err = -EINVAL;
759 	if (!req)
760 		goto err_unlock;
761 
762 	list_del_init(&req->list);
763 	if (req->interrupted) {
764 		request_end(fc, req);
765 		fuse_copy_finish(&cs);
766 		return -ENOENT;
767 	}
768 	req->out.h = oh;
769 	req->locked = 1;
770 	cs.req = req;
771 	spin_unlock(&fuse_lock);
772 
773 	err = copy_out_args(&cs, &req->out, nbytes);
774 	fuse_copy_finish(&cs);
775 
776 	spin_lock(&fuse_lock);
777 	req->locked = 0;
778 	if (!err) {
779 		if (req->interrupted)
780 			err = -ENOENT;
781 	} else if (!req->interrupted)
782 		req->out.h.error = -EIO;
783 	request_end(fc, req);
784 
785 	return err ? err : nbytes;
786 
787  err_unlock:
788 	spin_unlock(&fuse_lock);
789  err_finish:
790 	fuse_copy_finish(&cs);
791 	return err;
792 }
793 
794 static ssize_t fuse_dev_write(struct file *file, const char __user *buf,
795 			      size_t nbytes, loff_t *off)
796 {
797 	struct iovec iov;
798 	iov.iov_len = nbytes;
799 	iov.iov_base = (char __user *) buf;
800 	return fuse_dev_writev(file, &iov, 1, off);
801 }
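
/*
 * The matching userspace side of fuse_dev_writev(): the daemon writes a
 * struct fuse_out_header (len covering the whole reply, error zero or a
 * negative errno, unique copied from the request) followed by the reply
 * payload, in a single writev().  Simplified sketch, illustration only
 * (#if 0).
 */
#if 0
/* userspace side, not kernel code */
#include <linux/fuse.h>
#include <string.h>
#include <sys/uio.h>

static int reply_to_request(int fuse_fd, __u64 unique, int error,
			    const void *arg, size_t argsize)
{
	struct fuse_out_header oh;
	struct iovec iov[2];
	int cnt = 1;

	memset(&oh, 0, sizeof(oh));
	oh.unique = unique;
	oh.error = error;			/* 0 or -errno, never positive */
	oh.len = sizeof(oh);
	iov[0].iov_base = &oh;
	iov[0].iov_len = sizeof(oh);
	if (!error && argsize) {		/* error replies carry no payload */
		oh.len += argsize;
		iov[1].iov_base = (void *) arg;
		iov[1].iov_len = argsize;
		cnt = 2;
	}
	return writev(fuse_fd, iov, cnt) == (ssize_t) oh.len ? 0 : -1;
}
#endif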
802 
803 static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
804 {
805 	struct fuse_conn *fc = fuse_get_conn(file);
806 	unsigned mask = POLLOUT | POLLWRNORM;
807 
808 	if (!fc)
809 		return -ENODEV;
810 
811 	poll_wait(file, &fc->waitq, wait);
812 
813 	spin_lock(&fuse_lock);
814 	if (!list_empty(&fc->pending))
815 		mask |= POLLIN | POLLRDNORM;
816 	spin_unlock(&fuse_lock);
817 
818 	return mask;
819 }
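
/*
 * fuse_dev_poll() lets the daemon multiplex the device with poll() or
 * select(): POLLIN means a request is waiting on the pending list, and the
 * device always reports itself writable.  A tiny userspace sketch,
 * illustration only (#if 0).
 */
#if 0
/* userspace side, not kernel code */
#include <poll.h>

static int wait_for_request(int fuse_fd, int timeout_ms)
{
	struct pollfd pfd = { .fd = fuse_fd, .events = POLLIN };

	return poll(&pfd, 1, timeout_ms) > 0 && (pfd.revents & POLLIN);
}
#endif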
820 
821 /* Abort all requests on the given list (pending or processing) */
822 static void end_requests(struct fuse_conn *fc, struct list_head *head)
823 {
824 	while (!list_empty(head)) {
825 		struct fuse_req *req;
826 		req = list_entry(head->next, struct fuse_req, list);
827 		list_del_init(&req->list);
828 		req->out.h.error = -ECONNABORTED;
829 		request_end(fc, req);
830 		spin_lock(&fuse_lock);
831 	}
832 }
833 
834 static int fuse_dev_release(struct inode *inode, struct file *file)
835 {
836 	struct fuse_conn *fc;
837 
838 	spin_lock(&fuse_lock);
839 	fc = file->private_data;
840 	if (fc) {
841 		fc->connected = 0;
842 		end_requests(fc, &fc->pending);
843 		end_requests(fc, &fc->processing);
844 		fuse_release_conn(fc);
845 	}
846 	spin_unlock(&fuse_lock);
847 	return 0;
848 }
849 
850 struct file_operations fuse_dev_operations = {
851 	.owner		= THIS_MODULE,
852 	.llseek		= no_llseek,
853 	.read		= fuse_dev_read,
854 	.readv		= fuse_dev_readv,
855 	.write		= fuse_dev_write,
856 	.writev		= fuse_dev_writev,
857 	.poll		= fuse_dev_poll,
858 	.release	= fuse_dev_release,
859 };
860 
861 static struct miscdevice fuse_miscdevice = {
862 	.minor = FUSE_MINOR,
863 	.name  = "fuse",
864 	.fops = &fuse_dev_operations,
865 };
866 
867 int __init fuse_dev_init(void)
868 {
869 	int err = -ENOMEM;
870 	fuse_req_cachep = kmem_cache_create("fuse_request",
871 					    sizeof(struct fuse_req),
872 					    0, 0, NULL, NULL);
873 	if (!fuse_req_cachep)
874 		goto out;
875 
876 	err = misc_register(&fuse_miscdevice);
877 	if (err)
878 		goto out_cache_clean;
879 
880 	return 0;
881 
882  out_cache_clean:
883 	kmem_cache_destroy(fuse_req_cachep);
884  out:
885 	return err;
886 }
887 
888 void fuse_dev_cleanup(void)
889 {
890 	misc_deregister(&fuse_miscdevice);
891 	kmem_cache_destroy(fuse_req_cachep);
892 }
893