xref: /openbmc/linux/fs/fuse/dev.c (revision d5cb9783536a41df9f9cba5b0a1d78047ed787f7)
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2005  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 #include "fuse_i.h"
10 
11 #include <linux/init.h>
12 #include <linux/module.h>
13 #include <linux/poll.h>
14 #include <linux/uio.h>
15 #include <linux/miscdevice.h>
16 #include <linux/pagemap.h>
17 #include <linux/file.h>
18 #include <linux/slab.h>
19 
20 MODULE_ALIAS_MISCDEV(FUSE_MINOR);
21 
22 static kmem_cache_t *fuse_req_cachep;
23 
24 static inline struct fuse_conn *fuse_get_conn(struct file *file)
25 {
26 	struct fuse_conn *fc;
27 	spin_lock(&fuse_lock);
28 	fc = file->private_data;
29 	if (fc && !fc->mounted)
30 		fc = NULL;
31 	spin_unlock(&fuse_lock);
32 	return fc;
33 }
34 
35 static inline void fuse_request_init(struct fuse_req *req)
36 {
37 	memset(req, 0, sizeof(*req));
38 	INIT_LIST_HEAD(&req->list);
39 	init_waitqueue_head(&req->waitq);
40 	atomic_set(&req->count, 1);
41 }
42 
43 struct fuse_req *fuse_request_alloc(void)
44 {
45 	struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, SLAB_KERNEL);
46 	if (req)
47 		fuse_request_init(req);
48 	return req;
49 }
50 
51 void fuse_request_free(struct fuse_req *req)
52 {
53 	kmem_cache_free(fuse_req_cachep, req);
54 }
55 
56 static inline void block_sigs(sigset_t *oldset)
57 {
58 	sigset_t mask;
59 
60 	siginitsetinv(&mask, sigmask(SIGKILL));
61 	sigprocmask(SIG_BLOCK, &mask, oldset);
62 }
63 
64 static inline void restore_sigs(sigset_t *oldset)
65 {
66 	sigprocmask(SIG_SETMASK, oldset, NULL);
67 }
68 
69 void fuse_reset_request(struct fuse_req *req)
70 {
71 	int preallocated = req->preallocated;
72 	BUG_ON(atomic_read(&req->count) != 1);
73 	fuse_request_init(req);
74 	req->preallocated = preallocated;
75 }
76 
77 static void __fuse_get_request(struct fuse_req *req)
78 {
79 	atomic_inc(&req->count);
80 }
81 
82 /* Must be called with > 1 refcount */
83 static void __fuse_put_request(struct fuse_req *req)
84 {
85 	BUG_ON(atomic_read(&req->count) < 2);
86 	atomic_dec(&req->count);
87 }
88 
89 static struct fuse_req *do_get_request(struct fuse_conn *fc)
90 {
91 	struct fuse_req *req;
92 
93 	spin_lock(&fuse_lock);
94 	BUG_ON(list_empty(&fc->unused_list));
95 	req = list_entry(fc->unused_list.next, struct fuse_req, list);
96 	list_del_init(&req->list);
97 	spin_unlock(&fuse_lock);
98 	fuse_request_init(req);
99 	req->preallocated = 1;
100 	req->in.h.uid = current->fsuid;
101 	req->in.h.gid = current->fsgid;
102 	req->in.h.pid = current->pid;
103 	return req;
104 }
105 
106 /* This can return NULL, but only if it is interrupted by SIGKILL */
107 struct fuse_req *fuse_get_request(struct fuse_conn *fc)
108 {
109 	int intr;
110 	sigset_t oldset;
111 
112 	block_sigs(&oldset);
113 	intr = down_interruptible(&fc->outstanding_sem);
114 	restore_sigs(&oldset);
115 	return intr ? NULL : do_get_request(fc);
116 }
117 
118 static void fuse_putback_request(struct fuse_conn *fc, struct fuse_req *req)
119 {
120 	spin_lock(&fuse_lock);
121 	if (req->preallocated)
122 		list_add(&req->list, &fc->unused_list);
123 	else
124 		fuse_request_free(req);
125 
126 	/* If we are in debt, decrease that first */
127 	if (fc->outstanding_debt)
128 		fc->outstanding_debt--;
129 	else
130 		up(&fc->outstanding_sem);
131 	spin_unlock(&fuse_lock);
132 }
133 
134 void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
135 {
136 	if (atomic_dec_and_test(&req->count))
137 		fuse_putback_request(fc, req);
138 }
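/*
 * Illustrative sketch (not part of this file) of how callers elsewhere
 * in fs/fuse typically drive this request life cycle; the opcode and
 * the nodeid/argument setup below are placeholders that vary per
 * operation:
 *
 *	struct fuse_req *req = fuse_get_request(fc);
 *	if (!req)
 *		return -EINTR;			// interrupted by SIGKILL
 *	req->in.h.opcode = FUSE_GETATTR;	// example opcode
 *	req->in.h.nodeid = nodeid;
 *	// ... set up req->in.args[] / req->out.args[] as needed ...
 *	request_send(fc, req);			// waits for the reply
 *	err = req->out.h.error;
 *	fuse_put_request(fc, req);		// drop the caller's reference
 */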
139 
140 void fuse_release_background(struct fuse_req *req)
141 {
142 	iput(req->inode);
143 	iput(req->inode2);
144 	if (req->file)
145 		fput(req->file);
146 	spin_lock(&fuse_lock);
147 	list_del(&req->bg_entry);
148 	spin_unlock(&fuse_lock);
149 }
150 
151 /*
152  * This function is called when a request is finished.  Either a reply
153  * has arrived or it was interrupted (and not yet sent) or some error
154  * occurred during communication with userspace, or the device file was
155  * closed.  It decreases the reference count for the request.  In case
156  * of a background request the references to the stored objects are
157  * released.  The requester thread is woken up (if still waiting), and
158  * finally the request is either freed or put on the unused_list.
159  *
160  * Called with fuse_lock held, unlocks it.
161  */
162 static void request_end(struct fuse_conn *fc, struct fuse_req *req)
163 {
164 	int putback;
165 	req->finished = 1;
166 	putback = atomic_dec_and_test(&req->count);
167 	spin_unlock(&fuse_lock);
168 	if (req->background) {
169 		down_read(&fc->sbput_sem);
170 		if (fc->mounted)
171 			fuse_release_background(req);
172 		up_read(&fc->sbput_sem);
173 	}
174 	wake_up(&req->waitq);
175 	if (req->in.h.opcode == FUSE_INIT) {
176 		int i;
177 
178 		if (req->misc.init_in_out.major != FUSE_KERNEL_VERSION)
179 			fc->conn_error = 1;
180 
181 		/* After the INIT reply is received, other requests can
182 		   go out.  So do (FUSE_MAX_OUTSTANDING - 1) up()s on
183 		   outstanding_sem.  The last up() is done in
184 		   fuse_putback_request() */
185 		for (i = 1; i < FUSE_MAX_OUTSTANDING; i++)
186 			up(&fc->outstanding_sem);
187 	}
188 	if (putback)
189 		fuse_putback_request(fc, req);
190 }
191 
192 /*
193  * Unfortunately request interruption does not just solve the deadlock
194  * problem, it causes problems too.  These stem from the fact that an
195  * interrupted request continues to be processed in userspace, while
196  * all the locks and object references (inode and file) held during
197  * the operation have been released.
198  *
199  * Releasing the locks is exactly why the request needs to be
200  * interrupted, so there's not a lot that can be done about this,
201  * except introducing additional locking in userspace.
202  *
203  * More important is to keep the inode and file references until
204  * userspace has replied, otherwise FORGET and RELEASE could be sent
205  * while the inode/file is still used by the filesystem.
206  *
207  * For this reason the concept of a "background" request is introduced.
208  * An interrupted request is backgrounded if it has already been sent
209  * to userspace.  Backgrounding involves getting an extra reference to
210  * the inode(s) and file used in the request, and adding the request to
211  * the fc->background list.  When a reply is received for a background
212  * request, the object references are released, and the request is
213  * removed from the list.  If the filesystem is unmounted while there
214  * are still background requests, the list is walked and the references
215  * are released as if a reply had been received.
216  *
217  * There's one more use for a background request: the RELEASE message
218  * is always sent as background, since it doesn't return an error or
219  * any data.
220  */
221 static void background_request(struct fuse_conn *fc, struct fuse_req *req)
222 {
223 	req->background = 1;
224 	list_add(&req->bg_entry, &fc->background);
225 	if (req->inode)
226 		req->inode = igrab(req->inode);
227 	if (req->inode2)
228 		req->inode2 = igrab(req->inode2);
229 	if (req->file)
230 		get_file(req->file);
231 }
232 
233 /* Called with fuse_lock held.  Releases, and then reacquires it. */
234 static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
235 {
236 	sigset_t oldset;
237 
238 	spin_unlock(&fuse_lock);
239 	block_sigs(&oldset);
240 	wait_event_interruptible(req->waitq, req->finished);
241 	restore_sigs(&oldset);
242 	spin_lock(&fuse_lock);
243 	if (req->finished)
244 		return;
245 
246 	req->out.h.error = -EINTR;
247 	req->interrupted = 1;
248 	if (req->locked) {
249 		/* This is an uninterruptible sleep, because data is
250 		   being copied to/from the buffers of req.  During the
251 		   locked state there mustn't be any filesystem
252 		   operation (e.g. a page fault), since that could lead
253 		   to a deadlock */
254 		spin_unlock(&fuse_lock);
255 		wait_event(req->waitq, !req->locked);
256 		spin_lock(&fuse_lock);
257 	}
258 	if (!req->sent && !list_empty(&req->list)) {
259 		list_del(&req->list);
260 		__fuse_put_request(req);
261 	} else if (!req->finished && req->sent)
262 		background_request(fc, req);
263 }
264 
265 static unsigned len_args(unsigned numargs, struct fuse_arg *args)
266 {
267 	unsigned nbytes = 0;
268 	unsigned i;
269 
270 	for (i = 0; i < numargs; i++)
271 		nbytes += args[i].size;
272 
273 	return nbytes;
274 }
275 
276 static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
277 {
278 	fc->reqctr++;
279 	/* zero is special */
280 	if (fc->reqctr == 0)
281 		fc->reqctr = 1;
282 	req->in.h.unique = fc->reqctr;
283 	req->in.h.len = sizeof(struct fuse_in_header) +
284 		len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
285 	if (!req->preallocated) {
286 		/* If the request is not preallocated (a FORGET or a
287 		   RELEASE), still decrease outstanding_sem, so the
288 		   user can't open an unbounded number of files while
289 		   not processing the RELEASE requests.  However, for
290 		   efficiency do it without blocking: if down() would
291 		   block, just increase the debt instead */
292 		if (down_trylock(&fc->outstanding_sem))
293 			fc->outstanding_debt++;
294 	}
295 	list_add_tail(&req->list, &fc->pending);
296 	wake_up(&fc->waitq);
297 }
298 
299 /*
300  * This can only be interrupted by a SIGKILL
301  */
302 void request_send(struct fuse_conn *fc, struct fuse_req *req)
303 {
304 	req->isreply = 1;
305 	spin_lock(&fuse_lock);
306 	if (!fc->connected)
307 		req->out.h.error = -ENOTCONN;
308 	else if (fc->conn_error)
309 		req->out.h.error = -ECONNREFUSED;
310 	else {
311 		queue_request(fc, req);
312 		/* acquire extra reference, since request is still needed
313 		   after request_end() */
314 		__fuse_get_request(req);
315 
316 		request_wait_answer(fc, req);
317 	}
318 	spin_unlock(&fuse_lock);
319 }
320 
321 static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
322 {
323 	spin_lock(&fuse_lock);
324 	if (fc->connected) {
325 		queue_request(fc, req);
326 		spin_unlock(&fuse_lock);
327 	} else {
328 		req->out.h.error = -ENOTCONN;
329 		request_end(fc, req);
330 	}
331 }
332 
333 void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
334 {
335 	req->isreply = 0;
336 	request_send_nowait(fc, req);
337 }
338 
339 void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
340 {
341 	req->isreply = 1;
342 	spin_lock(&fuse_lock);
343 	background_request(fc, req);
344 	spin_unlock(&fuse_lock);
345 	request_send_nowait(fc, req);
346 }
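/*
 * Illustrative sketch (not part of this file) of the RELEASE case
 * described above: the file release path builds the request and sends
 * it in the background, so nobody waits for the reply.  The nodeid and
 * argument setup are simplified placeholders:
 *
 *	req->in.h.opcode = FUSE_RELEASE;
 *	req->in.h.nodeid = nodeid;
 *	req->inode = inode;	// background_request() takes its own reference
 *	// ... fill in the release argument ...
 *	request_send_background(fc, req);
 *
 * request_end() later drops the request and the background references
 * once userspace has answered (or the connection goes away).
 */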
347 
348 void fuse_send_init(struct fuse_conn *fc)
349 {
350 	/* This is called from fuse_read_super() so there's guaranteed
351 	   to be a request available */
352 	struct fuse_req *req = do_get_request(fc);
353 	struct fuse_init_in_out *arg = &req->misc.init_in_out;
354 	arg->major = FUSE_KERNEL_VERSION;
355 	arg->minor = FUSE_KERNEL_MINOR_VERSION;
356 	req->in.h.opcode = FUSE_INIT;
357 	req->in.numargs = 1;
358 	req->in.args[0].size = sizeof(*arg);
359 	req->in.args[0].value = arg;
360 	req->out.numargs = 1;
361 	req->out.args[0].size = sizeof(*arg);
362 	req->out.args[0].value = arg;
363 	request_send_background(fc, req);
364 }
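/*
 * The reply to FUSE_INIT is copied back into the same
 * struct fuse_init_in_out.  Illustrative daemon-side answer (sketch
 * only; a real daemon fills in the version of its own headers):
 *
 *	struct fuse_init_in_out reply = {
 *		.major = FUSE_KERNEL_VERSION,
 *		.minor = FUSE_KERNEL_MINOR_VERSION,
 *	};
 *
 * request_end() only checks the major number: a mismatch sets
 * fc->conn_error, and request_send() then fails further requests with
 * -ECONNREFUSED.
 */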
365 
366 /*
367  * Lock the request.  Up to the next unlock_request() there mustn't be
368  * anything that could cause a page fault.  If the request was already
369  * interrupted, bail out.
370  */
371 static inline int lock_request(struct fuse_req *req)
372 {
373 	int err = 0;
374 	if (req) {
375 		spin_lock(&fuse_lock);
376 		if (req->interrupted)
377 			err = -ENOENT;
378 		else
379 			req->locked = 1;
380 		spin_unlock(&fuse_lock);
381 	}
382 	return err;
383 }
384 
385 /*
386  * Unlock the request.  If it was interrupted while it was locked, the
387  * requester thread is currently waiting for it to be unlocked, so
388  * wake it up.
389  */
390 static inline void unlock_request(struct fuse_req *req)
391 {
392 	if (req) {
393 		spin_lock(&fuse_lock);
394 		req->locked = 0;
395 		if (req->interrupted)
396 			wake_up(&req->waitq);
397 		spin_unlock(&fuse_lock);
398 	}
399 }
400 
401 struct fuse_copy_state {
402 	int write;
403 	struct fuse_req *req;
404 	const struct iovec *iov;
405 	unsigned long nr_segs;
406 	unsigned long seglen;
407 	unsigned long addr;
408 	struct page *pg;
409 	void *mapaddr;
410 	void *buf;
411 	unsigned len;
412 };
413 
414 static void fuse_copy_init(struct fuse_copy_state *cs, int write,
415 			   struct fuse_req *req, const struct iovec *iov,
416 			   unsigned long nr_segs)
417 {
418 	memset(cs, 0, sizeof(*cs));
419 	cs->write = write;
420 	cs->req = req;
421 	cs->iov = iov;
422 	cs->nr_segs = nr_segs;
423 }
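/*
 * The copy helpers below are driven in a fixed sequence; this is the
 * pattern fuse_dev_readv()/fuse_dev_writev() use further down (the
 * variable names here are placeholders):
 *
 *	fuse_copy_init(&cs, write, req, iov, nr_segs);
 *	err = fuse_copy_one(&cs, &hdr, sizeof(hdr));	// fixed-size piece
 *	if (!err)
 *		err = fuse_copy_args(&cs, numargs, argpages, args, zeroing);
 *	fuse_copy_finish(&cs);	// always unmap and put the last mapped page
 */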
424 
425 /* Unmap and put previous page of userspace buffer */
426 static inline void fuse_copy_finish(struct fuse_copy_state *cs)
427 {
428 	if (cs->mapaddr) {
429 		kunmap_atomic(cs->mapaddr, KM_USER0);
430 		if (cs->write) {
431 			flush_dcache_page(cs->pg);
432 			set_page_dirty_lock(cs->pg);
433 		}
434 		put_page(cs->pg);
435 		cs->mapaddr = NULL;
436 	}
437 }
438 
439 /*
440  * Get another pageful of the userspace buffer, map it into kernel
441  * address space, and lock the request
442  */
443 static int fuse_copy_fill(struct fuse_copy_state *cs)
444 {
445 	unsigned long offset;
446 	int err;
447 
448 	unlock_request(cs->req);
449 	fuse_copy_finish(cs);
450 	if (!cs->seglen) {
451 		BUG_ON(!cs->nr_segs);
452 		cs->seglen = cs->iov[0].iov_len;
453 		cs->addr = (unsigned long) cs->iov[0].iov_base;
454 	cs->iov++;
455 	cs->nr_segs--;
456 	}
457 	down_read(&current->mm->mmap_sem);
458 	err = get_user_pages(current, current->mm, cs->addr, 1, cs->write, 0,
459 			     &cs->pg, NULL);
460 	up_read(&current->mm->mmap_sem);
461 	if (err < 0)
462 		return err;
463 	BUG_ON(err != 1);
464 	offset = cs->addr % PAGE_SIZE;
465 	cs->mapaddr = kmap_atomic(cs->pg, KM_USER0);
466 	cs->buf = cs->mapaddr + offset;
467 	cs->len = min(PAGE_SIZE - offset, cs->seglen);
468 	cs->seglen -= cs->len;
469 	cs->addr += cs->len;
470 
471 	return lock_request(cs->req);
472 }
473 
474 /* Copy as much to/from the userspace buffer as we can */
475 static inline int fuse_copy_do(struct fuse_copy_state *cs, void **val,
476 			       unsigned *size)
477 {
478 	unsigned ncpy = min(*size, cs->len);
479 	if (val) {
480 		if (cs->write)
481 			memcpy(cs->buf, *val, ncpy);
482 		else
483 			memcpy(*val, cs->buf, ncpy);
484 		*val += ncpy;
485 	}
486 	*size -= ncpy;
487 	cs->len -= ncpy;
488 	cs->buf += ncpy;
489 	return ncpy;
490 }
491 
492 /*
493  * Copy a page in the request to/from the userspace buffer.  Must be
494  * done atomically
495  */
496 static inline int fuse_copy_page(struct fuse_copy_state *cs, struct page *page,
497 				 unsigned offset, unsigned count, int zeroing)
498 {
499 	if (page && zeroing && count < PAGE_SIZE) {
500 		void *mapaddr = kmap_atomic(page, KM_USER1);
501 		memset(mapaddr, 0, PAGE_SIZE);
502 		kunmap_atomic(mapaddr, KM_USER1);
503 	}
504 	while (count) {
505 		int err;
506 		if (!cs->len && (err = fuse_copy_fill(cs)))
507 			return err;
508 		if (page) {
509 			void *mapaddr = kmap_atomic(page, KM_USER1);
510 			void *buf = mapaddr + offset;
511 			offset += fuse_copy_do(cs, &buf, &count);
512 			kunmap_atomic(mapaddr, KM_USER1);
513 		} else
514 			offset += fuse_copy_do(cs, NULL, &count);
515 	}
516 	if (page && !cs->write)
517 		flush_dcache_page(page);
518 	return 0;
519 }
520 
521 /* Copy pages in the request to/from userspace buffer */
522 static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
523 			   int zeroing)
524 {
525 	unsigned i;
526 	struct fuse_req *req = cs->req;
527 	unsigned offset = req->page_offset;
528 	unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);
529 
530 	for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
531 		struct page *page = req->pages[i];
532 		int err = fuse_copy_page(cs, page, offset, count, zeroing);
533 		if (err)
534 			return err;
535 
536 		nbytes -= count;
537 		count = min(nbytes, (unsigned) PAGE_SIZE);
538 		offset = 0;
539 	}
540 	return 0;
541 }
542 
543 /* Copy a single argument in the request to/from userspace buffer */
544 static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
545 {
546 	while (size) {
547 		int err;
548 		if (!cs->len && (err = fuse_copy_fill(cs)))
549 			return err;
550 		fuse_copy_do(cs, &val, &size);
551 	}
552 	return 0;
553 }
554 
555 /* Copy request arguments to/from userspace buffer */
556 static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
557 			  unsigned argpages, struct fuse_arg *args,
558 			  int zeroing)
559 {
560 	int err = 0;
561 	unsigned i;
562 
563 	for (i = 0; !err && i < numargs; i++)  {
564 		struct fuse_arg *arg = &args[i];
565 		if (i == numargs - 1 && argpages)
566 			err = fuse_copy_pages(cs, arg->size, zeroing);
567 		else
568 			err = fuse_copy_one(cs, arg->value, arg->size);
569 	}
570 	return err;
571 }
572 
573 /* Wait until a request is available on the pending list */
574 static void request_wait(struct fuse_conn *fc)
575 {
576 	DECLARE_WAITQUEUE(wait, current);
577 
578 	add_wait_queue_exclusive(&fc->waitq, &wait);
579 	while (fc->mounted && list_empty(&fc->pending)) {
580 		set_current_state(TASK_INTERRUPTIBLE);
581 		if (signal_pending(current))
582 			break;
583 
584 		spin_unlock(&fuse_lock);
585 		schedule();
586 		spin_lock(&fuse_lock);
587 	}
588 	set_current_state(TASK_RUNNING);
589 	remove_wait_queue(&fc->waitq, &wait);
590 }
591 
592 /*
593  * Read a single request into the userspace filesystem's buffer.  This
594  * function waits until a request is available, then removes it from
595  * the pending list and copies the request data to the userspace
596  * buffer.  If no reply is needed (FORGET), the request has been
597  * interrupted, or there was an error during the copying, then it is
598  * finished by calling request_end().  Otherwise it is added to the
599  * processing list and the 'sent' flag is set.
600  */
601 static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
602 			      unsigned long nr_segs, loff_t *off)
603 {
604 	int err;
605 	struct fuse_conn *fc;
606 	struct fuse_req *req;
607 	struct fuse_in *in;
608 	struct fuse_copy_state cs;
609 	unsigned reqsize;
610 
611 	spin_lock(&fuse_lock);
612 	fc = file->private_data;
613 	err = -EPERM;
614 	if (!fc)
615 		goto err_unlock;
616 	request_wait(fc);
617 	err = -ENODEV;
618 	if (!fc->mounted)
619 		goto err_unlock;
620 	err = -ERESTARTSYS;
621 	if (list_empty(&fc->pending))
622 		goto err_unlock;
623 
624 	req = list_entry(fc->pending.next, struct fuse_req, list);
625 	list_del_init(&req->list);
626 	spin_unlock(&fuse_lock);
627 
628 	in = &req->in;
629 	reqsize = req->in.h.len;
630 	fuse_copy_init(&cs, 1, req, iov, nr_segs);
631 	err = -EINVAL;
632 	if (iov_length(iov, nr_segs) >= reqsize) {
633 		err = fuse_copy_one(&cs, &in->h, sizeof(in->h));
634 		if (!err)
635 			err = fuse_copy_args(&cs, in->numargs, in->argpages,
636 					     (struct fuse_arg *) in->args, 0);
637 	}
638 	fuse_copy_finish(&cs);
639 
640 	spin_lock(&fuse_lock);
641 	req->locked = 0;
642 	if (!err && req->interrupted)
643 		err = -ENOENT;
644 	if (err) {
645 		if (!req->interrupted)
646 			req->out.h.error = -EIO;
647 		request_end(fc, req);
648 		return err;
649 	}
650 	if (!req->isreply)
651 		request_end(fc, req);
652 	else {
653 		req->sent = 1;
654 		list_add_tail(&req->list, &fc->processing);
655 		spin_unlock(&fuse_lock);
656 	}
657 	return reqsize;
658 
659  err_unlock:
660 	spin_unlock(&fuse_lock);
661 	return err;
662 }
663 
664 static ssize_t fuse_dev_read(struct file *file, char __user *buf,
665 			     size_t nbytes, loff_t *off)
666 {
667 	struct iovec iov;
668 	iov.iov_len = nbytes;
669 	iov.iov_base = buf;
670 	return fuse_dev_readv(file, &iov, 1, off);
671 }
672 
673 /* Look up request on processing list by unique ID */
674 static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
675 {
676 	struct list_head *entry;
677 
678 	list_for_each(entry, &fc->processing) {
679 		struct fuse_req *req;
680 		req = list_entry(entry, struct fuse_req, list);
681 		if (req->in.h.unique == unique)
682 			return req;
683 	}
684 	return NULL;
685 }
686 
687 static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
688 			 unsigned nbytes)
689 {
690 	unsigned reqsize = sizeof(struct fuse_out_header);
691 
692 	if (out->h.error)
693 		return nbytes != reqsize ? -EINVAL : 0;
694 
695 	reqsize += len_args(out->numargs, out->args);
696 
697 	if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
698 		return -EINVAL;
699 	else if (reqsize > nbytes) {
700 		struct fuse_arg *lastarg = &out->args[out->numargs-1];
701 		unsigned diffsize = reqsize - nbytes;
702 		if (diffsize > lastarg->size)
703 			return -EINVAL;
704 		lastarg->size -= diffsize;
705 	}
706 	return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
707 			      out->page_zeroing);
708 }
709 
710 /*
711  * Write a single reply to a request.  First the header is copied from
712  * the write buffer.  The request is then looked up on the processing
713  * list by the unique ID found in the header.  If found, it is removed
714  * from the list and the rest of the buffer is copied to the request.
715  * The request is finished by calling request_end().
716  */
717 static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
718 			       unsigned long nr_segs, loff_t *off)
719 {
720 	int err;
721 	unsigned nbytes = iov_length(iov, nr_segs);
722 	struct fuse_req *req;
723 	struct fuse_out_header oh;
724 	struct fuse_copy_state cs;
725 	struct fuse_conn *fc = fuse_get_conn(file);
726 	if (!fc)
727 		return -ENODEV;
728 
729 	fuse_copy_init(&cs, 0, NULL, iov, nr_segs);
730 	if (nbytes < sizeof(struct fuse_out_header))
731 		return -EINVAL;
732 
733 	err = fuse_copy_one(&cs, &oh, sizeof(oh));
734 	if (err)
735 		goto err_finish;
736 	err = -EINVAL;
737 	if (!oh.unique || oh.error <= -1000 || oh.error > 0 ||
738 	    oh.len != nbytes)
739 		goto err_finish;
740 
741 	spin_lock(&fuse_lock);
742 	req = request_find(fc, oh.unique);
743 	err = -EINVAL;
744 	if (!req)
745 		goto err_unlock;
746 
747 	list_del_init(&req->list);
748 	if (req->interrupted) {
749 		request_end(fc, req);
750 		fuse_copy_finish(&cs);
751 		return -ENOENT;
752 	}
753 	req->out.h = oh;
754 	req->locked = 1;
755 	cs.req = req;
756 	spin_unlock(&fuse_lock);
757 
758 	err = copy_out_args(&cs, &req->out, nbytes);
759 	fuse_copy_finish(&cs);
760 
761 	spin_lock(&fuse_lock);
762 	req->locked = 0;
763 	if (!err) {
764 		if (req->interrupted)
765 			err = -ENOENT;
766 	} else if (!req->interrupted)
767 		req->out.h.error = -EIO;
768 	request_end(fc, req);
769 
770 	return err ? err : nbytes;
771 
772  err_unlock:
773 	spin_unlock(&fuse_lock);
774  err_finish:
775 	fuse_copy_finish(&cs);
776 	return err;
777 }
778 
779 static ssize_t fuse_dev_write(struct file *file, const char __user *buf,
780 			      size_t nbytes, loff_t *off)
781 {
782 	struct iovec iov;
783 	iov.iov_len = nbytes;
784 	iov.iov_base = (char __user *) buf;
785 	return fuse_dev_writev(file, &iov, 1, off);
786 }
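/*
 * Illustrative userspace counterpart of the read/write interface above
 * (a minimal sketch, not the libfuse implementation; the buffer size,
 * payload and payload_len are placeholders):
 *
 *	char buf[65536];	// must hold one complete request
 *	ssize_t n = read(fd, buf, sizeof(buf));	// blocks until a request arrives
 *	struct fuse_in_header *in = (struct fuse_in_header *) buf;
 *	// ... handle in->opcode, build the reply payload ...
 *	struct fuse_out_header out = {
 *		.len	= sizeof(out) + payload_len,	// checked against the write size
 *		.error	= 0,
 *		.unique	= in->unique,			// must match the request
 *	};
 *	struct iovec iov[2] = {
 *		{ &out, sizeof(out) },
 *		{ payload, payload_len },
 *	};
 *	writev(fd, iov, 2);
 */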
787 
788 static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
789 {
790 	struct fuse_conn *fc = fuse_get_conn(file);
791 	unsigned mask = POLLOUT | POLLWRNORM;
792 
793 	if (!fc)
794 		return -ENODEV;
795 
796 	poll_wait(file, &fc->waitq, wait);
797 
798 	spin_lock(&fuse_lock);
799 	if (!list_empty(&fc->pending))
800 		mask |= POLLIN | POLLRDNORM;
801 	spin_unlock(&fuse_lock);
802 
803 	return mask;
804 }
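/*
 * A daemon may also poll()/select() on the device instead of blocking
 * in read().  Minimal userspace sketch (illustrative only;
 * handle_one_request() is a hypothetical helper):
 *
 *	struct pollfd pfd = { .fd = fuse_fd, .events = POLLIN };
 *	poll(&pfd, 1, -1);
 *	if (pfd.revents & POLLIN)
 *		handle_one_request(fuse_fd);	// read() won't wait now
 *
 * POLLOUT | POLLWRNORM is always reported, since replies can be written
 * at any time.
 */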
805 
806 /* Abort all requests on the given list (pending or processing) */
807 static void end_requests(struct fuse_conn *fc, struct list_head *head)
808 {
809 	while (!list_empty(head)) {
810 		struct fuse_req *req;
811 		req = list_entry(head->next, struct fuse_req, list);
812 		list_del_init(&req->list);
813 		req->out.h.error = -ECONNABORTED;
814 		request_end(fc, req);
815 		spin_lock(&fuse_lock);
816 	}
817 }
818 
819 static int fuse_dev_release(struct inode *inode, struct file *file)
820 {
821 	struct fuse_conn *fc;
822 
823 	spin_lock(&fuse_lock);
824 	fc = file->private_data;
825 	if (fc) {
826 		fc->connected = 0;
827 		end_requests(fc, &fc->pending);
828 		end_requests(fc, &fc->processing);
829 		fuse_release_conn(fc);
830 	}
831 	spin_unlock(&fuse_lock);
832 	return 0;
833 }
834 
835 struct file_operations fuse_dev_operations = {
836 	.owner		= THIS_MODULE,
837 	.llseek		= no_llseek,
838 	.read		= fuse_dev_read,
839 	.readv		= fuse_dev_readv,
840 	.write		= fuse_dev_write,
841 	.writev		= fuse_dev_writev,
842 	.poll		= fuse_dev_poll,
843 	.release	= fuse_dev_release,
844 };
845 
846 static struct miscdevice fuse_miscdevice = {
847 	.minor = FUSE_MINOR,
848 	.name  = "fuse",
849 	.fops = &fuse_dev_operations,
850 };
851 
852 int __init fuse_dev_init(void)
853 {
854 	int err = -ENOMEM;
855 	fuse_req_cachep = kmem_cache_create("fuse_request",
856 					    sizeof(struct fuse_req),
857 					    0, 0, NULL, NULL);
858 	if (!fuse_req_cachep)
859 		goto out;
860 
861 	err = misc_register(&fuse_miscdevice);
862 	if (err)
863 		goto out_cache_clean;
864 
865 	return 0;
866 
867  out_cache_clean:
868 	kmem_cache_destroy(fuse_req_cachep);
869  out:
870 	return err;
871 }
872 
873 void fuse_dev_cleanup(void)
874 {
875 	misc_deregister(&fuse_miscdevice);
876 	kmem_cache_destroy(fuse_req_cachep);
877 }
878