xref: /openbmc/linux/fs/fuse/dev.c (revision 96de0e252cedffad61b3cb5e05662c591898e69a)
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2006  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 #include "fuse_i.h"
10 
11 #include <linux/init.h>
12 #include <linux/module.h>
13 #include <linux/poll.h>
14 #include <linux/uio.h>
15 #include <linux/miscdevice.h>
16 #include <linux/pagemap.h>
17 #include <linux/file.h>
18 #include <linux/slab.h>
19 
20 MODULE_ALIAS_MISCDEV(FUSE_MINOR);
21 
22 static struct kmem_cache *fuse_req_cachep;
23 
24 static struct fuse_conn *fuse_get_conn(struct file *file)
25 {
26 	/*
27 	 * Lockless access is OK, because file->private_data is set
28 	 * once during mount and is valid until the file is released.
29 	 */
30 	return file->private_data;
31 }
32 
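/*
 * Reset a request to its initial state: clear it, set up the list
 * heads and the wait queue, and take the first reference.
 */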
33 static void fuse_request_init(struct fuse_req *req)
34 {
35 	memset(req, 0, sizeof(*req));
36 	INIT_LIST_HEAD(&req->list);
37 	INIT_LIST_HEAD(&req->intr_entry);
38 	init_waitqueue_head(&req->waitq);
39 	atomic_set(&req->count, 1);
40 }
41 
42 struct fuse_req *fuse_request_alloc(void)
43 {
44 	struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_KERNEL);
45 	if (req)
46 		fuse_request_init(req);
47 	return req;
48 }
49 
50 void fuse_request_free(struct fuse_req *req)
51 {
52 	kmem_cache_free(fuse_req_cachep, req);
53 }
54 
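/* Block all signals except SIGKILL, saving the old mask in *oldset */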
55 static void block_sigs(sigset_t *oldset)
56 {
57 	sigset_t mask;
58 
59 	siginitsetinv(&mask, sigmask(SIGKILL));
60 	sigprocmask(SIG_BLOCK, &mask, oldset);
61 }
62 
63 static void restore_sigs(sigset_t *oldset)
64 {
65 	sigprocmask(SIG_SETMASK, oldset, NULL);
66 }
67 
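/* Take an extra reference on an already referenced request */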
68 static void __fuse_get_request(struct fuse_req *req)
69 {
70 	atomic_inc(&req->count);
71 }
72 
73 /* Must be called with > 1 refcount */
74 static void __fuse_put_request(struct fuse_req *req)
75 {
76 	BUG_ON(atomic_read(&req->count) < 2);
77 	atomic_dec(&req->count);
78 }
79 
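/* Record the caller's credentials (fsuid, fsgid) and pid in the request header */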
80 static void fuse_req_init_context(struct fuse_req *req)
81 {
82 	req->in.h.uid = current->fsuid;
83 	req->in.h.gid = current->fsgid;
84 	req->in.h.pid = current->pid;
85 }
86 
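/*
 * Allocate a request for a regular file operation.
 *
 * Waits until the connection is unblocked; only SIGKILL may
 * interrupt the wait.  Returns ERR_PTR(-EINTR), ERR_PTR(-ENOTCONN)
 * or ERR_PTR(-ENOMEM) on failure.  The caller must release the
 * request with fuse_put_request().
 */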
87 struct fuse_req *fuse_get_req(struct fuse_conn *fc)
88 {
89 	struct fuse_req *req;
90 	sigset_t oldset;
91 	int intr;
92 	int err;
93 
94 	atomic_inc(&fc->num_waiting);
95 	block_sigs(&oldset);
96 	intr = wait_event_interruptible(fc->blocked_waitq, !fc->blocked);
97 	restore_sigs(&oldset);
98 	err = -EINTR;
99 	if (intr)
100 		goto out;
101 
102 	err = -ENOTCONN;
103 	if (!fc->connected)
104 		goto out;
105 
106 	req = fuse_request_alloc();
107 	err = -ENOMEM;
108 	if (!req)
109 		goto out;
110 
111 	fuse_req_init_context(req);
112 	req->waiting = 1;
113 	return req;
114 
115  out:
116 	atomic_dec(&fc->num_waiting);
117 	return ERR_PTR(err);
118 }
119 
120 /*
121  * Return request in fuse_file->reserved_req.  However, that may
122  * currently be in use.  If that is the case, wait for it to become
123  * available.
124  */
125 static struct fuse_req *get_reserved_req(struct fuse_conn *fc,
126 					 struct file *file)
127 {
128 	struct fuse_req *req = NULL;
129 	struct fuse_file *ff = file->private_data;
130 
131 	do {
132 		wait_event(fc->reserved_req_waitq, ff->reserved_req);
133 		spin_lock(&fc->lock);
134 		if (ff->reserved_req) {
135 			req = ff->reserved_req;
136 			ff->reserved_req = NULL;
137 			get_file(file);
138 			req->stolen_file = file;
139 		}
140 		spin_unlock(&fc->lock);
141 	} while (!req);
142 
143 	return req;
144 }
145 
146 /*
147  * Put stolen request back into fuse_file->reserved_req
148  */
149 static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
150 {
151 	struct file *file = req->stolen_file;
152 	struct fuse_file *ff = file->private_data;
153 
154 	spin_lock(&fc->lock);
155 	fuse_request_init(req);
156 	BUG_ON(ff->reserved_req);
157 	ff->reserved_req = req;
158 	wake_up_all(&fc->reserved_req_waitq);
159 	spin_unlock(&fc->lock);
160 	fput(file);
161 }
162 
163 /*
164  * Gets a request for a file operation, always succeeds
165  *
166  * This is used for sending the FLUSH request, which must get to
167  * userspace, due to POSIX locks which may need to be unlocked.
168  *
169  * If allocation fails due to OOM, use the reserved request in
170  * fuse_file.
171  *
172  * This is very unlikely to deadlock accidentally, since the
173  * filesystem should not have its own file open.  If deadlock is
174  * intentional, it can still be broken by "aborting" the filesystem.
175  */
176 struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file)
177 {
178 	struct fuse_req *req;
179 
180 	atomic_inc(&fc->num_waiting);
181 	wait_event(fc->blocked_waitq, !fc->blocked);
182 	req = fuse_request_alloc();
183 	if (!req)
184 		req = get_reserved_req(fc, file);
185 
186 	fuse_req_init_context(req);
187 	req->waiting = 1;
188 	return req;
189 }
190 
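/*
 * Drop a reference on the request.  On the final put, fix up the
 * num_waiting accounting and either return the reserved request to
 * its fuse_file or free it.
 */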
191 void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
192 {
193 	if (atomic_dec_and_test(&req->count)) {
194 		if (req->waiting)
195 			atomic_dec(&fc->num_waiting);
196 
197 		if (req->stolen_file)
198 			put_reserved_req(fc, req);
199 		else
200 			fuse_request_free(req);
201 	}
202 }
203 
204 /*
205  * This function is called when a request is finished.  Either a reply
206  * has arrived or it was aborted (and not yet sent) or some error
207  * occurred during communication with userspace, or the device file
208  * was closed.  The requester thread is woken up (if still waiting),
209  * the 'end' callback is called if given, else the reference to the
210  * the 'end' callback is called if given, otherwise the reference to
211  * the request is released.
212  * Called with fc->lock, unlocks it
213  */
214 static void request_end(struct fuse_conn *fc, struct fuse_req *req)
215 	__releases(fc->lock)
216 {
217 	void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
218 	req->end = NULL;
219 	list_del(&req->list);
220 	list_del(&req->intr_entry);
221 	req->state = FUSE_REQ_FINISHED;
222 	if (req->background) {
223 		if (fc->num_background == FUSE_MAX_BACKGROUND) {
224 			fc->blocked = 0;
225 			wake_up_all(&fc->blocked_waitq);
226 		}
227 		if (fc->num_background == FUSE_CONGESTION_THRESHOLD) {
228 			clear_bdi_congested(&fc->bdi, READ);
229 			clear_bdi_congested(&fc->bdi, WRITE);
230 		}
231 		fc->num_background--;
232 	}
233 	spin_unlock(&fc->lock);
234 	wake_up(&req->waitq);
235 	if (end)
236 		end(fc, req);
237 	else
238 		fuse_put_request(fc, req);
239 }
240 
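/*
 * Wait for the request to be answered, unless a signal is already
 * pending.  Any unblocked signal interrupts the wait.  Called with
 * fc->lock held; the lock is dropped for the duration of the sleep.
 */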
241 static void wait_answer_interruptible(struct fuse_conn *fc,
242 				      struct fuse_req *req)
243 {
244 	if (signal_pending(current))
245 		return;
246 
247 	spin_unlock(&fc->lock);
248 	wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED);
249 	spin_lock(&fc->lock);
250 }
251 
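/* Queue an INTERRUPT for the request and notify the device reader */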
252 static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req)
253 {
254 	list_add_tail(&req->intr_entry, &fc->interrupts);
255 	wake_up(&fc->waitq);
256 	kill_fasync(&fc->fasync, SIGIO, POLL_IN);
257 }
258 
259 /* Called with fc->lock held.  Releases, and then reacquires it. */
260 static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
261 {
262 	if (!fc->no_interrupt) {
263 		/* Any signal may interrupt this */
264 		wait_answer_interruptible(fc, req);
265 
266 		if (req->aborted)
267 			goto aborted;
268 		if (req->state == FUSE_REQ_FINISHED)
269 			return;
270 
271 		req->interrupted = 1;
272 		if (req->state == FUSE_REQ_SENT)
273 			queue_interrupt(fc, req);
274 	}
275 
276 	if (!req->force) {
277 		sigset_t oldset;
278 
279 		/* Only fatal signals may interrupt this */
280 		block_sigs(&oldset);
281 		wait_answer_interruptible(fc, req);
282 		restore_sigs(&oldset);
283 
284 		if (req->aborted)
285 			goto aborted;
286 		if (req->state == FUSE_REQ_FINISHED)
287 			return;
288 
289 		/* Request is not yet in userspace, bail out */
290 		if (req->state == FUSE_REQ_PENDING) {
291 			list_del(&req->list);
292 			__fuse_put_request(req);
293 			req->out.h.error = -EINTR;
294 			return;
295 		}
296 	}
297 
298 	/*
299 	 * Either request is already in userspace, or it was forced.
300 	 * Wait it out.
301 	 */
302 	spin_unlock(&fc->lock);
303 	wait_event(req->waitq, req->state == FUSE_REQ_FINISHED);
304 	spin_lock(&fc->lock);
305 
306 	if (!req->aborted)
307 		return;
308 
309  aborted:
310 	BUG_ON(req->state != FUSE_REQ_FINISHED);
311 	if (req->locked) {
312 		/* This is uninterruptible sleep, because data is
313 		   being copied to/from the buffers of req.  During
314 		   locked state, there mustn't be any filesystem
315 		   operation (e.g. page fault), since that could lead
316 		   to deadlock */
317 		spin_unlock(&fc->lock);
318 		wait_event(req->waitq, !req->locked);
319 		spin_lock(&fc->lock);
320 	}
321 }
322 
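/* Total length in bytes of an argument array */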
323 static unsigned len_args(unsigned numargs, struct fuse_arg *args)
324 {
325 	unsigned nbytes = 0;
326 	unsigned i;
327 
328 	for (i = 0; i < numargs; i++)
329 		nbytes += args[i].size;
330 
331 	return nbytes;
332 }
333 
334 static u64 fuse_get_unique(struct fuse_conn *fc)
335 {
336 	fc->reqctr++;
337 	/* zero is special */
338 	if (fc->reqctr == 0)
339 		fc->reqctr = 1;
340 
341 	return fc->reqctr;
342 }
343 
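/*
 * Fill in the header length and unique ID, put the request on the
 * pending list and wake up the device reader.  Called with fc->lock
 * held.
 */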
344 static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
345 {
346 	req->in.h.unique = fuse_get_unique(fc);
347 	req->in.h.len = sizeof(struct fuse_in_header) +
348 		len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
349 	list_add_tail(&req->list, &fc->pending);
350 	req->state = FUSE_REQ_PENDING;
351 	if (!req->waiting) {
352 		req->waiting = 1;
353 		atomic_inc(&fc->num_waiting);
354 	}
355 	wake_up(&fc->waitq);
356 	kill_fasync(&fc->fasync, SIGIO, POLL_IN);
357 }
358 
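/*
 * Send a request and wait for the answer.  On a dead or failed
 * connection the error is filled in without queuing the request.
 */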
359 void request_send(struct fuse_conn *fc, struct fuse_req *req)
360 {
361 	req->isreply = 1;
362 	spin_lock(&fc->lock);
363 	if (!fc->connected)
364 		req->out.h.error = -ENOTCONN;
365 	else if (fc->conn_error)
366 		req->out.h.error = -ECONNREFUSED;
367 	else {
368 		queue_request(fc, req);
369 		/* acquire extra reference, since request is still needed
370 		   after request_end() */
371 		__fuse_get_request(req);
372 
373 		request_wait_answer(fc, req);
374 	}
375 	spin_unlock(&fc->lock);
376 }
377 
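/*
 * Queue a request without waiting for the answer.  Updates the
 * background request accounting: when the limits are reached, new
 * requests are blocked and the backing device is marked congested.
 * On a dead connection the request is finished with -ENOTCONN.
 */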
378 static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
379 {
380 	spin_lock(&fc->lock);
381 	if (fc->connected) {
382 		req->background = 1;
383 		fc->num_background++;
384 		if (fc->num_background == FUSE_MAX_BACKGROUND)
385 			fc->blocked = 1;
386 		if (fc->num_background == FUSE_CONGESTION_THRESHOLD) {
387 			set_bdi_congested(&fc->bdi, READ);
388 			set_bdi_congested(&fc->bdi, WRITE);
389 		}
390 
391 		queue_request(fc, req);
392 		spin_unlock(&fc->lock);
393 	} else {
394 		req->out.h.error = -ENOTCONN;
395 		request_end(fc, req);
396 	}
397 }
398 
399 void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
400 {
401 	req->isreply = 0;
402 	request_send_nowait(fc, req);
403 }
404 
405 void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
406 {
407 	req->isreply = 1;
408 	request_send_nowait(fc, req);
409 }
410 
411 /*
412  * Lock the request.  Up to the next unlock_request() there mustn't be
413  * anything that could cause a page-fault.  If the request was already
414  * aborted bail out.
415  */
416 static int lock_request(struct fuse_conn *fc, struct fuse_req *req)
417 {
418 	int err = 0;
419 	if (req) {
420 		spin_lock(&fc->lock);
421 		if (req->aborted)
422 			err = -ENOENT;
423 		else
424 			req->locked = 1;
425 		spin_unlock(&fc->lock);
426 	}
427 	return err;
428 }
429 
430 /*
431  * Unlock request.  If it was aborted during being locked, the
432  * requester thread is currently waiting for it to be unlocked, so
433  * wake it up.
434  */
435 static void unlock_request(struct fuse_conn *fc, struct fuse_req *req)
436 {
437 	if (req) {
438 		spin_lock(&fc->lock);
439 		req->locked = 0;
440 		if (req->aborted)
441 			wake_up(&req->waitq);
442 		spin_unlock(&fc->lock);
443 	}
444 }
445 
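/*
 * State for copying data between a request and the userspace buffer,
 * tracked one iovec segment and one mapped page at a time.
 */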
446 struct fuse_copy_state {
447 	struct fuse_conn *fc;
448 	int write;
449 	struct fuse_req *req;
450 	const struct iovec *iov;
451 	unsigned long nr_segs;
452 	unsigned long seglen;
453 	unsigned long addr;
454 	struct page *pg;
455 	void *mapaddr;
456 	void *buf;
457 	unsigned len;
458 };
459 
460 static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc,
461 			   int write, struct fuse_req *req,
462 			   const struct iovec *iov, unsigned long nr_segs)
463 {
464 	memset(cs, 0, sizeof(*cs));
465 	cs->fc = fc;
466 	cs->write = write;
467 	cs->req = req;
468 	cs->iov = iov;
469 	cs->nr_segs = nr_segs;
470 }
471 
472 /* Unmap and put previous page of userspace buffer */
473 static void fuse_copy_finish(struct fuse_copy_state *cs)
474 {
475 	if (cs->mapaddr) {
476 		kunmap_atomic(cs->mapaddr, KM_USER0);
477 		if (cs->write) {
478 			flush_dcache_page(cs->pg);
479 			set_page_dirty_lock(cs->pg);
480 		}
481 		put_page(cs->pg);
482 		cs->mapaddr = NULL;
483 	}
484 }
485 
486 /*
487  * Get another pageful of the userspace buffer, map it to kernel
488  * address space, and lock the request
489  */
490 static int fuse_copy_fill(struct fuse_copy_state *cs)
491 {
492 	unsigned long offset;
493 	int err;
494 
495 	unlock_request(cs->fc, cs->req);
496 	fuse_copy_finish(cs);
497 	if (!cs->seglen) {
498 		BUG_ON(!cs->nr_segs);
499 		cs->seglen = cs->iov[0].iov_len;
500 		cs->addr = (unsigned long) cs->iov[0].iov_base;
501 		cs->iov++;
502 		cs->nr_segs--;
503 	}
504 	down_read(&current->mm->mmap_sem);
505 	err = get_user_pages(current, current->mm, cs->addr, 1, cs->write, 0,
506 			     &cs->pg, NULL);
507 	up_read(&current->mm->mmap_sem);
508 	if (err < 0)
509 		return err;
510 	BUG_ON(err != 1);
511 	offset = cs->addr % PAGE_SIZE;
512 	cs->mapaddr = kmap_atomic(cs->pg, KM_USER0);
513 	cs->buf = cs->mapaddr + offset;
514 	cs->len = min(PAGE_SIZE - offset, cs->seglen);
515 	cs->seglen -= cs->len;
516 	cs->addr += cs->len;
517 
518 	return lock_request(cs->fc, cs->req);
519 }
520 
521 /* Do as much copy to/from userspace buffer as we can */
522 static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
523 {
524 	unsigned ncpy = min(*size, cs->len);
525 	if (val) {
526 		if (cs->write)
527 			memcpy(cs->buf, *val, ncpy);
528 		else
529 			memcpy(*val, cs->buf, ncpy);
530 		*val += ncpy;
531 	}
532 	*size -= ncpy;
533 	cs->len -= ncpy;
534 	cs->buf += ncpy;
535 	return ncpy;
536 }
537 
538 /*
539  * Copy a page in the request to/from the userspace buffer.  Must be
540  * done atomically
541  */
542 static int fuse_copy_page(struct fuse_copy_state *cs, struct page *page,
543 			  unsigned offset, unsigned count, int zeroing)
544 {
545 	if (page && zeroing && count < PAGE_SIZE) {
546 		void *mapaddr = kmap_atomic(page, KM_USER1);
547 		memset(mapaddr, 0, PAGE_SIZE);
548 		kunmap_atomic(mapaddr, KM_USER1);
549 	}
550 	while (count) {
551 		int err;
552 		if (!cs->len && (err = fuse_copy_fill(cs)))
553 			return err;
554 		if (page) {
555 			void *mapaddr = kmap_atomic(page, KM_USER1);
556 			void *buf = mapaddr + offset;
557 			offset += fuse_copy_do(cs, &buf, &count);
558 			kunmap_atomic(mapaddr, KM_USER1);
559 		} else
560 			offset += fuse_copy_do(cs, NULL, &count);
561 	}
562 	if (page && !cs->write)
563 		flush_dcache_page(page);
564 	return 0;
565 }
566 
567 /* Copy pages in the request to/from userspace buffer */
568 static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
569 			   int zeroing)
570 {
571 	unsigned i;
572 	struct fuse_req *req = cs->req;
573 	unsigned offset = req->page_offset;
574 	unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);
575 
576 	for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
577 		struct page *page = req->pages[i];
578 		int err = fuse_copy_page(cs, page, offset, count, zeroing);
579 		if (err)
580 			return err;
581 
582 		nbytes -= count;
583 		count = min(nbytes, (unsigned) PAGE_SIZE);
584 		offset = 0;
585 	}
586 	return 0;
587 }
588 
589 /* Copy a single argument in the request to/from userspace buffer */
590 static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
591 {
592 	while (size) {
593 		int err;
594 		if (!cs->len && (err = fuse_copy_fill(cs)))
595 			return err;
596 		fuse_copy_do(cs, &val, &size);
597 	}
598 	return 0;
599 }
600 
601 /* Copy request arguments to/from userspace buffer */
602 static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
603 			  unsigned argpages, struct fuse_arg *args,
604 			  int zeroing)
605 {
606 	int err = 0;
607 	unsigned i;
608 
609 	for (i = 0; !err && i < numargs; i++)  {
610 		struct fuse_arg *arg = &args[i];
611 		if (i == numargs - 1 && argpages)
612 			err = fuse_copy_pages(cs, arg->size, zeroing);
613 		else
614 			err = fuse_copy_one(cs, arg->value, arg->size);
615 	}
616 	return err;
617 }
618 
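/* True if a request or an interrupt is ready to be read */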
619 static int request_pending(struct fuse_conn *fc)
620 {
621 	return !list_empty(&fc->pending) || !list_empty(&fc->interrupts);
622 }
623 
624 /* Wait until a request is available on the pending list */
625 static void request_wait(struct fuse_conn *fc)
626 {
627 	DECLARE_WAITQUEUE(wait, current);
628 
629 	add_wait_queue_exclusive(&fc->waitq, &wait);
630 	while (fc->connected && !request_pending(fc)) {
631 		set_current_state(TASK_INTERRUPTIBLE);
632 		if (signal_pending(current))
633 			break;
634 
635 		spin_unlock(&fc->lock);
636 		schedule();
637 		spin_lock(&fc->lock);
638 	}
639 	set_current_state(TASK_RUNNING);
640 	remove_wait_queue(&fc->waitq, &wait);
641 }
642 
643 /*
644  * Transfer an interrupt request to userspace
645  *
646  * Unlike other requests this is assembled on demand, without a need
647  * to allocate a separate fuse_req structure.
648  *
649  * Called with fc->lock held, releases it
650  */
651 static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_req *req,
652 			       const struct iovec *iov, unsigned long nr_segs)
653 	__releases(fc->lock)
654 {
655 	struct fuse_copy_state cs;
656 	struct fuse_in_header ih;
657 	struct fuse_interrupt_in arg;
658 	unsigned reqsize = sizeof(ih) + sizeof(arg);
659 	int err;
660 
661 	list_del_init(&req->intr_entry);
662 	req->intr_unique = fuse_get_unique(fc);
663 	memset(&ih, 0, sizeof(ih));
664 	memset(&arg, 0, sizeof(arg));
665 	ih.len = reqsize;
666 	ih.opcode = FUSE_INTERRUPT;
667 	ih.unique = req->intr_unique;
668 	arg.unique = req->in.h.unique;
669 
670 	spin_unlock(&fc->lock);
671 	if (iov_length(iov, nr_segs) < reqsize)
672 		return -EINVAL;
673 
674 	fuse_copy_init(&cs, fc, 1, NULL, iov, nr_segs);
675 	err = fuse_copy_one(&cs, &ih, sizeof(ih));
676 	if (!err)
677 		err = fuse_copy_one(&cs, &arg, sizeof(arg));
678 	fuse_copy_finish(&cs);
679 
680 	return err ? err : reqsize;
681 }
682 
683 /*
684  * Read a single request into the userspace filesystem's buffer.  This
685  * function waits until a request is available, then removes it from
686  * the pending list and copies request data to userspace buffer.  If
687  * no reply is needed (FORGET), the request has been aborted, or there
688  * was an error during the copying, then it is finished by calling
689  * request_end().  Otherwise add it to the processing list, and set
690  * the 'sent' flag.
691  */
692 static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
693 			      unsigned long nr_segs, loff_t pos)
694 {
695 	int err;
696 	struct fuse_req *req;
697 	struct fuse_in *in;
698 	struct fuse_copy_state cs;
699 	unsigned reqsize;
700 	struct file *file = iocb->ki_filp;
701 	struct fuse_conn *fc = fuse_get_conn(file);
702 	if (!fc)
703 		return -EPERM;
704 
705  restart:
706 	spin_lock(&fc->lock);
707 	err = -EAGAIN;
708 	if ((file->f_flags & O_NONBLOCK) && fc->connected &&
709 	    !request_pending(fc))
710 		goto err_unlock;
711 
712 	request_wait(fc);
713 	err = -ENODEV;
714 	if (!fc->connected)
715 		goto err_unlock;
716 	err = -ERESTARTSYS;
717 	if (!request_pending(fc))
718 		goto err_unlock;
719 
720 	if (!list_empty(&fc->interrupts)) {
721 		req = list_entry(fc->interrupts.next, struct fuse_req,
722 				 intr_entry);
723 		return fuse_read_interrupt(fc, req, iov, nr_segs);
724 	}
725 
726 	req = list_entry(fc->pending.next, struct fuse_req, list);
727 	req->state = FUSE_REQ_READING;
728 	list_move(&req->list, &fc->io);
729 
730 	in = &req->in;
731 	reqsize = in->h.len;
732 	/* If request is too large, reply with an error and restart the read */
733 	if (iov_length(iov, nr_segs) < reqsize) {
734 		req->out.h.error = -EIO;
735 		/* SETXATTR is special, since its data may be arbitrarily large */
736 		if (in->h.opcode == FUSE_SETXATTR)
737 			req->out.h.error = -E2BIG;
738 		request_end(fc, req);
739 		goto restart;
740 	}
741 	spin_unlock(&fc->lock);
742 	fuse_copy_init(&cs, fc, 1, req, iov, nr_segs);
743 	err = fuse_copy_one(&cs, &in->h, sizeof(in->h));
744 	if (!err)
745 		err = fuse_copy_args(&cs, in->numargs, in->argpages,
746 				     (struct fuse_arg *) in->args, 0);
747 	fuse_copy_finish(&cs);
748 	spin_lock(&fc->lock);
749 	req->locked = 0;
750 	if (req->aborted) {
751 		request_end(fc, req);
752 		return -ENODEV;
753 	}
754 	if (err) {
755 		req->out.h.error = -EIO;
756 		request_end(fc, req);
757 		return err;
758 	}
759 	if (!req->isreply)
760 		request_end(fc, req);
761 	else {
762 		req->state = FUSE_REQ_SENT;
763 		list_move_tail(&req->list, &fc->processing);
764 		if (req->interrupted)
765 			queue_interrupt(fc, req);
766 		spin_unlock(&fc->lock);
767 	}
768 	return reqsize;
769 
770  err_unlock:
771 	spin_unlock(&fc->lock);
772 	return err;
773 }
774 
775 /* Look up request on processing list by unique ID */
776 static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
777 {
778 	struct list_head *entry;
779 
780 	list_for_each(entry, &fc->processing) {
781 		struct fuse_req *req;
782 		req = list_entry(entry, struct fuse_req, list);
783 		if (req->in.h.unique == unique || req->intr_unique == unique)
784 			return req;
785 	}
786 	return NULL;
787 }
788 
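/*
 * Copy the reply arguments from the userspace buffer into the
 * request.  Sizes must match exactly, except that the last argument
 * may be shorter if out->argvar is set.  An error reply carries no
 * arguments at all.
 */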
789 static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
790 			 unsigned nbytes)
791 {
792 	unsigned reqsize = sizeof(struct fuse_out_header);
793 
794 	if (out->h.error)
795 		return nbytes != reqsize ? -EINVAL : 0;
796 
797 	reqsize += len_args(out->numargs, out->args);
798 
799 	if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
800 		return -EINVAL;
801 	else if (reqsize > nbytes) {
802 		struct fuse_arg *lastarg = &out->args[out->numargs-1];
803 		unsigned diffsize = reqsize - nbytes;
804 		if (diffsize > lastarg->size)
805 			return -EINVAL;
806 		lastarg->size -= diffsize;
807 	}
808 	return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
809 			      out->page_zeroing);
810 }
811 
812 /*
813  * Write a single reply to a request.  First the header is copied from
814  * the write buffer.  The request is then searched on the processing
815  * list by the unique ID found in the header.  If found, then remove
816  * it from the list and copy the rest of the buffer to the request.
817  * The request is finished by calling request_end()
818  */
819 static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
820 			       unsigned long nr_segs, loff_t pos)
821 {
822 	int err;
823 	unsigned nbytes = iov_length(iov, nr_segs);
824 	struct fuse_req *req;
825 	struct fuse_out_header oh;
826 	struct fuse_copy_state cs;
827 	struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp);
828 	if (!fc)
829 		return -EPERM;
830 
831 	fuse_copy_init(&cs, fc, 0, NULL, iov, nr_segs);
832 	if (nbytes < sizeof(struct fuse_out_header))
833 		return -EINVAL;
834 
835 	err = fuse_copy_one(&cs, &oh, sizeof(oh));
836 	if (err)
837 		goto err_finish;
838 	err = -EINVAL;
839 	if (!oh.unique || oh.error <= -1000 || oh.error > 0 ||
840 	    oh.len != nbytes)
841 		goto err_finish;
842 
843 	spin_lock(&fc->lock);
844 	err = -ENOENT;
845 	if (!fc->connected)
846 		goto err_unlock;
847 
848 	req = request_find(fc, oh.unique);
849 	if (!req)
850 		goto err_unlock;
851 
852 	if (req->aborted) {
853 		spin_unlock(&fc->lock);
854 		fuse_copy_finish(&cs);
855 		spin_lock(&fc->lock);
856 		request_end(fc, req);
857 		return -ENOENT;
858 	}
859 	/* Is it an interrupt reply? */
860 	if (req->intr_unique == oh.unique) {
861 		err = -EINVAL;
862 		if (nbytes != sizeof(struct fuse_out_header))
863 			goto err_unlock;
864 
865 		if (oh.error == -ENOSYS)
866 			fc->no_interrupt = 1;
867 		else if (oh.error == -EAGAIN)
868 			queue_interrupt(fc, req);
869 
870 		spin_unlock(&fc->lock);
871 		fuse_copy_finish(&cs);
872 		return nbytes;
873 	}
874 
875 	req->state = FUSE_REQ_WRITING;
876 	list_move(&req->list, &fc->io);
877 	req->out.h = oh;
878 	req->locked = 1;
879 	cs.req = req;
880 	spin_unlock(&fc->lock);
881 
882 	err = copy_out_args(&cs, &req->out, nbytes);
883 	fuse_copy_finish(&cs);
884 
885 	spin_lock(&fc->lock);
886 	req->locked = 0;
887 	if (!err) {
888 		if (req->aborted)
889 			err = -ENOENT;
890 	} else if (!req->aborted)
891 		req->out.h.error = -EIO;
892 	request_end(fc, req);
893 
894 	return err ? err : nbytes;
895 
896  err_unlock:
897 	spin_unlock(&fc->lock);
898  err_finish:
899 	fuse_copy_finish(&cs);
900 	return err;
901 }
902 
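/*
 * The device is always writable; it becomes readable when a request
 * or interrupt is pending.  A dead connection reports POLLERR.
 */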
903 static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
904 {
905 	unsigned mask = POLLOUT | POLLWRNORM;
906 	struct fuse_conn *fc = fuse_get_conn(file);
907 	if (!fc)
908 		return POLLERR;
909 
910 	poll_wait(file, &fc->waitq, wait);
911 
912 	spin_lock(&fc->lock);
913 	if (!fc->connected)
914 		mask = POLLERR;
915 	else if (request_pending(fc))
916 		mask |= POLLIN | POLLRDNORM;
917 	spin_unlock(&fc->lock);
918 
919 	return mask;
920 }
921 
922 /*
923  * Abort all requests on the given list (pending or processing)
924  *
925  * This function releases and reacquires fc->lock
926  */
927 static void end_requests(struct fuse_conn *fc, struct list_head *head)
928 {
929 	while (!list_empty(head)) {
930 		struct fuse_req *req;
931 		req = list_entry(head->next, struct fuse_req, list);
932 		req->out.h.error = -ECONNABORTED;
933 		request_end(fc, req);
934 		spin_lock(&fc->lock);
935 	}
936 }
937 
938 /*
939  * Abort requests under I/O
940  *
941  * The requests are set to aborted and finished, and the request
942  * waiter is woken up.  This will make request_wait_answer() wait
943  * until the request is unlocked and then return.
944  *
945  * If the request is asynchronous, then the end function needs to be
946  * called after waiting for the request to be unlocked (if it was
947  * locked).
948  */
949 static void end_io_requests(struct fuse_conn *fc)
950 {
951 	while (!list_empty(&fc->io)) {
952 		struct fuse_req *req =
953 			list_entry(fc->io.next, struct fuse_req, list);
954 		void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
955 
956 		req->aborted = 1;
957 		req->out.h.error = -ECONNABORTED;
958 		req->state = FUSE_REQ_FINISHED;
959 		list_del_init(&req->list);
960 		wake_up(&req->waitq);
961 		if (end) {
962 			req->end = NULL;
963 			/* The end function will consume this reference */
964 			__fuse_get_request(req);
965 			spin_unlock(&fc->lock);
966 			wait_event(req->waitq, !req->locked);
967 			end(fc, req);
968 			spin_lock(&fc->lock);
969 		}
970 	}
971 }
972 
973 /*
974  * Abort all requests.
975  *
976  * Emergency exit in case of a malicious or accidental deadlock, or
977  * just a hung filesystem.
978  *
979  * The same effect is usually achievable through killing the
980  * filesystem daemon and all users of the filesystem.  The exception
981  * is the combination of an asynchronous request and the tricky
982  * deadlock (see Documentation/filesystems/fuse.txt).
983  *
984  * During the aborting, progression of requests from the pending and
985  * processing lists onto the io list, and progression of new requests
986  * onto the pending list is prevented by fc->connected being false.
987  *
988  * Progression of requests under I/O to the processing list is
989  * prevented by the req->aborted flag being true for these requests.
990  * For this reason requests on the io list must be aborted first.
991  */
992 void fuse_abort_conn(struct fuse_conn *fc)
993 {
994 	spin_lock(&fc->lock);
995 	if (fc->connected) {
996 		fc->connected = 0;
997 		fc->blocked = 0;
998 		end_io_requests(fc);
999 		end_requests(fc, &fc->pending);
1000 		end_requests(fc, &fc->processing);
1001 		wake_up_all(&fc->waitq);
1002 		wake_up_all(&fc->blocked_waitq);
1003 		kill_fasync(&fc->fasync, SIGIO, POLL_IN);
1004 	}
1005 	spin_unlock(&fc->lock);
1006 }
1007 
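/*
 * Final close of the device file: mark the connection dead, finish
 * all pending and processing requests with -ECONNABORTED and drop
 * the reference on the connection.
 */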
1008 static int fuse_dev_release(struct inode *inode, struct file *file)
1009 {
1010 	struct fuse_conn *fc = fuse_get_conn(file);
1011 	if (fc) {
1012 		spin_lock(&fc->lock);
1013 		fc->connected = 0;
1014 		end_requests(fc, &fc->pending);
1015 		end_requests(fc, &fc->processing);
1016 		spin_unlock(&fc->lock);
1017 		fasync_helper(-1, file, 0, &fc->fasync);
1018 		fuse_conn_put(fc);
1019 	}
1020 
1021 	return 0;
1022 }
1023 
1024 static int fuse_dev_fasync(int fd, struct file *file, int on)
1025 {
1026 	struct fuse_conn *fc = fuse_get_conn(file);
1027 	if (!fc)
1028 		return -EPERM;
1029 
1030 	/* No locking - fasync_helper does its own locking */
1031 	return fasync_helper(fd, file, on, &fc->fasync);
1032 }
1033 
1034 const struct file_operations fuse_dev_operations = {
1035 	.owner		= THIS_MODULE,
1036 	.llseek		= no_llseek,
1037 	.read		= do_sync_read,
1038 	.aio_read	= fuse_dev_read,
1039 	.write		= do_sync_write,
1040 	.aio_write	= fuse_dev_write,
1041 	.poll		= fuse_dev_poll,
1042 	.release	= fuse_dev_release,
1043 	.fasync		= fuse_dev_fasync,
1044 };
1045 
1046 static struct miscdevice fuse_miscdevice = {
1047 	.minor = FUSE_MINOR,
1048 	.name  = "fuse",
1049 	.fops = &fuse_dev_operations,
1050 };
1051 
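/* Create the request slab cache and register the /dev/fuse misc device */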
1052 int __init fuse_dev_init(void)
1053 {
1054 	int err = -ENOMEM;
1055 	fuse_req_cachep = kmem_cache_create("fuse_request",
1056 					    sizeof(struct fuse_req),
1057 					    0, 0, NULL);
1058 	if (!fuse_req_cachep)
1059 		goto out;
1060 
1061 	err = misc_register(&fuse_miscdevice);
1062 	if (err)
1063 		goto out_cache_clean;
1064 
1065 	return 0;
1066 
1067  out_cache_clean:
1068 	kmem_cache_destroy(fuse_req_cachep);
1069  out:
1070 	return err;
1071 }
1072 
1073 void fuse_dev_cleanup(void)
1074 {
1075 	misc_deregister(&fuse_miscdevice);
1076 	kmem_cache_destroy(fuse_req_cachep);
1077 }
1078