xref: /openbmc/linux/fs/fuse/dev.c (revision dd3bb14f)
/*
  FUSE: Filesystem in Userspace
  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/

#include "fuse_i.h"

#include <linux/init.h>
#include <linux/module.h>
#include <linux/poll.h>
#include <linux/uio.h>
#include <linux/miscdevice.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/pipe_fs_i.h>

MODULE_ALIAS_MISCDEV(FUSE_MINOR);

static struct kmem_cache *fuse_req_cachep;

static struct fuse_conn *fuse_get_conn(struct file *file)
{
	/*
	 * Lockless access is OK, because file->private_data is set
	 * once during mount and is valid until the file is released.
	 */
	return file->private_data;
}

static void fuse_request_init(struct fuse_req *req)
{
	memset(req, 0, sizeof(*req));
	INIT_LIST_HEAD(&req->list);
	INIT_LIST_HEAD(&req->intr_entry);
	init_waitqueue_head(&req->waitq);
	atomic_set(&req->count, 1);
}

struct fuse_req *fuse_request_alloc(void)
{
	struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_KERNEL);
	if (req)
		fuse_request_init(req);
	return req;
}
EXPORT_SYMBOL_GPL(fuse_request_alloc);

struct fuse_req *fuse_request_alloc_nofs(void)
{
	struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_NOFS);
	if (req)
		fuse_request_init(req);
	return req;
}

void fuse_request_free(struct fuse_req *req)
{
	kmem_cache_free(fuse_req_cachep, req);
}

static void block_sigs(sigset_t *oldset)
{
	sigset_t mask;

	siginitsetinv(&mask, sigmask(SIGKILL));
	sigprocmask(SIG_BLOCK, &mask, oldset);
}

static void restore_sigs(sigset_t *oldset)
{
	sigprocmask(SIG_SETMASK, oldset, NULL);
}

static void __fuse_get_request(struct fuse_req *req)
{
	atomic_inc(&req->count);
}

/* Must be called with > 1 refcount */
static void __fuse_put_request(struct fuse_req *req)
{
	BUG_ON(atomic_read(&req->count) < 2);
	atomic_dec(&req->count);
}

static void fuse_req_init_context(struct fuse_req *req)
{
	req->in.h.uid = current_fsuid();
	req->in.h.gid = current_fsgid();
	req->in.h.pid = current->pid;
}

struct fuse_req *fuse_get_req(struct fuse_conn *fc)
{
	struct fuse_req *req;
	sigset_t oldset;
	int intr;
	int err;

	atomic_inc(&fc->num_waiting);
	block_sigs(&oldset);
	intr = wait_event_interruptible(fc->blocked_waitq, !fc->blocked);
	restore_sigs(&oldset);
	err = -EINTR;
	if (intr)
		goto out;

	err = -ENOTCONN;
	if (!fc->connected)
		goto out;

	req = fuse_request_alloc();
	err = -ENOMEM;
	if (!req)
		goto out;

	fuse_req_init_context(req);
	req->waiting = 1;
	return req;

 out:
	atomic_dec(&fc->num_waiting);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(fuse_get_req);
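
/*
 * Typical caller-side lifecycle of the request objects above, as a
 * hedged sketch (the opcode and argument wiring are illustrative, not
 * a specific caller from this file):
 *
 *	struct fuse_req *req = fuse_get_req(fc);
 *	if (IS_ERR(req))
 *		return PTR_ERR(req);
 *	// fill in req->in.h.opcode, req->in.args[], req->out.args[]
 *	fuse_request_send(fc, req);	// blocks until userspace replies
 *	err = req->out.h.error;
 *	fuse_put_request(fc, req);	// drop the caller's reference
 */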

/*
 * Return request in fuse_file->reserved_req.  However that may
 * currently be in use.  If that is the case, wait for it to become
 * available.
 */
static struct fuse_req *get_reserved_req(struct fuse_conn *fc,
					 struct file *file)
{
	struct fuse_req *req = NULL;
	struct fuse_file *ff = file->private_data;

	do {
		wait_event(fc->reserved_req_waitq, ff->reserved_req);
		spin_lock(&fc->lock);
		if (ff->reserved_req) {
			req = ff->reserved_req;
			ff->reserved_req = NULL;
			get_file(file);
			req->stolen_file = file;
		}
		spin_unlock(&fc->lock);
	} while (!req);

	return req;
}

/*
 * Put stolen request back into fuse_file->reserved_req
 */
static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
{
	struct file *file = req->stolen_file;
	struct fuse_file *ff = file->private_data;

	spin_lock(&fc->lock);
	fuse_request_init(req);
	BUG_ON(ff->reserved_req);
	ff->reserved_req = req;
	wake_up_all(&fc->reserved_req_waitq);
	spin_unlock(&fc->lock);
	fput(file);
}

/*
 * Gets a request for a file operation; always succeeds.
 *
 * This is used for sending the FLUSH request, which must get to
 * userspace, due to POSIX locks which may need to be unlocked.
 *
 * If allocation fails due to OOM, use the reserved request in
 * fuse_file.
 *
 * This is very unlikely to deadlock accidentally, since the
 * filesystem should not have its own file open.  If deadlock is
 * intentional, it can still be broken by "aborting" the filesystem.
 */
struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file)
{
	struct fuse_req *req;

	atomic_inc(&fc->num_waiting);
	wait_event(fc->blocked_waitq, !fc->blocked);
	req = fuse_request_alloc();
	if (!req)
		req = get_reserved_req(fc, file);

	fuse_req_init_context(req);
	req->waiting = 1;
	return req;
}

void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
{
	if (atomic_dec_and_test(&req->count)) {
		if (req->waiting)
			atomic_dec(&fc->num_waiting);

		if (req->stolen_file)
			put_reserved_req(fc, req);
		else
			fuse_request_free(req);
	}
}
EXPORT_SYMBOL_GPL(fuse_put_request);

static unsigned len_args(unsigned numargs, struct fuse_arg *args)
{
	unsigned nbytes = 0;
	unsigned i;

	for (i = 0; i < numargs; i++)
		nbytes += args[i].size;

	return nbytes;
}

static u64 fuse_get_unique(struct fuse_conn *fc)
{
	fc->reqctr++;
	/* zero is special */
	if (fc->reqctr == 0)
		fc->reqctr = 1;

	return fc->reqctr;
}

static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
{
	req->in.h.unique = fuse_get_unique(fc);
	req->in.h.len = sizeof(struct fuse_in_header) +
		len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
	list_add_tail(&req->list, &fc->pending);
	req->state = FUSE_REQ_PENDING;
	if (!req->waiting) {
		req->waiting = 1;
		atomic_inc(&fc->num_waiting);
	}
	wake_up(&fc->waitq);
	kill_fasync(&fc->fasync, SIGIO, POLL_IN);
}

static void flush_bg_queue(struct fuse_conn *fc)
{
	while (fc->active_background < fc->max_background &&
	       !list_empty(&fc->bg_queue)) {
		struct fuse_req *req;

		req = list_entry(fc->bg_queue.next, struct fuse_req, list);
		list_del(&req->list);
		fc->active_background++;
		queue_request(fc, req);
	}
}

/*
 * This function is called when a request is finished.  Either a reply
 * has arrived or it was aborted (and not yet sent) or some error
 * occurred during communication with userspace, or the device file
 * was closed.  The requester thread is woken up (if still waiting),
 * the 'end' callback is called if given, else the reference to the
 * request is released
 *
 * Called with fc->lock, unlocks it
 */
static void request_end(struct fuse_conn *fc, struct fuse_req *req)
__releases(&fc->lock)
{
	void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
	req->end = NULL;
	list_del(&req->list);
	list_del(&req->intr_entry);
	req->state = FUSE_REQ_FINISHED;
	if (req->background) {
		if (fc->num_background == fc->max_background) {
			fc->blocked = 0;
			wake_up_all(&fc->blocked_waitq);
		}
		if (fc->num_background == fc->congestion_threshold &&
		    fc->connected && fc->bdi_initialized) {
			clear_bdi_congested(&fc->bdi, BLK_RW_SYNC);
			clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
		}
		fc->num_background--;
		fc->active_background--;
		flush_bg_queue(fc);
	}
	spin_unlock(&fc->lock);
	wake_up(&req->waitq);
	if (end)
		end(fc, req);
	fuse_put_request(fc, req);
}
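
/*
 * Worked example of the background accounting above, assuming the
 * defaults of this era (max_background == 12, congestion_threshold ==
 * 9; both are tunable, so treat the numbers as illustrative): while
 * 12 background requests are in flight, fc->blocked stays set and
 * fuse_get_req() sleeps; when a reply drops the count from 12,
 * fc->blocked is cleared and blocked_waitq is woken; dropping from 9
 * likewise clears the bdi congestion state that was set on the way up.
 */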

static void wait_answer_interruptible(struct fuse_conn *fc,
				      struct fuse_req *req)
__releases(&fc->lock)
__acquires(&fc->lock)
{
	if (signal_pending(current))
		return;

	spin_unlock(&fc->lock);
	wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED);
	spin_lock(&fc->lock);
}

static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req)
{
	list_add_tail(&req->intr_entry, &fc->interrupts);
	wake_up(&fc->waitq);
	kill_fasync(&fc->fasync, SIGIO, POLL_IN);
}

static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
__releases(&fc->lock)
__acquires(&fc->lock)
{
	if (!fc->no_interrupt) {
		/* Any signal may interrupt this */
		wait_answer_interruptible(fc, req);

		if (req->aborted)
			goto aborted;
		if (req->state == FUSE_REQ_FINISHED)
			return;

		req->interrupted = 1;
		if (req->state == FUSE_REQ_SENT)
			queue_interrupt(fc, req);
	}

	if (!req->force) {
		sigset_t oldset;

		/* Only fatal signals may interrupt this */
		block_sigs(&oldset);
		wait_answer_interruptible(fc, req);
		restore_sigs(&oldset);

		if (req->aborted)
			goto aborted;
		if (req->state == FUSE_REQ_FINISHED)
			return;

		/* Request is not yet in userspace, bail out */
		if (req->state == FUSE_REQ_PENDING) {
			list_del(&req->list);
			__fuse_put_request(req);
			req->out.h.error = -EINTR;
			return;
		}
	}

	/*
	 * Either request is already in userspace, or it was forced.
	 * Wait it out.
	 */
	spin_unlock(&fc->lock);
	wait_event(req->waitq, req->state == FUSE_REQ_FINISHED);
	spin_lock(&fc->lock);

	if (!req->aborted)
		return;

 aborted:
	BUG_ON(req->state != FUSE_REQ_FINISHED);
	if (req->locked) {
		/* This is uninterruptible sleep, because data is
		   being copied to/from the buffers of req.  During
		   locked state, there mustn't be any filesystem
		   operation (e.g. page fault), since that could lead
		   to deadlock */
		spin_unlock(&fc->lock);
		wait_event(req->waitq, !req->locked);
		spin_lock(&fc->lock);
	}
}
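
/*
 * Summary of the request states used above, in the order a normal
 * request moves through them (see fuse_dev_read() and
 * fuse_dev_do_write() below):
 *
 *	FUSE_REQ_PENDING	queued on fc->pending, not yet read
 *	FUSE_REQ_READING	being copied out to the daemon
 *	FUSE_REQ_SENT		on fc->processing, awaiting a reply
 *	FUSE_REQ_WRITING	reply being copied in from the daemon
 *	FUSE_REQ_FINISHED	request_end() has run
 */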

void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
{
	req->isreply = 1;
	spin_lock(&fc->lock);
	if (!fc->connected)
		req->out.h.error = -ENOTCONN;
	else if (fc->conn_error)
		req->out.h.error = -ECONNREFUSED;
	else {
		queue_request(fc, req);
		/* acquire extra reference, since request is still needed
		   after request_end() */
		__fuse_get_request(req);

		request_wait_answer(fc, req);
	}
	spin_unlock(&fc->lock);
}
EXPORT_SYMBOL_GPL(fuse_request_send);

static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
					    struct fuse_req *req)
{
	req->background = 1;
	fc->num_background++;
	if (fc->num_background == fc->max_background)
		fc->blocked = 1;
	if (fc->num_background == fc->congestion_threshold &&
	    fc->bdi_initialized) {
		set_bdi_congested(&fc->bdi, BLK_RW_SYNC);
		set_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
	}
	list_add_tail(&req->list, &fc->bg_queue);
	flush_bg_queue(fc);
}

static void fuse_request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
{
	spin_lock(&fc->lock);
	if (fc->connected) {
		fuse_request_send_nowait_locked(fc, req);
		spin_unlock(&fc->lock);
	} else {
		req->out.h.error = -ENOTCONN;
		request_end(fc, req);
	}
}

void fuse_request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
{
	req->isreply = 0;
	fuse_request_send_nowait(fc, req);
}

void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
{
	req->isreply = 1;
	fuse_request_send_nowait(fc, req);
}
EXPORT_SYMBOL_GPL(fuse_request_send_background);

/*
 * Called under fc->lock
 *
 * fc->connected must have been checked previously
 */
void fuse_request_send_background_locked(struct fuse_conn *fc,
					 struct fuse_req *req)
{
	req->isreply = 1;
	fuse_request_send_nowait_locked(fc, req);
}

/*
 * Lock the request.  Up to the next unlock_request() there mustn't be
 * anything that could cause a page-fault.  If the request was already
 * aborted, bail out.
 */
static int lock_request(struct fuse_conn *fc, struct fuse_req *req)
{
	int err = 0;
	if (req) {
		spin_lock(&fc->lock);
		if (req->aborted)
			err = -ENOENT;
		else
			req->locked = 1;
		spin_unlock(&fc->lock);
	}
	return err;
}

/*
 * Unlock request.  If it was aborted while locked, the requester
 * thread is currently waiting for it to be unlocked, so wake it up.
 */
static void unlock_request(struct fuse_conn *fc, struct fuse_req *req)
{
	if (req) {
		spin_lock(&fc->lock);
		req->locked = 0;
		if (req->aborted)
			wake_up(&req->waitq);
		spin_unlock(&fc->lock);
	}
}

struct fuse_copy_state {
	struct fuse_conn *fc;
	int write;
	struct fuse_req *req;
	const struct iovec *iov;
	struct pipe_buffer *pipebufs;
	struct pipe_buffer *currbuf;
	struct pipe_inode_info *pipe;
	unsigned long nr_segs;
	unsigned long seglen;
	unsigned long addr;
	struct page *pg;
	void *mapaddr;
	void *buf;
	unsigned len;
};

static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc,
			   int write, struct fuse_req *req,
			   const struct iovec *iov, unsigned long nr_segs)
{
	memset(cs, 0, sizeof(*cs));
	cs->fc = fc;
	cs->write = write;
	cs->req = req;
	cs->iov = iov;
	cs->nr_segs = nr_segs;
}

/* Unmap and put previous page of userspace buffer */
static void fuse_copy_finish(struct fuse_copy_state *cs)
{
	if (cs->currbuf) {
		struct pipe_buffer *buf = cs->currbuf;

		buf->ops->unmap(cs->pipe, buf, cs->mapaddr);

		cs->currbuf = NULL;
		cs->mapaddr = NULL;
	} else if (cs->mapaddr) {
		kunmap_atomic(cs->mapaddr, KM_USER0);
		if (cs->write) {
			flush_dcache_page(cs->pg);
			set_page_dirty_lock(cs->pg);
		}
		put_page(cs->pg);
		cs->mapaddr = NULL;
	}
}

/*
 * Get another pageful of userspace buffer, map it into kernel
 * address space, and lock the request
 */
static int fuse_copy_fill(struct fuse_copy_state *cs)
{
	unsigned long offset;
	int err;

	unlock_request(cs->fc, cs->req);
	fuse_copy_finish(cs);
	if (cs->pipebufs) {
		struct pipe_buffer *buf = cs->pipebufs;

		err = buf->ops->confirm(cs->pipe, buf);
		if (err)
			return err;

		BUG_ON(!cs->nr_segs);
		cs->currbuf = buf;
		cs->mapaddr = buf->ops->map(cs->pipe, buf, 1);
		cs->len = buf->len;
		cs->buf = cs->mapaddr + buf->offset;
		cs->pipebufs++;
		cs->nr_segs--;
	} else {
		if (!cs->seglen) {
			BUG_ON(!cs->nr_segs);
			cs->seglen = cs->iov[0].iov_len;
			cs->addr = (unsigned long) cs->iov[0].iov_base;
			cs->iov++;
			cs->nr_segs--;
		}
		err = get_user_pages_fast(cs->addr, 1, cs->write, &cs->pg);
		if (err < 0)
			return err;
		BUG_ON(err != 1);
		offset = cs->addr % PAGE_SIZE;
		cs->mapaddr = kmap_atomic(cs->pg, KM_USER0);
		cs->buf = cs->mapaddr + offset;
		cs->len = min(PAGE_SIZE - offset, cs->seglen);
		cs->seglen -= cs->len;
		cs->addr += cs->len;
	}

	return lock_request(cs->fc, cs->req);
}

/* Do as much copy to/from userspace buffer as we can */
static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
{
	unsigned ncpy = min(*size, cs->len);
	if (val) {
		if (cs->write)
			memcpy(cs->buf, *val, ncpy);
		else
			memcpy(*val, cs->buf, ncpy);
		*val += ncpy;
	}
	*size -= ncpy;
	cs->len -= ncpy;
	cs->buf += ncpy;
	return ncpy;
}

/*
 * Copy a page in the request to/from the userspace buffer.  Must be
 * done atomically
 */
static int fuse_copy_page(struct fuse_copy_state *cs, struct page *page,
			  unsigned offset, unsigned count, int zeroing)
{
	if (page && zeroing && count < PAGE_SIZE) {
		void *mapaddr = kmap_atomic(page, KM_USER1);
		memset(mapaddr, 0, PAGE_SIZE);
		kunmap_atomic(mapaddr, KM_USER1);
	}
	while (count) {
		if (!cs->len) {
			int err = fuse_copy_fill(cs);
			if (err)
				return err;
		}
		if (page) {
			void *mapaddr = kmap_atomic(page, KM_USER1);
			void *buf = mapaddr + offset;
			offset += fuse_copy_do(cs, &buf, &count);
			kunmap_atomic(mapaddr, KM_USER1);
		} else
			offset += fuse_copy_do(cs, NULL, &count);
	}
	if (page && !cs->write)
		flush_dcache_page(page);
	return 0;
}

/* Copy pages in the request to/from userspace buffer */
static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
			   int zeroing)
{
	unsigned i;
	struct fuse_req *req = cs->req;
	unsigned offset = req->page_offset;
	unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);

	for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
		struct page *page = req->pages[i];
		int err = fuse_copy_page(cs, page, offset, count, zeroing);
		if (err)
			return err;

		nbytes -= count;
		count = min(nbytes, (unsigned) PAGE_SIZE);
		offset = 0;
	}
	return 0;
}

/* Copy a single argument in the request to/from userspace buffer */
static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
{
	while (size) {
		if (!cs->len) {
			int err = fuse_copy_fill(cs);
			if (err)
				return err;
		}
		fuse_copy_do(cs, &val, &size);
	}
	return 0;
}

/* Copy request arguments to/from userspace buffer */
static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
			  unsigned argpages, struct fuse_arg *args,
			  int zeroing)
{
	int err = 0;
	unsigned i;

	for (i = 0; !err && i < numargs; i++)  {
		struct fuse_arg *arg = &args[i];
		if (i == numargs - 1 && argpages)
			err = fuse_copy_pages(cs, arg->size, zeroing);
		else
			err = fuse_copy_one(cs, arg->value, arg->size);
	}
	return err;
}

static int request_pending(struct fuse_conn *fc)
{
	return !list_empty(&fc->pending) || !list_empty(&fc->interrupts);
}

/* Wait until a request is available on the pending list */
static void request_wait(struct fuse_conn *fc)
__releases(&fc->lock)
__acquires(&fc->lock)
{
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue_exclusive(&fc->waitq, &wait);
	while (fc->connected && !request_pending(fc)) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (signal_pending(current))
			break;

		spin_unlock(&fc->lock);
		schedule();
		spin_lock(&fc->lock);
	}
	set_current_state(TASK_RUNNING);
	remove_wait_queue(&fc->waitq, &wait);
}

/*
 * Transfer an interrupt request to userspace
 *
 * Unlike other requests this is assembled on demand, without a need
 * to allocate a separate fuse_req structure.
 *
 * Called with fc->lock held, releases it
 */
static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_req *req,
			       const struct iovec *iov, unsigned long nr_segs)
__releases(&fc->lock)
{
	struct fuse_copy_state cs;
	struct fuse_in_header ih;
	struct fuse_interrupt_in arg;
	unsigned reqsize = sizeof(ih) + sizeof(arg);
	int err;

	list_del_init(&req->intr_entry);
	req->intr_unique = fuse_get_unique(fc);
	memset(&ih, 0, sizeof(ih));
	memset(&arg, 0, sizeof(arg));
	ih.len = reqsize;
	ih.opcode = FUSE_INTERRUPT;
	ih.unique = req->intr_unique;
	arg.unique = req->in.h.unique;

	spin_unlock(&fc->lock);
	if (iov_length(iov, nr_segs) < reqsize)
		return -EINVAL;

	fuse_copy_init(&cs, fc, 1, NULL, iov, nr_segs);
	err = fuse_copy_one(&cs, &ih, sizeof(ih));
	if (!err)
		err = fuse_copy_one(&cs, &arg, sizeof(arg));
	fuse_copy_finish(&cs);

	return err ? err : reqsize;
}
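
/*
 * On the wire, the interrupt request assembled above amounts to the
 * following (a sketch of what the daemon reads; the structures are
 * defined in <linux/fuse.h>):
 *
 *	struct fuse_in_header ih = {
 *		.len	= sizeof(ih) + sizeof(struct fuse_interrupt_in),
 *		.opcode	= FUSE_INTERRUPT,
 *		.unique	= <fresh id from fuse_get_unique()>,
 *	};
 *	struct fuse_interrupt_in arg = {
 *		.unique	= <id of the request being interrupted>,
 *	};
 */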

/*
 * Read a single request into the userspace filesystem's buffer.  This
 * function waits until a request is available, then removes it from
 * the pending list and copies the request data to the userspace
 * buffer.  If no reply is needed (FORGET) or the request has been
 * aborted or there was an error during the copying then it's finished
 * by calling request_end().  Otherwise add it to the processing list,
 * and set the 'sent' flag.
 */
static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
			      unsigned long nr_segs, loff_t pos)
{
	int err;
	struct fuse_req *req;
	struct fuse_in *in;
	struct fuse_copy_state cs;
	unsigned reqsize;
	struct file *file = iocb->ki_filp;
	struct fuse_conn *fc = fuse_get_conn(file);
	if (!fc)
		return -EPERM;

 restart:
	spin_lock(&fc->lock);
	err = -EAGAIN;
	if ((file->f_flags & O_NONBLOCK) && fc->connected &&
	    !request_pending(fc))
		goto err_unlock;

	request_wait(fc);
	err = -ENODEV;
	if (!fc->connected)
		goto err_unlock;
	err = -ERESTARTSYS;
	if (!request_pending(fc))
		goto err_unlock;

	if (!list_empty(&fc->interrupts)) {
		req = list_entry(fc->interrupts.next, struct fuse_req,
				 intr_entry);
		return fuse_read_interrupt(fc, req, iov, nr_segs);
	}

	req = list_entry(fc->pending.next, struct fuse_req, list);
	req->state = FUSE_REQ_READING;
	list_move(&req->list, &fc->io);

	in = &req->in;
	reqsize = in->h.len;
	/* If request is too large, reply with an error and restart the read */
	if (iov_length(iov, nr_segs) < reqsize) {
		req->out.h.error = -EIO;
		/* SETXATTR is special, since it may contain too large data */
		if (in->h.opcode == FUSE_SETXATTR)
			req->out.h.error = -E2BIG;
		request_end(fc, req);
		goto restart;
	}
	spin_unlock(&fc->lock);
	fuse_copy_init(&cs, fc, 1, req, iov, nr_segs);
	err = fuse_copy_one(&cs, &in->h, sizeof(in->h));
	if (!err)
		err = fuse_copy_args(&cs, in->numargs, in->argpages,
				     (struct fuse_arg *) in->args, 0);
	fuse_copy_finish(&cs);
	spin_lock(&fc->lock);
	req->locked = 0;
	if (req->aborted) {
		request_end(fc, req);
		return -ENODEV;
	}
	if (err) {
		req->out.h.error = -EIO;
		request_end(fc, req);
		return err;
	}
	if (!req->isreply)
		request_end(fc, req);
	else {
		req->state = FUSE_REQ_SENT;
		list_move_tail(&req->list, &fc->processing);
		if (req->interrupted)
			queue_interrupt(fc, req);
		spin_unlock(&fc->lock);
	}
	return reqsize;

 err_unlock:
	spin_unlock(&fc->lock);
	return err;
}
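
/*
 * A minimal userspace counterpart to fuse_dev_read(), as a hedged
 * sketch (error handling elided; "fd" is an open /dev/fuse descriptor,
 * and the buffer must be big enough for the largest request -- see
 * the EIO/restart logic above and FUSE_MIN_READ_BUFFER in
 * <linux/fuse.h>):
 *
 *	char buf[FUSE_MIN_READ_BUFFER];
 *	for (;;) {
 *		ssize_t n = read(fd, buf, sizeof(buf));
 *		if (n < 0 && errno == EINTR)
 *			continue;
 *		struct fuse_in_header *in = (struct fuse_in_header *) buf;
 *		// dispatch on in->opcode; the reply echoes in->unique
 *	}
 */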

static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size,
			    struct fuse_copy_state *cs)
{
	struct fuse_notify_poll_wakeup_out outarg;
	int err = -EINVAL;

	if (size != sizeof(outarg))
		goto err;

	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
	if (err)
		goto err;

	fuse_copy_finish(cs);
	return fuse_notify_poll_wakeup(fc, &outarg);

err:
	fuse_copy_finish(cs);
	return err;
}

static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
				   struct fuse_copy_state *cs)
{
	struct fuse_notify_inval_inode_out outarg;
	int err = -EINVAL;

	if (size != sizeof(outarg))
		goto err;

	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
	if (err)
		goto err;
	fuse_copy_finish(cs);

	down_read(&fc->killsb);
	err = -ENOENT;
	if (fc->sb) {
		err = fuse_reverse_inval_inode(fc->sb, outarg.ino,
					       outarg.off, outarg.len);
	}
	up_read(&fc->killsb);
	return err;

err:
	fuse_copy_finish(cs);
	return err;
}

static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
				   struct fuse_copy_state *cs)
{
	struct fuse_notify_inval_entry_out outarg;
	int err = -ENOMEM;
	char *buf;
	struct qstr name;

	buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
	if (!buf)
		goto err;

	err = -EINVAL;
	if (size < sizeof(outarg))
		goto err;

	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
	if (err)
		goto err;

	err = -ENAMETOOLONG;
	if (outarg.namelen > FUSE_NAME_MAX)
		goto err;

	name.name = buf;
	name.len = outarg.namelen;
	err = fuse_copy_one(cs, buf, outarg.namelen + 1);
	if (err)
		goto err;
	fuse_copy_finish(cs);
	buf[outarg.namelen] = 0;
	name.hash = full_name_hash(name.name, name.len);

	down_read(&fc->killsb);
	err = -ENOENT;
	if (fc->sb)
		err = fuse_reverse_inval_entry(fc->sb, outarg.parent, &name);
	up_read(&fc->killsb);
	kfree(buf);
	return err;

err:
	kfree(buf);
	fuse_copy_finish(cs);
	return err;
}

static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
		       unsigned int size, struct fuse_copy_state *cs)
{
	switch (code) {
	case FUSE_NOTIFY_POLL:
		return fuse_notify_poll(fc, size, cs);

	case FUSE_NOTIFY_INVAL_INODE:
		return fuse_notify_inval_inode(fc, size, cs);

	case FUSE_NOTIFY_INVAL_ENTRY:
		return fuse_notify_inval_entry(fc, size, cs);

	default:
		fuse_copy_finish(cs);
		return -EINVAL;
	}
}
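
/*
 * Notifications travel in an ordinary reply frame with unique == 0;
 * the "error" field carries the fuse_notify_code instead of an errno
 * (see fuse_dev_do_write() below).  A poll wakeup, for example, is
 * written by the daemon roughly as (hedged sketch):
 *
 *	struct fuse_out_header oh = {
 *		.len	= sizeof(oh) +
 *			  sizeof(struct fuse_notify_poll_wakeup_out),
 *		.error	= FUSE_NOTIFY_POLL,
 *		.unique	= 0,
 *	};
 *
 * followed by a struct fuse_notify_poll_wakeup_out carrying the poll
 * handle in its kh field.
 */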

/* Look up request on processing list by unique ID */
static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
{
	struct list_head *entry;

	list_for_each(entry, &fc->processing) {
		struct fuse_req *req;
		req = list_entry(entry, struct fuse_req, list);
		if (req->in.h.unique == unique || req->intr_unique == unique)
			return req;
	}
	return NULL;
}

static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
			 unsigned nbytes)
{
	unsigned reqsize = sizeof(struct fuse_out_header);

	if (out->h.error)
		return nbytes != reqsize ? -EINVAL : 0;

	reqsize += len_args(out->numargs, out->args);

	if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
		return -EINVAL;
	else if (reqsize > nbytes) {
		struct fuse_arg *lastarg = &out->args[out->numargs-1];
		unsigned diffsize = reqsize - nbytes;
		if (diffsize > lastarg->size)
			return -EINVAL;
		lastarg->size -= diffsize;
	}
	return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
			      out->page_zeroing);
}
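
/*
 * The argvar case above covers replies whose last argument may
 * legitimately be shorter than its maximum -- e.g. a READ reply that
 * returns fewer bytes than requested: the daemon writes a smaller
 * total, and copy_out_args() shrinks lastarg->size by the difference
 * instead of failing.
 */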

/*
 * Write a single reply to a request.  First the header is copied from
 * the write buffer.  The request is then searched on the processing
 * list by the unique ID found in the header.  If found, the request
 * is removed from the list and the rest of the buffer is copied to
 * it.  The request is finished by calling request_end().
 */
static ssize_t fuse_dev_do_write(struct fuse_conn *fc,
				 struct fuse_copy_state *cs, size_t nbytes)
{
	int err;
	struct fuse_req *req;
	struct fuse_out_header oh;

	if (nbytes < sizeof(struct fuse_out_header))
		return -EINVAL;

	err = fuse_copy_one(cs, &oh, sizeof(oh));
	if (err)
		goto err_finish;

	err = -EINVAL;
	if (oh.len != nbytes)
		goto err_finish;

	/*
	 * Zero oh.unique indicates unsolicited notification message
	 * and error contains notification code.
	 */
	if (!oh.unique) {
		err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs);
		return err ? err : nbytes;
	}

	err = -EINVAL;
	if (oh.error <= -1000 || oh.error > 0)
		goto err_finish;

	spin_lock(&fc->lock);
	err = -ENOENT;
	if (!fc->connected)
		goto err_unlock;

	req = request_find(fc, oh.unique);
	if (!req)
		goto err_unlock;

	if (req->aborted) {
		spin_unlock(&fc->lock);
		fuse_copy_finish(cs);
		spin_lock(&fc->lock);
		request_end(fc, req);
		return -ENOENT;
	}
	/* Is it an interrupt reply? */
	if (req->intr_unique == oh.unique) {
		err = -EINVAL;
		if (nbytes != sizeof(struct fuse_out_header))
			goto err_unlock;

		if (oh.error == -ENOSYS)
			fc->no_interrupt = 1;
		else if (oh.error == -EAGAIN)
			queue_interrupt(fc, req);

		spin_unlock(&fc->lock);
		fuse_copy_finish(cs);
		return nbytes;
	}

	req->state = FUSE_REQ_WRITING;
	list_move(&req->list, &fc->io);
	req->out.h = oh;
	req->locked = 1;
	cs->req = req;
	spin_unlock(&fc->lock);

	err = copy_out_args(cs, &req->out, nbytes);
	fuse_copy_finish(cs);

	spin_lock(&fc->lock);
	req->locked = 0;
	if (!err) {
		if (req->aborted)
			err = -ENOENT;
	} else if (!req->aborted)
		req->out.h.error = -EIO;
	request_end(fc, req);

	return err ? err : nbytes;

 err_unlock:
	spin_unlock(&fc->lock);
 err_finish:
	fuse_copy_finish(cs);
	return err;
}
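
/*
 * The matching userspace side of fuse_dev_do_write(), as a hedged
 * sketch: the daemon echoes back the unique id it read earlier and
 * writes the whole reply in a single call ("payload"/"payload_len"
 * are hypothetical names for the reply body):
 *
 *	struct fuse_out_header oh = {
 *		.len	= sizeof(oh) + payload_len,
 *		.error	= 0,			// or a negative errno
 *		.unique	= in->unique,
 *	};
 *	struct iovec iov[2] = {
 *		{ &oh, sizeof(oh) },
 *		{ payload, payload_len },
 *	};
 *	writev(fd, iov, 2);
 */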

static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
			      unsigned long nr_segs, loff_t pos)
{
	struct fuse_copy_state cs;
	struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp);
	if (!fc)
		return -EPERM;

	fuse_copy_init(&cs, fc, 0, NULL, iov, nr_segs);

	return fuse_dev_do_write(fc, &cs, iov_length(iov, nr_segs));
}

static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
				     struct file *out, loff_t *ppos,
				     size_t len, unsigned int flags)
{
	unsigned nbuf;
	unsigned idx;
	struct pipe_buffer *bufs;
	struct fuse_copy_state cs;
	struct fuse_conn *fc;
	size_t rem;
	ssize_t ret;

	fc = fuse_get_conn(out);
	if (!fc)
		return -EPERM;

	bufs = kmalloc(pipe->buffers * sizeof (struct pipe_buffer), GFP_KERNEL);
	if (!bufs)
		return -ENOMEM;

	pipe_lock(pipe);
	nbuf = 0;
	rem = 0;
	for (idx = 0; idx < pipe->nrbufs && rem < len; idx++)
		rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len;

	ret = -EINVAL;
	if (rem < len) {
		pipe_unlock(pipe);
		goto out;
	}

	rem = len;
	while (rem) {
		struct pipe_buffer *ibuf;
		struct pipe_buffer *obuf;

		BUG_ON(nbuf >= pipe->buffers);
		BUG_ON(!pipe->nrbufs);
		ibuf = &pipe->bufs[pipe->curbuf];
		obuf = &bufs[nbuf];

		if (rem >= ibuf->len) {
			*obuf = *ibuf;
			ibuf->ops = NULL;
			pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
			pipe->nrbufs--;
		} else {
			ibuf->ops->get(pipe, ibuf);
			*obuf = *ibuf;
			obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
			obuf->len = rem;
			ibuf->offset += obuf->len;
			ibuf->len -= obuf->len;
		}
		nbuf++;
		rem -= obuf->len;
	}
	pipe_unlock(pipe);

	memset(&cs, 0, sizeof(struct fuse_copy_state));
	cs.fc = fc;
	cs.write = 0;
	cs.pipebufs = bufs;
	cs.nr_segs = nbuf;
	cs.pipe = pipe;

	ret = fuse_dev_do_write(fc, &cs, len);

	for (idx = 0; idx < nbuf; idx++) {
		struct pipe_buffer *buf = &bufs[idx];
		buf->ops->release(pipe, buf);
	}
out:
	kfree(bufs);
	return ret;
}
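
/*
 * The splice path above lets the daemon reply without bouncing data
 * through a user buffer: it fills a pipe (e.g. with splice() from a
 * file) and splices the pipe into /dev/fuse.  Pipe buffers that the
 * reply consumes completely are stolen outright (ibuf->ops = NULL
 * above); partially consumed ones are referenced and trimmed.
 */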

static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
{
	unsigned mask = POLLOUT | POLLWRNORM;
	struct fuse_conn *fc = fuse_get_conn(file);
	if (!fc)
		return POLLERR;

	poll_wait(file, &fc->waitq, wait);

	spin_lock(&fc->lock);
	if (!fc->connected)
		mask = POLLERR;
	else if (request_pending(fc))
		mask |= POLLIN | POLLRDNORM;
	spin_unlock(&fc->lock);

	return mask;
}

/*
 * Abort all requests on the given list (pending or processing)
 *
 * This function releases and reacquires fc->lock
 */
static void end_requests(struct fuse_conn *fc, struct list_head *head)
__releases(&fc->lock)
__acquires(&fc->lock)
{
	while (!list_empty(head)) {
		struct fuse_req *req;
		req = list_entry(head->next, struct fuse_req, list);
		req->out.h.error = -ECONNABORTED;
		request_end(fc, req);
		spin_lock(&fc->lock);
	}
}

/*
 * Abort requests under I/O
 *
 * The requests are set to aborted and finished, and the request
 * waiter is woken up.  This will make request_wait_answer() wait
 * until the request is unlocked and then return.
 *
 * If the request is asynchronous, then the end function needs to be
 * called after waiting for the request to be unlocked (if it was
 * locked).
 */
static void end_io_requests(struct fuse_conn *fc)
__releases(&fc->lock)
__acquires(&fc->lock)
{
	while (!list_empty(&fc->io)) {
		struct fuse_req *req =
			list_entry(fc->io.next, struct fuse_req, list);
		void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;

		req->aborted = 1;
		req->out.h.error = -ECONNABORTED;
		req->state = FUSE_REQ_FINISHED;
		list_del_init(&req->list);
		wake_up(&req->waitq);
		if (end) {
			req->end = NULL;
			__fuse_get_request(req);
			spin_unlock(&fc->lock);
			wait_event(req->waitq, !req->locked);
			end(fc, req);
			fuse_put_request(fc, req);
			spin_lock(&fc->lock);
		}
	}
}

/*
 * Abort all requests.
 *
 * Emergency exit in case of a malicious or accidental deadlock, or
 * just a hung filesystem.
 *
 * The same effect is usually achievable through killing the
 * filesystem daemon and all users of the filesystem.  The exception
 * is the combination of an asynchronous request and the tricky
 * deadlock (see Documentation/filesystems/fuse.txt).
 *
 * During the aborting, progression of requests from the pending and
 * processing lists onto the io list, and progression of new requests
 * onto the pending list is prevented by fc->connected being false.
 *
 * Progression of requests under I/O to the processing list is
 * prevented by the req->aborted flag being true for these requests.
 * For this reason requests on the io list must be aborted first.
 */
void fuse_abort_conn(struct fuse_conn *fc)
{
	spin_lock(&fc->lock);
	if (fc->connected) {
		fc->connected = 0;
		fc->blocked = 0;
		end_io_requests(fc);
		end_requests(fc, &fc->pending);
		end_requests(fc, &fc->processing);
		wake_up_all(&fc->waitq);
		wake_up_all(&fc->blocked_waitq);
		kill_fasync(&fc->fasync, SIGIO, POLL_IN);
	}
	spin_unlock(&fc->lock);
}
EXPORT_SYMBOL_GPL(fuse_abort_conn);
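
/*
 * In practice fuse_abort_conn() is reached by writing to the "abort"
 * file that the fusectl filesystem exposes per connection,
 * conventionally mounted at /sys/fs/fuse/connections/<dev>/abort.
 */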

int fuse_dev_release(struct inode *inode, struct file *file)
{
	struct fuse_conn *fc = fuse_get_conn(file);
	if (fc) {
		spin_lock(&fc->lock);
		fc->connected = 0;
		end_requests(fc, &fc->pending);
		end_requests(fc, &fc->processing);
		spin_unlock(&fc->lock);
		fuse_conn_put(fc);
	}

	return 0;
}
EXPORT_SYMBOL_GPL(fuse_dev_release);

static int fuse_dev_fasync(int fd, struct file *file, int on)
{
	struct fuse_conn *fc = fuse_get_conn(file);
	if (!fc)
		return -EPERM;

	/* No locking - fasync_helper does its own locking */
	return fasync_helper(fd, file, on, &fc->fasync);
}

const struct file_operations fuse_dev_operations = {
	.owner		= THIS_MODULE,
	.llseek		= no_llseek,
	.read		= do_sync_read,
	.aio_read	= fuse_dev_read,
	.write		= do_sync_write,
	.aio_write	= fuse_dev_write,
	.splice_write	= fuse_dev_splice_write,
	.poll		= fuse_dev_poll,
	.release	= fuse_dev_release,
	.fasync		= fuse_dev_fasync,
};
EXPORT_SYMBOL_GPL(fuse_dev_operations);

static struct miscdevice fuse_miscdevice = {
	.minor = FUSE_MINOR,
	.name  = "fuse",
	.fops = &fuse_dev_operations,
};
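
/*
 * The misc device registers as /dev/fuse (minor FUSE_MINOR).  A mount
 * helper such as fusermount opens it and hands the descriptor to the
 * kernel via the "fd=" mount option; that is how file->private_data
 * comes to point at the fuse_conn (see fuse_get_conn() above).
 */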

int __init fuse_dev_init(void)
{
	int err = -ENOMEM;
	fuse_req_cachep = kmem_cache_create("fuse_request",
					    sizeof(struct fuse_req),
					    0, 0, NULL);
	if (!fuse_req_cachep)
		goto out;

	err = misc_register(&fuse_miscdevice);
	if (err)
		goto out_cache_clean;

	return 0;

 out_cache_clean:
	kmem_cache_destroy(fuse_req_cachep);
 out:
	return err;
}

void fuse_dev_cleanup(void)
{
	misc_deregister(&fuse_miscdevice);
	kmem_cache_destroy(fuse_req_cachep);
}