xref: /openbmc/linux/fs/fuse/virtio_fs.c (revision 58ada94f95f71d4f73197ab0e9603dbba6e47fe3)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * virtio-fs: Virtio Filesystem
4  * Copyright (C) 2018 Red Hat, Inc.
5  */
6 
7 #include <linux/fs.h>
8 #include <linux/module.h>
9 #include <linux/virtio.h>
10 #include <linux/virtio_fs.h>
11 #include <linux/delay.h>
12 #include <linux/fs_context.h>
13 #include <linux/highmem.h>
14 #include "fuse_i.h"
15 
16 /* List of virtio-fs device instances and a lock for the list. Also provides
17  * mutual exclusion in device removal and mounting path
18  */
19 static DEFINE_MUTEX(virtio_fs_mutex);
20 static LIST_HEAD(virtio_fs_instances);
21 
/* Indices into virtio_fs->vqs[] */
enum {
	VQ_HIPRIO,	/* queue that carries FUSE_FORGET requests */
	VQ_REQUEST	/* first request queue; request queues run up to nvqs - 1 */
};
26 
/*
 * Per-virtqueue state.
 *
 * One instance exists for every virtqueue of a device; they live in the
 * virtio_fs->vqs[] array and are indexed by the virtqueue index.
 */
struct virtio_fs_vq {
	/* Taken around accesses to vq, both lists, connected and in_flight */
	spinlock_t lock;
	struct virtqueue *vq;     /* protected by ->lock */
	/* Completion worker, scheduled from virtio_fs_vq_done() */
	struct work_struct done_work;
	/* Requests that could not be submitted yet; retried by dispatch_work */
	struct list_head queued_reqs;
	struct list_head end_reqs;	/* End these requests */
	/* Delayed worker that ends end_reqs and resubmits queued_reqs */
	struct delayed_work dispatch_work;
	/* fuse device of this queue; NULL after virtio_fs_free_devs() */
	struct fuse_dev *fud;
	/* When false, new submissions are refused (requests) or dropped (forgets) */
	bool connected;
	/* Requests submitted or queued that have not completed yet */
	long in_flight;
	/* Virtqueue name: "hiprio" or "requests.<n>" */
	char name[24];
} ____cacheline_aligned_in_smp;
40 
/*
 * A virtio-fs device instance.
 *
 * Reference counted: the final virtio_fs_put() frees vqs and the object
 * itself (see release_virtio_fs_obj()).
 */
struct virtio_fs {
	struct kref refcount;
	struct list_head list;    /* on virtio_fs_instances */
	/* NUL-terminated tag read from the device config; matched at mount time */
	char *tag;
	/* Array of nvqs entries: index VQ_HIPRIO, then the request queues */
	struct virtio_fs_vq *vqs;
	unsigned int nvqs;               /* number of virtqueues */
	unsigned int num_request_queues; /* number of request queues */
};
50 
/*
 * A FUSE_FORGET request as it is handed to the hiprio virtqueue.
 *
 * The whole structure is allocated with kmalloc() and freed either when
 * the device returns the buffer or when the request is dropped.
 */
struct virtio_fs_forget {
	/* FUSE request header (opcode FUSE_FORGET) */
	struct fuse_in_header ih;
	/* FUSE_FORGET payload (nlookup) */
	struct fuse_forget_in arg;
	/* This request can be temporarily queued on virt queue */
	struct list_head list;
};
57 
58 static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
59 				 struct fuse_req *req, bool in_flight);
60 
61 static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq)
62 {
63 	struct virtio_fs *fs = vq->vdev->priv;
64 
65 	return &fs->vqs[vq->index];
66 }
67 
68 static inline struct fuse_pqueue *vq_to_fpq(struct virtqueue *vq)
69 {
70 	return &vq_to_fsvq(vq)->fud->pq;
71 }
72 
73 /* Should be called with fsvq->lock held. */
74 static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq)
75 {
76 	fsvq->in_flight++;
77 }
78 
79 /* Should be called with fsvq->lock held. */
80 static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq)
81 {
82 	WARN_ON(fsvq->in_flight <= 0);
83 	fsvq->in_flight--;
84 }
85 
86 static void release_virtio_fs_obj(struct kref *ref)
87 {
88 	struct virtio_fs *vfs = container_of(ref, struct virtio_fs, refcount);
89 
90 	kfree(vfs->vqs);
91 	kfree(vfs);
92 }
93 
94 /* Make sure virtiofs_mutex is held */
95 static void virtio_fs_put(struct virtio_fs *fs)
96 {
97 	kref_put(&fs->refcount, release_virtio_fs_obj);
98 }
99 
100 static void virtio_fs_fiq_release(struct fuse_iqueue *fiq)
101 {
102 	struct virtio_fs *vfs = fiq->priv;
103 
104 	mutex_lock(&virtio_fs_mutex);
105 	virtio_fs_put(vfs);
106 	mutex_unlock(&virtio_fs_mutex);
107 }
108 
109 static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq)
110 {
111 	WARN_ON(fsvq->in_flight < 0);
112 
113 	/* Wait for in flight requests to finish.*/
114 	while (1) {
115 		spin_lock(&fsvq->lock);
116 		if (!fsvq->in_flight) {
117 			spin_unlock(&fsvq->lock);
118 			break;
119 		}
120 		spin_unlock(&fsvq->lock);
121 		/* TODO use completion instead of timeout */
122 		usleep_range(1000, 2000);
123 	}
124 
125 	flush_work(&fsvq->done_work);
126 	flush_delayed_work(&fsvq->dispatch_work);
127 }
128 
129 static void virtio_fs_drain_all_queues(struct virtio_fs *fs)
130 {
131 	struct virtio_fs_vq *fsvq;
132 	int i;
133 
134 	for (i = 0; i < fs->nvqs; i++) {
135 		fsvq = &fs->vqs[i];
136 		virtio_fs_drain_queue(fsvq);
137 	}
138 }
139 
140 static void virtio_fs_start_all_queues(struct virtio_fs *fs)
141 {
142 	struct virtio_fs_vq *fsvq;
143 	int i;
144 
145 	for (i = 0; i < fs->nvqs; i++) {
146 		fsvq = &fs->vqs[i];
147 		spin_lock(&fsvq->lock);
148 		fsvq->connected = true;
149 		spin_unlock(&fsvq->lock);
150 	}
151 }
152 
153 /* Add a new instance to the list or return -EEXIST if tag name exists*/
154 static int virtio_fs_add_instance(struct virtio_fs *fs)
155 {
156 	struct virtio_fs *fs2;
157 	bool duplicate = false;
158 
159 	mutex_lock(&virtio_fs_mutex);
160 
161 	list_for_each_entry(fs2, &virtio_fs_instances, list) {
162 		if (strcmp(fs->tag, fs2->tag) == 0)
163 			duplicate = true;
164 	}
165 
166 	if (!duplicate)
167 		list_add_tail(&fs->list, &virtio_fs_instances);
168 
169 	mutex_unlock(&virtio_fs_mutex);
170 
171 	if (duplicate)
172 		return -EEXIST;
173 	return 0;
174 }
175 
176 /* Return the virtio_fs with a given tag, or NULL */
177 static struct virtio_fs *virtio_fs_find_instance(const char *tag)
178 {
179 	struct virtio_fs *fs;
180 
181 	mutex_lock(&virtio_fs_mutex);
182 
183 	list_for_each_entry(fs, &virtio_fs_instances, list) {
184 		if (strcmp(fs->tag, tag) == 0) {
185 			kref_get(&fs->refcount);
186 			goto found;
187 		}
188 	}
189 
190 	fs = NULL; /* not found */
191 
192 found:
193 	mutex_unlock(&virtio_fs_mutex);
194 
195 	return fs;
196 }
197 
198 static void virtio_fs_free_devs(struct virtio_fs *fs)
199 {
200 	unsigned int i;
201 
202 	for (i = 0; i < fs->nvqs; i++) {
203 		struct virtio_fs_vq *fsvq = &fs->vqs[i];
204 
205 		if (!fsvq->fud)
206 			continue;
207 
208 		fuse_dev_free(fsvq->fud);
209 		fsvq->fud = NULL;
210 	}
211 }
212 
213 /* Read filesystem name from virtio config into fs->tag (must kfree()). */
214 static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs)
215 {
216 	char tag_buf[sizeof_field(struct virtio_fs_config, tag)];
217 	char *end;
218 	size_t len;
219 
220 	virtio_cread_bytes(vdev, offsetof(struct virtio_fs_config, tag),
221 			   &tag_buf, sizeof(tag_buf));
222 	end = memchr(tag_buf, '\0', sizeof(tag_buf));
223 	if (end == tag_buf)
224 		return -EINVAL; /* empty tag */
225 	if (!end)
226 		end = &tag_buf[sizeof(tag_buf)];
227 
228 	len = end - tag_buf;
229 	fs->tag = devm_kmalloc(&vdev->dev, len + 1, GFP_KERNEL);
230 	if (!fs->tag)
231 		return -ENOMEM;
232 	memcpy(fs->tag, tag_buf, len);
233 	fs->tag[len] = '\0';
234 	return 0;
235 }
236 
237 /* Work function for hiprio completion */
238 static void virtio_fs_hiprio_done_work(struct work_struct *work)
239 {
240 	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
241 						 done_work);
242 	struct virtqueue *vq = fsvq->vq;
243 
244 	/* Free completed FUSE_FORGET requests */
245 	spin_lock(&fsvq->lock);
246 	do {
247 		unsigned int len;
248 		void *req;
249 
250 		virtqueue_disable_cb(vq);
251 
252 		while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
253 			kfree(req);
254 			dec_in_flight_req(fsvq);
255 		}
256 	} while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
257 	spin_unlock(&fsvq->lock);
258 }
259 
260 static void virtio_fs_request_dispatch_work(struct work_struct *work)
261 {
262 	struct fuse_req *req;
263 	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
264 						 dispatch_work.work);
265 	struct fuse_conn *fc = fsvq->fud->fc;
266 	int ret;
267 
268 	pr_debug("virtio-fs: worker %s called.\n", __func__);
269 	while (1) {
270 		spin_lock(&fsvq->lock);
271 		req = list_first_entry_or_null(&fsvq->end_reqs, struct fuse_req,
272 					       list);
273 		if (!req) {
274 			spin_unlock(&fsvq->lock);
275 			break;
276 		}
277 
278 		list_del_init(&req->list);
279 		spin_unlock(&fsvq->lock);
280 		fuse_request_end(fc, req);
281 	}
282 
283 	/* Dispatch pending requests */
284 	while (1) {
285 		spin_lock(&fsvq->lock);
286 		req = list_first_entry_or_null(&fsvq->queued_reqs,
287 					       struct fuse_req, list);
288 		if (!req) {
289 			spin_unlock(&fsvq->lock);
290 			return;
291 		}
292 		list_del_init(&req->list);
293 		spin_unlock(&fsvq->lock);
294 
295 		ret = virtio_fs_enqueue_req(fsvq, req, true);
296 		if (ret < 0) {
297 			if (ret == -ENOMEM || ret == -ENOSPC) {
298 				spin_lock(&fsvq->lock);
299 				list_add_tail(&req->list, &fsvq->queued_reqs);
300 				schedule_delayed_work(&fsvq->dispatch_work,
301 						      msecs_to_jiffies(1));
302 				spin_unlock(&fsvq->lock);
303 				return;
304 			}
305 			req->out.h.error = ret;
306 			spin_lock(&fsvq->lock);
307 			dec_in_flight_req(fsvq);
308 			spin_unlock(&fsvq->lock);
309 			pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n",
310 			       ret);
311 			fuse_request_end(fc, req);
312 		}
313 	}
314 }
315 
316 /*
317  * Returns 1 if queue is full and sender should wait a bit before sending
318  * next request, 0 otherwise.
319  */
320 static int send_forget_request(struct virtio_fs_vq *fsvq,
321 			       struct virtio_fs_forget *forget,
322 			       bool in_flight)
323 {
324 	struct scatterlist sg;
325 	struct virtqueue *vq;
326 	int ret = 0;
327 	bool notify;
328 
329 	spin_lock(&fsvq->lock);
330 	if (!fsvq->connected) {
331 		if (in_flight)
332 			dec_in_flight_req(fsvq);
333 		kfree(forget);
334 		goto out;
335 	}
336 
337 	sg_init_one(&sg, forget, sizeof(*forget));
338 	vq = fsvq->vq;
339 	dev_dbg(&vq->vdev->dev, "%s\n", __func__);
340 
341 	ret = virtqueue_add_outbuf(vq, &sg, 1, forget, GFP_ATOMIC);
342 	if (ret < 0) {
343 		if (ret == -ENOMEM || ret == -ENOSPC) {
344 			pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n",
345 				 ret);
346 			list_add_tail(&forget->list, &fsvq->queued_reqs);
347 			schedule_delayed_work(&fsvq->dispatch_work,
348 					      msecs_to_jiffies(1));
349 			if (!in_flight)
350 				inc_in_flight_req(fsvq);
351 			/* Queue is full */
352 			ret = 1;
353 		} else {
354 			pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
355 				 ret);
356 			kfree(forget);
357 			if (in_flight)
358 				dec_in_flight_req(fsvq);
359 		}
360 		goto out;
361 	}
362 
363 	if (!in_flight)
364 		inc_in_flight_req(fsvq);
365 	notify = virtqueue_kick_prepare(vq);
366 	spin_unlock(&fsvq->lock);
367 
368 	if (notify)
369 		virtqueue_notify(vq);
370 	return ret;
371 out:
372 	spin_unlock(&fsvq->lock);
373 	return ret;
374 }
375 
376 static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
377 {
378 	struct virtio_fs_forget *forget;
379 	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
380 						 dispatch_work.work);
381 	pr_debug("virtio-fs: worker %s called.\n", __func__);
382 	while (1) {
383 		spin_lock(&fsvq->lock);
384 		forget = list_first_entry_or_null(&fsvq->queued_reqs,
385 					struct virtio_fs_forget, list);
386 		if (!forget) {
387 			spin_unlock(&fsvq->lock);
388 			return;
389 		}
390 
391 		list_del(&forget->list);
392 		spin_unlock(&fsvq->lock);
393 		if (send_forget_request(fsvq, forget, true))
394 			return;
395 	}
396 }
397 
398 /* Allocate and copy args into req->argbuf */
399 static int copy_args_to_argbuf(struct fuse_req *req)
400 {
401 	struct fuse_args *args = req->args;
402 	unsigned int offset = 0;
403 	unsigned int num_in;
404 	unsigned int num_out;
405 	unsigned int len;
406 	unsigned int i;
407 
408 	num_in = args->in_numargs - args->in_pages;
409 	num_out = args->out_numargs - args->out_pages;
410 	len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) +
411 	      fuse_len_args(num_out, args->out_args);
412 
413 	req->argbuf = kmalloc(len, GFP_ATOMIC);
414 	if (!req->argbuf)
415 		return -ENOMEM;
416 
417 	for (i = 0; i < num_in; i++) {
418 		memcpy(req->argbuf + offset,
419 		       args->in_args[i].value,
420 		       args->in_args[i].size);
421 		offset += args->in_args[i].size;
422 	}
423 
424 	return 0;
425 }
426 
427 /* Copy args out of and free req->argbuf */
428 static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req)
429 {
430 	unsigned int remaining;
431 	unsigned int offset;
432 	unsigned int num_in;
433 	unsigned int num_out;
434 	unsigned int i;
435 
436 	remaining = req->out.h.len - sizeof(req->out.h);
437 	num_in = args->in_numargs - args->in_pages;
438 	num_out = args->out_numargs - args->out_pages;
439 	offset = fuse_len_args(num_in, (struct fuse_arg *)args->in_args);
440 
441 	for (i = 0; i < num_out; i++) {
442 		unsigned int argsize = args->out_args[i].size;
443 
444 		if (args->out_argvar &&
445 		    i == args->out_numargs - 1 &&
446 		    argsize > remaining) {
447 			argsize = remaining;
448 		}
449 
450 		memcpy(args->out_args[i].value, req->argbuf + offset, argsize);
451 		offset += argsize;
452 
453 		if (i != args->out_numargs - 1)
454 			remaining -= argsize;
455 	}
456 
457 	/* Store the actual size of the variable-length arg */
458 	if (args->out_argvar)
459 		args->out_args[args->out_numargs - 1].size = remaining;
460 
461 	kfree(req->argbuf);
462 	req->argbuf = NULL;
463 }
464 
465 /* Work function for request completion */
466 static void virtio_fs_requests_done_work(struct work_struct *work)
467 {
468 	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
469 						 done_work);
470 	struct fuse_pqueue *fpq = &fsvq->fud->pq;
471 	struct fuse_conn *fc = fsvq->fud->fc;
472 	struct virtqueue *vq = fsvq->vq;
473 	struct fuse_req *req;
474 	struct fuse_args_pages *ap;
475 	struct fuse_req *next;
476 	struct fuse_args *args;
477 	unsigned int len, i, thislen;
478 	struct page *page;
479 	LIST_HEAD(reqs);
480 
481 	/* Collect completed requests off the virtqueue */
482 	spin_lock(&fsvq->lock);
483 	do {
484 		virtqueue_disable_cb(vq);
485 
486 		while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
487 			spin_lock(&fpq->lock);
488 			list_move_tail(&req->list, &reqs);
489 			spin_unlock(&fpq->lock);
490 		}
491 	} while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
492 	spin_unlock(&fsvq->lock);
493 
494 	/* End requests */
495 	list_for_each_entry_safe(req, next, &reqs, list) {
496 		/*
497 		 * TODO verify that server properly follows FUSE protocol
498 		 * (oh.uniq, oh.len)
499 		 */
500 		args = req->args;
501 		copy_args_from_argbuf(args, req);
502 
503 		if (args->out_pages && args->page_zeroing) {
504 			len = args->out_args[args->out_numargs - 1].size;
505 			ap = container_of(args, typeof(*ap), args);
506 			for (i = 0; i < ap->num_pages; i++) {
507 				thislen = ap->descs[i].length;
508 				if (len < thislen) {
509 					WARN_ON(ap->descs[i].offset);
510 					page = ap->pages[i];
511 					zero_user_segment(page, len, thislen);
512 					len = 0;
513 				} else {
514 					len -= thislen;
515 				}
516 			}
517 		}
518 
519 		spin_lock(&fpq->lock);
520 		clear_bit(FR_SENT, &req->flags);
521 		list_del_init(&req->list);
522 		spin_unlock(&fpq->lock);
523 
524 		fuse_request_end(fc, req);
525 		spin_lock(&fsvq->lock);
526 		dec_in_flight_req(fsvq);
527 		spin_unlock(&fsvq->lock);
528 	}
529 }
530 
531 /* Virtqueue interrupt handler */
532 static void virtio_fs_vq_done(struct virtqueue *vq)
533 {
534 	struct virtio_fs_vq *fsvq = vq_to_fsvq(vq);
535 
536 	dev_dbg(&vq->vdev->dev, "%s %s\n", __func__, fsvq->name);
537 
538 	schedule_work(&fsvq->done_work);
539 }
540 
541 /* Initialize virtqueues */
542 static int virtio_fs_setup_vqs(struct virtio_device *vdev,
543 			       struct virtio_fs *fs)
544 {
545 	struct virtqueue **vqs;
546 	vq_callback_t **callbacks;
547 	const char **names;
548 	unsigned int i;
549 	int ret = 0;
550 
551 	virtio_cread(vdev, struct virtio_fs_config, num_request_queues,
552 		     &fs->num_request_queues);
553 	if (fs->num_request_queues == 0)
554 		return -EINVAL;
555 
556 	fs->nvqs = 1 + fs->num_request_queues;
557 	fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL);
558 	if (!fs->vqs)
559 		return -ENOMEM;
560 
561 	vqs = kmalloc_array(fs->nvqs, sizeof(vqs[VQ_HIPRIO]), GFP_KERNEL);
562 	callbacks = kmalloc_array(fs->nvqs, sizeof(callbacks[VQ_HIPRIO]),
563 					GFP_KERNEL);
564 	names = kmalloc_array(fs->nvqs, sizeof(names[VQ_HIPRIO]), GFP_KERNEL);
565 	if (!vqs || !callbacks || !names) {
566 		ret = -ENOMEM;
567 		goto out;
568 	}
569 
570 	callbacks[VQ_HIPRIO] = virtio_fs_vq_done;
571 	snprintf(fs->vqs[VQ_HIPRIO].name, sizeof(fs->vqs[VQ_HIPRIO].name),
572 			"hiprio");
573 	names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name;
574 	INIT_WORK(&fs->vqs[VQ_HIPRIO].done_work, virtio_fs_hiprio_done_work);
575 	INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].queued_reqs);
576 	INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].end_reqs);
577 	INIT_DELAYED_WORK(&fs->vqs[VQ_HIPRIO].dispatch_work,
578 			virtio_fs_hiprio_dispatch_work);
579 	spin_lock_init(&fs->vqs[VQ_HIPRIO].lock);
580 
581 	/* Initialize the requests virtqueues */
582 	for (i = VQ_REQUEST; i < fs->nvqs; i++) {
583 		spin_lock_init(&fs->vqs[i].lock);
584 		INIT_WORK(&fs->vqs[i].done_work, virtio_fs_requests_done_work);
585 		INIT_DELAYED_WORK(&fs->vqs[i].dispatch_work,
586 				  virtio_fs_request_dispatch_work);
587 		INIT_LIST_HEAD(&fs->vqs[i].queued_reqs);
588 		INIT_LIST_HEAD(&fs->vqs[i].end_reqs);
589 		snprintf(fs->vqs[i].name, sizeof(fs->vqs[i].name),
590 			 "requests.%u", i - VQ_REQUEST);
591 		callbacks[i] = virtio_fs_vq_done;
592 		names[i] = fs->vqs[i].name;
593 	}
594 
595 	ret = virtio_find_vqs(vdev, fs->nvqs, vqs, callbacks, names, NULL);
596 	if (ret < 0)
597 		goto out;
598 
599 	for (i = 0; i < fs->nvqs; i++)
600 		fs->vqs[i].vq = vqs[i];
601 
602 	virtio_fs_start_all_queues(fs);
603 out:
604 	kfree(names);
605 	kfree(callbacks);
606 	kfree(vqs);
607 	if (ret)
608 		kfree(fs->vqs);
609 	return ret;
610 }
611 
/*
 * Free virtqueues (device must already be reset).
 *
 * Only asks the transport to delete the device's virtqueues; @fs is not
 * used and the per-queue state array fs->vqs is not freed here.
 */
static void virtio_fs_cleanup_vqs(struct virtio_device *vdev,
				  struct virtio_fs *fs)
{
	vdev->config->del_vqs(vdev);
}
618 
619 static int virtio_fs_probe(struct virtio_device *vdev)
620 {
621 	struct virtio_fs *fs;
622 	int ret;
623 
624 	fs = kzalloc(sizeof(*fs), GFP_KERNEL);
625 	if (!fs)
626 		return -ENOMEM;
627 	kref_init(&fs->refcount);
628 	vdev->priv = fs;
629 
630 	ret = virtio_fs_read_tag(vdev, fs);
631 	if (ret < 0)
632 		goto out;
633 
634 	ret = virtio_fs_setup_vqs(vdev, fs);
635 	if (ret < 0)
636 		goto out;
637 
638 	/* TODO vq affinity */
639 
640 	/* Bring the device online in case the filesystem is mounted and
641 	 * requests need to be sent before we return.
642 	 */
643 	virtio_device_ready(vdev);
644 
645 	ret = virtio_fs_add_instance(fs);
646 	if (ret < 0)
647 		goto out_vqs;
648 
649 	return 0;
650 
651 out_vqs:
652 	vdev->config->reset(vdev);
653 	virtio_fs_cleanup_vqs(vdev, fs);
654 
655 out:
656 	vdev->priv = NULL;
657 	kfree(fs);
658 	return ret;
659 }
660 
661 static void virtio_fs_stop_all_queues(struct virtio_fs *fs)
662 {
663 	struct virtio_fs_vq *fsvq;
664 	int i;
665 
666 	for (i = 0; i < fs->nvqs; i++) {
667 		fsvq = &fs->vqs[i];
668 		spin_lock(&fsvq->lock);
669 		fsvq->connected = false;
670 		spin_unlock(&fsvq->lock);
671 	}
672 }
673 
/*
 * Device removal.
 *
 * Runs entirely under virtio_fs_mutex: unlinks the instance from the
 * global list, stops and drains all queues, resets the device, deletes
 * the virtqueues and finally drops the device's reference on the
 * virtio_fs object. The object itself is only freed once the last
 * reference is gone.
 */
static void virtio_fs_remove(struct virtio_device *vdev)
{
	struct virtio_fs *fs = vdev->priv;

	mutex_lock(&virtio_fs_mutex);
	/* This device is going away. No one should get new reference */
	/*
	 * list_del_init() leaves fs->list empty, which is what
	 * virtio_fs_fill_super() checks to detect a removed device.
	 */
	list_del_init(&fs->list);
	/* Refuse new submissions, then wait for outstanding ones */
	virtio_fs_stop_all_queues(fs);
	virtio_fs_drain_all_queues(fs);
	/* The device must be reset before its virtqueues are deleted */
	vdev->config->reset(vdev);
	virtio_fs_cleanup_vqs(vdev, fs);

	vdev->priv = NULL;
	/* Put device reference on virtio_fs object */
	virtio_fs_put(fs);
	mutex_unlock(&virtio_fs_mutex);
}
691 
#ifdef CONFIG_PM_SLEEP
/*
 * Suspend hook. Suspend is not implemented: a warning is logged and
 * -EOPNOTSUPP is returned.
 */
static int virtio_fs_freeze(struct virtio_device *vdev)
{
	/* TODO need to save state here */
	pr_warn("virtio-fs: suspend/resume not yet supported\n");
	return -EOPNOTSUPP;
}

/* Resume hook. Currently a stub that restores nothing and returns 0. */
static int virtio_fs_restore(struct virtio_device *vdev)
{
	/* TODO need to restore state here */
	return 0;
}
#endif /* CONFIG_PM_SLEEP */
706 
/* Devices handled by this driver: any virtio device with the FS id */
static const struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID },
	{},
};

/* The driver lists no virtio feature bits */
static const unsigned int feature_table[] = {};

/* virtio driver registration; freeze/restore only exist with PM sleep */
static struct virtio_driver virtio_fs_driver = {
	.driver.name		= KBUILD_MODNAME,
	.driver.owner		= THIS_MODULE,
	.id_table		= id_table,
	.feature_table		= feature_table,
	.feature_table_size	= ARRAY_SIZE(feature_table),
	.probe			= virtio_fs_probe,
	.remove			= virtio_fs_remove,
#ifdef CONFIG_PM_SLEEP
	.freeze			= virtio_fs_freeze,
	.restore		= virtio_fs_restore,
#endif
};
727 
728 static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq)
729 __releases(fiq->lock)
730 {
731 	struct fuse_forget_link *link;
732 	struct virtio_fs_forget *forget;
733 	struct virtio_fs *fs;
734 	struct virtio_fs_vq *fsvq;
735 	u64 unique;
736 
737 	link = fuse_dequeue_forget(fiq, 1, NULL);
738 	unique = fuse_get_unique(fiq);
739 
740 	fs = fiq->priv;
741 	fsvq = &fs->vqs[VQ_HIPRIO];
742 	spin_unlock(&fiq->lock);
743 
744 	/* Allocate a buffer for the request */
745 	forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL);
746 
747 	forget->ih = (struct fuse_in_header){
748 		.opcode = FUSE_FORGET,
749 		.nodeid = link->forget_one.nodeid,
750 		.unique = unique,
751 		.len = sizeof(*forget),
752 	};
753 	forget->arg = (struct fuse_forget_in){
754 		.nlookup = link->forget_one.nlookup,
755 	};
756 
757 	send_forget_request(fsvq, forget, false);
758 	kfree(link);
759 }
760 
/*
 * ->wake_interrupt_and_unlock hook: called with fiq->lock held, which
 * it releases. Interrupts are not implemented, so nothing is sent.
 */
static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock)
{
	/*
	 * TODO interrupts.
	 *
	 * Normal fs operations on a local filesystems aren't interruptible.
	 * Exceptions are blocking lock operations; for example fcntl(F_SETLKW)
	 * with shared lock between host and guest.
	 */
	spin_unlock(&fiq->lock);
}
773 
774 /* Return the number of scatter-gather list elements required */
775 static unsigned int sg_count_fuse_req(struct fuse_req *req)
776 {
777 	struct fuse_args *args = req->args;
778 	struct fuse_args_pages *ap = container_of(args, typeof(*ap), args);
779 	unsigned int total_sgs = 1 /* fuse_in_header */;
780 
781 	if (args->in_numargs - args->in_pages)
782 		total_sgs += 1;
783 
784 	if (args->in_pages)
785 		total_sgs += ap->num_pages;
786 
787 	if (!test_bit(FR_ISREPLY, &req->flags))
788 		return total_sgs;
789 
790 	total_sgs += 1 /* fuse_out_header */;
791 
792 	if (args->out_numargs - args->out_pages)
793 		total_sgs += 1;
794 
795 	if (args->out_pages)
796 		total_sgs += ap->num_pages;
797 
798 	return total_sgs;
799 }
800 
801 /* Add pages to scatter-gather list and return number of elements used */
802 static unsigned int sg_init_fuse_pages(struct scatterlist *sg,
803 				       struct page **pages,
804 				       struct fuse_page_desc *page_descs,
805 				       unsigned int num_pages,
806 				       unsigned int total_len)
807 {
808 	unsigned int i;
809 	unsigned int this_len;
810 
811 	for (i = 0; i < num_pages && total_len; i++) {
812 		sg_init_table(&sg[i], 1);
813 		this_len =  min(page_descs[i].length, total_len);
814 		sg_set_page(&sg[i], pages[i], this_len, page_descs[i].offset);
815 		total_len -= this_len;
816 	}
817 
818 	return i;
819 }
820 
821 /* Add args to scatter-gather list and return number of elements used */
822 static unsigned int sg_init_fuse_args(struct scatterlist *sg,
823 				      struct fuse_req *req,
824 				      struct fuse_arg *args,
825 				      unsigned int numargs,
826 				      bool argpages,
827 				      void *argbuf,
828 				      unsigned int *len_used)
829 {
830 	struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
831 	unsigned int total_sgs = 0;
832 	unsigned int len;
833 
834 	len = fuse_len_args(numargs - argpages, args);
835 	if (len)
836 		sg_init_one(&sg[total_sgs++], argbuf, len);
837 
838 	if (argpages)
839 		total_sgs += sg_init_fuse_pages(&sg[total_sgs],
840 						ap->pages, ap->descs,
841 						ap->num_pages,
842 						args[numargs - 1].size);
843 
844 	if (len_used)
845 		*len_used = len;
846 
847 	return total_sgs;
848 }
849 
850 /* Add a request to a virtqueue and kick the device */
851 static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
852 				 struct fuse_req *req, bool in_flight)
853 {
854 	/* requests need at least 4 elements */
855 	struct scatterlist *stack_sgs[6];
856 	struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)];
857 	struct scatterlist **sgs = stack_sgs;
858 	struct scatterlist *sg = stack_sg;
859 	struct virtqueue *vq;
860 	struct fuse_args *args = req->args;
861 	unsigned int argbuf_used = 0;
862 	unsigned int out_sgs = 0;
863 	unsigned int in_sgs = 0;
864 	unsigned int total_sgs;
865 	unsigned int i;
866 	int ret;
867 	bool notify;
868 	struct fuse_pqueue *fpq;
869 
870 	/* Does the sglist fit on the stack? */
871 	total_sgs = sg_count_fuse_req(req);
872 	if (total_sgs > ARRAY_SIZE(stack_sgs)) {
873 		sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), GFP_ATOMIC);
874 		sg = kmalloc_array(total_sgs, sizeof(sg[0]), GFP_ATOMIC);
875 		if (!sgs || !sg) {
876 			ret = -ENOMEM;
877 			goto out;
878 		}
879 	}
880 
881 	/* Use a bounce buffer since stack args cannot be mapped */
882 	ret = copy_args_to_argbuf(req);
883 	if (ret < 0)
884 		goto out;
885 
886 	/* Request elements */
887 	sg_init_one(&sg[out_sgs++], &req->in.h, sizeof(req->in.h));
888 	out_sgs += sg_init_fuse_args(&sg[out_sgs], req,
889 				     (struct fuse_arg *)args->in_args,
890 				     args->in_numargs, args->in_pages,
891 				     req->argbuf, &argbuf_used);
892 
893 	/* Reply elements */
894 	if (test_bit(FR_ISREPLY, &req->flags)) {
895 		sg_init_one(&sg[out_sgs + in_sgs++],
896 			    &req->out.h, sizeof(req->out.h));
897 		in_sgs += sg_init_fuse_args(&sg[out_sgs + in_sgs], req,
898 					    args->out_args, args->out_numargs,
899 					    args->out_pages,
900 					    req->argbuf + argbuf_used, NULL);
901 	}
902 
903 	WARN_ON(out_sgs + in_sgs != total_sgs);
904 
905 	for (i = 0; i < total_sgs; i++)
906 		sgs[i] = &sg[i];
907 
908 	spin_lock(&fsvq->lock);
909 
910 	if (!fsvq->connected) {
911 		spin_unlock(&fsvq->lock);
912 		ret = -ENOTCONN;
913 		goto out;
914 	}
915 
916 	vq = fsvq->vq;
917 	ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC);
918 	if (ret < 0) {
919 		spin_unlock(&fsvq->lock);
920 		goto out;
921 	}
922 
923 	/* Request successfully sent. */
924 	fpq = &fsvq->fud->pq;
925 	spin_lock(&fpq->lock);
926 	list_add_tail(&req->list, fpq->processing);
927 	spin_unlock(&fpq->lock);
928 	set_bit(FR_SENT, &req->flags);
929 	/* matches barrier in request_wait_answer() */
930 	smp_mb__after_atomic();
931 
932 	if (!in_flight)
933 		inc_in_flight_req(fsvq);
934 	notify = virtqueue_kick_prepare(vq);
935 
936 	spin_unlock(&fsvq->lock);
937 
938 	if (notify)
939 		virtqueue_notify(vq);
940 
941 out:
942 	if (ret < 0 && req->argbuf) {
943 		kfree(req->argbuf);
944 		req->argbuf = NULL;
945 	}
946 	if (sgs != stack_sgs) {
947 		kfree(sgs);
948 		kfree(sg);
949 	}
950 
951 	return ret;
952 }
953 
954 static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq)
955 __releases(fiq->lock)
956 {
957 	unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */
958 	struct virtio_fs *fs;
959 	struct fuse_req *req;
960 	struct virtio_fs_vq *fsvq;
961 	int ret;
962 
963 	WARN_ON(list_empty(&fiq->pending));
964 	req = list_last_entry(&fiq->pending, struct fuse_req, list);
965 	clear_bit(FR_PENDING, &req->flags);
966 	list_del_init(&req->list);
967 	WARN_ON(!list_empty(&fiq->pending));
968 	spin_unlock(&fiq->lock);
969 
970 	fs = fiq->priv;
971 
972 	pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n",
973 		  __func__, req->in.h.opcode, req->in.h.unique,
974 		 req->in.h.nodeid, req->in.h.len,
975 		 fuse_len_args(req->args->out_numargs, req->args->out_args));
976 
977 	fsvq = &fs->vqs[queue_id];
978 	ret = virtio_fs_enqueue_req(fsvq, req, false);
979 	if (ret < 0) {
980 		if (ret == -ENOMEM || ret == -ENOSPC) {
981 			/*
982 			 * Virtqueue full. Retry submission from worker
983 			 * context as we might be holding fc->bg_lock.
984 			 */
985 			spin_lock(&fsvq->lock);
986 			list_add_tail(&req->list, &fsvq->queued_reqs);
987 			inc_in_flight_req(fsvq);
988 			schedule_delayed_work(&fsvq->dispatch_work,
989 						msecs_to_jiffies(1));
990 			spin_unlock(&fsvq->lock);
991 			return;
992 		}
993 		req->out.h.error = ret;
994 		pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret);
995 
996 		/* Can't end request in submission context. Use a worker */
997 		spin_lock(&fsvq->lock);
998 		list_add_tail(&req->list, &fsvq->end_reqs);
999 		schedule_delayed_work(&fsvq->dispatch_work, 0);
1000 		spin_unlock(&fsvq->lock);
1001 		return;
1002 	}
1003 }
1004 
/*
 * Input queue operations passed to fuse_conn_init() in
 * virtio_fs_get_tree(). Each wake_* hook is entered with fiq->lock held
 * and releases it.
 */
static const struct fuse_iqueue_ops virtio_fs_fiq_ops = {
	.wake_forget_and_unlock		= virtio_fs_wake_forget_and_unlock,
	.wake_interrupt_and_unlock	= virtio_fs_wake_interrupt_and_unlock,
	.wake_pending_and_unlock	= virtio_fs_wake_pending_and_unlock,
	.release			= virtio_fs_fiq_release,
};
1011 
1012 static int virtio_fs_fill_super(struct super_block *sb)
1013 {
1014 	struct fuse_conn *fc = get_fuse_conn_super(sb);
1015 	struct virtio_fs *fs = fc->iq.priv;
1016 	unsigned int i;
1017 	int err;
1018 	struct fuse_fs_context ctx = {
1019 		.rootmode = S_IFDIR,
1020 		.default_permissions = 1,
1021 		.allow_other = 1,
1022 		.max_read = UINT_MAX,
1023 		.blksize = 512,
1024 		.destroy = true,
1025 		.no_control = true,
1026 		.no_force_umount = true,
1027 		.no_mount_options = true,
1028 	};
1029 
1030 	mutex_lock(&virtio_fs_mutex);
1031 
1032 	/* After holding mutex, make sure virtiofs device is still there.
1033 	 * Though we are holding a reference to it, drive ->remove might
1034 	 * still have cleaned up virtual queues. In that case bail out.
1035 	 */
1036 	err = -EINVAL;
1037 	if (list_empty(&fs->list)) {
1038 		pr_info("virtio-fs: tag <%s> not found\n", fs->tag);
1039 		goto err;
1040 	}
1041 
1042 	err = -ENOMEM;
1043 	/* Allocate fuse_dev for hiprio and notification queues */
1044 	for (i = 0; i < VQ_REQUEST; i++) {
1045 		struct virtio_fs_vq *fsvq = &fs->vqs[i];
1046 
1047 		fsvq->fud = fuse_dev_alloc();
1048 		if (!fsvq->fud)
1049 			goto err_free_fuse_devs;
1050 	}
1051 
1052 	ctx.fudptr = (void **)&fs->vqs[VQ_REQUEST].fud;
1053 	err = fuse_fill_super_common(sb, &ctx);
1054 	if (err < 0)
1055 		goto err_free_fuse_devs;
1056 
1057 	fc = fs->vqs[VQ_REQUEST].fud->fc;
1058 
1059 	for (i = 0; i < fs->nvqs; i++) {
1060 		struct virtio_fs_vq *fsvq = &fs->vqs[i];
1061 
1062 		if (i == VQ_REQUEST)
1063 			continue; /* already initialized */
1064 		fuse_dev_install(fsvq->fud, fc);
1065 	}
1066 
1067 	/* Previous unmount will stop all queues. Start these again */
1068 	virtio_fs_start_all_queues(fs);
1069 	fuse_send_init(fc);
1070 	mutex_unlock(&virtio_fs_mutex);
1071 	return 0;
1072 
1073 err_free_fuse_devs:
1074 	virtio_fs_free_devs(fs);
1075 err:
1076 	mutex_unlock(&virtio_fs_mutex);
1077 	return err;
1078 }
1079 
1080 static void virtio_kill_sb(struct super_block *sb)
1081 {
1082 	struct fuse_conn *fc = get_fuse_conn_super(sb);
1083 	struct virtio_fs *vfs;
1084 	struct virtio_fs_vq *fsvq;
1085 
1086 	/* If mount failed, we can still be called without any fc */
1087 	if (!fc)
1088 		return fuse_kill_sb_anon(sb);
1089 
1090 	vfs = fc->iq.priv;
1091 	fsvq = &vfs->vqs[VQ_HIPRIO];
1092 
1093 	/* Stop forget queue. Soon destroy will be sent */
1094 	spin_lock(&fsvq->lock);
1095 	fsvq->connected = false;
1096 	spin_unlock(&fsvq->lock);
1097 	virtio_fs_drain_all_queues(vfs);
1098 
1099 	fuse_kill_sb_anon(sb);
1100 
1101 	/* fuse_kill_sb_anon() must have sent destroy. Stop all queues
1102 	 * and drain one more time and free fuse devices. Freeing fuse
1103 	 * devices will drop their reference on fuse_conn and that in
1104 	 * turn will drop its reference on virtio_fs object.
1105 	 */
1106 	virtio_fs_stop_all_queues(vfs);
1107 	virtio_fs_drain_all_queues(vfs);
1108 	virtio_fs_free_devs(vfs);
1109 }
1110 
1111 static int virtio_fs_test_super(struct super_block *sb,
1112 				struct fs_context *fsc)
1113 {
1114 	struct fuse_conn *fc = fsc->s_fs_info;
1115 
1116 	return fc->iq.priv == get_fuse_conn_super(sb)->iq.priv;
1117 }
1118 
1119 static int virtio_fs_set_super(struct super_block *sb,
1120 			       struct fs_context *fsc)
1121 {
1122 	int err;
1123 
1124 	err = get_anon_bdev(&sb->s_dev);
1125 	if (!err)
1126 		fuse_conn_get(fsc->s_fs_info);
1127 
1128 	return err;
1129 }
1130 
1131 static int virtio_fs_get_tree(struct fs_context *fsc)
1132 {
1133 	struct virtio_fs *fs;
1134 	struct super_block *sb;
1135 	struct fuse_conn *fc;
1136 	int err;
1137 
1138 	/* This gets a reference on virtio_fs object. This ptr gets installed
1139 	 * in fc->iq->priv. Once fuse_conn is going away, it calls ->put()
1140 	 * to drop the reference to this object.
1141 	 */
1142 	fs = virtio_fs_find_instance(fsc->source);
1143 	if (!fs) {
1144 		pr_info("virtio-fs: tag <%s> not found\n", fsc->source);
1145 		return -EINVAL;
1146 	}
1147 
1148 	fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL);
1149 	if (!fc) {
1150 		mutex_lock(&virtio_fs_mutex);
1151 		virtio_fs_put(fs);
1152 		mutex_unlock(&virtio_fs_mutex);
1153 		return -ENOMEM;
1154 	}
1155 
1156 	fuse_conn_init(fc, get_user_ns(current_user_ns()), &virtio_fs_fiq_ops,
1157 		       fs);
1158 	fc->release = fuse_free_conn;
1159 	fc->delete_stale = true;
1160 
1161 	fsc->s_fs_info = fc;
1162 	sb = sget_fc(fsc, virtio_fs_test_super, virtio_fs_set_super);
1163 	fuse_conn_put(fc);
1164 	if (IS_ERR(sb))
1165 		return PTR_ERR(sb);
1166 
1167 	if (!sb->s_root) {
1168 		err = virtio_fs_fill_super(sb);
1169 		if (err) {
1170 			deactivate_locked_super(sb);
1171 			return err;
1172 		}
1173 
1174 		sb->s_flags |= SB_ACTIVE;
1175 	}
1176 
1177 	WARN_ON(fsc->root);
1178 	fsc->root = dget(sb->s_root);
1179 	return 0;
1180 }
1181 
/* fs_context operations; only ->get_tree is provided */
static const struct fs_context_operations virtio_fs_context_ops = {
	.get_tree	= virtio_fs_get_tree,
};
1185 
/*
 * ->init_fs_context: installs the virtiofs context operations on @fsc.
 * Always returns 0.
 */
static int virtio_fs_init_fs_context(struct fs_context *fsc)
{
	fsc->ops = &virtio_fs_context_ops;
	return 0;
}
1191 
/* Filesystem type registered under the name "virtiofs" */
static struct file_system_type virtio_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "virtiofs",
	.init_fs_context = virtio_fs_init_fs_context,
	.kill_sb	= virtio_kill_sb,
};
1198 
1199 static int __init virtio_fs_init(void)
1200 {
1201 	int ret;
1202 
1203 	ret = register_virtio_driver(&virtio_fs_driver);
1204 	if (ret < 0)
1205 		return ret;
1206 
1207 	ret = register_filesystem(&virtio_fs_type);
1208 	if (ret < 0) {
1209 		unregister_virtio_driver(&virtio_fs_driver);
1210 		return ret;
1211 	}
1212 
1213 	return 0;
1214 }
1215 module_init(virtio_fs_init);
1216 
/*
 * Module exit: undoes virtio_fs_init() in reverse order, unregistering
 * the filesystem type before the virtio driver.
 */
static void __exit virtio_fs_exit(void)
{
	unregister_filesystem(&virtio_fs_type);
	unregister_virtio_driver(&virtio_fs_driver);
}
1222 module_exit(virtio_fs_exit);
1223 
1224 MODULE_AUTHOR("Stefan Hajnoczi <stefanha@redhat.com>");
1225 MODULE_DESCRIPTION("Virtio Filesystem");
1226 MODULE_LICENSE("GPL");
1227 MODULE_ALIAS_FS(KBUILD_MODNAME);
1228 MODULE_DEVICE_TABLE(virtio, id_table);
1229