xref: /openbmc/linux/drivers/virtio/virtio_ring.c (revision 77d84ff8)
1 /* Virtio ring implementation.
2  *
3  *  Copyright 2007 Rusty Russell IBM Corporation
4  *
5  *  This program is free software; you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation; either version 2 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program; if not, write to the Free Software
17  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18  */
19 #include <linux/virtio.h>
20 #include <linux/virtio_ring.h>
21 #include <linux/virtio_config.h>
22 #include <linux/device.h>
23 #include <linux/slab.h>
24 #include <linux/module.h>
25 #include <linux/hrtimer.h>
#include <linux/kmemleak.h>	/* for kmemleak_ignore(), used below */
26 
27 #ifdef DEBUG
28 /* For development, we want to crash whenever the ring is screwed. */
29 #define BAD_RING(_vq, fmt, args...)				\
30 	do {							\
31 		dev_err(&(_vq)->vq.vdev->dev,			\
32 			"%s:"fmt, (_vq)->vq.name, ##args);	\
33 		BUG();						\
34 	} while (0)
35 /* Caller is supposed to guarantee no reentry. */
36 #define START_USE(_vq)						\
37 	do {							\
38 		if ((_vq)->in_use)				\
39 			panic("%s:in_use = %i\n",		\
40 			      (_vq)->vq.name, (_vq)->in_use);	\
41 		(_vq)->in_use = __LINE__;			\
42 	} while (0)
43 #define END_USE(_vq) \
44 	do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while (0)
45 #else
46 #define BAD_RING(_vq, fmt, args...)				\
47 	do {							\
48 		dev_err(&(_vq)->vq.vdev->dev,			\
49 			"%s:"fmt, (_vq)->vq.name, ##args);	\
50 		(_vq)->broken = true;				\
51 	} while (0)
52 #define START_USE(vq)
53 #define END_USE(vq)
54 #endif
55 
56 struct vring_virtqueue {
58 	struct virtqueue vq;
59 
60 	/* Actual memory layout for this queue */
61 	struct vring vring;
62 
63 	/* Can we use weak barriers? */
64 	bool weak_barriers;
65 
66 	/* Other side has made a mess, don't try any more. */
67 	bool broken;
68 
69 	/* Host supports indirect buffers */
70 	bool indirect;
71 
72 	/* Host publishes avail event idx */
73 	bool event;
74 
75 	/* Head of free buffer list. */
76 	unsigned int free_head;
77 	/* Number we've added since last sync. */
78 	unsigned int num_added;
79 
80 	/* Last used index we've seen. */
81 	u16 last_used_idx;
82 
83 	/* How to notify other side. FIXME: commonalize hcalls! */
84 	bool (*notify)(struct virtqueue *vq);
85 
86 #ifdef DEBUG
87 	/* Callers are supposed to lock for us. */
88 	unsigned int in_use;
89 
90 	/* Figure out if their kicks are too delayed. */
91 	bool last_add_time_valid;
92 	ktime_t last_add_time;
93 #endif
94 
95 	/* Tokens for callbacks. */
96 	void *data[];
97 };
98 
99 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
100 
101 static inline struct scatterlist *sg_next_chained(struct scatterlist *sg,
102 						  unsigned int *count)
103 {
104 	return sg_next(sg);
105 }
106 
107 static inline struct scatterlist *sg_next_arr(struct scatterlist *sg,
108 					      unsigned int *count)
109 {
110 	if (--(*count) == 0)
111 		return NULL;
112 	return sg + 1;
113 }
114 
115 /* Set up an indirect table of descriptors and add it to the queue. */
116 static inline int vring_add_indirect(struct vring_virtqueue *vq,
117 				     struct scatterlist *sgs[],
118 				     struct scatterlist *(*next)
119 				       (struct scatterlist *, unsigned int *),
120 				     unsigned int total_sg,
121 				     unsigned int total_out,
122 				     unsigned int total_in,
123 				     unsigned int out_sgs,
124 				     unsigned int in_sgs,
125 				     gfp_t gfp)
126 {
127 	struct vring_desc *desc;
128 	unsigned head;
129 	struct scatterlist *sg;
130 	int i, n;
131 
132 	/*
133 	 * We require lowmem mappings for the descriptors because
134 	 * otherwise virt_to_phys will give us bogus addresses in the
135 	 * virtqueue.
136 	 */
137 	gfp &= ~(__GFP_HIGHMEM | __GFP_HIGH);
138 
139 	desc = kmalloc(total_sg * sizeof(struct vring_desc), gfp);
140 	if (!desc)
141 		return -ENOMEM;
142 
143 	/* Transfer entries from the sg lists into the indirect page */
144 	i = 0;
145 	for (n = 0; n < out_sgs; n++) {
146 		for (sg = sgs[n]; sg; sg = next(sg, &total_out)) {
147 			desc[i].flags = VRING_DESC_F_NEXT;
148 			desc[i].addr = sg_phys(sg);
149 			desc[i].len = sg->length;
150 			desc[i].next = i+1;
151 			i++;
152 		}
153 	}
154 	for (; n < (out_sgs + in_sgs); n++) {
155 		for (sg = sgs[n]; sg; sg = next(sg, &total_in)) {
156 			desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
157 			desc[i].addr = sg_phys(sg);
158 			desc[i].len = sg->length;
159 			desc[i].next = i+1;
160 			i++;
161 		}
162 	}
163 	BUG_ON(i != total_sg);
164 
165 	/* Last one doesn't continue. */
166 	desc[i-1].flags &= ~VRING_DESC_F_NEXT;
167 	desc[i-1].next = 0;
168 
169 	/* We're about to use a buffer */
170 	vq->vq.num_free--;
171 
172 	/* Use a single buffer which doesn't continue */
173 	head = vq->free_head;
174 	vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT;
175 	vq->vring.desc[head].addr = virt_to_phys(desc);
176 	/* kmemleak gives a false positive, as it's hidden by virt_to_phys */
177 	kmemleak_ignore(desc);
178 	vq->vring.desc[head].len = i * sizeof(struct vring_desc);
179 
180 	/* Update free pointer */
181 	vq->free_head = vq->vring.desc[head].next;
182 
183 	return head;
184 }
185 
186 static inline int virtqueue_add(struct virtqueue *_vq,
187 				struct scatterlist *sgs[],
188 				struct scatterlist *(*next)
189 				  (struct scatterlist *, unsigned int *),
190 				unsigned int total_out,
191 				unsigned int total_in,
192 				unsigned int out_sgs,
193 				unsigned int in_sgs,
194 				void *data,
195 				gfp_t gfp)
196 {
197 	struct vring_virtqueue *vq = to_vvq(_vq);
198 	struct scatterlist *sg;
199 	unsigned int i, n, avail, uninitialized_var(prev), total_sg;
200 	int head;
201 
202 	START_USE(vq);
203 
204 	BUG_ON(data == NULL);
205 
206 #ifdef DEBUG
207 	{
208 		ktime_t now = ktime_get();
209 
210 		/* No kick or get, with 0.1 seconds in between?  Warn. */
211 		if (vq->last_add_time_valid)
212 			WARN_ON(ktime_to_ms(ktime_sub(now, vq->last_add_time))
213 					    > 100);
214 		vq->last_add_time = now;
215 		vq->last_add_time_valid = true;
216 	}
217 #endif
218 
219 	total_sg = total_in + total_out;
220 
221 	/* If the host supports indirect descriptor tables, and we have multiple
222 	 * buffers, then go indirect. FIXME: tune this threshold */
223 	if (vq->indirect && total_sg > 1 && vq->vq.num_free) {
224 		head = vring_add_indirect(vq, sgs, next, total_sg, total_out,
225 					  total_in,
226 					  out_sgs, in_sgs, gfp);
227 		if (likely(head >= 0))
228 			goto add_head;
229 	}
230 
231 	BUG_ON(total_sg > vq->vring.num);
232 	BUG_ON(total_sg == 0);
233 
234 	if (vq->vq.num_free < total_sg) {
235 		pr_debug("Can't add buf len %i - avail = %i\n",
236 			 total_sg, vq->vq.num_free);
237 		/* FIXME: for historical reasons, we force a notify here if
238 		 * there are outgoing parts to the buffer.  Presumably the
239 		 * host should service the ring ASAP. */
240 		if (out_sgs)
241 			vq->notify(&vq->vq);
242 		END_USE(vq);
243 		return -ENOSPC;
244 	}
245 
246 	/* We're about to use some buffers from the free list. */
247 	vq->vq.num_free -= total_sg;
248 
249 	head = i = vq->free_head;
250 	for (n = 0; n < out_sgs; n++) {
251 		for (sg = sgs[n]; sg; sg = next(sg, &total_out)) {
252 			vq->vring.desc[i].flags = VRING_DESC_F_NEXT;
253 			vq->vring.desc[i].addr = sg_phys(sg);
254 			vq->vring.desc[i].len = sg->length;
255 			prev = i;
256 			i = vq->vring.desc[i].next;
257 		}
258 	}
259 	for (; n < (out_sgs + in_sgs); n++) {
260 		for (sg = sgs[n]; sg; sg = next(sg, &total_in)) {
261 			vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
262 			vq->vring.desc[i].addr = sg_phys(sg);
263 			vq->vring.desc[i].len = sg->length;
264 			prev = i;
265 			i = vq->vring.desc[i].next;
266 		}
267 	}
268 	/* Last one doesn't continue. */
269 	vq->vring.desc[prev].flags &= ~VRING_DESC_F_NEXT;
270 
271 	/* Update free pointer */
272 	vq->free_head = i;
273 
274 add_head:
275 	/* Set token. */
276 	vq->data[head] = data;
277 
278 	/* Put entry in available array (but don't update avail->idx until they
279 	 * do sync). */
280 	avail = (vq->vring.avail->idx & (vq->vring.num-1));
281 	vq->vring.avail->ring[avail] = head;
282 
283 	/* Descriptors and available array need to be set before we expose the
284 	 * new available array entries. */
285 	virtio_wmb(vq->weak_barriers);
286 	vq->vring.avail->idx++;
287 	vq->num_added++;
288 
289 	/* This is very unlikely, but theoretically possible.  Kick
290 	 * just in case. */
291 	if (unlikely(vq->num_added == (1 << 16) - 1))
292 		virtqueue_kick(_vq);
293 
294 	pr_debug("Added buffer head %i to %p\n", head, vq);
295 	END_USE(vq);
296 
297 	return 0;
298 }
299 
300 /**
301  * virtqueue_add_sgs - expose buffers to other end
302  * @vq: the struct virtqueue we're talking about.
303  * @sgs: array of terminated scatterlists.
304  * @out_sgs: the number of scatterlists readable by other side
305  * @in_sgs: the number of scatterlists which are writable (after readable ones)
306  * @data: the token identifying the buffer.
307  * @gfp: how to do memory allocations (if necessary).
308  *
309  * Caller must ensure we don't call this with other virtqueue operations
310  * at the same time (except where noted).
311  *
312  * Returns zero or a negative error (i.e. -ENOSPC, -ENOMEM).
313  */
314 int virtqueue_add_sgs(struct virtqueue *_vq,
315 		      struct scatterlist *sgs[],
316 		      unsigned int out_sgs,
317 		      unsigned int in_sgs,
318 		      void *data,
319 		      gfp_t gfp)
320 {
321 	unsigned int i, total_out, total_in;
322 
323 	/* Count them first. */
324 	for (i = total_out = total_in = 0; i < out_sgs; i++) {
325 		struct scatterlist *sg;
326 		for (sg = sgs[i]; sg; sg = sg_next(sg))
327 			total_out++;
328 	}
329 	for (; i < out_sgs + in_sgs; i++) {
330 		struct scatterlist *sg;
331 		for (sg = sgs[i]; sg; sg = sg_next(sg))
332 			total_in++;
333 	}
334 	return virtqueue_add(_vq, sgs, sg_next_chained,
335 			     total_out, total_in, out_sgs, in_sgs, data, gfp);
336 }
337 EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
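
/*
 * Usage sketch, for illustration only (not part of this driver): queue a
 * request with a device-readable header followed by a device-writable
 * status byte.  The request layout below is an assumption made up for
 * the example; real drivers use their own protocol structures.
 */
struct example_req {
	u32 hdr;	/* filled in by the driver, read by the other side */
	u8 status;	/* written by the other side on completion */
};

static int __maybe_unused example_queue_request(struct virtqueue *vq,
						struct example_req *req)
{
	struct scatterlist hdr, status;
	struct scatterlist *sgs[] = { &hdr, &status };

	sg_init_one(&hdr, &req->hdr, sizeof(req->hdr));
	sg_init_one(&status, &req->status, sizeof(req->status));

	/* One readable sg list, then one writable one; "req" is the token. */
	return virtqueue_add_sgs(vq, sgs, 1, 1, req, GFP_ATOMIC);
}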
338 
339 /**
340  * virtqueue_add_outbuf - expose output buffers to other end
341  * @vq: the struct virtqueue we're talking about.
342  * @sg: array of scatterlist entries (need not be terminated!)
343  * @num: the number of entries in @sg readable by other side
344  * @data: the token identifying the buffer.
345  * @gfp: how to do memory allocations (if necessary).
346  *
347  * Caller must ensure we don't call this with other virtqueue operations
348  * at the same time (except where noted).
349  *
350  * Returns zero or a negative error (i.e. -ENOSPC, -ENOMEM).
351  */
352 int virtqueue_add_outbuf(struct virtqueue *vq,
353 			 struct scatterlist sg[], unsigned int num,
354 			 void *data,
355 			 gfp_t gfp)
356 {
357 	return virtqueue_add(vq, &sg, sg_next_arr, num, 0, 1, 0, data, gfp);
358 }
359 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
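
/*
 * Sketch, for illustration only: hand the other side a buffer it will
 * only read.  "buf" is assumed to be lowmem the caller keeps alive until
 * the token comes back via virtqueue_get_buf().
 */
static int __maybe_unused example_send_buf(struct virtqueue *vq,
					   void *buf, unsigned int len)
{
	struct scatterlist sg;

	sg_init_one(&sg, buf, len);
	return virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC);
}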
360 
361 /**
362  * virtqueue_add_inbuf - expose input buffers to other end
363  * @vq: the struct virtqueue we're talking about.
364  * @sg: array of scatterlist entries (need not be terminated!)
365  * @num: the number of entries in @sg writable by other side
366  * @data: the token identifying the buffer.
367  * @gfp: how to do memory allocations (if necessary).
368  *
369  * Caller must ensure we don't call this with other virtqueue operations
370  * at the same time (except where noted).
371  *
372  * Returns zero or a negative error (i.e. -ENOSPC, -ENOMEM).
373  */
374 int virtqueue_add_inbuf(struct virtqueue *vq,
375 			struct scatterlist sg[], unsigned int num,
376 			void *data,
377 			gfp_t gfp)
378 {
379 	return virtqueue_add(vq, &sg, sg_next_arr, 0, num, 0, 1, data, gfp);
380 }
381 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
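
/*
 * Sketch, for illustration only: post an empty buffer for the other side
 * to fill, as a receive path might.  Buffer sizing and recycling are up
 * to the calling driver and are assumed here.
 */
static int __maybe_unused example_post_rx_buf(struct virtqueue *vq,
					      void *buf, unsigned int len)
{
	struct scatterlist sg;

	sg_init_one(&sg, buf, len);
	return virtqueue_add_inbuf(vq, &sg, 1, buf, GFP_KERNEL);
}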
382 
383 /**
384  * virtqueue_kick_prepare - first half of split virtqueue_kick call.
385  * @vq: the struct virtqueue
386  *
387  * Instead of virtqueue_kick(), you can do:
388  *	if (virtqueue_kick_prepare(vq))
389  *		virtqueue_notify(vq);
390  *
391  * This is sometimes useful because virtqueue_kick_prepare() needs
392  * to be serialized, but the actual virtqueue_notify() call does not.
393  */
394 bool virtqueue_kick_prepare(struct virtqueue *_vq)
395 {
396 	struct vring_virtqueue *vq = to_vvq(_vq);
397 	u16 new, old;
398 	bool needs_kick;
399 
400 	START_USE(vq);
401 	/* We need to expose available array entries before checking avail
402 	 * event. */
403 	virtio_mb(vq->weak_barriers);
404 
405 	old = vq->vring.avail->idx - vq->num_added;
406 	new = vq->vring.avail->idx;
407 	vq->num_added = 0;
408 
409 #ifdef DEBUG
410 	if (vq->last_add_time_valid) {
411 		WARN_ON(ktime_to_ms(ktime_sub(ktime_get(),
412 					      vq->last_add_time)) > 100);
413 	}
414 	vq->last_add_time_valid = false;
415 #endif
416 
417 	if (vq->event) {
418 		needs_kick = vring_need_event(vring_avail_event(&vq->vring),
419 					      new, old);
420 	} else {
421 		needs_kick = !(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY);
422 	}
423 	END_USE(vq);
424 	return needs_kick;
425 }
426 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
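
/*
 * Sketch of the split kick, for illustration only: the prepare step runs
 * under whatever lock serializes the driver's virtqueue_add_*() calls,
 * while the potentially slow notification happens after the lock is
 * dropped.  The per-queue spinlock here is the caller's, not something
 * this file provides.
 */
static void __maybe_unused example_locked_kick(struct virtqueue *vq,
					       spinlock_t *lock)
{
	unsigned long flags;
	bool notify;

	spin_lock_irqsave(lock, flags);
	/* ... virtqueue_add_*() calls would go here ... */
	notify = virtqueue_kick_prepare(vq);
	spin_unlock_irqrestore(lock, flags);

	if (notify)
		virtqueue_notify(vq);
}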
427 
428 /**
429  * virtqueue_notify - second half of split virtqueue_kick call.
430  * @vq: the struct virtqueue
431  *
432  * This does not need to be serialized.
433  *
434  * Returns false if host notify failed or queue is broken, otherwise true.
435  */
436 bool virtqueue_notify(struct virtqueue *_vq)
437 {
438 	struct vring_virtqueue *vq = to_vvq(_vq);
439 
440 	if (unlikely(vq->broken))
441 		return false;
442 
443 	/* Prod other side to tell it about changes. */
444 	if (!vq->notify(_vq)) {
445 		vq->broken = true;
446 		return false;
447 	}
448 	return true;
449 }
450 EXPORT_SYMBOL_GPL(virtqueue_notify);
451 
452 /**
453  * virtqueue_kick - update after add_buf
454  * @vq: the struct virtqueue
455  *
456  * After one or more virtqueue_add_* calls, invoke this to kick
457  * the other side.
458  *
459  * Caller must ensure we don't call this with other virtqueue
460  * operations at the same time (except where noted).
461  *
462  * Returns false if kick failed, otherwise true.
463  */
464 bool virtqueue_kick(struct virtqueue *vq)
465 {
466 	if (virtqueue_kick_prepare(vq))
467 		return virtqueue_notify(vq);
468 	return true;
469 }
470 EXPORT_SYMBOL_GPL(virtqueue_kick);
471 
472 static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
473 {
474 	unsigned int i;
475 
476 	/* Clear data ptr. */
477 	vq->data[head] = NULL;
478 
479 	/* Put back on free list: find end */
480 	i = head;
481 
482 	/* Free the indirect table */
483 	if (vq->vring.desc[i].flags & VRING_DESC_F_INDIRECT)
484 		kfree(phys_to_virt(vq->vring.desc[i].addr));
485 
486 	while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) {
487 		i = vq->vring.desc[i].next;
488 		vq->vq.num_free++;
489 	}
490 
491 	vq->vring.desc[i].next = vq->free_head;
492 	vq->free_head = head;
493 	/* Plus final descriptor */
494 	vq->vq.num_free++;
495 }
496 
497 static inline bool more_used(const struct vring_virtqueue *vq)
498 {
499 	return vq->last_used_idx != vq->vring.used->idx;
500 }
501 
502 /**
503  * virtqueue_get_buf - get the next used buffer
504  * @vq: the struct virtqueue we're talking about.
505  * @len: the length written into the buffer
506  *
507  * If the device wrote data into the buffer, @len will be set to the
508  * amount written.  This means you don't need to clear the buffer
509  * beforehand to ensure there's no data leakage in the case of short
510  * writes.
511  *
512  * Caller must ensure we don't call this with other virtqueue
513  * operations at the same time (except where noted).
514  *
515  * Returns NULL if there are no used buffers, or the "data" token
516  * handed to virtqueue_add_*().
517  */
518 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
519 {
520 	struct vring_virtqueue *vq = to_vvq(_vq);
521 	void *ret;
522 	unsigned int i;
523 	u16 last_used;
524 
525 	START_USE(vq);
526 
527 	if (unlikely(vq->broken)) {
528 		END_USE(vq);
529 		return NULL;
530 	}
531 
532 	if (!more_used(vq)) {
533 		pr_debug("No more buffers in queue\n");
534 		END_USE(vq);
535 		return NULL;
536 	}
537 
538 	/* Only get used array entries after they have been exposed by host. */
539 	virtio_rmb(vq->weak_barriers);
540 
541 	last_used = (vq->last_used_idx & (vq->vring.num - 1));
542 	i = vq->vring.used->ring[last_used].id;
543 	*len = vq->vring.used->ring[last_used].len;
544 
545 	if (unlikely(i >= vq->vring.num)) {
546 		BAD_RING(vq, "id %u out of range\n", i);
547 		return NULL;
548 	}
549 	if (unlikely(!vq->data[i])) {
550 		BAD_RING(vq, "id %u is not a head!\n", i);
551 		return NULL;
552 	}
553 
554 	/* detach_buf clears data, so grab it now. */
555 	ret = vq->data[i];
556 	detach_buf(vq, i);
557 	vq->last_used_idx++;
558 	/* If we expect an interrupt for the next entry, tell host
559 	 * by writing event index and flush out the write before
560 	 * the read in the next get_buf call. */
561 	if (!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
562 		vring_used_event(&vq->vring) = vq->last_used_idx;
563 		virtio_mb(vq->weak_barriers);
564 	}
565 
566 #ifdef DEBUG
567 	vq->last_add_time_valid = false;
568 #endif
569 
570 	END_USE(vq);
571 	return ret;
572 }
573 EXPORT_SYMBOL_GPL(virtqueue_get_buf);
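
/*
 * Sketch, for illustration only: drain completed buffers.  The returned
 * token is whatever the driver passed as @data when adding the buffer;
 * here it is assumed to be a kmalloc'ed buffer that is simply freed.
 */
static void __maybe_unused example_drain_used(struct virtqueue *vq)
{
	unsigned int len;
	void *buf;

	while ((buf = virtqueue_get_buf(vq, &len)) != NULL) {
		/* "len" is how much the other side wrote into the buffer. */
		kfree(buf);
	}
}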
574 
575 /**
576  * virtqueue_disable_cb - disable callbacks
577  * @vq: the struct virtqueue we're talking about.
578  *
579  * Note that this is not necessarily synchronous, hence unreliable and only
580  * useful as an optimization.
581  *
582  * Unlike other operations, this need not be serialized.
583  */
584 void virtqueue_disable_cb(struct virtqueue *_vq)
585 {
586 	struct vring_virtqueue *vq = to_vvq(_vq);
587 
588 	vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
589 }
590 EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
591 
592 /**
593  * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
594  * @vq: the struct virtqueue we're talking about.
595  *
596  * This re-enables callbacks; it returns current queue state
597  * in an opaque unsigned value. This value should be later tested by
598  * virtqueue_poll, to detect a possible race between the driver checking for
599  * more work, and enabling callbacks.
600  *
601  * Caller must ensure we don't call this with other virtqueue
602  * operations at the same time (except where noted).
603  */
604 unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
605 {
606 	struct vring_virtqueue *vq = to_vvq(_vq);
607 	u16 last_used_idx;
608 
609 	START_USE(vq);
610 
611 	/* We optimistically turn back on interrupts, then check if there was
612 	 * more to do. */
613 	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
614 	 * either clear the flags bit or point the event index at the next
615 	 * entry. Always do both to keep code simple. */
616 	vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
617 	vring_used_event(&vq->vring) = last_used_idx = vq->last_used_idx;
618 	END_USE(vq);
619 	return last_used_idx;
620 }
621 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
622 
623 /**
624  * virtqueue_poll - query pending used buffers
625  * @vq: the struct virtqueue we're talking about.
626  * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
627  *
628  * Returns "true" if there are pending used buffers in the queue.
629  *
630  * This does not need to be serialized.
631  */
632 bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx)
633 {
634 	struct vring_virtqueue *vq = to_vvq(_vq);
635 
636 	virtio_mb(vq->weak_barriers);
637 	return (u16)last_used_idx != vq->vring.used->idx;
638 }
639 EXPORT_SYMBOL_GPL(virtqueue_poll);
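
/*
 * Sketch, for illustration only: re-enable callbacks without losing a
 * buffer that arrives between "no more work" and "callbacks back on".
 * A polling driver would typically reschedule its poll routine when this
 * returns false instead of spinning.
 */
static bool __maybe_unused example_try_reenable(struct virtqueue *vq)
{
	unsigned opaque = virtqueue_enable_cb_prepare(vq);

	if (virtqueue_poll(vq, opaque)) {
		/* Raced with the other side: more used buffers appeared. */
		virtqueue_disable_cb(vq);
		return false;	/* caller should keep processing */
	}
	return true;		/* callbacks are on; safe to wait */
}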
640 
641 /**
642  * virtqueue_enable_cb - restart callbacks after disable_cb.
643  * @vq: the struct virtqueue we're talking about.
644  *
645  * This re-enables callbacks; it returns "false" if there are pending
646  * buffers in the queue, to detect a possible race between the driver
647  * checking for more work, and enabling callbacks.
648  *
649  * Caller must ensure we don't call this with other virtqueue
650  * operations at the same time (except where noted).
651  */
652 bool virtqueue_enable_cb(struct virtqueue *_vq)
653 {
654 	unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq);
655 	return !virtqueue_poll(_vq, last_used_idx);
656 }
657 EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
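
/*
 * Sketch, for illustration only: the usual callback-side loop.  Disable
 * further callbacks, drain the used ring, and only stop once
 * virtqueue_enable_cb() confirms nothing slipped in while callbacks were
 * off; otherwise go around again.  Tokens are assumed to be kmalloc'ed.
 */
static void __maybe_unused example_vq_callback(struct virtqueue *vq)
{
	unsigned int len;
	void *buf;

	do {
		virtqueue_disable_cb(vq);
		while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
			kfree(buf);
	} while (!virtqueue_enable_cb(vq));
}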
658 
659 /**
660  * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
661  * @vq: the struct virtqueue we're talking about.
662  *
663  * This re-enables callbacks but hints to the other side to delay
664  * interrupts until most of the available buffers have been processed;
665  * it returns "false" if there are many pending buffers in the queue,
666  * to detect a possible race between the driver checking for more work,
667  * and enabling callbacks.
668  *
669  * Caller must ensure we don't call this with other virtqueue
670  * operations at the same time (except where noted).
671  */
672 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
673 {
674 	struct vring_virtqueue *vq = to_vvq(_vq);
675 	u16 bufs;
676 
677 	START_USE(vq);
678 
679 	/* We optimistically turn back on interrupts, then check if there was
680 	 * more to do. */
681 	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
682 	 * either clear the flags bit or point the event index at the next
683 	 * entry. Always do both to keep code simple. */
684 	vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
685 	/* TODO: tune this threshold */
686 	bufs = (u16)(vq->vring.avail->idx - vq->last_used_idx) * 3 / 4;
687 	vring_used_event(&vq->vring) = vq->last_used_idx + bufs;
688 	virtio_mb(vq->weak_barriers);
689 	if (unlikely((u16)(vq->vring.used->idx - vq->last_used_idx) > bufs)) {
690 		END_USE(vq);
691 		return false;
692 	}
693 
694 	END_USE(vq);
695 	return true;
696 }
697 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
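
/*
 * Sketch, for illustration only: a transmit path that only asks for an
 * interrupt once the ring is nearly full, then re-checks for completions
 * that raced with enabling the callback.  The two-descriptor threshold
 * is an arbitrary assumption.
 */
static void __maybe_unused example_tx_cleanup_policy(struct virtqueue *vq)
{
	unsigned int len;

	if (vq->num_free >= 2)
		return;		/* plenty of room; no interrupt needed yet */

	if (unlikely(!virtqueue_enable_cb_delayed(vq))) {
		/* Completions are already pending: reap them right away. */
		while (virtqueue_get_buf(vq, &len))
			;	/* free the returned tokens here */
	}
}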
698 
699 /**
700  * virtqueue_detach_unused_buf - detach first unused buffer
701  * @vq: the struct virtqueue we're talking about.
702  *
703  * Returns NULL or the "data" token handed to virtqueue_add_*().
704  * This is not valid on an active queue; it is useful only for device
705  * shutdown.
706  */
707 void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
708 {
709 	struct vring_virtqueue *vq = to_vvq(_vq);
710 	unsigned int i;
711 	void *buf;
712 
713 	START_USE(vq);
714 
715 	for (i = 0; i < vq->vring.num; i++) {
716 		if (!vq->data[i])
717 			continue;
718 		/* detach_buf clears data, so grab it now. */
719 		buf = vq->data[i];
720 		detach_buf(vq, i);
721 		vq->vring.avail->idx--;
722 		END_USE(vq);
723 		return buf;
724 	}
725 	/* That should have freed everything. */
726 	BUG_ON(vq->vq.num_free != vq->vring.num);
727 
728 	END_USE(vq);
729 	return NULL;
730 }
731 EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
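
/*
 * Sketch, for illustration only: teardown after the device has been
 * reset, returning buffers that were queued but never used.  Tokens are
 * assumed to be kmalloc'ed buffers.
 */
static void __maybe_unused example_free_unused(struct virtqueue *vq)
{
	void *buf;

	while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
		kfree(buf);
}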
732 
733 irqreturn_t vring_interrupt(int irq, void *_vq)
734 {
735 	struct vring_virtqueue *vq = to_vvq(_vq);
736 
737 	if (!more_used(vq)) {
738 		pr_debug("virtqueue interrupt with no work for %p\n", vq);
739 		return IRQ_NONE;
740 	}
741 
742 	if (unlikely(vq->broken))
743 		return IRQ_HANDLED;
744 
745 	pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
746 	if (vq->vq.callback)
747 		vq->vq.callback(&vq->vq);
748 
749 	return IRQ_HANDLED;
750 }
751 EXPORT_SYMBOL_GPL(vring_interrupt);
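
/*
 * Sketch, for illustration only: a transport can wire a queue's interrupt
 * straight to vring_interrupt(), passing the struct virtqueue as the
 * handler argument.  "irq" is assumed to be an interrupt line the
 * transport already owns, and <linux/interrupt.h> is assumed to be
 * available for request_irq()/IRQF_SHARED.
 */
static int __maybe_unused example_request_vq_irq(unsigned int irq,
						 struct virtqueue *vq)
{
	return request_irq(irq, vring_interrupt, IRQF_SHARED,
			   "virtio-vring", vq);
}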
752 
753 struct virtqueue *vring_new_virtqueue(unsigned int index,
754 				      unsigned int num,
755 				      unsigned int vring_align,
756 				      struct virtio_device *vdev,
757 				      bool weak_barriers,
758 				      void *pages,
759 				      bool (*notify)(struct virtqueue *),
760 				      void (*callback)(struct virtqueue *),
761 				      const char *name)
762 {
763 	struct vring_virtqueue *vq;
764 	unsigned int i;
765 
766 	/* We assume num is a power of 2. */
767 	if (num & (num - 1)) {
768 		dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
769 		return NULL;
770 	}
771 
772 	vq = kmalloc(sizeof(*vq) + sizeof(void *)*num, GFP_KERNEL);
773 	if (!vq)
774 		return NULL;
775 
776 	vring_init(&vq->vring, num, pages, vring_align);
777 	vq->vq.callback = callback;
778 	vq->vq.vdev = vdev;
779 	vq->vq.name = name;
780 	vq->vq.num_free = num;
781 	vq->vq.index = index;
782 	vq->notify = notify;
783 	vq->weak_barriers = weak_barriers;
784 	vq->broken = false;
785 	vq->last_used_idx = 0;
786 	vq->num_added = 0;
787 	list_add_tail(&vq->vq.list, &vdev->vqs);
788 #ifdef DEBUG
789 	vq->in_use = false;
790 	vq->last_add_time_valid = false;
791 #endif
792 
793 	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC);
794 	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
795 
796 	/* No callback?  Tell other side not to bother us. */
797 	if (!callback)
798 		vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
799 
800 	/* Put everything in free lists. */
801 	vq->free_head = 0;
802 	for (i = 0; i < num-1; i++) {
803 		vq->vring.desc[i].next = i+1;
804 		vq->data[i] = NULL;
805 	}
806 	vq->data[i] = NULL;
807 
808 	return &vq->vq;
809 }
810 EXPORT_SYMBOL_GPL(vring_new_virtqueue);
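
/*
 * Sketch, for illustration only: how a transport might create a queue
 * with this helper.  Page-aligned, zeroed lowmem is allocated for the
 * ring and freed again if queue creation fails.  The notify/callback
 * pointers and the queue size come from the real transport and are
 * assumptions here.
 */
static struct virtqueue * __maybe_unused
example_setup_vq(struct virtio_device *vdev, unsigned int index,
		 unsigned int num, bool (*notify)(struct virtqueue *),
		 void (*callback)(struct virtqueue *), const char *name)
{
	size_t size = vring_size(num, PAGE_SIZE);
	struct virtqueue *vq;
	void *pages;

	pages = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
	if (!pages)
		return NULL;

	vq = vring_new_virtqueue(index, num, PAGE_SIZE, vdev, true, pages,
				 notify, callback, name);
	if (!vq)
		free_pages_exact(pages, size);
	return vq;
}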
811 
812 void vring_del_virtqueue(struct virtqueue *vq)
813 {
814 	list_del(&vq->list);
815 	kfree(to_vvq(vq));
816 }
817 EXPORT_SYMBOL_GPL(vring_del_virtqueue);
818 
819 /* Manipulates transport-specific feature bits. */
820 void vring_transport_features(struct virtio_device *vdev)
821 {
822 	unsigned int i;
823 
824 	for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
825 		switch (i) {
826 		case VIRTIO_RING_F_INDIRECT_DESC:
827 			break;
828 		case VIRTIO_RING_F_EVENT_IDX:
829 			break;
830 		default:
831 			/* We don't understand this bit. */
832 			clear_bit(i, vdev->features);
833 		}
834 	}
835 }
836 EXPORT_SYMBOL_GPL(vring_transport_features);
837 
838 /**
839  * virtqueue_get_vring_size - return the size of the virtqueue's vring
840  * @vq: the struct virtqueue containing the vring of interest.
841  *
842  * Returns the size of the vring.  This is mainly used for boasting to
843  * userspace.  Unlike other operations, this need not be serialized.
844  */
845 unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
846 {
848 	struct vring_virtqueue *vq = to_vvq(_vq);
849 
850 	return vq->vring.num;
851 }
852 EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
853 
854 bool virtqueue_is_broken(struct virtqueue *_vq)
855 {
856 	struct vring_virtqueue *vq = to_vvq(_vq);
857 
858 	return vq->broken;
859 }
860 EXPORT_SYMBOL_GPL(virtqueue_is_broken);
861 
862 MODULE_LICENSE("GPL");
863