xref: /openbmc/linux/drivers/virtio/virtio_ring.c (revision a4e1d0b7)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* Virtio ring implementation.
3  *
4  *  Copyright 2007 Rusty Russell IBM Corporation
5  */
6 #include <linux/virtio.h>
7 #include <linux/virtio_ring.h>
8 #include <linux/virtio_config.h>
9 #include <linux/device.h>
10 #include <linux/slab.h>
11 #include <linux/module.h>
12 #include <linux/hrtimer.h>
13 #include <linux/dma-mapping.h>
14 #include <linux/spinlock.h>
15 #include <xen/xen.h>
16 
#ifdef DEBUG
/*
 * Debug build: ring corruption and API misuse are treated as fatal so they
 * are noticed immediately during development.
 */

/* Log the failure against the owning virtio device, then crash. */
#define BAD_RING(_vq, fmt, args...)				\
	do {							\
		dev_err(&(_vq)->vq.vdev->dev,			\
			"%s:"fmt, (_vq)->vq.name, ##args);	\
		BUG();						\
	} while (0)
/*
 * Caller is supposed to guarantee no reentry: panic if the queue is already
 * marked in use, otherwise remember the line that claimed it.
 */
#define START_USE(_vq)						\
	do {							\
		if ((_vq)->in_use)				\
			panic("%s:in_use = %i\n",		\
			      (_vq)->vq.name, (_vq)->in_use);	\
		(_vq)->in_use = __LINE__;			\
	} while (0)
/* Release the claim taken by START_USE(); it is a bug if there was none. */
#define END_USE(_vq) \
	do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0)
/* Record the time of this add, warning if the previous one is still pending. */
#define LAST_ADD_TIME_UPDATE(_vq)				\
	do {							\
		ktime_t now = ktime_get();			\
								\
		/* No kick or get, with .1 second between?  Warn. */ \
		if ((_vq)->last_add_time_valid)			\
			WARN_ON(ktime_to_ms(ktime_sub(now,	\
				(_vq)->last_add_time)) > 100);	\
		(_vq)->last_add_time = now;			\
		(_vq)->last_add_time_valid = true;		\
	} while (0)
/* Warn if more than 100ms passed since the last recorded add. */
#define LAST_ADD_TIME_CHECK(_vq)				\
	do {							\
		if ((_vq)->last_add_time_valid) {		\
			WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
				      (_vq)->last_add_time)) > 100); \
		}						\
	} while (0)
/* Forget the recorded add time. */
#define LAST_ADD_TIME_INVALID(_vq)				\
	((_vq)->last_add_time_valid = false)
#else
/*
 * Non-debug build: log the failure and mark the queue broken instead of
 * crashing.  Every use of the macro argument is parenthesized so that any
 * pointer expression, not just a plain identifier, can be passed.
 */
#define BAD_RING(_vq, fmt, args...)				\
	do {							\
		dev_err(&(_vq)->vq.vdev->dev,			\
			"%s:"fmt, (_vq)->vq.name, ##args);	\
		(_vq)->broken = true;				\
	} while (0)
/* The reentrancy and timing checks compile away outside DEBUG builds. */
#define START_USE(vq)
#define END_USE(vq)
#define LAST_ADD_TIME_UPDATE(vq)
#define LAST_ADD_TIME_CHECK(vq)
#define LAST_ADD_TIME_INVALID(vq)
#endif
68 
/* Per-descriptor driver state kept for a split ring. */
struct vring_desc_state_split {
	/* Data for callback. */
	void *data;
	/* Indirect descriptor, if any. */
	struct vring_desc *indir_desc;
};
73 
74 struct vring_desc_state_packed {
75 	void *data;			/* Data for callback. */
76 	struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
77 	u16 num;			/* Descriptor list length. */
78 	u16 last;			/* The last desc state in a list. */
79 };
80 
81 struct vring_desc_extra {
82 	dma_addr_t addr;		/* Descriptor DMA addr. */
83 	u32 len;			/* Descriptor length. */
84 	u16 flags;			/* Descriptor flags. */
85 	u16 next;			/* The next desc state in a list. */
86 };
87 
88 struct vring_virtqueue_split {
89 	/* Actual memory layout for this queue. */
90 	struct vring vring;
91 
92 	/* Last written value to avail->flags */
93 	u16 avail_flags_shadow;
94 
95 	/*
96 	 * Last written value to avail->idx in
97 	 * guest byte order.
98 	 */
99 	u16 avail_idx_shadow;
100 
101 	/* Per-descriptor state. */
102 	struct vring_desc_state_split *desc_state;
103 	struct vring_desc_extra *desc_extra;
104 
105 	/* DMA address and size information */
106 	dma_addr_t queue_dma_addr;
107 	size_t queue_size_in_bytes;
108 
109 	/*
110 	 * The parameters for creating vrings are reserved for creating new
111 	 * vring.
112 	 */
113 	u32 vring_align;
114 	bool may_reduce_num;
115 };
116 
117 struct vring_virtqueue_packed {
118 	/* Actual memory layout for this queue. */
119 	struct {
120 		unsigned int num;
121 		struct vring_packed_desc *desc;
122 		struct vring_packed_desc_event *driver;
123 		struct vring_packed_desc_event *device;
124 	} vring;
125 
126 	/* Driver ring wrap counter. */
127 	bool avail_wrap_counter;
128 
129 	/* Avail used flags. */
130 	u16 avail_used_flags;
131 
132 	/* Index of the next avail descriptor. */
133 	u16 next_avail_idx;
134 
135 	/*
136 	 * Last written value to driver->flags in
137 	 * guest byte order.
138 	 */
139 	u16 event_flags_shadow;
140 
141 	/* Per-descriptor state. */
142 	struct vring_desc_state_packed *desc_state;
143 	struct vring_desc_extra *desc_extra;
144 
145 	/* DMA address and size information */
146 	dma_addr_t ring_dma_addr;
147 	dma_addr_t driver_event_dma_addr;
148 	dma_addr_t device_event_dma_addr;
149 	size_t ring_size_in_bytes;
150 	size_t event_size_in_bytes;
151 };
152 
153 struct vring_virtqueue {
154 	struct virtqueue vq;
155 
156 	/* Is this a packed ring? */
157 	bool packed_ring;
158 
159 	/* Is DMA API used? */
160 	bool use_dma_api;
161 
162 	/* Can we use weak barriers? */
163 	bool weak_barriers;
164 
165 	/* Other side has made a mess, don't try any more. */
166 	bool broken;
167 
168 	/* Host supports indirect buffers */
169 	bool indirect;
170 
171 	/* Host publishes avail event idx */
172 	bool event;
173 
174 	/* Head of free buffer list. */
175 	unsigned int free_head;
176 	/* Number we've added since last sync. */
177 	unsigned int num_added;
178 
179 	/* Last used index  we've seen.
180 	 * for split ring, it just contains last used index
181 	 * for packed ring:
182 	 * bits up to VRING_PACKED_EVENT_F_WRAP_CTR include the last used index.
183 	 * bits from VRING_PACKED_EVENT_F_WRAP_CTR include the used wrap counter.
184 	 */
185 	u16 last_used_idx;
186 
187 	/* Hint for event idx: already triggered no need to disable. */
188 	bool event_triggered;
189 
190 	union {
191 		/* Available for split ring */
192 		struct vring_virtqueue_split split;
193 
194 		/* Available for packed ring */
195 		struct vring_virtqueue_packed packed;
196 	};
197 
198 	/* How to notify other side. FIXME: commonalize hcalls! */
199 	bool (*notify)(struct virtqueue *vq);
200 
201 	/* DMA, allocation, and size information */
202 	bool we_own_ring;
203 
204 #ifdef DEBUG
205 	/* They're supposed to lock for us. */
206 	unsigned int in_use;
207 
208 	/* Figure out if their kicks are too delayed. */
209 	bool last_add_time_valid;
210 	ktime_t last_add_time;
211 #endif
212 };
213 
214 static struct virtqueue *__vring_new_virtqueue(unsigned int index,
215 					       struct vring_virtqueue_split *vring_split,
216 					       struct virtio_device *vdev,
217 					       bool weak_barriers,
218 					       bool context,
219 					       bool (*notify)(struct virtqueue *),
220 					       void (*callback)(struct virtqueue *),
221 					       const char *name);
222 static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num);
223 static void vring_free(struct virtqueue *_vq);
224 
225 /*
226  * Helpers.
227  */
228 
/* Convert a pointer to the embedded struct virtqueue back to its vring_virtqueue. */
#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
230 
231 static inline bool virtqueue_use_indirect(struct vring_virtqueue *vq,
232 					  unsigned int total_sg)
233 {
234 	/*
235 	 * If the host supports indirect descriptor tables, and we have multiple
236 	 * buffers, then go indirect. FIXME: tune this threshold
237 	 */
238 	return (vq->indirect && total_sg > 1 && vq->vq.num_free);
239 }
240 
241 /*
242  * Modern virtio devices have feature bits to specify whether they need a
243  * quirk and bypass the IOMMU. If not there, just use the DMA API.
244  *
245  * If there, the interaction between virtio and DMA API is messy.
246  *
247  * On most systems with virtio, physical addresses match bus addresses,
248  * and it doesn't particularly matter whether we use the DMA API.
249  *
250  * On some systems, including Xen and any system with a physical device
251  * that speaks virtio behind a physical IOMMU, we must use the DMA API
252  * for virtio DMA to work at all.
253  *
254  * On other systems, including SPARC and PPC64, virtio-pci devices are
255  * enumerated as though they are behind an IOMMU, but the virtio host
256  * ignores the IOMMU, so we must either pretend that the IOMMU isn't
257  * there or somehow map everything as the identity.
258  *
259  * For the time being, we preserve historic behavior and bypass the DMA
260  * API.
261  *
262  * TODO: install a per-device DMA ops structure that does the right thing
263  * taking into account all the above quirks, and use the DMA API
264  * unconditionally on data path.
265  */
266 
267 static bool vring_use_dma_api(struct virtio_device *vdev)
268 {
269 	if (!virtio_has_dma_quirk(vdev))
270 		return true;
271 
272 	/* Otherwise, we are left to guess. */
273 	/*
274 	 * In theory, it's possible to have a buggy QEMU-supposed
275 	 * emulated Q35 IOMMU and Xen enabled at the same time.  On
276 	 * such a configuration, virtio has never worked and will
277 	 * not work without an even larger kludge.  Instead, enable
278 	 * the DMA API if we're a Xen guest, which at least allows
279 	 * all of the sensible Xen configurations to work correctly.
280 	 */
281 	if (xen_domain())
282 		return true;
283 
284 	return false;
285 }
286 
287 size_t virtio_max_dma_size(struct virtio_device *vdev)
288 {
289 	size_t max_segment_size = SIZE_MAX;
290 
291 	if (vring_use_dma_api(vdev))
292 		max_segment_size = dma_max_mapping_size(vdev->dev.parent);
293 
294 	return max_segment_size;
295 }
296 EXPORT_SYMBOL_GPL(virtio_max_dma_size);
297 
298 static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
299 			      dma_addr_t *dma_handle, gfp_t flag)
300 {
301 	if (vring_use_dma_api(vdev)) {
302 		return dma_alloc_coherent(vdev->dev.parent, size,
303 					  dma_handle, flag);
304 	} else {
305 		void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
306 
307 		if (queue) {
308 			phys_addr_t phys_addr = virt_to_phys(queue);
309 			*dma_handle = (dma_addr_t)phys_addr;
310 
311 			/*
312 			 * Sanity check: make sure we dind't truncate
313 			 * the address.  The only arches I can find that
314 			 * have 64-bit phys_addr_t but 32-bit dma_addr_t
315 			 * are certain non-highmem MIPS and x86
316 			 * configurations, but these configurations
317 			 * should never allocate physical pages above 32
318 			 * bits, so this is fine.  Just in case, throw a
319 			 * warning and abort if we end up with an
320 			 * unrepresentable address.
321 			 */
322 			if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
323 				free_pages_exact(queue, PAGE_ALIGN(size));
324 				return NULL;
325 			}
326 		}
327 		return queue;
328 	}
329 }
330 
331 static void vring_free_queue(struct virtio_device *vdev, size_t size,
332 			     void *queue, dma_addr_t dma_handle)
333 {
334 	if (vring_use_dma_api(vdev))
335 		dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
336 	else
337 		free_pages_exact(queue, PAGE_ALIGN(size));
338 }
339 
340 /*
341  * The DMA ops on various arches are rather gnarly right now, and
342  * making all of the arch DMA ops work on the vring device itself
343  * is a mess.  For now, we use the parent device for DMA ops.
344  */
345 static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq)
346 {
347 	return vq->vq.vdev->dev.parent;
348 }
349 
350 /* Map one sg entry. */
351 static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
352 				   struct scatterlist *sg,
353 				   enum dma_data_direction direction)
354 {
355 	if (!vq->use_dma_api)
356 		return (dma_addr_t)sg_phys(sg);
357 
358 	/*
359 	 * We can't use dma_map_sg, because we don't use scatterlists in
360 	 * the way it expects (we don't guarantee that the scatterlist
361 	 * will exist for the lifetime of the mapping).
362 	 */
363 	return dma_map_page(vring_dma_dev(vq),
364 			    sg_page(sg), sg->offset, sg->length,
365 			    direction);
366 }
367 
368 static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
369 				   void *cpu_addr, size_t size,
370 				   enum dma_data_direction direction)
371 {
372 	if (!vq->use_dma_api)
373 		return (dma_addr_t)virt_to_phys(cpu_addr);
374 
375 	return dma_map_single(vring_dma_dev(vq),
376 			      cpu_addr, size, direction);
377 }
378 
379 static int vring_mapping_error(const struct vring_virtqueue *vq,
380 			       dma_addr_t addr)
381 {
382 	if (!vq->use_dma_api)
383 		return 0;
384 
385 	return dma_mapping_error(vring_dma_dev(vq), addr);
386 }
387 
388 static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
389 {
390 	vq->vq.num_free = num;
391 
392 	if (vq->packed_ring)
393 		vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR);
394 	else
395 		vq->last_used_idx = 0;
396 
397 	vq->event_triggered = false;
398 	vq->num_added = 0;
399 
400 #ifdef DEBUG
401 	vq->in_use = false;
402 	vq->last_add_time_valid = false;
403 #endif
404 }
405 
406 
407 /*
408  * Split ring specific functions - *_split().
409  */
410 
411 static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
412 					   struct vring_desc *desc)
413 {
414 	u16 flags;
415 
416 	if (!vq->use_dma_api)
417 		return;
418 
419 	flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
420 
421 	dma_unmap_page(vring_dma_dev(vq),
422 		       virtio64_to_cpu(vq->vq.vdev, desc->addr),
423 		       virtio32_to_cpu(vq->vq.vdev, desc->len),
424 		       (flags & VRING_DESC_F_WRITE) ?
425 		       DMA_FROM_DEVICE : DMA_TO_DEVICE);
426 }
427 
428 static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
429 					  unsigned int i)
430 {
431 	struct vring_desc_extra *extra = vq->split.desc_extra;
432 	u16 flags;
433 
434 	if (!vq->use_dma_api)
435 		goto out;
436 
437 	flags = extra[i].flags;
438 
439 	if (flags & VRING_DESC_F_INDIRECT) {
440 		dma_unmap_single(vring_dma_dev(vq),
441 				 extra[i].addr,
442 				 extra[i].len,
443 				 (flags & VRING_DESC_F_WRITE) ?
444 				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
445 	} else {
446 		dma_unmap_page(vring_dma_dev(vq),
447 			       extra[i].addr,
448 			       extra[i].len,
449 			       (flags & VRING_DESC_F_WRITE) ?
450 			       DMA_FROM_DEVICE : DMA_TO_DEVICE);
451 	}
452 
453 out:
454 	return extra[i].next;
455 }
456 
457 static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
458 					       unsigned int total_sg,
459 					       gfp_t gfp)
460 {
461 	struct vring_desc *desc;
462 	unsigned int i;
463 
464 	/*
465 	 * We require lowmem mappings for the descriptors because
466 	 * otherwise virt_to_phys will give us bogus addresses in the
467 	 * virtqueue.
468 	 */
469 	gfp &= ~__GFP_HIGHMEM;
470 
471 	desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp);
472 	if (!desc)
473 		return NULL;
474 
475 	for (i = 0; i < total_sg; i++)
476 		desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
477 	return desc;
478 }
479 
480 static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
481 						    struct vring_desc *desc,
482 						    unsigned int i,
483 						    dma_addr_t addr,
484 						    unsigned int len,
485 						    u16 flags,
486 						    bool indirect)
487 {
488 	struct vring_virtqueue *vring = to_vvq(vq);
489 	struct vring_desc_extra *extra = vring->split.desc_extra;
490 	u16 next;
491 
492 	desc[i].flags = cpu_to_virtio16(vq->vdev, flags);
493 	desc[i].addr = cpu_to_virtio64(vq->vdev, addr);
494 	desc[i].len = cpu_to_virtio32(vq->vdev, len);
495 
496 	if (!indirect) {
497 		next = extra[i].next;
498 		desc[i].next = cpu_to_virtio16(vq->vdev, next);
499 
500 		extra[i].addr = addr;
501 		extra[i].len = len;
502 		extra[i].flags = flags;
503 	} else
504 		next = virtio16_to_cpu(vq->vdev, desc[i].next);
505 
506 	return next;
507 }
508 
509 static inline int virtqueue_add_split(struct virtqueue *_vq,
510 				      struct scatterlist *sgs[],
511 				      unsigned int total_sg,
512 				      unsigned int out_sgs,
513 				      unsigned int in_sgs,
514 				      void *data,
515 				      void *ctx,
516 				      gfp_t gfp)
517 {
518 	struct vring_virtqueue *vq = to_vvq(_vq);
519 	struct scatterlist *sg;
520 	struct vring_desc *desc;
521 	unsigned int i, n, avail, descs_used, prev, err_idx;
522 	int head;
523 	bool indirect;
524 
525 	START_USE(vq);
526 
527 	BUG_ON(data == NULL);
528 	BUG_ON(ctx && vq->indirect);
529 
530 	if (unlikely(vq->broken)) {
531 		END_USE(vq);
532 		return -EIO;
533 	}
534 
535 	LAST_ADD_TIME_UPDATE(vq);
536 
537 	BUG_ON(total_sg == 0);
538 
539 	head = vq->free_head;
540 
541 	if (virtqueue_use_indirect(vq, total_sg))
542 		desc = alloc_indirect_split(_vq, total_sg, gfp);
543 	else {
544 		desc = NULL;
545 		WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
546 	}
547 
548 	if (desc) {
549 		/* Use a single buffer which doesn't continue */
550 		indirect = true;
551 		/* Set up rest to use this indirect table. */
552 		i = 0;
553 		descs_used = 1;
554 	} else {
555 		indirect = false;
556 		desc = vq->split.vring.desc;
557 		i = head;
558 		descs_used = total_sg;
559 	}
560 
561 	if (unlikely(vq->vq.num_free < descs_used)) {
562 		pr_debug("Can't add buf len %i - avail = %i\n",
563 			 descs_used, vq->vq.num_free);
564 		/* FIXME: for historical reasons, we force a notify here if
565 		 * there are outgoing parts to the buffer.  Presumably the
566 		 * host should service the ring ASAP. */
567 		if (out_sgs)
568 			vq->notify(&vq->vq);
569 		if (indirect)
570 			kfree(desc);
571 		END_USE(vq);
572 		return -ENOSPC;
573 	}
574 
575 	for (n = 0; n < out_sgs; n++) {
576 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
577 			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
578 			if (vring_mapping_error(vq, addr))
579 				goto unmap_release;
580 
581 			prev = i;
582 			/* Note that we trust indirect descriptor
583 			 * table since it use stream DMA mapping.
584 			 */
585 			i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
586 						     VRING_DESC_F_NEXT,
587 						     indirect);
588 		}
589 	}
590 	for (; n < (out_sgs + in_sgs); n++) {
591 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
592 			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
593 			if (vring_mapping_error(vq, addr))
594 				goto unmap_release;
595 
596 			prev = i;
597 			/* Note that we trust indirect descriptor
598 			 * table since it use stream DMA mapping.
599 			 */
600 			i = virtqueue_add_desc_split(_vq, desc, i, addr,
601 						     sg->length,
602 						     VRING_DESC_F_NEXT |
603 						     VRING_DESC_F_WRITE,
604 						     indirect);
605 		}
606 	}
607 	/* Last one doesn't continue. */
608 	desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
609 	if (!indirect && vq->use_dma_api)
610 		vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &=
611 			~VRING_DESC_F_NEXT;
612 
613 	if (indirect) {
614 		/* Now that the indirect table is filled in, map it. */
615 		dma_addr_t addr = vring_map_single(
616 			vq, desc, total_sg * sizeof(struct vring_desc),
617 			DMA_TO_DEVICE);
618 		if (vring_mapping_error(vq, addr))
619 			goto unmap_release;
620 
621 		virtqueue_add_desc_split(_vq, vq->split.vring.desc,
622 					 head, addr,
623 					 total_sg * sizeof(struct vring_desc),
624 					 VRING_DESC_F_INDIRECT,
625 					 false);
626 	}
627 
628 	/* We're using some buffers from the free list. */
629 	vq->vq.num_free -= descs_used;
630 
631 	/* Update free pointer */
632 	if (indirect)
633 		vq->free_head = vq->split.desc_extra[head].next;
634 	else
635 		vq->free_head = i;
636 
637 	/* Store token and indirect buffer state. */
638 	vq->split.desc_state[head].data = data;
639 	if (indirect)
640 		vq->split.desc_state[head].indir_desc = desc;
641 	else
642 		vq->split.desc_state[head].indir_desc = ctx;
643 
644 	/* Put entry in available array (but don't update avail->idx until they
645 	 * do sync). */
646 	avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
647 	vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
648 
649 	/* Descriptors and available array need to be set before we expose the
650 	 * new available array entries. */
651 	virtio_wmb(vq->weak_barriers);
652 	vq->split.avail_idx_shadow++;
653 	vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
654 						vq->split.avail_idx_shadow);
655 	vq->num_added++;
656 
657 	pr_debug("Added buffer head %i to %p\n", head, vq);
658 	END_USE(vq);
659 
660 	/* This is very unlikely, but theoretically possible.  Kick
661 	 * just in case. */
662 	if (unlikely(vq->num_added == (1 << 16) - 1))
663 		virtqueue_kick(_vq);
664 
665 	return 0;
666 
667 unmap_release:
668 	err_idx = i;
669 
670 	if (indirect)
671 		i = 0;
672 	else
673 		i = head;
674 
675 	for (n = 0; n < total_sg; n++) {
676 		if (i == err_idx)
677 			break;
678 		if (indirect) {
679 			vring_unmap_one_split_indirect(vq, &desc[i]);
680 			i = virtio16_to_cpu(_vq->vdev, desc[i].next);
681 		} else
682 			i = vring_unmap_one_split(vq, i);
683 	}
684 
685 	if (indirect)
686 		kfree(desc);
687 
688 	END_USE(vq);
689 	return -ENOMEM;
690 }
691 
692 static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
693 {
694 	struct vring_virtqueue *vq = to_vvq(_vq);
695 	u16 new, old;
696 	bool needs_kick;
697 
698 	START_USE(vq);
699 	/* We need to expose available array entries before checking avail
700 	 * event. */
701 	virtio_mb(vq->weak_barriers);
702 
703 	old = vq->split.avail_idx_shadow - vq->num_added;
704 	new = vq->split.avail_idx_shadow;
705 	vq->num_added = 0;
706 
707 	LAST_ADD_TIME_CHECK(vq);
708 	LAST_ADD_TIME_INVALID(vq);
709 
710 	if (vq->event) {
711 		needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
712 					vring_avail_event(&vq->split.vring)),
713 					      new, old);
714 	} else {
715 		needs_kick = !(vq->split.vring.used->flags &
716 					cpu_to_virtio16(_vq->vdev,
717 						VRING_USED_F_NO_NOTIFY));
718 	}
719 	END_USE(vq);
720 	return needs_kick;
721 }
722 
723 static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
724 			     void **ctx)
725 {
726 	unsigned int i, j;
727 	__virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
728 
729 	/* Clear data ptr. */
730 	vq->split.desc_state[head].data = NULL;
731 
732 	/* Put back on free list: unmap first-level descriptors and find end */
733 	i = head;
734 
735 	while (vq->split.vring.desc[i].flags & nextflag) {
736 		vring_unmap_one_split(vq, i);
737 		i = vq->split.desc_extra[i].next;
738 		vq->vq.num_free++;
739 	}
740 
741 	vring_unmap_one_split(vq, i);
742 	vq->split.desc_extra[i].next = vq->free_head;
743 	vq->free_head = head;
744 
745 	/* Plus final descriptor */
746 	vq->vq.num_free++;
747 
748 	if (vq->indirect) {
749 		struct vring_desc *indir_desc =
750 				vq->split.desc_state[head].indir_desc;
751 		u32 len;
752 
753 		/* Free the indirect table, if any, now that it's unmapped. */
754 		if (!indir_desc)
755 			return;
756 
757 		len = vq->split.desc_extra[head].len;
758 
759 		BUG_ON(!(vq->split.desc_extra[head].flags &
760 				VRING_DESC_F_INDIRECT));
761 		BUG_ON(len == 0 || len % sizeof(struct vring_desc));
762 
763 		for (j = 0; j < len / sizeof(struct vring_desc); j++)
764 			vring_unmap_one_split_indirect(vq, &indir_desc[j]);
765 
766 		kfree(indir_desc);
767 		vq->split.desc_state[head].indir_desc = NULL;
768 	} else if (ctx) {
769 		*ctx = vq->split.desc_state[head].indir_desc;
770 	}
771 }
772 
773 static inline bool more_used_split(const struct vring_virtqueue *vq)
774 {
775 	return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
776 			vq->split.vring.used->idx);
777 }
778 
779 static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
780 					 unsigned int *len,
781 					 void **ctx)
782 {
783 	struct vring_virtqueue *vq = to_vvq(_vq);
784 	void *ret;
785 	unsigned int i;
786 	u16 last_used;
787 
788 	START_USE(vq);
789 
790 	if (unlikely(vq->broken)) {
791 		END_USE(vq);
792 		return NULL;
793 	}
794 
795 	if (!more_used_split(vq)) {
796 		pr_debug("No more buffers in queue\n");
797 		END_USE(vq);
798 		return NULL;
799 	}
800 
801 	/* Only get used array entries after they have been exposed by host. */
802 	virtio_rmb(vq->weak_barriers);
803 
804 	last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
805 	i = virtio32_to_cpu(_vq->vdev,
806 			vq->split.vring.used->ring[last_used].id);
807 	*len = virtio32_to_cpu(_vq->vdev,
808 			vq->split.vring.used->ring[last_used].len);
809 
810 	if (unlikely(i >= vq->split.vring.num)) {
811 		BAD_RING(vq, "id %u out of range\n", i);
812 		return NULL;
813 	}
814 	if (unlikely(!vq->split.desc_state[i].data)) {
815 		BAD_RING(vq, "id %u is not a head!\n", i);
816 		return NULL;
817 	}
818 
819 	/* detach_buf_split clears data, so grab it now. */
820 	ret = vq->split.desc_state[i].data;
821 	detach_buf_split(vq, i, ctx);
822 	vq->last_used_idx++;
823 	/* If we expect an interrupt for the next entry, tell host
824 	 * by writing event index and flush out the write before
825 	 * the read in the next get_buf call. */
826 	if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
827 		virtio_store_mb(vq->weak_barriers,
828 				&vring_used_event(&vq->split.vring),
829 				cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
830 
831 	LAST_ADD_TIME_INVALID(vq);
832 
833 	END_USE(vq);
834 	return ret;
835 }
836 
837 static void virtqueue_disable_cb_split(struct virtqueue *_vq)
838 {
839 	struct vring_virtqueue *vq = to_vvq(_vq);
840 
841 	if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
842 		vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
843 		if (vq->event)
844 			/* TODO: this is a hack. Figure out a cleaner value to write. */
845 			vring_used_event(&vq->split.vring) = 0x0;
846 		else
847 			vq->split.vring.avail->flags =
848 				cpu_to_virtio16(_vq->vdev,
849 						vq->split.avail_flags_shadow);
850 	}
851 }
852 
853 static unsigned int virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
854 {
855 	struct vring_virtqueue *vq = to_vvq(_vq);
856 	u16 last_used_idx;
857 
858 	START_USE(vq);
859 
860 	/* We optimistically turn back on interrupts, then check if there was
861 	 * more to do. */
862 	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
863 	 * either clear the flags bit or point the event index at the next
864 	 * entry. Always do both to keep code simple. */
865 	if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
866 		vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
867 		if (!vq->event)
868 			vq->split.vring.avail->flags =
869 				cpu_to_virtio16(_vq->vdev,
870 						vq->split.avail_flags_shadow);
871 	}
872 	vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev,
873 			last_used_idx = vq->last_used_idx);
874 	END_USE(vq);
875 	return last_used_idx;
876 }
877 
878 static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned int last_used_idx)
879 {
880 	struct vring_virtqueue *vq = to_vvq(_vq);
881 
882 	return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
883 			vq->split.vring.used->idx);
884 }
885 
886 static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
887 {
888 	struct vring_virtqueue *vq = to_vvq(_vq);
889 	u16 bufs;
890 
891 	START_USE(vq);
892 
893 	/* We optimistically turn back on interrupts, then check if there was
894 	 * more to do. */
895 	/* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
896 	 * either clear the flags bit or point the event index at the next
897 	 * entry. Always update the event index to keep code simple. */
898 	if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
899 		vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
900 		if (!vq->event)
901 			vq->split.vring.avail->flags =
902 				cpu_to_virtio16(_vq->vdev,
903 						vq->split.avail_flags_shadow);
904 	}
905 	/* TODO: tune this threshold */
906 	bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;
907 
908 	virtio_store_mb(vq->weak_barriers,
909 			&vring_used_event(&vq->split.vring),
910 			cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
911 
912 	if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx)
913 					- vq->last_used_idx) > bufs)) {
914 		END_USE(vq);
915 		return false;
916 	}
917 
918 	END_USE(vq);
919 	return true;
920 }
921 
922 static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
923 {
924 	struct vring_virtqueue *vq = to_vvq(_vq);
925 	unsigned int i;
926 	void *buf;
927 
928 	START_USE(vq);
929 
930 	for (i = 0; i < vq->split.vring.num; i++) {
931 		if (!vq->split.desc_state[i].data)
932 			continue;
933 		/* detach_buf_split clears data, so grab it now. */
934 		buf = vq->split.desc_state[i].data;
935 		detach_buf_split(vq, i, NULL);
936 		vq->split.avail_idx_shadow--;
937 		vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
938 				vq->split.avail_idx_shadow);
939 		END_USE(vq);
940 		return buf;
941 	}
942 	/* That should have freed everything. */
943 	BUG_ON(vq->vq.num_free != vq->split.vring.num);
944 
945 	END_USE(vq);
946 	return NULL;
947 }
948 
949 static void virtqueue_vring_init_split(struct vring_virtqueue_split *vring_split,
950 				       struct vring_virtqueue *vq)
951 {
952 	struct virtio_device *vdev;
953 
954 	vdev = vq->vq.vdev;
955 
956 	vring_split->avail_flags_shadow = 0;
957 	vring_split->avail_idx_shadow = 0;
958 
959 	/* No callback?  Tell other side not to bother us. */
960 	if (!vq->vq.callback) {
961 		vring_split->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
962 		if (!vq->event)
963 			vring_split->vring.avail->flags = cpu_to_virtio16(vdev,
964 					vring_split->avail_flags_shadow);
965 	}
966 }
967 
968 static void virtqueue_reinit_split(struct vring_virtqueue *vq)
969 {
970 	int num;
971 
972 	num = vq->split.vring.num;
973 
974 	vq->split.vring.avail->flags = 0;
975 	vq->split.vring.avail->idx = 0;
976 
977 	/* reset avail event */
978 	vq->split.vring.avail->ring[num] = 0;
979 
980 	vq->split.vring.used->flags = 0;
981 	vq->split.vring.used->idx = 0;
982 
983 	/* reset used event */
984 	*(__virtio16 *)&(vq->split.vring.used->ring[num]) = 0;
985 
986 	virtqueue_init(vq, num);
987 
988 	virtqueue_vring_init_split(&vq->split, vq);
989 }
990 
991 static void virtqueue_vring_attach_split(struct vring_virtqueue *vq,
992 					 struct vring_virtqueue_split *vring_split)
993 {
994 	vq->split = *vring_split;
995 
996 	/* Put everything in free lists. */
997 	vq->free_head = 0;
998 }
999 
1000 static int vring_alloc_state_extra_split(struct vring_virtqueue_split *vring_split)
1001 {
1002 	struct vring_desc_state_split *state;
1003 	struct vring_desc_extra *extra;
1004 	u32 num = vring_split->vring.num;
1005 
1006 	state = kmalloc_array(num, sizeof(struct vring_desc_state_split), GFP_KERNEL);
1007 	if (!state)
1008 		goto err_state;
1009 
1010 	extra = vring_alloc_desc_extra(num);
1011 	if (!extra)
1012 		goto err_extra;
1013 
1014 	memset(state, 0, num * sizeof(struct vring_desc_state_split));
1015 
1016 	vring_split->desc_state = state;
1017 	vring_split->desc_extra = extra;
1018 	return 0;
1019 
1020 err_extra:
1021 	kfree(state);
1022 err_state:
1023 	return -ENOMEM;
1024 }
1025 
1026 static void vring_free_split(struct vring_virtqueue_split *vring_split,
1027 			     struct virtio_device *vdev)
1028 {
1029 	vring_free_queue(vdev, vring_split->queue_size_in_bytes,
1030 			 vring_split->vring.desc,
1031 			 vring_split->queue_dma_addr);
1032 
1033 	kfree(vring_split->desc_state);
1034 	kfree(vring_split->desc_extra);
1035 }
1036 
1037 static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split,
1038 				   struct virtio_device *vdev,
1039 				   u32 num,
1040 				   unsigned int vring_align,
1041 				   bool may_reduce_num)
1042 {
1043 	void *queue = NULL;
1044 	dma_addr_t dma_addr;
1045 
1046 	/* We assume num is a power of 2. */
1047 	if (num & (num - 1)) {
1048 		dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
1049 		return -EINVAL;
1050 	}
1051 
1052 	/* TODO: allocate each queue chunk individually */
1053 	for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
1054 		queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
1055 					  &dma_addr,
1056 					  GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
1057 		if (queue)
1058 			break;
1059 		if (!may_reduce_num)
1060 			return -ENOMEM;
1061 	}
1062 
1063 	if (!num)
1064 		return -ENOMEM;
1065 
1066 	if (!queue) {
1067 		/* Try to get a single page. You are my only hope! */
1068 		queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
1069 					  &dma_addr, GFP_KERNEL|__GFP_ZERO);
1070 	}
1071 	if (!queue)
1072 		return -ENOMEM;
1073 
1074 	vring_init(&vring_split->vring, num, queue, vring_align);
1075 
1076 	vring_split->queue_dma_addr = dma_addr;
1077 	vring_split->queue_size_in_bytes = vring_size(num, vring_align);
1078 
1079 	vring_split->vring_align = vring_align;
1080 	vring_split->may_reduce_num = may_reduce_num;
1081 
1082 	return 0;
1083 }
1084 
1085 static struct virtqueue *vring_create_virtqueue_split(
1086 	unsigned int index,
1087 	unsigned int num,
1088 	unsigned int vring_align,
1089 	struct virtio_device *vdev,
1090 	bool weak_barriers,
1091 	bool may_reduce_num,
1092 	bool context,
1093 	bool (*notify)(struct virtqueue *),
1094 	void (*callback)(struct virtqueue *),
1095 	const char *name)
1096 {
1097 	struct vring_virtqueue_split vring_split = {};
1098 	struct virtqueue *vq;
1099 	int err;
1100 
1101 	err = vring_alloc_queue_split(&vring_split, vdev, num, vring_align,
1102 				      may_reduce_num);
1103 	if (err)
1104 		return NULL;
1105 
1106 	vq = __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers,
1107 				   context, notify, callback, name);
1108 	if (!vq) {
1109 		vring_free_split(&vring_split, vdev);
1110 		return NULL;
1111 	}
1112 
1113 	to_vvq(vq)->we_own_ring = true;
1114 
1115 	return vq;
1116 }
1117 
1118 static int virtqueue_resize_split(struct virtqueue *_vq, u32 num)
1119 {
1120 	struct vring_virtqueue_split vring_split = {};
1121 	struct vring_virtqueue *vq = to_vvq(_vq);
1122 	struct virtio_device *vdev = _vq->vdev;
1123 	int err;
1124 
1125 	err = vring_alloc_queue_split(&vring_split, vdev, num,
1126 				      vq->split.vring_align,
1127 				      vq->split.may_reduce_num);
1128 	if (err)
1129 		goto err;
1130 
1131 	err = vring_alloc_state_extra_split(&vring_split);
1132 	if (err)
1133 		goto err_state_extra;
1134 
1135 	vring_free(&vq->vq);
1136 
1137 	virtqueue_vring_init_split(&vring_split, vq);
1138 
1139 	virtqueue_init(vq, vring_split.vring.num);
1140 	virtqueue_vring_attach_split(vq, &vring_split);
1141 
1142 	return 0;
1143 
1144 err_state_extra:
1145 	vring_free_split(&vring_split, vdev);
1146 err:
1147 	virtqueue_reinit_split(vq);
1148 	return -ENOMEM;
1149 }
1150 
1151 
1152 /*
1153  * Packed ring specific functions - *_packed().
1154  */
1155 static inline bool packed_used_wrap_counter(u16 last_used_idx)
1156 {
1157 	return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR));
1158 }
1159 
1160 static inline u16 packed_last_used(u16 last_used_idx)
1161 {
1162 	return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR));
1163 }
1164 
1165 static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
1166 				     struct vring_desc_extra *extra)
1167 {
1168 	u16 flags;
1169 
1170 	if (!vq->use_dma_api)
1171 		return;
1172 
1173 	flags = extra->flags;
1174 
1175 	if (flags & VRING_DESC_F_INDIRECT) {
1176 		dma_unmap_single(vring_dma_dev(vq),
1177 				 extra->addr, extra->len,
1178 				 (flags & VRING_DESC_F_WRITE) ?
1179 				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
1180 	} else {
1181 		dma_unmap_page(vring_dma_dev(vq),
1182 			       extra->addr, extra->len,
1183 			       (flags & VRING_DESC_F_WRITE) ?
1184 			       DMA_FROM_DEVICE : DMA_TO_DEVICE);
1185 	}
1186 }
1187 
1188 static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
1189 				   struct vring_packed_desc *desc)
1190 {
1191 	u16 flags;
1192 
1193 	if (!vq->use_dma_api)
1194 		return;
1195 
1196 	flags = le16_to_cpu(desc->flags);
1197 
1198 	dma_unmap_page(vring_dma_dev(vq),
1199 		       le64_to_cpu(desc->addr),
1200 		       le32_to_cpu(desc->len),
1201 		       (flags & VRING_DESC_F_WRITE) ?
1202 		       DMA_FROM_DEVICE : DMA_TO_DEVICE);
1203 }
1204 
1205 static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
1206 						       gfp_t gfp)
1207 {
1208 	struct vring_packed_desc *desc;
1209 
1210 	/*
1211 	 * We require lowmem mappings for the descriptors because
1212 	 * otherwise virt_to_phys will give us bogus addresses in the
1213 	 * virtqueue.
1214 	 */
1215 	gfp &= ~__GFP_HIGHMEM;
1216 
1217 	desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp);
1218 
1219 	return desc;
1220 }
1221 
1222 static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
1223 					 struct scatterlist *sgs[],
1224 					 unsigned int total_sg,
1225 					 unsigned int out_sgs,
1226 					 unsigned int in_sgs,
1227 					 void *data,
1228 					 gfp_t gfp)
1229 {
1230 	struct vring_packed_desc *desc;
1231 	struct scatterlist *sg;
1232 	unsigned int i, n, err_idx;
1233 	u16 head, id;
1234 	dma_addr_t addr;
1235 
1236 	head = vq->packed.next_avail_idx;
1237 	desc = alloc_indirect_packed(total_sg, gfp);
1238 	if (!desc)
1239 		return -ENOMEM;
1240 
1241 	if (unlikely(vq->vq.num_free < 1)) {
1242 		pr_debug("Can't add buf len 1 - avail = 0\n");
1243 		kfree(desc);
1244 		END_USE(vq);
1245 		return -ENOSPC;
1246 	}
1247 
1248 	i = 0;
1249 	id = vq->free_head;
1250 	BUG_ON(id == vq->packed.vring.num);
1251 
1252 	for (n = 0; n < out_sgs + in_sgs; n++) {
1253 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1254 			addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1255 					DMA_TO_DEVICE : DMA_FROM_DEVICE);
1256 			if (vring_mapping_error(vq, addr))
1257 				goto unmap_release;
1258 
1259 			desc[i].flags = cpu_to_le16(n < out_sgs ?
1260 						0 : VRING_DESC_F_WRITE);
1261 			desc[i].addr = cpu_to_le64(addr);
1262 			desc[i].len = cpu_to_le32(sg->length);
1263 			i++;
1264 		}
1265 	}
1266 
1267 	/* Now that the indirect table is filled in, map it. */
1268 	addr = vring_map_single(vq, desc,
1269 			total_sg * sizeof(struct vring_packed_desc),
1270 			DMA_TO_DEVICE);
1271 	if (vring_mapping_error(vq, addr))
1272 		goto unmap_release;
1273 
1274 	vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
1275 	vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
1276 				sizeof(struct vring_packed_desc));
1277 	vq->packed.vring.desc[head].id = cpu_to_le16(id);
1278 
1279 	if (vq->use_dma_api) {
1280 		vq->packed.desc_extra[id].addr = addr;
1281 		vq->packed.desc_extra[id].len = total_sg *
1282 				sizeof(struct vring_packed_desc);
1283 		vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
1284 						  vq->packed.avail_used_flags;
1285 	}
1286 
1287 	/*
1288 	 * A driver MUST NOT make the first descriptor in the list
1289 	 * available before all subsequent descriptors comprising
1290 	 * the list are made available.
1291 	 */
1292 	virtio_wmb(vq->weak_barriers);
1293 	vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
1294 						vq->packed.avail_used_flags);
1295 
1296 	/* We're using some buffers from the free list. */
1297 	vq->vq.num_free -= 1;
1298 
1299 	/* Update free pointer */
1300 	n = head + 1;
1301 	if (n >= vq->packed.vring.num) {
1302 		n = 0;
1303 		vq->packed.avail_wrap_counter ^= 1;
1304 		vq->packed.avail_used_flags ^=
1305 				1 << VRING_PACKED_DESC_F_AVAIL |
1306 				1 << VRING_PACKED_DESC_F_USED;
1307 	}
1308 	vq->packed.next_avail_idx = n;
1309 	vq->free_head = vq->packed.desc_extra[id].next;
1310 
1311 	/* Store token and indirect buffer state. */
1312 	vq->packed.desc_state[id].num = 1;
1313 	vq->packed.desc_state[id].data = data;
1314 	vq->packed.desc_state[id].indir_desc = desc;
1315 	vq->packed.desc_state[id].last = id;
1316 
1317 	vq->num_added += 1;
1318 
1319 	pr_debug("Added buffer head %i to %p\n", head, vq);
1320 	END_USE(vq);
1321 
1322 	return 0;
1323 
1324 unmap_release:
1325 	err_idx = i;
1326 
1327 	for (i = 0; i < err_idx; i++)
1328 		vring_unmap_desc_packed(vq, &desc[i]);
1329 
1330 	kfree(desc);
1331 
1332 	END_USE(vq);
1333 	return -ENOMEM;
1334 }
1335 
1336 static inline int virtqueue_add_packed(struct virtqueue *_vq,
1337 				       struct scatterlist *sgs[],
1338 				       unsigned int total_sg,
1339 				       unsigned int out_sgs,
1340 				       unsigned int in_sgs,
1341 				       void *data,
1342 				       void *ctx,
1343 				       gfp_t gfp)
1344 {
1345 	struct vring_virtqueue *vq = to_vvq(_vq);
1346 	struct vring_packed_desc *desc;
1347 	struct scatterlist *sg;
1348 	unsigned int i, n, c, descs_used, err_idx;
1349 	__le16 head_flags, flags;
1350 	u16 head, id, prev, curr, avail_used_flags;
1351 	int err;
1352 
1353 	START_USE(vq);
1354 
1355 	BUG_ON(data == NULL);
1356 	BUG_ON(ctx && vq->indirect);
1357 
1358 	if (unlikely(vq->broken)) {
1359 		END_USE(vq);
1360 		return -EIO;
1361 	}
1362 
1363 	LAST_ADD_TIME_UPDATE(vq);
1364 
1365 	BUG_ON(total_sg == 0);
1366 
1367 	if (virtqueue_use_indirect(vq, total_sg)) {
1368 		err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
1369 						    in_sgs, data, gfp);
1370 		if (err != -ENOMEM) {
1371 			END_USE(vq);
1372 			return err;
1373 		}
1374 
1375 		/* fall back on direct */
1376 	}
1377 
1378 	head = vq->packed.next_avail_idx;
1379 	avail_used_flags = vq->packed.avail_used_flags;
1380 
1381 	WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);
1382 
1383 	desc = vq->packed.vring.desc;
1384 	i = head;
1385 	descs_used = total_sg;
1386 
1387 	if (unlikely(vq->vq.num_free < descs_used)) {
1388 		pr_debug("Can't add buf len %i - avail = %i\n",
1389 			 descs_used, vq->vq.num_free);
1390 		END_USE(vq);
1391 		return -ENOSPC;
1392 	}
1393 
1394 	id = vq->free_head;
1395 	BUG_ON(id == vq->packed.vring.num);
1396 
1397 	curr = id;
1398 	c = 0;
1399 	for (n = 0; n < out_sgs + in_sgs; n++) {
1400 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1401 			dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1402 					DMA_TO_DEVICE : DMA_FROM_DEVICE);
1403 			if (vring_mapping_error(vq, addr))
1404 				goto unmap_release;
1405 
1406 			flags = cpu_to_le16(vq->packed.avail_used_flags |
1407 				    (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
1408 				    (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
1409 			if (i == head)
1410 				head_flags = flags;
1411 			else
1412 				desc[i].flags = flags;
1413 
1414 			desc[i].addr = cpu_to_le64(addr);
1415 			desc[i].len = cpu_to_le32(sg->length);
1416 			desc[i].id = cpu_to_le16(id);
1417 
1418 			if (unlikely(vq->use_dma_api)) {
1419 				vq->packed.desc_extra[curr].addr = addr;
1420 				vq->packed.desc_extra[curr].len = sg->length;
1421 				vq->packed.desc_extra[curr].flags =
1422 					le16_to_cpu(flags);
1423 			}
1424 			prev = curr;
1425 			curr = vq->packed.desc_extra[curr].next;
1426 
1427 			if ((unlikely(++i >= vq->packed.vring.num))) {
1428 				i = 0;
1429 				vq->packed.avail_used_flags ^=
1430 					1 << VRING_PACKED_DESC_F_AVAIL |
1431 					1 << VRING_PACKED_DESC_F_USED;
1432 			}
1433 		}
1434 	}
1435 
1436 	if (i < head)
1437 		vq->packed.avail_wrap_counter ^= 1;
1438 
1439 	/* We're using some buffers from the free list. */
1440 	vq->vq.num_free -= descs_used;
1441 
1442 	/* Update free pointer */
1443 	vq->packed.next_avail_idx = i;
1444 	vq->free_head = curr;
1445 
1446 	/* Store token. */
1447 	vq->packed.desc_state[id].num = descs_used;
1448 	vq->packed.desc_state[id].data = data;
1449 	vq->packed.desc_state[id].indir_desc = ctx;
1450 	vq->packed.desc_state[id].last = prev;
1451 
1452 	/*
1453 	 * A driver MUST NOT make the first descriptor in the list
1454 	 * available before all subsequent descriptors comprising
1455 	 * the list are made available.
1456 	 */
1457 	virtio_wmb(vq->weak_barriers);
1458 	vq->packed.vring.desc[head].flags = head_flags;
1459 	vq->num_added += descs_used;
1460 
1461 	pr_debug("Added buffer head %i to %p\n", head, vq);
1462 	END_USE(vq);
1463 
1464 	return 0;
1465 
1466 unmap_release:
1467 	err_idx = i;
1468 	i = head;
1469 	curr = vq->free_head;
1470 
1471 	vq->packed.avail_used_flags = avail_used_flags;
1472 
1473 	for (n = 0; n < total_sg; n++) {
1474 		if (i == err_idx)
1475 			break;
1476 		vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]);
1477 		curr = vq->packed.desc_extra[curr].next;
1478 		i++;
1479 		if (i >= vq->packed.vring.num)
1480 			i = 0;
1481 	}
1482 
1483 	END_USE(vq);
1484 	return -EIO;
1485 }
1486 
1487 static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
1488 {
1489 	struct vring_virtqueue *vq = to_vvq(_vq);
1490 	u16 new, old, off_wrap, flags, wrap_counter, event_idx;
1491 	bool needs_kick;
1492 	union {
1493 		struct {
1494 			__le16 off_wrap;
1495 			__le16 flags;
1496 		};
1497 		u32 u32;
1498 	} snapshot;
1499 
1500 	START_USE(vq);
1501 
1502 	/*
1503 	 * We need to expose the new flags value before checking notification
1504 	 * suppressions.
1505 	 */
1506 	virtio_mb(vq->weak_barriers);
1507 
1508 	old = vq->packed.next_avail_idx - vq->num_added;
1509 	new = vq->packed.next_avail_idx;
1510 	vq->num_added = 0;
1511 
1512 	snapshot.u32 = *(u32 *)vq->packed.vring.device;
1513 	flags = le16_to_cpu(snapshot.flags);
1514 
1515 	LAST_ADD_TIME_CHECK(vq);
1516 	LAST_ADD_TIME_INVALID(vq);
1517 
1518 	if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
1519 		needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
1520 		goto out;
1521 	}
1522 
1523 	off_wrap = le16_to_cpu(snapshot.off_wrap);
1524 
1525 	wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1526 	event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1527 	if (wrap_counter != vq->packed.avail_wrap_counter)
1528 		event_idx -= vq->packed.vring.num;
1529 
1530 	needs_kick = vring_need_event(event_idx, new, old);
1531 out:
1532 	END_USE(vq);
1533 	return needs_kick;
1534 }
1535 
1536 static void detach_buf_packed(struct vring_virtqueue *vq,
1537 			      unsigned int id, void **ctx)
1538 {
1539 	struct vring_desc_state_packed *state = NULL;
1540 	struct vring_packed_desc *desc;
1541 	unsigned int i, curr;
1542 
1543 	state = &vq->packed.desc_state[id];
1544 
1545 	/* Clear data ptr. */
1546 	state->data = NULL;
1547 
1548 	vq->packed.desc_extra[state->last].next = vq->free_head;
1549 	vq->free_head = id;
1550 	vq->vq.num_free += state->num;
1551 
1552 	if (unlikely(vq->use_dma_api)) {
1553 		curr = id;
1554 		for (i = 0; i < state->num; i++) {
1555 			vring_unmap_extra_packed(vq,
1556 						 &vq->packed.desc_extra[curr]);
1557 			curr = vq->packed.desc_extra[curr].next;
1558 		}
1559 	}
1560 
1561 	if (vq->indirect) {
1562 		u32 len;
1563 
1564 		/* Free the indirect table, if any, now that it's unmapped. */
1565 		desc = state->indir_desc;
1566 		if (!desc)
1567 			return;
1568 
1569 		if (vq->use_dma_api) {
1570 			len = vq->packed.desc_extra[id].len;
1571 			for (i = 0; i < len / sizeof(struct vring_packed_desc);
1572 					i++)
1573 				vring_unmap_desc_packed(vq, &desc[i]);
1574 		}
1575 		kfree(desc);
1576 		state->indir_desc = NULL;
1577 	} else if (ctx) {
1578 		*ctx = state->indir_desc;
1579 	}
1580 }
1581 
1582 static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
1583 				       u16 idx, bool used_wrap_counter)
1584 {
1585 	bool avail, used;
1586 	u16 flags;
1587 
1588 	flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
1589 	avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
1590 	used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
1591 
1592 	return avail == used && used == used_wrap_counter;
1593 }
1594 
1595 static inline bool more_used_packed(const struct vring_virtqueue *vq)
1596 {
1597 	u16 last_used;
1598 	u16 last_used_idx;
1599 	bool used_wrap_counter;
1600 
1601 	last_used_idx = READ_ONCE(vq->last_used_idx);
1602 	last_used = packed_last_used(last_used_idx);
1603 	used_wrap_counter = packed_used_wrap_counter(last_used_idx);
1604 	return is_used_desc_packed(vq, last_used, used_wrap_counter);
1605 }
1606 
1607 static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
1608 					  unsigned int *len,
1609 					  void **ctx)
1610 {
1611 	struct vring_virtqueue *vq = to_vvq(_vq);
1612 	u16 last_used, id, last_used_idx;
1613 	bool used_wrap_counter;
1614 	void *ret;
1615 
1616 	START_USE(vq);
1617 
1618 	if (unlikely(vq->broken)) {
1619 		END_USE(vq);
1620 		return NULL;
1621 	}
1622 
1623 	if (!more_used_packed(vq)) {
1624 		pr_debug("No more buffers in queue\n");
1625 		END_USE(vq);
1626 		return NULL;
1627 	}
1628 
1629 	/* Only get used elements after they have been exposed by host. */
1630 	virtio_rmb(vq->weak_barriers);
1631 
1632 	last_used_idx = READ_ONCE(vq->last_used_idx);
1633 	used_wrap_counter = packed_used_wrap_counter(last_used_idx);
1634 	last_used = packed_last_used(last_used_idx);
1635 	id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
1636 	*len = le32_to_cpu(vq->packed.vring.desc[last_used].len);
1637 
1638 	if (unlikely(id >= vq->packed.vring.num)) {
1639 		BAD_RING(vq, "id %u out of range\n", id);
1640 		return NULL;
1641 	}
1642 	if (unlikely(!vq->packed.desc_state[id].data)) {
1643 		BAD_RING(vq, "id %u is not a head!\n", id);
1644 		return NULL;
1645 	}
1646 
1647 	/* detach_buf_packed clears data, so grab it now. */
1648 	ret = vq->packed.desc_state[id].data;
1649 	detach_buf_packed(vq, id, ctx);
1650 
1651 	last_used += vq->packed.desc_state[id].num;
1652 	if (unlikely(last_used >= vq->packed.vring.num)) {
1653 		last_used -= vq->packed.vring.num;
1654 		used_wrap_counter ^= 1;
1655 	}
1656 
1657 	last_used = (last_used | (used_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1658 	WRITE_ONCE(vq->last_used_idx, last_used);
1659 
1660 	/*
1661 	 * If we expect an interrupt for the next entry, tell host
1662 	 * by writing event index and flush out the write before
1663 	 * the read in the next get_buf call.
1664 	 */
1665 	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
1666 		virtio_store_mb(vq->weak_barriers,
1667 				&vq->packed.vring.driver->off_wrap,
1668 				cpu_to_le16(vq->last_used_idx));
1669 
1670 	LAST_ADD_TIME_INVALID(vq);
1671 
1672 	END_USE(vq);
1673 	return ret;
1674 }
1675 
1676 static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
1677 {
1678 	struct vring_virtqueue *vq = to_vvq(_vq);
1679 
1680 	if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
1681 		vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1682 		vq->packed.vring.driver->flags =
1683 			cpu_to_le16(vq->packed.event_flags_shadow);
1684 	}
1685 }
1686 
1687 static unsigned int virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
1688 {
1689 	struct vring_virtqueue *vq = to_vvq(_vq);
1690 
1691 	START_USE(vq);
1692 
1693 	/*
1694 	 * We optimistically turn back on interrupts, then check if there was
1695 	 * more to do.
1696 	 */
1697 
1698 	if (vq->event) {
1699 		vq->packed.vring.driver->off_wrap =
1700 			cpu_to_le16(vq->last_used_idx);
1701 		/*
1702 		 * We need to update event offset and event wrap
1703 		 * counter first before updating event flags.
1704 		 */
1705 		virtio_wmb(vq->weak_barriers);
1706 	}
1707 
1708 	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1709 		vq->packed.event_flags_shadow = vq->event ?
1710 				VRING_PACKED_EVENT_FLAG_DESC :
1711 				VRING_PACKED_EVENT_FLAG_ENABLE;
1712 		vq->packed.vring.driver->flags =
1713 				cpu_to_le16(vq->packed.event_flags_shadow);
1714 	}
1715 
1716 	END_USE(vq);
1717 	return vq->last_used_idx;
1718 }
1719 
1720 static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
1721 {
1722 	struct vring_virtqueue *vq = to_vvq(_vq);
1723 	bool wrap_counter;
1724 	u16 used_idx;
1725 
1726 	wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1727 	used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1728 
1729 	return is_used_desc_packed(vq, used_idx, wrap_counter);
1730 }
1731 
1732 static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
1733 {
1734 	struct vring_virtqueue *vq = to_vvq(_vq);
1735 	u16 used_idx, wrap_counter, last_used_idx;
1736 	u16 bufs;
1737 
1738 	START_USE(vq);
1739 
1740 	/*
1741 	 * We optimistically turn back on interrupts, then check if there was
1742 	 * more to do.
1743 	 */
1744 
1745 	if (vq->event) {
1746 		/* TODO: tune this threshold */
1747 		bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4;
1748 		last_used_idx = READ_ONCE(vq->last_used_idx);
1749 		wrap_counter = packed_used_wrap_counter(last_used_idx);
1750 
1751 		used_idx = packed_last_used(last_used_idx) + bufs;
1752 		if (used_idx >= vq->packed.vring.num) {
1753 			used_idx -= vq->packed.vring.num;
1754 			wrap_counter ^= 1;
1755 		}
1756 
1757 		vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx |
1758 			(wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1759 
1760 		/*
1761 		 * We need to update event offset and event wrap
1762 		 * counter first before updating event flags.
1763 		 */
1764 		virtio_wmb(vq->weak_barriers);
1765 	}
1766 
1767 	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1768 		vq->packed.event_flags_shadow = vq->event ?
1769 				VRING_PACKED_EVENT_FLAG_DESC :
1770 				VRING_PACKED_EVENT_FLAG_ENABLE;
1771 		vq->packed.vring.driver->flags =
1772 				cpu_to_le16(vq->packed.event_flags_shadow);
1773 	}
1774 
1775 	/*
1776 	 * We need to update event suppression structure first
1777 	 * before re-checking for more used buffers.
1778 	 */
1779 	virtio_mb(vq->weak_barriers);
1780 
1781 	last_used_idx = READ_ONCE(vq->last_used_idx);
1782 	wrap_counter = packed_used_wrap_counter(last_used_idx);
1783 	used_idx = packed_last_used(last_used_idx);
1784 	if (is_used_desc_packed(vq, used_idx, wrap_counter)) {
1785 		END_USE(vq);
1786 		return false;
1787 	}
1788 
1789 	END_USE(vq);
1790 	return true;
1791 }
1792 
1793 static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
1794 {
1795 	struct vring_virtqueue *vq = to_vvq(_vq);
1796 	unsigned int i;
1797 	void *buf;
1798 
1799 	START_USE(vq);
1800 
1801 	for (i = 0; i < vq->packed.vring.num; i++) {
1802 		if (!vq->packed.desc_state[i].data)
1803 			continue;
1804 		/* detach_buf clears data, so grab it now. */
1805 		buf = vq->packed.desc_state[i].data;
1806 		detach_buf_packed(vq, i, NULL);
1807 		END_USE(vq);
1808 		return buf;
1809 	}
1810 	/* That should have freed everything. */
1811 	BUG_ON(vq->vq.num_free != vq->packed.vring.num);
1812 
1813 	END_USE(vq);
1814 	return NULL;
1815 }
1816 
1817 static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num)
1818 {
1819 	struct vring_desc_extra *desc_extra;
1820 	unsigned int i;
1821 
1822 	desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra),
1823 				   GFP_KERNEL);
1824 	if (!desc_extra)
1825 		return NULL;
1826 
1827 	memset(desc_extra, 0, num * sizeof(struct vring_desc_extra));
1828 
1829 	for (i = 0; i < num - 1; i++)
1830 		desc_extra[i].next = i + 1;
1831 
1832 	return desc_extra;
1833 }
1834 
1835 static void vring_free_packed(struct vring_virtqueue_packed *vring_packed,
1836 			      struct virtio_device *vdev)
1837 {
1838 	if (vring_packed->vring.desc)
1839 		vring_free_queue(vdev, vring_packed->ring_size_in_bytes,
1840 				 vring_packed->vring.desc,
1841 				 vring_packed->ring_dma_addr);
1842 
1843 	if (vring_packed->vring.driver)
1844 		vring_free_queue(vdev, vring_packed->event_size_in_bytes,
1845 				 vring_packed->vring.driver,
1846 				 vring_packed->driver_event_dma_addr);
1847 
1848 	if (vring_packed->vring.device)
1849 		vring_free_queue(vdev, vring_packed->event_size_in_bytes,
1850 				 vring_packed->vring.device,
1851 				 vring_packed->device_event_dma_addr);
1852 
1853 	kfree(vring_packed->desc_state);
1854 	kfree(vring_packed->desc_extra);
1855 }
1856 
1857 static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed,
1858 				    struct virtio_device *vdev,
1859 				    u32 num)
1860 {
1861 	struct vring_packed_desc *ring;
1862 	struct vring_packed_desc_event *driver, *device;
1863 	dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
1864 	size_t ring_size_in_bytes, event_size_in_bytes;
1865 
1866 	ring_size_in_bytes = num * sizeof(struct vring_packed_desc);
1867 
1868 	ring = vring_alloc_queue(vdev, ring_size_in_bytes,
1869 				 &ring_dma_addr,
1870 				 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1871 	if (!ring)
1872 		goto err;
1873 
1874 	vring_packed->vring.desc         = ring;
1875 	vring_packed->ring_dma_addr      = ring_dma_addr;
1876 	vring_packed->ring_size_in_bytes = ring_size_in_bytes;
1877 
1878 	event_size_in_bytes = sizeof(struct vring_packed_desc_event);
1879 
1880 	driver = vring_alloc_queue(vdev, event_size_in_bytes,
1881 				   &driver_event_dma_addr,
1882 				   GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1883 	if (!driver)
1884 		goto err;
1885 
1886 	vring_packed->vring.driver          = driver;
1887 	vring_packed->event_size_in_bytes   = event_size_in_bytes;
1888 	vring_packed->driver_event_dma_addr = driver_event_dma_addr;
1889 
1890 	device = vring_alloc_queue(vdev, event_size_in_bytes,
1891 				   &device_event_dma_addr,
1892 				   GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1893 	if (!device)
1894 		goto err;
1895 
1896 	vring_packed->vring.device          = device;
1897 	vring_packed->device_event_dma_addr = device_event_dma_addr;
1898 
1899 	vring_packed->vring.num = num;
1900 
1901 	return 0;
1902 
1903 err:
1904 	vring_free_packed(vring_packed, vdev);
1905 	return -ENOMEM;
1906 }
1907 
1908 static int vring_alloc_state_extra_packed(struct vring_virtqueue_packed *vring_packed)
1909 {
1910 	struct vring_desc_state_packed *state;
1911 	struct vring_desc_extra *extra;
1912 	u32 num = vring_packed->vring.num;
1913 
1914 	state = kmalloc_array(num, sizeof(struct vring_desc_state_packed), GFP_KERNEL);
1915 	if (!state)
1916 		goto err_desc_state;
1917 
1918 	memset(state, 0, num * sizeof(struct vring_desc_state_packed));
1919 
1920 	extra = vring_alloc_desc_extra(num);
1921 	if (!extra)
1922 		goto err_desc_extra;
1923 
1924 	vring_packed->desc_state = state;
1925 	vring_packed->desc_extra = extra;
1926 
1927 	return 0;
1928 
1929 err_desc_extra:
1930 	kfree(state);
1931 err_desc_state:
1932 	return -ENOMEM;
1933 }
1934 
1935 static void virtqueue_vring_init_packed(struct vring_virtqueue_packed *vring_packed,
1936 					bool callback)
1937 {
1938 	vring_packed->next_avail_idx = 0;
1939 	vring_packed->avail_wrap_counter = 1;
1940 	vring_packed->event_flags_shadow = 0;
1941 	vring_packed->avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
1942 
1943 	/* No callback?  Tell other side not to bother us. */
1944 	if (!callback) {
1945 		vring_packed->event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1946 		vring_packed->vring.driver->flags =
1947 			cpu_to_le16(vring_packed->event_flags_shadow);
1948 	}
1949 }
1950 
1951 static void virtqueue_vring_attach_packed(struct vring_virtqueue *vq,
1952 					  struct vring_virtqueue_packed *vring_packed)
1953 {
1954 	vq->packed = *vring_packed;
1955 
1956 	/* Put everything in free lists. */
1957 	vq->free_head = 0;
1958 }
1959 
1960 static void virtqueue_reinit_packed(struct vring_virtqueue *vq)
1961 {
1962 	memset(vq->packed.vring.device, 0, vq->packed.event_size_in_bytes);
1963 	memset(vq->packed.vring.driver, 0, vq->packed.event_size_in_bytes);
1964 
1965 	/* we need to reset the desc.flags. For more, see is_used_desc_packed() */
1966 	memset(vq->packed.vring.desc, 0, vq->packed.ring_size_in_bytes);
1967 
1968 	virtqueue_init(vq, vq->packed.vring.num);
1969 	virtqueue_vring_init_packed(&vq->packed, !!vq->vq.callback);
1970 }
1971 
1972 static struct virtqueue *vring_create_virtqueue_packed(
1973 	unsigned int index,
1974 	unsigned int num,
1975 	unsigned int vring_align,
1976 	struct virtio_device *vdev,
1977 	bool weak_barriers,
1978 	bool may_reduce_num,
1979 	bool context,
1980 	bool (*notify)(struct virtqueue *),
1981 	void (*callback)(struct virtqueue *),
1982 	const char *name)
1983 {
1984 	struct vring_virtqueue_packed vring_packed = {};
1985 	struct vring_virtqueue *vq;
1986 	int err;
1987 
1988 	if (vring_alloc_queue_packed(&vring_packed, vdev, num))
1989 		goto err_ring;
1990 
1991 	vq = kmalloc(sizeof(*vq), GFP_KERNEL);
1992 	if (!vq)
1993 		goto err_vq;
1994 
1995 	vq->vq.callback = callback;
1996 	vq->vq.vdev = vdev;
1997 	vq->vq.name = name;
1998 	vq->vq.index = index;
1999 	vq->vq.reset = false;
2000 	vq->we_own_ring = true;
2001 	vq->notify = notify;
2002 	vq->weak_barriers = weak_barriers;
2003 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
2004 	vq->broken = true;
2005 #else
2006 	vq->broken = false;
2007 #endif
2008 	vq->packed_ring = true;
2009 	vq->use_dma_api = vring_use_dma_api(vdev);
2010 
2011 	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2012 		!context;
2013 	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
2014 
2015 	if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2016 		vq->weak_barriers = false;
2017 
2018 	err = vring_alloc_state_extra_packed(&vring_packed);
2019 	if (err)
2020 		goto err_state_extra;
2021 
2022 	virtqueue_vring_init_packed(&vring_packed, !!callback);
2023 
2024 	virtqueue_init(vq, num);
2025 	virtqueue_vring_attach_packed(vq, &vring_packed);
2026 
2027 	spin_lock(&vdev->vqs_list_lock);
2028 	list_add_tail(&vq->vq.list, &vdev->vqs);
2029 	spin_unlock(&vdev->vqs_list_lock);
2030 	return &vq->vq;
2031 
2032 err_state_extra:
2033 	kfree(vq);
2034 err_vq:
2035 	vring_free_packed(&vring_packed, vdev);
2036 err_ring:
2037 	return NULL;
2038 }
2039 
2040 static int virtqueue_resize_packed(struct virtqueue *_vq, u32 num)
2041 {
2042 	struct vring_virtqueue_packed vring_packed = {};
2043 	struct vring_virtqueue *vq = to_vvq(_vq);
2044 	struct virtio_device *vdev = _vq->vdev;
2045 	int err;
2046 
2047 	if (vring_alloc_queue_packed(&vring_packed, vdev, num))
2048 		goto err_ring;
2049 
2050 	err = vring_alloc_state_extra_packed(&vring_packed);
2051 	if (err)
2052 		goto err_state_extra;
2053 
2054 	vring_free(&vq->vq);
2055 
2056 	virtqueue_vring_init_packed(&vring_packed, !!vq->vq.callback);
2057 
2058 	virtqueue_init(vq, vring_packed.vring.num);
2059 	virtqueue_vring_attach_packed(vq, &vring_packed);
2060 
2061 	return 0;
2062 
2063 err_state_extra:
2064 	vring_free_packed(&vring_packed, vdev);
2065 err_ring:
2066 	virtqueue_reinit_packed(vq);
2067 	return -ENOMEM;
2068 }
2069 
2070 
2071 /*
2072  * Generic functions and exported symbols.
2073  */
2074 
2075 static inline int virtqueue_add(struct virtqueue *_vq,
2076 				struct scatterlist *sgs[],
2077 				unsigned int total_sg,
2078 				unsigned int out_sgs,
2079 				unsigned int in_sgs,
2080 				void *data,
2081 				void *ctx,
2082 				gfp_t gfp)
2083 {
2084 	struct vring_virtqueue *vq = to_vvq(_vq);
2085 
2086 	return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
2087 					out_sgs, in_sgs, data, ctx, gfp) :
2088 				 virtqueue_add_split(_vq, sgs, total_sg,
2089 					out_sgs, in_sgs, data, ctx, gfp);
2090 }
2091 
2092 /**
2093  * virtqueue_add_sgs - expose buffers to other end
2094  * @_vq: the struct virtqueue we're talking about.
2095  * @sgs: array of terminated scatterlists.
2096  * @out_sgs: the number of scatterlists readable by other side
2097  * @in_sgs: the number of scatterlists which are writable (after readable ones)
2098  * @data: the token identifying the buffer.
2099  * @gfp: how to do memory allocations (if necessary).
2100  *
2101  * Caller must ensure we don't call this with other virtqueue operations
2102  * at the same time (except where noted).
2103  *
2104  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2105  */
2106 int virtqueue_add_sgs(struct virtqueue *_vq,
2107 		      struct scatterlist *sgs[],
2108 		      unsigned int out_sgs,
2109 		      unsigned int in_sgs,
2110 		      void *data,
2111 		      gfp_t gfp)
2112 {
2113 	unsigned int i, total_sg = 0;
2114 
2115 	/* Count them first. */
2116 	for (i = 0; i < out_sgs + in_sgs; i++) {
2117 		struct scatterlist *sg;
2118 
2119 		for (sg = sgs[i]; sg; sg = sg_next(sg))
2120 			total_sg++;
2121 	}
2122 	return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
2123 			     data, NULL, gfp);
2124 }
2125 EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
2126 
2127 /**
2128  * virtqueue_add_outbuf - expose output buffers to other end
2129  * @vq: the struct virtqueue we're talking about.
2130  * @sg: scatterlist (must be well-formed and terminated!)
2131  * @num: the number of entries in @sg readable by other side
2132  * @data: the token identifying the buffer.
2133  * @gfp: how to do memory allocations (if necessary).
2134  *
2135  * Caller must ensure we don't call this with other virtqueue operations
2136  * at the same time (except where noted).
2137  *
2138  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2139  */
2140 int virtqueue_add_outbuf(struct virtqueue *vq,
2141 			 struct scatterlist *sg, unsigned int num,
2142 			 void *data,
2143 			 gfp_t gfp)
2144 {
2145 	return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp);
2146 }
2147 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
2148 
2149 /**
2150  * virtqueue_add_inbuf - expose input buffers to other end
2151  * @vq: the struct virtqueue we're talking about.
2152  * @sg: scatterlist (must be well-formed and terminated!)
2153  * @num: the number of entries in @sg writable by other side
2154  * @data: the token identifying the buffer.
2155  * @gfp: how to do memory allocations (if necessary).
2156  *
2157  * Caller must ensure we don't call this with other virtqueue operations
2158  * at the same time (except where noted).
2159  *
2160  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2161  */
2162 int virtqueue_add_inbuf(struct virtqueue *vq,
2163 			struct scatterlist *sg, unsigned int num,
2164 			void *data,
2165 			gfp_t gfp)
2166 {
2167 	return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp);
2168 }
2169 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
2170 
2171 /**
2172  * virtqueue_add_inbuf_ctx - expose input buffers to other end
2173  * @vq: the struct virtqueue we're talking about.
2174  * @sg: scatterlist (must be well-formed and terminated!)
2175  * @num: the number of entries in @sg writable by other side
2176  * @data: the token identifying the buffer.
2177  * @ctx: extra context for the token
2178  * @gfp: how to do memory allocations (if necessary).
2179  *
2180  * Caller must ensure we don't call this with other virtqueue operations
2181  * at the same time (except where noted).
2182  *
2183  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2184  */
2185 int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
2186 			struct scatterlist *sg, unsigned int num,
2187 			void *data,
2188 			void *ctx,
2189 			gfp_t gfp)
2190 {
2191 	return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp);
2192 }
2193 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
2194 
2195 /**
2196  * virtqueue_kick_prepare - first half of split virtqueue_kick call.
2197  * @_vq: the struct virtqueue
2198  *
2199  * Instead of virtqueue_kick(), you can do:
2200  *	if (virtqueue_kick_prepare(vq))
2201  *		virtqueue_notify(vq);
2202  *
2203  * This is sometimes useful because the virtqueue_kick_prepare() needs
2204  * to be serialized, but the actual virtqueue_notify() call does not.
2205  */
2206 bool virtqueue_kick_prepare(struct virtqueue *_vq)
2207 {
2208 	struct vring_virtqueue *vq = to_vvq(_vq);
2209 
2210 	return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) :
2211 				 virtqueue_kick_prepare_split(_vq);
2212 }
2213 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
2214 
2215 /**
2216  * virtqueue_notify - second half of split virtqueue_kick call.
2217  * @_vq: the struct virtqueue
2218  *
2219  * This does not need to be serialized.
2220  *
2221  * Returns false if host notify failed or queue is broken, otherwise true.
2222  */
2223 bool virtqueue_notify(struct virtqueue *_vq)
2224 {
2225 	struct vring_virtqueue *vq = to_vvq(_vq);
2226 
2227 	if (unlikely(vq->broken))
2228 		return false;
2229 
2230 	/* Prod other side to tell it about changes. */
2231 	if (!vq->notify(_vq)) {
2232 		vq->broken = true;
2233 		return false;
2234 	}
2235 	return true;
2236 }
2237 EXPORT_SYMBOL_GPL(virtqueue_notify);
2238 
2239 /**
2240  * virtqueue_kick - update after add_buf
2241  * @vq: the struct virtqueue
2242  *
2243  * After one or more virtqueue_add_* calls, invoke this to kick
2244  * the other side.
2245  *
2246  * Caller must ensure we don't call this with other virtqueue
2247  * operations at the same time (except where noted).
2248  *
2249  * Returns false if kick failed, otherwise true.
2250  */
2251 bool virtqueue_kick(struct virtqueue *vq)
2252 {
2253 	if (virtqueue_kick_prepare(vq))
2254 		return virtqueue_notify(vq);
2255 	return true;
2256 }
2257 EXPORT_SYMBOL_GPL(virtqueue_kick);
2258 
2259 /**
2260  * virtqueue_get_buf_ctx - get the next used buffer
2261  * @_vq: the struct virtqueue we're talking about.
2262  * @len: the length written into the buffer
2263  * @ctx: extra context for the token
2264  *
2265  * If the device wrote data into the buffer, @len will be set to the
2266  * amount written.  This means you don't need to clear the buffer
2267  * beforehand to ensure there's no data leakage in the case of short
2268  * writes.
2269  *
2270  * Caller must ensure we don't call this with other virtqueue
2271  * operations at the same time (except where noted).
2272  *
2273  * Returns NULL if there are no used buffers, or the "data" token
2274  * handed to virtqueue_add_*().
2275  */
2276 void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
2277 			    void **ctx)
2278 {
2279 	struct vring_virtqueue *vq = to_vvq(_vq);
2280 
2281 	return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
2282 				 virtqueue_get_buf_ctx_split(_vq, len, ctx);
2283 }
2284 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
2285 
2286 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
2287 {
2288 	return virtqueue_get_buf_ctx(_vq, len, NULL);
2289 }
2290 EXPORT_SYMBOL_GPL(virtqueue_get_buf);
2291 /**
2292  * virtqueue_disable_cb - disable callbacks
2293  * @_vq: the struct virtqueue we're talking about.
2294  *
2295  * Note that this is not necessarily synchronous, hence unreliable and only
2296  * useful as an optimization.
2297  *
2298  * Unlike other operations, this need not be serialized.
2299  */
2300 void virtqueue_disable_cb(struct virtqueue *_vq)
2301 {
2302 	struct vring_virtqueue *vq = to_vvq(_vq);
2303 
2304 	/* If device triggered an event already it won't trigger one again:
2305 	 * no need to disable.
2306 	 */
2307 	if (vq->event_triggered)
2308 		return;
2309 
2310 	if (vq->packed_ring)
2311 		virtqueue_disable_cb_packed(_vq);
2312 	else
2313 		virtqueue_disable_cb_split(_vq);
2314 }
2315 EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
2316 
2317 /**
2318  * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
2319  * @_vq: the struct virtqueue we're talking about.
2320  *
2321  * This re-enables callbacks; it returns current queue state
2322  * in an opaque unsigned value. This value should be later tested by
2323  * virtqueue_poll, to detect a possible race between the driver checking for
2324  * more work, and enabling callbacks.
2325  *
2326  * Caller must ensure we don't call this with other virtqueue
2327  * operations at the same time (except where noted).
2328  */
2329 unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq)
2330 {
2331 	struct vring_virtqueue *vq = to_vvq(_vq);
2332 
2333 	if (vq->event_triggered)
2334 		vq->event_triggered = false;
2335 
2336 	return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) :
2337 				 virtqueue_enable_cb_prepare_split(_vq);
2338 }
2339 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
2340 
2341 /**
2342  * virtqueue_poll - query pending used buffers
2343  * @_vq: the struct virtqueue we're talking about.
2344  * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
2345  *
2346  * Returns "true" if there are pending used buffers in the queue.
2347  *
2348  * This does not need to be serialized.
2349  */
2350 bool virtqueue_poll(struct virtqueue *_vq, unsigned int last_used_idx)
2351 {
2352 	struct vring_virtqueue *vq = to_vvq(_vq);
2353 
2354 	if (unlikely(vq->broken))
2355 		return false;
2356 
2357 	virtio_mb(vq->weak_barriers);
2358 	return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
2359 				 virtqueue_poll_split(_vq, last_used_idx);
2360 }
2361 EXPORT_SYMBOL_GPL(virtqueue_poll);
2362 
2363 /**
2364  * virtqueue_enable_cb - restart callbacks after disable_cb.
2365  * @_vq: the struct virtqueue we're talking about.
2366  *
2367  * This re-enables callbacks; it returns "false" if there are pending
2368  * buffers in the queue, to detect a possible race between the driver
2369  * checking for more work, and enabling callbacks.
2370  *
2371  * Caller must ensure we don't call this with other virtqueue
2372  * operations at the same time (except where noted).
2373  */
2374 bool virtqueue_enable_cb(struct virtqueue *_vq)
2375 {
2376 	unsigned int last_used_idx = virtqueue_enable_cb_prepare(_vq);
2377 
2378 	return !virtqueue_poll(_vq, last_used_idx);
2379 }
2380 EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
2381 
2382 /**
2383  * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
2384  * @_vq: the struct virtqueue we're talking about.
2385  *
2386  * This re-enables callbacks but hints to the other side to delay
2387  * interrupts until most of the available buffers have been processed;
2388  * it returns "false" if there are many pending buffers in the queue,
2389  * to detect a possible race between the driver checking for more work,
2390  * and enabling callbacks.
2391  *
2392  * Caller must ensure we don't call this with other virtqueue
2393  * operations at the same time (except where noted).
2394  */
2395 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
2396 {
2397 	struct vring_virtqueue *vq = to_vvq(_vq);
2398 
2399 	if (vq->event_triggered)
2400 		vq->event_triggered = false;
2401 
2402 	return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
2403 				 virtqueue_enable_cb_delayed_split(_vq);
2404 }
2405 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
2406 
2407 /**
2408  * virtqueue_detach_unused_buf - detach first unused buffer
2409  * @_vq: the struct virtqueue we're talking about.
2410  *
2411  * Returns NULL or the "data" token handed to virtqueue_add_*().
2412  * This is not valid on an active queue; it is useful for device
2413  * shutdown or the reset queue.
2414  */
2415 void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
2416 {
2417 	struct vring_virtqueue *vq = to_vvq(_vq);
2418 
2419 	return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
2420 				 virtqueue_detach_unused_buf_split(_vq);
2421 }
2422 EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
2423 
2424 static inline bool more_used(const struct vring_virtqueue *vq)
2425 {
2426 	return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
2427 }
2428 
/**
 * vring_interrupt - notify a virtqueue on an interrupt
 * @irq: the IRQ number (ignored)
 * @_vq: the struct virtqueue to notify
 *
 * Calls the callback function of @_vq to process the virtqueue
 * notification.
 *
 * Returns IRQ_NONE when the queue has no used buffers pending.  For a
 * broken queue the callback is not invoked: with
 * CONFIG_VIRTIO_HARDEN_NOTIFICATION a one-time warning is logged and
 * IRQ_NONE is returned, otherwise IRQ_HANDLED is returned.  In all
 * remaining cases IRQ_HANDLED is returned, whether or not a callback
 * is installed.
 */
irqreturn_t vring_interrupt(int irq, void *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	/* Nothing was used by the device: this interrupt is not for us. */
	if (!more_used(vq)) {
		pr_debug("virtqueue interrupt with no work for %p\n", vq);
		return IRQ_NONE;
	}

	/* Never run the callback on a queue that is marked broken. */
	if (unlikely(vq->broken)) {
#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
		dev_warn_once(&vq->vq.vdev->dev,
			      "virtio vring IRQ raised before DRIVER_OK");
		return IRQ_NONE;
#else
		return IRQ_HANDLED;
#endif
	}

	/* Just a hint for performance: so it's ok that this can be racy! */
	if (vq->event)
		vq->event_triggered = true;

	pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
	/* The callback is optional. */
	if (vq->vq.callback)
		vq->vq.callback(&vq->vq);

	return IRQ_HANDLED;
}
EXPORT_SYMBOL_GPL(vring_interrupt);
2467 
/*
 * Only available for split ring.
 *
 * Allocate a vring_virtqueue, set it up around the caller-provided split
 * vring description and add it to @vdev's list of virtqueues.  The ring
 * is recorded as not owned by the virtqueue (we_own_ring = false).
 *
 * Returns the embedded struct virtqueue, or NULL if the device has
 * VIRTIO_F_RING_PACKED set, if the virtqueue allocation fails, or if
 * vring_alloc_state_extra_split() returns an error.
 */
static struct virtqueue *__vring_new_virtqueue(unsigned int index,
					       struct vring_virtqueue_split *vring_split,
					       struct virtio_device *vdev,
					       bool weak_barriers,
					       bool context,
					       bool (*notify)(struct virtqueue *),
					       void (*callback)(struct virtqueue *),
					       const char *name)
{
	struct vring_virtqueue *vq;
	int err;

	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
		return NULL;

	/*
	 * kmalloc() does not zero the object; fields not assigned below are
	 * presumably set up by the init/attach helpers called further down.
	 * NOTE(review): confirm against those helpers.
	 */
	vq = kmalloc(sizeof(*vq), GFP_KERNEL);
	if (!vq)
		return NULL;

	vq->packed_ring = false;
	vq->vq.callback = callback;
	vq->vq.vdev = vdev;
	vq->vq.name = name;
	vq->vq.index = index;
	vq->vq.reset = false;
	vq->we_own_ring = false;
	vq->notify = notify;
	vq->weak_barriers = weak_barriers;
	/* With hardened notifications the queue starts out marked broken. */
#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
	vq->broken = true;
#else
	vq->broken = false;
#endif
	vq->use_dma_api = vring_use_dma_api(vdev);

	/* Indirect descriptors are only used when no per-buffer context is. */
	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
		!context;
	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);

	/* VIRTIO_F_ORDER_PLATFORM overrides the caller's weak_barriers choice. */
	if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
		vq->weak_barriers = false;

	err = vring_alloc_state_extra_split(vring_split);
	if (err) {
		kfree(vq);
		return NULL;
	}

	virtqueue_vring_init_split(vring_split, vq);

	virtqueue_init(vq, vring_split->vring.num);
	virtqueue_vring_attach_split(vq, vring_split);

	/* Add the new queue to the device's list under vqs_list_lock. */
	spin_lock(&vdev->vqs_list_lock);
	list_add_tail(&vq->vq.list, &vdev->vqs);
	spin_unlock(&vdev->vqs_list_lock);
	return &vq->vq;
}
2527 
2528 struct virtqueue *vring_create_virtqueue(
2529 	unsigned int index,
2530 	unsigned int num,
2531 	unsigned int vring_align,
2532 	struct virtio_device *vdev,
2533 	bool weak_barriers,
2534 	bool may_reduce_num,
2535 	bool context,
2536 	bool (*notify)(struct virtqueue *),
2537 	void (*callback)(struct virtqueue *),
2538 	const char *name)
2539 {
2540 
2541 	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2542 		return vring_create_virtqueue_packed(index, num, vring_align,
2543 				vdev, weak_barriers, may_reduce_num,
2544 				context, notify, callback, name);
2545 
2546 	return vring_create_virtqueue_split(index, num, vring_align,
2547 			vdev, weak_barriers, may_reduce_num,
2548 			context, notify, callback, name);
2549 }
2550 EXPORT_SYMBOL_GPL(vring_create_virtqueue);
2551 
2552 /**
2553  * virtqueue_resize - resize the vring of vq
2554  * @_vq: the struct virtqueue we're talking about.
2555  * @num: new ring num
2556  * @recycle: callback for recycle the useless buffer
2557  *
2558  * When it is really necessary to create a new vring, it will set the current vq
2559  * into the reset state. Then call the passed callback to recycle the buffer
2560  * that is no longer used. Only after the new vring is successfully created, the
2561  * old vring will be released.
2562  *
2563  * Caller must ensure we don't call this with other virtqueue operations
2564  * at the same time (except where noted).
2565  *
2566  * Returns zero or a negative error.
2567  * 0: success.
2568  * -ENOMEM: Failed to allocate a new ring, fall back to the original ring size.
2569  *  vq can still work normally
2570  * -EBUSY: Failed to sync with device, vq may not work properly
2571  * -ENOENT: Transport or device not supported
2572  * -E2BIG/-EINVAL: num error
2573  * -EPERM: Operation not permitted
2574  *
2575  */
2576 int virtqueue_resize(struct virtqueue *_vq, u32 num,
2577 		     void (*recycle)(struct virtqueue *vq, void *buf))
2578 {
2579 	struct vring_virtqueue *vq = to_vvq(_vq);
2580 	struct virtio_device *vdev = vq->vq.vdev;
2581 	void *buf;
2582 	int err;
2583 
2584 	if (!vq->we_own_ring)
2585 		return -EPERM;
2586 
2587 	if (num > vq->vq.num_max)
2588 		return -E2BIG;
2589 
2590 	if (!num)
2591 		return -EINVAL;
2592 
2593 	if ((vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num) == num)
2594 		return 0;
2595 
2596 	if (!vdev->config->disable_vq_and_reset)
2597 		return -ENOENT;
2598 
2599 	if (!vdev->config->enable_vq_after_reset)
2600 		return -ENOENT;
2601 
2602 	err = vdev->config->disable_vq_and_reset(_vq);
2603 	if (err)
2604 		return err;
2605 
2606 	while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
2607 		recycle(_vq, buf);
2608 
2609 	if (vq->packed_ring)
2610 		err = virtqueue_resize_packed(_vq, num);
2611 	else
2612 		err = virtqueue_resize_split(_vq, num);
2613 
2614 	if (vdev->config->enable_vq_after_reset(_vq))
2615 		return -EBUSY;
2616 
2617 	return err;
2618 }
2619 EXPORT_SYMBOL_GPL(virtqueue_resize);
2620 
2621 /* Only available for split ring */
2622 struct virtqueue *vring_new_virtqueue(unsigned int index,
2623 				      unsigned int num,
2624 				      unsigned int vring_align,
2625 				      struct virtio_device *vdev,
2626 				      bool weak_barriers,
2627 				      bool context,
2628 				      void *pages,
2629 				      bool (*notify)(struct virtqueue *vq),
2630 				      void (*callback)(struct virtqueue *vq),
2631 				      const char *name)
2632 {
2633 	struct vring_virtqueue_split vring_split = {};
2634 
2635 	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2636 		return NULL;
2637 
2638 	vring_init(&vring_split.vring, num, pages, vring_align);
2639 	return __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers,
2640 				     context, notify, callback, name);
2641 }
2642 EXPORT_SYMBOL_GPL(vring_new_virtqueue);
2643 
2644 static void vring_free(struct virtqueue *_vq)
2645 {
2646 	struct vring_virtqueue *vq = to_vvq(_vq);
2647 
2648 	if (vq->we_own_ring) {
2649 		if (vq->packed_ring) {
2650 			vring_free_queue(vq->vq.vdev,
2651 					 vq->packed.ring_size_in_bytes,
2652 					 vq->packed.vring.desc,
2653 					 vq->packed.ring_dma_addr);
2654 
2655 			vring_free_queue(vq->vq.vdev,
2656 					 vq->packed.event_size_in_bytes,
2657 					 vq->packed.vring.driver,
2658 					 vq->packed.driver_event_dma_addr);
2659 
2660 			vring_free_queue(vq->vq.vdev,
2661 					 vq->packed.event_size_in_bytes,
2662 					 vq->packed.vring.device,
2663 					 vq->packed.device_event_dma_addr);
2664 
2665 			kfree(vq->packed.desc_state);
2666 			kfree(vq->packed.desc_extra);
2667 		} else {
2668 			vring_free_queue(vq->vq.vdev,
2669 					 vq->split.queue_size_in_bytes,
2670 					 vq->split.vring.desc,
2671 					 vq->split.queue_dma_addr);
2672 		}
2673 	}
2674 	if (!vq->packed_ring) {
2675 		kfree(vq->split.desc_state);
2676 		kfree(vq->split.desc_extra);
2677 	}
2678 }
2679 
2680 void vring_del_virtqueue(struct virtqueue *_vq)
2681 {
2682 	struct vring_virtqueue *vq = to_vvq(_vq);
2683 
2684 	spin_lock(&vq->vq.vdev->vqs_list_lock);
2685 	list_del(&_vq->list);
2686 	spin_unlock(&vq->vq.vdev->vqs_list_lock);
2687 
2688 	vring_free(_vq);
2689 
2690 	kfree(vq);
2691 }
2692 EXPORT_SYMBOL_GPL(vring_del_virtqueue);
2693 
2694 /* Manipulates transport-specific feature bits. */
2695 void vring_transport_features(struct virtio_device *vdev)
2696 {
2697 	unsigned int i;
2698 
2699 	for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
2700 		switch (i) {
2701 		case VIRTIO_RING_F_INDIRECT_DESC:
2702 			break;
2703 		case VIRTIO_RING_F_EVENT_IDX:
2704 			break;
2705 		case VIRTIO_F_VERSION_1:
2706 			break;
2707 		case VIRTIO_F_ACCESS_PLATFORM:
2708 			break;
2709 		case VIRTIO_F_RING_PACKED:
2710 			break;
2711 		case VIRTIO_F_ORDER_PLATFORM:
2712 			break;
2713 		default:
2714 			/* We don't understand this bit. */
2715 			__virtio_clear_bit(vdev, i);
2716 		}
2717 	}
2718 }
2719 EXPORT_SYMBOL_GPL(vring_transport_features);
2720 
2721 /**
2722  * virtqueue_get_vring_size - return the size of the virtqueue's vring
2723  * @_vq: the struct virtqueue containing the vring of interest.
2724  *
2725  * Returns the size of the vring.  This is mainly used for boasting to
2726  * userspace.  Unlike other operations, this need not be serialized.
2727  */
2728 unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
2729 {
2730 
2731 	struct vring_virtqueue *vq = to_vvq(_vq);
2732 
2733 	return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
2734 }
2735 EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
2736 
2737 /*
2738  * This function should only be called by the core, not directly by the driver.
2739  */
2740 void __virtqueue_break(struct virtqueue *_vq)
2741 {
2742 	struct vring_virtqueue *vq = to_vvq(_vq);
2743 
2744 	/* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2745 	WRITE_ONCE(vq->broken, true);
2746 }
2747 EXPORT_SYMBOL_GPL(__virtqueue_break);
2748 
2749 /*
2750  * This function should only be called by the core, not directly by the driver.
2751  */
2752 void __virtqueue_unbreak(struct virtqueue *_vq)
2753 {
2754 	struct vring_virtqueue *vq = to_vvq(_vq);
2755 
2756 	/* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2757 	WRITE_ONCE(vq->broken, false);
2758 }
2759 EXPORT_SYMBOL_GPL(__virtqueue_unbreak);
2760 
2761 bool virtqueue_is_broken(struct virtqueue *_vq)
2762 {
2763 	struct vring_virtqueue *vq = to_vvq(_vq);
2764 
2765 	return READ_ONCE(vq->broken);
2766 }
2767 EXPORT_SYMBOL_GPL(virtqueue_is_broken);
2768 
2769 /*
2770  * This should prevent the device from being used, allowing drivers to
2771  * recover.  You may need to grab appropriate locks to flush.
2772  */
2773 void virtio_break_device(struct virtio_device *dev)
2774 {
2775 	struct virtqueue *_vq;
2776 
2777 	spin_lock(&dev->vqs_list_lock);
2778 	list_for_each_entry(_vq, &dev->vqs, list) {
2779 		struct vring_virtqueue *vq = to_vvq(_vq);
2780 
2781 		/* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2782 		WRITE_ONCE(vq->broken, true);
2783 	}
2784 	spin_unlock(&dev->vqs_list_lock);
2785 }
2786 EXPORT_SYMBOL_GPL(virtio_break_device);
2787 
2788 /*
2789  * This should allow the device to be used by the driver. You may
2790  * need to grab appropriate locks to flush the write to
2791  * vq->broken. This should only be used in some specific case e.g
2792  * (probing and restoring). This function should only be called by the
2793  * core, not directly by the driver.
2794  */
2795 void __virtio_unbreak_device(struct virtio_device *dev)
2796 {
2797 	struct virtqueue *_vq;
2798 
2799 	spin_lock(&dev->vqs_list_lock);
2800 	list_for_each_entry(_vq, &dev->vqs, list) {
2801 		struct vring_virtqueue *vq = to_vvq(_vq);
2802 
2803 		/* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2804 		WRITE_ONCE(vq->broken, false);
2805 	}
2806 	spin_unlock(&dev->vqs_list_lock);
2807 }
2808 EXPORT_SYMBOL_GPL(__virtio_unbreak_device);
2809 
2810 dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
2811 {
2812 	struct vring_virtqueue *vq = to_vvq(_vq);
2813 
2814 	BUG_ON(!vq->we_own_ring);
2815 
2816 	if (vq->packed_ring)
2817 		return vq->packed.ring_dma_addr;
2818 
2819 	return vq->split.queue_dma_addr;
2820 }
2821 EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
2822 
2823 dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
2824 {
2825 	struct vring_virtqueue *vq = to_vvq(_vq);
2826 
2827 	BUG_ON(!vq->we_own_ring);
2828 
2829 	if (vq->packed_ring)
2830 		return vq->packed.driver_event_dma_addr;
2831 
2832 	return vq->split.queue_dma_addr +
2833 		((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
2834 }
2835 EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
2836 
2837 dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
2838 {
2839 	struct vring_virtqueue *vq = to_vvq(_vq);
2840 
2841 	BUG_ON(!vq->we_own_ring);
2842 
2843 	if (vq->packed_ring)
2844 		return vq->packed.device_event_dma_addr;
2845 
2846 	return vq->split.queue_dma_addr +
2847 		((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
2848 }
2849 EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
2850 
2851 /* Only available for split ring */
2852 const struct vring *virtqueue_get_vring(struct virtqueue *vq)
2853 {
2854 	return &to_vvq(vq)->split.vring;
2855 }
2856 EXPORT_SYMBOL_GPL(virtqueue_get_vring);
2857 
2858 MODULE_LICENSE("GPL");
2859