// SPDX-License-Identifier: GPL-2.0-or-later
/* Virtio ring implementation.
 *
 *  Copyright 2007 Rusty Russell IBM Corporation
 */
#include <linux/virtio.h>
#include <linux/virtio_ring.h>
#include <linux/virtio_config.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/hrtimer.h>
#include <linux/dma-mapping.h>
#include <linux/kmsan.h>
#include <linux/spinlock.h>
#include <xen/xen.h>

#ifdef DEBUG
/* For development, we want to crash whenever the ring is screwed. */
#define BAD_RING(_vq, fmt, args...)				\
	do {							\
		dev_err(&(_vq)->vq.vdev->dev,			\
			"%s:"fmt, (_vq)->vq.name, ##args);	\
		BUG();						\
	} while (0)
/* Caller is supposed to guarantee no reentry. */
#define START_USE(_vq)						\
	do {							\
		if ((_vq)->in_use)				\
			panic("%s:in_use = %i\n",		\
			      (_vq)->vq.name, (_vq)->in_use);	\
		(_vq)->in_use = __LINE__;			\
	} while (0)
#define END_USE(_vq) \
	do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while (0)
#define LAST_ADD_TIME_UPDATE(_vq)				\
	do {							\
		ktime_t now = ktime_get();			\
								\
		/* No kick or get, with .1 second between?  Warn. */ \
		if ((_vq)->last_add_time_valid)			\
			WARN_ON(ktime_to_ms(ktime_sub(now,	\
				(_vq)->last_add_time)) > 100);	\
		(_vq)->last_add_time = now;			\
		(_vq)->last_add_time_valid = true;		\
	} while (0)
#define LAST_ADD_TIME_CHECK(_vq)				\
	do {							\
		if ((_vq)->last_add_time_valid) {		\
			WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
				(_vq)->last_add_time)) > 100);	\
		}						\
	} while (0)
#define LAST_ADD_TIME_INVALID(_vq)				\
	((_vq)->last_add_time_valid = false)
#else
#define BAD_RING(_vq, fmt, args...)				\
	do {							\
		dev_err(&_vq->vq.vdev->dev,			\
			"%s:"fmt, (_vq)->vq.name, ##args);	\
		(_vq)->broken = true;				\
	} while (0)
#define START_USE(vq)
#define END_USE(vq)
#define LAST_ADD_TIME_UPDATE(vq)
#define LAST_ADD_TIME_CHECK(vq)
#define LAST_ADD_TIME_INVALID(vq)
#endif

struct vring_desc_state_split {
	void *data;			/* Data for callback. */
	struct vring_desc *indir_desc;	/* Indirect descriptor, if any. */
};

struct vring_desc_state_packed {
	void *data;			/* Data for callback. */
	struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
	u16 num;			/* Descriptor list length. */
	u16 last;			/* The last desc state in a list. */
};

struct vring_desc_extra {
	dma_addr_t addr;		/* Descriptor DMA addr. */
	u32 len;			/* Descriptor length. */
	u16 flags;			/* Descriptor flags. */
	u16 next;			/* The next desc state in a list. */
};

struct vring_virtqueue_split {
	/* Actual memory layout for this queue. */
	struct vring vring;

	/* Last written value to avail->flags */
	u16 avail_flags_shadow;

	/*
	 * Last written value to avail->idx in
	 * guest byte order.
	 */
	u16 avail_idx_shadow;

	/* Per-descriptor state. */
	struct vring_desc_state_split *desc_state;
	struct vring_desc_extra *desc_extra;

	/* DMA address and size information */
	dma_addr_t queue_dma_addr;
	size_t queue_size_in_bytes;

	/*
	 * The parameters for creating vrings are reserved for creating new
	 * vring.
	 */
	u32 vring_align;
	bool may_reduce_num;
};
struct vring_virtqueue_packed {
	/* Actual memory layout for this queue. */
	struct {
		unsigned int num;
		struct vring_packed_desc *desc;
		struct vring_packed_desc_event *driver;
		struct vring_packed_desc_event *device;
	} vring;

	/* Driver ring wrap counter. */
	bool avail_wrap_counter;

	/* Avail used flags. */
	u16 avail_used_flags;

	/* Index of the next avail descriptor. */
	u16 next_avail_idx;

	/*
	 * Last written value to driver->flags in
	 * guest byte order.
	 */
	u16 event_flags_shadow;

	/* Per-descriptor state. */
	struct vring_desc_state_packed *desc_state;
	struct vring_desc_extra *desc_extra;

	/* DMA address and size information */
	dma_addr_t ring_dma_addr;
	dma_addr_t driver_event_dma_addr;
	dma_addr_t device_event_dma_addr;
	size_t ring_size_in_bytes;
	size_t event_size_in_bytes;
};

struct vring_virtqueue {
	struct virtqueue vq;

	/* Is this a packed ring? */
	bool packed_ring;

	/* Is DMA API used? */
	bool use_dma_api;

	/* Can we use weak barriers? */
	bool weak_barriers;

	/* Other side has made a mess, don't try any more. */
	bool broken;

	/* Host supports indirect buffers */
	bool indirect;

	/* Host publishes avail event idx */
	bool event;

	/* Head of free buffer list. */
	unsigned int free_head;
	/* Number we've added since last sync. */
	unsigned int num_added;

	/* Last used index we've seen.
	 * For the split ring, it just contains the last used index.
	 * For the packed ring:
	 * bits up to VRING_PACKED_EVENT_F_WRAP_CTR contain the last used index;
	 * bits from VRING_PACKED_EVENT_F_WRAP_CTR contain the used wrap counter.
	 */
	u16 last_used_idx;

	/* Hint for event idx: already triggered no need to disable. */
	bool event_triggered;

	union {
		/* Available for split ring */
		struct vring_virtqueue_split split;

		/* Available for packed ring */
		struct vring_virtqueue_packed packed;
	};

	/* How to notify other side. FIXME: commonalize hcalls! */
	bool (*notify)(struct virtqueue *vq);

	/* DMA, allocation, and size information */
	bool we_own_ring;

	/* Device used for doing DMA */
	struct device *dma_dev;

#ifdef DEBUG
	/* They're supposed to lock for us. */
	unsigned int in_use;

	/* Figure out if their kicks are too delayed. */
	bool last_add_time_valid;
	ktime_t last_add_time;
#endif
};

static struct virtqueue *__vring_new_virtqueue(unsigned int index,
					       struct vring_virtqueue_split *vring_split,
					       struct virtio_device *vdev,
					       bool weak_barriers,
					       bool context,
					       bool (*notify)(struct virtqueue *),
					       void (*callback)(struct virtqueue *),
					       const char *name,
					       struct device *dma_dev);
static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num);
static void vring_free(struct virtqueue *_vq);

/*
 * Helpers.
 */

#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)

static inline bool virtqueue_use_indirect(struct vring_virtqueue *vq,
					  unsigned int total_sg)
{
	/*
	 * If the host supports indirect descriptor tables, and we have multiple
	 * buffers, then go indirect. FIXME: tune this threshold
	 */
	return (vq->indirect && total_sg > 1 && vq->vq.num_free);
}
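/*
 * For illustration (not part of the driver flow, assuming
 * VIRTIO_RING_F_INDIRECT_DESC was negotiated): a three-segment buffer that
 * goes indirect consumes a single ring slot pointing at a separately
 * allocated table:
 *
 *	ring slot N: addr  = <table DMA address>,
 *		     len   = 3 * sizeof(struct vring_desc),
 *		     flags = VRING_DESC_F_INDIRECT
 *
 * With indirect disabled (or total_sg == 1), the same buffer would instead
 * occupy three descriptors chained via VRING_DESC_F_NEXT.
 */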
/*
 * Modern virtio devices have feature bits to specify whether they need a
 * quirk and bypass the IOMMU. If not there, just use the DMA API.
 *
 * If there, the interaction between virtio and DMA API is messy.
 *
 * On most systems with virtio, physical addresses match bus addresses,
 * and it doesn't particularly matter whether we use the DMA API.
 *
 * On some systems, including Xen and any system with a physical device
 * that speaks virtio behind a physical IOMMU, we must use the DMA API
 * for virtio DMA to work at all.
 *
 * On other systems, including SPARC and PPC64, virtio-pci devices are
 * enumerated as though they are behind an IOMMU, but the virtio host
 * ignores the IOMMU, so we must either pretend that the IOMMU isn't
 * there or somehow map everything as the identity.
 *
 * For the time being, we preserve historic behavior and bypass the DMA
 * API.
 *
 * TODO: install a per-device DMA ops structure that does the right thing
 * taking into account all the above quirks, and use the DMA API
 * unconditionally on the data path.
 */

static bool vring_use_dma_api(struct virtio_device *vdev)
{
	if (!virtio_has_dma_quirk(vdev))
		return true;

	/* Otherwise, we are left to guess. */
	/*
	 * In theory, it's possible to have a buggy QEMU-supplied
	 * emulated Q35 IOMMU and Xen enabled at the same time.  On
	 * such a configuration, virtio has never worked and will
	 * not work without an even larger kludge.  Instead, enable
	 * the DMA API if we're a Xen guest, which at least allows
	 * all of the sensible Xen configurations to work correctly.
	 */
	if (xen_domain())
		return true;

	return false;
}

size_t virtio_max_dma_size(struct virtio_device *vdev)
{
	size_t max_segment_size = SIZE_MAX;

	if (vring_use_dma_api(vdev))
		max_segment_size = dma_max_mapping_size(vdev->dev.parent);

	return max_segment_size;
}
EXPORT_SYMBOL_GPL(virtio_max_dma_size);
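/*
 * A minimal usage sketch (illustrative only, not code from this file): a
 * block-style driver would typically use the value above to cap its segment
 * size so that no single mapping exceeds what the DMA layer can handle:
 *
 *	u32 max_seg = min_t(size_t, virtio_max_dma_size(vdev), U32_MAX);
 *	blk_queue_max_segment_size(q, max_seg);
 *
 * (blk_queue_max_segment_size() is assumed here for illustration; the point
 * is only that callers derive their segment limits from this helper.)
 */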
static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
			       dma_addr_t *dma_handle, gfp_t flag,
			       struct device *dma_dev)
{
	if (vring_use_dma_api(vdev)) {
		return dma_alloc_coherent(dma_dev, size,
					  dma_handle, flag);
	} else {
		void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);

		if (queue) {
			phys_addr_t phys_addr = virt_to_phys(queue);
			*dma_handle = (dma_addr_t)phys_addr;

			/*
			 * Sanity check: make sure we didn't truncate
			 * the address.  The only arches I can find that
			 * have 64-bit phys_addr_t but 32-bit dma_addr_t
			 * are certain non-highmem MIPS and x86
			 * configurations, but these configurations
			 * should never allocate physical pages above 32
			 * bits, so this is fine.  Just in case, throw a
			 * warning and abort if we end up with an
			 * unrepresentable address.
			 */
			if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
				free_pages_exact(queue, PAGE_ALIGN(size));
				return NULL;
			}
		}
		return queue;
	}
}

static void vring_free_queue(struct virtio_device *vdev, size_t size,
			     void *queue, dma_addr_t dma_handle,
			     struct device *dma_dev)
{
	if (vring_use_dma_api(vdev))
		dma_free_coherent(dma_dev, size, queue, dma_handle);
	else
		free_pages_exact(queue, PAGE_ALIGN(size));
}

/*
 * The DMA ops on various arches are rather gnarly right now, and
 * making all of the arch DMA ops work on the vring device itself
 * is a mess.
 */
static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq)
{
	return vq->dma_dev;
}

/* Map one sg entry. */
static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
				   struct scatterlist *sg,
				   enum dma_data_direction direction)
{
	if (!vq->use_dma_api) {
		/*
		 * If DMA is not used, KMSAN doesn't know that the scatterlist
		 * is initialized by the hardware. Explicitly check/unpoison it
		 * depending on the direction.
		 */
		kmsan_handle_dma(sg_page(sg), sg->offset, sg->length, direction);
		return (dma_addr_t)sg_phys(sg);
	}

	/*
	 * We can't use dma_map_sg, because we don't use scatterlists in
	 * the way it expects (we don't guarantee that the scatterlist
	 * will exist for the lifetime of the mapping).
	 */
	return dma_map_page(vring_dma_dev(vq),
			    sg_page(sg), sg->offset, sg->length,
			    direction);
}

static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
				   void *cpu_addr, size_t size,
				   enum dma_data_direction direction)
{
	if (!vq->use_dma_api)
		return (dma_addr_t)virt_to_phys(cpu_addr);

	return dma_map_single(vring_dma_dev(vq),
			      cpu_addr, size, direction);
}

static int vring_mapping_error(const struct vring_virtqueue *vq,
			       dma_addr_t addr)
{
	if (!vq->use_dma_api)
		return 0;

	return dma_mapping_error(vring_dma_dev(vq), addr);
}

static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
{
	vq->vq.num_free = num;

	if (vq->packed_ring)
		vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR);
	else
		vq->last_used_idx = 0;

	vq->event_triggered = false;
	vq->num_added = 0;

#ifdef DEBUG
	vq->in_use = false;
	vq->last_add_time_valid = false;
#endif
}

/*
 * Split ring specific functions - *_split().
 */

static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
					   struct vring_desc *desc)
{
	u16 flags;

	if (!vq->use_dma_api)
		return;

	flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);

	dma_unmap_page(vring_dma_dev(vq),
		       virtio64_to_cpu(vq->vq.vdev, desc->addr),
		       virtio32_to_cpu(vq->vq.vdev, desc->len),
		       (flags & VRING_DESC_F_WRITE) ?
		       DMA_FROM_DEVICE : DMA_TO_DEVICE);
}

static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
					  unsigned int i)
{
	struct vring_desc_extra *extra = vq->split.desc_extra;
	u16 flags;

	if (!vq->use_dma_api)
		goto out;

	flags = extra[i].flags;

	if (flags & VRING_DESC_F_INDIRECT) {
		dma_unmap_single(vring_dma_dev(vq),
				 extra[i].addr,
				 extra[i].len,
				 (flags & VRING_DESC_F_WRITE) ?
				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
	} else {
		dma_unmap_page(vring_dma_dev(vq),
			       extra[i].addr,
			       extra[i].len,
			       (flags & VRING_DESC_F_WRITE) ?
			       DMA_FROM_DEVICE : DMA_TO_DEVICE);
	}

out:
	return extra[i].next;
}

static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
					       unsigned int total_sg,
					       gfp_t gfp)
{
	struct vring_desc *desc;
	unsigned int i;

	/*
	 * We require lowmem mappings for the descriptors because
	 * otherwise virt_to_phys will give us bogus addresses in the
	 * virtqueue.
	 */
	gfp &= ~__GFP_HIGHMEM;

	desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp);
	if (!desc)
		return NULL;

	for (i = 0; i < total_sg; i++)
		desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
	return desc;
}

static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
						    struct vring_desc *desc,
						    unsigned int i,
						    dma_addr_t addr,
						    unsigned int len,
						    u16 flags,
						    bool indirect)
{
	struct vring_virtqueue *vring = to_vvq(vq);
	struct vring_desc_extra *extra = vring->split.desc_extra;
	u16 next;

	desc[i].flags = cpu_to_virtio16(vq->vdev, flags);
	desc[i].addr = cpu_to_virtio64(vq->vdev, addr);
	desc[i].len = cpu_to_virtio32(vq->vdev, len);

	if (!indirect) {
		next = extra[i].next;
		desc[i].next = cpu_to_virtio16(vq->vdev, next);

		extra[i].addr = addr;
		extra[i].len = len;
		extra[i].flags = flags;
	} else
		next = virtio16_to_cpu(vq->vdev, desc[i].next);

	return next;
}

static inline int virtqueue_add_split(struct virtqueue *_vq,
				      struct scatterlist *sgs[],
				      unsigned int total_sg,
				      unsigned int out_sgs,
				      unsigned int in_sgs,
				      void *data,
				      void *ctx,
				      gfp_t gfp)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	struct scatterlist *sg;
	struct vring_desc *desc;
	unsigned int i, n, avail, descs_used, prev, err_idx;
	int head;
	bool indirect;

	START_USE(vq);

	BUG_ON(data == NULL);
	BUG_ON(ctx && vq->indirect);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return -EIO;
	}

	LAST_ADD_TIME_UPDATE(vq);

	BUG_ON(total_sg == 0);

	head = vq->free_head;

	if (virtqueue_use_indirect(vq, total_sg))
		desc = alloc_indirect_split(_vq, total_sg, gfp);
	else {
		desc = NULL;
		WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
	}

	if (desc) {
		/* Use a single buffer which doesn't continue */
		indirect = true;
		/* Set up rest to use this indirect table. */
		i = 0;
		descs_used = 1;
	} else {
		indirect = false;
		desc = vq->split.vring.desc;
		i = head;
		descs_used = total_sg;
	}

	if (unlikely(vq->vq.num_free < descs_used)) {
		pr_debug("Can't add buf len %i - avail = %i\n",
			 descs_used, vq->vq.num_free);
		/* FIXME: for historical reasons, we force a notify here if
		 * there are outgoing parts to the buffer.  Presumably the
		 * host should service the ring ASAP. */
		if (out_sgs)
			vq->notify(&vq->vq);
		if (indirect)
			kfree(desc);
		END_USE(vq);
		return -ENOSPC;
	}

	for (n = 0; n < out_sgs; n++) {
		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
			if (vring_mapping_error(vq, addr))
				goto unmap_release;

			prev = i;
			/* Note that we trust the indirect descriptor
			 * table since it uses streaming DMA mapping.
			 */
			i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
						     VRING_DESC_F_NEXT,
						     indirect);
		}
	}
	for (; n < (out_sgs + in_sgs); n++) {
		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
			if (vring_mapping_error(vq, addr))
				goto unmap_release;

			prev = i;
			/* Note that we trust the indirect descriptor
			 * table since it uses streaming DMA mapping.
			 */
			i = virtqueue_add_desc_split(_vq, desc, i, addr,
						     sg->length,
						     VRING_DESC_F_NEXT |
						     VRING_DESC_F_WRITE,
						     indirect);
		}
	}
	/* Last one doesn't continue. */
	desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
	if (!indirect && vq->use_dma_api)
		vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &=
			~VRING_DESC_F_NEXT;

	if (indirect) {
		/* Now that the indirect table is filled in, map it. */
		dma_addr_t addr = vring_map_single(
			vq, desc, total_sg * sizeof(struct vring_desc),
			DMA_TO_DEVICE);
		if (vring_mapping_error(vq, addr))
			goto unmap_release;

		virtqueue_add_desc_split(_vq, vq->split.vring.desc,
					 head, addr,
					 total_sg * sizeof(struct vring_desc),
					 VRING_DESC_F_INDIRECT,
					 false);
	}

	/* We're using some buffers from the free list. */
	vq->vq.num_free -= descs_used;

	/* Update free pointer */
	if (indirect)
		vq->free_head = vq->split.desc_extra[head].next;
	else
		vq->free_head = i;

	/* Store token and indirect buffer state. */
	vq->split.desc_state[head].data = data;
	if (indirect)
		vq->split.desc_state[head].indir_desc = desc;
	else
		vq->split.desc_state[head].indir_desc = ctx;

	/* Put entry in available array (but don't update avail->idx until they
	 * do sync). */
	avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
	vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);

	/* Descriptors and available array need to be set before we expose the
	 * new available array entries. */
	virtio_wmb(vq->weak_barriers);
	vq->split.avail_idx_shadow++;
	vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
						     vq->split.avail_idx_shadow);
	vq->num_added++;

	pr_debug("Added buffer head %i to %p\n", head, vq);
	END_USE(vq);

	/* This is very unlikely, but theoretically possible.  Kick
	 * just in case. */
	if (unlikely(vq->num_added == (1 << 16) - 1))
		virtqueue_kick(_vq);

	return 0;

unmap_release:
	err_idx = i;

	if (indirect)
		i = 0;
	else
		i = head;

	for (n = 0; n < total_sg; n++) {
		if (i == err_idx)
			break;
		if (indirect) {
			vring_unmap_one_split_indirect(vq, &desc[i]);
			i = virtio16_to_cpu(_vq->vdev, desc[i].next);
		} else
			i = vring_unmap_one_split(vq, i);
	}

	if (indirect)
		kfree(desc);

	END_USE(vq);
	return -ENOMEM;
}

static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u16 new, old;
	bool needs_kick;

	START_USE(vq);
	/* We need to expose available array entries before checking avail
	 * event. */
	virtio_mb(vq->weak_barriers);

	old = vq->split.avail_idx_shadow - vq->num_added;
	new = vq->split.avail_idx_shadow;
	vq->num_added = 0;

	LAST_ADD_TIME_CHECK(vq);
	LAST_ADD_TIME_INVALID(vq);

	if (vq->event) {
		needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
					vring_avail_event(&vq->split.vring)),
					      new, old);
	} else {
		needs_kick = !(vq->split.vring.used->flags &
					cpu_to_virtio16(_vq->vdev,
						VRING_USED_F_NO_NOTIFY));
	}
	END_USE(vq);
	return needs_kick;
}
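/*
 * A worked example of the event-index test above. vring_need_event(), from
 * include/uapi/linux/virtio_ring.h, expands to
 *
 *	(u16)(new - event_idx - 1) < (u16)(new - old)
 *
 * With old = 10 and new = 13, entries 10..12 were just made available.
 * An avail event index of 11 gives (13 - 11 - 1) = 1 < (13 - 10) = 3, so
 * the device asked to be notified within this batch and we kick.  An avail
 * event index of 14 gives (u16)(13 - 14 - 1) = 65534, which is not < 3, so
 * the device has not yet asked for a notification and the kick is skipped.
 */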
static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
			     void **ctx)
{
	unsigned int i, j;
	__virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);

	/* Clear data ptr. */
	vq->split.desc_state[head].data = NULL;

	/* Put back on free list: unmap first-level descriptors and find end */
	i = head;

	while (vq->split.vring.desc[i].flags & nextflag) {
		vring_unmap_one_split(vq, i);
		i = vq->split.desc_extra[i].next;
		vq->vq.num_free++;
	}

	vring_unmap_one_split(vq, i);
	vq->split.desc_extra[i].next = vq->free_head;
	vq->free_head = head;

	/* Plus final descriptor */
	vq->vq.num_free++;

	if (vq->indirect) {
		struct vring_desc *indir_desc =
				vq->split.desc_state[head].indir_desc;
		u32 len;

		/* Free the indirect table, if any, now that it's unmapped. */
		if (!indir_desc)
			return;

		len = vq->split.desc_extra[head].len;

		BUG_ON(!(vq->split.desc_extra[head].flags &
				VRING_DESC_F_INDIRECT));
		BUG_ON(len == 0 || len % sizeof(struct vring_desc));

		for (j = 0; j < len / sizeof(struct vring_desc); j++)
			vring_unmap_one_split_indirect(vq, &indir_desc[j]);

		kfree(indir_desc);
		vq->split.desc_state[head].indir_desc = NULL;
	} else if (ctx) {
		*ctx = vq->split.desc_state[head].indir_desc;
	}
}

static inline bool more_used_split(const struct vring_virtqueue *vq)
{
	return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
			vq->split.vring.used->idx);
}

static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
					 unsigned int *len,
					 void **ctx)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	void *ret;
	unsigned int i;
	u16 last_used;

	START_USE(vq);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return NULL;
	}

	if (!more_used_split(vq)) {
		pr_debug("No more buffers in queue\n");
		END_USE(vq);
		return NULL;
	}

	/* Only get used array entries after they have been exposed by host. */
	virtio_rmb(vq->weak_barriers);

	last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
	i = virtio32_to_cpu(_vq->vdev,
			vq->split.vring.used->ring[last_used].id);
	*len = virtio32_to_cpu(_vq->vdev,
			vq->split.vring.used->ring[last_used].len);

	if (unlikely(i >= vq->split.vring.num)) {
		BAD_RING(vq, "id %u out of range\n", i);
		return NULL;
	}
	if (unlikely(!vq->split.desc_state[i].data)) {
		BAD_RING(vq, "id %u is not a head!\n", i);
		return NULL;
	}

	/* detach_buf_split clears data, so grab it now. */
	ret = vq->split.desc_state[i].data;
	detach_buf_split(vq, i, ctx);
	vq->last_used_idx++;
	/* If we expect an interrupt for the next entry, tell host
	 * by writing event index and flush out the write before
	 * the read in the next get_buf call. */
	if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
		virtio_store_mb(vq->weak_barriers,
				&vring_used_event(&vq->split.vring),
				cpu_to_virtio16(_vq->vdev, vq->last_used_idx));

	LAST_ADD_TIME_INVALID(vq);

	END_USE(vq);
	return ret;
}
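/*
 * A minimal sketch (illustrative, not part of this file) of how a driver
 * callback typically drains a queue through the get_buf path above,
 * assuming a hypothetical driver-defined consume_token() completion helper:
 *
 *	static void my_vq_callback(struct virtqueue *vq)
 *	{
 *		unsigned int len;
 *		void *token;
 *
 *		while ((token = virtqueue_get_buf(vq, &len)) != NULL)
 *			consume_token(token, len);
 *	}
 *
 * Each returned token is the @data pointer the driver passed to one of the
 * virtqueue_add_*() calls, and @len is how much the device wrote into it.
 */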
static void virtqueue_disable_cb_split(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
		vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
		if (vq->event)
			/* TODO: this is a hack. Figure out a cleaner value to write. */
			vring_used_event(&vq->split.vring) = 0x0;
		else
			vq->split.vring.avail->flags =
				cpu_to_virtio16(_vq->vdev,
						vq->split.avail_flags_shadow);
	}
}

static unsigned int virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u16 last_used_idx;

	START_USE(vq);

	/* We optimistically turn back on interrupts, then check if there was
	 * more to do. */
	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
	 * either clear the flags bit or point the event index at the next
	 * entry. Always do both to keep code simple. */
	if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
		vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
		if (!vq->event)
			vq->split.vring.avail->flags =
				cpu_to_virtio16(_vq->vdev,
						vq->split.avail_flags_shadow);
	}
	vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev,
			last_used_idx = vq->last_used_idx);
	END_USE(vq);
	return last_used_idx;
}

static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned int last_used_idx)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
			vq->split.vring.used->idx);
}
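/*
 * A minimal sketch (illustrative only) of the race-free re-enable pattern
 * that the prepare/poll pair above supports, as used from a driver:
 *
 *	unsigned int len, opaque;
 *	void *token;
 *
 *	virtqueue_disable_cb(vq);
 *	do {
 *		while ((token = virtqueue_get_buf(vq, &len)))
 *			consume_token(token, len);	// hypothetical helper
 *		opaque = virtqueue_enable_cb_prepare(vq);
 *	} while (virtqueue_poll(vq, opaque));
 *
 * If a buffer lands between the last get_buf and enable_cb_prepare, the
 * poll sees it and the loop drains again instead of losing the event.
 */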
static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u16 bufs;

	START_USE(vq);

	/* We optimistically turn back on interrupts, then check if there was
	 * more to do. */
	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
	 * either clear the flags bit or point the event index at the next
	 * entry. Always update the event index to keep code simple. */
	if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
		vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
		if (!vq->event)
			vq->split.vring.avail->flags =
				cpu_to_virtio16(_vq->vdev,
						vq->split.avail_flags_shadow);
	}
	/* TODO: tune this threshold */
	bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;

	virtio_store_mb(vq->weak_barriers,
			&vring_used_event(&vq->split.vring),
			cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));

	if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx)
					- vq->last_used_idx) > bufs)) {
		END_USE(vq);
		return false;
	}

	END_USE(vq);
	return true;
}

static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	unsigned int i;
	void *buf;

	START_USE(vq);

	for (i = 0; i < vq->split.vring.num; i++) {
		if (!vq->split.desc_state[i].data)
			continue;
		/* detach_buf_split clears data, so grab it now. */
		buf = vq->split.desc_state[i].data;
		detach_buf_split(vq, i, NULL);
		vq->split.avail_idx_shadow--;
		vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
				vq->split.avail_idx_shadow);
		END_USE(vq);
		return buf;
	}
	/* That should have freed everything. */
	BUG_ON(vq->vq.num_free != vq->split.vring.num);

	END_USE(vq);
	return NULL;
}

static void virtqueue_vring_init_split(struct vring_virtqueue_split *vring_split,
				       struct vring_virtqueue *vq)
{
	struct virtio_device *vdev;

	vdev = vq->vq.vdev;

	vring_split->avail_flags_shadow = 0;
	vring_split->avail_idx_shadow = 0;

	/* No callback?  Tell other side not to bother us. */
	if (!vq->vq.callback) {
		vring_split->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
		if (!vq->event)
			vring_split->vring.avail->flags = cpu_to_virtio16(vdev,
					vring_split->avail_flags_shadow);
	}
}

static void virtqueue_reinit_split(struct vring_virtqueue *vq)
{
	int num;

	num = vq->split.vring.num;

	vq->split.vring.avail->flags = 0;
	vq->split.vring.avail->idx = 0;

	/* reset avail event */
	vq->split.vring.avail->ring[num] = 0;

	vq->split.vring.used->flags = 0;
	vq->split.vring.used->idx = 0;

	/* reset used event */
	*(__virtio16 *)&(vq->split.vring.used->ring[num]) = 0;

	virtqueue_init(vq, num);

	virtqueue_vring_init_split(&vq->split, vq);
}

static void virtqueue_vring_attach_split(struct vring_virtqueue *vq,
					 struct vring_virtqueue_split *vring_split)
{
	vq->split = *vring_split;

	/* Put everything in free lists. */
	vq->free_head = 0;
}

static int vring_alloc_state_extra_split(struct vring_virtqueue_split *vring_split)
{
	struct vring_desc_state_split *state;
	struct vring_desc_extra *extra;
	u32 num = vring_split->vring.num;

	state = kmalloc_array(num, sizeof(struct vring_desc_state_split), GFP_KERNEL);
	if (!state)
		goto err_state;

	extra = vring_alloc_desc_extra(num);
	if (!extra)
		goto err_extra;

	memset(state, 0, num * sizeof(struct vring_desc_state_split));

	vring_split->desc_state = state;
	vring_split->desc_extra = extra;
	return 0;

err_extra:
	kfree(state);
err_state:
	return -ENOMEM;
}

static void vring_free_split(struct vring_virtqueue_split *vring_split,
			     struct virtio_device *vdev, struct device *dma_dev)
{
	vring_free_queue(vdev, vring_split->queue_size_in_bytes,
			 vring_split->vring.desc,
			 vring_split->queue_dma_addr,
			 dma_dev);

	kfree(vring_split->desc_state);
	kfree(vring_split->desc_extra);
}

static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split,
				   struct virtio_device *vdev,
				   u32 num,
				   unsigned int vring_align,
				   bool may_reduce_num,
				   struct device *dma_dev)
{
	void *queue = NULL;
	dma_addr_t dma_addr;

	/* We assume num is a power of 2. */
	if (!is_power_of_2(num)) {
		dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
		return -EINVAL;
	}

	/* TODO: allocate each queue chunk individually */
	for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
		queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
					  &dma_addr,
					  GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
					  dma_dev);
		if (queue)
			break;
		if (!may_reduce_num)
			return -ENOMEM;
	}

	if (!num)
		return -ENOMEM;

	if (!queue) {
		/* Try to get a single page. You are my only hope! */
		queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
					  &dma_addr, GFP_KERNEL | __GFP_ZERO,
					  dma_dev);
	}
	if (!queue)
		return -ENOMEM;

	vring_init(&vring_split->vring, num, queue, vring_align);

	vring_split->queue_dma_addr = dma_addr;
	vring_split->queue_size_in_bytes = vring_size(num, vring_align);

	vring_split->vring_align = vring_align;
	vring_split->may_reduce_num = may_reduce_num;

	return 0;
}
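/*
 * For a feel of the sizes involved (a worked example, not code): with
 * num = 256 and vring_align = 4096, vring_size() from
 * include/uapi/linux/virtio_ring.h works out to
 *
 *	16 * 256 + 2 * (3 + 256), rounded up to 4096	=  8192 bytes
 *	+ 2 * 3 + 8 * 256				=  2054 bytes
 *							= 10246 bytes total,
 *
 * well over PAGE_SIZE, so the loop above first tries the full-size
 * allocation and halves num on failure (or gives up immediately when
 * may_reduce_num is false).
 */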
static struct virtqueue *vring_create_virtqueue_split(
	unsigned int index,
	unsigned int num,
	unsigned int vring_align,
	struct virtio_device *vdev,
	bool weak_barriers,
	bool may_reduce_num,
	bool context,
	bool (*notify)(struct virtqueue *),
	void (*callback)(struct virtqueue *),
	const char *name,
	struct device *dma_dev)
{
	struct vring_virtqueue_split vring_split = {};
	struct virtqueue *vq;
	int err;

	err = vring_alloc_queue_split(&vring_split, vdev, num, vring_align,
				      may_reduce_num, dma_dev);
	if (err)
		return NULL;

	vq = __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers,
				   context, notify, callback, name, dma_dev);
	if (!vq) {
		vring_free_split(&vring_split, vdev, dma_dev);
		return NULL;
	}

	to_vvq(vq)->we_own_ring = true;

	return vq;
}

static int virtqueue_resize_split(struct virtqueue *_vq, u32 num)
{
	struct vring_virtqueue_split vring_split = {};
	struct vring_virtqueue *vq = to_vvq(_vq);
	struct virtio_device *vdev = _vq->vdev;
	int err;

	err = vring_alloc_queue_split(&vring_split, vdev, num,
				      vq->split.vring_align,
				      vq->split.may_reduce_num,
				      vring_dma_dev(vq));
	if (err)
		goto err;

	err = vring_alloc_state_extra_split(&vring_split);
	if (err)
		goto err_state_extra;

	vring_free(&vq->vq);

	virtqueue_vring_init_split(&vring_split, vq);

	virtqueue_init(vq, vring_split.vring.num);
	virtqueue_vring_attach_split(vq, &vring_split);

	return 0;

err_state_extra:
	vring_free_split(&vring_split, vdev, vring_dma_dev(vq));
err:
	virtqueue_reinit_split(vq);
	return -ENOMEM;
}


/*
 * Packed ring specific functions - *_packed().
 */
static inline bool packed_used_wrap_counter(u16 last_used_idx)
{
	return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR));
}

static inline u16 packed_last_used(u16 last_used_idx)
{
	return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR));
}
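/*
 * For example (illustrative): VRING_PACKED_EVENT_F_WRAP_CTR is 15, so a
 * last_used_idx of 0x8005 decodes as used index 5 with the used wrap
 * counter set, while 0x0005 is the same index with the counter clear.
 * Packing both fields into one u16 lets readers snapshot index and counter
 * together with a single READ_ONCE().
 */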
static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
				     struct vring_desc_extra *extra)
{
	u16 flags;

	if (!vq->use_dma_api)
		return;

	flags = extra->flags;

	if (flags & VRING_DESC_F_INDIRECT) {
		dma_unmap_single(vring_dma_dev(vq),
				 extra->addr, extra->len,
				 (flags & VRING_DESC_F_WRITE) ?
				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
	} else {
		dma_unmap_page(vring_dma_dev(vq),
			       extra->addr, extra->len,
			       (flags & VRING_DESC_F_WRITE) ?
			       DMA_FROM_DEVICE : DMA_TO_DEVICE);
	}
}

static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
				    struct vring_packed_desc *desc)
{
	u16 flags;

	if (!vq->use_dma_api)
		return;

	flags = le16_to_cpu(desc->flags);

	dma_unmap_page(vring_dma_dev(vq),
		       le64_to_cpu(desc->addr),
		       le32_to_cpu(desc->len),
		       (flags & VRING_DESC_F_WRITE) ?
		       DMA_FROM_DEVICE : DMA_TO_DEVICE);
}

static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
						       gfp_t gfp)
{
	struct vring_packed_desc *desc;

	/*
	 * We require lowmem mappings for the descriptors because
	 * otherwise virt_to_phys will give us bogus addresses in the
	 * virtqueue.
	 */
	gfp &= ~__GFP_HIGHMEM;

	desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp);

	return desc;
}
static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
					 struct scatterlist *sgs[],
					 unsigned int total_sg,
					 unsigned int out_sgs,
					 unsigned int in_sgs,
					 void *data,
					 gfp_t gfp)
{
	struct vring_packed_desc *desc;
	struct scatterlist *sg;
	unsigned int i, n, err_idx;
	u16 head, id;
	dma_addr_t addr;

	head = vq->packed.next_avail_idx;
	desc = alloc_indirect_packed(total_sg, gfp);
	if (!desc)
		return -ENOMEM;

	if (unlikely(vq->vq.num_free < 1)) {
		pr_debug("Can't add buf len 1 - avail = 0\n");
		kfree(desc);
		END_USE(vq);
		return -ENOSPC;
	}

	i = 0;
	id = vq->free_head;
	BUG_ON(id == vq->packed.vring.num);

	for (n = 0; n < out_sgs + in_sgs; n++) {
		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
			addr = vring_map_one_sg(vq, sg, n < out_sgs ?
					DMA_TO_DEVICE : DMA_FROM_DEVICE);
			if (vring_mapping_error(vq, addr))
				goto unmap_release;

			desc[i].flags = cpu_to_le16(n < out_sgs ?
						0 : VRING_DESC_F_WRITE);
			desc[i].addr = cpu_to_le64(addr);
			desc[i].len = cpu_to_le32(sg->length);
			i++;
		}
	}

	/* Now that the indirect table is filled in, map it. */
	addr = vring_map_single(vq, desc,
			total_sg * sizeof(struct vring_packed_desc),
			DMA_TO_DEVICE);
	if (vring_mapping_error(vq, addr))
		goto unmap_release;

	vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
	vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
				sizeof(struct vring_packed_desc));
	vq->packed.vring.desc[head].id = cpu_to_le16(id);

	if (vq->use_dma_api) {
		vq->packed.desc_extra[id].addr = addr;
		vq->packed.desc_extra[id].len = total_sg *
				sizeof(struct vring_packed_desc);
		vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
						  vq->packed.avail_used_flags;
	}

	/*
	 * A driver MUST NOT make the first descriptor in the list
	 * available before all subsequent descriptors comprising
	 * the list are made available.
	 */
	virtio_wmb(vq->weak_barriers);
	vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
						vq->packed.avail_used_flags);

	/* We're using some buffers from the free list. */
	vq->vq.num_free -= 1;

	/* Update free pointer */
	n = head + 1;
	if (n >= vq->packed.vring.num) {
		n = 0;
		vq->packed.avail_wrap_counter ^= 1;
		vq->packed.avail_used_flags ^=
				1 << VRING_PACKED_DESC_F_AVAIL |
				1 << VRING_PACKED_DESC_F_USED;
	}
	vq->packed.next_avail_idx = n;
	vq->free_head = vq->packed.desc_extra[id].next;

	/* Store token and indirect buffer state. */
	vq->packed.desc_state[id].num = 1;
	vq->packed.desc_state[id].data = data;
	vq->packed.desc_state[id].indir_desc = desc;
	vq->packed.desc_state[id].last = id;

	vq->num_added += 1;

	pr_debug("Added buffer head %i to %p\n", head, vq);
	END_USE(vq);

	return 0;

unmap_release:
	err_idx = i;

	for (i = 0; i < err_idx; i++)
		vring_unmap_desc_packed(vq, &desc[i]);

	kfree(desc);

	END_USE(vq);
	return -ENOMEM;
}
static inline int virtqueue_add_packed(struct virtqueue *_vq,
				       struct scatterlist *sgs[],
				       unsigned int total_sg,
				       unsigned int out_sgs,
				       unsigned int in_sgs,
				       void *data,
				       void *ctx,
				       gfp_t gfp)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	struct vring_packed_desc *desc;
	struct scatterlist *sg;
	unsigned int i, n, c, descs_used, err_idx;
	__le16 head_flags, flags;
	u16 head, id, prev, curr, avail_used_flags;
	int err;

	START_USE(vq);

	BUG_ON(data == NULL);
	BUG_ON(ctx && vq->indirect);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return -EIO;
	}

	LAST_ADD_TIME_UPDATE(vq);

	BUG_ON(total_sg == 0);

	if (virtqueue_use_indirect(vq, total_sg)) {
		err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
						    in_sgs, data, gfp);
		if (err != -ENOMEM) {
			END_USE(vq);
			return err;
		}

		/* fall back on direct */
	}

	head = vq->packed.next_avail_idx;
	avail_used_flags = vq->packed.avail_used_flags;

	WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);

	desc = vq->packed.vring.desc;
	i = head;
	descs_used = total_sg;

	if (unlikely(vq->vq.num_free < descs_used)) {
		pr_debug("Can't add buf len %i - avail = %i\n",
			 descs_used, vq->vq.num_free);
		END_USE(vq);
		return -ENOSPC;
	}

	id = vq->free_head;
	BUG_ON(id == vq->packed.vring.num);

	curr = id;
	c = 0;
	for (n = 0; n < out_sgs + in_sgs; n++) {
		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
			dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
					DMA_TO_DEVICE : DMA_FROM_DEVICE);
			if (vring_mapping_error(vq, addr))
				goto unmap_release;

			flags = cpu_to_le16(vq->packed.avail_used_flags |
				    (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
				    (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
			if (i == head)
				head_flags = flags;
			else
				desc[i].flags = flags;

			desc[i].addr = cpu_to_le64(addr);
			desc[i].len = cpu_to_le32(sg->length);
			desc[i].id = cpu_to_le16(id);

			if (unlikely(vq->use_dma_api)) {
				vq->packed.desc_extra[curr].addr = addr;
				vq->packed.desc_extra[curr].len = sg->length;
				vq->packed.desc_extra[curr].flags =
					le16_to_cpu(flags);
			}
			prev = curr;
			curr = vq->packed.desc_extra[curr].next;

			if (unlikely(++i >= vq->packed.vring.num)) {
				i = 0;
				vq->packed.avail_used_flags ^=
					1 << VRING_PACKED_DESC_F_AVAIL |
					1 << VRING_PACKED_DESC_F_USED;
			}
		}
	}

	if (i < head)
		vq->packed.avail_wrap_counter ^= 1;

	/* We're using some buffers from the free list. */
	vq->vq.num_free -= descs_used;

	/* Update free pointer */
	vq->packed.next_avail_idx = i;
	vq->free_head = curr;

	/* Store token. */
	vq->packed.desc_state[id].num = descs_used;
	vq->packed.desc_state[id].data = data;
	vq->packed.desc_state[id].indir_desc = ctx;
	vq->packed.desc_state[id].last = prev;

	/*
	 * A driver MUST NOT make the first descriptor in the list
	 * available before all subsequent descriptors comprising
	 * the list are made available.
	 */
	virtio_wmb(vq->weak_barriers);
	vq->packed.vring.desc[head].flags = head_flags;
	vq->num_added += descs_used;

	pr_debug("Added buffer head %i to %p\n", head, vq);
	END_USE(vq);

	return 0;

unmap_release:
	err_idx = i;
	i = head;
	curr = vq->free_head;

	vq->packed.avail_used_flags = avail_used_flags;

	for (n = 0; n < total_sg; n++) {
		if (i == err_idx)
			break;
		vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]);
		curr = vq->packed.desc_extra[curr].next;
		i++;
		if (i >= vq->packed.vring.num)
			i = 0;
	}

	END_USE(vq);
	return -EIO;
}
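/*
 * Illustrative note on the flag arithmetic above: in a packed ring the
 * driver marks a descriptor available by making its AVAIL bit equal to the
 * driver's wrap counter and its USED bit the inverse.  On the first pass
 * through the ring (wrap counter 1) available descriptors carry
 * AVAIL=1/USED=0; once next_avail_idx wraps past the end, the counter and
 * avail_used_flags flip, so the second pass publishes AVAIL=0/USED=1.  The
 * device marks a descriptor used by making both bits equal to its own used
 * wrap counter (see is_used_desc_packed() below).
 */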
static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u16 new, old, off_wrap, flags, wrap_counter, event_idx;
	bool needs_kick;
	union {
		struct {
			__le16 off_wrap;
			__le16 flags;
		};
		u32 u32;
	} snapshot;

	START_USE(vq);

	/*
	 * We need to expose the new flags value before checking notification
	 * suppressions.
	 */
	virtio_mb(vq->weak_barriers);

	old = vq->packed.next_avail_idx - vq->num_added;
	new = vq->packed.next_avail_idx;
	vq->num_added = 0;

	snapshot.u32 = *(u32 *)vq->packed.vring.device;
	flags = le16_to_cpu(snapshot.flags);

	LAST_ADD_TIME_CHECK(vq);
	LAST_ADD_TIME_INVALID(vq);

	if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
		needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
		goto out;
	}

	off_wrap = le16_to_cpu(snapshot.off_wrap);

	wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
	event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
	if (wrap_counter != vq->packed.avail_wrap_counter)
		event_idx -= vq->packed.vring.num;

	needs_kick = vring_need_event(event_idx, new, old);
out:
	END_USE(vq);
	return needs_kick;
}

static void detach_buf_packed(struct vring_virtqueue *vq,
			      unsigned int id, void **ctx)
{
	struct vring_desc_state_packed *state = NULL;
	struct vring_packed_desc *desc;
	unsigned int i, curr;

	state = &vq->packed.desc_state[id];

	/* Clear data ptr. */
	state->data = NULL;

	vq->packed.desc_extra[state->last].next = vq->free_head;
	vq->free_head = id;
	vq->vq.num_free += state->num;

	if (unlikely(vq->use_dma_api)) {
		curr = id;
		for (i = 0; i < state->num; i++) {
			vring_unmap_extra_packed(vq,
						 &vq->packed.desc_extra[curr]);
			curr = vq->packed.desc_extra[curr].next;
		}
	}

	if (vq->indirect) {
		u32 len;

		/* Free the indirect table, if any, now that it's unmapped. */
		desc = state->indir_desc;
		if (!desc)
			return;

		if (vq->use_dma_api) {
			len = vq->packed.desc_extra[id].len;
			for (i = 0; i < len / sizeof(struct vring_packed_desc);
					i++)
				vring_unmap_desc_packed(vq, &desc[i]);
		}
		kfree(desc);
		state->indir_desc = NULL;
	} else if (ctx) {
		*ctx = state->indir_desc;
	}
}

static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
				       u16 idx, bool used_wrap_counter)
{
	bool avail, used;
	u16 flags;

	flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
	avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
	used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));

	return avail == used && used == used_wrap_counter;
}

static inline bool more_used_packed(const struct vring_virtqueue *vq)
{
	u16 last_used;
	u16 last_used_idx;
	bool used_wrap_counter;

	last_used_idx = READ_ONCE(vq->last_used_idx);
	last_used = packed_last_used(last_used_idx);
	used_wrap_counter = packed_used_wrap_counter(last_used_idx);
	return is_used_desc_packed(vq, last_used, used_wrap_counter);
}
static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
					  unsigned int *len,
					  void **ctx)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u16 last_used, id, last_used_idx;
	bool used_wrap_counter;
	void *ret;

	START_USE(vq);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return NULL;
	}

	if (!more_used_packed(vq)) {
		pr_debug("No more buffers in queue\n");
		END_USE(vq);
		return NULL;
	}

	/* Only get used elements after they have been exposed by host. */
	virtio_rmb(vq->weak_barriers);

	last_used_idx = READ_ONCE(vq->last_used_idx);
	used_wrap_counter = packed_used_wrap_counter(last_used_idx);
	last_used = packed_last_used(last_used_idx);
	id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
	*len = le32_to_cpu(vq->packed.vring.desc[last_used].len);

	if (unlikely(id >= vq->packed.vring.num)) {
		BAD_RING(vq, "id %u out of range\n", id);
		return NULL;
	}
	if (unlikely(!vq->packed.desc_state[id].data)) {
		BAD_RING(vq, "id %u is not a head!\n", id);
		return NULL;
	}

	/* detach_buf_packed clears data, so grab it now. */
	ret = vq->packed.desc_state[id].data;
	detach_buf_packed(vq, id, ctx);

	last_used += vq->packed.desc_state[id].num;
	if (unlikely(last_used >= vq->packed.vring.num)) {
		last_used -= vq->packed.vring.num;
		used_wrap_counter ^= 1;
	}

	last_used = (last_used | (used_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
	WRITE_ONCE(vq->last_used_idx, last_used);

	/*
	 * If we expect an interrupt for the next entry, tell host
	 * by writing event index and flush out the write before
	 * the read in the next get_buf call.
	 */
	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
		virtio_store_mb(vq->weak_barriers,
				&vq->packed.vring.driver->off_wrap,
				cpu_to_le16(vq->last_used_idx));

	LAST_ADD_TIME_INVALID(vq);

	END_USE(vq);
	return ret;
}

static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
		vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
		vq->packed.vring.driver->flags =
			cpu_to_le16(vq->packed.event_flags_shadow);
	}
}

static unsigned int virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	START_USE(vq);

	/*
	 * We optimistically turn back on interrupts, then check if there was
	 * more to do.
	 */

	if (vq->event) {
		vq->packed.vring.driver->off_wrap =
			cpu_to_le16(vq->last_used_idx);
		/*
		 * We need to update event offset and event wrap
		 * counter first before updating event flags.
		 */
		virtio_wmb(vq->weak_barriers);
	}

	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
		vq->packed.event_flags_shadow = vq->event ?
				VRING_PACKED_EVENT_FLAG_DESC :
				VRING_PACKED_EVENT_FLAG_ENABLE;
		vq->packed.vring.driver->flags =
			cpu_to_le16(vq->packed.event_flags_shadow);
	}

	END_USE(vq);
	return vq->last_used_idx;
}

static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	bool wrap_counter;
	u16 used_idx;

	wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
	used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);

	return is_used_desc_packed(vq, used_idx, wrap_counter);
}

static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u16 used_idx, wrap_counter, last_used_idx;
	u16 bufs;

	START_USE(vq);

	/*
	 * We optimistically turn back on interrupts, then check if there was
	 * more to do.
	 */
	if (vq->event) {
		/* TODO: tune this threshold */
		bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4;
		last_used_idx = READ_ONCE(vq->last_used_idx);
		wrap_counter = packed_used_wrap_counter(last_used_idx);

		used_idx = packed_last_used(last_used_idx) + bufs;
		if (used_idx >= vq->packed.vring.num) {
			used_idx -= vq->packed.vring.num;
			wrap_counter ^= 1;
		}

		vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx |
			(wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));

		/*
		 * We need to update event offset and event wrap
		 * counter first before updating event flags.
		 */
		virtio_wmb(vq->weak_barriers);
	}

	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
		vq->packed.event_flags_shadow = vq->event ?
				VRING_PACKED_EVENT_FLAG_DESC :
				VRING_PACKED_EVENT_FLAG_ENABLE;
		vq->packed.vring.driver->flags =
			cpu_to_le16(vq->packed.event_flags_shadow);
	}

	/*
	 * We need to update event suppression structure first
	 * before re-checking for more used buffers.
	 */
	virtio_mb(vq->weak_barriers);

	last_used_idx = READ_ONCE(vq->last_used_idx);
	wrap_counter = packed_used_wrap_counter(last_used_idx);
	used_idx = packed_last_used(last_used_idx);
	if (is_used_desc_packed(vq, used_idx, wrap_counter)) {
		END_USE(vq);
		return false;
	}

	END_USE(vq);
	return true;
}

static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	unsigned int i;
	void *buf;

	START_USE(vq);

	for (i = 0; i < vq->packed.vring.num; i++) {
		if (!vq->packed.desc_state[i].data)
			continue;
		/* detach_buf clears data, so grab it now. */
		buf = vq->packed.desc_state[i].data;
		detach_buf_packed(vq, i, NULL);
		END_USE(vq);
		return buf;
	}
	/* That should have freed everything. */
	BUG_ON(vq->vq.num_free != vq->packed.vring.num);

	END_USE(vq);
	return NULL;
}
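/*
 * A minimal sketch (illustrative only) of how a driver uses the
 * detach-unused path during teardown: after resetting the device, any
 * buffer the device never consumed is reclaimed so its token can be freed:
 *
 *	virtio_reset_device(vdev);	// device no longer touches the ring
 *	while ((token = virtqueue_detach_unused_buf(vq)))
 *		free_token(token);	// hypothetical driver helper
 *
 * Only after this may the transport delete the virtqueues.
 */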
static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num)
{
	struct vring_desc_extra *desc_extra;
	unsigned int i;

	desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra),
				   GFP_KERNEL);
	if (!desc_extra)
		return NULL;

	memset(desc_extra, 0, num * sizeof(struct vring_desc_extra));

	for (i = 0; i < num - 1; i++)
		desc_extra[i].next = i + 1;

	return desc_extra;
}

static void vring_free_packed(struct vring_virtqueue_packed *vring_packed,
			      struct virtio_device *vdev,
			      struct device *dma_dev)
{
	if (vring_packed->vring.desc)
		vring_free_queue(vdev, vring_packed->ring_size_in_bytes,
				 vring_packed->vring.desc,
				 vring_packed->ring_dma_addr,
				 dma_dev);

	if (vring_packed->vring.driver)
		vring_free_queue(vdev, vring_packed->event_size_in_bytes,
				 vring_packed->vring.driver,
				 vring_packed->driver_event_dma_addr,
				 dma_dev);

	if (vring_packed->vring.device)
		vring_free_queue(vdev, vring_packed->event_size_in_bytes,
				 vring_packed->vring.device,
				 vring_packed->device_event_dma_addr,
				 dma_dev);

	kfree(vring_packed->desc_state);
	kfree(vring_packed->desc_extra);
}

static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed,
				    struct virtio_device *vdev,
				    u32 num, struct device *dma_dev)
{
	struct vring_packed_desc *ring;
	struct vring_packed_desc_event *driver, *device;
	dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
	size_t ring_size_in_bytes, event_size_in_bytes;

	ring_size_in_bytes = num * sizeof(struct vring_packed_desc);

	ring = vring_alloc_queue(vdev, ring_size_in_bytes,
				 &ring_dma_addr,
				 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
				 dma_dev);
	if (!ring)
		goto err;

	vring_packed->vring.desc = ring;
	vring_packed->ring_dma_addr = ring_dma_addr;
	vring_packed->ring_size_in_bytes = ring_size_in_bytes;

	event_size_in_bytes = sizeof(struct vring_packed_desc_event);

	driver = vring_alloc_queue(vdev, event_size_in_bytes,
				   &driver_event_dma_addr,
				   GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
				   dma_dev);
	if (!driver)
		goto err;

	vring_packed->vring.driver = driver;
	vring_packed->event_size_in_bytes = event_size_in_bytes;
	vring_packed->driver_event_dma_addr = driver_event_dma_addr;

	device = vring_alloc_queue(vdev, event_size_in_bytes,
				   &device_event_dma_addr,
				   GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
				   dma_dev);
	if (!device)
		goto err;

	vring_packed->vring.device = device;
	vring_packed->device_event_dma_addr = device_event_dma_addr;

	vring_packed->vring.num = num;

	return 0;

err:
	vring_free_packed(vring_packed, vdev, dma_dev);
	return -ENOMEM;
}

static int vring_alloc_state_extra_packed(struct vring_virtqueue_packed *vring_packed)
{
	struct vring_desc_state_packed *state;
	struct vring_desc_extra *extra;
	u32 num = vring_packed->vring.num;

	state = kmalloc_array(num, sizeof(struct vring_desc_state_packed), GFP_KERNEL);
	if (!state)
		goto err_desc_state;

	memset(state, 0, num * sizeof(struct vring_desc_state_packed));

	extra = vring_alloc_desc_extra(num);
	if (!extra)
		goto err_desc_extra;

	vring_packed->desc_state = state;
	vring_packed->desc_extra = extra;

	return 0;

err_desc_extra:
	kfree(state);
err_desc_state:
	return -ENOMEM;
}

static void virtqueue_vring_init_packed(struct vring_virtqueue_packed *vring_packed,
					bool callback)
{
	vring_packed->next_avail_idx = 0;
	vring_packed->avail_wrap_counter = 1;
	vring_packed->event_flags_shadow = 0;
	vring_packed->avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;

	/* No callback?  Tell other side not to bother us. */
	if (!callback) {
		vring_packed->event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
		vring_packed->vring.driver->flags =
			cpu_to_le16(vring_packed->event_flags_shadow);
	}
}

static void virtqueue_vring_attach_packed(struct vring_virtqueue *vq,
					  struct vring_virtqueue_packed *vring_packed)
{
	vq->packed = *vring_packed;

	/* Put everything in free lists. */
	vq->free_head = 0;
}

static void virtqueue_reinit_packed(struct vring_virtqueue *vq)
{
	memset(vq->packed.vring.device, 0, vq->packed.event_size_in_bytes);
	memset(vq->packed.vring.driver, 0, vq->packed.event_size_in_bytes);

	/* we need to reset the desc.flags. For more, see is_used_desc_packed() */
	memset(vq->packed.vring.desc, 0, vq->packed.ring_size_in_bytes);

	virtqueue_init(vq, vq->packed.vring.num);
	virtqueue_vring_init_packed(&vq->packed, !!vq->vq.callback);
}

static struct virtqueue *vring_create_virtqueue_packed(
	unsigned int index,
	unsigned int num,
	unsigned int vring_align,
	struct virtio_device *vdev,
	bool weak_barriers,
	bool may_reduce_num,
	bool context,
	bool (*notify)(struct virtqueue *),
	void (*callback)(struct virtqueue *),
	const char *name,
	struct device *dma_dev)
{
	struct vring_virtqueue_packed vring_packed = {};
	struct vring_virtqueue *vq;
	int err;

	if (vring_alloc_queue_packed(&vring_packed, vdev, num, dma_dev))
		goto err_ring;

	vq = kmalloc(sizeof(*vq), GFP_KERNEL);
	if (!vq)
		goto err_vq;

	vq->vq.callback = callback;
	vq->vq.vdev = vdev;
	vq->vq.name = name;
	vq->vq.index = index;
	vq->vq.reset = false;
	vq->we_own_ring = true;
	vq->notify = notify;
	vq->weak_barriers = weak_barriers;
#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
	vq->broken = true;
#else
	vq->broken = false;
#endif
	vq->packed_ring = true;
	vq->dma_dev = dma_dev;
	vq->use_dma_api = vring_use_dma_api(vdev);

	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
		!context;
	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);

	if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
		vq->weak_barriers = false;

	err = vring_alloc_state_extra_packed(&vring_packed);
	if (err)
		goto err_state_extra;

	virtqueue_vring_init_packed(&vring_packed, !!callback);

	virtqueue_init(vq, num);
	virtqueue_vring_attach_packed(vq, &vring_packed);

	spin_lock(&vdev->vqs_list_lock);
	list_add_tail(&vq->vq.list, &vdev->vqs);
	spin_unlock(&vdev->vqs_list_lock);
	return &vq->vq;

err_state_extra:
	kfree(vq);
err_vq:
	vring_free_packed(&vring_packed, vdev, dma_dev);
err_ring:
	return NULL;
}

static int
virtqueue_resize_packed(struct virtqueue *_vq, u32 num) 2070 { 2071 struct vring_virtqueue_packed vring_packed = {}; 2072 struct vring_virtqueue *vq = to_vvq(_vq); 2073 struct virtio_device *vdev = _vq->vdev; 2074 int err; 2075 2076 if (vring_alloc_queue_packed(&vring_packed, vdev, num, vring_dma_dev(vq))) 2077 goto err_ring; 2078 2079 err = vring_alloc_state_extra_packed(&vring_packed); 2080 if (err) 2081 goto err_state_extra; 2082 2083 vring_free(&vq->vq); 2084 2085 virtqueue_vring_init_packed(&vring_packed, !!vq->vq.callback); 2086 2087 virtqueue_init(vq, vring_packed.vring.num); 2088 virtqueue_vring_attach_packed(vq, &vring_packed); 2089 2090 return 0; 2091 2092 err_state_extra: 2093 vring_free_packed(&vring_packed, vdev, vring_dma_dev(vq)); 2094 err_ring: 2095 virtqueue_reinit_packed(vq); 2096 return -ENOMEM; 2097 } 2098 2099 2100 /* 2101 * Generic functions and exported symbols. 2102 */ 2103 2104 static inline int virtqueue_add(struct virtqueue *_vq, 2105 struct scatterlist *sgs[], 2106 unsigned int total_sg, 2107 unsigned int out_sgs, 2108 unsigned int in_sgs, 2109 void *data, 2110 void *ctx, 2111 gfp_t gfp) 2112 { 2113 struct vring_virtqueue *vq = to_vvq(_vq); 2114 2115 return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg, 2116 out_sgs, in_sgs, data, ctx, gfp) : 2117 virtqueue_add_split(_vq, sgs, total_sg, 2118 out_sgs, in_sgs, data, ctx, gfp); 2119 } 2120 2121 /** 2122 * virtqueue_add_sgs - expose buffers to other end 2123 * @_vq: the struct virtqueue we're talking about. 2124 * @sgs: array of terminated scatterlists. 2125 * @out_sgs: the number of scatterlists readable by other side 2126 * @in_sgs: the number of scatterlists which are writable (after readable ones) 2127 * @data: the token identifying the buffer. 2128 * @gfp: how to do memory allocations (if necessary). 2129 * 2130 * Caller must ensure we don't call this with other virtqueue operations 2131 * at the same time (except where noted). 2132 * 2133 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 2134 */ 2135 int virtqueue_add_sgs(struct virtqueue *_vq, 2136 struct scatterlist *sgs[], 2137 unsigned int out_sgs, 2138 unsigned int in_sgs, 2139 void *data, 2140 gfp_t gfp) 2141 { 2142 unsigned int i, total_sg = 0; 2143 2144 /* Count them first. */ 2145 for (i = 0; i < out_sgs + in_sgs; i++) { 2146 struct scatterlist *sg; 2147 2148 for (sg = sgs[i]; sg; sg = sg_next(sg)) 2149 total_sg++; 2150 } 2151 return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, 2152 data, NULL, gfp); 2153 } 2154 EXPORT_SYMBOL_GPL(virtqueue_add_sgs); 2155 2156 /** 2157 * virtqueue_add_outbuf - expose output buffers to other end 2158 * @vq: the struct virtqueue we're talking about. 2159 * @sg: scatterlist (must be well-formed and terminated!) 2160 * @num: the number of entries in @sg readable by other side 2161 * @data: the token identifying the buffer. 2162 * @gfp: how to do memory allocations (if necessary). 2163 * 2164 * Caller must ensure we don't call this with other virtqueue operations 2165 * at the same time (except where noted). 2166 * 2167 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 2168 */ 2169 int virtqueue_add_outbuf(struct virtqueue *vq, 2170 struct scatterlist *sg, unsigned int num, 2171 void *data, 2172 gfp_t gfp) 2173 { 2174 return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp); 2175 } 2176 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); 2177 2178 /** 2179 * virtqueue_add_inbuf - expose input buffers to other end 2180 * @vq: the struct virtqueue we're talking about. 
2181 * @sg: scatterlist (must be well-formed and terminated!) 2182 * @num: the number of entries in @sg writable by other side 2183 * @data: the token identifying the buffer. 2184 * @gfp: how to do memory allocations (if necessary). 2185 * 2186 * Caller must ensure we don't call this with other virtqueue operations 2187 * at the same time (except where noted). 2188 * 2189 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 2190 */ 2191 int virtqueue_add_inbuf(struct virtqueue *vq, 2192 struct scatterlist *sg, unsigned int num, 2193 void *data, 2194 gfp_t gfp) 2195 { 2196 return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp); 2197 } 2198 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); 2199 2200 /** 2201 * virtqueue_add_inbuf_ctx - expose input buffers to other end 2202 * @vq: the struct virtqueue we're talking about. 2203 * @sg: scatterlist (must be well-formed and terminated!) 2204 * @num: the number of entries in @sg writable by other side 2205 * @data: the token identifying the buffer. 2206 * @ctx: extra context for the token 2207 * @gfp: how to do memory allocations (if necessary). 2208 * 2209 * Caller must ensure we don't call this with other virtqueue operations 2210 * at the same time (except where noted). 2211 * 2212 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 2213 */ 2214 int virtqueue_add_inbuf_ctx(struct virtqueue *vq, 2215 struct scatterlist *sg, unsigned int num, 2216 void *data, 2217 void *ctx, 2218 gfp_t gfp) 2219 { 2220 return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp); 2221 } 2222 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx); 2223 2224 /** 2225 * virtqueue_kick_prepare - first half of split virtqueue_kick call. 2226 * @_vq: the struct virtqueue 2227 * 2228 * Instead of virtqueue_kick(), you can do: 2229 * if (virtqueue_kick_prepare(vq)) 2230 * virtqueue_notify(vq); 2231 * 2232 * This is sometimes useful because the virtqueue_kick_prepare() needs 2233 * to be serialized, but the actual virtqueue_notify() call does not. 2234 */ 2235 bool virtqueue_kick_prepare(struct virtqueue *_vq) 2236 { 2237 struct vring_virtqueue *vq = to_vvq(_vq); 2238 2239 return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) : 2240 virtqueue_kick_prepare_split(_vq); 2241 } 2242 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare); 2243 2244 /** 2245 * virtqueue_notify - second half of split virtqueue_kick call. 2246 * @_vq: the struct virtqueue 2247 * 2248 * This does not need to be serialized. 2249 * 2250 * Returns false if host notify failed or queue is broken, otherwise true. 2251 */ 2252 bool virtqueue_notify(struct virtqueue *_vq) 2253 { 2254 struct vring_virtqueue *vq = to_vvq(_vq); 2255 2256 if (unlikely(vq->broken)) 2257 return false; 2258 2259 /* Prod other side to tell it about changes. */ 2260 if (!vq->notify(_vq)) { 2261 vq->broken = true; 2262 return false; 2263 } 2264 return true; 2265 } 2266 EXPORT_SYMBOL_GPL(virtqueue_notify); 2267 2268 /** 2269 * virtqueue_kick - update after add_buf 2270 * @vq: the struct virtqueue 2271 * 2272 * After one or more virtqueue_add_* calls, invoke this to kick 2273 * the other side. 2274 * 2275 * Caller must ensure we don't call this with other virtqueue 2276 * operations at the same time (except where noted). 2277 * 2278 * Returns false if kick failed, otherwise true. 
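 *
 * Illustrative sketch only (not taken from this file; vq, buf and len are
 * assumed to exist in the caller): queue one output buffer, then kick the
 * device once:
 *
 *	struct scatterlist sg;
 *
 *	sg_init_one(&sg, buf, len);
 *	if (virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC) == 0)
 *		virtqueue_kick(vq);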
2279 */ 2280 bool virtqueue_kick(struct virtqueue *vq) 2281 { 2282 if (virtqueue_kick_prepare(vq)) 2283 return virtqueue_notify(vq); 2284 return true; 2285 } 2286 EXPORT_SYMBOL_GPL(virtqueue_kick); 2287 2288 /** 2289 * virtqueue_get_buf_ctx - get the next used buffer 2290 * @_vq: the struct virtqueue we're talking about. 2291 * @len: the length written into the buffer 2292 * @ctx: extra context for the token 2293 * 2294 * If the device wrote data into the buffer, @len will be set to the 2295 * amount written. This means you don't need to clear the buffer 2296 * beforehand to ensure there's no data leakage in the case of short 2297 * writes. 2298 * 2299 * Caller must ensure we don't call this with other virtqueue 2300 * operations at the same time (except where noted). 2301 * 2302 * Returns NULL if there are no used buffers, or the "data" token 2303 * handed to virtqueue_add_*(). 2304 */ 2305 void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, 2306 void **ctx) 2307 { 2308 struct vring_virtqueue *vq = to_vvq(_vq); 2309 2310 return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) : 2311 virtqueue_get_buf_ctx_split(_vq, len, ctx); 2312 } 2313 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx); 2314 2315 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) 2316 { 2317 return virtqueue_get_buf_ctx(_vq, len, NULL); 2318 } 2319 EXPORT_SYMBOL_GPL(virtqueue_get_buf); 2320 /** 2321 * virtqueue_disable_cb - disable callbacks 2322 * @_vq: the struct virtqueue we're talking about. 2323 * 2324 * Note that this is not necessarily synchronous, hence unreliable and only 2325 * useful as an optimization. 2326 * 2327 * Unlike other operations, this need not be serialized. 2328 */ 2329 void virtqueue_disable_cb(struct virtqueue *_vq) 2330 { 2331 struct vring_virtqueue *vq = to_vvq(_vq); 2332 2333 /* If device triggered an event already it won't trigger one again: 2334 * no need to disable. 2335 */ 2336 if (vq->event_triggered) 2337 return; 2338 2339 if (vq->packed_ring) 2340 virtqueue_disable_cb_packed(_vq); 2341 else 2342 virtqueue_disable_cb_split(_vq); 2343 } 2344 EXPORT_SYMBOL_GPL(virtqueue_disable_cb); 2345 2346 /** 2347 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb 2348 * @_vq: the struct virtqueue we're talking about. 2349 * 2350 * This re-enables callbacks; it returns current queue state 2351 * in an opaque unsigned value. This value should be later tested by 2352 * virtqueue_poll, to detect a possible race between the driver checking for 2353 * more work, and enabling callbacks. 2354 * 2355 * Caller must ensure we don't call this with other virtqueue 2356 * operations at the same time (except where noted). 2357 */ 2358 unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq) 2359 { 2360 struct vring_virtqueue *vq = to_vvq(_vq); 2361 2362 if (vq->event_triggered) 2363 vq->event_triggered = false; 2364 2365 return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) : 2366 virtqueue_enable_cb_prepare_split(_vq); 2367 } 2368 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); 2369 2370 /** 2371 * virtqueue_poll - query pending used buffers 2372 * @_vq: the struct virtqueue we're talking about. 2373 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare). 2374 * 2375 * Returns "true" if there are pending used buffers in the queue. 2376 * 2377 * This does not need to be serialized. 
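 *
 * Typical (illustrative) pairing with virtqueue_enable_cb_prepare();
 * process_used() here is a hypothetical helper that drains used buffers:
 *
 *	unsigned int opaque = virtqueue_enable_cb_prepare(vq);
 *
 *	if (virtqueue_poll(vq, opaque)) {
 *		virtqueue_disable_cb(vq);
 *		process_used(vq);
 *	}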
2378 */ 2379 bool virtqueue_poll(struct virtqueue *_vq, unsigned int last_used_idx) 2380 { 2381 struct vring_virtqueue *vq = to_vvq(_vq); 2382 2383 if (unlikely(vq->broken)) 2384 return false; 2385 2386 virtio_mb(vq->weak_barriers); 2387 return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) : 2388 virtqueue_poll_split(_vq, last_used_idx); 2389 } 2390 EXPORT_SYMBOL_GPL(virtqueue_poll); 2391 2392 /** 2393 * virtqueue_enable_cb - restart callbacks after disable_cb. 2394 * @_vq: the struct virtqueue we're talking about. 2395 * 2396 * This re-enables callbacks; it returns "false" if there are pending 2397 * buffers in the queue, to detect a possible race between the driver 2398 * checking for more work, and enabling callbacks. 2399 * 2400 * Caller must ensure we don't call this with other virtqueue 2401 * operations at the same time (except where noted). 2402 */ 2403 bool virtqueue_enable_cb(struct virtqueue *_vq) 2404 { 2405 unsigned int last_used_idx = virtqueue_enable_cb_prepare(_vq); 2406 2407 return !virtqueue_poll(_vq, last_used_idx); 2408 } 2409 EXPORT_SYMBOL_GPL(virtqueue_enable_cb); 2410 2411 /** 2412 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb. 2413 * @_vq: the struct virtqueue we're talking about. 2414 * 2415 * This re-enables callbacks but hints to the other side to delay 2416 * interrupts until most of the available buffers have been processed; 2417 * it returns "false" if there are many pending buffers in the queue, 2418 * to detect a possible race between the driver checking for more work, 2419 * and enabling callbacks. 2420 * 2421 * Caller must ensure we don't call this with other virtqueue 2422 * operations at the same time (except where noted). 2423 */ 2424 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) 2425 { 2426 struct vring_virtqueue *vq = to_vvq(_vq); 2427 2428 if (vq->event_triggered) 2429 vq->event_triggered = false; 2430 2431 return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) : 2432 virtqueue_enable_cb_delayed_split(_vq); 2433 } 2434 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); 2435 2436 /** 2437 * virtqueue_detach_unused_buf - detach first unused buffer 2438 * @_vq: the struct virtqueue we're talking about. 2439 * 2440 * Returns NULL or the "data" token handed to virtqueue_add_*(). 2441 * This is not valid on an active queue; it is useful for device 2442 * shutdown or the reset queue. 2443 */ 2444 void *virtqueue_detach_unused_buf(struct virtqueue *_vq) 2445 { 2446 struct vring_virtqueue *vq = to_vvq(_vq); 2447 2448 return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) : 2449 virtqueue_detach_unused_buf_split(_vq); 2450 } 2451 EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf); 2452 2453 static inline bool more_used(const struct vring_virtqueue *vq) 2454 { 2455 return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq); 2456 } 2457 2458 /** 2459 * vring_interrupt - notify a virtqueue on an interrupt 2460 * @irq: the IRQ number (ignored) 2461 * @_vq: the struct virtqueue to notify 2462 * 2463 * Calls the callback function of @_vq to process the virtqueue 2464 * notification. 
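 *
 * Transports usually install this directly as the queue's interrupt
 * handler; a hedged sketch (irq and vdev are assumptions of the example,
 * not taken from this file):
 *
 *	err = request_irq(irq, vring_interrupt, IRQF_SHARED,
 *			  dev_name(&vdev->dev), vq);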
2465 */ 2466 irqreturn_t vring_interrupt(int irq, void *_vq) 2467 { 2468 struct vring_virtqueue *vq = to_vvq(_vq); 2469 2470 if (!more_used(vq)) { 2471 pr_debug("virtqueue interrupt with no work for %p\n", vq); 2472 return IRQ_NONE; 2473 } 2474 2475 if (unlikely(vq->broken)) { 2476 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION 2477 dev_warn_once(&vq->vq.vdev->dev, 2478 "virtio vring IRQ raised before DRIVER_OK"); 2479 return IRQ_NONE; 2480 #else 2481 return IRQ_HANDLED; 2482 #endif 2483 } 2484 2485 /* Just a hint for performance: so it's ok that this can be racy! */ 2486 if (vq->event) 2487 vq->event_triggered = true; 2488 2489 pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback); 2490 if (vq->vq.callback) 2491 vq->vq.callback(&vq->vq); 2492 2493 return IRQ_HANDLED; 2494 } 2495 EXPORT_SYMBOL_GPL(vring_interrupt); 2496 2497 /* Only available for split ring */ 2498 static struct virtqueue *__vring_new_virtqueue(unsigned int index, 2499 struct vring_virtqueue_split *vring_split, 2500 struct virtio_device *vdev, 2501 bool weak_barriers, 2502 bool context, 2503 bool (*notify)(struct virtqueue *), 2504 void (*callback)(struct virtqueue *), 2505 const char *name, 2506 struct device *dma_dev) 2507 { 2508 struct vring_virtqueue *vq; 2509 int err; 2510 2511 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 2512 return NULL; 2513 2514 vq = kmalloc(sizeof(*vq), GFP_KERNEL); 2515 if (!vq) 2516 return NULL; 2517 2518 vq->packed_ring = false; 2519 vq->vq.callback = callback; 2520 vq->vq.vdev = vdev; 2521 vq->vq.name = name; 2522 vq->vq.index = index; 2523 vq->vq.reset = false; 2524 vq->we_own_ring = false; 2525 vq->notify = notify; 2526 vq->weak_barriers = weak_barriers; 2527 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION 2528 vq->broken = true; 2529 #else 2530 vq->broken = false; 2531 #endif 2532 vq->dma_dev = dma_dev; 2533 vq->use_dma_api = vring_use_dma_api(vdev); 2534 2535 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && 2536 !context; 2537 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 2538 2539 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) 2540 vq->weak_barriers = false; 2541 2542 err = vring_alloc_state_extra_split(vring_split); 2543 if (err) { 2544 kfree(vq); 2545 return NULL; 2546 } 2547 2548 virtqueue_vring_init_split(vring_split, vq); 2549 2550 virtqueue_init(vq, vring_split->vring.num); 2551 virtqueue_vring_attach_split(vq, vring_split); 2552 2553 spin_lock(&vdev->vqs_list_lock); 2554 list_add_tail(&vq->vq.list, &vdev->vqs); 2555 spin_unlock(&vdev->vqs_list_lock); 2556 return &vq->vq; 2557 } 2558 2559 struct virtqueue *vring_create_virtqueue( 2560 unsigned int index, 2561 unsigned int num, 2562 unsigned int vring_align, 2563 struct virtio_device *vdev, 2564 bool weak_barriers, 2565 bool may_reduce_num, 2566 bool context, 2567 bool (*notify)(struct virtqueue *), 2568 void (*callback)(struct virtqueue *), 2569 const char *name) 2570 { 2571 2572 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 2573 return vring_create_virtqueue_packed(index, num, vring_align, 2574 vdev, weak_barriers, may_reduce_num, 2575 context, notify, callback, name, vdev->dev.parent); 2576 2577 return vring_create_virtqueue_split(index, num, vring_align, 2578 vdev, weak_barriers, may_reduce_num, 2579 context, notify, callback, name, vdev->dev.parent); 2580 } 2581 EXPORT_SYMBOL_GPL(vring_create_virtqueue); 2582 2583 struct virtqueue *vring_create_virtqueue_dma( 2584 unsigned int index, 2585 unsigned int num, 2586 unsigned int vring_align, 2587 struct virtio_device *vdev, 
2588		bool weak_barriers,
2589		bool may_reduce_num,
2590		bool context,
2591		bool (*notify)(struct virtqueue *),
2592		void (*callback)(struct virtqueue *),
2593		const char *name,
2594		struct device *dma_dev)
2595 {
2596
2597	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2598		return vring_create_virtqueue_packed(index, num, vring_align,
2599				vdev, weak_barriers, may_reduce_num,
2600				context, notify, callback, name, dma_dev);
2601
2602	return vring_create_virtqueue_split(index, num, vring_align,
2603			vdev, weak_barriers, may_reduce_num,
2604			context, notify, callback, name, dma_dev);
2605 }
2606 EXPORT_SYMBOL_GPL(vring_create_virtqueue_dma);
2607
2608 /**
2609  * virtqueue_resize - resize the vring of vq
2610  * @_vq: the struct virtqueue we're talking about.
2611  * @num: new ring size (number of entries)
2612  * @recycle: callback to recycle buffers that are no longer used
2613  *
2614  * If a new vring really needs to be created, this sets the current vq
2615  * into the reset state, then calls @recycle on each buffer that is no
2616  * longer used. Only after the new vring has been successfully created is
2617  * the old vring released.
2618  *
2619  * Caller must ensure we don't call this with other virtqueue operations
2620  * at the same time (except where noted).
2621  *
2622  * Returns zero or a negative error.
2623  * 0: success.
2624  * -ENOMEM: Failed to allocate a new ring; the vq keeps its original ring
2625  *  size and can still work normally.
2626  * -EBUSY: Failed to sync with the device; the vq may not work properly.
2627  * -ENOENT: Transport or device not supported.
2628  * -E2BIG/-EINVAL: @num is out of range or otherwise invalid.
2629  * -EPERM: Operation not permitted.
2630  *
2631  */
2632 int virtqueue_resize(struct virtqueue *_vq, u32 num,
2633		     void (*recycle)(struct virtqueue *vq, void *buf))
2634 {
2635	struct vring_virtqueue *vq = to_vvq(_vq);
2636	struct virtio_device *vdev = vq->vq.vdev;
2637	void *buf;
2638	int err;
2639
2640	if (!vq->we_own_ring)
2641		return -EPERM;
2642
2643	if (num > vq->vq.num_max)
2644		return -E2BIG;
2645
2646	if (!num)
2647		return -EINVAL;
2648
2649	if ((vq->packed_ring ?
vq->packed.vring.num : vq->split.vring.num) == num) 2650 return 0; 2651 2652 if (!vdev->config->disable_vq_and_reset) 2653 return -ENOENT; 2654 2655 if (!vdev->config->enable_vq_after_reset) 2656 return -ENOENT; 2657 2658 err = vdev->config->disable_vq_and_reset(_vq); 2659 if (err) 2660 return err; 2661 2662 while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL) 2663 recycle(_vq, buf); 2664 2665 if (vq->packed_ring) 2666 err = virtqueue_resize_packed(_vq, num); 2667 else 2668 err = virtqueue_resize_split(_vq, num); 2669 2670 if (vdev->config->enable_vq_after_reset(_vq)) 2671 return -EBUSY; 2672 2673 return err; 2674 } 2675 EXPORT_SYMBOL_GPL(virtqueue_resize); 2676 2677 /* Only available for split ring */ 2678 struct virtqueue *vring_new_virtqueue(unsigned int index, 2679 unsigned int num, 2680 unsigned int vring_align, 2681 struct virtio_device *vdev, 2682 bool weak_barriers, 2683 bool context, 2684 void *pages, 2685 bool (*notify)(struct virtqueue *vq), 2686 void (*callback)(struct virtqueue *vq), 2687 const char *name) 2688 { 2689 struct vring_virtqueue_split vring_split = {}; 2690 2691 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 2692 return NULL; 2693 2694 vring_init(&vring_split.vring, num, pages, vring_align); 2695 return __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers, 2696 context, notify, callback, name, 2697 vdev->dev.parent); 2698 } 2699 EXPORT_SYMBOL_GPL(vring_new_virtqueue); 2700 2701 static void vring_free(struct virtqueue *_vq) 2702 { 2703 struct vring_virtqueue *vq = to_vvq(_vq); 2704 2705 if (vq->we_own_ring) { 2706 if (vq->packed_ring) { 2707 vring_free_queue(vq->vq.vdev, 2708 vq->packed.ring_size_in_bytes, 2709 vq->packed.vring.desc, 2710 vq->packed.ring_dma_addr, 2711 vring_dma_dev(vq)); 2712 2713 vring_free_queue(vq->vq.vdev, 2714 vq->packed.event_size_in_bytes, 2715 vq->packed.vring.driver, 2716 vq->packed.driver_event_dma_addr, 2717 vring_dma_dev(vq)); 2718 2719 vring_free_queue(vq->vq.vdev, 2720 vq->packed.event_size_in_bytes, 2721 vq->packed.vring.device, 2722 vq->packed.device_event_dma_addr, 2723 vring_dma_dev(vq)); 2724 2725 kfree(vq->packed.desc_state); 2726 kfree(vq->packed.desc_extra); 2727 } else { 2728 vring_free_queue(vq->vq.vdev, 2729 vq->split.queue_size_in_bytes, 2730 vq->split.vring.desc, 2731 vq->split.queue_dma_addr, 2732 vring_dma_dev(vq)); 2733 } 2734 } 2735 if (!vq->packed_ring) { 2736 kfree(vq->split.desc_state); 2737 kfree(vq->split.desc_extra); 2738 } 2739 } 2740 2741 void vring_del_virtqueue(struct virtqueue *_vq) 2742 { 2743 struct vring_virtqueue *vq = to_vvq(_vq); 2744 2745 spin_lock(&vq->vq.vdev->vqs_list_lock); 2746 list_del(&_vq->list); 2747 spin_unlock(&vq->vq.vdev->vqs_list_lock); 2748 2749 vring_free(_vq); 2750 2751 kfree(vq); 2752 } 2753 EXPORT_SYMBOL_GPL(vring_del_virtqueue); 2754 2755 /* Manipulates transport-specific feature bits. */ 2756 void vring_transport_features(struct virtio_device *vdev) 2757 { 2758 unsigned int i; 2759 2760 for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) { 2761 switch (i) { 2762 case VIRTIO_RING_F_INDIRECT_DESC: 2763 break; 2764 case VIRTIO_RING_F_EVENT_IDX: 2765 break; 2766 case VIRTIO_F_VERSION_1: 2767 break; 2768 case VIRTIO_F_ACCESS_PLATFORM: 2769 break; 2770 case VIRTIO_F_RING_PACKED: 2771 break; 2772 case VIRTIO_F_ORDER_PLATFORM: 2773 break; 2774 default: 2775 /* We don't understand this bit. 
 */
2776			__virtio_clear_bit(vdev, i);
2777		}
2778	}
2779 }
2780 EXPORT_SYMBOL_GPL(vring_transport_features);
2781
2782 /**
2783  * virtqueue_get_vring_size - return the size of the virtqueue's vring
2784  * @_vq: the struct virtqueue containing the vring of interest.
2785  *
2786  * Returns the size of the vring. This is mainly used for boasting to
2787  * userspace. Unlike other operations, this need not be serialized.
2788  */
2789 unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
2790 {
2791
2792	struct vring_virtqueue *vq = to_vvq(_vq);
2793
2794	return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
2795 }
2796 EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
2797
2798 /*
2799  * This function should only be called by the core, not directly by the driver.
2800  */
2801 void __virtqueue_break(struct virtqueue *_vq)
2802 {
2803	struct vring_virtqueue *vq = to_vvq(_vq);
2804
2805	/* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2806	WRITE_ONCE(vq->broken, true);
2807 }
2808 EXPORT_SYMBOL_GPL(__virtqueue_break);
2809
2810 /*
2811  * This function should only be called by the core, not directly by the driver.
2812  */
2813 void __virtqueue_unbreak(struct virtqueue *_vq)
2814 {
2815	struct vring_virtqueue *vq = to_vvq(_vq);
2816
2817	/* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2818	WRITE_ONCE(vq->broken, false);
2819 }
2820 EXPORT_SYMBOL_GPL(__virtqueue_unbreak);
2821
2822 bool virtqueue_is_broken(struct virtqueue *_vq)
2823 {
2824	struct vring_virtqueue *vq = to_vvq(_vq);
2825
2826	return READ_ONCE(vq->broken);
2827 }
2828 EXPORT_SYMBOL_GPL(virtqueue_is_broken);
2829
2830 /*
2831  * This should prevent the device from being used, allowing drivers to
2832  * recover. You may need to grab appropriate locks to flush.
2833  */
2834 void virtio_break_device(struct virtio_device *dev)
2835 {
2836	struct virtqueue *_vq;
2837
2838	spin_lock(&dev->vqs_list_lock);
2839	list_for_each_entry(_vq, &dev->vqs, list) {
2840		struct vring_virtqueue *vq = to_vvq(_vq);
2841
2842		/* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2843		WRITE_ONCE(vq->broken, true);
2844	}
2845	spin_unlock(&dev->vqs_list_lock);
2846 }
2847 EXPORT_SYMBOL_GPL(virtio_break_device);
2848
2849 /*
2850  * This should allow the device to be used by the driver. You may
2851  * need to grab appropriate locks to flush the write to
2852  * vq->broken. This should only be used in specific cases, e.g. probing
2853  * and restoring. This function should only be called by the
2854  * core, not directly by the driver.
2855  */
2856 void __virtio_unbreak_device(struct virtio_device *dev)
2857 {
2858	struct virtqueue *_vq;
2859
2860	spin_lock(&dev->vqs_list_lock);
2861	list_for_each_entry(_vq, &dev->vqs, list) {
2862		struct vring_virtqueue *vq = to_vvq(_vq);
2863
2864		/* Pairs with READ_ONCE() in virtqueue_is_broken().
*/ 2865 WRITE_ONCE(vq->broken, false); 2866 } 2867 spin_unlock(&dev->vqs_list_lock); 2868 } 2869 EXPORT_SYMBOL_GPL(__virtio_unbreak_device); 2870 2871 dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq) 2872 { 2873 struct vring_virtqueue *vq = to_vvq(_vq); 2874 2875 BUG_ON(!vq->we_own_ring); 2876 2877 if (vq->packed_ring) 2878 return vq->packed.ring_dma_addr; 2879 2880 return vq->split.queue_dma_addr; 2881 } 2882 EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr); 2883 2884 dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq) 2885 { 2886 struct vring_virtqueue *vq = to_vvq(_vq); 2887 2888 BUG_ON(!vq->we_own_ring); 2889 2890 if (vq->packed_ring) 2891 return vq->packed.driver_event_dma_addr; 2892 2893 return vq->split.queue_dma_addr + 2894 ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc); 2895 } 2896 EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr); 2897 2898 dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq) 2899 { 2900 struct vring_virtqueue *vq = to_vvq(_vq); 2901 2902 BUG_ON(!vq->we_own_ring); 2903 2904 if (vq->packed_ring) 2905 return vq->packed.device_event_dma_addr; 2906 2907 return vq->split.queue_dma_addr + 2908 ((char *)vq->split.vring.used - (char *)vq->split.vring.desc); 2909 } 2910 EXPORT_SYMBOL_GPL(virtqueue_get_used_addr); 2911 2912 /* Only available for split ring */ 2913 const struct vring *virtqueue_get_vring(struct virtqueue *vq) 2914 { 2915 return &to_vvq(vq)->split.vring; 2916 } 2917 EXPORT_SYMBOL_GPL(virtqueue_get_vring); 2918 2919 MODULE_LICENSE("GPL"); 2920
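
/*
 * Illustrative usage sketch of the exported API above, assuming a driver
 * that already owns a struct virtqueue *vq and a buffer `buf' of length
 * `len' (process() is a hypothetical helper, not part of this file):
 *
 *	struct scatterlist sg;
 *	unsigned int used_len;
 *	void *token;
 *
 *	sg_init_one(&sg, buf, len);
 *	if (virtqueue_add_inbuf(vq, &sg, 1, buf, GFP_KERNEL) == 0)
 *		virtqueue_kick(vq);
 *
 * and later, typically from the virtqueue callback:
 *
 *	while ((token = virtqueue_get_buf(vq, &used_len)) != NULL)
 *		process(token, used_len);
 */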