1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* Virtio ring implementation. 3 * 4 * Copyright 2007 Rusty Russell IBM Corporation 5 */ 6 #include <linux/virtio.h> 7 #include <linux/virtio_ring.h> 8 #include <linux/virtio_config.h> 9 #include <linux/device.h> 10 #include <linux/slab.h> 11 #include <linux/module.h> 12 #include <linux/hrtimer.h> 13 #include <linux/dma-mapping.h> 14 #include <linux/spinlock.h> 15 #include <xen/xen.h> 16 17 #ifdef DEBUG 18 /* For development, we want to crash whenever the ring is screwed. */ 19 #define BAD_RING(_vq, fmt, args...) \ 20 do { \ 21 dev_err(&(_vq)->vq.vdev->dev, \ 22 "%s:"fmt, (_vq)->vq.name, ##args); \ 23 BUG(); \ 24 } while (0) 25 /* Caller is supposed to guarantee no reentry. */ 26 #define START_USE(_vq) \ 27 do { \ 28 if ((_vq)->in_use) \ 29 panic("%s:in_use = %i\n", \ 30 (_vq)->vq.name, (_vq)->in_use); \ 31 (_vq)->in_use = __LINE__; \ 32 } while (0) 33 #define END_USE(_vq) \ 34 do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0) 35 #define LAST_ADD_TIME_UPDATE(_vq) \ 36 do { \ 37 ktime_t now = ktime_get(); \ 38 \ 39 /* No kick or get, with .1 second between? Warn. */ \ 40 if ((_vq)->last_add_time_valid) \ 41 WARN_ON(ktime_to_ms(ktime_sub(now, \ 42 (_vq)->last_add_time)) > 100); \ 43 (_vq)->last_add_time = now; \ 44 (_vq)->last_add_time_valid = true; \ 45 } while (0) 46 #define LAST_ADD_TIME_CHECK(_vq) \ 47 do { \ 48 if ((_vq)->last_add_time_valid) { \ 49 WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \ 50 (_vq)->last_add_time)) > 100); \ 51 } \ 52 } while (0) 53 #define LAST_ADD_TIME_INVALID(_vq) \ 54 ((_vq)->last_add_time_valid = false) 55 #else 56 #define BAD_RING(_vq, fmt, args...) \ 57 do { \ 58 dev_err(&_vq->vq.vdev->dev, \ 59 "%s:"fmt, (_vq)->vq.name, ##args); \ 60 (_vq)->broken = true; \ 61 } while (0) 62 #define START_USE(vq) 63 #define END_USE(vq) 64 #define LAST_ADD_TIME_UPDATE(vq) 65 #define LAST_ADD_TIME_CHECK(vq) 66 #define LAST_ADD_TIME_INVALID(vq) 67 #endif 68 69 struct vring_desc_state_split { 70 void *data; /* Data for callback. */ 71 struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ 72 }; 73 74 struct vring_desc_state_packed { 75 void *data; /* Data for callback. */ 76 struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */ 77 u16 num; /* Descriptor list length. */ 78 u16 last; /* The last desc state in a list. */ 79 }; 80 81 struct vring_desc_extra { 82 dma_addr_t addr; /* Descriptor DMA addr. */ 83 u32 len; /* Descriptor length. */ 84 u16 flags; /* Descriptor flags. */ 85 u16 next; /* The next desc state in a list. */ 86 }; 87 88 struct vring_virtqueue { 89 struct virtqueue vq; 90 91 /* Is this a packed ring? */ 92 bool packed_ring; 93 94 /* Is DMA API used? */ 95 bool use_dma_api; 96 97 /* Can we use weak barriers? */ 98 bool weak_barriers; 99 100 /* Other side has made a mess, don't try any more. */ 101 bool broken; 102 103 /* Host supports indirect buffers */ 104 bool indirect; 105 106 /* Host publishes avail event idx */ 107 bool event; 108 109 /* Head of free buffer list. */ 110 unsigned int free_head; 111 /* Number we've added since last sync. */ 112 unsigned int num_added; 113 114 /* Last used index we've seen. */ 115 u16 last_used_idx; 116 117 /* Hint for event idx: already triggered no need to disable. */ 118 bool event_triggered; 119 120 union { 121 /* Available for split ring */ 122 struct { 123 /* Actual memory layout for this queue. 
*/ 124 struct vring vring; 125 126 /* Last written value to avail->flags */ 127 u16 avail_flags_shadow; 128 129 /* 130 * Last written value to avail->idx in 131 * guest byte order. 132 */ 133 u16 avail_idx_shadow; 134 135 /* Per-descriptor state. */ 136 struct vring_desc_state_split *desc_state; 137 struct vring_desc_extra *desc_extra; 138 139 /* DMA address and size information */ 140 dma_addr_t queue_dma_addr; 141 size_t queue_size_in_bytes; 142 } split; 143 144 /* Available for packed ring */ 145 struct { 146 /* Actual memory layout for this queue. */ 147 struct { 148 unsigned int num; 149 struct vring_packed_desc *desc; 150 struct vring_packed_desc_event *driver; 151 struct vring_packed_desc_event *device; 152 } vring; 153 154 /* Driver ring wrap counter. */ 155 bool avail_wrap_counter; 156 157 /* Device ring wrap counter. */ 158 bool used_wrap_counter; 159 160 /* Avail used flags. */ 161 u16 avail_used_flags; 162 163 /* Index of the next avail descriptor. */ 164 u16 next_avail_idx; 165 166 /* 167 * Last written value to driver->flags in 168 * guest byte order. 169 */ 170 u16 event_flags_shadow; 171 172 /* Per-descriptor state. */ 173 struct vring_desc_state_packed *desc_state; 174 struct vring_desc_extra *desc_extra; 175 176 /* DMA address and size information */ 177 dma_addr_t ring_dma_addr; 178 dma_addr_t driver_event_dma_addr; 179 dma_addr_t device_event_dma_addr; 180 size_t ring_size_in_bytes; 181 size_t event_size_in_bytes; 182 } packed; 183 }; 184 185 /* How to notify other side. FIXME: commonalize hcalls! */ 186 bool (*notify)(struct virtqueue *vq); 187 188 /* DMA, allocation, and size information */ 189 bool we_own_ring; 190 191 #ifdef DEBUG 192 /* They're supposed to lock for us. */ 193 unsigned int in_use; 194 195 /* Figure out if their kicks are too delayed. */ 196 bool last_add_time_valid; 197 ktime_t last_add_time; 198 #endif 199 }; 200 201 202 /* 203 * Helpers. 204 */ 205 206 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) 207 208 static inline bool virtqueue_use_indirect(struct virtqueue *_vq, 209 unsigned int total_sg) 210 { 211 struct vring_virtqueue *vq = to_vvq(_vq); 212 213 /* 214 * If the host supports indirect descriptor tables, and we have multiple 215 * buffers, then go indirect. FIXME: tune this threshold 216 */ 217 return (vq->indirect && total_sg > 1 && vq->vq.num_free); 218 } 219 220 /* 221 * Modern virtio devices have feature bits to specify whether they need a 222 * quirk and bypass the IOMMU. If not there, just use the DMA API. 223 * 224 * If there, the interaction between virtio and DMA API is messy. 225 * 226 * On most systems with virtio, physical addresses match bus addresses, 227 * and it doesn't particularly matter whether we use the DMA API. 228 * 229 * On some systems, including Xen and any system with a physical device 230 * that speaks virtio behind a physical IOMMU, we must use the DMA API 231 * for virtio DMA to work at all. 232 * 233 * On other systems, including SPARC and PPC64, virtio-pci devices are 234 * enumerated as though they are behind an IOMMU, but the virtio host 235 * ignores the IOMMU, so we must either pretend that the IOMMU isn't 236 * there or somehow map everything as the identity. 237 * 238 * For the time being, we preserve historic behavior and bypass the DMA 239 * API. 240 * 241 * TODO: install a per-device DMA ops structure that does the right thing 242 * taking into account all the above quirks, and use the DMA API 243 * unconditionally on data path. 
244 */ 245 246 static bool vring_use_dma_api(struct virtio_device *vdev) 247 { 248 if (!virtio_has_dma_quirk(vdev)) 249 return true; 250 251 /* Otherwise, we are left to guess. */ 252 /* 253 * In theory, it's possible to have a buggy QEMU-supposed 254 * emulated Q35 IOMMU and Xen enabled at the same time. On 255 * such a configuration, virtio has never worked and will 256 * not work without an even larger kludge. Instead, enable 257 * the DMA API if we're a Xen guest, which at least allows 258 * all of the sensible Xen configurations to work correctly. 259 */ 260 if (xen_domain()) 261 return true; 262 263 return false; 264 } 265 266 size_t virtio_max_dma_size(struct virtio_device *vdev) 267 { 268 size_t max_segment_size = SIZE_MAX; 269 270 if (vring_use_dma_api(vdev)) 271 max_segment_size = dma_max_mapping_size(vdev->dev.parent); 272 273 return max_segment_size; 274 } 275 EXPORT_SYMBOL_GPL(virtio_max_dma_size); 276 277 static void *vring_alloc_queue(struct virtio_device *vdev, size_t size, 278 dma_addr_t *dma_handle, gfp_t flag) 279 { 280 if (vring_use_dma_api(vdev)) { 281 return dma_alloc_coherent(vdev->dev.parent, size, 282 dma_handle, flag); 283 } else { 284 void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag); 285 286 if (queue) { 287 phys_addr_t phys_addr = virt_to_phys(queue); 288 *dma_handle = (dma_addr_t)phys_addr; 289 290 /* 291 * Sanity check: make sure we dind't truncate 292 * the address. The only arches I can find that 293 * have 64-bit phys_addr_t but 32-bit dma_addr_t 294 * are certain non-highmem MIPS and x86 295 * configurations, but these configurations 296 * should never allocate physical pages above 32 297 * bits, so this is fine. Just in case, throw a 298 * warning and abort if we end up with an 299 * unrepresentable address. 300 */ 301 if (WARN_ON_ONCE(*dma_handle != phys_addr)) { 302 free_pages_exact(queue, PAGE_ALIGN(size)); 303 return NULL; 304 } 305 } 306 return queue; 307 } 308 } 309 310 static void vring_free_queue(struct virtio_device *vdev, size_t size, 311 void *queue, dma_addr_t dma_handle) 312 { 313 if (vring_use_dma_api(vdev)) 314 dma_free_coherent(vdev->dev.parent, size, queue, dma_handle); 315 else 316 free_pages_exact(queue, PAGE_ALIGN(size)); 317 } 318 319 /* 320 * The DMA ops on various arches are rather gnarly right now, and 321 * making all of the arch DMA ops work on the vring device itself 322 * is a mess. For now, we use the parent device for DMA ops. 323 */ 324 static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq) 325 { 326 return vq->vq.vdev->dev.parent; 327 } 328 329 /* Map one sg entry. */ 330 static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq, 331 struct scatterlist *sg, 332 enum dma_data_direction direction) 333 { 334 if (!vq->use_dma_api) 335 return (dma_addr_t)sg_phys(sg); 336 337 /* 338 * We can't use dma_map_sg, because we don't use scatterlists in 339 * the way it expects (we don't guarantee that the scatterlist 340 * will exist for the lifetime of the mapping). 
341 */ 342 return dma_map_page(vring_dma_dev(vq), 343 sg_page(sg), sg->offset, sg->length, 344 direction); 345 } 346 347 static dma_addr_t vring_map_single(const struct vring_virtqueue *vq, 348 void *cpu_addr, size_t size, 349 enum dma_data_direction direction) 350 { 351 if (!vq->use_dma_api) 352 return (dma_addr_t)virt_to_phys(cpu_addr); 353 354 return dma_map_single(vring_dma_dev(vq), 355 cpu_addr, size, direction); 356 } 357 358 static int vring_mapping_error(const struct vring_virtqueue *vq, 359 dma_addr_t addr) 360 { 361 if (!vq->use_dma_api) 362 return 0; 363 364 return dma_mapping_error(vring_dma_dev(vq), addr); 365 } 366 367 368 /* 369 * Split ring specific functions - *_split(). 370 */ 371 372 static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq, 373 struct vring_desc *desc) 374 { 375 u16 flags; 376 377 if (!vq->use_dma_api) 378 return; 379 380 flags = virtio16_to_cpu(vq->vq.vdev, desc->flags); 381 382 dma_unmap_page(vring_dma_dev(vq), 383 virtio64_to_cpu(vq->vq.vdev, desc->addr), 384 virtio32_to_cpu(vq->vq.vdev, desc->len), 385 (flags & VRING_DESC_F_WRITE) ? 386 DMA_FROM_DEVICE : DMA_TO_DEVICE); 387 } 388 389 static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq, 390 unsigned int i) 391 { 392 struct vring_desc_extra *extra = vq->split.desc_extra; 393 u16 flags; 394 395 if (!vq->use_dma_api) 396 goto out; 397 398 flags = extra[i].flags; 399 400 if (flags & VRING_DESC_F_INDIRECT) { 401 dma_unmap_single(vring_dma_dev(vq), 402 extra[i].addr, 403 extra[i].len, 404 (flags & VRING_DESC_F_WRITE) ? 405 DMA_FROM_DEVICE : DMA_TO_DEVICE); 406 } else { 407 dma_unmap_page(vring_dma_dev(vq), 408 extra[i].addr, 409 extra[i].len, 410 (flags & VRING_DESC_F_WRITE) ? 411 DMA_FROM_DEVICE : DMA_TO_DEVICE); 412 } 413 414 out: 415 return extra[i].next; 416 } 417 418 static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq, 419 unsigned int total_sg, 420 gfp_t gfp) 421 { 422 struct vring_desc *desc; 423 unsigned int i; 424 425 /* 426 * We require lowmem mappings for the descriptors because 427 * otherwise virt_to_phys will give us bogus addresses in the 428 * virtqueue. 
429 */ 430 gfp &= ~__GFP_HIGHMEM; 431 432 desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp); 433 if (!desc) 434 return NULL; 435 436 for (i = 0; i < total_sg; i++) 437 desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1); 438 return desc; 439 } 440 441 static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq, 442 struct vring_desc *desc, 443 unsigned int i, 444 dma_addr_t addr, 445 unsigned int len, 446 u16 flags, 447 bool indirect) 448 { 449 struct vring_virtqueue *vring = to_vvq(vq); 450 struct vring_desc_extra *extra = vring->split.desc_extra; 451 u16 next; 452 453 desc[i].flags = cpu_to_virtio16(vq->vdev, flags); 454 desc[i].addr = cpu_to_virtio64(vq->vdev, addr); 455 desc[i].len = cpu_to_virtio32(vq->vdev, len); 456 457 if (!indirect) { 458 next = extra[i].next; 459 desc[i].next = cpu_to_virtio16(vq->vdev, next); 460 461 extra[i].addr = addr; 462 extra[i].len = len; 463 extra[i].flags = flags; 464 } else 465 next = virtio16_to_cpu(vq->vdev, desc[i].next); 466 467 return next; 468 } 469 470 static inline int virtqueue_add_split(struct virtqueue *_vq, 471 struct scatterlist *sgs[], 472 unsigned int total_sg, 473 unsigned int out_sgs, 474 unsigned int in_sgs, 475 void *data, 476 void *ctx, 477 gfp_t gfp) 478 { 479 struct vring_virtqueue *vq = to_vvq(_vq); 480 struct scatterlist *sg; 481 struct vring_desc *desc; 482 unsigned int i, n, avail, descs_used, prev, err_idx; 483 int head; 484 bool indirect; 485 486 START_USE(vq); 487 488 BUG_ON(data == NULL); 489 BUG_ON(ctx && vq->indirect); 490 491 if (unlikely(vq->broken)) { 492 END_USE(vq); 493 return -EIO; 494 } 495 496 LAST_ADD_TIME_UPDATE(vq); 497 498 BUG_ON(total_sg == 0); 499 500 head = vq->free_head; 501 502 if (virtqueue_use_indirect(_vq, total_sg)) 503 desc = alloc_indirect_split(_vq, total_sg, gfp); 504 else { 505 desc = NULL; 506 WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect); 507 } 508 509 if (desc) { 510 /* Use a single buffer which doesn't continue */ 511 indirect = true; 512 /* Set up rest to use this indirect table. */ 513 i = 0; 514 descs_used = 1; 515 } else { 516 indirect = false; 517 desc = vq->split.vring.desc; 518 i = head; 519 descs_used = total_sg; 520 } 521 522 if (vq->vq.num_free < descs_used) { 523 pr_debug("Can't add buf len %i - avail = %i\n", 524 descs_used, vq->vq.num_free); 525 /* FIXME: for historical reasons, we force a notify here if 526 * there are outgoing parts to the buffer. Presumably the 527 * host should service the ring ASAP. */ 528 if (out_sgs) 529 vq->notify(&vq->vq); 530 if (indirect) 531 kfree(desc); 532 END_USE(vq); 533 return -ENOSPC; 534 } 535 536 for (n = 0; n < out_sgs; n++) { 537 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 538 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE); 539 if (vring_mapping_error(vq, addr)) 540 goto unmap_release; 541 542 prev = i; 543 /* Note that we trust indirect descriptor 544 * table since it use stream DMA mapping. 545 */ 546 i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length, 547 VRING_DESC_F_NEXT, 548 indirect); 549 } 550 } 551 for (; n < (out_sgs + in_sgs); n++) { 552 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 553 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE); 554 if (vring_mapping_error(vq, addr)) 555 goto unmap_release; 556 557 prev = i; 558 /* Note that we trust indirect descriptor 559 * table since it use stream DMA mapping. 
560 */ 561 i = virtqueue_add_desc_split(_vq, desc, i, addr, 562 sg->length, 563 VRING_DESC_F_NEXT | 564 VRING_DESC_F_WRITE, 565 indirect); 566 } 567 } 568 /* Last one doesn't continue. */ 569 desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); 570 if (!indirect && vq->use_dma_api) 571 vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &= 572 ~VRING_DESC_F_NEXT; 573 574 if (indirect) { 575 /* Now that the indirect table is filled in, map it. */ 576 dma_addr_t addr = vring_map_single( 577 vq, desc, total_sg * sizeof(struct vring_desc), 578 DMA_TO_DEVICE); 579 if (vring_mapping_error(vq, addr)) 580 goto unmap_release; 581 582 virtqueue_add_desc_split(_vq, vq->split.vring.desc, 583 head, addr, 584 total_sg * sizeof(struct vring_desc), 585 VRING_DESC_F_INDIRECT, 586 false); 587 } 588 589 /* We're using some buffers from the free list. */ 590 vq->vq.num_free -= descs_used; 591 592 /* Update free pointer */ 593 if (indirect) 594 vq->free_head = vq->split.desc_extra[head].next; 595 else 596 vq->free_head = i; 597 598 /* Store token and indirect buffer state. */ 599 vq->split.desc_state[head].data = data; 600 if (indirect) 601 vq->split.desc_state[head].indir_desc = desc; 602 else 603 vq->split.desc_state[head].indir_desc = ctx; 604 605 /* Put entry in available array (but don't update avail->idx until they 606 * do sync). */ 607 avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1); 608 vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head); 609 610 /* Descriptors and available array need to be set before we expose the 611 * new available array entries. */ 612 virtio_wmb(vq->weak_barriers); 613 vq->split.avail_idx_shadow++; 614 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, 615 vq->split.avail_idx_shadow); 616 vq->num_added++; 617 618 pr_debug("Added buffer head %i to %p\n", head, vq); 619 END_USE(vq); 620 621 /* This is very unlikely, but theoretically possible. Kick 622 * just in case. */ 623 if (unlikely(vq->num_added == (1 << 16) - 1)) 624 virtqueue_kick(_vq); 625 626 return 0; 627 628 unmap_release: 629 err_idx = i; 630 631 if (indirect) 632 i = 0; 633 else 634 i = head; 635 636 for (n = 0; n < total_sg; n++) { 637 if (i == err_idx) 638 break; 639 if (indirect) { 640 vring_unmap_one_split_indirect(vq, &desc[i]); 641 i = virtio16_to_cpu(_vq->vdev, desc[i].next); 642 } else 643 i = vring_unmap_one_split(vq, i); 644 } 645 646 if (indirect) 647 kfree(desc); 648 649 END_USE(vq); 650 return -ENOMEM; 651 } 652 653 static bool virtqueue_kick_prepare_split(struct virtqueue *_vq) 654 { 655 struct vring_virtqueue *vq = to_vvq(_vq); 656 u16 new, old; 657 bool needs_kick; 658 659 START_USE(vq); 660 /* We need to expose available array entries before checking avail 661 * event. */ 662 virtio_mb(vq->weak_barriers); 663 664 old = vq->split.avail_idx_shadow - vq->num_added; 665 new = vq->split.avail_idx_shadow; 666 vq->num_added = 0; 667 668 LAST_ADD_TIME_CHECK(vq); 669 LAST_ADD_TIME_INVALID(vq); 670 671 if (vq->event) { 672 needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev, 673 vring_avail_event(&vq->split.vring)), 674 new, old); 675 } else { 676 needs_kick = !(vq->split.vring.used->flags & 677 cpu_to_virtio16(_vq->vdev, 678 VRING_USED_F_NO_NOTIFY)); 679 } 680 END_USE(vq); 681 return needs_kick; 682 } 683 684 static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, 685 void **ctx) 686 { 687 unsigned int i, j; 688 __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT); 689 690 /* Clear data ptr. 
*/ 691 vq->split.desc_state[head].data = NULL; 692 693 /* Put back on free list: unmap first-level descriptors and find end */ 694 i = head; 695 696 while (vq->split.vring.desc[i].flags & nextflag) { 697 vring_unmap_one_split(vq, i); 698 i = vq->split.desc_extra[i].next; 699 vq->vq.num_free++; 700 } 701 702 vring_unmap_one_split(vq, i); 703 vq->split.desc_extra[i].next = vq->free_head; 704 vq->free_head = head; 705 706 /* Plus final descriptor */ 707 vq->vq.num_free++; 708 709 if (vq->indirect) { 710 struct vring_desc *indir_desc = 711 vq->split.desc_state[head].indir_desc; 712 u32 len; 713 714 /* Free the indirect table, if any, now that it's unmapped. */ 715 if (!indir_desc) 716 return; 717 718 len = vq->split.desc_extra[head].len; 719 720 BUG_ON(!(vq->split.desc_extra[head].flags & 721 VRING_DESC_F_INDIRECT)); 722 BUG_ON(len == 0 || len % sizeof(struct vring_desc)); 723 724 for (j = 0; j < len / sizeof(struct vring_desc); j++) 725 vring_unmap_one_split_indirect(vq, &indir_desc[j]); 726 727 kfree(indir_desc); 728 vq->split.desc_state[head].indir_desc = NULL; 729 } else if (ctx) { 730 *ctx = vq->split.desc_state[head].indir_desc; 731 } 732 } 733 734 static inline bool more_used_split(const struct vring_virtqueue *vq) 735 { 736 return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, 737 vq->split.vring.used->idx); 738 } 739 740 static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq, 741 unsigned int *len, 742 void **ctx) 743 { 744 struct vring_virtqueue *vq = to_vvq(_vq); 745 void *ret; 746 unsigned int i; 747 u16 last_used; 748 749 START_USE(vq); 750 751 if (unlikely(vq->broken)) { 752 END_USE(vq); 753 return NULL; 754 } 755 756 if (!more_used_split(vq)) { 757 pr_debug("No more buffers in queue\n"); 758 END_USE(vq); 759 return NULL; 760 } 761 762 /* Only get used array entries after they have been exposed by host. */ 763 virtio_rmb(vq->weak_barriers); 764 765 last_used = (vq->last_used_idx & (vq->split.vring.num - 1)); 766 i = virtio32_to_cpu(_vq->vdev, 767 vq->split.vring.used->ring[last_used].id); 768 *len = virtio32_to_cpu(_vq->vdev, 769 vq->split.vring.used->ring[last_used].len); 770 771 if (unlikely(i >= vq->split.vring.num)) { 772 BAD_RING(vq, "id %u out of range\n", i); 773 return NULL; 774 } 775 if (unlikely(!vq->split.desc_state[i].data)) { 776 BAD_RING(vq, "id %u is not a head!\n", i); 777 return NULL; 778 } 779 780 /* detach_buf_split clears data, so grab it now. */ 781 ret = vq->split.desc_state[i].data; 782 detach_buf_split(vq, i, ctx); 783 vq->last_used_idx++; 784 /* If we expect an interrupt for the next entry, tell host 785 * by writing event index and flush out the write before 786 * the read in the next get_buf call. */ 787 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) 788 virtio_store_mb(vq->weak_barriers, 789 &vring_used_event(&vq->split.vring), 790 cpu_to_virtio16(_vq->vdev, vq->last_used_idx)); 791 792 LAST_ADD_TIME_INVALID(vq); 793 794 END_USE(vq); 795 return ret; 796 } 797 798 static void virtqueue_disable_cb_split(struct virtqueue *_vq) 799 { 800 struct vring_virtqueue *vq = to_vvq(_vq); 801 802 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) { 803 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; 804 if (vq->event) 805 /* TODO: this is a hack. Figure out a cleaner value to write. 
*/ 806 vring_used_event(&vq->split.vring) = 0x0; 807 else 808 vq->split.vring.avail->flags = 809 cpu_to_virtio16(_vq->vdev, 810 vq->split.avail_flags_shadow); 811 } 812 } 813 814 static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq) 815 { 816 struct vring_virtqueue *vq = to_vvq(_vq); 817 u16 last_used_idx; 818 819 START_USE(vq); 820 821 /* We optimistically turn back on interrupts, then check if there was 822 * more to do. */ 823 /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to 824 * either clear the flags bit or point the event index at the next 825 * entry. Always do both to keep code simple. */ 826 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 827 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 828 if (!vq->event) 829 vq->split.vring.avail->flags = 830 cpu_to_virtio16(_vq->vdev, 831 vq->split.avail_flags_shadow); 832 } 833 vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev, 834 last_used_idx = vq->last_used_idx); 835 END_USE(vq); 836 return last_used_idx; 837 } 838 839 static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx) 840 { 841 struct vring_virtqueue *vq = to_vvq(_vq); 842 843 return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev, 844 vq->split.vring.used->idx); 845 } 846 847 static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq) 848 { 849 struct vring_virtqueue *vq = to_vvq(_vq); 850 u16 bufs; 851 852 START_USE(vq); 853 854 /* We optimistically turn back on interrupts, then check if there was 855 * more to do. */ 856 /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to 857 * either clear the flags bit or point the event index at the next 858 * entry. Always update the event index to keep code simple. */ 859 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 860 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 861 if (!vq->event) 862 vq->split.vring.avail->flags = 863 cpu_to_virtio16(_vq->vdev, 864 vq->split.avail_flags_shadow); 865 } 866 /* TODO: tune this threshold */ 867 bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4; 868 869 virtio_store_mb(vq->weak_barriers, 870 &vring_used_event(&vq->split.vring), 871 cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs)); 872 873 if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx) 874 - vq->last_used_idx) > bufs)) { 875 END_USE(vq); 876 return false; 877 } 878 879 END_USE(vq); 880 return true; 881 } 882 883 static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq) 884 { 885 struct vring_virtqueue *vq = to_vvq(_vq); 886 unsigned int i; 887 void *buf; 888 889 START_USE(vq); 890 891 for (i = 0; i < vq->split.vring.num; i++) { 892 if (!vq->split.desc_state[i].data) 893 continue; 894 /* detach_buf_split clears data, so grab it now. */ 895 buf = vq->split.desc_state[i].data; 896 detach_buf_split(vq, i, NULL); 897 vq->split.avail_idx_shadow--; 898 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, 899 vq->split.avail_idx_shadow); 900 END_USE(vq); 901 return buf; 902 } 903 /* That should have freed everything. 
*/ 904 BUG_ON(vq->vq.num_free != vq->split.vring.num); 905 906 END_USE(vq); 907 return NULL; 908 } 909 910 static struct virtqueue *vring_create_virtqueue_split( 911 unsigned int index, 912 unsigned int num, 913 unsigned int vring_align, 914 struct virtio_device *vdev, 915 bool weak_barriers, 916 bool may_reduce_num, 917 bool context, 918 bool (*notify)(struct virtqueue *), 919 void (*callback)(struct virtqueue *), 920 const char *name) 921 { 922 struct virtqueue *vq; 923 void *queue = NULL; 924 dma_addr_t dma_addr; 925 size_t queue_size_in_bytes; 926 struct vring vring; 927 928 /* We assume num is a power of 2. */ 929 if (num & (num - 1)) { 930 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num); 931 return NULL; 932 } 933 934 /* TODO: allocate each queue chunk individually */ 935 for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) { 936 queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 937 &dma_addr, 938 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 939 if (queue) 940 break; 941 if (!may_reduce_num) 942 return NULL; 943 } 944 945 if (!num) 946 return NULL; 947 948 if (!queue) { 949 /* Try to get a single page. You are my only hope! */ 950 queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 951 &dma_addr, GFP_KERNEL|__GFP_ZERO); 952 } 953 if (!queue) 954 return NULL; 955 956 queue_size_in_bytes = vring_size(num, vring_align); 957 vring_init(&vring, num, queue, vring_align); 958 959 vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context, 960 notify, callback, name); 961 if (!vq) { 962 vring_free_queue(vdev, queue_size_in_bytes, queue, 963 dma_addr); 964 return NULL; 965 } 966 967 to_vvq(vq)->split.queue_dma_addr = dma_addr; 968 to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes; 969 to_vvq(vq)->we_own_ring = true; 970 971 return vq; 972 } 973 974 975 /* 976 * Packed ring specific functions - *_packed(). 977 */ 978 979 static void vring_unmap_extra_packed(const struct vring_virtqueue *vq, 980 struct vring_desc_extra *extra) 981 { 982 u16 flags; 983 984 if (!vq->use_dma_api) 985 return; 986 987 flags = extra->flags; 988 989 if (flags & VRING_DESC_F_INDIRECT) { 990 dma_unmap_single(vring_dma_dev(vq), 991 extra->addr, extra->len, 992 (flags & VRING_DESC_F_WRITE) ? 993 DMA_FROM_DEVICE : DMA_TO_DEVICE); 994 } else { 995 dma_unmap_page(vring_dma_dev(vq), 996 extra->addr, extra->len, 997 (flags & VRING_DESC_F_WRITE) ? 998 DMA_FROM_DEVICE : DMA_TO_DEVICE); 999 } 1000 } 1001 1002 static void vring_unmap_desc_packed(const struct vring_virtqueue *vq, 1003 struct vring_packed_desc *desc) 1004 { 1005 u16 flags; 1006 1007 if (!vq->use_dma_api) 1008 return; 1009 1010 flags = le16_to_cpu(desc->flags); 1011 1012 dma_unmap_page(vring_dma_dev(vq), 1013 le64_to_cpu(desc->addr), 1014 le32_to_cpu(desc->len), 1015 (flags & VRING_DESC_F_WRITE) ? 1016 DMA_FROM_DEVICE : DMA_TO_DEVICE); 1017 } 1018 1019 static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg, 1020 gfp_t gfp) 1021 { 1022 struct vring_packed_desc *desc; 1023 1024 /* 1025 * We require lowmem mappings for the descriptors because 1026 * otherwise virt_to_phys will give us bogus addresses in the 1027 * virtqueue. 
1028 */ 1029 gfp &= ~__GFP_HIGHMEM; 1030 1031 desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp); 1032 1033 return desc; 1034 } 1035 1036 static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, 1037 struct scatterlist *sgs[], 1038 unsigned int total_sg, 1039 unsigned int out_sgs, 1040 unsigned int in_sgs, 1041 void *data, 1042 gfp_t gfp) 1043 { 1044 struct vring_packed_desc *desc; 1045 struct scatterlist *sg; 1046 unsigned int i, n, err_idx; 1047 u16 head, id; 1048 dma_addr_t addr; 1049 1050 head = vq->packed.next_avail_idx; 1051 desc = alloc_indirect_packed(total_sg, gfp); 1052 if (!desc) 1053 return -ENOMEM; 1054 1055 if (unlikely(vq->vq.num_free < 1)) { 1056 pr_debug("Can't add buf len 1 - avail = 0\n"); 1057 kfree(desc); 1058 END_USE(vq); 1059 return -ENOSPC; 1060 } 1061 1062 i = 0; 1063 id = vq->free_head; 1064 BUG_ON(id == vq->packed.vring.num); 1065 1066 for (n = 0; n < out_sgs + in_sgs; n++) { 1067 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 1068 addr = vring_map_one_sg(vq, sg, n < out_sgs ? 1069 DMA_TO_DEVICE : DMA_FROM_DEVICE); 1070 if (vring_mapping_error(vq, addr)) 1071 goto unmap_release; 1072 1073 desc[i].flags = cpu_to_le16(n < out_sgs ? 1074 0 : VRING_DESC_F_WRITE); 1075 desc[i].addr = cpu_to_le64(addr); 1076 desc[i].len = cpu_to_le32(sg->length); 1077 i++; 1078 } 1079 } 1080 1081 /* Now that the indirect table is filled in, map it. */ 1082 addr = vring_map_single(vq, desc, 1083 total_sg * sizeof(struct vring_packed_desc), 1084 DMA_TO_DEVICE); 1085 if (vring_mapping_error(vq, addr)) 1086 goto unmap_release; 1087 1088 vq->packed.vring.desc[head].addr = cpu_to_le64(addr); 1089 vq->packed.vring.desc[head].len = cpu_to_le32(total_sg * 1090 sizeof(struct vring_packed_desc)); 1091 vq->packed.vring.desc[head].id = cpu_to_le16(id); 1092 1093 if (vq->use_dma_api) { 1094 vq->packed.desc_extra[id].addr = addr; 1095 vq->packed.desc_extra[id].len = total_sg * 1096 sizeof(struct vring_packed_desc); 1097 vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT | 1098 vq->packed.avail_used_flags; 1099 } 1100 1101 /* 1102 * A driver MUST NOT make the first descriptor in the list 1103 * available before all subsequent descriptors comprising 1104 * the list are made available. 1105 */ 1106 virtio_wmb(vq->weak_barriers); 1107 vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT | 1108 vq->packed.avail_used_flags); 1109 1110 /* We're using some buffers from the free list. */ 1111 vq->vq.num_free -= 1; 1112 1113 /* Update free pointer */ 1114 n = head + 1; 1115 if (n >= vq->packed.vring.num) { 1116 n = 0; 1117 vq->packed.avail_wrap_counter ^= 1; 1118 vq->packed.avail_used_flags ^= 1119 1 << VRING_PACKED_DESC_F_AVAIL | 1120 1 << VRING_PACKED_DESC_F_USED; 1121 } 1122 vq->packed.next_avail_idx = n; 1123 vq->free_head = vq->packed.desc_extra[id].next; 1124 1125 /* Store token and indirect buffer state. 
*/ 1126 vq->packed.desc_state[id].num = 1; 1127 vq->packed.desc_state[id].data = data; 1128 vq->packed.desc_state[id].indir_desc = desc; 1129 vq->packed.desc_state[id].last = id; 1130 1131 vq->num_added += 1; 1132 1133 pr_debug("Added buffer head %i to %p\n", head, vq); 1134 END_USE(vq); 1135 1136 return 0; 1137 1138 unmap_release: 1139 err_idx = i; 1140 1141 for (i = 0; i < err_idx; i++) 1142 vring_unmap_desc_packed(vq, &desc[i]); 1143 1144 kfree(desc); 1145 1146 END_USE(vq); 1147 return -ENOMEM; 1148 } 1149 1150 static inline int virtqueue_add_packed(struct virtqueue *_vq, 1151 struct scatterlist *sgs[], 1152 unsigned int total_sg, 1153 unsigned int out_sgs, 1154 unsigned int in_sgs, 1155 void *data, 1156 void *ctx, 1157 gfp_t gfp) 1158 { 1159 struct vring_virtqueue *vq = to_vvq(_vq); 1160 struct vring_packed_desc *desc; 1161 struct scatterlist *sg; 1162 unsigned int i, n, c, descs_used, err_idx; 1163 __le16 head_flags, flags; 1164 u16 head, id, prev, curr, avail_used_flags; 1165 int err; 1166 1167 START_USE(vq); 1168 1169 BUG_ON(data == NULL); 1170 BUG_ON(ctx && vq->indirect); 1171 1172 if (unlikely(vq->broken)) { 1173 END_USE(vq); 1174 return -EIO; 1175 } 1176 1177 LAST_ADD_TIME_UPDATE(vq); 1178 1179 BUG_ON(total_sg == 0); 1180 1181 if (virtqueue_use_indirect(_vq, total_sg)) { 1182 err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs, 1183 in_sgs, data, gfp); 1184 if (err != -ENOMEM) { 1185 END_USE(vq); 1186 return err; 1187 } 1188 1189 /* fall back on direct */ 1190 } 1191 1192 head = vq->packed.next_avail_idx; 1193 avail_used_flags = vq->packed.avail_used_flags; 1194 1195 WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect); 1196 1197 desc = vq->packed.vring.desc; 1198 i = head; 1199 descs_used = total_sg; 1200 1201 if (unlikely(vq->vq.num_free < descs_used)) { 1202 pr_debug("Can't add buf len %i - avail = %i\n", 1203 descs_used, vq->vq.num_free); 1204 END_USE(vq); 1205 return -ENOSPC; 1206 } 1207 1208 id = vq->free_head; 1209 BUG_ON(id == vq->packed.vring.num); 1210 1211 curr = id; 1212 c = 0; 1213 for (n = 0; n < out_sgs + in_sgs; n++) { 1214 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 1215 dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ? 1216 DMA_TO_DEVICE : DMA_FROM_DEVICE); 1217 if (vring_mapping_error(vq, addr)) 1218 goto unmap_release; 1219 1220 flags = cpu_to_le16(vq->packed.avail_used_flags | 1221 (++c == total_sg ? 0 : VRING_DESC_F_NEXT) | 1222 (n < out_sgs ? 0 : VRING_DESC_F_WRITE)); 1223 if (i == head) 1224 head_flags = flags; 1225 else 1226 desc[i].flags = flags; 1227 1228 desc[i].addr = cpu_to_le64(addr); 1229 desc[i].len = cpu_to_le32(sg->length); 1230 desc[i].id = cpu_to_le16(id); 1231 1232 if (unlikely(vq->use_dma_api)) { 1233 vq->packed.desc_extra[curr].addr = addr; 1234 vq->packed.desc_extra[curr].len = sg->length; 1235 vq->packed.desc_extra[curr].flags = 1236 le16_to_cpu(flags); 1237 } 1238 prev = curr; 1239 curr = vq->packed.desc_extra[curr].next; 1240 1241 if ((unlikely(++i >= vq->packed.vring.num))) { 1242 i = 0; 1243 vq->packed.avail_used_flags ^= 1244 1 << VRING_PACKED_DESC_F_AVAIL | 1245 1 << VRING_PACKED_DESC_F_USED; 1246 } 1247 } 1248 } 1249 1250 if (i < head) 1251 vq->packed.avail_wrap_counter ^= 1; 1252 1253 /* We're using some buffers from the free list. */ 1254 vq->vq.num_free -= descs_used; 1255 1256 /* Update free pointer */ 1257 vq->packed.next_avail_idx = i; 1258 vq->free_head = curr; 1259 1260 /* Store token. 
*/ 1261 vq->packed.desc_state[id].num = descs_used; 1262 vq->packed.desc_state[id].data = data; 1263 vq->packed.desc_state[id].indir_desc = ctx; 1264 vq->packed.desc_state[id].last = prev; 1265 1266 /* 1267 * A driver MUST NOT make the first descriptor in the list 1268 * available before all subsequent descriptors comprising 1269 * the list are made available. 1270 */ 1271 virtio_wmb(vq->weak_barriers); 1272 vq->packed.vring.desc[head].flags = head_flags; 1273 vq->num_added += descs_used; 1274 1275 pr_debug("Added buffer head %i to %p\n", head, vq); 1276 END_USE(vq); 1277 1278 return 0; 1279 1280 unmap_release: 1281 err_idx = i; 1282 i = head; 1283 curr = vq->free_head; 1284 1285 vq->packed.avail_used_flags = avail_used_flags; 1286 1287 for (n = 0; n < total_sg; n++) { 1288 if (i == err_idx) 1289 break; 1290 vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]); 1291 curr = vq->packed.desc_extra[curr].next; 1292 i++; 1293 if (i >= vq->packed.vring.num) 1294 i = 0; 1295 } 1296 1297 END_USE(vq); 1298 return -EIO; 1299 } 1300 1301 static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq) 1302 { 1303 struct vring_virtqueue *vq = to_vvq(_vq); 1304 u16 new, old, off_wrap, flags, wrap_counter, event_idx; 1305 bool needs_kick; 1306 union { 1307 struct { 1308 __le16 off_wrap; 1309 __le16 flags; 1310 }; 1311 u32 u32; 1312 } snapshot; 1313 1314 START_USE(vq); 1315 1316 /* 1317 * We need to expose the new flags value before checking notification 1318 * suppressions. 1319 */ 1320 virtio_mb(vq->weak_barriers); 1321 1322 old = vq->packed.next_avail_idx - vq->num_added; 1323 new = vq->packed.next_avail_idx; 1324 vq->num_added = 0; 1325 1326 snapshot.u32 = *(u32 *)vq->packed.vring.device; 1327 flags = le16_to_cpu(snapshot.flags); 1328 1329 LAST_ADD_TIME_CHECK(vq); 1330 LAST_ADD_TIME_INVALID(vq); 1331 1332 if (flags != VRING_PACKED_EVENT_FLAG_DESC) { 1333 needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE); 1334 goto out; 1335 } 1336 1337 off_wrap = le16_to_cpu(snapshot.off_wrap); 1338 1339 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; 1340 event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); 1341 if (wrap_counter != vq->packed.avail_wrap_counter) 1342 event_idx -= vq->packed.vring.num; 1343 1344 needs_kick = vring_need_event(event_idx, new, old); 1345 out: 1346 END_USE(vq); 1347 return needs_kick; 1348 } 1349 1350 static void detach_buf_packed(struct vring_virtqueue *vq, 1351 unsigned int id, void **ctx) 1352 { 1353 struct vring_desc_state_packed *state = NULL; 1354 struct vring_packed_desc *desc; 1355 unsigned int i, curr; 1356 1357 state = &vq->packed.desc_state[id]; 1358 1359 /* Clear data ptr. */ 1360 state->data = NULL; 1361 1362 vq->packed.desc_extra[state->last].next = vq->free_head; 1363 vq->free_head = id; 1364 vq->vq.num_free += state->num; 1365 1366 if (unlikely(vq->use_dma_api)) { 1367 curr = id; 1368 for (i = 0; i < state->num; i++) { 1369 vring_unmap_extra_packed(vq, 1370 &vq->packed.desc_extra[curr]); 1371 curr = vq->packed.desc_extra[curr].next; 1372 } 1373 } 1374 1375 if (vq->indirect) { 1376 u32 len; 1377 1378 /* Free the indirect table, if any, now that it's unmapped. 
*/ 1379 desc = state->indir_desc; 1380 if (!desc) 1381 return; 1382 1383 if (vq->use_dma_api) { 1384 len = vq->packed.desc_extra[id].len; 1385 for (i = 0; i < len / sizeof(struct vring_packed_desc); 1386 i++) 1387 vring_unmap_desc_packed(vq, &desc[i]); 1388 } 1389 kfree(desc); 1390 state->indir_desc = NULL; 1391 } else if (ctx) { 1392 *ctx = state->indir_desc; 1393 } 1394 } 1395 1396 static inline bool is_used_desc_packed(const struct vring_virtqueue *vq, 1397 u16 idx, bool used_wrap_counter) 1398 { 1399 bool avail, used; 1400 u16 flags; 1401 1402 flags = le16_to_cpu(vq->packed.vring.desc[idx].flags); 1403 avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL)); 1404 used = !!(flags & (1 << VRING_PACKED_DESC_F_USED)); 1405 1406 return avail == used && used == used_wrap_counter; 1407 } 1408 1409 static inline bool more_used_packed(const struct vring_virtqueue *vq) 1410 { 1411 return is_used_desc_packed(vq, vq->last_used_idx, 1412 vq->packed.used_wrap_counter); 1413 } 1414 1415 static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, 1416 unsigned int *len, 1417 void **ctx) 1418 { 1419 struct vring_virtqueue *vq = to_vvq(_vq); 1420 u16 last_used, id; 1421 void *ret; 1422 1423 START_USE(vq); 1424 1425 if (unlikely(vq->broken)) { 1426 END_USE(vq); 1427 return NULL; 1428 } 1429 1430 if (!more_used_packed(vq)) { 1431 pr_debug("No more buffers in queue\n"); 1432 END_USE(vq); 1433 return NULL; 1434 } 1435 1436 /* Only get used elements after they have been exposed by host. */ 1437 virtio_rmb(vq->weak_barriers); 1438 1439 last_used = vq->last_used_idx; 1440 id = le16_to_cpu(vq->packed.vring.desc[last_used].id); 1441 *len = le32_to_cpu(vq->packed.vring.desc[last_used].len); 1442 1443 if (unlikely(id >= vq->packed.vring.num)) { 1444 BAD_RING(vq, "id %u out of range\n", id); 1445 return NULL; 1446 } 1447 if (unlikely(!vq->packed.desc_state[id].data)) { 1448 BAD_RING(vq, "id %u is not a head!\n", id); 1449 return NULL; 1450 } 1451 1452 /* detach_buf_packed clears data, so grab it now. */ 1453 ret = vq->packed.desc_state[id].data; 1454 detach_buf_packed(vq, id, ctx); 1455 1456 vq->last_used_idx += vq->packed.desc_state[id].num; 1457 if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) { 1458 vq->last_used_idx -= vq->packed.vring.num; 1459 vq->packed.used_wrap_counter ^= 1; 1460 } 1461 1462 /* 1463 * If we expect an interrupt for the next entry, tell host 1464 * by writing event index and flush out the write before 1465 * the read in the next get_buf call. 1466 */ 1467 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC) 1468 virtio_store_mb(vq->weak_barriers, 1469 &vq->packed.vring.driver->off_wrap, 1470 cpu_to_le16(vq->last_used_idx | 1471 (vq->packed.used_wrap_counter << 1472 VRING_PACKED_EVENT_F_WRAP_CTR))); 1473 1474 LAST_ADD_TIME_INVALID(vq); 1475 1476 END_USE(vq); 1477 return ret; 1478 } 1479 1480 static void virtqueue_disable_cb_packed(struct virtqueue *_vq) 1481 { 1482 struct vring_virtqueue *vq = to_vvq(_vq); 1483 1484 if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) { 1485 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; 1486 vq->packed.vring.driver->flags = 1487 cpu_to_le16(vq->packed.event_flags_shadow); 1488 } 1489 } 1490 1491 static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq) 1492 { 1493 struct vring_virtqueue *vq = to_vvq(_vq); 1494 1495 START_USE(vq); 1496 1497 /* 1498 * We optimistically turn back on interrupts, then check if there was 1499 * more to do. 
1500 */ 1501 1502 if (vq->event) { 1503 vq->packed.vring.driver->off_wrap = 1504 cpu_to_le16(vq->last_used_idx | 1505 (vq->packed.used_wrap_counter << 1506 VRING_PACKED_EVENT_F_WRAP_CTR)); 1507 /* 1508 * We need to update event offset and event wrap 1509 * counter first before updating event flags. 1510 */ 1511 virtio_wmb(vq->weak_barriers); 1512 } 1513 1514 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 1515 vq->packed.event_flags_shadow = vq->event ? 1516 VRING_PACKED_EVENT_FLAG_DESC : 1517 VRING_PACKED_EVENT_FLAG_ENABLE; 1518 vq->packed.vring.driver->flags = 1519 cpu_to_le16(vq->packed.event_flags_shadow); 1520 } 1521 1522 END_USE(vq); 1523 return vq->last_used_idx | ((u16)vq->packed.used_wrap_counter << 1524 VRING_PACKED_EVENT_F_WRAP_CTR); 1525 } 1526 1527 static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap) 1528 { 1529 struct vring_virtqueue *vq = to_vvq(_vq); 1530 bool wrap_counter; 1531 u16 used_idx; 1532 1533 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; 1534 used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); 1535 1536 return is_used_desc_packed(vq, used_idx, wrap_counter); 1537 } 1538 1539 static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq) 1540 { 1541 struct vring_virtqueue *vq = to_vvq(_vq); 1542 u16 used_idx, wrap_counter; 1543 u16 bufs; 1544 1545 START_USE(vq); 1546 1547 /* 1548 * We optimistically turn back on interrupts, then check if there was 1549 * more to do. 1550 */ 1551 1552 if (vq->event) { 1553 /* TODO: tune this threshold */ 1554 bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4; 1555 wrap_counter = vq->packed.used_wrap_counter; 1556 1557 used_idx = vq->last_used_idx + bufs; 1558 if (used_idx >= vq->packed.vring.num) { 1559 used_idx -= vq->packed.vring.num; 1560 wrap_counter ^= 1; 1561 } 1562 1563 vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx | 1564 (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR)); 1565 1566 /* 1567 * We need to update event offset and event wrap 1568 * counter first before updating event flags. 1569 */ 1570 virtio_wmb(vq->weak_barriers); 1571 } 1572 1573 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 1574 vq->packed.event_flags_shadow = vq->event ? 1575 VRING_PACKED_EVENT_FLAG_DESC : 1576 VRING_PACKED_EVENT_FLAG_ENABLE; 1577 vq->packed.vring.driver->flags = 1578 cpu_to_le16(vq->packed.event_flags_shadow); 1579 } 1580 1581 /* 1582 * We need to update event suppression structure first 1583 * before re-checking for more used buffers. 1584 */ 1585 virtio_mb(vq->weak_barriers); 1586 1587 if (is_used_desc_packed(vq, 1588 vq->last_used_idx, 1589 vq->packed.used_wrap_counter)) { 1590 END_USE(vq); 1591 return false; 1592 } 1593 1594 END_USE(vq); 1595 return true; 1596 } 1597 1598 static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq) 1599 { 1600 struct vring_virtqueue *vq = to_vvq(_vq); 1601 unsigned int i; 1602 void *buf; 1603 1604 START_USE(vq); 1605 1606 for (i = 0; i < vq->packed.vring.num; i++) { 1607 if (!vq->packed.desc_state[i].data) 1608 continue; 1609 /* detach_buf clears data, so grab it now. */ 1610 buf = vq->packed.desc_state[i].data; 1611 detach_buf_packed(vq, i, NULL); 1612 END_USE(vq); 1613 return buf; 1614 } 1615 /* That should have freed everything. 
*/ 1616 BUG_ON(vq->vq.num_free != vq->packed.vring.num); 1617 1618 END_USE(vq); 1619 return NULL; 1620 } 1621 1622 static struct vring_desc_extra *vring_alloc_desc_extra(struct vring_virtqueue *vq, 1623 unsigned int num) 1624 { 1625 struct vring_desc_extra *desc_extra; 1626 unsigned int i; 1627 1628 desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra), 1629 GFP_KERNEL); 1630 if (!desc_extra) 1631 return NULL; 1632 1633 memset(desc_extra, 0, num * sizeof(struct vring_desc_extra)); 1634 1635 for (i = 0; i < num - 1; i++) 1636 desc_extra[i].next = i + 1; 1637 1638 return desc_extra; 1639 } 1640 1641 static struct virtqueue *vring_create_virtqueue_packed( 1642 unsigned int index, 1643 unsigned int num, 1644 unsigned int vring_align, 1645 struct virtio_device *vdev, 1646 bool weak_barriers, 1647 bool may_reduce_num, 1648 bool context, 1649 bool (*notify)(struct virtqueue *), 1650 void (*callback)(struct virtqueue *), 1651 const char *name) 1652 { 1653 struct vring_virtqueue *vq; 1654 struct vring_packed_desc *ring; 1655 struct vring_packed_desc_event *driver, *device; 1656 dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr; 1657 size_t ring_size_in_bytes, event_size_in_bytes; 1658 1659 ring_size_in_bytes = num * sizeof(struct vring_packed_desc); 1660 1661 ring = vring_alloc_queue(vdev, ring_size_in_bytes, 1662 &ring_dma_addr, 1663 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1664 if (!ring) 1665 goto err_ring; 1666 1667 event_size_in_bytes = sizeof(struct vring_packed_desc_event); 1668 1669 driver = vring_alloc_queue(vdev, event_size_in_bytes, 1670 &driver_event_dma_addr, 1671 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1672 if (!driver) 1673 goto err_driver; 1674 1675 device = vring_alloc_queue(vdev, event_size_in_bytes, 1676 &device_event_dma_addr, 1677 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1678 if (!device) 1679 goto err_device; 1680 1681 vq = kmalloc(sizeof(*vq), GFP_KERNEL); 1682 if (!vq) 1683 goto err_vq; 1684 1685 vq->vq.callback = callback; 1686 vq->vq.vdev = vdev; 1687 vq->vq.name = name; 1688 vq->vq.num_free = num; 1689 vq->vq.index = index; 1690 vq->we_own_ring = true; 1691 vq->notify = notify; 1692 vq->weak_barriers = weak_barriers; 1693 vq->broken = false; 1694 vq->last_used_idx = 0; 1695 vq->event_triggered = false; 1696 vq->num_added = 0; 1697 vq->packed_ring = true; 1698 vq->use_dma_api = vring_use_dma_api(vdev); 1699 #ifdef DEBUG 1700 vq->in_use = false; 1701 vq->last_add_time_valid = false; 1702 #endif 1703 1704 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && 1705 !context; 1706 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 1707 1708 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) 1709 vq->weak_barriers = false; 1710 1711 vq->packed.ring_dma_addr = ring_dma_addr; 1712 vq->packed.driver_event_dma_addr = driver_event_dma_addr; 1713 vq->packed.device_event_dma_addr = device_event_dma_addr; 1714 1715 vq->packed.ring_size_in_bytes = ring_size_in_bytes; 1716 vq->packed.event_size_in_bytes = event_size_in_bytes; 1717 1718 vq->packed.vring.num = num; 1719 vq->packed.vring.desc = ring; 1720 vq->packed.vring.driver = driver; 1721 vq->packed.vring.device = device; 1722 1723 vq->packed.next_avail_idx = 0; 1724 vq->packed.avail_wrap_counter = 1; 1725 vq->packed.used_wrap_counter = 1; 1726 vq->packed.event_flags_shadow = 0; 1727 vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL; 1728 1729 vq->packed.desc_state = kmalloc_array(num, 1730 sizeof(struct vring_desc_state_packed), 1731 GFP_KERNEL); 1732 if 
(!vq->packed.desc_state) 1733 goto err_desc_state; 1734 1735 memset(vq->packed.desc_state, 0, 1736 num * sizeof(struct vring_desc_state_packed)); 1737 1738 /* Put everything in free lists. */ 1739 vq->free_head = 0; 1740 1741 vq->packed.desc_extra = vring_alloc_desc_extra(vq, num); 1742 if (!vq->packed.desc_extra) 1743 goto err_desc_extra; 1744 1745 /* No callback? Tell other side not to bother us. */ 1746 if (!callback) { 1747 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; 1748 vq->packed.vring.driver->flags = 1749 cpu_to_le16(vq->packed.event_flags_shadow); 1750 } 1751 1752 spin_lock(&vdev->vqs_list_lock); 1753 list_add_tail(&vq->vq.list, &vdev->vqs); 1754 spin_unlock(&vdev->vqs_list_lock); 1755 return &vq->vq; 1756 1757 err_desc_extra: 1758 kfree(vq->packed.desc_state); 1759 err_desc_state: 1760 kfree(vq); 1761 err_vq: 1762 vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr); 1763 err_device: 1764 vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr); 1765 err_driver: 1766 vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr); 1767 err_ring: 1768 return NULL; 1769 } 1770 1771 1772 /* 1773 * Generic functions and exported symbols. 1774 */ 1775 1776 static inline int virtqueue_add(struct virtqueue *_vq, 1777 struct scatterlist *sgs[], 1778 unsigned int total_sg, 1779 unsigned int out_sgs, 1780 unsigned int in_sgs, 1781 void *data, 1782 void *ctx, 1783 gfp_t gfp) 1784 { 1785 struct vring_virtqueue *vq = to_vvq(_vq); 1786 1787 return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg, 1788 out_sgs, in_sgs, data, ctx, gfp) : 1789 virtqueue_add_split(_vq, sgs, total_sg, 1790 out_sgs, in_sgs, data, ctx, gfp); 1791 } 1792 1793 /** 1794 * virtqueue_add_sgs - expose buffers to other end 1795 * @_vq: the struct virtqueue we're talking about. 1796 * @sgs: array of terminated scatterlists. 1797 * @out_sgs: the number of scatterlists readable by other side 1798 * @in_sgs: the number of scatterlists which are writable (after readable ones) 1799 * @data: the token identifying the buffer. 1800 * @gfp: how to do memory allocations (if necessary). 1801 * 1802 * Caller must ensure we don't call this with other virtqueue operations 1803 * at the same time (except where noted). 1804 * 1805 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1806 */ 1807 int virtqueue_add_sgs(struct virtqueue *_vq, 1808 struct scatterlist *sgs[], 1809 unsigned int out_sgs, 1810 unsigned int in_sgs, 1811 void *data, 1812 gfp_t gfp) 1813 { 1814 unsigned int i, total_sg = 0; 1815 1816 /* Count them first. */ 1817 for (i = 0; i < out_sgs + in_sgs; i++) { 1818 struct scatterlist *sg; 1819 1820 for (sg = sgs[i]; sg; sg = sg_next(sg)) 1821 total_sg++; 1822 } 1823 return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, 1824 data, NULL, gfp); 1825 } 1826 EXPORT_SYMBOL_GPL(virtqueue_add_sgs); 1827 1828 /** 1829 * virtqueue_add_outbuf - expose output buffers to other end 1830 * @vq: the struct virtqueue we're talking about. 1831 * @sg: scatterlist (must be well-formed and terminated!) 1832 * @num: the number of entries in @sg readable by other side 1833 * @data: the token identifying the buffer. 1834 * @gfp: how to do memory allocations (if necessary). 1835 * 1836 * Caller must ensure we don't call this with other virtqueue operations 1837 * at the same time (except where noted). 1838 * 1839 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 
1840 */ 1841 int virtqueue_add_outbuf(struct virtqueue *vq, 1842 struct scatterlist *sg, unsigned int num, 1843 void *data, 1844 gfp_t gfp) 1845 { 1846 return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp); 1847 } 1848 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); 1849 1850 /** 1851 * virtqueue_add_inbuf - expose input buffers to other end 1852 * @vq: the struct virtqueue we're talking about. 1853 * @sg: scatterlist (must be well-formed and terminated!) 1854 * @num: the number of entries in @sg writable by other side 1855 * @data: the token identifying the buffer. 1856 * @gfp: how to do memory allocations (if necessary). 1857 * 1858 * Caller must ensure we don't call this with other virtqueue operations 1859 * at the same time (except where noted). 1860 * 1861 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1862 */ 1863 int virtqueue_add_inbuf(struct virtqueue *vq, 1864 struct scatterlist *sg, unsigned int num, 1865 void *data, 1866 gfp_t gfp) 1867 { 1868 return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp); 1869 } 1870 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); 1871 1872 /** 1873 * virtqueue_add_inbuf_ctx - expose input buffers to other end 1874 * @vq: the struct virtqueue we're talking about. 1875 * @sg: scatterlist (must be well-formed and terminated!) 1876 * @num: the number of entries in @sg writable by other side 1877 * @data: the token identifying the buffer. 1878 * @ctx: extra context for the token 1879 * @gfp: how to do memory allocations (if necessary). 1880 * 1881 * Caller must ensure we don't call this with other virtqueue operations 1882 * at the same time (except where noted). 1883 * 1884 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1885 */ 1886 int virtqueue_add_inbuf_ctx(struct virtqueue *vq, 1887 struct scatterlist *sg, unsigned int num, 1888 void *data, 1889 void *ctx, 1890 gfp_t gfp) 1891 { 1892 return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp); 1893 } 1894 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx); 1895 1896 /** 1897 * virtqueue_kick_prepare - first half of split virtqueue_kick call. 1898 * @_vq: the struct virtqueue 1899 * 1900 * Instead of virtqueue_kick(), you can do: 1901 * if (virtqueue_kick_prepare(vq)) 1902 * virtqueue_notify(vq); 1903 * 1904 * This is sometimes useful because the virtqueue_kick_prepare() needs 1905 * to be serialized, but the actual virtqueue_notify() call does not. 1906 */ 1907 bool virtqueue_kick_prepare(struct virtqueue *_vq) 1908 { 1909 struct vring_virtqueue *vq = to_vvq(_vq); 1910 1911 return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) : 1912 virtqueue_kick_prepare_split(_vq); 1913 } 1914 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare); 1915 1916 /** 1917 * virtqueue_notify - second half of split virtqueue_kick call. 1918 * @_vq: the struct virtqueue 1919 * 1920 * This does not need to be serialized. 1921 * 1922 * Returns false if host notify failed or queue is broken, otherwise true. 1923 */ 1924 bool virtqueue_notify(struct virtqueue *_vq) 1925 { 1926 struct vring_virtqueue *vq = to_vvq(_vq); 1927 1928 if (unlikely(vq->broken)) 1929 return false; 1930 1931 /* Prod other side to tell it about changes. */ 1932 if (!vq->notify(_vq)) { 1933 vq->broken = true; 1934 return false; 1935 } 1936 return true; 1937 } 1938 EXPORT_SYMBOL_GPL(virtqueue_notify); 1939 1940 /** 1941 * virtqueue_kick - update after add_buf 1942 * @vq: the struct virtqueue 1943 * 1944 * After one or more virtqueue_add_* calls, invoke this to kick 1945 * the other side. 
1946 * 1947 * Caller must ensure we don't call this with other virtqueue 1948 * operations at the same time (except where noted). 1949 * 1950 * Returns false if kick failed, otherwise true. 1951 */ 1952 bool virtqueue_kick(struct virtqueue *vq) 1953 { 1954 if (virtqueue_kick_prepare(vq)) 1955 return virtqueue_notify(vq); 1956 return true; 1957 } 1958 EXPORT_SYMBOL_GPL(virtqueue_kick); 1959 1960 /** 1961 * virtqueue_get_buf_ctx - get the next used buffer 1962 * @_vq: the struct virtqueue we're talking about. 1963 * @len: the length written into the buffer 1964 * @ctx: extra context for the token 1965 * 1966 * If the device wrote data into the buffer, @len will be set to the 1967 * amount written. This means you don't need to clear the buffer 1968 * beforehand to ensure there's no data leakage in the case of short 1969 * writes. 1970 * 1971 * Caller must ensure we don't call this with other virtqueue 1972 * operations at the same time (except where noted). 1973 * 1974 * Returns NULL if there are no used buffers, or the "data" token 1975 * handed to virtqueue_add_*(). 1976 */ 1977 void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, 1978 void **ctx) 1979 { 1980 struct vring_virtqueue *vq = to_vvq(_vq); 1981 1982 return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) : 1983 virtqueue_get_buf_ctx_split(_vq, len, ctx); 1984 } 1985 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx); 1986 1987 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) 1988 { 1989 return virtqueue_get_buf_ctx(_vq, len, NULL); 1990 } 1991 EXPORT_SYMBOL_GPL(virtqueue_get_buf); 1992 /** 1993 * virtqueue_disable_cb - disable callbacks 1994 * @_vq: the struct virtqueue we're talking about. 1995 * 1996 * Note that this is not necessarily synchronous, hence unreliable and only 1997 * useful as an optimization. 1998 * 1999 * Unlike other operations, this need not be serialized. 2000 */ 2001 void virtqueue_disable_cb(struct virtqueue *_vq) 2002 { 2003 struct vring_virtqueue *vq = to_vvq(_vq); 2004 2005 /* If device triggered an event already it won't trigger one again: 2006 * no need to disable. 2007 */ 2008 if (vq->event_triggered) 2009 return; 2010 2011 if (vq->packed_ring) 2012 virtqueue_disable_cb_packed(_vq); 2013 else 2014 virtqueue_disable_cb_split(_vq); 2015 } 2016 EXPORT_SYMBOL_GPL(virtqueue_disable_cb); 2017 2018 /** 2019 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb 2020 * @_vq: the struct virtqueue we're talking about. 2021 * 2022 * This re-enables callbacks; it returns current queue state 2023 * in an opaque unsigned value. This value should be later tested by 2024 * virtqueue_poll, to detect a possible race between the driver checking for 2025 * more work, and enabling callbacks. 2026 * 2027 * Caller must ensure we don't call this with other virtqueue 2028 * operations at the same time (except where noted). 2029 */ 2030 unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq) 2031 { 2032 struct vring_virtqueue *vq = to_vvq(_vq); 2033 2034 if (vq->event_triggered) 2035 vq->event_triggered = false; 2036 2037 return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) : 2038 virtqueue_enable_cb_prepare_split(_vq); 2039 } 2040 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); 2041 2042 /** 2043 * virtqueue_poll - query pending used buffers 2044 * @_vq: the struct virtqueue we're talking about. 2045 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare). 2046 * 2047 * Returns "true" if there are pending used buffers in the queue. 
/**
 * virtqueue_poll - query pending used buffers
 * @_vq: the struct virtqueue we're talking about.
 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
 *
 * Returns "true" if there are pending used buffers in the queue.
 *
 * This does not need to be serialized.
 */
bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (unlikely(vq->broken))
		return false;

	virtio_mb(vq->weak_barriers);
	return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
				 virtqueue_poll_split(_vq, last_used_idx);
}
EXPORT_SYMBOL_GPL(virtqueue_poll);

/**
 * virtqueue_enable_cb - restart callbacks after disable_cb.
 * @_vq: the struct virtqueue we're talking about.
 *
 * This re-enables callbacks; it returns "false" if there are pending
 * buffers in the queue, to detect a possible race between the driver
 * checking for more work, and enabling callbacks.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 */
bool virtqueue_enable_cb(struct virtqueue *_vq)
{
	unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq);

	return !virtqueue_poll(_vq, last_used_idx);
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb);

/**
 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
 * @_vq: the struct virtqueue we're talking about.
 *
 * This re-enables callbacks but hints to the other side to delay
 * interrupts until most of the available buffers have been processed;
 * it returns "false" if there are many pending buffers in the queue,
 * to detect a possible race between the driver checking for more work,
 * and enabling callbacks.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 */
bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (vq->event_triggered)
		vq->event_triggered = false;

	return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
				 virtqueue_enable_cb_delayed_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);

/**
 * virtqueue_detach_unused_buf - detach first unused buffer
 * @_vq: the struct virtqueue we're talking about.
 *
 * Returns NULL or the "data" token handed to virtqueue_add_*().
 * This is not valid on an active queue; it is useful only for device
 * shutdown.
 */
void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
				 virtqueue_detach_unused_buf_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);

static inline bool more_used(const struct vring_virtqueue *vq)
{
	return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
}
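
/*
 * Illustrative only (not compiled): a minimal device-shutdown sketch for a
 * hypothetical driver, showing the intended use of
 * virtqueue_detach_unused_buf(). The queue must already be stopped (e.g.
 * after the transport reset), so nothing is in flight; buffers the device
 * never used are reclaimed here and freed by the driver.
 */
#if 0
static void example_teardown_vq(struct virtqueue *vq)
{
	void *token;

	while ((token = virtqueue_detach_unused_buf(vq)) != NULL)
		kfree(token);	/* assumes the tokens were kmalloc'ed buffers */
}
#endif
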
irqreturn_t vring_interrupt(int irq, void *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (!more_used(vq)) {
		pr_debug("virtqueue interrupt with no work for %p\n", vq);
		return IRQ_NONE;
	}

	if (unlikely(vq->broken))
		return IRQ_HANDLED;

	/* Just a hint for performance: so it's ok that this can be racy! */
	if (vq->event)
		vq->event_triggered = true;

	pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
	if (vq->vq.callback)
		vq->vq.callback(&vq->vq);

	return IRQ_HANDLED;
}
EXPORT_SYMBOL_GPL(vring_interrupt);
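
/*
 * Illustrative only (not compiled): how a transport might wire
 * vring_interrupt() up as a per-virtqueue interrupt handler, roughly as the
 * PCI transport does with MSI-X vectors. The interrupt number and name here
 * are hypothetical, and <linux/interrupt.h> is assumed for request_irq();
 * the struct virtqueue pointer is passed as the dev_id cookie that
 * vring_interrupt() expects back.
 */
#if 0
static int example_setup_vq_irq(struct virtqueue *vq, unsigned int irq)
{
	return request_irq(irq, vring_interrupt, 0, "example-vq", vq);
}
#endif
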
/* Only available for split ring */
struct virtqueue *__vring_new_virtqueue(unsigned int index,
					struct vring vring,
					struct virtio_device *vdev,
					bool weak_barriers,
					bool context,
					bool (*notify)(struct virtqueue *),
					void (*callback)(struct virtqueue *),
					const char *name)
{
	struct vring_virtqueue *vq;

	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
		return NULL;

	vq = kmalloc(sizeof(*vq), GFP_KERNEL);
	if (!vq)
		return NULL;

	vq->packed_ring = false;
	vq->vq.callback = callback;
	vq->vq.vdev = vdev;
	vq->vq.name = name;
	vq->vq.num_free = vring.num;
	vq->vq.index = index;
	vq->we_own_ring = false;
	vq->notify = notify;
	vq->weak_barriers = weak_barriers;
	vq->broken = false;
	vq->last_used_idx = 0;
	vq->event_triggered = false;
	vq->num_added = 0;
	vq->use_dma_api = vring_use_dma_api(vdev);
#ifdef DEBUG
	vq->in_use = false;
	vq->last_add_time_valid = false;
#endif

	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
		!context;
	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);

	if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
		vq->weak_barriers = false;

	vq->split.queue_dma_addr = 0;
	vq->split.queue_size_in_bytes = 0;

	vq->split.vring = vring;
	vq->split.avail_flags_shadow = 0;
	vq->split.avail_idx_shadow = 0;

	/* No callback? Tell other side not to bother us. */
	if (!callback) {
		vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
		if (!vq->event)
			vq->split.vring.avail->flags = cpu_to_virtio16(vdev,
					vq->split.avail_flags_shadow);
	}

	vq->split.desc_state = kmalloc_array(vring.num,
			sizeof(struct vring_desc_state_split), GFP_KERNEL);
	if (!vq->split.desc_state)
		goto err_state;

	vq->split.desc_extra = vring_alloc_desc_extra(vq, vring.num);
	if (!vq->split.desc_extra)
		goto err_extra;

	/* Put everything in free lists. */
	vq->free_head = 0;
	memset(vq->split.desc_state, 0, vring.num *
			sizeof(struct vring_desc_state_split));

	spin_lock(&vdev->vqs_list_lock);
	list_add_tail(&vq->vq.list, &vdev->vqs);
	spin_unlock(&vdev->vqs_list_lock);
	return &vq->vq;

err_extra:
	kfree(vq->split.desc_state);
err_state:
	kfree(vq);
	return NULL;
}
EXPORT_SYMBOL_GPL(__vring_new_virtqueue);

struct virtqueue *vring_create_virtqueue(
	unsigned int index,
	unsigned int num,
	unsigned int vring_align,
	struct virtio_device *vdev,
	bool weak_barriers,
	bool may_reduce_num,
	bool context,
	bool (*notify)(struct virtqueue *),
	void (*callback)(struct virtqueue *),
	const char *name)
{
	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
		return vring_create_virtqueue_packed(index, num, vring_align,
				vdev, weak_barriers, may_reduce_num,
				context, notify, callback, name);

	return vring_create_virtqueue_split(index, num, vring_align,
			vdev, weak_barriers, may_reduce_num,
			context, notify, callback, name);
}
EXPORT_SYMBOL_GPL(vring_create_virtqueue);

/* Only available for split ring */
struct virtqueue *vring_new_virtqueue(unsigned int index,
				      unsigned int num,
				      unsigned int vring_align,
				      struct virtio_device *vdev,
				      bool weak_barriers,
				      bool context,
				      void *pages,
				      bool (*notify)(struct virtqueue *vq),
				      void (*callback)(struct virtqueue *vq),
				      const char *name)
{
	struct vring vring;

	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
		return NULL;

	vring_init(&vring, num, pages, vring_align);
	return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
				     notify, callback, name);
}
EXPORT_SYMBOL_GPL(vring_new_virtqueue);

void vring_del_virtqueue(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	spin_lock(&vq->vq.vdev->vqs_list_lock);
	list_del(&_vq->list);
	spin_unlock(&vq->vq.vdev->vqs_list_lock);

	if (vq->we_own_ring) {
		if (vq->packed_ring) {
			vring_free_queue(vq->vq.vdev,
					 vq->packed.ring_size_in_bytes,
					 vq->packed.vring.desc,
					 vq->packed.ring_dma_addr);

			vring_free_queue(vq->vq.vdev,
					 vq->packed.event_size_in_bytes,
					 vq->packed.vring.driver,
					 vq->packed.driver_event_dma_addr);

			vring_free_queue(vq->vq.vdev,
					 vq->packed.event_size_in_bytes,
					 vq->packed.vring.device,
					 vq->packed.device_event_dma_addr);

			kfree(vq->packed.desc_state);
			kfree(vq->packed.desc_extra);
		} else {
			vring_free_queue(vq->vq.vdev,
					 vq->split.queue_size_in_bytes,
					 vq->split.vring.desc,
					 vq->split.queue_dma_addr);
		}
	}
	if (!vq->packed_ring) {
		kfree(vq->split.desc_state);
		kfree(vq->split.desc_extra);
	}
	kfree(vq);
}
EXPORT_SYMBOL_GPL(vring_del_virtqueue);
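
/*
 * Illustrative only (not compiled): a minimal sketch of how a transport
 * might create and later destroy a ring with the helpers above. The queue
 * index, size, alignment and the notify/callback hooks are hypothetical
 * placeholders; vring_create_virtqueue() picks packed vs. split based on
 * the negotiated VIRTIO_F_RING_PACKED feature.
 */
#if 0
static struct virtqueue *example_create_vq(struct virtio_device *vdev,
					   bool (*notify)(struct virtqueue *),
					   void (*callback)(struct virtqueue *))
{
	/* 128 entries, page-aligned ring, allow shrinking on allocation failure. */
	return vring_create_virtqueue(0, 128, PAGE_SIZE, vdev,
				      true  /* weak_barriers */,
				      true  /* may_reduce_num */,
				      false /* context */,
				      notify, callback, "example-vq");
}

static void example_destroy_vq(struct virtqueue *vq)
{
	vring_del_virtqueue(vq);	/* also frees the ring if it owns it */
}
#endif
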
/* Manipulates transport-specific feature bits. */
void vring_transport_features(struct virtio_device *vdev)
{
	unsigned int i;

	for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
		switch (i) {
		case VIRTIO_RING_F_INDIRECT_DESC:
			break;
		case VIRTIO_RING_F_EVENT_IDX:
			break;
		case VIRTIO_F_VERSION_1:
			break;
		case VIRTIO_F_ACCESS_PLATFORM:
			break;
		case VIRTIO_F_RING_PACKED:
			break;
		case VIRTIO_F_ORDER_PLATFORM:
			break;
		default:
			/* We don't understand this bit. */
			__virtio_clear_bit(vdev, i);
		}
	}
}
EXPORT_SYMBOL_GPL(vring_transport_features);

/**
 * virtqueue_get_vring_size - return the size of the virtqueue's vring
 * @_vq: the struct virtqueue containing the vring of interest.
 *
 * Returns the size of the vring. This is mainly used for boasting to
 * userspace. Unlike other operations, this need not be serialized.
 */
unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
}
EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);

bool virtqueue_is_broken(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return READ_ONCE(vq->broken);
}
EXPORT_SYMBOL_GPL(virtqueue_is_broken);

/*
 * This should prevent the device from being used, allowing drivers to
 * recover. You may need to grab appropriate locks to flush.
 */
void virtio_break_device(struct virtio_device *dev)
{
	struct virtqueue *_vq;

	spin_lock(&dev->vqs_list_lock);
	list_for_each_entry(_vq, &dev->vqs, list) {
		struct vring_virtqueue *vq = to_vvq(_vq);

		/* Pairs with READ_ONCE() in virtqueue_is_broken(). */
		WRITE_ONCE(vq->broken, true);
	}
	spin_unlock(&dev->vqs_list_lock);
}
EXPORT_SYMBOL_GPL(virtio_break_device);

dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	BUG_ON(!vq->we_own_ring);

	if (vq->packed_ring)
		return vq->packed.ring_dma_addr;

	return vq->split.queue_dma_addr;
}
EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);

dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	BUG_ON(!vq->we_own_ring);

	if (vq->packed_ring)
		return vq->packed.driver_event_dma_addr;

	return vq->split.queue_dma_addr +
		((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
}
EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);

dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	BUG_ON(!vq->we_own_ring);

	if (vq->packed_ring)
		return vq->packed.device_event_dma_addr;

	return vq->split.queue_dma_addr +
		((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
}
EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);

/* Only available for split ring */
const struct vring *virtqueue_get_vring(struct virtqueue *vq)
{
	return &to_vvq(vq)->split.vring;
}
EXPORT_SYMBOL_GPL(virtqueue_get_vring);

MODULE_LICENSE("GPL");