// SPDX-License-Identifier: GPL-2.0-or-later
/* Virtio ring implementation.
 *
 * Copyright 2007 Rusty Russell IBM Corporation
 */
#include <linux/virtio.h>
#include <linux/virtio_ring.h>
#include <linux/virtio_config.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/hrtimer.h>
#include <linux/dma-mapping.h>
#include <linux/spinlock.h>
#include <xen/xen.h>

#ifdef DEBUG
/* For development, we want to crash whenever the ring is screwed. */
#define BAD_RING(_vq, fmt, args...)				\
	do {							\
		dev_err(&(_vq)->vq.vdev->dev,			\
			"%s:"fmt, (_vq)->vq.name, ##args);	\
		BUG();						\
	} while (0)
/* Caller is supposed to guarantee no reentry. */
#define START_USE(_vq)						\
	do {							\
		if ((_vq)->in_use)				\
			panic("%s:in_use = %i\n",		\
			      (_vq)->vq.name, (_vq)->in_use);	\
		(_vq)->in_use = __LINE__;			\
	} while (0)
#define END_USE(_vq) \
	do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0)
#define LAST_ADD_TIME_UPDATE(_vq)				\
	do {							\
		ktime_t now = ktime_get();			\
								\
		/* No kick or get, with .1 second between?  Warn. */ \
		if ((_vq)->last_add_time_valid)			\
			WARN_ON(ktime_to_ms(ktime_sub(now,	\
				(_vq)->last_add_time)) > 100);	\
		(_vq)->last_add_time = now;			\
		(_vq)->last_add_time_valid = true;		\
	} while (0)
#define LAST_ADD_TIME_CHECK(_vq)				\
	do {							\
		if ((_vq)->last_add_time_valid) {		\
			WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
				(_vq)->last_add_time)) > 100);	\
		}						\
	} while (0)
#define LAST_ADD_TIME_INVALID(_vq)				\
	((_vq)->last_add_time_valid = false)
#else
#define BAD_RING(_vq, fmt, args...)				\
	do {							\
		dev_err(&_vq->vq.vdev->dev,			\
			"%s:"fmt, (_vq)->vq.name, ##args);	\
		(_vq)->broken = true;				\
	} while (0)
#define START_USE(vq)
#define END_USE(vq)
#define LAST_ADD_TIME_UPDATE(vq)
#define LAST_ADD_TIME_CHECK(vq)
#define LAST_ADD_TIME_INVALID(vq)
#endif

struct vring_desc_state_split {
	void *data;			/* Data for callback. */
	struct vring_desc *indir_desc;	/* Indirect descriptor, if any. */
};

struct vring_desc_state_packed {
	void *data;			/* Data for callback. */
	struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
	u16 num;			/* Descriptor list length. */
	u16 last;			/* The last desc state in a list. */
};

struct vring_desc_extra {
	dma_addr_t addr;		/* Descriptor DMA addr. */
	u32 len;			/* Descriptor length. */
	u16 flags;			/* Descriptor flags. */
	u16 next;			/* The next desc state in a list. */
};

struct vring_virtqueue {
	struct virtqueue vq;

	/* Is this a packed ring? */
	bool packed_ring;

	/* Is DMA API used? */
	bool use_dma_api;

	/* Can we use weak barriers? */
	bool weak_barriers;

	/* Other side has made a mess, don't try any more. */
	bool broken;

	/* Host supports indirect buffers */
	bool indirect;

	/* Host publishes avail event idx */
	bool event;

	/* Head of free buffer list. */
	unsigned int free_head;
	/* Number we've added since last sync. */
	unsigned int num_added;

	/* Last used index we've seen. */
	u16 last_used_idx;

	/* Hint for event idx: already triggered no need to disable. */
	bool event_triggered;

	union {
		/* Available for split ring */
		struct {
			/* Actual memory layout for this queue. */
			struct vring vring;

			/* Last written value to avail->flags */
			u16 avail_flags_shadow;

			/*
			 * Last written value to avail->idx in
			 * guest byte order.
			 */
			u16 avail_idx_shadow;

			/* Per-descriptor state. */
			struct vring_desc_state_split *desc_state;
			struct vring_desc_extra *desc_extra;

			/* DMA address and size information */
			dma_addr_t queue_dma_addr;
			size_t queue_size_in_bytes;
		} split;

		/* Available for packed ring */
		struct {
			/* Actual memory layout for this queue. */
			struct {
				unsigned int num;
				struct vring_packed_desc *desc;
				struct vring_packed_desc_event *driver;
				struct vring_packed_desc_event *device;
			} vring;

			/* Driver ring wrap counter. */
			bool avail_wrap_counter;

			/* Device ring wrap counter. */
			bool used_wrap_counter;

			/* Avail used flags. */
			u16 avail_used_flags;

			/* Index of the next avail descriptor. */
			u16 next_avail_idx;

			/*
			 * Last written value to driver->flags in
			 * guest byte order.
			 */
			u16 event_flags_shadow;

			/* Per-descriptor state. */
			struct vring_desc_state_packed *desc_state;
			struct vring_desc_extra *desc_extra;

			/* DMA address and size information */
			dma_addr_t ring_dma_addr;
			dma_addr_t driver_event_dma_addr;
			dma_addr_t device_event_dma_addr;
			size_t ring_size_in_bytes;
			size_t event_size_in_bytes;
		} packed;
	};

	/* How to notify other side. FIXME: commonalize hcalls! */
	bool (*notify)(struct virtqueue *vq);

	/* DMA, allocation, and size information */
	bool we_own_ring;

#ifdef DEBUG
	/* They're supposed to lock for us. */
	unsigned int in_use;

	/* Figure out if their kicks are too delayed. */
	bool last_add_time_valid;
	ktime_t last_add_time;
#endif
};


/*
 * Helpers.
 */

#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)

static inline bool virtqueue_use_indirect(struct virtqueue *_vq,
					  unsigned int total_sg)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	/*
	 * If the host supports indirect descriptor tables, and we have multiple
	 * buffers, then go indirect. FIXME: tune this threshold
	 */
	return (vq->indirect && total_sg > 1 && vq->vq.num_free);
}

/*
 * Modern virtio devices have feature bits to specify whether they need a
 * quirk and bypass the IOMMU. If not there, just use the DMA API.
 *
 * If there, the interaction between virtio and DMA API is messy.
 *
 * On most systems with virtio, physical addresses match bus addresses,
 * and it doesn't particularly matter whether we use the DMA API.
 *
 * On some systems, including Xen and any system with a physical device
 * that speaks virtio behind a physical IOMMU, we must use the DMA API
 * for virtio DMA to work at all.
 *
 * On other systems, including SPARC and PPC64, virtio-pci devices are
 * enumerated as though they are behind an IOMMU, but the virtio host
 * ignores the IOMMU, so we must either pretend that the IOMMU isn't
 * there or somehow map everything as the identity.
 *
 * For the time being, we preserve historic behavior and bypass the DMA
 * API.
 *
 * TODO: install a per-device DMA ops structure that does the right thing
 * taking into account all the above quirks, and use the DMA API
 * unconditionally on data path.
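 *
 * The decision is implemented in vring_use_dma_api() below: a device
 * without the legacy DMA quirk (i.e. one that negotiated
 * VIRTIO_F_ACCESS_PLATFORM) always goes through the DMA API, while a
 * quirky device only does so when running as a Xen guest.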
 */

static bool vring_use_dma_api(struct virtio_device *vdev)
{
	if (!virtio_has_dma_quirk(vdev))
		return true;

	/* Otherwise, we are left to guess. */
	/*
	 * In theory, it's possible to have a buggy QEMU-supplied
	 * emulated Q35 IOMMU and Xen enabled at the same time. On
	 * such a configuration, virtio has never worked and will
	 * not work without an even larger kludge. Instead, enable
	 * the DMA API if we're a Xen guest, which at least allows
	 * all of the sensible Xen configurations to work correctly.
	 */
	if (xen_domain())
		return true;

	return false;
}

size_t virtio_max_dma_size(struct virtio_device *vdev)
{
	size_t max_segment_size = SIZE_MAX;

	if (vring_use_dma_api(vdev))
		max_segment_size = dma_max_mapping_size(vdev->dev.parent);

	return max_segment_size;
}
EXPORT_SYMBOL_GPL(virtio_max_dma_size);

static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
			       dma_addr_t *dma_handle, gfp_t flag)
{
	if (vring_use_dma_api(vdev)) {
		return dma_alloc_coherent(vdev->dev.parent, size,
					  dma_handle, flag);
	} else {
		void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);

		if (queue) {
			phys_addr_t phys_addr = virt_to_phys(queue);
			*dma_handle = (dma_addr_t)phys_addr;

			/*
			 * Sanity check: make sure we didn't truncate
			 * the address. The only arches I can find that
			 * have 64-bit phys_addr_t but 32-bit dma_addr_t
			 * are certain non-highmem MIPS and x86
			 * configurations, but these configurations
			 * should never allocate physical pages above 32
			 * bits, so this is fine. Just in case, throw a
			 * warning and abort if we end up with an
			 * unrepresentable address.
			 */
			if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
				free_pages_exact(queue, PAGE_ALIGN(size));
				return NULL;
			}
		}
		return queue;
	}
}

static void vring_free_queue(struct virtio_device *vdev, size_t size,
			     void *queue, dma_addr_t dma_handle)
{
	if (vring_use_dma_api(vdev))
		dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
	else
		free_pages_exact(queue, PAGE_ALIGN(size));
}

/*
 * The DMA ops on various arches are rather gnarly right now, and
 * making all of the arch DMA ops work on the vring device itself
 * is a mess. For now, we use the parent device for DMA ops.
 */
static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq)
{
	return vq->vq.vdev->dev.parent;
}

/* Map one sg entry. */
static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
				   struct scatterlist *sg,
				   enum dma_data_direction direction)
{
	if (!vq->use_dma_api)
		return (dma_addr_t)sg_phys(sg);

	/*
	 * We can't use dma_map_sg, because we don't use scatterlists in
	 * the way it expects (we don't guarantee that the scatterlist
	 * will exist for the lifetime of the mapping).
	 */
	return dma_map_page(vring_dma_dev(vq),
			    sg_page(sg), sg->offset, sg->length,
			    direction);
}

static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
				   void *cpu_addr, size_t size,
				   enum dma_data_direction direction)
{
	if (!vq->use_dma_api)
		return (dma_addr_t)virt_to_phys(cpu_addr);

	return dma_map_single(vring_dma_dev(vq),
			      cpu_addr, size, direction);
}

static int vring_mapping_error(const struct vring_virtqueue *vq,
			       dma_addr_t addr)
{
	if (!vq->use_dma_api)
		return 0;

	return dma_mapping_error(vring_dma_dev(vq), addr);
}


/*
 * Split ring specific functions - *_split().
 */

static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
					   struct vring_desc *desc)
{
	u16 flags;

	if (!vq->use_dma_api)
		return;

	flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);

	if (flags & VRING_DESC_F_INDIRECT) {
		dma_unmap_single(vring_dma_dev(vq),
				 virtio64_to_cpu(vq->vq.vdev, desc->addr),
				 virtio32_to_cpu(vq->vq.vdev, desc->len),
				 (flags & VRING_DESC_F_WRITE) ?
				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
	} else {
		dma_unmap_page(vring_dma_dev(vq),
			       virtio64_to_cpu(vq->vq.vdev, desc->addr),
			       virtio32_to_cpu(vq->vq.vdev, desc->len),
			       (flags & VRING_DESC_F_WRITE) ?
			       DMA_FROM_DEVICE : DMA_TO_DEVICE);
	}
}

static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
					  unsigned int i)
{
	struct vring_desc_extra *extra = vq->split.desc_extra;
	u16 flags;

	if (!vq->use_dma_api)
		goto out;

	flags = extra[i].flags;

	if (flags & VRING_DESC_F_INDIRECT) {
		dma_unmap_single(vring_dma_dev(vq),
				 extra[i].addr,
				 extra[i].len,
				 (flags & VRING_DESC_F_WRITE) ?
				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
	} else {
		dma_unmap_page(vring_dma_dev(vq),
			       extra[i].addr,
			       extra[i].len,
			       (flags & VRING_DESC_F_WRITE) ?
			       DMA_FROM_DEVICE : DMA_TO_DEVICE);
	}

out:
	return extra[i].next;
}

static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
					       unsigned int total_sg,
					       gfp_t gfp)
{
	struct vring_desc *desc;
	unsigned int i;

	/*
	 * We require lowmem mappings for the descriptors because
	 * otherwise virt_to_phys will give us bogus addresses in the
	 * virtqueue.
	 */
	gfp &= ~__GFP_HIGHMEM;

	desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp);
	if (!desc)
		return NULL;

	for (i = 0; i < total_sg; i++)
		desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
	return desc;
}

static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
						    struct vring_desc *desc,
						    unsigned int i,
						    dma_addr_t addr,
						    unsigned int len,
						    u16 flags,
						    bool indirect)
{
	struct vring_virtqueue *vring = to_vvq(vq);
	struct vring_desc_extra *extra = vring->split.desc_extra;
	u16 next;

	desc[i].flags = cpu_to_virtio16(vq->vdev, flags);
	desc[i].addr = cpu_to_virtio64(vq->vdev, addr);
	desc[i].len = cpu_to_virtio32(vq->vdev, len);

	if (!indirect) {
		next = extra[i].next;
		desc[i].next = cpu_to_virtio16(vq->vdev, next);

		extra[i].addr = addr;
		extra[i].len = len;
		extra[i].flags = flags;
	} else
		next = virtio16_to_cpu(vq->vdev, desc[i].next);

	return next;
}

static inline int virtqueue_add_split(struct virtqueue *_vq,
				      struct scatterlist *sgs[],
				      unsigned int total_sg,
				      unsigned int out_sgs,
				      unsigned int in_sgs,
				      void *data,
				      void *ctx,
				      gfp_t gfp)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	struct scatterlist *sg;
	struct vring_desc *desc;
	unsigned int i, n, avail, descs_used, prev, err_idx;
	int head;
	bool indirect;

	START_USE(vq);

	BUG_ON(data == NULL);
	BUG_ON(ctx && vq->indirect);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return -EIO;
	}

	LAST_ADD_TIME_UPDATE(vq);

	BUG_ON(total_sg == 0);

	head = vq->free_head;

	if (virtqueue_use_indirect(_vq, total_sg))
		desc = alloc_indirect_split(_vq, total_sg, gfp);
	else {
		desc = NULL;
		WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
	}

	if (desc) {
		/* Use a single buffer which doesn't continue */
		indirect = true;
		/* Set up rest to use this indirect table. */
		i = 0;
		descs_used = 1;
	} else {
		indirect = false;
		desc = vq->split.vring.desc;
		i = head;
		descs_used = total_sg;
	}

	if (vq->vq.num_free < descs_used) {
		pr_debug("Can't add buf len %i - avail = %i\n",
			 descs_used, vq->vq.num_free);
		/* FIXME: for historical reasons, we force a notify here if
		 * there are outgoing parts to the buffer.  Presumably the
		 * host should service the ring ASAP. */
		if (out_sgs)
			vq->notify(&vq->vq);
		if (indirect)
			kfree(desc);
		END_USE(vq);
		return -ENOSPC;
	}

	for (n = 0; n < out_sgs; n++) {
		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
			if (vring_mapping_error(vq, addr))
				goto unmap_release;

			prev = i;
			/* Note that we trust indirect descriptor
			 * table since it uses stream DMA mapping.
			 */
			i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
						     VRING_DESC_F_NEXT,
						     indirect);
		}
	}
	for (; n < (out_sgs + in_sgs); n++) {
		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
			if (vring_mapping_error(vq, addr))
				goto unmap_release;

			prev = i;
			/* Note that we trust indirect descriptor
			 * table since it uses stream DMA mapping.
568 */ 569 i = virtqueue_add_desc_split(_vq, desc, i, addr, 570 sg->length, 571 VRING_DESC_F_NEXT | 572 VRING_DESC_F_WRITE, 573 indirect); 574 } 575 } 576 /* Last one doesn't continue. */ 577 desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); 578 if (!indirect && vq->use_dma_api) 579 vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &= 580 ~VRING_DESC_F_NEXT; 581 582 if (indirect) { 583 /* Now that the indirect table is filled in, map it. */ 584 dma_addr_t addr = vring_map_single( 585 vq, desc, total_sg * sizeof(struct vring_desc), 586 DMA_TO_DEVICE); 587 if (vring_mapping_error(vq, addr)) 588 goto unmap_release; 589 590 virtqueue_add_desc_split(_vq, vq->split.vring.desc, 591 head, addr, 592 total_sg * sizeof(struct vring_desc), 593 VRING_DESC_F_INDIRECT, 594 false); 595 } 596 597 /* We're using some buffers from the free list. */ 598 vq->vq.num_free -= descs_used; 599 600 /* Update free pointer */ 601 if (indirect) 602 vq->free_head = vq->split.desc_extra[head].next; 603 else 604 vq->free_head = i; 605 606 /* Store token and indirect buffer state. */ 607 vq->split.desc_state[head].data = data; 608 if (indirect) 609 vq->split.desc_state[head].indir_desc = desc; 610 else 611 vq->split.desc_state[head].indir_desc = ctx; 612 613 /* Put entry in available array (but don't update avail->idx until they 614 * do sync). */ 615 avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1); 616 vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head); 617 618 /* Descriptors and available array need to be set before we expose the 619 * new available array entries. */ 620 virtio_wmb(vq->weak_barriers); 621 vq->split.avail_idx_shadow++; 622 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, 623 vq->split.avail_idx_shadow); 624 vq->num_added++; 625 626 pr_debug("Added buffer head %i to %p\n", head, vq); 627 END_USE(vq); 628 629 /* This is very unlikely, but theoretically possible. Kick 630 * just in case. */ 631 if (unlikely(vq->num_added == (1 << 16) - 1)) 632 virtqueue_kick(_vq); 633 634 return 0; 635 636 unmap_release: 637 err_idx = i; 638 639 if (indirect) 640 i = 0; 641 else 642 i = head; 643 644 for (n = 0; n < total_sg; n++) { 645 if (i == err_idx) 646 break; 647 if (indirect) { 648 vring_unmap_one_split_indirect(vq, &desc[i]); 649 i = virtio16_to_cpu(_vq->vdev, desc[i].next); 650 } else 651 i = vring_unmap_one_split(vq, i); 652 } 653 654 if (indirect) 655 kfree(desc); 656 657 END_USE(vq); 658 return -ENOMEM; 659 } 660 661 static bool virtqueue_kick_prepare_split(struct virtqueue *_vq) 662 { 663 struct vring_virtqueue *vq = to_vvq(_vq); 664 u16 new, old; 665 bool needs_kick; 666 667 START_USE(vq); 668 /* We need to expose available array entries before checking avail 669 * event. */ 670 virtio_mb(vq->weak_barriers); 671 672 old = vq->split.avail_idx_shadow - vq->num_added; 673 new = vq->split.avail_idx_shadow; 674 vq->num_added = 0; 675 676 LAST_ADD_TIME_CHECK(vq); 677 LAST_ADD_TIME_INVALID(vq); 678 679 if (vq->event) { 680 needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev, 681 vring_avail_event(&vq->split.vring)), 682 new, old); 683 } else { 684 needs_kick = !(vq->split.vring.used->flags & 685 cpu_to_virtio16(_vq->vdev, 686 VRING_USED_F_NO_NOTIFY)); 687 } 688 END_USE(vq); 689 return needs_kick; 690 } 691 692 static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, 693 void **ctx) 694 { 695 unsigned int i, j; 696 __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT); 697 698 /* Clear data ptr. 
	vq->split.desc_state[head].data = NULL;

	/* Put back on free list: unmap first-level descriptors and find end */
	i = head;

	while (vq->split.vring.desc[i].flags & nextflag) {
		vring_unmap_one_split(vq, i);
		i = vq->split.desc_extra[i].next;
		vq->vq.num_free++;
	}

	vring_unmap_one_split(vq, i);
	vq->split.desc_extra[i].next = vq->free_head;
	vq->free_head = head;

	/* Plus final descriptor */
	vq->vq.num_free++;

	if (vq->indirect) {
		struct vring_desc *indir_desc =
				vq->split.desc_state[head].indir_desc;
		u32 len;

		/* Free the indirect table, if any, now that it's unmapped. */
		if (!indir_desc)
			return;

		len = vq->split.desc_extra[head].len;

		BUG_ON(!(vq->split.desc_extra[head].flags &
				VRING_DESC_F_INDIRECT));
		BUG_ON(len == 0 || len % sizeof(struct vring_desc));

		for (j = 0; j < len / sizeof(struct vring_desc); j++)
			vring_unmap_one_split_indirect(vq, &indir_desc[j]);

		kfree(indir_desc);
		vq->split.desc_state[head].indir_desc = NULL;
	} else if (ctx) {
		*ctx = vq->split.desc_state[head].indir_desc;
	}
}

static inline bool more_used_split(const struct vring_virtqueue *vq)
{
	return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
			vq->split.vring.used->idx);
}

static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
					 unsigned int *len,
					 void **ctx)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	void *ret;
	unsigned int i;
	u16 last_used;

	START_USE(vq);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return NULL;
	}

	if (!more_used_split(vq)) {
		pr_debug("No more buffers in queue\n");
		END_USE(vq);
		return NULL;
	}

	/* Only get used array entries after they have been exposed by host. */
	virtio_rmb(vq->weak_barriers);

	last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
	i = virtio32_to_cpu(_vq->vdev,
			vq->split.vring.used->ring[last_used].id);
	*len = virtio32_to_cpu(_vq->vdev,
			vq->split.vring.used->ring[last_used].len);

	if (unlikely(i >= vq->split.vring.num)) {
		BAD_RING(vq, "id %u out of range\n", i);
		return NULL;
	}
	if (unlikely(!vq->split.desc_state[i].data)) {
		BAD_RING(vq, "id %u is not a head!\n", i);
		return NULL;
	}

	/* detach_buf_split clears data, so grab it now. */
	ret = vq->split.desc_state[i].data;
	detach_buf_split(vq, i, ctx);
	vq->last_used_idx++;
	/* If we expect an interrupt for the next entry, tell host
	 * by writing event index and flush out the write before
	 * the read in the next get_buf call. */
	if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
		virtio_store_mb(vq->weak_barriers,
				&vring_used_event(&vq->split.vring),
				cpu_to_virtio16(_vq->vdev, vq->last_used_idx));

	LAST_ADD_TIME_INVALID(vq);

	END_USE(vq);
	return ret;
}

static void virtqueue_disable_cb_split(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
		vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
		if (vq->event)
			/* TODO: this is a hack. Figure out a cleaner value to write. */
			vring_used_event(&vq->split.vring) = 0x0;
		else
			vq->split.vring.avail->flags =
				cpu_to_virtio16(_vq->vdev,
						vq->split.avail_flags_shadow);
	}
}

static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u16 last_used_idx;

	START_USE(vq);

	/* We optimistically turn back on interrupts, then check if there was
	 * more to do. */
	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
	 * either clear the flags bit or point the event index at the next
	 * entry. Always do both to keep code simple. */
	if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
		vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
		if (!vq->event)
			vq->split.vring.avail->flags =
				cpu_to_virtio16(_vq->vdev,
						vq->split.avail_flags_shadow);
	}
	vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev,
			last_used_idx = vq->last_used_idx);
	END_USE(vq);
	return last_used_idx;
}

static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
			vq->split.vring.used->idx);
}

static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u16 bufs;

	START_USE(vq);

	/* We optimistically turn back on interrupts, then check if there was
	 * more to do. */
	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
	 * either clear the flags bit or point the event index at the next
	 * entry. Always update the event index to keep code simple. */
	if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
		vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
		if (!vq->event)
			vq->split.vring.avail->flags =
				cpu_to_virtio16(_vq->vdev,
						vq->split.avail_flags_shadow);
	}
	/* TODO: tune this threshold */
	bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;

	virtio_store_mb(vq->weak_barriers,
			&vring_used_event(&vq->split.vring),
			cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));

	if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx)
					- vq->last_used_idx) > bufs)) {
		END_USE(vq);
		return false;
	}

	END_USE(vq);
	return true;
}

static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	unsigned int i;
	void *buf;

	START_USE(vq);

	for (i = 0; i < vq->split.vring.num; i++) {
		if (!vq->split.desc_state[i].data)
			continue;
		/* detach_buf_split clears data, so grab it now. */
		buf = vq->split.desc_state[i].data;
		detach_buf_split(vq, i, NULL);
		vq->split.avail_idx_shadow--;
		vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
				vq->split.avail_idx_shadow);
		END_USE(vq);
		return buf;
	}
	/* That should have freed everything. */
	BUG_ON(vq->vq.num_free != vq->split.vring.num);

	END_USE(vq);
	return NULL;
}

static struct virtqueue *vring_create_virtqueue_split(
	unsigned int index,
	unsigned int num,
	unsigned int vring_align,
	struct virtio_device *vdev,
	bool weak_barriers,
	bool may_reduce_num,
	bool context,
	bool (*notify)(struct virtqueue *),
	void (*callback)(struct virtqueue *),
	const char *name)
{
	struct virtqueue *vq;
	void *queue = NULL;
	dma_addr_t dma_addr;
	size_t queue_size_in_bytes;
	struct vring vring;

	/* We assume num is a power of 2. */
	if (num & (num - 1)) {
		dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
		return NULL;
	}

	/* TODO: allocate each queue chunk individually */
	for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
		queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
					  &dma_addr,
					  GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
		if (queue)
			break;
		if (!may_reduce_num)
			return NULL;
	}

	if (!num)
		return NULL;

	if (!queue) {
		/* Try to get a single page. You are my only hope! */
		queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
					  &dma_addr, GFP_KERNEL|__GFP_ZERO);
	}
	if (!queue)
		return NULL;

	queue_size_in_bytes = vring_size(num, vring_align);
	vring_init(&vring, num, queue, vring_align);

	vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
				   notify, callback, name);
	if (!vq) {
		vring_free_queue(vdev, queue_size_in_bytes, queue,
				 dma_addr);
		return NULL;
	}

	to_vvq(vq)->split.queue_dma_addr = dma_addr;
	to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes;
	to_vvq(vq)->we_own_ring = true;

	return vq;
}


/*
 * Packed ring specific functions - *_packed().
 */

static void vring_unmap_state_packed(const struct vring_virtqueue *vq,
				     struct vring_desc_extra *state)
{
	u16 flags;

	if (!vq->use_dma_api)
		return;

	flags = state->flags;

	if (flags & VRING_DESC_F_INDIRECT) {
		dma_unmap_single(vring_dma_dev(vq),
				 state->addr, state->len,
				 (flags & VRING_DESC_F_WRITE) ?
				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
	} else {
		dma_unmap_page(vring_dma_dev(vq),
			       state->addr, state->len,
			       (flags & VRING_DESC_F_WRITE) ?
			       DMA_FROM_DEVICE : DMA_TO_DEVICE);
	}
}

static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
				    struct vring_packed_desc *desc)
{
	u16 flags;

	if (!vq->use_dma_api)
		return;

	flags = le16_to_cpu(desc->flags);

	if (flags & VRING_DESC_F_INDIRECT) {
		dma_unmap_single(vring_dma_dev(vq),
				 le64_to_cpu(desc->addr),
				 le32_to_cpu(desc->len),
				 (flags & VRING_DESC_F_WRITE) ?
				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
	} else {
		dma_unmap_page(vring_dma_dev(vq),
			       le64_to_cpu(desc->addr),
			       le32_to_cpu(desc->len),
			       (flags & VRING_DESC_F_WRITE) ?
			       DMA_FROM_DEVICE : DMA_TO_DEVICE);
	}
}

static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
						       gfp_t gfp)
{
	struct vring_packed_desc *desc;

	/*
	 * We require lowmem mappings for the descriptors because
	 * otherwise virt_to_phys will give us bogus addresses in the
	 * virtqueue.
	 */
	gfp &= ~__GFP_HIGHMEM;

	desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp);

	return desc;
}

static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
					 struct scatterlist *sgs[],
					 unsigned int total_sg,
					 unsigned int out_sgs,
					 unsigned int in_sgs,
					 void *data,
					 gfp_t gfp)
{
	struct vring_packed_desc *desc;
	struct scatterlist *sg;
	unsigned int i, n, err_idx;
	u16 head, id;
	dma_addr_t addr;

	head = vq->packed.next_avail_idx;
	desc = alloc_indirect_packed(total_sg, gfp);
	if (!desc)
		return -ENOMEM;

	if (unlikely(vq->vq.num_free < 1)) {
		pr_debug("Can't add buf len 1 - avail = 0\n");
		kfree(desc);
		END_USE(vq);
		return -ENOSPC;
	}

	i = 0;
	id = vq->free_head;
	BUG_ON(id == vq->packed.vring.num);

	for (n = 0; n < out_sgs + in_sgs; n++) {
		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
			addr = vring_map_one_sg(vq, sg, n < out_sgs ?
					DMA_TO_DEVICE : DMA_FROM_DEVICE);
			if (vring_mapping_error(vq, addr))
				goto unmap_release;

			desc[i].flags = cpu_to_le16(n < out_sgs ?
						0 : VRING_DESC_F_WRITE);
			desc[i].addr = cpu_to_le64(addr);
			desc[i].len = cpu_to_le32(sg->length);
			i++;
		}
	}

	/* Now that the indirect table is filled in, map it. */
	addr = vring_map_single(vq, desc,
			total_sg * sizeof(struct vring_packed_desc),
			DMA_TO_DEVICE);
	if (vring_mapping_error(vq, addr))
		goto unmap_release;

	vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
	vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
				sizeof(struct vring_packed_desc));
	vq->packed.vring.desc[head].id = cpu_to_le16(id);

	if (vq->use_dma_api) {
		vq->packed.desc_extra[id].addr = addr;
		vq->packed.desc_extra[id].len = total_sg *
				sizeof(struct vring_packed_desc);
		vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
						  vq->packed.avail_used_flags;
	}

	/*
	 * A driver MUST NOT make the first descriptor in the list
	 * available before all subsequent descriptors comprising
	 * the list are made available.
	 */
	virtio_wmb(vq->weak_barriers);
	vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
						vq->packed.avail_used_flags);

	/* We're using some buffers from the free list. */
	vq->vq.num_free -= 1;

	/* Update free pointer */
	n = head + 1;
	if (n >= vq->packed.vring.num) {
		n = 0;
		vq->packed.avail_wrap_counter ^= 1;
		vq->packed.avail_used_flags ^=
				1 << VRING_PACKED_DESC_F_AVAIL |
				1 << VRING_PACKED_DESC_F_USED;
	}
	vq->packed.next_avail_idx = n;
	vq->free_head = vq->packed.desc_extra[id].next;

	/* Store token and indirect buffer state. */
	vq->packed.desc_state[id].num = 1;
	vq->packed.desc_state[id].data = data;
	vq->packed.desc_state[id].indir_desc = desc;
	vq->packed.desc_state[id].last = id;

	vq->num_added += 1;

	pr_debug("Added buffer head %i to %p\n", head, vq);
	END_USE(vq);

	return 0;

unmap_release:
	err_idx = i;

	for (i = 0; i < err_idx; i++)
		vring_unmap_desc_packed(vq, &desc[i]);

	kfree(desc);

	END_USE(vq);
	return -ENOMEM;
}

static inline int virtqueue_add_packed(struct virtqueue *_vq,
				       struct scatterlist *sgs[],
				       unsigned int total_sg,
				       unsigned int out_sgs,
				       unsigned int in_sgs,
				       void *data,
				       void *ctx,
				       gfp_t gfp)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	struct vring_packed_desc *desc;
	struct scatterlist *sg;
	unsigned int i, n, c, descs_used, err_idx;
	__le16 head_flags, flags;
	u16 head, id, prev, curr, avail_used_flags;
	int err;

	START_USE(vq);

	BUG_ON(data == NULL);
	BUG_ON(ctx && vq->indirect);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return -EIO;
	}

	LAST_ADD_TIME_UPDATE(vq);

	BUG_ON(total_sg == 0);

	if (virtqueue_use_indirect(_vq, total_sg)) {
		err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
						    in_sgs, data, gfp);
		if (err != -ENOMEM) {
			END_USE(vq);
			return err;
		}

		/* fall back on direct */
	}

	head = vq->packed.next_avail_idx;
	avail_used_flags = vq->packed.avail_used_flags;

	WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);

	desc = vq->packed.vring.desc;
	i = head;
	descs_used = total_sg;

	if (unlikely(vq->vq.num_free < descs_used)) {
		pr_debug("Can't add buf len %i - avail = %i\n",
			 descs_used, vq->vq.num_free);
		END_USE(vq);
		return -ENOSPC;
	}

	id = vq->free_head;
	BUG_ON(id == vq->packed.vring.num);

	curr = id;
	c = 0;
	for (n = 0; n < out_sgs + in_sgs; n++) {
		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
			dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
					DMA_TO_DEVICE : DMA_FROM_DEVICE);
			if (vring_mapping_error(vq, addr))
				goto unmap_release;

			flags = cpu_to_le16(vq->packed.avail_used_flags |
				    (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
				    (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
			if (i == head)
				head_flags = flags;
			else
				desc[i].flags = flags;

			desc[i].addr = cpu_to_le64(addr);
			desc[i].len = cpu_to_le32(sg->length);
			desc[i].id = cpu_to_le16(id);

			if (unlikely(vq->use_dma_api)) {
				vq->packed.desc_extra[curr].addr = addr;
				vq->packed.desc_extra[curr].len = sg->length;
				vq->packed.desc_extra[curr].flags =
					le16_to_cpu(flags);
			}
			prev = curr;
			curr = vq->packed.desc_extra[curr].next;

			if ((unlikely(++i >= vq->packed.vring.num))) {
				i = 0;
				vq->packed.avail_used_flags ^=
					1 << VRING_PACKED_DESC_F_AVAIL |
					1 << VRING_PACKED_DESC_F_USED;
			}
		}
	}

	if (i < head)
		vq->packed.avail_wrap_counter ^= 1;

	/* We're using some buffers from the free list. */
	vq->vq.num_free -= descs_used;

	/* Update free pointer */
	vq->packed.next_avail_idx = i;
	vq->free_head = curr;

	/* Store token. */
	vq->packed.desc_state[id].num = descs_used;
	vq->packed.desc_state[id].data = data;
	vq->packed.desc_state[id].indir_desc = ctx;
	vq->packed.desc_state[id].last = prev;

	/*
	 * A driver MUST NOT make the first descriptor in the list
	 * available before all subsequent descriptors comprising
	 * the list are made available.
	 */
	virtio_wmb(vq->weak_barriers);
	vq->packed.vring.desc[head].flags = head_flags;
	vq->num_added += descs_used;

	pr_debug("Added buffer head %i to %p\n", head, vq);
	END_USE(vq);

	return 0;

unmap_release:
	err_idx = i;
	i = head;
	curr = vq->free_head;

	vq->packed.avail_used_flags = avail_used_flags;

	for (n = 0; n < total_sg; n++) {
		if (i == err_idx)
			break;
		vring_unmap_state_packed(vq,
					 &vq->packed.desc_extra[curr]);
		curr = vq->packed.desc_extra[curr].next;
		i++;
		if (i >= vq->packed.vring.num)
			i = 0;
	}

	END_USE(vq);
	return -EIO;
}

static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u16 new, old, off_wrap, flags, wrap_counter, event_idx;
	bool needs_kick;
	union {
		struct {
			__le16 off_wrap;
			__le16 flags;
		};
		u32 u32;
	} snapshot;

	START_USE(vq);

	/*
	 * We need to expose the new flags value before checking notification
	 * suppressions.
	 */
	virtio_mb(vq->weak_barriers);

	old = vq->packed.next_avail_idx - vq->num_added;
	new = vq->packed.next_avail_idx;
	vq->num_added = 0;

	snapshot.u32 = *(u32 *)vq->packed.vring.device;
	flags = le16_to_cpu(snapshot.flags);

	LAST_ADD_TIME_CHECK(vq);
	LAST_ADD_TIME_INVALID(vq);

	if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
		needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
		goto out;
	}

	off_wrap = le16_to_cpu(snapshot.off_wrap);

	wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
	event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
	if (wrap_counter != vq->packed.avail_wrap_counter)
		event_idx -= vq->packed.vring.num;

	needs_kick = vring_need_event(event_idx, new, old);
out:
	END_USE(vq);
	return needs_kick;
}

static void detach_buf_packed(struct vring_virtqueue *vq,
			      unsigned int id, void **ctx)
{
	struct vring_desc_state_packed *state = NULL;
	struct vring_packed_desc *desc;
	unsigned int i, curr;

	state = &vq->packed.desc_state[id];

	/* Clear data ptr. */
	state->data = NULL;

	vq->packed.desc_extra[state->last].next = vq->free_head;
	vq->free_head = id;
	vq->vq.num_free += state->num;

	if (unlikely(vq->use_dma_api)) {
		curr = id;
		for (i = 0; i < state->num; i++) {
			vring_unmap_state_packed(vq,
						 &vq->packed.desc_extra[curr]);
			curr = vq->packed.desc_extra[curr].next;
		}
	}

	if (vq->indirect) {
		u32 len;

		/* Free the indirect table, if any, now that it's unmapped. */
		desc = state->indir_desc;
		if (!desc)
			return;

		if (vq->use_dma_api) {
			len = vq->packed.desc_extra[id].len;
			for (i = 0; i < len / sizeof(struct vring_packed_desc);
					i++)
				vring_unmap_desc_packed(vq, &desc[i]);
		}
		kfree(desc);
		state->indir_desc = NULL;
	} else if (ctx) {
		*ctx = state->indir_desc;
	}
}

static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
				       u16 idx, bool used_wrap_counter)
{
	bool avail, used;
	u16 flags;

	flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
	avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
	used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));

	return avail == used && used == used_wrap_counter;
}

static inline bool more_used_packed(const struct vring_virtqueue *vq)
{
	return is_used_desc_packed(vq, vq->last_used_idx,
			vq->packed.used_wrap_counter);
}

static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
					  unsigned int *len,
					  void **ctx)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u16 last_used, id;
	void *ret;

	START_USE(vq);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return NULL;
	}

	if (!more_used_packed(vq)) {
		pr_debug("No more buffers in queue\n");
		END_USE(vq);
		return NULL;
	}

	/* Only get used elements after they have been exposed by host. */
	virtio_rmb(vq->weak_barriers);

	last_used = vq->last_used_idx;
	id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
	*len = le32_to_cpu(vq->packed.vring.desc[last_used].len);

	if (unlikely(id >= vq->packed.vring.num)) {
		BAD_RING(vq, "id %u out of range\n", id);
		return NULL;
	}
	if (unlikely(!vq->packed.desc_state[id].data)) {
		BAD_RING(vq, "id %u is not a head!\n", id);
		return NULL;
	}

	/* detach_buf_packed clears data, so grab it now. */
	ret = vq->packed.desc_state[id].data;
	detach_buf_packed(vq, id, ctx);

	vq->last_used_idx += vq->packed.desc_state[id].num;
	if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) {
		vq->last_used_idx -= vq->packed.vring.num;
		vq->packed.used_wrap_counter ^= 1;
	}

	/*
	 * If we expect an interrupt for the next entry, tell host
	 * by writing event index and flush out the write before
	 * the read in the next get_buf call.
	 */
	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
		virtio_store_mb(vq->weak_barriers,
				&vq->packed.vring.driver->off_wrap,
				cpu_to_le16(vq->last_used_idx |
					(vq->packed.used_wrap_counter <<
					 VRING_PACKED_EVENT_F_WRAP_CTR)));

	LAST_ADD_TIME_INVALID(vq);

	END_USE(vq);
	return ret;
}

static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
		vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
		vq->packed.vring.driver->flags =
			cpu_to_le16(vq->packed.event_flags_shadow);
	}
}

static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	START_USE(vq);

	/*
	 * We optimistically turn back on interrupts, then check if there was
	 * more to do.
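	 *
	 * With event-idx (VRING_PACKED_EVENT_FLAG_DESC) the device is asked
	 * to interrupt once the descriptor at the stored offset is used;
	 * without it, interrupts are simply re-enabled.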
	 */

	if (vq->event) {
		vq->packed.vring.driver->off_wrap =
			cpu_to_le16(vq->last_used_idx |
				(vq->packed.used_wrap_counter <<
				 VRING_PACKED_EVENT_F_WRAP_CTR));
		/*
		 * We need to update event offset and event wrap
		 * counter first before updating event flags.
		 */
		virtio_wmb(vq->weak_barriers);
	}

	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
		vq->packed.event_flags_shadow = vq->event ?
				VRING_PACKED_EVENT_FLAG_DESC :
				VRING_PACKED_EVENT_FLAG_ENABLE;
		vq->packed.vring.driver->flags =
				cpu_to_le16(vq->packed.event_flags_shadow);
	}

	END_USE(vq);
	return vq->last_used_idx | ((u16)vq->packed.used_wrap_counter <<
			VRING_PACKED_EVENT_F_WRAP_CTR);
}

static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	bool wrap_counter;
	u16 used_idx;

	wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
	used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);

	return is_used_desc_packed(vq, used_idx, wrap_counter);
}

static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u16 used_idx, wrap_counter;
	u16 bufs;

	START_USE(vq);

	/*
	 * We optimistically turn back on interrupts, then check if there was
	 * more to do.
	 */

	if (vq->event) {
		/* TODO: tune this threshold */
		bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4;
		wrap_counter = vq->packed.used_wrap_counter;

		used_idx = vq->last_used_idx + bufs;
		if (used_idx >= vq->packed.vring.num) {
			used_idx -= vq->packed.vring.num;
			wrap_counter ^= 1;
		}

		vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx |
			(wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));

		/*
		 * We need to update event offset and event wrap
		 * counter first before updating event flags.
		 */
		virtio_wmb(vq->weak_barriers);
	}

	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
		vq->packed.event_flags_shadow = vq->event ?
				VRING_PACKED_EVENT_FLAG_DESC :
				VRING_PACKED_EVENT_FLAG_ENABLE;
		vq->packed.vring.driver->flags =
				cpu_to_le16(vq->packed.event_flags_shadow);
	}

	/*
	 * We need to update event suppression structure first
	 * before re-checking for more used buffers.
	 */
	virtio_mb(vq->weak_barriers);

	if (is_used_desc_packed(vq,
				vq->last_used_idx,
				vq->packed.used_wrap_counter)) {
		END_USE(vq);
		return false;
	}

	END_USE(vq);
	return true;
}

static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	unsigned int i;
	void *buf;

	START_USE(vq);

	for (i = 0; i < vq->packed.vring.num; i++) {
		if (!vq->packed.desc_state[i].data)
			continue;
		/* detach_buf clears data, so grab it now. */
		buf = vq->packed.desc_state[i].data;
		detach_buf_packed(vq, i, NULL);
		END_USE(vq);
		return buf;
	}
	/* That should have freed everything. */
	BUG_ON(vq->vq.num_free != vq->packed.vring.num);

	END_USE(vq);
	return NULL;
}

static struct vring_desc_extra *vring_alloc_desc_extra(struct vring_virtqueue *vq,
						       unsigned int num)
{
	struct vring_desc_extra *desc_extra;
	unsigned int i;

	desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra),
				   GFP_KERNEL);
	if (!desc_extra)
		return NULL;

	memset(desc_extra, 0, num * sizeof(struct vring_desc_extra));

	for (i = 0; i < num - 1; i++)
		desc_extra[i].next = i + 1;

	return desc_extra;
}

static struct virtqueue *vring_create_virtqueue_packed(
	unsigned int index,
	unsigned int num,
	unsigned int vring_align,
	struct virtio_device *vdev,
	bool weak_barriers,
	bool may_reduce_num,
	bool context,
	bool (*notify)(struct virtqueue *),
	void (*callback)(struct virtqueue *),
	const char *name)
{
	struct vring_virtqueue *vq;
	struct vring_packed_desc *ring;
	struct vring_packed_desc_event *driver, *device;
	dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
	size_t ring_size_in_bytes, event_size_in_bytes;

	ring_size_in_bytes = num * sizeof(struct vring_packed_desc);

	ring = vring_alloc_queue(vdev, ring_size_in_bytes,
				 &ring_dma_addr,
				 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
	if (!ring)
		goto err_ring;

	event_size_in_bytes = sizeof(struct vring_packed_desc_event);

	driver = vring_alloc_queue(vdev, event_size_in_bytes,
				   &driver_event_dma_addr,
				   GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
	if (!driver)
		goto err_driver;

	device = vring_alloc_queue(vdev, event_size_in_bytes,
				   &device_event_dma_addr,
				   GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
	if (!device)
		goto err_device;

	vq = kmalloc(sizeof(*vq), GFP_KERNEL);
	if (!vq)
		goto err_vq;

	vq->vq.callback = callback;
	vq->vq.vdev = vdev;
	vq->vq.name = name;
	vq->vq.num_free = num;
	vq->vq.index = index;
	vq->we_own_ring = true;
	vq->notify = notify;
	vq->weak_barriers = weak_barriers;
	vq->broken = false;
	vq->last_used_idx = 0;
	vq->event_triggered = false;
	vq->num_added = 0;
	vq->packed_ring = true;
	vq->use_dma_api = vring_use_dma_api(vdev);
#ifdef DEBUG
	vq->in_use = false;
	vq->last_add_time_valid = false;
#endif

	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
		!context;
	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);

	if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
		vq->weak_barriers = false;

	vq->packed.ring_dma_addr = ring_dma_addr;
	vq->packed.driver_event_dma_addr = driver_event_dma_addr;
	vq->packed.device_event_dma_addr = device_event_dma_addr;

	vq->packed.ring_size_in_bytes = ring_size_in_bytes;
	vq->packed.event_size_in_bytes = event_size_in_bytes;

	vq->packed.vring.num = num;
	vq->packed.vring.desc = ring;
	vq->packed.vring.driver = driver;
	vq->packed.vring.device = device;

	vq->packed.next_avail_idx = 0;
	vq->packed.avail_wrap_counter = 1;
	vq->packed.used_wrap_counter = 1;
	vq->packed.event_flags_shadow = 0;
	vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;

	vq->packed.desc_state = kmalloc_array(num,
			sizeof(struct vring_desc_state_packed),
			GFP_KERNEL);
	if (!vq->packed.desc_state)
		goto err_desc_state;

	memset(vq->packed.desc_state, 0,
		num * sizeof(struct vring_desc_state_packed));

	/* Put everything in free lists. */
	vq->free_head = 0;

	vq->packed.desc_extra = vring_alloc_desc_extra(vq, num);
	if (!vq->packed.desc_extra)
		goto err_desc_extra;

	/* No callback? Tell other side not to bother us. */
	if (!callback) {
		vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
		vq->packed.vring.driver->flags =
			cpu_to_le16(vq->packed.event_flags_shadow);
	}

	spin_lock(&vdev->vqs_list_lock);
	list_add_tail(&vq->vq.list, &vdev->vqs);
	spin_unlock(&vdev->vqs_list_lock);
	return &vq->vq;

err_desc_extra:
	kfree(vq->packed.desc_state);
err_desc_state:
	kfree(vq);
err_vq:
	vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr);
err_device:
	vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr);
err_driver:
	vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr);
err_ring:
	return NULL;
}


/*
 * Generic functions and exported symbols.
 */

static inline int virtqueue_add(struct virtqueue *_vq,
				struct scatterlist *sgs[],
				unsigned int total_sg,
				unsigned int out_sgs,
				unsigned int in_sgs,
				void *data,
				void *ctx,
				gfp_t gfp)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
					out_sgs, in_sgs, data, ctx, gfp) :
				 virtqueue_add_split(_vq, sgs, total_sg,
					out_sgs, in_sgs, data, ctx, gfp);
}

/**
 * virtqueue_add_sgs - expose buffers to other end
 * @_vq: the struct virtqueue we're talking about.
 * @sgs: array of terminated scatterlists.
 * @out_sgs: the number of scatterlists readable by other side
 * @in_sgs: the number of scatterlists which are writable (after readable ones)
 * @data: the token identifying the buffer.
 * @gfp: how to do memory allocations (if necessary).
 *
 * Caller must ensure we don't call this with other virtqueue operations
 * at the same time (except where noted).
 *
 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
 */
int virtqueue_add_sgs(struct virtqueue *_vq,
		      struct scatterlist *sgs[],
		      unsigned int out_sgs,
		      unsigned int in_sgs,
		      void *data,
		      gfp_t gfp)
{
	unsigned int i, total_sg = 0;

	/* Count them first. */
	for (i = 0; i < out_sgs + in_sgs; i++) {
		struct scatterlist *sg;

		for (sg = sgs[i]; sg; sg = sg_next(sg))
			total_sg++;
	}
	return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
			     data, NULL, gfp);
}
EXPORT_SYMBOL_GPL(virtqueue_add_sgs);

/**
 * virtqueue_add_outbuf - expose output buffers to other end
 * @vq: the struct virtqueue we're talking about.
 * @sg: scatterlist (must be well-formed and terminated!)
 * @num: the number of entries in @sg readable by other side
 * @data: the token identifying the buffer.
 * @gfp: how to do memory allocations (if necessary).
 *
 * Caller must ensure we don't call this with other virtqueue operations
 * at the same time (except where noted).
 *
 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
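 *
 * A typical call pattern (illustrative only, not taken from a specific
 * driver; buf and buf_len stand in for driver-owned data):
 *
 *	struct scatterlist sg;
 *
 *	sg_init_one(&sg, buf, buf_len);
 *	if (!virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC))
 *		virtqueue_kick(vq);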
 */
int virtqueue_add_outbuf(struct virtqueue *vq,
			 struct scatterlist *sg, unsigned int num,
			 void *data,
			 gfp_t gfp)
{
	return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp);
}
EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);

/**
 * virtqueue_add_inbuf - expose input buffers to other end
 * @vq: the struct virtqueue we're talking about.
 * @sg: scatterlist (must be well-formed and terminated!)
 * @num: the number of entries in @sg writable by other side
 * @data: the token identifying the buffer.
 * @gfp: how to do memory allocations (if necessary).
 *
 * Caller must ensure we don't call this with other virtqueue operations
 * at the same time (except where noted).
 *
 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
 */
int virtqueue_add_inbuf(struct virtqueue *vq,
			struct scatterlist *sg, unsigned int num,
			void *data,
			gfp_t gfp)
{
	return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp);
}
EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);

/**
 * virtqueue_add_inbuf_ctx - expose input buffers to other end
 * @vq: the struct virtqueue we're talking about.
 * @sg: scatterlist (must be well-formed and terminated!)
 * @num: the number of entries in @sg writable by other side
 * @data: the token identifying the buffer.
 * @ctx: extra context for the token
 * @gfp: how to do memory allocations (if necessary).
 *
 * Caller must ensure we don't call this with other virtqueue operations
 * at the same time (except where noted).
 *
 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
 */
int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
			    struct scatterlist *sg, unsigned int num,
			    void *data,
			    void *ctx,
			    gfp_t gfp)
{
	return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp);
}
EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);

/**
 * virtqueue_kick_prepare - first half of split virtqueue_kick call.
 * @_vq: the struct virtqueue
 *
 * Instead of virtqueue_kick(), you can do:
 *	if (virtqueue_kick_prepare(vq))
 *		virtqueue_notify(vq);
 *
 * This is sometimes useful because the virtqueue_kick_prepare() needs
 * to be serialized, but the actual virtqueue_notify() call does not.
 */
bool virtqueue_kick_prepare(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) :
				 virtqueue_kick_prepare_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);

/**
 * virtqueue_notify - second half of split virtqueue_kick call.
 * @_vq: the struct virtqueue
 *
 * This does not need to be serialized.
 *
 * Returns false if host notify failed or queue is broken, otherwise true.
 */
bool virtqueue_notify(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (unlikely(vq->broken))
		return false;

	/* Prod other side to tell it about changes. */
	if (!vq->notify(_vq)) {
		vq->broken = true;
		return false;
	}
	return true;
}
EXPORT_SYMBOL_GPL(virtqueue_notify);

/**
 * virtqueue_kick - update after add_buf
 * @vq: the struct virtqueue
 *
 * After one or more virtqueue_add_* calls, invoke this to kick
 * the other side.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 *
 * Returns false if kick failed, otherwise true.
 */
bool virtqueue_kick(struct virtqueue *vq)
{
	if (virtqueue_kick_prepare(vq))
		return virtqueue_notify(vq);
	return true;
}
EXPORT_SYMBOL_GPL(virtqueue_kick);

/**
 * virtqueue_get_buf_ctx - get the next used buffer
 * @_vq: the struct virtqueue we're talking about.
 * @len: the length written into the buffer
 * @ctx: extra context for the token
 *
 * If the device wrote data into the buffer, @len will be set to the
 * amount written.  This means you don't need to clear the buffer
 * beforehand to ensure there's no data leakage in the case of short
 * writes.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 *
 * Returns NULL if there are no used buffers, or the "data" token
 * handed to virtqueue_add_*().
 */
void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
			    void **ctx)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
				 virtqueue_get_buf_ctx_split(_vq, len, ctx);
}
EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);

void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
{
	return virtqueue_get_buf_ctx(_vq, len, NULL);
}
EXPORT_SYMBOL_GPL(virtqueue_get_buf);
/**
 * virtqueue_disable_cb - disable callbacks
 * @_vq: the struct virtqueue we're talking about.
 *
 * Note that this is not necessarily synchronous, hence unreliable and only
 * useful as an optimization.
 *
 * Unlike other operations, this need not be serialized.
 */
void virtqueue_disable_cb(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	/* If device triggered an event already it won't trigger one again:
	 * no need to disable.
	 */
	if (vq->event_triggered)
		return;

	if (vq->packed_ring)
		virtqueue_disable_cb_packed(_vq);
	else
		virtqueue_disable_cb_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_disable_cb);

/**
 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
 * @_vq: the struct virtqueue we're talking about.
 *
 * This re-enables callbacks; it returns current queue state
 * in an opaque unsigned value. This value should be later tested by
 * virtqueue_poll, to detect a possible race between the driver checking for
 * more work, and enabling callbacks.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 */
unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (vq->event_triggered)
		vq->event_triggered = false;

	return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) :
				 virtqueue_enable_cb_prepare_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);

/**
 * virtqueue_poll - query pending used buffers
 * @_vq: the struct virtqueue we're talking about.
 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
 *
 * Returns "true" if there are pending used buffers in the queue.
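 *
 * Typical use together with virtqueue_enable_cb_prepare() (illustrative;
 * this is essentially what virtqueue_enable_cb() does):
 *
 *	unsigned opaque = virtqueue_enable_cb_prepare(vq);
 *
 *	if (virtqueue_poll(vq, opaque))
 *		... callbacks raced with us, process the queue again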
/**
 * virtqueue_poll - query pending used buffers
 * @_vq: the struct virtqueue we're talking about.
 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
 *
 * Returns "true" if there are pending used buffers in the queue.
 *
 * This does not need to be serialized.
 */
bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        if (unlikely(vq->broken))
                return false;

        virtio_mb(vq->weak_barriers);
        return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
                                 virtqueue_poll_split(_vq, last_used_idx);
}
EXPORT_SYMBOL_GPL(virtqueue_poll);

/**
 * virtqueue_enable_cb - restart callbacks after disable_cb.
 * @_vq: the struct virtqueue we're talking about.
 *
 * This re-enables callbacks; it returns "false" if there are pending
 * buffers in the queue, to detect a possible race between the driver
 * checking for more work, and enabling callbacks.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 */
bool virtqueue_enable_cb(struct virtqueue *_vq)
{
        unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq);

        return !virtqueue_poll(_vq, last_used_idx);
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb);

/**
 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
 * @_vq: the struct virtqueue we're talking about.
 *
 * This re-enables callbacks but hints to the other side to delay
 * interrupts until most of the available buffers have been processed;
 * it returns "false" if there are many pending buffers in the queue,
 * to detect a possible race between the driver checking for more work,
 * and enabling callbacks.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 */
bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        if (vq->event_triggered)
                vq->event_triggered = false;

        return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
                                 virtqueue_enable_cb_delayed_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);

/**
 * virtqueue_detach_unused_buf - detach first unused buffer
 * @_vq: the struct virtqueue we're talking about.
 *
 * Returns NULL or the "data" token handed to virtqueue_add_*().
 * This is not valid on an active queue; it is useful only for device
 * shutdown.
 */
void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
                                 virtqueue_detach_unused_buf_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);

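/*
 * Illustrative sketch (not part of this file's API): draining used buffers
 * from a virtqueue callback with further callbacks disabled, then closing
 * the race with virtqueue_enable_cb().  my_drain_used() and the my_consume
 * per-buffer handler are hypothetical.
 */
static inline void my_drain_used(struct virtqueue *vq,
                                 void (*my_consume)(void *buf, unsigned int len))
{
        unsigned int len;
        void *buf;

        do {
                virtqueue_disable_cb(vq);
                while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
                        my_consume(buf, len);
                /*
                 * virtqueue_enable_cb() returns false if more buffers were
                 * used while callbacks were off; go around again so none
                 * are left stranded until the next interrupt.
                 */
        } while (!virtqueue_enable_cb(vq));
}
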
static inline bool more_used(const struct vring_virtqueue *vq)
{
        return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
}

irqreturn_t vring_interrupt(int irq, void *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        if (!more_used(vq)) {
                pr_debug("virtqueue interrupt with no work for %p\n", vq);
                return IRQ_NONE;
        }

        if (unlikely(vq->broken))
                return IRQ_HANDLED;

        /* Just a hint for performance: so it's ok that this can be racy! */
        if (vq->event)
                vq->event_triggered = true;

        pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
        if (vq->vq.callback)
                vq->vq.callback(&vq->vq);

        return IRQ_HANDLED;
}
EXPORT_SYMBOL_GPL(vring_interrupt);

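/*
 * Illustrative sketch (not part of this file's API): a transport with a
 * dedicated interrupt per virtqueue can hand vring_interrupt() straight to
 * request_irq(), with the virtqueue as the cookie, roughly as the PCI
 * transport does for per-queue MSI-X vectors.  Assumes <linux/interrupt.h>;
 * my_request_vq_irq() and the irq number are hypothetical.
 */
static inline int my_request_vq_irq(unsigned int irq, struct virtqueue *vq)
{
        /* vring_interrupt() returns IRQ_NONE when the ring has no work. */
        return request_irq(irq, vring_interrupt, 0, "my-virtqueue", vq);
}
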
/* Only available for split ring */
struct virtqueue *__vring_new_virtqueue(unsigned int index,
                                        struct vring vring,
                                        struct virtio_device *vdev,
                                        bool weak_barriers,
                                        bool context,
                                        bool (*notify)(struct virtqueue *),
                                        void (*callback)(struct virtqueue *),
                                        const char *name)
{
        struct vring_virtqueue *vq;

        if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
                return NULL;

        vq = kmalloc(sizeof(*vq), GFP_KERNEL);
        if (!vq)
                return NULL;

        vq->packed_ring = false;
        vq->vq.callback = callback;
        vq->vq.vdev = vdev;
        vq->vq.name = name;
        vq->vq.num_free = vring.num;
        vq->vq.index = index;
        vq->we_own_ring = false;
        vq->notify = notify;
        vq->weak_barriers = weak_barriers;
        vq->broken = false;
        vq->last_used_idx = 0;
        vq->event_triggered = false;
        vq->num_added = 0;
        vq->use_dma_api = vring_use_dma_api(vdev);
#ifdef DEBUG
        vq->in_use = false;
        vq->last_add_time_valid = false;
#endif

        vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
                !context;
        vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);

        if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
                vq->weak_barriers = false;

        vq->split.queue_dma_addr = 0;
        vq->split.queue_size_in_bytes = 0;

        vq->split.vring = vring;
        vq->split.avail_flags_shadow = 0;
        vq->split.avail_idx_shadow = 0;

        /* No callback?  Tell other side not to bother us. */
        if (!callback) {
                vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
                if (!vq->event)
                        vq->split.vring.avail->flags = cpu_to_virtio16(vdev,
                                        vq->split.avail_flags_shadow);
        }

        vq->split.desc_state = kmalloc_array(vring.num,
                        sizeof(struct vring_desc_state_split), GFP_KERNEL);
        if (!vq->split.desc_state)
                goto err_state;

        vq->split.desc_extra = vring_alloc_desc_extra(vq, vring.num);
        if (!vq->split.desc_extra)
                goto err_extra;

        /* Put everything in free lists. */
        vq->free_head = 0;
        memset(vq->split.desc_state, 0, vring.num *
                        sizeof(struct vring_desc_state_split));

        spin_lock(&vdev->vqs_list_lock);
        list_add_tail(&vq->vq.list, &vdev->vqs);
        spin_unlock(&vdev->vqs_list_lock);
        return &vq->vq;

err_extra:
        kfree(vq->split.desc_state);
err_state:
        kfree(vq);
        return NULL;
}
EXPORT_SYMBOL_GPL(__vring_new_virtqueue);

struct virtqueue *vring_create_virtqueue(
        unsigned int index,
        unsigned int num,
        unsigned int vring_align,
        struct virtio_device *vdev,
        bool weak_barriers,
        bool may_reduce_num,
        bool context,
        bool (*notify)(struct virtqueue *),
        void (*callback)(struct virtqueue *),
        const char *name)
{
        if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
                return vring_create_virtqueue_packed(index, num, vring_align,
                                vdev, weak_barriers, may_reduce_num,
                                context, notify, callback, name);

        return vring_create_virtqueue_split(index, num, vring_align,
                        vdev, weak_barriers, may_reduce_num,
                        context, notify, callback, name);
}
EXPORT_SYMBOL_GPL(vring_create_virtqueue);

/* Only available for split ring */
struct virtqueue *vring_new_virtqueue(unsigned int index,
                                      unsigned int num,
                                      unsigned int vring_align,
                                      struct virtio_device *vdev,
                                      bool weak_barriers,
                                      bool context,
                                      void *pages,
                                      bool (*notify)(struct virtqueue *vq),
                                      void (*callback)(struct virtqueue *vq),
                                      const char *name)
{
        struct vring vring;

        if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
                return NULL;

        vring_init(&vring, num, pages, vring_align);
        return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
                                     notify, callback, name);
}
EXPORT_SYMBOL_GPL(vring_new_virtqueue);

void vring_del_virtqueue(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        spin_lock(&vq->vq.vdev->vqs_list_lock);
        list_del(&_vq->list);
        spin_unlock(&vq->vq.vdev->vqs_list_lock);

        if (vq->we_own_ring) {
                if (vq->packed_ring) {
                        vring_free_queue(vq->vq.vdev,
                                         vq->packed.ring_size_in_bytes,
                                         vq->packed.vring.desc,
                                         vq->packed.ring_dma_addr);

                        vring_free_queue(vq->vq.vdev,
                                         vq->packed.event_size_in_bytes,
                                         vq->packed.vring.driver,
                                         vq->packed.driver_event_dma_addr);

                        vring_free_queue(vq->vq.vdev,
                                         vq->packed.event_size_in_bytes,
                                         vq->packed.vring.device,
                                         vq->packed.device_event_dma_addr);

                        kfree(vq->packed.desc_state);
                        kfree(vq->packed.desc_extra);
                } else {
                        vring_free_queue(vq->vq.vdev,
                                         vq->split.queue_size_in_bytes,
                                         vq->split.vring.desc,
                                         vq->split.queue_dma_addr);
                }
        }
        if (!vq->packed_ring) {
                kfree(vq->split.desc_state);
                kfree(vq->split.desc_extra);
        }
        kfree(vq);
}
EXPORT_SYMBOL_GPL(vring_del_virtqueue);

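/*
 * Illustrative sketch (not part of this file's API): the shape of a
 * transport's queue setup and teardown.  The queue size, alignment and the
 * my_setup_vq()/my_teardown_vq() helpers are hypothetical; real transports
 * take these parameters from the device.
 */
static inline struct virtqueue *my_setup_vq(struct virtio_device *vdev,
                                            unsigned int index,
                                            bool (*notify)(struct virtqueue *),
                                            void (*callback)(struct virtqueue *),
                                            const char *name)
{
        /*
         * Let the core allocate the ring (we_own_ring), allow it to shrink
         * the queue if memory is tight, and use weak barriers since the
         * "device" is really the hypervisor.
         */
        return vring_create_virtqueue(index, 256, PAGE_SIZE, vdev,
                                      true /* weak_barriers */,
                                      true /* may_reduce_num */,
                                      false /* context */,
                                      notify, callback, name);
}

static inline void my_teardown_vq(struct virtqueue *vq)
{
        vring_del_virtqueue(vq);        /* frees the ring and per-desc state */
}
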
/* Manipulates transport-specific feature bits. */
void vring_transport_features(struct virtio_device *vdev)
{
        unsigned int i;

        for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
                switch (i) {
                case VIRTIO_RING_F_INDIRECT_DESC:
                        break;
                case VIRTIO_RING_F_EVENT_IDX:
                        break;
                case VIRTIO_F_VERSION_1:
                        break;
                case VIRTIO_F_ACCESS_PLATFORM:
                        break;
                case VIRTIO_F_RING_PACKED:
                        break;
                case VIRTIO_F_ORDER_PLATFORM:
                        break;
                default:
                        /* We don't understand this bit. */
                        __virtio_clear_bit(vdev, i);
                }
        }
}
EXPORT_SYMBOL_GPL(vring_transport_features);

/**
 * virtqueue_get_vring_size - return the size of the virtqueue's vring
 * @_vq: the struct virtqueue containing the vring of interest.
 *
 * Returns the size of the vring.  This is mainly used for boasting to
 * userspace.  Unlike other operations, this need not be serialized.
 */
unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
}
EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);

bool virtqueue_is_broken(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        return READ_ONCE(vq->broken);
}
EXPORT_SYMBOL_GPL(virtqueue_is_broken);

/*
 * This should prevent the device from being used, allowing drivers to
 * recover.  You may need to grab appropriate locks to flush.
 */
void virtio_break_device(struct virtio_device *dev)
{
        struct virtqueue *_vq;

        spin_lock(&dev->vqs_list_lock);
        list_for_each_entry(_vq, &dev->vqs, list) {
                struct vring_virtqueue *vq = to_vvq(_vq);

                /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
                WRITE_ONCE(vq->broken, true);
        }
        spin_unlock(&dev->vqs_list_lock);
}
EXPORT_SYMBOL_GPL(virtio_break_device);

dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        BUG_ON(!vq->we_own_ring);

        if (vq->packed_ring)
                return vq->packed.ring_dma_addr;

        return vq->split.queue_dma_addr;
}
EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);

dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        BUG_ON(!vq->we_own_ring);

        if (vq->packed_ring)
                return vq->packed.driver_event_dma_addr;

        return vq->split.queue_dma_addr +
                ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
}
EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);

dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        BUG_ON(!vq->we_own_ring);

        if (vq->packed_ring)
                return vq->packed.device_event_dma_addr;

        return vq->split.queue_dma_addr +
                ((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
}
EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);

/* Only available for split ring */
const struct vring *virtqueue_get_vring(struct virtqueue *vq)
{
        return &to_vvq(vq)->split.vring;
}
EXPORT_SYMBOL_GPL(virtqueue_get_vring);

MODULE_LICENSE("GPL");
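
/*
 * Illustrative sketch (not part of this file's API): a transport typically
 * calls vring_transport_features() from its finalize_features hook so
 * unknown transport bits are not accidentally acked, and uses the address
 * getters above to tell the device where the ring lives.  The helpers, the
 * register-writing callback and the register indices are hypothetical.
 */
static inline void my_finalize_ring_features(struct virtio_device *vdev)
{
        /* Keep only the transport feature bits this file understands. */
        vring_transport_features(vdev);
}

static inline void my_write_queue_addrs(struct virtqueue *vq,
                                        void (*write_reg)(u64 addr, int reg))
{
        /* Only valid when the core allocated the ring (we_own_ring). */
        write_reg(virtqueue_get_desc_addr(vq), 0 /* hypothetical QUEUE_DESC */);
        write_reg(virtqueue_get_avail_addr(vq), 1 /* hypothetical QUEUE_AVAIL */);
        write_reg(virtqueue_get_used_addr(vq), 2 /* hypothetical QUEUE_USED */);
}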