1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* Virtio ring implementation. 3 * 4 * Copyright 2007 Rusty Russell IBM Corporation 5 */ 6 #include <linux/virtio.h> 7 #include <linux/virtio_ring.h> 8 #include <linux/virtio_config.h> 9 #include <linux/device.h> 10 #include <linux/slab.h> 11 #include <linux/module.h> 12 #include <linux/hrtimer.h> 13 #include <linux/dma-mapping.h> 14 #include <linux/spinlock.h> 15 #include <xen/xen.h> 16 17 #ifdef DEBUG 18 /* For development, we want to crash whenever the ring is screwed. */ 19 #define BAD_RING(_vq, fmt, args...) \ 20 do { \ 21 dev_err(&(_vq)->vq.vdev->dev, \ 22 "%s:"fmt, (_vq)->vq.name, ##args); \ 23 BUG(); \ 24 } while (0) 25 /* Caller is supposed to guarantee no reentry. */ 26 #define START_USE(_vq) \ 27 do { \ 28 if ((_vq)->in_use) \ 29 panic("%s:in_use = %i\n", \ 30 (_vq)->vq.name, (_vq)->in_use); \ 31 (_vq)->in_use = __LINE__; \ 32 } while (0) 33 #define END_USE(_vq) \ 34 do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0) 35 #define LAST_ADD_TIME_UPDATE(_vq) \ 36 do { \ 37 ktime_t now = ktime_get(); \ 38 \ 39 /* No kick or get, with .1 second between? Warn. */ \ 40 if ((_vq)->last_add_time_valid) \ 41 WARN_ON(ktime_to_ms(ktime_sub(now, \ 42 (_vq)->last_add_time)) > 100); \ 43 (_vq)->last_add_time = now; \ 44 (_vq)->last_add_time_valid = true; \ 45 } while (0) 46 #define LAST_ADD_TIME_CHECK(_vq) \ 47 do { \ 48 if ((_vq)->last_add_time_valid) { \ 49 WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \ 50 (_vq)->last_add_time)) > 100); \ 51 } \ 52 } while (0) 53 #define LAST_ADD_TIME_INVALID(_vq) \ 54 ((_vq)->last_add_time_valid = false) 55 #else 56 #define BAD_RING(_vq, fmt, args...) \ 57 do { \ 58 dev_err(&_vq->vq.vdev->dev, \ 59 "%s:"fmt, (_vq)->vq.name, ##args); \ 60 (_vq)->broken = true; \ 61 } while (0) 62 #define START_USE(vq) 63 #define END_USE(vq) 64 #define LAST_ADD_TIME_UPDATE(vq) 65 #define LAST_ADD_TIME_CHECK(vq) 66 #define LAST_ADD_TIME_INVALID(vq) 67 #endif 68 69 struct vring_desc_state_split { 70 void *data; /* Data for callback. */ 71 struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ 72 }; 73 74 struct vring_desc_state_packed { 75 void *data; /* Data for callback. */ 76 struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */ 77 u16 num; /* Descriptor list length. */ 78 u16 last; /* The last desc state in a list. */ 79 }; 80 81 struct vring_desc_extra { 82 dma_addr_t addr; /* Descriptor DMA addr. */ 83 u32 len; /* Descriptor length. */ 84 u16 flags; /* Descriptor flags. */ 85 u16 next; /* The next desc state in a list. */ 86 }; 87 88 struct vring_virtqueue { 89 struct virtqueue vq; 90 91 /* Is this a packed ring? */ 92 bool packed_ring; 93 94 /* Is DMA API used? */ 95 bool use_dma_api; 96 97 /* Can we use weak barriers? */ 98 bool weak_barriers; 99 100 /* Other side has made a mess, don't try any more. */ 101 bool broken; 102 103 /* Host supports indirect buffers */ 104 bool indirect; 105 106 /* Host publishes avail event idx */ 107 bool event; 108 109 /* Head of free buffer list. */ 110 unsigned int free_head; 111 /* Number we've added since last sync. */ 112 unsigned int num_added; 113 114 /* Last used index we've seen. */ 115 u16 last_used_idx; 116 117 /* Hint for event idx: already triggered no need to disable. */ 118 bool event_triggered; 119 120 union { 121 /* Available for split ring */ 122 struct { 123 /* Actual memory layout for this queue. */ 124 struct vring vring; 125 126 /* Last written value to avail->flags */ 127 u16 avail_flags_shadow; 128 129 /* 130 * Last written value to avail->idx in 131 * guest byte order. 132 */ 133 u16 avail_idx_shadow; 134 135 /* Per-descriptor state. */ 136 struct vring_desc_state_split *desc_state; 137 struct vring_desc_extra *desc_extra; 138 139 /* DMA address and size information */ 140 dma_addr_t queue_dma_addr; 141 size_t queue_size_in_bytes; 142 } split; 143 144 /* Available for packed ring */ 145 struct { 146 /* Actual memory layout for this queue. */ 147 struct { 148 unsigned int num; 149 struct vring_packed_desc *desc; 150 struct vring_packed_desc_event *driver; 151 struct vring_packed_desc_event *device; 152 } vring; 153 154 /* Driver ring wrap counter. */ 155 bool avail_wrap_counter; 156 157 /* Device ring wrap counter. */ 158 bool used_wrap_counter; 159 160 /* Avail used flags. */ 161 u16 avail_used_flags; 162 163 /* Index of the next avail descriptor. */ 164 u16 next_avail_idx; 165 166 /* 167 * Last written value to driver->flags in 168 * guest byte order. 169 */ 170 u16 event_flags_shadow; 171 172 /* Per-descriptor state. */ 173 struct vring_desc_state_packed *desc_state; 174 struct vring_desc_extra *desc_extra; 175 176 /* DMA address and size information */ 177 dma_addr_t ring_dma_addr; 178 dma_addr_t driver_event_dma_addr; 179 dma_addr_t device_event_dma_addr; 180 size_t ring_size_in_bytes; 181 size_t event_size_in_bytes; 182 } packed; 183 }; 184 185 /* How to notify other side. FIXME: commonalize hcalls! */ 186 bool (*notify)(struct virtqueue *vq); 187 188 /* DMA, allocation, and size information */ 189 bool we_own_ring; 190 191 #ifdef DEBUG 192 /* They're supposed to lock for us. */ 193 unsigned int in_use; 194 195 /* Figure out if their kicks are too delayed. */ 196 bool last_add_time_valid; 197 ktime_t last_add_time; 198 #endif 199 }; 200 201 202 /* 203 * Helpers. 204 */ 205 206 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) 207 208 static inline bool virtqueue_use_indirect(struct virtqueue *_vq, 209 unsigned int total_sg) 210 { 211 struct vring_virtqueue *vq = to_vvq(_vq); 212 213 /* 214 * If the host supports indirect descriptor tables, and we have multiple 215 * buffers, then go indirect. FIXME: tune this threshold 216 */ 217 return (vq->indirect && total_sg > 1 && vq->vq.num_free); 218 } 219 220 /* 221 * Modern virtio devices have feature bits to specify whether they need a 222 * quirk and bypass the IOMMU. If not there, just use the DMA API. 223 * 224 * If there, the interaction between virtio and DMA API is messy. 225 * 226 * On most systems with virtio, physical addresses match bus addresses, 227 * and it doesn't particularly matter whether we use the DMA API. 228 * 229 * On some systems, including Xen and any system with a physical device 230 * that speaks virtio behind a physical IOMMU, we must use the DMA API 231 * for virtio DMA to work at all. 232 * 233 * On other systems, including SPARC and PPC64, virtio-pci devices are 234 * enumerated as though they are behind an IOMMU, but the virtio host 235 * ignores the IOMMU, so we must either pretend that the IOMMU isn't 236 * there or somehow map everything as the identity. 237 * 238 * For the time being, we preserve historic behavior and bypass the DMA 239 * API. 240 * 241 * TODO: install a per-device DMA ops structure that does the right thing 242 * taking into account all the above quirks, and use the DMA API 243 * unconditionally on data path. 244 */ 245 246 static bool vring_use_dma_api(struct virtio_device *vdev) 247 { 248 if (!virtio_has_dma_quirk(vdev)) 249 return true; 250 251 /* Otherwise, we are left to guess. */ 252 /* 253 * In theory, it's possible to have a buggy QEMU-supposed 254 * emulated Q35 IOMMU and Xen enabled at the same time. On 255 * such a configuration, virtio has never worked and will 256 * not work without an even larger kludge. Instead, enable 257 * the DMA API if we're a Xen guest, which at least allows 258 * all of the sensible Xen configurations to work correctly. 259 */ 260 if (xen_domain()) 261 return true; 262 263 return false; 264 } 265 266 size_t virtio_max_dma_size(struct virtio_device *vdev) 267 { 268 size_t max_segment_size = SIZE_MAX; 269 270 if (vring_use_dma_api(vdev)) 271 max_segment_size = dma_max_mapping_size(vdev->dev.parent); 272 273 return max_segment_size; 274 } 275 EXPORT_SYMBOL_GPL(virtio_max_dma_size); 276 277 static void *vring_alloc_queue(struct virtio_device *vdev, size_t size, 278 dma_addr_t *dma_handle, gfp_t flag) 279 { 280 if (vring_use_dma_api(vdev)) { 281 return dma_alloc_coherent(vdev->dev.parent, size, 282 dma_handle, flag); 283 } else { 284 void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag); 285 286 if (queue) { 287 phys_addr_t phys_addr = virt_to_phys(queue); 288 *dma_handle = (dma_addr_t)phys_addr; 289 290 /* 291 * Sanity check: make sure we dind't truncate 292 * the address. The only arches I can find that 293 * have 64-bit phys_addr_t but 32-bit dma_addr_t 294 * are certain non-highmem MIPS and x86 295 * configurations, but these configurations 296 * should never allocate physical pages above 32 297 * bits, so this is fine. Just in case, throw a 298 * warning and abort if we end up with an 299 * unrepresentable address. 300 */ 301 if (WARN_ON_ONCE(*dma_handle != phys_addr)) { 302 free_pages_exact(queue, PAGE_ALIGN(size)); 303 return NULL; 304 } 305 } 306 return queue; 307 } 308 } 309 310 static void vring_free_queue(struct virtio_device *vdev, size_t size, 311 void *queue, dma_addr_t dma_handle) 312 { 313 if (vring_use_dma_api(vdev)) 314 dma_free_coherent(vdev->dev.parent, size, queue, dma_handle); 315 else 316 free_pages_exact(queue, PAGE_ALIGN(size)); 317 } 318 319 /* 320 * The DMA ops on various arches are rather gnarly right now, and 321 * making all of the arch DMA ops work on the vring device itself 322 * is a mess. For now, we use the parent device for DMA ops. 323 */ 324 static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq) 325 { 326 return vq->vq.vdev->dev.parent; 327 } 328 329 /* Map one sg entry. */ 330 static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq, 331 struct scatterlist *sg, 332 enum dma_data_direction direction) 333 { 334 if (!vq->use_dma_api) 335 return (dma_addr_t)sg_phys(sg); 336 337 /* 338 * We can't use dma_map_sg, because we don't use scatterlists in 339 * the way it expects (we don't guarantee that the scatterlist 340 * will exist for the lifetime of the mapping). 341 */ 342 return dma_map_page(vring_dma_dev(vq), 343 sg_page(sg), sg->offset, sg->length, 344 direction); 345 } 346 347 static dma_addr_t vring_map_single(const struct vring_virtqueue *vq, 348 void *cpu_addr, size_t size, 349 enum dma_data_direction direction) 350 { 351 if (!vq->use_dma_api) 352 return (dma_addr_t)virt_to_phys(cpu_addr); 353 354 return dma_map_single(vring_dma_dev(vq), 355 cpu_addr, size, direction); 356 } 357 358 static int vring_mapping_error(const struct vring_virtqueue *vq, 359 dma_addr_t addr) 360 { 361 if (!vq->use_dma_api) 362 return 0; 363 364 return dma_mapping_error(vring_dma_dev(vq), addr); 365 } 366 367 368 /* 369 * Split ring specific functions - *_split(). 370 */ 371 372 static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq, 373 struct vring_desc *desc) 374 { 375 u16 flags; 376 377 if (!vq->use_dma_api) 378 return; 379 380 flags = virtio16_to_cpu(vq->vq.vdev, desc->flags); 381 382 if (flags & VRING_DESC_F_INDIRECT) { 383 dma_unmap_single(vring_dma_dev(vq), 384 virtio64_to_cpu(vq->vq.vdev, desc->addr), 385 virtio32_to_cpu(vq->vq.vdev, desc->len), 386 (flags & VRING_DESC_F_WRITE) ? 387 DMA_FROM_DEVICE : DMA_TO_DEVICE); 388 } else { 389 dma_unmap_page(vring_dma_dev(vq), 390 virtio64_to_cpu(vq->vq.vdev, desc->addr), 391 virtio32_to_cpu(vq->vq.vdev, desc->len), 392 (flags & VRING_DESC_F_WRITE) ? 393 DMA_FROM_DEVICE : DMA_TO_DEVICE); 394 } 395 } 396 397 static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq, 398 unsigned int i) 399 { 400 struct vring_desc_extra *extra = vq->split.desc_extra; 401 u16 flags; 402 403 if (!vq->use_dma_api) 404 goto out; 405 406 flags = extra[i].flags; 407 408 if (flags & VRING_DESC_F_INDIRECT) { 409 dma_unmap_single(vring_dma_dev(vq), 410 extra[i].addr, 411 extra[i].len, 412 (flags & VRING_DESC_F_WRITE) ? 413 DMA_FROM_DEVICE : DMA_TO_DEVICE); 414 } else { 415 dma_unmap_page(vring_dma_dev(vq), 416 extra[i].addr, 417 extra[i].len, 418 (flags & VRING_DESC_F_WRITE) ? 419 DMA_FROM_DEVICE : DMA_TO_DEVICE); 420 } 421 422 out: 423 return extra[i].next; 424 } 425 426 static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq, 427 unsigned int total_sg, 428 gfp_t gfp) 429 { 430 struct vring_desc *desc; 431 unsigned int i; 432 433 /* 434 * We require lowmem mappings for the descriptors because 435 * otherwise virt_to_phys will give us bogus addresses in the 436 * virtqueue. 437 */ 438 gfp &= ~__GFP_HIGHMEM; 439 440 desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp); 441 if (!desc) 442 return NULL; 443 444 for (i = 0; i < total_sg; i++) 445 desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1); 446 return desc; 447 } 448 449 static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq, 450 struct vring_desc *desc, 451 unsigned int i, 452 dma_addr_t addr, 453 unsigned int len, 454 u16 flags, 455 bool indirect) 456 { 457 struct vring_virtqueue *vring = to_vvq(vq); 458 struct vring_desc_extra *extra = vring->split.desc_extra; 459 u16 next; 460 461 desc[i].flags = cpu_to_virtio16(vq->vdev, flags); 462 desc[i].addr = cpu_to_virtio64(vq->vdev, addr); 463 desc[i].len = cpu_to_virtio32(vq->vdev, len); 464 465 if (!indirect) { 466 next = extra[i].next; 467 desc[i].next = cpu_to_virtio16(vq->vdev, next); 468 469 extra[i].addr = addr; 470 extra[i].len = len; 471 extra[i].flags = flags; 472 } else 473 next = virtio16_to_cpu(vq->vdev, desc[i].next); 474 475 return next; 476 } 477 478 static inline int virtqueue_add_split(struct virtqueue *_vq, 479 struct scatterlist *sgs[], 480 unsigned int total_sg, 481 unsigned int out_sgs, 482 unsigned int in_sgs, 483 void *data, 484 void *ctx, 485 gfp_t gfp) 486 { 487 struct vring_virtqueue *vq = to_vvq(_vq); 488 struct scatterlist *sg; 489 struct vring_desc *desc; 490 unsigned int i, n, avail, descs_used, prev, err_idx; 491 int head; 492 bool indirect; 493 494 START_USE(vq); 495 496 BUG_ON(data == NULL); 497 BUG_ON(ctx && vq->indirect); 498 499 if (unlikely(vq->broken)) { 500 END_USE(vq); 501 return -EIO; 502 } 503 504 LAST_ADD_TIME_UPDATE(vq); 505 506 BUG_ON(total_sg == 0); 507 508 head = vq->free_head; 509 510 if (virtqueue_use_indirect(_vq, total_sg)) 511 desc = alloc_indirect_split(_vq, total_sg, gfp); 512 else { 513 desc = NULL; 514 WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect); 515 } 516 517 if (desc) { 518 /* Use a single buffer which doesn't continue */ 519 indirect = true; 520 /* Set up rest to use this indirect table. */ 521 i = 0; 522 descs_used = 1; 523 } else { 524 indirect = false; 525 desc = vq->split.vring.desc; 526 i = head; 527 descs_used = total_sg; 528 } 529 530 if (vq->vq.num_free < descs_used) { 531 pr_debug("Can't add buf len %i - avail = %i\n", 532 descs_used, vq->vq.num_free); 533 /* FIXME: for historical reasons, we force a notify here if 534 * there are outgoing parts to the buffer. Presumably the 535 * host should service the ring ASAP. */ 536 if (out_sgs) 537 vq->notify(&vq->vq); 538 if (indirect) 539 kfree(desc); 540 END_USE(vq); 541 return -ENOSPC; 542 } 543 544 for (n = 0; n < out_sgs; n++) { 545 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 546 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE); 547 if (vring_mapping_error(vq, addr)) 548 goto unmap_release; 549 550 prev = i; 551 /* Note that we trust indirect descriptor 552 * table since it use stream DMA mapping. 553 */ 554 i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length, 555 VRING_DESC_F_NEXT, 556 indirect); 557 } 558 } 559 for (; n < (out_sgs + in_sgs); n++) { 560 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 561 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE); 562 if (vring_mapping_error(vq, addr)) 563 goto unmap_release; 564 565 prev = i; 566 /* Note that we trust indirect descriptor 567 * table since it use stream DMA mapping. 568 */ 569 i = virtqueue_add_desc_split(_vq, desc, i, addr, 570 sg->length, 571 VRING_DESC_F_NEXT | 572 VRING_DESC_F_WRITE, 573 indirect); 574 } 575 } 576 /* Last one doesn't continue. */ 577 desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); 578 if (!indirect && vq->use_dma_api) 579 vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &= 580 ~VRING_DESC_F_NEXT; 581 582 if (indirect) { 583 /* Now that the indirect table is filled in, map it. */ 584 dma_addr_t addr = vring_map_single( 585 vq, desc, total_sg * sizeof(struct vring_desc), 586 DMA_TO_DEVICE); 587 if (vring_mapping_error(vq, addr)) 588 goto unmap_release; 589 590 virtqueue_add_desc_split(_vq, vq->split.vring.desc, 591 head, addr, 592 total_sg * sizeof(struct vring_desc), 593 VRING_DESC_F_INDIRECT, 594 false); 595 } 596 597 /* We're using some buffers from the free list. */ 598 vq->vq.num_free -= descs_used; 599 600 /* Update free pointer */ 601 if (indirect) 602 vq->free_head = vq->split.desc_extra[head].next; 603 else 604 vq->free_head = i; 605 606 /* Store token and indirect buffer state. */ 607 vq->split.desc_state[head].data = data; 608 if (indirect) 609 vq->split.desc_state[head].indir_desc = desc; 610 else 611 vq->split.desc_state[head].indir_desc = ctx; 612 613 /* Put entry in available array (but don't update avail->idx until they 614 * do sync). */ 615 avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1); 616 vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head); 617 618 /* Descriptors and available array need to be set before we expose the 619 * new available array entries. */ 620 virtio_wmb(vq->weak_barriers); 621 vq->split.avail_idx_shadow++; 622 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, 623 vq->split.avail_idx_shadow); 624 vq->num_added++; 625 626 pr_debug("Added buffer head %i to %p\n", head, vq); 627 END_USE(vq); 628 629 /* This is very unlikely, but theoretically possible. Kick 630 * just in case. */ 631 if (unlikely(vq->num_added == (1 << 16) - 1)) 632 virtqueue_kick(_vq); 633 634 return 0; 635 636 unmap_release: 637 err_idx = i; 638 639 if (indirect) 640 i = 0; 641 else 642 i = head; 643 644 for (n = 0; n < total_sg; n++) { 645 if (i == err_idx) 646 break; 647 if (indirect) { 648 vring_unmap_one_split_indirect(vq, &desc[i]); 649 i = virtio16_to_cpu(_vq->vdev, desc[i].next); 650 } else 651 i = vring_unmap_one_split(vq, i); 652 } 653 654 if (indirect) 655 kfree(desc); 656 657 END_USE(vq); 658 return -ENOMEM; 659 } 660 661 static bool virtqueue_kick_prepare_split(struct virtqueue *_vq) 662 { 663 struct vring_virtqueue *vq = to_vvq(_vq); 664 u16 new, old; 665 bool needs_kick; 666 667 START_USE(vq); 668 /* We need to expose available array entries before checking avail 669 * event. */ 670 virtio_mb(vq->weak_barriers); 671 672 old = vq->split.avail_idx_shadow - vq->num_added; 673 new = vq->split.avail_idx_shadow; 674 vq->num_added = 0; 675 676 LAST_ADD_TIME_CHECK(vq); 677 LAST_ADD_TIME_INVALID(vq); 678 679 if (vq->event) { 680 needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev, 681 vring_avail_event(&vq->split.vring)), 682 new, old); 683 } else { 684 needs_kick = !(vq->split.vring.used->flags & 685 cpu_to_virtio16(_vq->vdev, 686 VRING_USED_F_NO_NOTIFY)); 687 } 688 END_USE(vq); 689 return needs_kick; 690 } 691 692 static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, 693 void **ctx) 694 { 695 unsigned int i, j; 696 __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT); 697 698 /* Clear data ptr. */ 699 vq->split.desc_state[head].data = NULL; 700 701 /* Put back on free list: unmap first-level descriptors and find end */ 702 i = head; 703 704 while (vq->split.vring.desc[i].flags & nextflag) { 705 vring_unmap_one_split(vq, i); 706 i = vq->split.desc_extra[i].next; 707 vq->vq.num_free++; 708 } 709 710 vring_unmap_one_split(vq, i); 711 vq->split.desc_extra[i].next = vq->free_head; 712 vq->free_head = head; 713 714 /* Plus final descriptor */ 715 vq->vq.num_free++; 716 717 if (vq->indirect) { 718 struct vring_desc *indir_desc = 719 vq->split.desc_state[head].indir_desc; 720 u32 len; 721 722 /* Free the indirect table, if any, now that it's unmapped. */ 723 if (!indir_desc) 724 return; 725 726 len = vq->split.desc_extra[head].len; 727 728 BUG_ON(!(vq->split.desc_extra[head].flags & 729 VRING_DESC_F_INDIRECT)); 730 BUG_ON(len == 0 || len % sizeof(struct vring_desc)); 731 732 for (j = 0; j < len / sizeof(struct vring_desc); j++) 733 vring_unmap_one_split_indirect(vq, &indir_desc[j]); 734 735 kfree(indir_desc); 736 vq->split.desc_state[head].indir_desc = NULL; 737 } else if (ctx) { 738 *ctx = vq->split.desc_state[head].indir_desc; 739 } 740 } 741 742 static inline bool more_used_split(const struct vring_virtqueue *vq) 743 { 744 return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, 745 vq->split.vring.used->idx); 746 } 747 748 static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq, 749 unsigned int *len, 750 void **ctx) 751 { 752 struct vring_virtqueue *vq = to_vvq(_vq); 753 void *ret; 754 unsigned int i; 755 u16 last_used; 756 757 START_USE(vq); 758 759 if (unlikely(vq->broken)) { 760 END_USE(vq); 761 return NULL; 762 } 763 764 if (!more_used_split(vq)) { 765 pr_debug("No more buffers in queue\n"); 766 END_USE(vq); 767 return NULL; 768 } 769 770 /* Only get used array entries after they have been exposed by host. */ 771 virtio_rmb(vq->weak_barriers); 772 773 last_used = (vq->last_used_idx & (vq->split.vring.num - 1)); 774 i = virtio32_to_cpu(_vq->vdev, 775 vq->split.vring.used->ring[last_used].id); 776 *len = virtio32_to_cpu(_vq->vdev, 777 vq->split.vring.used->ring[last_used].len); 778 779 if (unlikely(i >= vq->split.vring.num)) { 780 BAD_RING(vq, "id %u out of range\n", i); 781 return NULL; 782 } 783 if (unlikely(!vq->split.desc_state[i].data)) { 784 BAD_RING(vq, "id %u is not a head!\n", i); 785 return NULL; 786 } 787 788 /* detach_buf_split clears data, so grab it now. */ 789 ret = vq->split.desc_state[i].data; 790 detach_buf_split(vq, i, ctx); 791 vq->last_used_idx++; 792 /* If we expect an interrupt for the next entry, tell host 793 * by writing event index and flush out the write before 794 * the read in the next get_buf call. */ 795 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) 796 virtio_store_mb(vq->weak_barriers, 797 &vring_used_event(&vq->split.vring), 798 cpu_to_virtio16(_vq->vdev, vq->last_used_idx)); 799 800 LAST_ADD_TIME_INVALID(vq); 801 802 END_USE(vq); 803 return ret; 804 } 805 806 static void virtqueue_disable_cb_split(struct virtqueue *_vq) 807 { 808 struct vring_virtqueue *vq = to_vvq(_vq); 809 810 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) { 811 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; 812 if (vq->event) 813 /* TODO: this is a hack. Figure out a cleaner value to write. */ 814 vring_used_event(&vq->split.vring) = 0x0; 815 else 816 vq->split.vring.avail->flags = 817 cpu_to_virtio16(_vq->vdev, 818 vq->split.avail_flags_shadow); 819 } 820 } 821 822 static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq) 823 { 824 struct vring_virtqueue *vq = to_vvq(_vq); 825 u16 last_used_idx; 826 827 START_USE(vq); 828 829 /* We optimistically turn back on interrupts, then check if there was 830 * more to do. */ 831 /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to 832 * either clear the flags bit or point the event index at the next 833 * entry. Always do both to keep code simple. */ 834 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 835 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 836 if (!vq->event) 837 vq->split.vring.avail->flags = 838 cpu_to_virtio16(_vq->vdev, 839 vq->split.avail_flags_shadow); 840 } 841 vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev, 842 last_used_idx = vq->last_used_idx); 843 END_USE(vq); 844 return last_used_idx; 845 } 846 847 static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx) 848 { 849 struct vring_virtqueue *vq = to_vvq(_vq); 850 851 return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev, 852 vq->split.vring.used->idx); 853 } 854 855 static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq) 856 { 857 struct vring_virtqueue *vq = to_vvq(_vq); 858 u16 bufs; 859 860 START_USE(vq); 861 862 /* We optimistically turn back on interrupts, then check if there was 863 * more to do. */ 864 /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to 865 * either clear the flags bit or point the event index at the next 866 * entry. Always update the event index to keep code simple. */ 867 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 868 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 869 if (!vq->event) 870 vq->split.vring.avail->flags = 871 cpu_to_virtio16(_vq->vdev, 872 vq->split.avail_flags_shadow); 873 } 874 /* TODO: tune this threshold */ 875 bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4; 876 877 virtio_store_mb(vq->weak_barriers, 878 &vring_used_event(&vq->split.vring), 879 cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs)); 880 881 if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx) 882 - vq->last_used_idx) > bufs)) { 883 END_USE(vq); 884 return false; 885 } 886 887 END_USE(vq); 888 return true; 889 } 890 891 static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq) 892 { 893 struct vring_virtqueue *vq = to_vvq(_vq); 894 unsigned int i; 895 void *buf; 896 897 START_USE(vq); 898 899 for (i = 0; i < vq->split.vring.num; i++) { 900 if (!vq->split.desc_state[i].data) 901 continue; 902 /* detach_buf_split clears data, so grab it now. */ 903 buf = vq->split.desc_state[i].data; 904 detach_buf_split(vq, i, NULL); 905 vq->split.avail_idx_shadow--; 906 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, 907 vq->split.avail_idx_shadow); 908 END_USE(vq); 909 return buf; 910 } 911 /* That should have freed everything. */ 912 BUG_ON(vq->vq.num_free != vq->split.vring.num); 913 914 END_USE(vq); 915 return NULL; 916 } 917 918 static struct virtqueue *vring_create_virtqueue_split( 919 unsigned int index, 920 unsigned int num, 921 unsigned int vring_align, 922 struct virtio_device *vdev, 923 bool weak_barriers, 924 bool may_reduce_num, 925 bool context, 926 bool (*notify)(struct virtqueue *), 927 void (*callback)(struct virtqueue *), 928 const char *name) 929 { 930 struct virtqueue *vq; 931 void *queue = NULL; 932 dma_addr_t dma_addr; 933 size_t queue_size_in_bytes; 934 struct vring vring; 935 936 /* We assume num is a power of 2. */ 937 if (num & (num - 1)) { 938 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num); 939 return NULL; 940 } 941 942 /* TODO: allocate each queue chunk individually */ 943 for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) { 944 queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 945 &dma_addr, 946 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 947 if (queue) 948 break; 949 if (!may_reduce_num) 950 return NULL; 951 } 952 953 if (!num) 954 return NULL; 955 956 if (!queue) { 957 /* Try to get a single page. You are my only hope! */ 958 queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 959 &dma_addr, GFP_KERNEL|__GFP_ZERO); 960 } 961 if (!queue) 962 return NULL; 963 964 queue_size_in_bytes = vring_size(num, vring_align); 965 vring_init(&vring, num, queue, vring_align); 966 967 vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context, 968 notify, callback, name); 969 if (!vq) { 970 vring_free_queue(vdev, queue_size_in_bytes, queue, 971 dma_addr); 972 return NULL; 973 } 974 975 to_vvq(vq)->split.queue_dma_addr = dma_addr; 976 to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes; 977 to_vvq(vq)->we_own_ring = true; 978 979 return vq; 980 } 981 982 983 /* 984 * Packed ring specific functions - *_packed(). 985 */ 986 987 static void vring_unmap_state_packed(const struct vring_virtqueue *vq, 988 struct vring_desc_extra *state) 989 { 990 u16 flags; 991 992 if (!vq->use_dma_api) 993 return; 994 995 flags = state->flags; 996 997 if (flags & VRING_DESC_F_INDIRECT) { 998 dma_unmap_single(vring_dma_dev(vq), 999 state->addr, state->len, 1000 (flags & VRING_DESC_F_WRITE) ? 1001 DMA_FROM_DEVICE : DMA_TO_DEVICE); 1002 } else { 1003 dma_unmap_page(vring_dma_dev(vq), 1004 state->addr, state->len, 1005 (flags & VRING_DESC_F_WRITE) ? 1006 DMA_FROM_DEVICE : DMA_TO_DEVICE); 1007 } 1008 } 1009 1010 static void vring_unmap_desc_packed(const struct vring_virtqueue *vq, 1011 struct vring_packed_desc *desc) 1012 { 1013 u16 flags; 1014 1015 if (!vq->use_dma_api) 1016 return; 1017 1018 flags = le16_to_cpu(desc->flags); 1019 1020 if (flags & VRING_DESC_F_INDIRECT) { 1021 dma_unmap_single(vring_dma_dev(vq), 1022 le64_to_cpu(desc->addr), 1023 le32_to_cpu(desc->len), 1024 (flags & VRING_DESC_F_WRITE) ? 1025 DMA_FROM_DEVICE : DMA_TO_DEVICE); 1026 } else { 1027 dma_unmap_page(vring_dma_dev(vq), 1028 le64_to_cpu(desc->addr), 1029 le32_to_cpu(desc->len), 1030 (flags & VRING_DESC_F_WRITE) ? 1031 DMA_FROM_DEVICE : DMA_TO_DEVICE); 1032 } 1033 } 1034 1035 static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg, 1036 gfp_t gfp) 1037 { 1038 struct vring_packed_desc *desc; 1039 1040 /* 1041 * We require lowmem mappings for the descriptors because 1042 * otherwise virt_to_phys will give us bogus addresses in the 1043 * virtqueue. 1044 */ 1045 gfp &= ~__GFP_HIGHMEM; 1046 1047 desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp); 1048 1049 return desc; 1050 } 1051 1052 static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, 1053 struct scatterlist *sgs[], 1054 unsigned int total_sg, 1055 unsigned int out_sgs, 1056 unsigned int in_sgs, 1057 void *data, 1058 gfp_t gfp) 1059 { 1060 struct vring_packed_desc *desc; 1061 struct scatterlist *sg; 1062 unsigned int i, n, err_idx; 1063 u16 head, id; 1064 dma_addr_t addr; 1065 1066 head = vq->packed.next_avail_idx; 1067 desc = alloc_indirect_packed(total_sg, gfp); 1068 if (!desc) 1069 return -ENOMEM; 1070 1071 if (unlikely(vq->vq.num_free < 1)) { 1072 pr_debug("Can't add buf len 1 - avail = 0\n"); 1073 kfree(desc); 1074 END_USE(vq); 1075 return -ENOSPC; 1076 } 1077 1078 i = 0; 1079 id = vq->free_head; 1080 BUG_ON(id == vq->packed.vring.num); 1081 1082 for (n = 0; n < out_sgs + in_sgs; n++) { 1083 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 1084 addr = vring_map_one_sg(vq, sg, n < out_sgs ? 1085 DMA_TO_DEVICE : DMA_FROM_DEVICE); 1086 if (vring_mapping_error(vq, addr)) 1087 goto unmap_release; 1088 1089 desc[i].flags = cpu_to_le16(n < out_sgs ? 1090 0 : VRING_DESC_F_WRITE); 1091 desc[i].addr = cpu_to_le64(addr); 1092 desc[i].len = cpu_to_le32(sg->length); 1093 i++; 1094 } 1095 } 1096 1097 /* Now that the indirect table is filled in, map it. */ 1098 addr = vring_map_single(vq, desc, 1099 total_sg * sizeof(struct vring_packed_desc), 1100 DMA_TO_DEVICE); 1101 if (vring_mapping_error(vq, addr)) 1102 goto unmap_release; 1103 1104 vq->packed.vring.desc[head].addr = cpu_to_le64(addr); 1105 vq->packed.vring.desc[head].len = cpu_to_le32(total_sg * 1106 sizeof(struct vring_packed_desc)); 1107 vq->packed.vring.desc[head].id = cpu_to_le16(id); 1108 1109 if (vq->use_dma_api) { 1110 vq->packed.desc_extra[id].addr = addr; 1111 vq->packed.desc_extra[id].len = total_sg * 1112 sizeof(struct vring_packed_desc); 1113 vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT | 1114 vq->packed.avail_used_flags; 1115 } 1116 1117 /* 1118 * A driver MUST NOT make the first descriptor in the list 1119 * available before all subsequent descriptors comprising 1120 * the list are made available. 1121 */ 1122 virtio_wmb(vq->weak_barriers); 1123 vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT | 1124 vq->packed.avail_used_flags); 1125 1126 /* We're using some buffers from the free list. */ 1127 vq->vq.num_free -= 1; 1128 1129 /* Update free pointer */ 1130 n = head + 1; 1131 if (n >= vq->packed.vring.num) { 1132 n = 0; 1133 vq->packed.avail_wrap_counter ^= 1; 1134 vq->packed.avail_used_flags ^= 1135 1 << VRING_PACKED_DESC_F_AVAIL | 1136 1 << VRING_PACKED_DESC_F_USED; 1137 } 1138 vq->packed.next_avail_idx = n; 1139 vq->free_head = vq->packed.desc_extra[id].next; 1140 1141 /* Store token and indirect buffer state. */ 1142 vq->packed.desc_state[id].num = 1; 1143 vq->packed.desc_state[id].data = data; 1144 vq->packed.desc_state[id].indir_desc = desc; 1145 vq->packed.desc_state[id].last = id; 1146 1147 vq->num_added += 1; 1148 1149 pr_debug("Added buffer head %i to %p\n", head, vq); 1150 END_USE(vq); 1151 1152 return 0; 1153 1154 unmap_release: 1155 err_idx = i; 1156 1157 for (i = 0; i < err_idx; i++) 1158 vring_unmap_desc_packed(vq, &desc[i]); 1159 1160 kfree(desc); 1161 1162 END_USE(vq); 1163 return -ENOMEM; 1164 } 1165 1166 static inline int virtqueue_add_packed(struct virtqueue *_vq, 1167 struct scatterlist *sgs[], 1168 unsigned int total_sg, 1169 unsigned int out_sgs, 1170 unsigned int in_sgs, 1171 void *data, 1172 void *ctx, 1173 gfp_t gfp) 1174 { 1175 struct vring_virtqueue *vq = to_vvq(_vq); 1176 struct vring_packed_desc *desc; 1177 struct scatterlist *sg; 1178 unsigned int i, n, c, descs_used, err_idx; 1179 __le16 head_flags, flags; 1180 u16 head, id, prev, curr, avail_used_flags; 1181 int err; 1182 1183 START_USE(vq); 1184 1185 BUG_ON(data == NULL); 1186 BUG_ON(ctx && vq->indirect); 1187 1188 if (unlikely(vq->broken)) { 1189 END_USE(vq); 1190 return -EIO; 1191 } 1192 1193 LAST_ADD_TIME_UPDATE(vq); 1194 1195 BUG_ON(total_sg == 0); 1196 1197 if (virtqueue_use_indirect(_vq, total_sg)) { 1198 err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs, 1199 in_sgs, data, gfp); 1200 if (err != -ENOMEM) 1201 return err; 1202 1203 /* fall back on direct */ 1204 } 1205 1206 head = vq->packed.next_avail_idx; 1207 avail_used_flags = vq->packed.avail_used_flags; 1208 1209 WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect); 1210 1211 desc = vq->packed.vring.desc; 1212 i = head; 1213 descs_used = total_sg; 1214 1215 if (unlikely(vq->vq.num_free < descs_used)) { 1216 pr_debug("Can't add buf len %i - avail = %i\n", 1217 descs_used, vq->vq.num_free); 1218 END_USE(vq); 1219 return -ENOSPC; 1220 } 1221 1222 id = vq->free_head; 1223 BUG_ON(id == vq->packed.vring.num); 1224 1225 curr = id; 1226 c = 0; 1227 for (n = 0; n < out_sgs + in_sgs; n++) { 1228 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 1229 dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ? 1230 DMA_TO_DEVICE : DMA_FROM_DEVICE); 1231 if (vring_mapping_error(vq, addr)) 1232 goto unmap_release; 1233 1234 flags = cpu_to_le16(vq->packed.avail_used_flags | 1235 (++c == total_sg ? 0 : VRING_DESC_F_NEXT) | 1236 (n < out_sgs ? 0 : VRING_DESC_F_WRITE)); 1237 if (i == head) 1238 head_flags = flags; 1239 else 1240 desc[i].flags = flags; 1241 1242 desc[i].addr = cpu_to_le64(addr); 1243 desc[i].len = cpu_to_le32(sg->length); 1244 desc[i].id = cpu_to_le16(id); 1245 1246 if (unlikely(vq->use_dma_api)) { 1247 vq->packed.desc_extra[curr].addr = addr; 1248 vq->packed.desc_extra[curr].len = sg->length; 1249 vq->packed.desc_extra[curr].flags = 1250 le16_to_cpu(flags); 1251 } 1252 prev = curr; 1253 curr = vq->packed.desc_extra[curr].next; 1254 1255 if ((unlikely(++i >= vq->packed.vring.num))) { 1256 i = 0; 1257 vq->packed.avail_used_flags ^= 1258 1 << VRING_PACKED_DESC_F_AVAIL | 1259 1 << VRING_PACKED_DESC_F_USED; 1260 } 1261 } 1262 } 1263 1264 if (i < head) 1265 vq->packed.avail_wrap_counter ^= 1; 1266 1267 /* We're using some buffers from the free list. */ 1268 vq->vq.num_free -= descs_used; 1269 1270 /* Update free pointer */ 1271 vq->packed.next_avail_idx = i; 1272 vq->free_head = curr; 1273 1274 /* Store token. */ 1275 vq->packed.desc_state[id].num = descs_used; 1276 vq->packed.desc_state[id].data = data; 1277 vq->packed.desc_state[id].indir_desc = ctx; 1278 vq->packed.desc_state[id].last = prev; 1279 1280 /* 1281 * A driver MUST NOT make the first descriptor in the list 1282 * available before all subsequent descriptors comprising 1283 * the list are made available. 1284 */ 1285 virtio_wmb(vq->weak_barriers); 1286 vq->packed.vring.desc[head].flags = head_flags; 1287 vq->num_added += descs_used; 1288 1289 pr_debug("Added buffer head %i to %p\n", head, vq); 1290 END_USE(vq); 1291 1292 return 0; 1293 1294 unmap_release: 1295 err_idx = i; 1296 i = head; 1297 curr = vq->free_head; 1298 1299 vq->packed.avail_used_flags = avail_used_flags; 1300 1301 for (n = 0; n < total_sg; n++) { 1302 if (i == err_idx) 1303 break; 1304 vring_unmap_state_packed(vq, 1305 &vq->packed.desc_extra[curr]); 1306 curr = vq->packed.desc_extra[curr].next; 1307 i++; 1308 if (i >= vq->packed.vring.num) 1309 i = 0; 1310 } 1311 1312 END_USE(vq); 1313 return -EIO; 1314 } 1315 1316 static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq) 1317 { 1318 struct vring_virtqueue *vq = to_vvq(_vq); 1319 u16 new, old, off_wrap, flags, wrap_counter, event_idx; 1320 bool needs_kick; 1321 union { 1322 struct { 1323 __le16 off_wrap; 1324 __le16 flags; 1325 }; 1326 u32 u32; 1327 } snapshot; 1328 1329 START_USE(vq); 1330 1331 /* 1332 * We need to expose the new flags value before checking notification 1333 * suppressions. 1334 */ 1335 virtio_mb(vq->weak_barriers); 1336 1337 old = vq->packed.next_avail_idx - vq->num_added; 1338 new = vq->packed.next_avail_idx; 1339 vq->num_added = 0; 1340 1341 snapshot.u32 = *(u32 *)vq->packed.vring.device; 1342 flags = le16_to_cpu(snapshot.flags); 1343 1344 LAST_ADD_TIME_CHECK(vq); 1345 LAST_ADD_TIME_INVALID(vq); 1346 1347 if (flags != VRING_PACKED_EVENT_FLAG_DESC) { 1348 needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE); 1349 goto out; 1350 } 1351 1352 off_wrap = le16_to_cpu(snapshot.off_wrap); 1353 1354 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; 1355 event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); 1356 if (wrap_counter != vq->packed.avail_wrap_counter) 1357 event_idx -= vq->packed.vring.num; 1358 1359 needs_kick = vring_need_event(event_idx, new, old); 1360 out: 1361 END_USE(vq); 1362 return needs_kick; 1363 } 1364 1365 static void detach_buf_packed(struct vring_virtqueue *vq, 1366 unsigned int id, void **ctx) 1367 { 1368 struct vring_desc_state_packed *state = NULL; 1369 struct vring_packed_desc *desc; 1370 unsigned int i, curr; 1371 1372 state = &vq->packed.desc_state[id]; 1373 1374 /* Clear data ptr. */ 1375 state->data = NULL; 1376 1377 vq->packed.desc_extra[state->last].next = vq->free_head; 1378 vq->free_head = id; 1379 vq->vq.num_free += state->num; 1380 1381 if (unlikely(vq->use_dma_api)) { 1382 curr = id; 1383 for (i = 0; i < state->num; i++) { 1384 vring_unmap_state_packed(vq, 1385 &vq->packed.desc_extra[curr]); 1386 curr = vq->packed.desc_extra[curr].next; 1387 } 1388 } 1389 1390 if (vq->indirect) { 1391 u32 len; 1392 1393 /* Free the indirect table, if any, now that it's unmapped. */ 1394 desc = state->indir_desc; 1395 if (!desc) 1396 return; 1397 1398 if (vq->use_dma_api) { 1399 len = vq->packed.desc_extra[id].len; 1400 for (i = 0; i < len / sizeof(struct vring_packed_desc); 1401 i++) 1402 vring_unmap_desc_packed(vq, &desc[i]); 1403 } 1404 kfree(desc); 1405 state->indir_desc = NULL; 1406 } else if (ctx) { 1407 *ctx = state->indir_desc; 1408 } 1409 } 1410 1411 static inline bool is_used_desc_packed(const struct vring_virtqueue *vq, 1412 u16 idx, bool used_wrap_counter) 1413 { 1414 bool avail, used; 1415 u16 flags; 1416 1417 flags = le16_to_cpu(vq->packed.vring.desc[idx].flags); 1418 avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL)); 1419 used = !!(flags & (1 << VRING_PACKED_DESC_F_USED)); 1420 1421 return avail == used && used == used_wrap_counter; 1422 } 1423 1424 static inline bool more_used_packed(const struct vring_virtqueue *vq) 1425 { 1426 return is_used_desc_packed(vq, vq->last_used_idx, 1427 vq->packed.used_wrap_counter); 1428 } 1429 1430 static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, 1431 unsigned int *len, 1432 void **ctx) 1433 { 1434 struct vring_virtqueue *vq = to_vvq(_vq); 1435 u16 last_used, id; 1436 void *ret; 1437 1438 START_USE(vq); 1439 1440 if (unlikely(vq->broken)) { 1441 END_USE(vq); 1442 return NULL; 1443 } 1444 1445 if (!more_used_packed(vq)) { 1446 pr_debug("No more buffers in queue\n"); 1447 END_USE(vq); 1448 return NULL; 1449 } 1450 1451 /* Only get used elements after they have been exposed by host. */ 1452 virtio_rmb(vq->weak_barriers); 1453 1454 last_used = vq->last_used_idx; 1455 id = le16_to_cpu(vq->packed.vring.desc[last_used].id); 1456 *len = le32_to_cpu(vq->packed.vring.desc[last_used].len); 1457 1458 if (unlikely(id >= vq->packed.vring.num)) { 1459 BAD_RING(vq, "id %u out of range\n", id); 1460 return NULL; 1461 } 1462 if (unlikely(!vq->packed.desc_state[id].data)) { 1463 BAD_RING(vq, "id %u is not a head!\n", id); 1464 return NULL; 1465 } 1466 1467 /* detach_buf_packed clears data, so grab it now. */ 1468 ret = vq->packed.desc_state[id].data; 1469 detach_buf_packed(vq, id, ctx); 1470 1471 vq->last_used_idx += vq->packed.desc_state[id].num; 1472 if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) { 1473 vq->last_used_idx -= vq->packed.vring.num; 1474 vq->packed.used_wrap_counter ^= 1; 1475 } 1476 1477 /* 1478 * If we expect an interrupt for the next entry, tell host 1479 * by writing event index and flush out the write before 1480 * the read in the next get_buf call. 1481 */ 1482 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC) 1483 virtio_store_mb(vq->weak_barriers, 1484 &vq->packed.vring.driver->off_wrap, 1485 cpu_to_le16(vq->last_used_idx | 1486 (vq->packed.used_wrap_counter << 1487 VRING_PACKED_EVENT_F_WRAP_CTR))); 1488 1489 LAST_ADD_TIME_INVALID(vq); 1490 1491 END_USE(vq); 1492 return ret; 1493 } 1494 1495 static void virtqueue_disable_cb_packed(struct virtqueue *_vq) 1496 { 1497 struct vring_virtqueue *vq = to_vvq(_vq); 1498 1499 if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) { 1500 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; 1501 vq->packed.vring.driver->flags = 1502 cpu_to_le16(vq->packed.event_flags_shadow); 1503 } 1504 } 1505 1506 static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq) 1507 { 1508 struct vring_virtqueue *vq = to_vvq(_vq); 1509 1510 START_USE(vq); 1511 1512 /* 1513 * We optimistically turn back on interrupts, then check if there was 1514 * more to do. 1515 */ 1516 1517 if (vq->event) { 1518 vq->packed.vring.driver->off_wrap = 1519 cpu_to_le16(vq->last_used_idx | 1520 (vq->packed.used_wrap_counter << 1521 VRING_PACKED_EVENT_F_WRAP_CTR)); 1522 /* 1523 * We need to update event offset and event wrap 1524 * counter first before updating event flags. 1525 */ 1526 virtio_wmb(vq->weak_barriers); 1527 } 1528 1529 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 1530 vq->packed.event_flags_shadow = vq->event ? 1531 VRING_PACKED_EVENT_FLAG_DESC : 1532 VRING_PACKED_EVENT_FLAG_ENABLE; 1533 vq->packed.vring.driver->flags = 1534 cpu_to_le16(vq->packed.event_flags_shadow); 1535 } 1536 1537 END_USE(vq); 1538 return vq->last_used_idx | ((u16)vq->packed.used_wrap_counter << 1539 VRING_PACKED_EVENT_F_WRAP_CTR); 1540 } 1541 1542 static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap) 1543 { 1544 struct vring_virtqueue *vq = to_vvq(_vq); 1545 bool wrap_counter; 1546 u16 used_idx; 1547 1548 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; 1549 used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); 1550 1551 return is_used_desc_packed(vq, used_idx, wrap_counter); 1552 } 1553 1554 static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq) 1555 { 1556 struct vring_virtqueue *vq = to_vvq(_vq); 1557 u16 used_idx, wrap_counter; 1558 u16 bufs; 1559 1560 START_USE(vq); 1561 1562 /* 1563 * We optimistically turn back on interrupts, then check if there was 1564 * more to do. 1565 */ 1566 1567 if (vq->event) { 1568 /* TODO: tune this threshold */ 1569 bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4; 1570 wrap_counter = vq->packed.used_wrap_counter; 1571 1572 used_idx = vq->last_used_idx + bufs; 1573 if (used_idx >= vq->packed.vring.num) { 1574 used_idx -= vq->packed.vring.num; 1575 wrap_counter ^= 1; 1576 } 1577 1578 vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx | 1579 (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR)); 1580 1581 /* 1582 * We need to update event offset and event wrap 1583 * counter first before updating event flags. 1584 */ 1585 virtio_wmb(vq->weak_barriers); 1586 } 1587 1588 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 1589 vq->packed.event_flags_shadow = vq->event ? 1590 VRING_PACKED_EVENT_FLAG_DESC : 1591 VRING_PACKED_EVENT_FLAG_ENABLE; 1592 vq->packed.vring.driver->flags = 1593 cpu_to_le16(vq->packed.event_flags_shadow); 1594 } 1595 1596 /* 1597 * We need to update event suppression structure first 1598 * before re-checking for more used buffers. 1599 */ 1600 virtio_mb(vq->weak_barriers); 1601 1602 if (is_used_desc_packed(vq, 1603 vq->last_used_idx, 1604 vq->packed.used_wrap_counter)) { 1605 END_USE(vq); 1606 return false; 1607 } 1608 1609 END_USE(vq); 1610 return true; 1611 } 1612 1613 static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq) 1614 { 1615 struct vring_virtqueue *vq = to_vvq(_vq); 1616 unsigned int i; 1617 void *buf; 1618 1619 START_USE(vq); 1620 1621 for (i = 0; i < vq->packed.vring.num; i++) { 1622 if (!vq->packed.desc_state[i].data) 1623 continue; 1624 /* detach_buf clears data, so grab it now. */ 1625 buf = vq->packed.desc_state[i].data; 1626 detach_buf_packed(vq, i, NULL); 1627 END_USE(vq); 1628 return buf; 1629 } 1630 /* That should have freed everything. */ 1631 BUG_ON(vq->vq.num_free != vq->packed.vring.num); 1632 1633 END_USE(vq); 1634 return NULL; 1635 } 1636 1637 static struct vring_desc_extra *vring_alloc_desc_extra(struct vring_virtqueue *vq, 1638 unsigned int num) 1639 { 1640 struct vring_desc_extra *desc_extra; 1641 unsigned int i; 1642 1643 desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra), 1644 GFP_KERNEL); 1645 if (!desc_extra) 1646 return NULL; 1647 1648 memset(desc_extra, 0, num * sizeof(struct vring_desc_extra)); 1649 1650 for (i = 0; i < num - 1; i++) 1651 desc_extra[i].next = i + 1; 1652 1653 return desc_extra; 1654 } 1655 1656 static struct virtqueue *vring_create_virtqueue_packed( 1657 unsigned int index, 1658 unsigned int num, 1659 unsigned int vring_align, 1660 struct virtio_device *vdev, 1661 bool weak_barriers, 1662 bool may_reduce_num, 1663 bool context, 1664 bool (*notify)(struct virtqueue *), 1665 void (*callback)(struct virtqueue *), 1666 const char *name) 1667 { 1668 struct vring_virtqueue *vq; 1669 struct vring_packed_desc *ring; 1670 struct vring_packed_desc_event *driver, *device; 1671 dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr; 1672 size_t ring_size_in_bytes, event_size_in_bytes; 1673 1674 ring_size_in_bytes = num * sizeof(struct vring_packed_desc); 1675 1676 ring = vring_alloc_queue(vdev, ring_size_in_bytes, 1677 &ring_dma_addr, 1678 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1679 if (!ring) 1680 goto err_ring; 1681 1682 event_size_in_bytes = sizeof(struct vring_packed_desc_event); 1683 1684 driver = vring_alloc_queue(vdev, event_size_in_bytes, 1685 &driver_event_dma_addr, 1686 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1687 if (!driver) 1688 goto err_driver; 1689 1690 device = vring_alloc_queue(vdev, event_size_in_bytes, 1691 &device_event_dma_addr, 1692 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1693 if (!device) 1694 goto err_device; 1695 1696 vq = kmalloc(sizeof(*vq), GFP_KERNEL); 1697 if (!vq) 1698 goto err_vq; 1699 1700 vq->vq.callback = callback; 1701 vq->vq.vdev = vdev; 1702 vq->vq.name = name; 1703 vq->vq.num_free = num; 1704 vq->vq.index = index; 1705 vq->we_own_ring = true; 1706 vq->notify = notify; 1707 vq->weak_barriers = weak_barriers; 1708 vq->broken = false; 1709 vq->last_used_idx = 0; 1710 vq->event_triggered = false; 1711 vq->num_added = 0; 1712 vq->packed_ring = true; 1713 vq->use_dma_api = vring_use_dma_api(vdev); 1714 #ifdef DEBUG 1715 vq->in_use = false; 1716 vq->last_add_time_valid = false; 1717 #endif 1718 1719 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && 1720 !context; 1721 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 1722 1723 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) 1724 vq->weak_barriers = false; 1725 1726 vq->packed.ring_dma_addr = ring_dma_addr; 1727 vq->packed.driver_event_dma_addr = driver_event_dma_addr; 1728 vq->packed.device_event_dma_addr = device_event_dma_addr; 1729 1730 vq->packed.ring_size_in_bytes = ring_size_in_bytes; 1731 vq->packed.event_size_in_bytes = event_size_in_bytes; 1732 1733 vq->packed.vring.num = num; 1734 vq->packed.vring.desc = ring; 1735 vq->packed.vring.driver = driver; 1736 vq->packed.vring.device = device; 1737 1738 vq->packed.next_avail_idx = 0; 1739 vq->packed.avail_wrap_counter = 1; 1740 vq->packed.used_wrap_counter = 1; 1741 vq->packed.event_flags_shadow = 0; 1742 vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL; 1743 1744 vq->packed.desc_state = kmalloc_array(num, 1745 sizeof(struct vring_desc_state_packed), 1746 GFP_KERNEL); 1747 if (!vq->packed.desc_state) 1748 goto err_desc_state; 1749 1750 memset(vq->packed.desc_state, 0, 1751 num * sizeof(struct vring_desc_state_packed)); 1752 1753 /* Put everything in free lists. */ 1754 vq->free_head = 0; 1755 1756 vq->packed.desc_extra = vring_alloc_desc_extra(vq, num); 1757 if (!vq->packed.desc_extra) 1758 goto err_desc_extra; 1759 1760 /* No callback? Tell other side not to bother us. */ 1761 if (!callback) { 1762 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; 1763 vq->packed.vring.driver->flags = 1764 cpu_to_le16(vq->packed.event_flags_shadow); 1765 } 1766 1767 spin_lock(&vdev->vqs_list_lock); 1768 list_add_tail(&vq->vq.list, &vdev->vqs); 1769 spin_unlock(&vdev->vqs_list_lock); 1770 return &vq->vq; 1771 1772 err_desc_extra: 1773 kfree(vq->packed.desc_state); 1774 err_desc_state: 1775 kfree(vq); 1776 err_vq: 1777 vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr); 1778 err_device: 1779 vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr); 1780 err_driver: 1781 vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr); 1782 err_ring: 1783 return NULL; 1784 } 1785 1786 1787 /* 1788 * Generic functions and exported symbols. 1789 */ 1790 1791 static inline int virtqueue_add(struct virtqueue *_vq, 1792 struct scatterlist *sgs[], 1793 unsigned int total_sg, 1794 unsigned int out_sgs, 1795 unsigned int in_sgs, 1796 void *data, 1797 void *ctx, 1798 gfp_t gfp) 1799 { 1800 struct vring_virtqueue *vq = to_vvq(_vq); 1801 1802 return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg, 1803 out_sgs, in_sgs, data, ctx, gfp) : 1804 virtqueue_add_split(_vq, sgs, total_sg, 1805 out_sgs, in_sgs, data, ctx, gfp); 1806 } 1807 1808 /** 1809 * virtqueue_add_sgs - expose buffers to other end 1810 * @_vq: the struct virtqueue we're talking about. 1811 * @sgs: array of terminated scatterlists. 1812 * @out_sgs: the number of scatterlists readable by other side 1813 * @in_sgs: the number of scatterlists which are writable (after readable ones) 1814 * @data: the token identifying the buffer. 1815 * @gfp: how to do memory allocations (if necessary). 1816 * 1817 * Caller must ensure we don't call this with other virtqueue operations 1818 * at the same time (except where noted). 1819 * 1820 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1821 */ 1822 int virtqueue_add_sgs(struct virtqueue *_vq, 1823 struct scatterlist *sgs[], 1824 unsigned int out_sgs, 1825 unsigned int in_sgs, 1826 void *data, 1827 gfp_t gfp) 1828 { 1829 unsigned int i, total_sg = 0; 1830 1831 /* Count them first. */ 1832 for (i = 0; i < out_sgs + in_sgs; i++) { 1833 struct scatterlist *sg; 1834 1835 for (sg = sgs[i]; sg; sg = sg_next(sg)) 1836 total_sg++; 1837 } 1838 return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, 1839 data, NULL, gfp); 1840 } 1841 EXPORT_SYMBOL_GPL(virtqueue_add_sgs); 1842 1843 /** 1844 * virtqueue_add_outbuf - expose output buffers to other end 1845 * @vq: the struct virtqueue we're talking about. 1846 * @sg: scatterlist (must be well-formed and terminated!) 1847 * @num: the number of entries in @sg readable by other side 1848 * @data: the token identifying the buffer. 1849 * @gfp: how to do memory allocations (if necessary). 1850 * 1851 * Caller must ensure we don't call this with other virtqueue operations 1852 * at the same time (except where noted). 1853 * 1854 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1855 */ 1856 int virtqueue_add_outbuf(struct virtqueue *vq, 1857 struct scatterlist *sg, unsigned int num, 1858 void *data, 1859 gfp_t gfp) 1860 { 1861 return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp); 1862 } 1863 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); 1864 1865 /** 1866 * virtqueue_add_inbuf - expose input buffers to other end 1867 * @vq: the struct virtqueue we're talking about. 1868 * @sg: scatterlist (must be well-formed and terminated!) 1869 * @num: the number of entries in @sg writable by other side 1870 * @data: the token identifying the buffer. 1871 * @gfp: how to do memory allocations (if necessary). 1872 * 1873 * Caller must ensure we don't call this with other virtqueue operations 1874 * at the same time (except where noted). 1875 * 1876 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1877 */ 1878 int virtqueue_add_inbuf(struct virtqueue *vq, 1879 struct scatterlist *sg, unsigned int num, 1880 void *data, 1881 gfp_t gfp) 1882 { 1883 return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp); 1884 } 1885 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); 1886 1887 /** 1888 * virtqueue_add_inbuf_ctx - expose input buffers to other end 1889 * @vq: the struct virtqueue we're talking about. 1890 * @sg: scatterlist (must be well-formed and terminated!) 1891 * @num: the number of entries in @sg writable by other side 1892 * @data: the token identifying the buffer. 1893 * @ctx: extra context for the token 1894 * @gfp: how to do memory allocations (if necessary). 1895 * 1896 * Caller must ensure we don't call this with other virtqueue operations 1897 * at the same time (except where noted). 1898 * 1899 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1900 */ 1901 int virtqueue_add_inbuf_ctx(struct virtqueue *vq, 1902 struct scatterlist *sg, unsigned int num, 1903 void *data, 1904 void *ctx, 1905 gfp_t gfp) 1906 { 1907 return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp); 1908 } 1909 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx); 1910 1911 /** 1912 * virtqueue_kick_prepare - first half of split virtqueue_kick call. 1913 * @_vq: the struct virtqueue 1914 * 1915 * Instead of virtqueue_kick(), you can do: 1916 * if (virtqueue_kick_prepare(vq)) 1917 * virtqueue_notify(vq); 1918 * 1919 * This is sometimes useful because the virtqueue_kick_prepare() needs 1920 * to be serialized, but the actual virtqueue_notify() call does not. 1921 */ 1922 bool virtqueue_kick_prepare(struct virtqueue *_vq) 1923 { 1924 struct vring_virtqueue *vq = to_vvq(_vq); 1925 1926 return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) : 1927 virtqueue_kick_prepare_split(_vq); 1928 } 1929 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare); 1930 1931 /** 1932 * virtqueue_notify - second half of split virtqueue_kick call. 1933 * @_vq: the struct virtqueue 1934 * 1935 * This does not need to be serialized. 1936 * 1937 * Returns false if host notify failed or queue is broken, otherwise true. 1938 */ 1939 bool virtqueue_notify(struct virtqueue *_vq) 1940 { 1941 struct vring_virtqueue *vq = to_vvq(_vq); 1942 1943 if (unlikely(vq->broken)) 1944 return false; 1945 1946 /* Prod other side to tell it about changes. */ 1947 if (!vq->notify(_vq)) { 1948 vq->broken = true; 1949 return false; 1950 } 1951 return true; 1952 } 1953 EXPORT_SYMBOL_GPL(virtqueue_notify); 1954 1955 /** 1956 * virtqueue_kick - update after add_buf 1957 * @vq: the struct virtqueue 1958 * 1959 * After one or more virtqueue_add_* calls, invoke this to kick 1960 * the other side. 1961 * 1962 * Caller must ensure we don't call this with other virtqueue 1963 * operations at the same time (except where noted). 1964 * 1965 * Returns false if kick failed, otherwise true. 1966 */ 1967 bool virtqueue_kick(struct virtqueue *vq) 1968 { 1969 if (virtqueue_kick_prepare(vq)) 1970 return virtqueue_notify(vq); 1971 return true; 1972 } 1973 EXPORT_SYMBOL_GPL(virtqueue_kick); 1974 1975 /** 1976 * virtqueue_get_buf_ctx - get the next used buffer 1977 * @_vq: the struct virtqueue we're talking about. 1978 * @len: the length written into the buffer 1979 * @ctx: extra context for the token 1980 * 1981 * If the device wrote data into the buffer, @len will be set to the 1982 * amount written. This means you don't need to clear the buffer 1983 * beforehand to ensure there's no data leakage in the case of short 1984 * writes. 1985 * 1986 * Caller must ensure we don't call this with other virtqueue 1987 * operations at the same time (except where noted). 1988 * 1989 * Returns NULL if there are no used buffers, or the "data" token 1990 * handed to virtqueue_add_*(). 1991 */ 1992 void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, 1993 void **ctx) 1994 { 1995 struct vring_virtqueue *vq = to_vvq(_vq); 1996 1997 return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) : 1998 virtqueue_get_buf_ctx_split(_vq, len, ctx); 1999 } 2000 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx); 2001 2002 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) 2003 { 2004 return virtqueue_get_buf_ctx(_vq, len, NULL); 2005 } 2006 EXPORT_SYMBOL_GPL(virtqueue_get_buf); 2007 /** 2008 * virtqueue_disable_cb - disable callbacks 2009 * @_vq: the struct virtqueue we're talking about. 2010 * 2011 * Note that this is not necessarily synchronous, hence unreliable and only 2012 * useful as an optimization. 2013 * 2014 * Unlike other operations, this need not be serialized. 2015 */ 2016 void virtqueue_disable_cb(struct virtqueue *_vq) 2017 { 2018 struct vring_virtqueue *vq = to_vvq(_vq); 2019 2020 /* If device triggered an event already it won't trigger one again: 2021 * no need to disable. 2022 */ 2023 if (vq->event_triggered) 2024 return; 2025 2026 if (vq->packed_ring) 2027 virtqueue_disable_cb_packed(_vq); 2028 else 2029 virtqueue_disable_cb_split(_vq); 2030 } 2031 EXPORT_SYMBOL_GPL(virtqueue_disable_cb); 2032 2033 /** 2034 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb 2035 * @_vq: the struct virtqueue we're talking about. 2036 * 2037 * This re-enables callbacks; it returns current queue state 2038 * in an opaque unsigned value. This value should be later tested by 2039 * virtqueue_poll, to detect a possible race between the driver checking for 2040 * more work, and enabling callbacks. 2041 * 2042 * Caller must ensure we don't call this with other virtqueue 2043 * operations at the same time (except where noted). 2044 */ 2045 unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq) 2046 { 2047 struct vring_virtqueue *vq = to_vvq(_vq); 2048 2049 if (vq->event_triggered) 2050 vq->event_triggered = false; 2051 2052 return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) : 2053 virtqueue_enable_cb_prepare_split(_vq); 2054 } 2055 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); 2056 2057 /** 2058 * virtqueue_poll - query pending used buffers 2059 * @_vq: the struct virtqueue we're talking about. 2060 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare). 2061 * 2062 * Returns "true" if there are pending used buffers in the queue. 2063 * 2064 * This does not need to be serialized. 2065 */ 2066 bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx) 2067 { 2068 struct vring_virtqueue *vq = to_vvq(_vq); 2069 2070 if (unlikely(vq->broken)) 2071 return false; 2072 2073 virtio_mb(vq->weak_barriers); 2074 return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) : 2075 virtqueue_poll_split(_vq, last_used_idx); 2076 } 2077 EXPORT_SYMBOL_GPL(virtqueue_poll); 2078 2079 /** 2080 * virtqueue_enable_cb - restart callbacks after disable_cb. 2081 * @_vq: the struct virtqueue we're talking about. 2082 * 2083 * This re-enables callbacks; it returns "false" if there are pending 2084 * buffers in the queue, to detect a possible race between the driver 2085 * checking for more work, and enabling callbacks. 2086 * 2087 * Caller must ensure we don't call this with other virtqueue 2088 * operations at the same time (except where noted). 2089 */ 2090 bool virtqueue_enable_cb(struct virtqueue *_vq) 2091 { 2092 unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq); 2093 2094 return !virtqueue_poll(_vq, last_used_idx); 2095 } 2096 EXPORT_SYMBOL_GPL(virtqueue_enable_cb); 2097 2098 /** 2099 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb. 2100 * @_vq: the struct virtqueue we're talking about. 2101 * 2102 * This re-enables callbacks but hints to the other side to delay 2103 * interrupts until most of the available buffers have been processed; 2104 * it returns "false" if there are many pending buffers in the queue, 2105 * to detect a possible race between the driver checking for more work, 2106 * and enabling callbacks. 2107 * 2108 * Caller must ensure we don't call this with other virtqueue 2109 * operations at the same time (except where noted). 2110 */ 2111 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) 2112 { 2113 struct vring_virtqueue *vq = to_vvq(_vq); 2114 2115 if (vq->event_triggered) 2116 vq->event_triggered = false; 2117 2118 return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) : 2119 virtqueue_enable_cb_delayed_split(_vq); 2120 } 2121 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); 2122 2123 /** 2124 * virtqueue_detach_unused_buf - detach first unused buffer 2125 * @_vq: the struct virtqueue we're talking about. 2126 * 2127 * Returns NULL or the "data" token handed to virtqueue_add_*(). 2128 * This is not valid on an active queue; it is useful only for device 2129 * shutdown. 2130 */ 2131 void *virtqueue_detach_unused_buf(struct virtqueue *_vq) 2132 { 2133 struct vring_virtqueue *vq = to_vvq(_vq); 2134 2135 return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) : 2136 virtqueue_detach_unused_buf_split(_vq); 2137 } 2138 EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf); 2139 2140 static inline bool more_used(const struct vring_virtqueue *vq) 2141 { 2142 return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq); 2143 } 2144 2145 irqreturn_t vring_interrupt(int irq, void *_vq) 2146 { 2147 struct vring_virtqueue *vq = to_vvq(_vq); 2148 2149 if (!more_used(vq)) { 2150 pr_debug("virtqueue interrupt with no work for %p\n", vq); 2151 return IRQ_NONE; 2152 } 2153 2154 if (unlikely(vq->broken)) 2155 return IRQ_HANDLED; 2156 2157 /* Just a hint for performance: so it's ok that this can be racy! */ 2158 if (vq->event) 2159 vq->event_triggered = true; 2160 2161 pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback); 2162 if (vq->vq.callback) 2163 vq->vq.callback(&vq->vq); 2164 2165 return IRQ_HANDLED; 2166 } 2167 EXPORT_SYMBOL_GPL(vring_interrupt); 2168 2169 /* Only available for split ring */ 2170 struct virtqueue *__vring_new_virtqueue(unsigned int index, 2171 struct vring vring, 2172 struct virtio_device *vdev, 2173 bool weak_barriers, 2174 bool context, 2175 bool (*notify)(struct virtqueue *), 2176 void (*callback)(struct virtqueue *), 2177 const char *name) 2178 { 2179 struct vring_virtqueue *vq; 2180 2181 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 2182 return NULL; 2183 2184 vq = kmalloc(sizeof(*vq), GFP_KERNEL); 2185 if (!vq) 2186 return NULL; 2187 2188 vq->packed_ring = false; 2189 vq->vq.callback = callback; 2190 vq->vq.vdev = vdev; 2191 vq->vq.name = name; 2192 vq->vq.num_free = vring.num; 2193 vq->vq.index = index; 2194 vq->we_own_ring = false; 2195 vq->notify = notify; 2196 vq->weak_barriers = weak_barriers; 2197 vq->broken = false; 2198 vq->last_used_idx = 0; 2199 vq->event_triggered = false; 2200 vq->num_added = 0; 2201 vq->use_dma_api = vring_use_dma_api(vdev); 2202 #ifdef DEBUG 2203 vq->in_use = false; 2204 vq->last_add_time_valid = false; 2205 #endif 2206 2207 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && 2208 !context; 2209 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 2210 2211 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) 2212 vq->weak_barriers = false; 2213 2214 vq->split.queue_dma_addr = 0; 2215 vq->split.queue_size_in_bytes = 0; 2216 2217 vq->split.vring = vring; 2218 vq->split.avail_flags_shadow = 0; 2219 vq->split.avail_idx_shadow = 0; 2220 2221 /* No callback? Tell other side not to bother us. */ 2222 if (!callback) { 2223 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; 2224 if (!vq->event) 2225 vq->split.vring.avail->flags = cpu_to_virtio16(vdev, 2226 vq->split.avail_flags_shadow); 2227 } 2228 2229 vq->split.desc_state = kmalloc_array(vring.num, 2230 sizeof(struct vring_desc_state_split), GFP_KERNEL); 2231 if (!vq->split.desc_state) 2232 goto err_state; 2233 2234 vq->split.desc_extra = vring_alloc_desc_extra(vq, vring.num); 2235 if (!vq->split.desc_extra) 2236 goto err_extra; 2237 2238 /* Put everything in free lists. */ 2239 vq->free_head = 0; 2240 memset(vq->split.desc_state, 0, vring.num * 2241 sizeof(struct vring_desc_state_split)); 2242 2243 spin_lock(&vdev->vqs_list_lock); 2244 list_add_tail(&vq->vq.list, &vdev->vqs); 2245 spin_unlock(&vdev->vqs_list_lock); 2246 return &vq->vq; 2247 2248 err_extra: 2249 kfree(vq->split.desc_state); 2250 err_state: 2251 kfree(vq); 2252 return NULL; 2253 } 2254 EXPORT_SYMBOL_GPL(__vring_new_virtqueue); 2255 2256 struct virtqueue *vring_create_virtqueue( 2257 unsigned int index, 2258 unsigned int num, 2259 unsigned int vring_align, 2260 struct virtio_device *vdev, 2261 bool weak_barriers, 2262 bool may_reduce_num, 2263 bool context, 2264 bool (*notify)(struct virtqueue *), 2265 void (*callback)(struct virtqueue *), 2266 const char *name) 2267 { 2268 2269 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 2270 return vring_create_virtqueue_packed(index, num, vring_align, 2271 vdev, weak_barriers, may_reduce_num, 2272 context, notify, callback, name); 2273 2274 return vring_create_virtqueue_split(index, num, vring_align, 2275 vdev, weak_barriers, may_reduce_num, 2276 context, notify, callback, name); 2277 } 2278 EXPORT_SYMBOL_GPL(vring_create_virtqueue); 2279 2280 /* Only available for split ring */ 2281 struct virtqueue *vring_new_virtqueue(unsigned int index, 2282 unsigned int num, 2283 unsigned int vring_align, 2284 struct virtio_device *vdev, 2285 bool weak_barriers, 2286 bool context, 2287 void *pages, 2288 bool (*notify)(struct virtqueue *vq), 2289 void (*callback)(struct virtqueue *vq), 2290 const char *name) 2291 { 2292 struct vring vring; 2293 2294 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 2295 return NULL; 2296 2297 vring_init(&vring, num, pages, vring_align); 2298 return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context, 2299 notify, callback, name); 2300 } 2301 EXPORT_SYMBOL_GPL(vring_new_virtqueue); 2302 2303 void vring_del_virtqueue(struct virtqueue *_vq) 2304 { 2305 struct vring_virtqueue *vq = to_vvq(_vq); 2306 2307 spin_lock(&vq->vq.vdev->vqs_list_lock); 2308 list_del(&_vq->list); 2309 spin_unlock(&vq->vq.vdev->vqs_list_lock); 2310 2311 if (vq->we_own_ring) { 2312 if (vq->packed_ring) { 2313 vring_free_queue(vq->vq.vdev, 2314 vq->packed.ring_size_in_bytes, 2315 vq->packed.vring.desc, 2316 vq->packed.ring_dma_addr); 2317 2318 vring_free_queue(vq->vq.vdev, 2319 vq->packed.event_size_in_bytes, 2320 vq->packed.vring.driver, 2321 vq->packed.driver_event_dma_addr); 2322 2323 vring_free_queue(vq->vq.vdev, 2324 vq->packed.event_size_in_bytes, 2325 vq->packed.vring.device, 2326 vq->packed.device_event_dma_addr); 2327 2328 kfree(vq->packed.desc_state); 2329 kfree(vq->packed.desc_extra); 2330 } else { 2331 vring_free_queue(vq->vq.vdev, 2332 vq->split.queue_size_in_bytes, 2333 vq->split.vring.desc, 2334 vq->split.queue_dma_addr); 2335 } 2336 } 2337 if (!vq->packed_ring) { 2338 kfree(vq->split.desc_state); 2339 kfree(vq->split.desc_extra); 2340 } 2341 kfree(vq); 2342 } 2343 EXPORT_SYMBOL_GPL(vring_del_virtqueue); 2344 2345 /* Manipulates transport-specific feature bits. */ 2346 void vring_transport_features(struct virtio_device *vdev) 2347 { 2348 unsigned int i; 2349 2350 for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) { 2351 switch (i) { 2352 case VIRTIO_RING_F_INDIRECT_DESC: 2353 break; 2354 case VIRTIO_RING_F_EVENT_IDX: 2355 break; 2356 case VIRTIO_F_VERSION_1: 2357 break; 2358 case VIRTIO_F_ACCESS_PLATFORM: 2359 break; 2360 case VIRTIO_F_RING_PACKED: 2361 break; 2362 case VIRTIO_F_ORDER_PLATFORM: 2363 break; 2364 default: 2365 /* We don't understand this bit. */ 2366 __virtio_clear_bit(vdev, i); 2367 } 2368 } 2369 } 2370 EXPORT_SYMBOL_GPL(vring_transport_features); 2371 2372 /** 2373 * virtqueue_get_vring_size - return the size of the virtqueue's vring 2374 * @_vq: the struct virtqueue containing the vring of interest. 2375 * 2376 * Returns the size of the vring. This is mainly used for boasting to 2377 * userspace. Unlike other operations, this need not be serialized. 2378 */ 2379 unsigned int virtqueue_get_vring_size(struct virtqueue *_vq) 2380 { 2381 2382 struct vring_virtqueue *vq = to_vvq(_vq); 2383 2384 return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num; 2385 } 2386 EXPORT_SYMBOL_GPL(virtqueue_get_vring_size); 2387 2388 bool virtqueue_is_broken(struct virtqueue *_vq) 2389 { 2390 struct vring_virtqueue *vq = to_vvq(_vq); 2391 2392 return READ_ONCE(vq->broken); 2393 } 2394 EXPORT_SYMBOL_GPL(virtqueue_is_broken); 2395 2396 /* 2397 * This should prevent the device from being used, allowing drivers to 2398 * recover. You may need to grab appropriate locks to flush. 2399 */ 2400 void virtio_break_device(struct virtio_device *dev) 2401 { 2402 struct virtqueue *_vq; 2403 2404 spin_lock(&dev->vqs_list_lock); 2405 list_for_each_entry(_vq, &dev->vqs, list) { 2406 struct vring_virtqueue *vq = to_vvq(_vq); 2407 2408 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ 2409 WRITE_ONCE(vq->broken, true); 2410 } 2411 spin_unlock(&dev->vqs_list_lock); 2412 } 2413 EXPORT_SYMBOL_GPL(virtio_break_device); 2414 2415 dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq) 2416 { 2417 struct vring_virtqueue *vq = to_vvq(_vq); 2418 2419 BUG_ON(!vq->we_own_ring); 2420 2421 if (vq->packed_ring) 2422 return vq->packed.ring_dma_addr; 2423 2424 return vq->split.queue_dma_addr; 2425 } 2426 EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr); 2427 2428 dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq) 2429 { 2430 struct vring_virtqueue *vq = to_vvq(_vq); 2431 2432 BUG_ON(!vq->we_own_ring); 2433 2434 if (vq->packed_ring) 2435 return vq->packed.driver_event_dma_addr; 2436 2437 return vq->split.queue_dma_addr + 2438 ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc); 2439 } 2440 EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr); 2441 2442 dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq) 2443 { 2444 struct vring_virtqueue *vq = to_vvq(_vq); 2445 2446 BUG_ON(!vq->we_own_ring); 2447 2448 if (vq->packed_ring) 2449 return vq->packed.device_event_dma_addr; 2450 2451 return vq->split.queue_dma_addr + 2452 ((char *)vq->split.vring.used - (char *)vq->split.vring.desc); 2453 } 2454 EXPORT_SYMBOL_GPL(virtqueue_get_used_addr); 2455 2456 /* Only available for split ring */ 2457 const struct vring *virtqueue_get_vring(struct virtqueue *vq) 2458 { 2459 return &to_vvq(vq)->split.vring; 2460 } 2461 EXPORT_SYMBOL_GPL(virtqueue_get_vring); 2462 2463 MODULE_LICENSE("GPL"); 2464