1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* Virtio ring implementation. 3 * 4 * Copyright 2007 Rusty Russell IBM Corporation 5 */ 6 #include <linux/virtio.h> 7 #include <linux/virtio_ring.h> 8 #include <linux/virtio_config.h> 9 #include <linux/device.h> 10 #include <linux/slab.h> 11 #include <linux/module.h> 12 #include <linux/hrtimer.h> 13 #include <linux/dma-mapping.h> 14 #include <linux/spinlock.h> 15 #include <xen/xen.h> 16 17 #ifdef DEBUG 18 /* For development, we want to crash whenever the ring is screwed. */ 19 #define BAD_RING(_vq, fmt, args...) \ 20 do { \ 21 dev_err(&(_vq)->vq.vdev->dev, \ 22 "%s:"fmt, (_vq)->vq.name, ##args); \ 23 BUG(); \ 24 } while (0) 25 /* Caller is supposed to guarantee no reentry. */ 26 #define START_USE(_vq) \ 27 do { \ 28 if ((_vq)->in_use) \ 29 panic("%s:in_use = %i\n", \ 30 (_vq)->vq.name, (_vq)->in_use); \ 31 (_vq)->in_use = __LINE__; \ 32 } while (0) 33 #define END_USE(_vq) \ 34 do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0) 35 #define LAST_ADD_TIME_UPDATE(_vq) \ 36 do { \ 37 ktime_t now = ktime_get(); \ 38 \ 39 /* No kick or get, with .1 second between? Warn. */ \ 40 if ((_vq)->last_add_time_valid) \ 41 WARN_ON(ktime_to_ms(ktime_sub(now, \ 42 (_vq)->last_add_time)) > 100); \ 43 (_vq)->last_add_time = now; \ 44 (_vq)->last_add_time_valid = true; \ 45 } while (0) 46 #define LAST_ADD_TIME_CHECK(_vq) \ 47 do { \ 48 if ((_vq)->last_add_time_valid) { \ 49 WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \ 50 (_vq)->last_add_time)) > 100); \ 51 } \ 52 } while (0) 53 #define LAST_ADD_TIME_INVALID(_vq) \ 54 ((_vq)->last_add_time_valid = false) 55 #else 56 #define BAD_RING(_vq, fmt, args...) \ 57 do { \ 58 dev_err(&_vq->vq.vdev->dev, \ 59 "%s:"fmt, (_vq)->vq.name, ##args); \ 60 (_vq)->broken = true; \ 61 } while (0) 62 #define START_USE(vq) 63 #define END_USE(vq) 64 #define LAST_ADD_TIME_UPDATE(vq) 65 #define LAST_ADD_TIME_CHECK(vq) 66 #define LAST_ADD_TIME_INVALID(vq) 67 #endif 68 69 struct vring_desc_state_split { 70 void *data; /* Data for callback. */ 71 struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ 72 }; 73 74 struct vring_desc_state_packed { 75 void *data; /* Data for callback. */ 76 struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */ 77 u16 num; /* Descriptor list length. */ 78 u16 last; /* The last desc state in a list. */ 79 }; 80 81 struct vring_desc_extra { 82 dma_addr_t addr; /* Buffer DMA addr. */ 83 u32 len; /* Buffer length. */ 84 u16 flags; /* Descriptor flags. */ 85 u16 next; /* The next desc state in a list. */ 86 }; 87 88 struct vring_virtqueue { 89 struct virtqueue vq; 90 91 /* Is this a packed ring? */ 92 bool packed_ring; 93 94 /* Is DMA API used? */ 95 bool use_dma_api; 96 97 /* Can we use weak barriers? */ 98 bool weak_barriers; 99 100 /* Other side has made a mess, don't try any more. */ 101 bool broken; 102 103 /* Host supports indirect buffers */ 104 bool indirect; 105 106 /* Host publishes avail event idx */ 107 bool event; 108 109 /* Head of free buffer list. */ 110 unsigned int free_head; 111 /* Number we've added since last sync. */ 112 unsigned int num_added; 113 114 /* Last used index we've seen. */ 115 u16 last_used_idx; 116 117 /* Hint for event idx: already triggered no need to disable. */ 118 bool event_triggered; 119 120 union { 121 /* Available for split ring */ 122 struct { 123 /* Actual memory layout for this queue. 
*/ 124 struct vring vring; 125 126 /* Last written value to avail->flags */ 127 u16 avail_flags_shadow; 128 129 /* 130 * Last written value to avail->idx in 131 * guest byte order. 132 */ 133 u16 avail_idx_shadow; 134 135 /* Per-descriptor state. */ 136 struct vring_desc_state_split *desc_state; 137 struct vring_desc_extra *desc_extra; 138 139 /* DMA address and size information */ 140 dma_addr_t queue_dma_addr; 141 size_t queue_size_in_bytes; 142 } split; 143 144 /* Available for packed ring */ 145 struct { 146 /* Actual memory layout for this queue. */ 147 struct { 148 unsigned int num; 149 struct vring_packed_desc *desc; 150 struct vring_packed_desc_event *driver; 151 struct vring_packed_desc_event *device; 152 } vring; 153 154 /* Driver ring wrap counter. */ 155 bool avail_wrap_counter; 156 157 /* Device ring wrap counter. */ 158 bool used_wrap_counter; 159 160 /* Avail used flags. */ 161 u16 avail_used_flags; 162 163 /* Index of the next avail descriptor. */ 164 u16 next_avail_idx; 165 166 /* 167 * Last written value to driver->flags in 168 * guest byte order. 169 */ 170 u16 event_flags_shadow; 171 172 /* Per-descriptor state. */ 173 struct vring_desc_state_packed *desc_state; 174 struct vring_desc_extra *desc_extra; 175 176 /* DMA address and size information */ 177 dma_addr_t ring_dma_addr; 178 dma_addr_t driver_event_dma_addr; 179 dma_addr_t device_event_dma_addr; 180 size_t ring_size_in_bytes; 181 size_t event_size_in_bytes; 182 } packed; 183 }; 184 185 /* How to notify other side. FIXME: commonalize hcalls! */ 186 bool (*notify)(struct virtqueue *vq); 187 188 /* DMA, allocation, and size information */ 189 bool we_own_ring; 190 191 #ifdef DEBUG 192 /* They're supposed to lock for us. */ 193 unsigned int in_use; 194 195 /* Figure out if their kicks are too delayed. */ 196 bool last_add_time_valid; 197 ktime_t last_add_time; 198 #endif 199 }; 200 201 202 /* 203 * Helpers. 204 */ 205 206 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) 207 208 static inline bool virtqueue_use_indirect(struct virtqueue *_vq, 209 unsigned int total_sg) 210 { 211 struct vring_virtqueue *vq = to_vvq(_vq); 212 213 /* 214 * If the host supports indirect descriptor tables, and we have multiple 215 * buffers, then go indirect. FIXME: tune this threshold 216 */ 217 return (vq->indirect && total_sg > 1 && vq->vq.num_free); 218 } 219 220 /* 221 * Modern virtio devices have feature bits to specify whether they need a 222 * quirk and bypass the IOMMU. If not there, just use the DMA API. 223 * 224 * If there, the interaction between virtio and DMA API is messy. 225 * 226 * On most systems with virtio, physical addresses match bus addresses, 227 * and it doesn't particularly matter whether we use the DMA API. 228 * 229 * On some systems, including Xen and any system with a physical device 230 * that speaks virtio behind a physical IOMMU, we must use the DMA API 231 * for virtio DMA to work at all. 232 * 233 * On other systems, including SPARC and PPC64, virtio-pci devices are 234 * enumerated as though they are behind an IOMMU, but the virtio host 235 * ignores the IOMMU, so we must either pretend that the IOMMU isn't 236 * there or somehow map everything as the identity. 237 * 238 * For the time being, we preserve historic behavior and bypass the DMA 239 * API. 240 * 241 * TODO: install a per-device DMA ops structure that does the right thing 242 * taking into account all the above quirks, and use the DMA API 243 * unconditionally on data path. 
244 */ 245 246 static bool vring_use_dma_api(struct virtio_device *vdev) 247 { 248 if (!virtio_has_dma_quirk(vdev)) 249 return true; 250 251 /* Otherwise, we are left to guess. */ 252 /* 253 * In theory, it's possible to have a buggy QEMU-supposed 254 * emulated Q35 IOMMU and Xen enabled at the same time. On 255 * such a configuration, virtio has never worked and will 256 * not work without an even larger kludge. Instead, enable 257 * the DMA API if we're a Xen guest, which at least allows 258 * all of the sensible Xen configurations to work correctly. 259 */ 260 if (xen_domain()) 261 return true; 262 263 return false; 264 } 265 266 size_t virtio_max_dma_size(struct virtio_device *vdev) 267 { 268 size_t max_segment_size = SIZE_MAX; 269 270 if (vring_use_dma_api(vdev)) 271 max_segment_size = dma_max_mapping_size(&vdev->dev); 272 273 return max_segment_size; 274 } 275 EXPORT_SYMBOL_GPL(virtio_max_dma_size); 276 277 static void *vring_alloc_queue(struct virtio_device *vdev, size_t size, 278 dma_addr_t *dma_handle, gfp_t flag) 279 { 280 if (vring_use_dma_api(vdev)) { 281 return dma_alloc_coherent(vdev->dev.parent, size, 282 dma_handle, flag); 283 } else { 284 void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag); 285 286 if (queue) { 287 phys_addr_t phys_addr = virt_to_phys(queue); 288 *dma_handle = (dma_addr_t)phys_addr; 289 290 /* 291 * Sanity check: make sure we dind't truncate 292 * the address. The only arches I can find that 293 * have 64-bit phys_addr_t but 32-bit dma_addr_t 294 * are certain non-highmem MIPS and x86 295 * configurations, but these configurations 296 * should never allocate physical pages above 32 297 * bits, so this is fine. Just in case, throw a 298 * warning and abort if we end up with an 299 * unrepresentable address. 300 */ 301 if (WARN_ON_ONCE(*dma_handle != phys_addr)) { 302 free_pages_exact(queue, PAGE_ALIGN(size)); 303 return NULL; 304 } 305 } 306 return queue; 307 } 308 } 309 310 static void vring_free_queue(struct virtio_device *vdev, size_t size, 311 void *queue, dma_addr_t dma_handle) 312 { 313 if (vring_use_dma_api(vdev)) 314 dma_free_coherent(vdev->dev.parent, size, queue, dma_handle); 315 else 316 free_pages_exact(queue, PAGE_ALIGN(size)); 317 } 318 319 /* 320 * The DMA ops on various arches are rather gnarly right now, and 321 * making all of the arch DMA ops work on the vring device itself 322 * is a mess. For now, we use the parent device for DMA ops. 323 */ 324 static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq) 325 { 326 return vq->vq.vdev->dev.parent; 327 } 328 329 /* Map one sg entry. */ 330 static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq, 331 struct scatterlist *sg, 332 enum dma_data_direction direction) 333 { 334 if (!vq->use_dma_api) 335 return (dma_addr_t)sg_phys(sg); 336 337 /* 338 * We can't use dma_map_sg, because we don't use scatterlists in 339 * the way it expects (we don't guarantee that the scatterlist 340 * will exist for the lifetime of the mapping). 
341 */ 342 return dma_map_page(vring_dma_dev(vq), 343 sg_page(sg), sg->offset, sg->length, 344 direction); 345 } 346 347 static dma_addr_t vring_map_single(const struct vring_virtqueue *vq, 348 void *cpu_addr, size_t size, 349 enum dma_data_direction direction) 350 { 351 if (!vq->use_dma_api) 352 return (dma_addr_t)virt_to_phys(cpu_addr); 353 354 return dma_map_single(vring_dma_dev(vq), 355 cpu_addr, size, direction); 356 } 357 358 static int vring_mapping_error(const struct vring_virtqueue *vq, 359 dma_addr_t addr) 360 { 361 if (!vq->use_dma_api) 362 return 0; 363 364 return dma_mapping_error(vring_dma_dev(vq), addr); 365 } 366 367 368 /* 369 * Split ring specific functions - *_split(). 370 */ 371 372 static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq, 373 struct vring_desc *desc) 374 { 375 u16 flags; 376 377 if (!vq->use_dma_api) 378 return; 379 380 flags = virtio16_to_cpu(vq->vq.vdev, desc->flags); 381 382 if (flags & VRING_DESC_F_INDIRECT) { 383 dma_unmap_single(vring_dma_dev(vq), 384 virtio64_to_cpu(vq->vq.vdev, desc->addr), 385 virtio32_to_cpu(vq->vq.vdev, desc->len), 386 (flags & VRING_DESC_F_WRITE) ? 387 DMA_FROM_DEVICE : DMA_TO_DEVICE); 388 } else { 389 dma_unmap_page(vring_dma_dev(vq), 390 virtio64_to_cpu(vq->vq.vdev, desc->addr), 391 virtio32_to_cpu(vq->vq.vdev, desc->len), 392 (flags & VRING_DESC_F_WRITE) ? 393 DMA_FROM_DEVICE : DMA_TO_DEVICE); 394 } 395 } 396 397 static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq, 398 unsigned int i) 399 { 400 struct vring_desc_extra *extra = vq->split.desc_extra; 401 u16 flags; 402 403 if (!vq->use_dma_api) 404 goto out; 405 406 flags = extra[i].flags; 407 408 if (flags & VRING_DESC_F_INDIRECT) { 409 dma_unmap_single(vring_dma_dev(vq), 410 extra[i].addr, 411 extra[i].len, 412 (flags & VRING_DESC_F_WRITE) ? 413 DMA_FROM_DEVICE : DMA_TO_DEVICE); 414 } else { 415 dma_unmap_page(vring_dma_dev(vq), 416 extra[i].addr, 417 extra[i].len, 418 (flags & VRING_DESC_F_WRITE) ? 419 DMA_FROM_DEVICE : DMA_TO_DEVICE); 420 } 421 422 out: 423 return extra[i].next; 424 } 425 426 static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq, 427 unsigned int total_sg, 428 gfp_t gfp) 429 { 430 struct vring_desc *desc; 431 unsigned int i; 432 433 /* 434 * We require lowmem mappings for the descriptors because 435 * otherwise virt_to_phys will give us bogus addresses in the 436 * virtqueue. 
437 */ 438 gfp &= ~__GFP_HIGHMEM; 439 440 desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp); 441 if (!desc) 442 return NULL; 443 444 for (i = 0; i < total_sg; i++) 445 desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1); 446 return desc; 447 } 448 449 static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq, 450 struct vring_desc *desc, 451 unsigned int i, 452 dma_addr_t addr, 453 unsigned int len, 454 u16 flags, 455 bool indirect) 456 { 457 struct vring_virtqueue *vring = to_vvq(vq); 458 struct vring_desc_extra *extra = vring->split.desc_extra; 459 u16 next; 460 461 desc[i].flags = cpu_to_virtio16(vq->vdev, flags); 462 desc[i].addr = cpu_to_virtio64(vq->vdev, addr); 463 desc[i].len = cpu_to_virtio32(vq->vdev, len); 464 465 if (!indirect) { 466 next = extra[i].next; 467 desc[i].next = cpu_to_virtio16(vq->vdev, next); 468 469 extra[i].addr = addr; 470 extra[i].len = len; 471 extra[i].flags = flags; 472 } else 473 next = virtio16_to_cpu(vq->vdev, desc[i].next); 474 475 return next; 476 } 477 478 static inline int virtqueue_add_split(struct virtqueue *_vq, 479 struct scatterlist *sgs[], 480 unsigned int total_sg, 481 unsigned int out_sgs, 482 unsigned int in_sgs, 483 void *data, 484 void *ctx, 485 gfp_t gfp) 486 { 487 struct vring_virtqueue *vq = to_vvq(_vq); 488 struct scatterlist *sg; 489 struct vring_desc *desc; 490 unsigned int i, n, avail, descs_used, prev, err_idx; 491 int head; 492 bool indirect; 493 494 START_USE(vq); 495 496 BUG_ON(data == NULL); 497 BUG_ON(ctx && vq->indirect); 498 499 if (unlikely(vq->broken)) { 500 END_USE(vq); 501 return -EIO; 502 } 503 504 LAST_ADD_TIME_UPDATE(vq); 505 506 BUG_ON(total_sg == 0); 507 508 head = vq->free_head; 509 510 if (virtqueue_use_indirect(_vq, total_sg)) 511 desc = alloc_indirect_split(_vq, total_sg, gfp); 512 else { 513 desc = NULL; 514 WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect); 515 } 516 517 if (desc) { 518 /* Use a single buffer which doesn't continue */ 519 indirect = true; 520 /* Set up rest to use this indirect table. */ 521 i = 0; 522 descs_used = 1; 523 } else { 524 indirect = false; 525 desc = vq->split.vring.desc; 526 i = head; 527 descs_used = total_sg; 528 } 529 530 if (vq->vq.num_free < descs_used) { 531 pr_debug("Can't add buf len %i - avail = %i\n", 532 descs_used, vq->vq.num_free); 533 /* FIXME: for historical reasons, we force a notify here if 534 * there are outgoing parts to the buffer. Presumably the 535 * host should service the ring ASAP. */ 536 if (out_sgs) 537 vq->notify(&vq->vq); 538 if (indirect) 539 kfree(desc); 540 END_USE(vq); 541 return -ENOSPC; 542 } 543 544 for (n = 0; n < out_sgs; n++) { 545 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 546 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE); 547 if (vring_mapping_error(vq, addr)) 548 goto unmap_release; 549 550 prev = i; 551 /* Note that we trust indirect descriptor 552 * table since it use stream DMA mapping. 553 */ 554 i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length, 555 VRING_DESC_F_NEXT, 556 indirect); 557 } 558 } 559 for (; n < (out_sgs + in_sgs); n++) { 560 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 561 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE); 562 if (vring_mapping_error(vq, addr)) 563 goto unmap_release; 564 565 prev = i; 566 /* Note that we trust indirect descriptor 567 * table since it use stream DMA mapping. 
568 */ 569 i = virtqueue_add_desc_split(_vq, desc, i, addr, 570 sg->length, 571 VRING_DESC_F_NEXT | 572 VRING_DESC_F_WRITE, 573 indirect); 574 } 575 } 576 /* Last one doesn't continue. */ 577 desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); 578 if (!indirect && vq->use_dma_api) 579 vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags = 580 ~VRING_DESC_F_NEXT; 581 582 if (indirect) { 583 /* Now that the indirect table is filled in, map it. */ 584 dma_addr_t addr = vring_map_single( 585 vq, desc, total_sg * sizeof(struct vring_desc), 586 DMA_TO_DEVICE); 587 if (vring_mapping_error(vq, addr)) 588 goto unmap_release; 589 590 virtqueue_add_desc_split(_vq, vq->split.vring.desc, 591 head, addr, 592 total_sg * sizeof(struct vring_desc), 593 VRING_DESC_F_INDIRECT, 594 false); 595 } 596 597 /* We're using some buffers from the free list. */ 598 vq->vq.num_free -= descs_used; 599 600 /* Update free pointer */ 601 if (indirect) 602 vq->free_head = vq->split.desc_extra[head].next; 603 else 604 vq->free_head = i; 605 606 /* Store token and indirect buffer state. */ 607 vq->split.desc_state[head].data = data; 608 if (indirect) 609 vq->split.desc_state[head].indir_desc = desc; 610 else 611 vq->split.desc_state[head].indir_desc = ctx; 612 613 /* Put entry in available array (but don't update avail->idx until they 614 * do sync). */ 615 avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1); 616 vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head); 617 618 /* Descriptors and available array need to be set before we expose the 619 * new available array entries. */ 620 virtio_wmb(vq->weak_barriers); 621 vq->split.avail_idx_shadow++; 622 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, 623 vq->split.avail_idx_shadow); 624 vq->num_added++; 625 626 pr_debug("Added buffer head %i to %p\n", head, vq); 627 END_USE(vq); 628 629 /* This is very unlikely, but theoretically possible. Kick 630 * just in case. */ 631 if (unlikely(vq->num_added == (1 << 16) - 1)) 632 virtqueue_kick(_vq); 633 634 return 0; 635 636 unmap_release: 637 err_idx = i; 638 639 if (indirect) 640 i = 0; 641 else 642 i = head; 643 644 for (n = 0; n < total_sg; n++) { 645 if (i == err_idx) 646 break; 647 if (indirect) { 648 vring_unmap_one_split_indirect(vq, &desc[i]); 649 i = virtio16_to_cpu(_vq->vdev, desc[i].next); 650 } else 651 i = vring_unmap_one_split(vq, i); 652 } 653 654 if (indirect) 655 kfree(desc); 656 657 END_USE(vq); 658 return -ENOMEM; 659 } 660 661 static bool virtqueue_kick_prepare_split(struct virtqueue *_vq) 662 { 663 struct vring_virtqueue *vq = to_vvq(_vq); 664 u16 new, old; 665 bool needs_kick; 666 667 START_USE(vq); 668 /* We need to expose available array entries before checking avail 669 * event. */ 670 virtio_mb(vq->weak_barriers); 671 672 old = vq->split.avail_idx_shadow - vq->num_added; 673 new = vq->split.avail_idx_shadow; 674 vq->num_added = 0; 675 676 LAST_ADD_TIME_CHECK(vq); 677 LAST_ADD_TIME_INVALID(vq); 678 679 if (vq->event) { 680 needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev, 681 vring_avail_event(&vq->split.vring)), 682 new, old); 683 } else { 684 needs_kick = !(vq->split.vring.used->flags & 685 cpu_to_virtio16(_vq->vdev, 686 VRING_USED_F_NO_NOTIFY)); 687 } 688 END_USE(vq); 689 return needs_kick; 690 } 691 692 static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, 693 void **ctx) 694 { 695 unsigned int i, j; 696 __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT); 697 698 /* Clear data ptr. 
*/ 699 vq->split.desc_state[head].data = NULL; 700 701 /* Put back on free list: unmap first-level descriptors and find end */ 702 i = head; 703 704 while (vq->split.vring.desc[i].flags & nextflag) { 705 vring_unmap_one_split(vq, i); 706 i = vq->split.desc_extra[i].next; 707 vq->vq.num_free++; 708 } 709 710 vring_unmap_one_split(vq, i); 711 vq->split.desc_extra[i].next = vq->free_head; 712 vq->free_head = head; 713 714 /* Plus final descriptor */ 715 vq->vq.num_free++; 716 717 if (vq->indirect) { 718 struct vring_desc *indir_desc = 719 vq->split.desc_state[head].indir_desc; 720 u32 len; 721 722 /* Free the indirect table, if any, now that it's unmapped. */ 723 if (!indir_desc) 724 return; 725 726 len = vq->split.desc_extra[head].len; 727 728 BUG_ON(!(vq->split.desc_extra[head].flags & 729 VRING_DESC_F_INDIRECT)); 730 BUG_ON(len == 0 || len % sizeof(struct vring_desc)); 731 732 for (j = 0; j < len / sizeof(struct vring_desc); j++) 733 vring_unmap_one_split_indirect(vq, &indir_desc[j]); 734 735 kfree(indir_desc); 736 vq->split.desc_state[head].indir_desc = NULL; 737 } else if (ctx) { 738 *ctx = vq->split.desc_state[head].indir_desc; 739 } 740 } 741 742 static inline bool more_used_split(const struct vring_virtqueue *vq) 743 { 744 return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, 745 vq->split.vring.used->idx); 746 } 747 748 static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq, 749 unsigned int *len, 750 void **ctx) 751 { 752 struct vring_virtqueue *vq = to_vvq(_vq); 753 void *ret; 754 unsigned int i; 755 u16 last_used; 756 757 START_USE(vq); 758 759 if (unlikely(vq->broken)) { 760 END_USE(vq); 761 return NULL; 762 } 763 764 if (!more_used_split(vq)) { 765 pr_debug("No more buffers in queue\n"); 766 END_USE(vq); 767 return NULL; 768 } 769 770 /* Only get used array entries after they have been exposed by host. */ 771 virtio_rmb(vq->weak_barriers); 772 773 last_used = (vq->last_used_idx & (vq->split.vring.num - 1)); 774 i = virtio32_to_cpu(_vq->vdev, 775 vq->split.vring.used->ring[last_used].id); 776 *len = virtio32_to_cpu(_vq->vdev, 777 vq->split.vring.used->ring[last_used].len); 778 779 if (unlikely(i >= vq->split.vring.num)) { 780 BAD_RING(vq, "id %u out of range\n", i); 781 return NULL; 782 } 783 if (unlikely(!vq->split.desc_state[i].data)) { 784 BAD_RING(vq, "id %u is not a head!\n", i); 785 return NULL; 786 } 787 788 /* detach_buf_split clears data, so grab it now. */ 789 ret = vq->split.desc_state[i].data; 790 detach_buf_split(vq, i, ctx); 791 vq->last_used_idx++; 792 /* If we expect an interrupt for the next entry, tell host 793 * by writing event index and flush out the write before 794 * the read in the next get_buf call. */ 795 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) 796 virtio_store_mb(vq->weak_barriers, 797 &vring_used_event(&vq->split.vring), 798 cpu_to_virtio16(_vq->vdev, vq->last_used_idx)); 799 800 LAST_ADD_TIME_INVALID(vq); 801 802 END_USE(vq); 803 return ret; 804 } 805 806 static void virtqueue_disable_cb_split(struct virtqueue *_vq) 807 { 808 struct vring_virtqueue *vq = to_vvq(_vq); 809 810 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) { 811 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; 812 if (vq->event) 813 /* TODO: this is a hack. Figure out a cleaner value to write. 
*/ 814 vring_used_event(&vq->split.vring) = 0x0; 815 else 816 vq->split.vring.avail->flags = 817 cpu_to_virtio16(_vq->vdev, 818 vq->split.avail_flags_shadow); 819 } 820 } 821 822 static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq) 823 { 824 struct vring_virtqueue *vq = to_vvq(_vq); 825 u16 last_used_idx; 826 827 START_USE(vq); 828 829 /* We optimistically turn back on interrupts, then check if there was 830 * more to do. */ 831 /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to 832 * either clear the flags bit or point the event index at the next 833 * entry. Always do both to keep code simple. */ 834 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 835 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 836 if (!vq->event) 837 vq->split.vring.avail->flags = 838 cpu_to_virtio16(_vq->vdev, 839 vq->split.avail_flags_shadow); 840 } 841 vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev, 842 last_used_idx = vq->last_used_idx); 843 END_USE(vq); 844 return last_used_idx; 845 } 846 847 static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx) 848 { 849 struct vring_virtqueue *vq = to_vvq(_vq); 850 851 return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev, 852 vq->split.vring.used->idx); 853 } 854 855 static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq) 856 { 857 struct vring_virtqueue *vq = to_vvq(_vq); 858 u16 bufs; 859 860 START_USE(vq); 861 862 /* We optimistically turn back on interrupts, then check if there was 863 * more to do. */ 864 /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to 865 * either clear the flags bit or point the event index at the next 866 * entry. Always update the event index to keep code simple. */ 867 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 868 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 869 if (!vq->event) 870 vq->split.vring.avail->flags = 871 cpu_to_virtio16(_vq->vdev, 872 vq->split.avail_flags_shadow); 873 } 874 /* TODO: tune this threshold */ 875 bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4; 876 877 virtio_store_mb(vq->weak_barriers, 878 &vring_used_event(&vq->split.vring), 879 cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs)); 880 881 if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx) 882 - vq->last_used_idx) > bufs)) { 883 END_USE(vq); 884 return false; 885 } 886 887 END_USE(vq); 888 return true; 889 } 890 891 static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq) 892 { 893 struct vring_virtqueue *vq = to_vvq(_vq); 894 unsigned int i; 895 void *buf; 896 897 START_USE(vq); 898 899 for (i = 0; i < vq->split.vring.num; i++) { 900 if (!vq->split.desc_state[i].data) 901 continue; 902 /* detach_buf_split clears data, so grab it now. */ 903 buf = vq->split.desc_state[i].data; 904 detach_buf_split(vq, i, NULL); 905 vq->split.avail_idx_shadow--; 906 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, 907 vq->split.avail_idx_shadow); 908 END_USE(vq); 909 return buf; 910 } 911 /* That should have freed everything. 
*/ 912 BUG_ON(vq->vq.num_free != vq->split.vring.num); 913 914 END_USE(vq); 915 return NULL; 916 } 917 918 static struct virtqueue *vring_create_virtqueue_split( 919 unsigned int index, 920 unsigned int num, 921 unsigned int vring_align, 922 struct virtio_device *vdev, 923 bool weak_barriers, 924 bool may_reduce_num, 925 bool context, 926 bool (*notify)(struct virtqueue *), 927 void (*callback)(struct virtqueue *), 928 const char *name) 929 { 930 struct virtqueue *vq; 931 void *queue = NULL; 932 dma_addr_t dma_addr; 933 size_t queue_size_in_bytes; 934 struct vring vring; 935 936 /* We assume num is a power of 2. */ 937 if (num & (num - 1)) { 938 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num); 939 return NULL; 940 } 941 942 /* TODO: allocate each queue chunk individually */ 943 for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) { 944 queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 945 &dma_addr, 946 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 947 if (queue) 948 break; 949 if (!may_reduce_num) 950 return NULL; 951 } 952 953 if (!num) 954 return NULL; 955 956 if (!queue) { 957 /* Try to get a single page. You are my only hope! */ 958 queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 959 &dma_addr, GFP_KERNEL|__GFP_ZERO); 960 } 961 if (!queue) 962 return NULL; 963 964 queue_size_in_bytes = vring_size(num, vring_align); 965 vring_init(&vring, num, queue, vring_align); 966 967 vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context, 968 notify, callback, name); 969 if (!vq) { 970 vring_free_queue(vdev, queue_size_in_bytes, queue, 971 dma_addr); 972 return NULL; 973 } 974 975 to_vvq(vq)->split.queue_dma_addr = dma_addr; 976 to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes; 977 to_vvq(vq)->we_own_ring = true; 978 979 return vq; 980 } 981 982 983 /* 984 * Packed ring specific functions - *_packed(). 985 */ 986 987 static void vring_unmap_state_packed(const struct vring_virtqueue *vq, 988 struct vring_desc_extra *state) 989 { 990 u16 flags; 991 992 if (!vq->use_dma_api) 993 return; 994 995 flags = state->flags; 996 997 if (flags & VRING_DESC_F_INDIRECT) { 998 dma_unmap_single(vring_dma_dev(vq), 999 state->addr, state->len, 1000 (flags & VRING_DESC_F_WRITE) ? 1001 DMA_FROM_DEVICE : DMA_TO_DEVICE); 1002 } else { 1003 dma_unmap_page(vring_dma_dev(vq), 1004 state->addr, state->len, 1005 (flags & VRING_DESC_F_WRITE) ? 1006 DMA_FROM_DEVICE : DMA_TO_DEVICE); 1007 } 1008 } 1009 1010 static void vring_unmap_desc_packed(const struct vring_virtqueue *vq, 1011 struct vring_packed_desc *desc) 1012 { 1013 u16 flags; 1014 1015 if (!vq->use_dma_api) 1016 return; 1017 1018 flags = le16_to_cpu(desc->flags); 1019 1020 if (flags & VRING_DESC_F_INDIRECT) { 1021 dma_unmap_single(vring_dma_dev(vq), 1022 le64_to_cpu(desc->addr), 1023 le32_to_cpu(desc->len), 1024 (flags & VRING_DESC_F_WRITE) ? 1025 DMA_FROM_DEVICE : DMA_TO_DEVICE); 1026 } else { 1027 dma_unmap_page(vring_dma_dev(vq), 1028 le64_to_cpu(desc->addr), 1029 le32_to_cpu(desc->len), 1030 (flags & VRING_DESC_F_WRITE) ? 1031 DMA_FROM_DEVICE : DMA_TO_DEVICE); 1032 } 1033 } 1034 1035 static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg, 1036 gfp_t gfp) 1037 { 1038 struct vring_packed_desc *desc; 1039 1040 /* 1041 * We require lowmem mappings for the descriptors because 1042 * otherwise virt_to_phys will give us bogus addresses in the 1043 * virtqueue. 
1044 */ 1045 gfp &= ~__GFP_HIGHMEM; 1046 1047 desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp); 1048 1049 return desc; 1050 } 1051 1052 static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, 1053 struct scatterlist *sgs[], 1054 unsigned int total_sg, 1055 unsigned int out_sgs, 1056 unsigned int in_sgs, 1057 void *data, 1058 gfp_t gfp) 1059 { 1060 struct vring_packed_desc *desc; 1061 struct scatterlist *sg; 1062 unsigned int i, n, err_idx; 1063 u16 head, id; 1064 dma_addr_t addr; 1065 1066 head = vq->packed.next_avail_idx; 1067 desc = alloc_indirect_packed(total_sg, gfp); 1068 1069 if (unlikely(vq->vq.num_free < 1)) { 1070 pr_debug("Can't add buf len 1 - avail = 0\n"); 1071 kfree(desc); 1072 END_USE(vq); 1073 return -ENOSPC; 1074 } 1075 1076 i = 0; 1077 id = vq->free_head; 1078 BUG_ON(id == vq->packed.vring.num); 1079 1080 for (n = 0; n < out_sgs + in_sgs; n++) { 1081 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 1082 addr = vring_map_one_sg(vq, sg, n < out_sgs ? 1083 DMA_TO_DEVICE : DMA_FROM_DEVICE); 1084 if (vring_mapping_error(vq, addr)) 1085 goto unmap_release; 1086 1087 desc[i].flags = cpu_to_le16(n < out_sgs ? 1088 0 : VRING_DESC_F_WRITE); 1089 desc[i].addr = cpu_to_le64(addr); 1090 desc[i].len = cpu_to_le32(sg->length); 1091 i++; 1092 } 1093 } 1094 1095 /* Now that the indirect table is filled in, map it. */ 1096 addr = vring_map_single(vq, desc, 1097 total_sg * sizeof(struct vring_packed_desc), 1098 DMA_TO_DEVICE); 1099 if (vring_mapping_error(vq, addr)) 1100 goto unmap_release; 1101 1102 vq->packed.vring.desc[head].addr = cpu_to_le64(addr); 1103 vq->packed.vring.desc[head].len = cpu_to_le32(total_sg * 1104 sizeof(struct vring_packed_desc)); 1105 vq->packed.vring.desc[head].id = cpu_to_le16(id); 1106 1107 if (vq->use_dma_api) { 1108 vq->packed.desc_extra[id].addr = addr; 1109 vq->packed.desc_extra[id].len = total_sg * 1110 sizeof(struct vring_packed_desc); 1111 vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT | 1112 vq->packed.avail_used_flags; 1113 } 1114 1115 /* 1116 * A driver MUST NOT make the first descriptor in the list 1117 * available before all subsequent descriptors comprising 1118 * the list are made available. 1119 */ 1120 virtio_wmb(vq->weak_barriers); 1121 vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT | 1122 vq->packed.avail_used_flags); 1123 1124 /* We're using some buffers from the free list. */ 1125 vq->vq.num_free -= 1; 1126 1127 /* Update free pointer */ 1128 n = head + 1; 1129 if (n >= vq->packed.vring.num) { 1130 n = 0; 1131 vq->packed.avail_wrap_counter ^= 1; 1132 vq->packed.avail_used_flags ^= 1133 1 << VRING_PACKED_DESC_F_AVAIL | 1134 1 << VRING_PACKED_DESC_F_USED; 1135 } 1136 vq->packed.next_avail_idx = n; 1137 vq->free_head = vq->packed.desc_extra[id].next; 1138 1139 /* Store token and indirect buffer state. 
*/ 1140 vq->packed.desc_state[id].num = 1; 1141 vq->packed.desc_state[id].data = data; 1142 vq->packed.desc_state[id].indir_desc = desc; 1143 vq->packed.desc_state[id].last = id; 1144 1145 vq->num_added += 1; 1146 1147 pr_debug("Added buffer head %i to %p\n", head, vq); 1148 END_USE(vq); 1149 1150 return 0; 1151 1152 unmap_release: 1153 err_idx = i; 1154 1155 for (i = 0; i < err_idx; i++) 1156 vring_unmap_desc_packed(vq, &desc[i]); 1157 1158 kfree(desc); 1159 1160 END_USE(vq); 1161 return -ENOMEM; 1162 } 1163 1164 static inline int virtqueue_add_packed(struct virtqueue *_vq, 1165 struct scatterlist *sgs[], 1166 unsigned int total_sg, 1167 unsigned int out_sgs, 1168 unsigned int in_sgs, 1169 void *data, 1170 void *ctx, 1171 gfp_t gfp) 1172 { 1173 struct vring_virtqueue *vq = to_vvq(_vq); 1174 struct vring_packed_desc *desc; 1175 struct scatterlist *sg; 1176 unsigned int i, n, c, descs_used, err_idx; 1177 __le16 head_flags, flags; 1178 u16 head, id, prev, curr, avail_used_flags; 1179 1180 START_USE(vq); 1181 1182 BUG_ON(data == NULL); 1183 BUG_ON(ctx && vq->indirect); 1184 1185 if (unlikely(vq->broken)) { 1186 END_USE(vq); 1187 return -EIO; 1188 } 1189 1190 LAST_ADD_TIME_UPDATE(vq); 1191 1192 BUG_ON(total_sg == 0); 1193 1194 if (virtqueue_use_indirect(_vq, total_sg)) 1195 return virtqueue_add_indirect_packed(vq, sgs, total_sg, 1196 out_sgs, in_sgs, data, gfp); 1197 1198 head = vq->packed.next_avail_idx; 1199 avail_used_flags = vq->packed.avail_used_flags; 1200 1201 WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect); 1202 1203 desc = vq->packed.vring.desc; 1204 i = head; 1205 descs_used = total_sg; 1206 1207 if (unlikely(vq->vq.num_free < descs_used)) { 1208 pr_debug("Can't add buf len %i - avail = %i\n", 1209 descs_used, vq->vq.num_free); 1210 END_USE(vq); 1211 return -ENOSPC; 1212 } 1213 1214 id = vq->free_head; 1215 BUG_ON(id == vq->packed.vring.num); 1216 1217 curr = id; 1218 c = 0; 1219 for (n = 0; n < out_sgs + in_sgs; n++) { 1220 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 1221 dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ? 1222 DMA_TO_DEVICE : DMA_FROM_DEVICE); 1223 if (vring_mapping_error(vq, addr)) 1224 goto unmap_release; 1225 1226 flags = cpu_to_le16(vq->packed.avail_used_flags | 1227 (++c == total_sg ? 0 : VRING_DESC_F_NEXT) | 1228 (n < out_sgs ? 0 : VRING_DESC_F_WRITE)); 1229 if (i == head) 1230 head_flags = flags; 1231 else 1232 desc[i].flags = flags; 1233 1234 desc[i].addr = cpu_to_le64(addr); 1235 desc[i].len = cpu_to_le32(sg->length); 1236 desc[i].id = cpu_to_le16(id); 1237 1238 if (unlikely(vq->use_dma_api)) { 1239 vq->packed.desc_extra[curr].addr = addr; 1240 vq->packed.desc_extra[curr].len = sg->length; 1241 vq->packed.desc_extra[curr].flags = 1242 le16_to_cpu(flags); 1243 } 1244 prev = curr; 1245 curr = vq->packed.desc_extra[curr].next; 1246 1247 if ((unlikely(++i >= vq->packed.vring.num))) { 1248 i = 0; 1249 vq->packed.avail_used_flags ^= 1250 1 << VRING_PACKED_DESC_F_AVAIL | 1251 1 << VRING_PACKED_DESC_F_USED; 1252 } 1253 } 1254 } 1255 1256 if (i < head) 1257 vq->packed.avail_wrap_counter ^= 1; 1258 1259 /* We're using some buffers from the free list. */ 1260 vq->vq.num_free -= descs_used; 1261 1262 /* Update free pointer */ 1263 vq->packed.next_avail_idx = i; 1264 vq->free_head = curr; 1265 1266 /* Store token. 
*/ 1267 vq->packed.desc_state[id].num = descs_used; 1268 vq->packed.desc_state[id].data = data; 1269 vq->packed.desc_state[id].indir_desc = ctx; 1270 vq->packed.desc_state[id].last = prev; 1271 1272 /* 1273 * A driver MUST NOT make the first descriptor in the list 1274 * available before all subsequent descriptors comprising 1275 * the list are made available. 1276 */ 1277 virtio_wmb(vq->weak_barriers); 1278 vq->packed.vring.desc[head].flags = head_flags; 1279 vq->num_added += descs_used; 1280 1281 pr_debug("Added buffer head %i to %p\n", head, vq); 1282 END_USE(vq); 1283 1284 return 0; 1285 1286 unmap_release: 1287 err_idx = i; 1288 i = head; 1289 curr = vq->free_head; 1290 1291 vq->packed.avail_used_flags = avail_used_flags; 1292 1293 for (n = 0; n < total_sg; n++) { 1294 if (i == err_idx) 1295 break; 1296 vring_unmap_state_packed(vq, 1297 &vq->packed.desc_extra[curr]); 1298 curr = vq->packed.desc_extra[curr].next; 1299 i++; 1300 if (i >= vq->packed.vring.num) 1301 i = 0; 1302 } 1303 1304 END_USE(vq); 1305 return -EIO; 1306 } 1307 1308 static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq) 1309 { 1310 struct vring_virtqueue *vq = to_vvq(_vq); 1311 u16 new, old, off_wrap, flags, wrap_counter, event_idx; 1312 bool needs_kick; 1313 union { 1314 struct { 1315 __le16 off_wrap; 1316 __le16 flags; 1317 }; 1318 u32 u32; 1319 } snapshot; 1320 1321 START_USE(vq); 1322 1323 /* 1324 * We need to expose the new flags value before checking notification 1325 * suppressions. 1326 */ 1327 virtio_mb(vq->weak_barriers); 1328 1329 old = vq->packed.next_avail_idx - vq->num_added; 1330 new = vq->packed.next_avail_idx; 1331 vq->num_added = 0; 1332 1333 snapshot.u32 = *(u32 *)vq->packed.vring.device; 1334 flags = le16_to_cpu(snapshot.flags); 1335 1336 LAST_ADD_TIME_CHECK(vq); 1337 LAST_ADD_TIME_INVALID(vq); 1338 1339 if (flags != VRING_PACKED_EVENT_FLAG_DESC) { 1340 needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE); 1341 goto out; 1342 } 1343 1344 off_wrap = le16_to_cpu(snapshot.off_wrap); 1345 1346 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; 1347 event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); 1348 if (wrap_counter != vq->packed.avail_wrap_counter) 1349 event_idx -= vq->packed.vring.num; 1350 1351 needs_kick = vring_need_event(event_idx, new, old); 1352 out: 1353 END_USE(vq); 1354 return needs_kick; 1355 } 1356 1357 static void detach_buf_packed(struct vring_virtqueue *vq, 1358 unsigned int id, void **ctx) 1359 { 1360 struct vring_desc_state_packed *state = NULL; 1361 struct vring_packed_desc *desc; 1362 unsigned int i, curr; 1363 1364 state = &vq->packed.desc_state[id]; 1365 1366 /* Clear data ptr. */ 1367 state->data = NULL; 1368 1369 vq->packed.desc_extra[state->last].next = vq->free_head; 1370 vq->free_head = id; 1371 vq->vq.num_free += state->num; 1372 1373 if (unlikely(vq->use_dma_api)) { 1374 curr = id; 1375 for (i = 0; i < state->num; i++) { 1376 vring_unmap_state_packed(vq, 1377 &vq->packed.desc_extra[curr]); 1378 curr = vq->packed.desc_extra[curr].next; 1379 } 1380 } 1381 1382 if (vq->indirect) { 1383 u32 len; 1384 1385 /* Free the indirect table, if any, now that it's unmapped. 
*/ 1386 desc = state->indir_desc; 1387 if (!desc) 1388 return; 1389 1390 if (vq->use_dma_api) { 1391 len = vq->packed.desc_extra[id].len; 1392 for (i = 0; i < len / sizeof(struct vring_packed_desc); 1393 i++) 1394 vring_unmap_desc_packed(vq, &desc[i]); 1395 } 1396 kfree(desc); 1397 state->indir_desc = NULL; 1398 } else if (ctx) { 1399 *ctx = state->indir_desc; 1400 } 1401 } 1402 1403 static inline bool is_used_desc_packed(const struct vring_virtqueue *vq, 1404 u16 idx, bool used_wrap_counter) 1405 { 1406 bool avail, used; 1407 u16 flags; 1408 1409 flags = le16_to_cpu(vq->packed.vring.desc[idx].flags); 1410 avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL)); 1411 used = !!(flags & (1 << VRING_PACKED_DESC_F_USED)); 1412 1413 return avail == used && used == used_wrap_counter; 1414 } 1415 1416 static inline bool more_used_packed(const struct vring_virtqueue *vq) 1417 { 1418 return is_used_desc_packed(vq, vq->last_used_idx, 1419 vq->packed.used_wrap_counter); 1420 } 1421 1422 static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, 1423 unsigned int *len, 1424 void **ctx) 1425 { 1426 struct vring_virtqueue *vq = to_vvq(_vq); 1427 u16 last_used, id; 1428 void *ret; 1429 1430 START_USE(vq); 1431 1432 if (unlikely(vq->broken)) { 1433 END_USE(vq); 1434 return NULL; 1435 } 1436 1437 if (!more_used_packed(vq)) { 1438 pr_debug("No more buffers in queue\n"); 1439 END_USE(vq); 1440 return NULL; 1441 } 1442 1443 /* Only get used elements after they have been exposed by host. */ 1444 virtio_rmb(vq->weak_barriers); 1445 1446 last_used = vq->last_used_idx; 1447 id = le16_to_cpu(vq->packed.vring.desc[last_used].id); 1448 *len = le32_to_cpu(vq->packed.vring.desc[last_used].len); 1449 1450 if (unlikely(id >= vq->packed.vring.num)) { 1451 BAD_RING(vq, "id %u out of range\n", id); 1452 return NULL; 1453 } 1454 if (unlikely(!vq->packed.desc_state[id].data)) { 1455 BAD_RING(vq, "id %u is not a head!\n", id); 1456 return NULL; 1457 } 1458 1459 /* detach_buf_packed clears data, so grab it now. */ 1460 ret = vq->packed.desc_state[id].data; 1461 detach_buf_packed(vq, id, ctx); 1462 1463 vq->last_used_idx += vq->packed.desc_state[id].num; 1464 if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) { 1465 vq->last_used_idx -= vq->packed.vring.num; 1466 vq->packed.used_wrap_counter ^= 1; 1467 } 1468 1469 /* 1470 * If we expect an interrupt for the next entry, tell host 1471 * by writing event index and flush out the write before 1472 * the read in the next get_buf call. 1473 */ 1474 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC) 1475 virtio_store_mb(vq->weak_barriers, 1476 &vq->packed.vring.driver->off_wrap, 1477 cpu_to_le16(vq->last_used_idx | 1478 (vq->packed.used_wrap_counter << 1479 VRING_PACKED_EVENT_F_WRAP_CTR))); 1480 1481 LAST_ADD_TIME_INVALID(vq); 1482 1483 END_USE(vq); 1484 return ret; 1485 } 1486 1487 static void virtqueue_disable_cb_packed(struct virtqueue *_vq) 1488 { 1489 struct vring_virtqueue *vq = to_vvq(_vq); 1490 1491 if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) { 1492 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; 1493 vq->packed.vring.driver->flags = 1494 cpu_to_le16(vq->packed.event_flags_shadow); 1495 } 1496 } 1497 1498 static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq) 1499 { 1500 struct vring_virtqueue *vq = to_vvq(_vq); 1501 1502 START_USE(vq); 1503 1504 /* 1505 * We optimistically turn back on interrupts, then check if there was 1506 * more to do. 
1507 */ 1508 1509 if (vq->event) { 1510 vq->packed.vring.driver->off_wrap = 1511 cpu_to_le16(vq->last_used_idx | 1512 (vq->packed.used_wrap_counter << 1513 VRING_PACKED_EVENT_F_WRAP_CTR)); 1514 /* 1515 * We need to update event offset and event wrap 1516 * counter first before updating event flags. 1517 */ 1518 virtio_wmb(vq->weak_barriers); 1519 } 1520 1521 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 1522 vq->packed.event_flags_shadow = vq->event ? 1523 VRING_PACKED_EVENT_FLAG_DESC : 1524 VRING_PACKED_EVENT_FLAG_ENABLE; 1525 vq->packed.vring.driver->flags = 1526 cpu_to_le16(vq->packed.event_flags_shadow); 1527 } 1528 1529 END_USE(vq); 1530 return vq->last_used_idx | ((u16)vq->packed.used_wrap_counter << 1531 VRING_PACKED_EVENT_F_WRAP_CTR); 1532 } 1533 1534 static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap) 1535 { 1536 struct vring_virtqueue *vq = to_vvq(_vq); 1537 bool wrap_counter; 1538 u16 used_idx; 1539 1540 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; 1541 used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); 1542 1543 return is_used_desc_packed(vq, used_idx, wrap_counter); 1544 } 1545 1546 static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq) 1547 { 1548 struct vring_virtqueue *vq = to_vvq(_vq); 1549 u16 used_idx, wrap_counter; 1550 u16 bufs; 1551 1552 START_USE(vq); 1553 1554 /* 1555 * We optimistically turn back on interrupts, then check if there was 1556 * more to do. 1557 */ 1558 1559 if (vq->event) { 1560 /* TODO: tune this threshold */ 1561 bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4; 1562 wrap_counter = vq->packed.used_wrap_counter; 1563 1564 used_idx = vq->last_used_idx + bufs; 1565 if (used_idx >= vq->packed.vring.num) { 1566 used_idx -= vq->packed.vring.num; 1567 wrap_counter ^= 1; 1568 } 1569 1570 vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx | 1571 (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR)); 1572 1573 /* 1574 * We need to update event offset and event wrap 1575 * counter first before updating event flags. 1576 */ 1577 virtio_wmb(vq->weak_barriers); 1578 } 1579 1580 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 1581 vq->packed.event_flags_shadow = vq->event ? 1582 VRING_PACKED_EVENT_FLAG_DESC : 1583 VRING_PACKED_EVENT_FLAG_ENABLE; 1584 vq->packed.vring.driver->flags = 1585 cpu_to_le16(vq->packed.event_flags_shadow); 1586 } 1587 1588 /* 1589 * We need to update event suppression structure first 1590 * before re-checking for more used buffers. 1591 */ 1592 virtio_mb(vq->weak_barriers); 1593 1594 if (is_used_desc_packed(vq, 1595 vq->last_used_idx, 1596 vq->packed.used_wrap_counter)) { 1597 END_USE(vq); 1598 return false; 1599 } 1600 1601 END_USE(vq); 1602 return true; 1603 } 1604 1605 static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq) 1606 { 1607 struct vring_virtqueue *vq = to_vvq(_vq); 1608 unsigned int i; 1609 void *buf; 1610 1611 START_USE(vq); 1612 1613 for (i = 0; i < vq->packed.vring.num; i++) { 1614 if (!vq->packed.desc_state[i].data) 1615 continue; 1616 /* detach_buf clears data, so grab it now. */ 1617 buf = vq->packed.desc_state[i].data; 1618 detach_buf_packed(vq, i, NULL); 1619 END_USE(vq); 1620 return buf; 1621 } 1622 /* That should have freed everything. 
*/ 1623 BUG_ON(vq->vq.num_free != vq->packed.vring.num); 1624 1625 END_USE(vq); 1626 return NULL; 1627 } 1628 1629 static struct vring_desc_extra *vring_alloc_desc_extra(struct vring_virtqueue *vq, 1630 unsigned int num) 1631 { 1632 struct vring_desc_extra *desc_extra; 1633 unsigned int i; 1634 1635 desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra), 1636 GFP_KERNEL); 1637 if (!desc_extra) 1638 return NULL; 1639 1640 memset(desc_extra, 0, num * sizeof(struct vring_desc_extra)); 1641 1642 for (i = 0; i < num - 1; i++) 1643 desc_extra[i].next = i + 1; 1644 1645 return desc_extra; 1646 } 1647 1648 static struct virtqueue *vring_create_virtqueue_packed( 1649 unsigned int index, 1650 unsigned int num, 1651 unsigned int vring_align, 1652 struct virtio_device *vdev, 1653 bool weak_barriers, 1654 bool may_reduce_num, 1655 bool context, 1656 bool (*notify)(struct virtqueue *), 1657 void (*callback)(struct virtqueue *), 1658 const char *name) 1659 { 1660 struct vring_virtqueue *vq; 1661 struct vring_packed_desc *ring; 1662 struct vring_packed_desc_event *driver, *device; 1663 dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr; 1664 size_t ring_size_in_bytes, event_size_in_bytes; 1665 1666 ring_size_in_bytes = num * sizeof(struct vring_packed_desc); 1667 1668 ring = vring_alloc_queue(vdev, ring_size_in_bytes, 1669 &ring_dma_addr, 1670 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1671 if (!ring) 1672 goto err_ring; 1673 1674 event_size_in_bytes = sizeof(struct vring_packed_desc_event); 1675 1676 driver = vring_alloc_queue(vdev, event_size_in_bytes, 1677 &driver_event_dma_addr, 1678 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1679 if (!driver) 1680 goto err_driver; 1681 1682 device = vring_alloc_queue(vdev, event_size_in_bytes, 1683 &device_event_dma_addr, 1684 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1685 if (!device) 1686 goto err_device; 1687 1688 vq = kmalloc(sizeof(*vq), GFP_KERNEL); 1689 if (!vq) 1690 goto err_vq; 1691 1692 vq->vq.callback = callback; 1693 vq->vq.vdev = vdev; 1694 vq->vq.name = name; 1695 vq->vq.num_free = num; 1696 vq->vq.index = index; 1697 vq->we_own_ring = true; 1698 vq->notify = notify; 1699 vq->weak_barriers = weak_barriers; 1700 vq->broken = false; 1701 vq->last_used_idx = 0; 1702 vq->event_triggered = false; 1703 vq->num_added = 0; 1704 vq->packed_ring = true; 1705 vq->use_dma_api = vring_use_dma_api(vdev); 1706 #ifdef DEBUG 1707 vq->in_use = false; 1708 vq->last_add_time_valid = false; 1709 #endif 1710 1711 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && 1712 !context; 1713 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 1714 1715 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) 1716 vq->weak_barriers = false; 1717 1718 vq->packed.ring_dma_addr = ring_dma_addr; 1719 vq->packed.driver_event_dma_addr = driver_event_dma_addr; 1720 vq->packed.device_event_dma_addr = device_event_dma_addr; 1721 1722 vq->packed.ring_size_in_bytes = ring_size_in_bytes; 1723 vq->packed.event_size_in_bytes = event_size_in_bytes; 1724 1725 vq->packed.vring.num = num; 1726 vq->packed.vring.desc = ring; 1727 vq->packed.vring.driver = driver; 1728 vq->packed.vring.device = device; 1729 1730 vq->packed.next_avail_idx = 0; 1731 vq->packed.avail_wrap_counter = 1; 1732 vq->packed.used_wrap_counter = 1; 1733 vq->packed.event_flags_shadow = 0; 1734 vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL; 1735 1736 vq->packed.desc_state = kmalloc_array(num, 1737 sizeof(struct vring_desc_state_packed), 1738 GFP_KERNEL); 1739 if 
(!vq->packed.desc_state) 1740 goto err_desc_state; 1741 1742 memset(vq->packed.desc_state, 0, 1743 num * sizeof(struct vring_desc_state_packed)); 1744 1745 /* Put everything in free lists. */ 1746 vq->free_head = 0; 1747 1748 vq->packed.desc_extra = vring_alloc_desc_extra(vq, num); 1749 if (!vq->packed.desc_extra) 1750 goto err_desc_extra; 1751 1752 /* No callback? Tell other side not to bother us. */ 1753 if (!callback) { 1754 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; 1755 vq->packed.vring.driver->flags = 1756 cpu_to_le16(vq->packed.event_flags_shadow); 1757 } 1758 1759 spin_lock(&vdev->vqs_list_lock); 1760 list_add_tail(&vq->vq.list, &vdev->vqs); 1761 spin_unlock(&vdev->vqs_list_lock); 1762 return &vq->vq; 1763 1764 err_desc_extra: 1765 kfree(vq->packed.desc_state); 1766 err_desc_state: 1767 kfree(vq); 1768 err_vq: 1769 vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr); 1770 err_device: 1771 vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr); 1772 err_driver: 1773 vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr); 1774 err_ring: 1775 return NULL; 1776 } 1777 1778 1779 /* 1780 * Generic functions and exported symbols. 1781 */ 1782 1783 static inline int virtqueue_add(struct virtqueue *_vq, 1784 struct scatterlist *sgs[], 1785 unsigned int total_sg, 1786 unsigned int out_sgs, 1787 unsigned int in_sgs, 1788 void *data, 1789 void *ctx, 1790 gfp_t gfp) 1791 { 1792 struct vring_virtqueue *vq = to_vvq(_vq); 1793 1794 return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg, 1795 out_sgs, in_sgs, data, ctx, gfp) : 1796 virtqueue_add_split(_vq, sgs, total_sg, 1797 out_sgs, in_sgs, data, ctx, gfp); 1798 } 1799 1800 /** 1801 * virtqueue_add_sgs - expose buffers to other end 1802 * @_vq: the struct virtqueue we're talking about. 1803 * @sgs: array of terminated scatterlists. 1804 * @out_sgs: the number of scatterlists readable by other side 1805 * @in_sgs: the number of scatterlists which are writable (after readable ones) 1806 * @data: the token identifying the buffer. 1807 * @gfp: how to do memory allocations (if necessary). 1808 * 1809 * Caller must ensure we don't call this with other virtqueue operations 1810 * at the same time (except where noted). 1811 * 1812 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1813 */ 1814 int virtqueue_add_sgs(struct virtqueue *_vq, 1815 struct scatterlist *sgs[], 1816 unsigned int out_sgs, 1817 unsigned int in_sgs, 1818 void *data, 1819 gfp_t gfp) 1820 { 1821 unsigned int i, total_sg = 0; 1822 1823 /* Count them first. */ 1824 for (i = 0; i < out_sgs + in_sgs; i++) { 1825 struct scatterlist *sg; 1826 1827 for (sg = sgs[i]; sg; sg = sg_next(sg)) 1828 total_sg++; 1829 } 1830 return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, 1831 data, NULL, gfp); 1832 } 1833 EXPORT_SYMBOL_GPL(virtqueue_add_sgs); 1834 1835 /** 1836 * virtqueue_add_outbuf - expose output buffers to other end 1837 * @vq: the struct virtqueue we're talking about. 1838 * @sg: scatterlist (must be well-formed and terminated!) 1839 * @num: the number of entries in @sg readable by other side 1840 * @data: the token identifying the buffer. 1841 * @gfp: how to do memory allocations (if necessary). 1842 * 1843 * Caller must ensure we don't call this with other virtqueue operations 1844 * at the same time (except where noted). 1845 * 1846 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 
1847 */ 1848 int virtqueue_add_outbuf(struct virtqueue *vq, 1849 struct scatterlist *sg, unsigned int num, 1850 void *data, 1851 gfp_t gfp) 1852 { 1853 return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp); 1854 } 1855 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); 1856 1857 /** 1858 * virtqueue_add_inbuf - expose input buffers to other end 1859 * @vq: the struct virtqueue we're talking about. 1860 * @sg: scatterlist (must be well-formed and terminated!) 1861 * @num: the number of entries in @sg writable by other side 1862 * @data: the token identifying the buffer. 1863 * @gfp: how to do memory allocations (if necessary). 1864 * 1865 * Caller must ensure we don't call this with other virtqueue operations 1866 * at the same time (except where noted). 1867 * 1868 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1869 */ 1870 int virtqueue_add_inbuf(struct virtqueue *vq, 1871 struct scatterlist *sg, unsigned int num, 1872 void *data, 1873 gfp_t gfp) 1874 { 1875 return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp); 1876 } 1877 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); 1878 1879 /** 1880 * virtqueue_add_inbuf_ctx - expose input buffers to other end 1881 * @vq: the struct virtqueue we're talking about. 1882 * @sg: scatterlist (must be well-formed and terminated!) 1883 * @num: the number of entries in @sg writable by other side 1884 * @data: the token identifying the buffer. 1885 * @ctx: extra context for the token 1886 * @gfp: how to do memory allocations (if necessary). 1887 * 1888 * Caller must ensure we don't call this with other virtqueue operations 1889 * at the same time (except where noted). 1890 * 1891 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1892 */ 1893 int virtqueue_add_inbuf_ctx(struct virtqueue *vq, 1894 struct scatterlist *sg, unsigned int num, 1895 void *data, 1896 void *ctx, 1897 gfp_t gfp) 1898 { 1899 return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp); 1900 } 1901 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx); 1902 1903 /** 1904 * virtqueue_kick_prepare - first half of split virtqueue_kick call. 1905 * @_vq: the struct virtqueue 1906 * 1907 * Instead of virtqueue_kick(), you can do: 1908 * if (virtqueue_kick_prepare(vq)) 1909 * virtqueue_notify(vq); 1910 * 1911 * This is sometimes useful because the virtqueue_kick_prepare() needs 1912 * to be serialized, but the actual virtqueue_notify() call does not. 1913 */ 1914 bool virtqueue_kick_prepare(struct virtqueue *_vq) 1915 { 1916 struct vring_virtqueue *vq = to_vvq(_vq); 1917 1918 return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) : 1919 virtqueue_kick_prepare_split(_vq); 1920 } 1921 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare); 1922 1923 /** 1924 * virtqueue_notify - second half of split virtqueue_kick call. 1925 * @_vq: the struct virtqueue 1926 * 1927 * This does not need to be serialized. 1928 * 1929 * Returns false if host notify failed or queue is broken, otherwise true. 1930 */ 1931 bool virtqueue_notify(struct virtqueue *_vq) 1932 { 1933 struct vring_virtqueue *vq = to_vvq(_vq); 1934 1935 if (unlikely(vq->broken)) 1936 return false; 1937 1938 /* Prod other side to tell it about changes. */ 1939 if (!vq->notify(_vq)) { 1940 vq->broken = true; 1941 return false; 1942 } 1943 return true; 1944 } 1945 EXPORT_SYMBOL_GPL(virtqueue_notify); 1946 1947 /** 1948 * virtqueue_kick - update after add_buf 1949 * @vq: the struct virtqueue 1950 * 1951 * After one or more virtqueue_add_* calls, invoke this to kick 1952 * the other side. 
1953 * 1954 * Caller must ensure we don't call this with other virtqueue 1955 * operations at the same time (except where noted). 1956 * 1957 * Returns false if kick failed, otherwise true. 1958 */ 1959 bool virtqueue_kick(struct virtqueue *vq) 1960 { 1961 if (virtqueue_kick_prepare(vq)) 1962 return virtqueue_notify(vq); 1963 return true; 1964 } 1965 EXPORT_SYMBOL_GPL(virtqueue_kick); 1966 1967 /** 1968 * virtqueue_get_buf_ctx - get the next used buffer 1969 * @_vq: the struct virtqueue we're talking about. 1970 * @len: the length written into the buffer 1971 * @ctx: extra context for the token 1972 * 1973 * If the device wrote data into the buffer, @len will be set to the 1974 * amount written. This means you don't need to clear the buffer 1975 * beforehand to ensure there's no data leakage in the case of short 1976 * writes. 1977 * 1978 * Caller must ensure we don't call this with other virtqueue 1979 * operations at the same time (except where noted). 1980 * 1981 * Returns NULL if there are no used buffers, or the "data" token 1982 * handed to virtqueue_add_*(). 1983 */ 1984 void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, 1985 void **ctx) 1986 { 1987 struct vring_virtqueue *vq = to_vvq(_vq); 1988 1989 return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) : 1990 virtqueue_get_buf_ctx_split(_vq, len, ctx); 1991 } 1992 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx); 1993 1994 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) 1995 { 1996 return virtqueue_get_buf_ctx(_vq, len, NULL); 1997 } 1998 EXPORT_SYMBOL_GPL(virtqueue_get_buf); 1999 /** 2000 * virtqueue_disable_cb - disable callbacks 2001 * @_vq: the struct virtqueue we're talking about. 2002 * 2003 * Note that this is not necessarily synchronous, hence unreliable and only 2004 * useful as an optimization. 2005 * 2006 * Unlike other operations, this need not be serialized. 2007 */ 2008 void virtqueue_disable_cb(struct virtqueue *_vq) 2009 { 2010 struct vring_virtqueue *vq = to_vvq(_vq); 2011 2012 /* If device triggered an event already it won't trigger one again: 2013 * no need to disable. 2014 */ 2015 if (vq->event_triggered) 2016 return; 2017 2018 if (vq->packed_ring) 2019 virtqueue_disable_cb_packed(_vq); 2020 else 2021 virtqueue_disable_cb_split(_vq); 2022 } 2023 EXPORT_SYMBOL_GPL(virtqueue_disable_cb); 2024 2025 /** 2026 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb 2027 * @_vq: the struct virtqueue we're talking about. 2028 * 2029 * This re-enables callbacks; it returns current queue state 2030 * in an opaque unsigned value. This value should be later tested by 2031 * virtqueue_poll, to detect a possible race between the driver checking for 2032 * more work, and enabling callbacks. 2033 * 2034 * Caller must ensure we don't call this with other virtqueue 2035 * operations at the same time (except where noted). 2036 */ 2037 unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq) 2038 { 2039 struct vring_virtqueue *vq = to_vvq(_vq); 2040 2041 if (vq->event_triggered) 2042 vq->event_triggered = false; 2043 2044 return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) : 2045 virtqueue_enable_cb_prepare_split(_vq); 2046 } 2047 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); 2048 2049 /** 2050 * virtqueue_poll - query pending used buffers 2051 * @_vq: the struct virtqueue we're talking about. 2052 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare). 2053 * 2054 * Returns "true" if there are pending used buffers in the queue. 
/**
 * virtqueue_poll - query pending used buffers
 * @_vq: the struct virtqueue we're talking about.
 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
 *
 * Returns "true" if there are pending used buffers in the queue.
 *
 * This does not need to be serialized.
 */
bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        if (unlikely(vq->broken))
                return false;

        virtio_mb(vq->weak_barriers);
        return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
                                 virtqueue_poll_split(_vq, last_used_idx);
}
EXPORT_SYMBOL_GPL(virtqueue_poll);

/**
 * virtqueue_enable_cb - restart callbacks after disable_cb.
 * @_vq: the struct virtqueue we're talking about.
 *
 * This re-enables callbacks; it returns "false" if there are pending
 * buffers in the queue, to detect a possible race between the driver
 * checking for more work, and enabling callbacks.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 */
bool virtqueue_enable_cb(struct virtqueue *_vq)
{
        unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq);

        return !virtqueue_poll(_vq, last_used_idx);
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb);

/**
 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
 * @_vq: the struct virtqueue we're talking about.
 *
 * This re-enables callbacks but hints to the other side to delay
 * interrupts until most of the available buffers have been processed;
 * it returns "false" if there are many pending buffers in the queue,
 * to detect a possible race between the driver checking for more work,
 * and enabling callbacks.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 */
bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        if (vq->event_triggered)
                vq->event_triggered = false;

        return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
                                 virtqueue_enable_cb_delayed_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);

/**
 * virtqueue_detach_unused_buf - detach first unused buffer
 * @_vq: the struct virtqueue we're talking about.
 *
 * Returns NULL or the "data" token handed to virtqueue_add_*().
 * This is not valid on an active queue; it is useful only for device
 * shutdown.
 */
void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
                                 virtqueue_detach_unused_buf_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
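/*
 * Example (illustrative sketch): during device removal, once the queue has
 * been stopped and can no longer be used, a driver reclaims the tokens it
 * still owns.  "free_buf()" is a hypothetical driver routine:
 *
 *      void *buf;
 *
 *      while ((buf = virtqueue_detach_unused_buf(vq)))
 *              free_buf(buf);
 */
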
static inline bool more_used(const struct vring_virtqueue *vq)
{
        return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
}

irqreturn_t vring_interrupt(int irq, void *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        if (!more_used(vq)) {
                pr_debug("virtqueue interrupt with no work for %p\n", vq);
                return IRQ_NONE;
        }

        if (unlikely(vq->broken))
                return IRQ_HANDLED;

        /* Just a hint for performance: so it's ok that this can be racy! */
        if (vq->event)
                vq->event_triggered = true;

        pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
        if (vq->vq.callback)
                vq->vq.callback(&vq->vq);

        return IRQ_HANDLED;
}
EXPORT_SYMBOL_GPL(vring_interrupt);
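/*
 * Example (illustrative sketch): transports normally register
 * vring_interrupt() as the handler for the virtqueue's interrupt, passing
 * the virtqueue itself as the cookie.  "irq" and "name" are assumed to come
 * from the transport, and the right flags depend on how the interrupt is
 * shared:
 *
 *      err = request_irq(irq, vring_interrupt, IRQF_SHARED, name, vq);
 */
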
/* Only available for split ring */
struct virtqueue *__vring_new_virtqueue(unsigned int index,
                                        struct vring vring,
                                        struct virtio_device *vdev,
                                        bool weak_barriers,
                                        bool context,
                                        bool (*notify)(struct virtqueue *),
                                        void (*callback)(struct virtqueue *),
                                        const char *name)
{
        struct vring_virtqueue *vq;

        if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
                return NULL;

        vq = kmalloc(sizeof(*vq), GFP_KERNEL);
        if (!vq)
                return NULL;

        vq->packed_ring = false;
        vq->vq.callback = callback;
        vq->vq.vdev = vdev;
        vq->vq.name = name;
        vq->vq.num_free = vring.num;
        vq->vq.index = index;
        vq->we_own_ring = false;
        vq->notify = notify;
        vq->weak_barriers = weak_barriers;
        vq->broken = false;
        vq->last_used_idx = 0;
        vq->event_triggered = false;
        vq->num_added = 0;
        vq->use_dma_api = vring_use_dma_api(vdev);
#ifdef DEBUG
        vq->in_use = false;
        vq->last_add_time_valid = false;
#endif

        vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
                !context;
        vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);

        if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
                vq->weak_barriers = false;

        vq->split.queue_dma_addr = 0;
        vq->split.queue_size_in_bytes = 0;

        vq->split.vring = vring;
        vq->split.avail_flags_shadow = 0;
        vq->split.avail_idx_shadow = 0;

        /* No callback? Tell other side not to bother us. */
        if (!callback) {
                vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
                if (!vq->event)
                        vq->split.vring.avail->flags = cpu_to_virtio16(vdev,
                                        vq->split.avail_flags_shadow);
        }

        vq->split.desc_state = kmalloc_array(vring.num,
                        sizeof(struct vring_desc_state_split), GFP_KERNEL);
        if (!vq->split.desc_state)
                goto err_state;

        vq->split.desc_extra = vring_alloc_desc_extra(vq, vring.num);
        if (!vq->split.desc_extra)
                goto err_extra;

        /* Put everything in free lists. */
        vq->free_head = 0;
        memset(vq->split.desc_state, 0, vring.num *
                        sizeof(struct vring_desc_state_split));

        spin_lock(&vdev->vqs_list_lock);
        list_add_tail(&vq->vq.list, &vdev->vqs);
        spin_unlock(&vdev->vqs_list_lock);
        return &vq->vq;

err_extra:
        kfree(vq->split.desc_state);
err_state:
        kfree(vq);
        return NULL;
}
EXPORT_SYMBOL_GPL(__vring_new_virtqueue);

struct virtqueue *vring_create_virtqueue(
        unsigned int index,
        unsigned int num,
        unsigned int vring_align,
        struct virtio_device *vdev,
        bool weak_barriers,
        bool may_reduce_num,
        bool context,
        bool (*notify)(struct virtqueue *),
        void (*callback)(struct virtqueue *),
        const char *name)
{
        if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
                return vring_create_virtqueue_packed(index, num, vring_align,
                                vdev, weak_barriers, may_reduce_num,
                                context, notify, callback, name);

        return vring_create_virtqueue_split(index, num, vring_align,
                        vdev, weak_barriers, may_reduce_num,
                        context, notify, callback, name);
}
EXPORT_SYMBOL_GPL(vring_create_virtqueue);

/* Only available for split ring */
struct virtqueue *vring_new_virtqueue(unsigned int index,
                                      unsigned int num,
                                      unsigned int vring_align,
                                      struct virtio_device *vdev,
                                      bool weak_barriers,
                                      bool context,
                                      void *pages,
                                      bool (*notify)(struct virtqueue *vq),
                                      void (*callback)(struct virtqueue *vq),
                                      const char *name)
{
        struct vring vring;

        if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
                return NULL;

        vring_init(&vring, num, pages, vring_align);
        return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
                                     notify, callback, name);
}
EXPORT_SYMBOL_GPL(vring_new_virtqueue);

void vring_del_virtqueue(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        spin_lock(&vq->vq.vdev->vqs_list_lock);
        list_del(&_vq->list);
        spin_unlock(&vq->vq.vdev->vqs_list_lock);

        if (vq->we_own_ring) {
                if (vq->packed_ring) {
                        vring_free_queue(vq->vq.vdev,
                                         vq->packed.ring_size_in_bytes,
                                         vq->packed.vring.desc,
                                         vq->packed.ring_dma_addr);

                        vring_free_queue(vq->vq.vdev,
                                         vq->packed.event_size_in_bytes,
                                         vq->packed.vring.driver,
                                         vq->packed.driver_event_dma_addr);

                        vring_free_queue(vq->vq.vdev,
                                         vq->packed.event_size_in_bytes,
                                         vq->packed.vring.device,
                                         vq->packed.device_event_dma_addr);

                        kfree(vq->packed.desc_state);
                        kfree(vq->packed.desc_extra);
                } else {
                        vring_free_queue(vq->vq.vdev,
                                         vq->split.queue_size_in_bytes,
                                         vq->split.vring.desc,
                                         vq->split.queue_dma_addr);
                }
        }
        if (!vq->packed_ring) {
                kfree(vq->split.desc_state);
                kfree(vq->split.desc_extra);
        }
        kfree(vq);
}
EXPORT_SYMBOL_GPL(vring_del_virtqueue);
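/*
 * Example (illustrative sketch): a transport's queue setup typically ends up
 * here; the ring size, the alignment and the "my_notify" callback are
 * placeholders for transport-specific values:
 *
 *      vq = vring_create_virtqueue(index, 256, PAGE_SIZE, vdev,
 *                                  true, true, ctx, my_notify, callback, name);
 *      if (!vq)
 *              return ERR_PTR(-ENOMEM);
 *
 * and the matching teardown is simply:
 *
 *      vring_del_virtqueue(vq);
 */
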
/* Manipulates transport-specific feature bits. */
void vring_transport_features(struct virtio_device *vdev)
{
        unsigned int i;

        for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
                switch (i) {
                case VIRTIO_RING_F_INDIRECT_DESC:
                        break;
                case VIRTIO_RING_F_EVENT_IDX:
                        break;
                case VIRTIO_F_VERSION_1:
                        break;
                case VIRTIO_F_ACCESS_PLATFORM:
                        break;
                case VIRTIO_F_RING_PACKED:
                        break;
                case VIRTIO_F_ORDER_PLATFORM:
                        break;
                default:
                        /* We don't understand this bit. */
                        __virtio_clear_bit(vdev, i);
                }
        }
}
EXPORT_SYMBOL_GPL(vring_transport_features);

/**
 * virtqueue_get_vring_size - return the size of the virtqueue's vring
 * @_vq: the struct virtqueue containing the vring of interest.
 *
 * Returns the size of the vring. This is mainly used for boasting to
 * userspace. Unlike other operations, this need not be serialized.
 */
unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
}
EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);

bool virtqueue_is_broken(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        return READ_ONCE(vq->broken);
}
EXPORT_SYMBOL_GPL(virtqueue_is_broken);

/*
 * This should prevent the device from being used, allowing drivers to
 * recover. You may need to grab appropriate locks to flush.
 */
void virtio_break_device(struct virtio_device *dev)
{
        struct virtqueue *_vq;

        spin_lock(&dev->vqs_list_lock);
        list_for_each_entry(_vq, &dev->vqs, list) {
                struct vring_virtqueue *vq = to_vvq(_vq);

                /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
                WRITE_ONCE(vq->broken, true);
        }
        spin_unlock(&dev->vqs_list_lock);
}
EXPORT_SYMBOL_GPL(virtio_break_device);

dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        BUG_ON(!vq->we_own_ring);

        if (vq->packed_ring)
                return vq->packed.ring_dma_addr;

        return vq->split.queue_dma_addr;
}
EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);

dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        BUG_ON(!vq->we_own_ring);

        if (vq->packed_ring)
                return vq->packed.driver_event_dma_addr;

        return vq->split.queue_dma_addr +
                ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
}
EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);

dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        BUG_ON(!vq->we_own_ring);

        if (vq->packed_ring)
                return vq->packed.device_event_dma_addr;

        return vq->split.queue_dma_addr +
                ((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
}
EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);

/* Only available for split ring */
const struct vring *virtqueue_get_vring(struct virtqueue *vq)
{
        return &to_vvq(vq)->split.vring;
}
EXPORT_SYMBOL_GPL(virtqueue_get_vring);

MODULE_LICENSE("GPL");
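/*
 * Example (illustrative sketch): transports that let this file allocate the
 * ring use the address getters above to program the device.  The
 * "vp_iowrite64()" helper and the "cfg" register layout are hypothetical
 * stand-ins for the transport's own accessors:
 *
 *      vp_iowrite64(virtqueue_get_desc_addr(vq),  &cfg->queue_desc);
 *      vp_iowrite64(virtqueue_get_avail_addr(vq), &cfg->queue_driver);
 *      vp_iowrite64(virtqueue_get_used_addr(vq),  &cfg->queue_device);
 */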