// SPDX-License-Identifier: GPL-2.0-or-later
/* Virtio ring implementation.
 *
 *  Copyright 2007 Rusty Russell IBM Corporation
 */
#include <linux/virtio.h>
#include <linux/virtio_ring.h>
#include <linux/virtio_config.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/hrtimer.h>
#include <linux/dma-mapping.h>
#include <linux/spinlock.h>
#include <xen/xen.h>

static bool force_used_validation = false;
module_param(force_used_validation, bool, 0444);

#ifdef DEBUG
/* For development, we want to crash whenever the ring is screwed. */
#define BAD_RING(_vq, fmt, args...)				\
	do {							\
		dev_err(&(_vq)->vq.vdev->dev,			\
			"%s:"fmt, (_vq)->vq.name, ##args);	\
		BUG();						\
	} while (0)
/* Caller is supposed to guarantee no reentry. */
#define START_USE(_vq)						\
	do {							\
		if ((_vq)->in_use)				\
			panic("%s:in_use = %i\n",		\
			      (_vq)->vq.name, (_vq)->in_use);	\
		(_vq)->in_use = __LINE__;			\
	} while (0)
#define END_USE(_vq) \
	do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0)
#define LAST_ADD_TIME_UPDATE(_vq)				\
	do {							\
		ktime_t now = ktime_get();			\
								\
		/* No kick or get, with .1 second between?  Warn. */ \
		if ((_vq)->last_add_time_valid)			\
			WARN_ON(ktime_to_ms(ktime_sub(now,	\
				(_vq)->last_add_time)) > 100);	\
		(_vq)->last_add_time = now;			\
		(_vq)->last_add_time_valid = true;		\
	} while (0)
#define LAST_ADD_TIME_CHECK(_vq)				\
	do {							\
		if ((_vq)->last_add_time_valid) {		\
			WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
					(_vq)->last_add_time)) > 100); \
		}						\
	} while (0)
#define LAST_ADD_TIME_INVALID(_vq)				\
	((_vq)->last_add_time_valid = false)
#else
#define BAD_RING(_vq, fmt, args...)				\
	do {							\
		dev_err(&_vq->vq.vdev->dev,			\
			"%s:"fmt, (_vq)->vq.name, ##args);	\
		(_vq)->broken = true;				\
	} while (0)
#define START_USE(vq)
#define END_USE(vq)
#define LAST_ADD_TIME_UPDATE(vq)
#define LAST_ADD_TIME_CHECK(vq)
#define LAST_ADD_TIME_INVALID(vq)
#endif

struct vring_desc_state_split {
	void *data;			/* Data for callback. */
	struct vring_desc *indir_desc;	/* Indirect descriptor, if any. */
};

struct vring_desc_state_packed {
	void *data;			/* Data for callback. */
	struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
	u16 num;			/* Descriptor list length. */
	u16 last;			/* The last desc state in a list. */
};

struct vring_desc_extra {
	dma_addr_t addr;		/* Descriptor DMA addr. */
	u32 len;			/* Descriptor length. */
	u16 flags;			/* Descriptor flags. */
	u16 next;			/* The next desc state in a list. */
};

struct vring_virtqueue {
	struct virtqueue vq;

	/* Is this a packed ring? */
	bool packed_ring;

	/* Is DMA API used? */
	bool use_dma_api;

	/* Can we use weak barriers? */
	bool weak_barriers;

	/* Other side has made a mess, don't try any more. */
	bool broken;

	/* Host supports indirect buffers */
	bool indirect;

	/* Host publishes avail event idx */
	bool event;

	/* Head of free buffer list. */
	unsigned int free_head;
	/* Number we've added since last sync. */
	unsigned int num_added;

	/* Last used index we've seen. */
	u16 last_used_idx;

	/* Hint for event idx: already triggered no need to disable. */
	bool event_triggered;

	union {
		/* Available for split ring */
		struct {
			/* Actual memory layout for this queue. */
			struct vring vring;

			/* Last written value to avail->flags */
			u16 avail_flags_shadow;

			/*
			 * Last written value to avail->idx in
			 * guest byte order.
			 */
			u16 avail_idx_shadow;

			/* Per-descriptor state. */
			struct vring_desc_state_split *desc_state;
			struct vring_desc_extra *desc_extra;

			/* DMA address and size information */
			dma_addr_t queue_dma_addr;
			size_t queue_size_in_bytes;
		} split;

		/* Available for packed ring */
		struct {
			/* Actual memory layout for this queue. */
			struct {
				unsigned int num;
				struct vring_packed_desc *desc;
				struct vring_packed_desc_event *driver;
				struct vring_packed_desc_event *device;
			} vring;

			/* Driver ring wrap counter. */
			bool avail_wrap_counter;

			/* Device ring wrap counter. */
			bool used_wrap_counter;

			/* Avail used flags. */
			u16 avail_used_flags;

			/* Index of the next avail descriptor. */
			u16 next_avail_idx;

			/*
			 * Last written value to driver->flags in
			 * guest byte order.
			 */
			u16 event_flags_shadow;

			/* Per-descriptor state. */
			struct vring_desc_state_packed *desc_state;
			struct vring_desc_extra *desc_extra;

			/* DMA address and size information */
			dma_addr_t ring_dma_addr;
			dma_addr_t driver_event_dma_addr;
			dma_addr_t device_event_dma_addr;
			size_t ring_size_in_bytes;
			size_t event_size_in_bytes;
		} packed;
	};

	/* Per-descriptor in buffer length */
	u32 *buflen;

	/* How to notify other side. FIXME: commonalize hcalls! */
	bool (*notify)(struct virtqueue *vq);

	/* DMA, allocation, and size information */
	bool we_own_ring;

#ifdef DEBUG
	/* They're supposed to lock for us. */
	unsigned int in_use;

	/* Figure out if their kicks are too delayed. */
	bool last_add_time_valid;
	ktime_t last_add_time;
#endif
};


/*
 * Helpers.
 */

#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)

static inline bool virtqueue_use_indirect(struct virtqueue *_vq,
					  unsigned int total_sg)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	/*
	 * If the host supports indirect descriptor tables, and we have multiple
	 * buffers, then go indirect. FIXME: tune this threshold
	 */
	return (vq->indirect && total_sg > 1 && vq->vq.num_free);
}

/*
 * Modern virtio devices have feature bits to specify whether they need a
 * quirk and bypass the IOMMU. If not there, just use the DMA API.
 *
 * If there, the interaction between virtio and DMA API is messy.
 *
 * On most systems with virtio, physical addresses match bus addresses,
 * and it doesn't particularly matter whether we use the DMA API.
 *
 * On some systems, including Xen and any system with a physical device
 * that speaks virtio behind a physical IOMMU, we must use the DMA API
 * for virtio DMA to work at all.
 *
 * On other systems, including SPARC and PPC64, virtio-pci devices are
 * enumerated as though they are behind an IOMMU, but the virtio host
 * ignores the IOMMU, so we must either pretend that the IOMMU isn't
 * there or somehow map everything as the identity.
 *
 * For the time being, we preserve historic behavior and bypass the DMA
 * API.
 *
 * TODO: install a per-device DMA ops structure that does the right thing
 * taking into account all the above quirks, and use the DMA API
 * unconditionally on the data path.
 */

static bool vring_use_dma_api(struct virtio_device *vdev)
{
	if (!virtio_has_dma_quirk(vdev))
		return true;

	/* Otherwise, we are left to guess. */
	/*
	 * In theory, it's possible to have a buggy QEMU-supplied
	 * emulated Q35 IOMMU and Xen enabled at the same time.  On
	 * such a configuration, virtio has never worked and will
	 * not work without an even larger kludge.  Instead, enable
	 * the DMA API if we're a Xen guest, which at least allows
	 * all of the sensible Xen configurations to work correctly.
	 */
	if (xen_domain())
		return true;

	return false;
}

size_t virtio_max_dma_size(struct virtio_device *vdev)
{
	size_t max_segment_size = SIZE_MAX;

	if (vring_use_dma_api(vdev))
		max_segment_size = dma_max_mapping_size(&vdev->dev);

	return max_segment_size;
}
EXPORT_SYMBOL_GPL(virtio_max_dma_size);

static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
			       dma_addr_t *dma_handle, gfp_t flag)
{
	if (vring_use_dma_api(vdev)) {
		return dma_alloc_coherent(vdev->dev.parent, size,
					  dma_handle, flag);
	} else {
		void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);

		if (queue) {
			phys_addr_t phys_addr = virt_to_phys(queue);
			*dma_handle = (dma_addr_t)phys_addr;

			/*
			 * Sanity check: make sure we didn't truncate
			 * the address.  The only arches I can find that
			 * have 64-bit phys_addr_t but 32-bit dma_addr_t
			 * are certain non-highmem MIPS and x86
			 * configurations, but these configurations
			 * should never allocate physical pages above 32
			 * bits, so this is fine.  Just in case, throw a
			 * warning and abort if we end up with an
			 * unrepresentable address.
			 */
			if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
				free_pages_exact(queue, PAGE_ALIGN(size));
				return NULL;
			}
		}
		return queue;
	}
}

static void vring_free_queue(struct virtio_device *vdev, size_t size,
			     void *queue, dma_addr_t dma_handle)
{
	if (vring_use_dma_api(vdev))
		dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
	else
		free_pages_exact(queue, PAGE_ALIGN(size));
}

/*
 * The DMA ops on various arches are rather gnarly right now, and
 * making all of the arch DMA ops work on the vring device itself
 * is a mess.  For now, we use the parent device for DMA ops.
 */
static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq)
{
	return vq->vq.vdev->dev.parent;
}

/* Map one sg entry. */
static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
				   struct scatterlist *sg,
				   enum dma_data_direction direction)
{
	if (!vq->use_dma_api)
		return (dma_addr_t)sg_phys(sg);

	/*
	 * We can't use dma_map_sg, because we don't use scatterlists in
	 * the way it expects (we don't guarantee that the scatterlist
	 * will exist for the lifetime of the mapping).
	 */
	return dma_map_page(vring_dma_dev(vq),
			    sg_page(sg), sg->offset, sg->length,
			    direction);
}

static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
				   void *cpu_addr, size_t size,
				   enum dma_data_direction direction)
{
	if (!vq->use_dma_api)
		return (dma_addr_t)virt_to_phys(cpu_addr);

	return dma_map_single(vring_dma_dev(vq),
			      cpu_addr, size, direction);
}

static int vring_mapping_error(const struct vring_virtqueue *vq,
			       dma_addr_t addr)
{
	if (!vq->use_dma_api)
		return 0;

	return dma_mapping_error(vring_dma_dev(vq), addr);
}


/*
 * Split ring specific functions - *_split().
 */

static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
					   struct vring_desc *desc)
{
	u16 flags;

	if (!vq->use_dma_api)
		return;

	flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);

	if (flags & VRING_DESC_F_INDIRECT) {
		dma_unmap_single(vring_dma_dev(vq),
				 virtio64_to_cpu(vq->vq.vdev, desc->addr),
				 virtio32_to_cpu(vq->vq.vdev, desc->len),
				 (flags & VRING_DESC_F_WRITE) ?
				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
	} else {
		dma_unmap_page(vring_dma_dev(vq),
			       virtio64_to_cpu(vq->vq.vdev, desc->addr),
			       virtio32_to_cpu(vq->vq.vdev, desc->len),
			       (flags & VRING_DESC_F_WRITE) ?
			       DMA_FROM_DEVICE : DMA_TO_DEVICE);
	}
}

static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
					  unsigned int i)
{
	struct vring_desc_extra *extra = vq->split.desc_extra;
	u16 flags;

	if (!vq->use_dma_api)
		goto out;

	flags = extra[i].flags;

	if (flags & VRING_DESC_F_INDIRECT) {
		dma_unmap_single(vring_dma_dev(vq),
				 extra[i].addr,
				 extra[i].len,
				 (flags & VRING_DESC_F_WRITE) ?
				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
	} else {
		dma_unmap_page(vring_dma_dev(vq),
			       extra[i].addr,
			       extra[i].len,
			       (flags & VRING_DESC_F_WRITE) ?
			       DMA_FROM_DEVICE : DMA_TO_DEVICE);
	}

out:
	return extra[i].next;
}

static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
					       unsigned int total_sg,
					       gfp_t gfp)
{
	struct vring_desc *desc;
	unsigned int i;

	/*
	 * We require lowmem mappings for the descriptors because
	 * otherwise virt_to_phys will give us bogus addresses in the
	 * virtqueue.
	 */
	gfp &= ~__GFP_HIGHMEM;

	desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp);
	if (!desc)
		return NULL;

	for (i = 0; i < total_sg; i++)
		desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
	return desc;
}

static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
						    struct vring_desc *desc,
						    unsigned int i,
						    dma_addr_t addr,
						    unsigned int len,
						    u16 flags,
						    bool indirect)
{
	struct vring_virtqueue *vring = to_vvq(vq);
	struct vring_desc_extra *extra = vring->split.desc_extra;
	u16 next;

	desc[i].flags = cpu_to_virtio16(vq->vdev, flags);
	desc[i].addr = cpu_to_virtio64(vq->vdev, addr);
	desc[i].len = cpu_to_virtio32(vq->vdev, len);

	if (!indirect) {
		next = extra[i].next;
		desc[i].next = cpu_to_virtio16(vq->vdev, next);

		extra[i].addr = addr;
		extra[i].len = len;
		extra[i].flags = flags;
	} else
		next = virtio16_to_cpu(vq->vdev, desc[i].next);

	return next;
}

static inline int virtqueue_add_split(struct virtqueue *_vq,
				      struct scatterlist *sgs[],
				      unsigned int total_sg,
				      unsigned int out_sgs,
				      unsigned int in_sgs,
				      void *data,
				      void *ctx,
				      gfp_t gfp)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	struct scatterlist *sg;
	struct vring_desc *desc;
	unsigned int i, n, avail, descs_used, prev, err_idx;
	int head;
	bool indirect;
	u32 buflen = 0;

	START_USE(vq);

	BUG_ON(data == NULL);
	BUG_ON(ctx && vq->indirect);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return -EIO;
	}

	LAST_ADD_TIME_UPDATE(vq);

	BUG_ON(total_sg == 0);

	head = vq->free_head;

	if (virtqueue_use_indirect(_vq, total_sg))
		desc = alloc_indirect_split(_vq, total_sg, gfp);
	else {
		desc = NULL;
		WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
	}

	if (desc) {
		/* Use a single buffer which doesn't continue */
		indirect = true;
		/* Set up rest to use this indirect table. */
		i = 0;
		descs_used = 1;
	} else {
		indirect = false;
		desc = vq->split.vring.desc;
		i = head;
		descs_used = total_sg;
	}

	if (vq->vq.num_free < descs_used) {
		pr_debug("Can't add buf len %i - avail = %i\n",
			 descs_used, vq->vq.num_free);
		/* FIXME: for historical reasons, we force a notify here if
		 * there are outgoing parts to the buffer.  Presumably the
		 * host should service the ring ASAP. */
		if (out_sgs)
			vq->notify(&vq->vq);
		if (indirect)
			kfree(desc);
		END_USE(vq);
		return -ENOSPC;
	}

	for (n = 0; n < out_sgs; n++) {
		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
			if (vring_mapping_error(vq, addr))
				goto unmap_release;

			prev = i;
			/* Note that we trust the indirect descriptor
			 * table since it uses streaming DMA mappings.
			 */
			i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
						     VRING_DESC_F_NEXT,
						     indirect);
		}
	}
	for (; n < (out_sgs + in_sgs); n++) {
		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
			if (vring_mapping_error(vq, addr))
				goto unmap_release;

			prev = i;
			/* Note that we trust the indirect descriptor
			 * table since it uses streaming DMA mappings.
			 */
			i = virtqueue_add_desc_split(_vq, desc, i, addr,
						     sg->length,
						     VRING_DESC_F_NEXT |
						     VRING_DESC_F_WRITE,
						     indirect);
			buflen += sg->length;
		}
	}
	/* Last one doesn't continue. */
	desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
	if (!indirect && vq->use_dma_api)
		vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &=
			~VRING_DESC_F_NEXT;

	if (indirect) {
		/* Now that the indirect table is filled in, map it. */
		dma_addr_t addr = vring_map_single(
			vq, desc, total_sg * sizeof(struct vring_desc),
			DMA_TO_DEVICE);
		if (vring_mapping_error(vq, addr))
			goto unmap_release;

		virtqueue_add_desc_split(_vq, vq->split.vring.desc,
					 head, addr,
					 total_sg * sizeof(struct vring_desc),
					 VRING_DESC_F_INDIRECT,
					 false);
	}

	/* We're using some buffers from the free list. */
	vq->vq.num_free -= descs_used;

	/* Update free pointer */
	if (indirect)
		vq->free_head = vq->split.desc_extra[head].next;
	else
		vq->free_head = i;

	/* Store token and indirect buffer state. */
	vq->split.desc_state[head].data = data;
	if (indirect)
		vq->split.desc_state[head].indir_desc = desc;
	else
		vq->split.desc_state[head].indir_desc = ctx;

	/* Store in buffer length if necessary */
	if (vq->buflen)
		vq->buflen[head] = buflen;

	/* Put entry in available array (but don't update avail->idx until they
	 * do sync). */
	avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
	vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);

	/* Descriptors and available array need to be set before we expose the
	 * new available array entries. */
	virtio_wmb(vq->weak_barriers);
	vq->split.avail_idx_shadow++;
	vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
						vq->split.avail_idx_shadow);
	vq->num_added++;

	pr_debug("Added buffer head %i to %p\n", head, vq);
	END_USE(vq);

	/* This is very unlikely, but theoretically possible.  Kick
	 * just in case. */
	if (unlikely(vq->num_added == (1 << 16) - 1))
		virtqueue_kick(_vq);

	return 0;

unmap_release:
	err_idx = i;

	if (indirect)
		i = 0;
	else
		i = head;

	for (n = 0; n < total_sg; n++) {
		if (i == err_idx)
			break;
		if (indirect) {
			vring_unmap_one_split_indirect(vq, &desc[i]);
			i = virtio16_to_cpu(_vq->vdev, desc[i].next);
		} else
			i = vring_unmap_one_split(vq, i);
	}

	if (indirect)
		kfree(desc);

	END_USE(vq);
	return -ENOMEM;
}
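
/*
 * Illustrative sketch (not from the original source): for a direct (non
 * indirect) add with two readable and one writable scatterlist entries,
 * virtqueue_add_split() above ends up building a chain roughly like:
 *
 *	desc[head]  = { addr = out0, flags = NEXT }
 *	desc[next0] = { addr = out1, flags = NEXT }
 *	desc[next1] = { addr = in0,  flags = WRITE }	// last, NEXT cleared
 *
 * followed by writing "head" into avail->ring[] and bumping avail->idx
 * after a write barrier.  out0/out1/in0 stand in for the mapped DMA
 * addresses of the caller's buffers.
 */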

static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u16 new, old;
	bool needs_kick;

	START_USE(vq);
	/* We need to expose available array entries before checking avail
	 * event. */
	virtio_mb(vq->weak_barriers);

	old = vq->split.avail_idx_shadow - vq->num_added;
	new = vq->split.avail_idx_shadow;
	vq->num_added = 0;

	LAST_ADD_TIME_CHECK(vq);
	LAST_ADD_TIME_INVALID(vq);

	if (vq->event) {
		needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
					vring_avail_event(&vq->split.vring)),
					      new, old);
	} else {
		needs_kick = !(vq->split.vring.used->flags &
					cpu_to_virtio16(_vq->vdev,
						VRING_USED_F_NO_NOTIFY));
	}
	END_USE(vq);
	return needs_kick;
}

static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
			     void **ctx)
{
	unsigned int i, j;
	__virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);

	/* Clear data ptr. */
	vq->split.desc_state[head].data = NULL;

	/* Put back on free list: unmap first-level descriptors and find end */
	i = head;

	while (vq->split.vring.desc[i].flags & nextflag) {
		vring_unmap_one_split(vq, i);
		i = vq->split.desc_extra[i].next;
		vq->vq.num_free++;
	}

	vring_unmap_one_split(vq, i);
	vq->split.desc_extra[i].next = vq->free_head;
	vq->free_head = head;

	/* Plus final descriptor */
	vq->vq.num_free++;

	if (vq->indirect) {
		struct vring_desc *indir_desc =
				vq->split.desc_state[head].indir_desc;
		u32 len;

		/* Free the indirect table, if any, now that it's unmapped. */
		if (!indir_desc)
			return;

		len = vq->split.desc_extra[head].len;

		BUG_ON(!(vq->split.desc_extra[head].flags &
				VRING_DESC_F_INDIRECT));
		BUG_ON(len == 0 || len % sizeof(struct vring_desc));

		for (j = 0; j < len / sizeof(struct vring_desc); j++)
			vring_unmap_one_split_indirect(vq, &indir_desc[j]);

		kfree(indir_desc);
		vq->split.desc_state[head].indir_desc = NULL;
	} else if (ctx) {
		*ctx = vq->split.desc_state[head].indir_desc;
	}
}

static inline bool more_used_split(const struct vring_virtqueue *vq)
{
	return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
			vq->split.vring.used->idx);
}

static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
					 unsigned int *len,
					 void **ctx)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	void *ret;
	unsigned int i;
	u16 last_used;

	START_USE(vq);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return NULL;
	}

	if (!more_used_split(vq)) {
		pr_debug("No more buffers in queue\n");
		END_USE(vq);
		return NULL;
	}

	/* Only get used array entries after they have been exposed by host. */
	virtio_rmb(vq->weak_barriers);

	last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
	i = virtio32_to_cpu(_vq->vdev,
			vq->split.vring.used->ring[last_used].id);
	*len = virtio32_to_cpu(_vq->vdev,
			vq->split.vring.used->ring[last_used].len);

	if (unlikely(i >= vq->split.vring.num)) {
		BAD_RING(vq, "id %u out of range\n", i);
		return NULL;
	}
	if (unlikely(!vq->split.desc_state[i].data)) {
		BAD_RING(vq, "id %u is not a head!\n", i);
		return NULL;
	}
	if (vq->buflen && unlikely(*len > vq->buflen[i])) {
		BAD_RING(vq, "used len %d is larger than in buflen %u\n",
			*len, vq->buflen[i]);
		return NULL;
	}

	/* detach_buf_split clears data, so grab it now. */
	ret = vq->split.desc_state[i].data;
	detach_buf_split(vq, i, ctx);
	vq->last_used_idx++;
	/* If we expect an interrupt for the next entry, tell host
	 * by writing event index and flush out the write before
	 * the read in the next get_buf call. */
	if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
		virtio_store_mb(vq->weak_barriers,
				&vring_used_event(&vq->split.vring),
				cpu_to_virtio16(_vq->vdev, vq->last_used_idx));

	LAST_ADD_TIME_INVALID(vq);

	END_USE(vq);
	return ret;
}

static void virtqueue_disable_cb_split(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
		vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
		if (vq->event)
			/* TODO: this is a hack. Figure out a cleaner value to write. */
			vring_used_event(&vq->split.vring) = 0x0;
		else
			vq->split.vring.avail->flags =
				cpu_to_virtio16(_vq->vdev,
						vq->split.avail_flags_shadow);
	}
}

static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u16 last_used_idx;

	START_USE(vq);

	/* We optimistically turn back on interrupts, then check if there was
	 * more to do. */
	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
	 * either clear the flags bit or point the event index at the next
	 * entry. Always do both to keep code simple. */
	if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
		vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
		if (!vq->event)
			vq->split.vring.avail->flags =
				cpu_to_virtio16(_vq->vdev,
						vq->split.avail_flags_shadow);
	}
	vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev,
			last_used_idx = vq->last_used_idx);
	END_USE(vq);
	return last_used_idx;
}

static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
			vq->split.vring.used->idx);
}

static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u16 bufs;

	START_USE(vq);

	/* We optimistically turn back on interrupts, then check if there was
	 * more to do. */
	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
	 * either clear the flags bit or point the event index at the next
	 * entry. Always update the event index to keep code simple. */
	if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
		vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
		if (!vq->event)
			vq->split.vring.avail->flags =
				cpu_to_virtio16(_vq->vdev,
						vq->split.avail_flags_shadow);
	}
	/* TODO: tune this threshold */
	bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;

	virtio_store_mb(vq->weak_barriers,
			&vring_used_event(&vq->split.vring),
			cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));

	if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx)
					- vq->last_used_idx) > bufs)) {
		END_USE(vq);
		return false;
	}

	END_USE(vq);
	return true;
}

static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	unsigned int i;
	void *buf;

	START_USE(vq);

	for (i = 0; i < vq->split.vring.num; i++) {
		if (!vq->split.desc_state[i].data)
			continue;
		/* detach_buf_split clears data, so grab it now. */
		buf = vq->split.desc_state[i].data;
		detach_buf_split(vq, i, NULL);
		vq->split.avail_idx_shadow--;
		vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
				vq->split.avail_idx_shadow);
		END_USE(vq);
		return buf;
	}
	/* That should have freed everything. */
	BUG_ON(vq->vq.num_free != vq->split.vring.num);

	END_USE(vq);
	return NULL;
}

static struct virtqueue *vring_create_virtqueue_split(
	unsigned int index,
	unsigned int num,
	unsigned int vring_align,
	struct virtio_device *vdev,
	bool weak_barriers,
	bool may_reduce_num,
	bool context,
	bool (*notify)(struct virtqueue *),
	void (*callback)(struct virtqueue *),
	const char *name)
{
	struct virtqueue *vq;
	void *queue = NULL;
	dma_addr_t dma_addr;
	size_t queue_size_in_bytes;
	struct vring vring;

	/* We assume num is a power of 2. */
	if (num & (num - 1)) {
		dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
		return NULL;
	}

	/* TODO: allocate each queue chunk individually */
	for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
		queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
					  &dma_addr,
					  GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
		if (queue)
			break;
		if (!may_reduce_num)
			return NULL;
	}

	if (!num)
		return NULL;

	if (!queue) {
		/* Try to get a single page. You are my only hope! */
		queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
					  &dma_addr, GFP_KERNEL|__GFP_ZERO);
	}
	if (!queue)
		return NULL;

	queue_size_in_bytes = vring_size(num, vring_align);
	vring_init(&vring, num, queue, vring_align);

	vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
				   notify, callback, name);
	if (!vq) {
		vring_free_queue(vdev, queue_size_in_bytes, queue,
				 dma_addr);
		return NULL;
	}

	to_vvq(vq)->split.queue_dma_addr = dma_addr;
	to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes;
	to_vvq(vq)->we_own_ring = true;

	return vq;
}


/*
 * Packed ring specific functions - *_packed().
 */

static void vring_unmap_state_packed(const struct vring_virtqueue *vq,
				     struct vring_desc_extra *state)
{
	u16 flags;

	if (!vq->use_dma_api)
		return;

	flags = state->flags;

	if (flags & VRING_DESC_F_INDIRECT) {
		dma_unmap_single(vring_dma_dev(vq),
				 state->addr, state->len,
				 (flags & VRING_DESC_F_WRITE) ?
				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
	} else {
		dma_unmap_page(vring_dma_dev(vq),
			       state->addr, state->len,
			       (flags & VRING_DESC_F_WRITE) ?
			       DMA_FROM_DEVICE : DMA_TO_DEVICE);
	}
}

static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
				    struct vring_packed_desc *desc)
{
	u16 flags;

	if (!vq->use_dma_api)
		return;

	flags = le16_to_cpu(desc->flags);

	if (flags & VRING_DESC_F_INDIRECT) {
		dma_unmap_single(vring_dma_dev(vq),
				 le64_to_cpu(desc->addr),
				 le32_to_cpu(desc->len),
				 (flags & VRING_DESC_F_WRITE) ?
				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
	} else {
		dma_unmap_page(vring_dma_dev(vq),
			       le64_to_cpu(desc->addr),
			       le32_to_cpu(desc->len),
			       (flags & VRING_DESC_F_WRITE) ?
			       DMA_FROM_DEVICE : DMA_TO_DEVICE);
	}
}

static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
						       gfp_t gfp)
{
	struct vring_packed_desc *desc;

	/*
	 * We require lowmem mappings for the descriptors because
	 * otherwise virt_to_phys will give us bogus addresses in the
	 * virtqueue.
	 */
	gfp &= ~__GFP_HIGHMEM;

	desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp);

	return desc;
}

static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
					 struct scatterlist *sgs[],
					 unsigned int total_sg,
					 unsigned int out_sgs,
					 unsigned int in_sgs,
					 void *data,
					 gfp_t gfp)
{
	struct vring_packed_desc *desc;
	struct scatterlist *sg;
	unsigned int i, n, err_idx;
	u16 head, id;
	dma_addr_t addr;
	u32 buflen = 0;

	head = vq->packed.next_avail_idx;
	desc = alloc_indirect_packed(total_sg, gfp);
	if (!desc)
		return -ENOMEM;

	if (unlikely(vq->vq.num_free < 1)) {
		pr_debug("Can't add buf len 1 - avail = 0\n");
		kfree(desc);
		END_USE(vq);
		return -ENOSPC;
	}

	i = 0;
	id = vq->free_head;
	BUG_ON(id == vq->packed.vring.num);

	for (n = 0; n < out_sgs + in_sgs; n++) {
		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
			addr = vring_map_one_sg(vq, sg, n < out_sgs ?
					DMA_TO_DEVICE : DMA_FROM_DEVICE);
			if (vring_mapping_error(vq, addr))
				goto unmap_release;

			desc[i].flags = cpu_to_le16(n < out_sgs ?
						0 : VRING_DESC_F_WRITE);
			desc[i].addr = cpu_to_le64(addr);
			desc[i].len = cpu_to_le32(sg->length);
			i++;
			if (n >= out_sgs)
				buflen += sg->length;
		}
	}

	/* Now that the indirect table is filled in, map it. */
	addr = vring_map_single(vq, desc,
			total_sg * sizeof(struct vring_packed_desc),
			DMA_TO_DEVICE);
	if (vring_mapping_error(vq, addr))
		goto unmap_release;

	vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
	vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
				sizeof(struct vring_packed_desc));
	vq->packed.vring.desc[head].id = cpu_to_le16(id);

	if (vq->use_dma_api) {
		vq->packed.desc_extra[id].addr = addr;
		vq->packed.desc_extra[id].len = total_sg *
				sizeof(struct vring_packed_desc);
		vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
						  vq->packed.avail_used_flags;
	}

	/*
	 * A driver MUST NOT make the first descriptor in the list
	 * available before all subsequent descriptors comprising
	 * the list are made available.
	 */
	virtio_wmb(vq->weak_barriers);
	vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
						vq->packed.avail_used_flags);

	/* We're using some buffers from the free list. */
	vq->vq.num_free -= 1;

	/* Update free pointer */
	n = head + 1;
	if (n >= vq->packed.vring.num) {
		n = 0;
		vq->packed.avail_wrap_counter ^= 1;
		vq->packed.avail_used_flags ^=
				1 << VRING_PACKED_DESC_F_AVAIL |
				1 << VRING_PACKED_DESC_F_USED;
	}
	vq->packed.next_avail_idx = n;
	vq->free_head = vq->packed.desc_extra[id].next;

	/* Store token and indirect buffer state. */
	vq->packed.desc_state[id].num = 1;
	vq->packed.desc_state[id].data = data;
	vq->packed.desc_state[id].indir_desc = desc;
	vq->packed.desc_state[id].last = id;

	/* Store in buffer length if necessary */
	if (vq->buflen)
		vq->buflen[id] = buflen;

	vq->num_added += 1;

	pr_debug("Added buffer head %i to %p\n", head, vq);
	END_USE(vq);

	return 0;

unmap_release:
	err_idx = i;

	for (i = 0; i < err_idx; i++)
		vring_unmap_desc_packed(vq, &desc[i]);

	kfree(desc);

	END_USE(vq);
	return -ENOMEM;
}

static inline int virtqueue_add_packed(struct virtqueue *_vq,
				       struct scatterlist *sgs[],
				       unsigned int total_sg,
				       unsigned int out_sgs,
				       unsigned int in_sgs,
				       void *data,
				       void *ctx,
				       gfp_t gfp)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	struct vring_packed_desc *desc;
	struct scatterlist *sg;
	unsigned int i, n, c, descs_used, err_idx;
	__le16 head_flags, flags;
	u16 head, id, prev, curr, avail_used_flags;
	int err;
	u32 buflen = 0;

	START_USE(vq);

	BUG_ON(data == NULL);
	BUG_ON(ctx && vq->indirect);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return -EIO;
	}

	LAST_ADD_TIME_UPDATE(vq);

	BUG_ON(total_sg == 0);

	if (virtqueue_use_indirect(_vq, total_sg)) {
		err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
						    in_sgs, data, gfp);
		if (err != -ENOMEM)
			return err;

		/* fall back on direct */
	}

	head = vq->packed.next_avail_idx;
	avail_used_flags = vq->packed.avail_used_flags;

	WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);

	desc = vq->packed.vring.desc;
	i = head;
	descs_used = total_sg;

	if (unlikely(vq->vq.num_free < descs_used)) {
		pr_debug("Can't add buf len %i - avail = %i\n",
			 descs_used, vq->vq.num_free);
		END_USE(vq);
		return -ENOSPC;
	}

	id = vq->free_head;
	BUG_ON(id == vq->packed.vring.num);

	curr = id;
	c = 0;
	for (n = 0; n < out_sgs + in_sgs; n++) {
		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
			dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
					DMA_TO_DEVICE : DMA_FROM_DEVICE);
			if (vring_mapping_error(vq, addr))
				goto unmap_release;

			flags = cpu_to_le16(vq->packed.avail_used_flags |
				    (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
				    (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
			if (i == head)
				head_flags = flags;
			else
				desc[i].flags = flags;

			desc[i].addr = cpu_to_le64(addr);
			desc[i].len = cpu_to_le32(sg->length);
			desc[i].id = cpu_to_le16(id);

			if (unlikely(vq->use_dma_api)) {
				vq->packed.desc_extra[curr].addr = addr;
				vq->packed.desc_extra[curr].len = sg->length;
				vq->packed.desc_extra[curr].flags =
					le16_to_cpu(flags);
			}
			prev = curr;
			curr = vq->packed.desc_extra[curr].next;

			if ((unlikely(++i >= vq->packed.vring.num))) {
				i = 0;
				vq->packed.avail_used_flags ^=
					1 << VRING_PACKED_DESC_F_AVAIL |
					1 << VRING_PACKED_DESC_F_USED;
			}
			if (n >= out_sgs)
				buflen += sg->length;
		}
	}

	if (i < head)
		vq->packed.avail_wrap_counter ^= 1;

	/* We're using some buffers from the free list. */
	vq->vq.num_free -= descs_used;

	/* Update free pointer */
	vq->packed.next_avail_idx = i;
	vq->free_head = curr;

	/* Store token. */
	vq->packed.desc_state[id].num = descs_used;
	vq->packed.desc_state[id].data = data;
	vq->packed.desc_state[id].indir_desc = ctx;
	vq->packed.desc_state[id].last = prev;

	/* Store in buffer length if necessary */
	if (vq->buflen)
		vq->buflen[id] = buflen;

	/*
	 * A driver MUST NOT make the first descriptor in the list
	 * available before all subsequent descriptors comprising
	 * the list are made available.
	 */
	virtio_wmb(vq->weak_barriers);
	vq->packed.vring.desc[head].flags = head_flags;
	vq->num_added += descs_used;

	pr_debug("Added buffer head %i to %p\n", head, vq);
	END_USE(vq);

	return 0;

unmap_release:
	err_idx = i;
	i = head;
	curr = vq->free_head;

	vq->packed.avail_used_flags = avail_used_flags;

	for (n = 0; n < total_sg; n++) {
		if (i == err_idx)
			break;
		vring_unmap_state_packed(vq,
					 &vq->packed.desc_extra[curr]);
		curr = vq->packed.desc_extra[curr].next;
		i++;
		if (i >= vq->packed.vring.num)
			i = 0;
	}

	END_USE(vq);
	return -EIO;
}

static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u16 new, old, off_wrap, flags, wrap_counter, event_idx;
	bool needs_kick;
	union {
		struct {
			__le16 off_wrap;
			__le16 flags;
		};
		u32 u32;
	} snapshot;

	START_USE(vq);

	/*
	 * We need to expose the new flags value before checking notification
	 * suppressions.
	 */
	virtio_mb(vq->weak_barriers);

	old = vq->packed.next_avail_idx - vq->num_added;
	new = vq->packed.next_avail_idx;
	vq->num_added = 0;

	snapshot.u32 = *(u32 *)vq->packed.vring.device;
	flags = le16_to_cpu(snapshot.flags);

	LAST_ADD_TIME_CHECK(vq);
	LAST_ADD_TIME_INVALID(vq);

	if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
		needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
		goto out;
	}

	off_wrap = le16_to_cpu(snapshot.off_wrap);

	wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
	event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
	if (wrap_counter != vq->packed.avail_wrap_counter)
		event_idx -= vq->packed.vring.num;

	needs_kick = vring_need_event(event_idx, new, old);
out:
	END_USE(vq);
	return needs_kick;
}

static void detach_buf_packed(struct vring_virtqueue *vq,
			      unsigned int id, void **ctx)
{
	struct vring_desc_state_packed *state = NULL;
	struct vring_packed_desc *desc;
	unsigned int i, curr;

	state = &vq->packed.desc_state[id];

	/* Clear data ptr. */
	state->data = NULL;

	vq->packed.desc_extra[state->last].next = vq->free_head;
	vq->free_head = id;
	vq->vq.num_free += state->num;

	if (unlikely(vq->use_dma_api)) {
		curr = id;
		for (i = 0; i < state->num; i++) {
			vring_unmap_state_packed(vq,
				&vq->packed.desc_extra[curr]);
			curr = vq->packed.desc_extra[curr].next;
		}
	}

	if (vq->indirect) {
		u32 len;

		/* Free the indirect table, if any, now that it's unmapped. */
		desc = state->indir_desc;
		if (!desc)
			return;

		if (vq->use_dma_api) {
			len = vq->packed.desc_extra[id].len;
			for (i = 0; i < len / sizeof(struct vring_packed_desc);
					i++)
				vring_unmap_desc_packed(vq, &desc[i]);
		}
		kfree(desc);
		state->indir_desc = NULL;
	} else if (ctx) {
		*ctx = state->indir_desc;
	}
}

static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
				       u16 idx, bool used_wrap_counter)
{
	bool avail, used;
	u16 flags;

	flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
	avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
	used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));

	return avail == used && used == used_wrap_counter;
}

static inline bool more_used_packed(const struct vring_virtqueue *vq)
{
	return is_used_desc_packed(vq, vq->last_used_idx,
			vq->packed.used_wrap_counter);
}

static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
					  unsigned int *len,
					  void **ctx)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u16 last_used, id;
	void *ret;

	START_USE(vq);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return NULL;
	}

	if (!more_used_packed(vq)) {
		pr_debug("No more buffers in queue\n");
		END_USE(vq);
		return NULL;
	}

	/* Only get used elements after they have been exposed by host. */
	virtio_rmb(vq->weak_barriers);

	last_used = vq->last_used_idx;
	id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
	*len = le32_to_cpu(vq->packed.vring.desc[last_used].len);

	if (unlikely(id >= vq->packed.vring.num)) {
		BAD_RING(vq, "id %u out of range\n", id);
		return NULL;
	}
	if (unlikely(!vq->packed.desc_state[id].data)) {
		BAD_RING(vq, "id %u is not a head!\n", id);
		return NULL;
	}
	if (vq->buflen && unlikely(*len > vq->buflen[id])) {
		BAD_RING(vq, "used len %d is larger than in buflen %u\n",
			*len, vq->buflen[id]);
		return NULL;
	}

	/* detach_buf_packed clears data, so grab it now. */
	ret = vq->packed.desc_state[id].data;
	detach_buf_packed(vq, id, ctx);

	vq->last_used_idx += vq->packed.desc_state[id].num;
	if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) {
		vq->last_used_idx -= vq->packed.vring.num;
		vq->packed.used_wrap_counter ^= 1;
	}

	/*
	 * If we expect an interrupt for the next entry, tell host
	 * by writing event index and flush out the write before
	 * the read in the next get_buf call.
	 */
	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
		virtio_store_mb(vq->weak_barriers,
				&vq->packed.vring.driver->off_wrap,
				cpu_to_le16(vq->last_used_idx |
					(vq->packed.used_wrap_counter <<
					 VRING_PACKED_EVENT_F_WRAP_CTR)));

	LAST_ADD_TIME_INVALID(vq);

	END_USE(vq);
	return ret;
}

static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
		vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
		vq->packed.vring.driver->flags =
			cpu_to_le16(vq->packed.event_flags_shadow);
	}
}

static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	START_USE(vq);

	/*
	 * We optimistically turn back on interrupts, then check if there was
	 * more to do.
	 */

	if (vq->event) {
		vq->packed.vring.driver->off_wrap =
			cpu_to_le16(vq->last_used_idx |
				(vq->packed.used_wrap_counter <<
				 VRING_PACKED_EVENT_F_WRAP_CTR));
		/*
		 * We need to update event offset and event wrap
		 * counter first before updating event flags.
		 */
		virtio_wmb(vq->weak_barriers);
	}

	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
		vq->packed.event_flags_shadow = vq->event ?
				VRING_PACKED_EVENT_FLAG_DESC :
				VRING_PACKED_EVENT_FLAG_ENABLE;
		vq->packed.vring.driver->flags =
			cpu_to_le16(vq->packed.event_flags_shadow);
	}

	END_USE(vq);
	return vq->last_used_idx | ((u16)vq->packed.used_wrap_counter <<
			VRING_PACKED_EVENT_F_WRAP_CTR);
}

static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	bool wrap_counter;
	u16 used_idx;

	wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
	used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);

	return is_used_desc_packed(vq, used_idx, wrap_counter);
}

static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u16 used_idx, wrap_counter;
	u16 bufs;

	START_USE(vq);

	/*
	 * We optimistically turn back on interrupts, then check if there was
	 * more to do.
	 */

	if (vq->event) {
		/* TODO: tune this threshold */
		bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4;
		wrap_counter = vq->packed.used_wrap_counter;

		used_idx = vq->last_used_idx + bufs;
		if (used_idx >= vq->packed.vring.num) {
			used_idx -= vq->packed.vring.num;
			wrap_counter ^= 1;
		}

		vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx |
			(wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));

		/*
		 * We need to update event offset and event wrap
		 * counter first before updating event flags.
		 */
		virtio_wmb(vq->weak_barriers);
	}

	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
		vq->packed.event_flags_shadow = vq->event ?
				VRING_PACKED_EVENT_FLAG_DESC :
				VRING_PACKED_EVENT_FLAG_ENABLE;
		vq->packed.vring.driver->flags =
			cpu_to_le16(vq->packed.event_flags_shadow);
	}

	/*
	 * We need to update event suppression structure first
	 * before re-checking for more used buffers.
	 */
	virtio_mb(vq->weak_barriers);

	if (is_used_desc_packed(vq,
				vq->last_used_idx,
				vq->packed.used_wrap_counter)) {
		END_USE(vq);
		return false;
	}

	END_USE(vq);
	return true;
}

static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	unsigned int i;
	void *buf;

	START_USE(vq);

	for (i = 0; i < vq->packed.vring.num; i++) {
		if (!vq->packed.desc_state[i].data)
			continue;
		/* detach_buf clears data, so grab it now. */
		buf = vq->packed.desc_state[i].data;
		detach_buf_packed(vq, i, NULL);
		END_USE(vq);
		return buf;
	}
	/* That should have freed everything. */
	BUG_ON(vq->vq.num_free != vq->packed.vring.num);

	END_USE(vq);
	return NULL;
}

static struct vring_desc_extra *vring_alloc_desc_extra(struct vring_virtqueue *vq,
						       unsigned int num)
{
	struct vring_desc_extra *desc_extra;
	unsigned int i;

	desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra),
				   GFP_KERNEL);
	if (!desc_extra)
		return NULL;

	memset(desc_extra, 0, num * sizeof(struct vring_desc_extra));

	for (i = 0; i < num - 1; i++)
		desc_extra[i].next = i + 1;

	return desc_extra;
}

static struct virtqueue *vring_create_virtqueue_packed(
	unsigned int index,
	unsigned int num,
	unsigned int vring_align,
	struct virtio_device *vdev,
	bool weak_barriers,
	bool may_reduce_num,
	bool context,
	bool (*notify)(struct virtqueue *),
	void (*callback)(struct virtqueue *),
	const char *name)
{
	struct vring_virtqueue *vq;
	struct vring_packed_desc *ring;
	struct vring_packed_desc_event *driver, *device;
	struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver);
	dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
	size_t ring_size_in_bytes, event_size_in_bytes;

	ring_size_in_bytes = num * sizeof(struct vring_packed_desc);

	ring = vring_alloc_queue(vdev, ring_size_in_bytes,
				 &ring_dma_addr,
				 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
	if (!ring)
		goto err_ring;

	event_size_in_bytes = sizeof(struct vring_packed_desc_event);

	driver = vring_alloc_queue(vdev, event_size_in_bytes,
				   &driver_event_dma_addr,
				   GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
	if (!driver)
		goto err_driver;

	device = vring_alloc_queue(vdev, event_size_in_bytes,
				   &device_event_dma_addr,
				   GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
	if (!device)
		goto err_device;

	vq = kmalloc(sizeof(*vq), GFP_KERNEL);
	if (!vq)
		goto err_vq;

	vq->vq.callback = callback;
	vq->vq.vdev = vdev;
	vq->vq.name = name;
	vq->vq.num_free = num;
	vq->vq.index = index;
	vq->we_own_ring = true;
	vq->notify = notify;
	vq->weak_barriers = weak_barriers;
	vq->broken = false;
	vq->last_used_idx = 0;
	vq->event_triggered = false;
	vq->num_added = 0;
	vq->packed_ring = true;
	vq->use_dma_api = vring_use_dma_api(vdev);
#ifdef DEBUG
	vq->in_use = false;
	vq->last_add_time_valid = false;
#endif

	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
		!context;
	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);

	if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
		vq->weak_barriers = false;

	vq->packed.ring_dma_addr = ring_dma_addr;
	vq->packed.driver_event_dma_addr = driver_event_dma_addr;
	vq->packed.device_event_dma_addr = device_event_dma_addr;

	vq->packed.ring_size_in_bytes = ring_size_in_bytes;
	vq->packed.event_size_in_bytes = event_size_in_bytes;

	vq->packed.vring.num = num;
	vq->packed.vring.desc = ring;
	vq->packed.vring.driver = driver;
	vq->packed.vring.device = device;

	vq->packed.next_avail_idx = 0;
	vq->packed.avail_wrap_counter = 1;
	vq->packed.used_wrap_counter = 1;
	vq->packed.event_flags_shadow = 0;
	vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;

	vq->packed.desc_state = kmalloc_array(num,
			sizeof(struct vring_desc_state_packed),
			GFP_KERNEL);
	if (!vq->packed.desc_state)
		goto err_desc_state;

	memset(vq->packed.desc_state, 0,
		num * sizeof(struct vring_desc_state_packed));

	/* Put everything in free lists. */
	vq->free_head = 0;

	vq->packed.desc_extra = vring_alloc_desc_extra(vq, num);
	if (!vq->packed.desc_extra)
		goto err_desc_extra;

	if (!drv->suppress_used_validation || force_used_validation) {
		vq->buflen = kmalloc_array(num, sizeof(*vq->buflen),
					   GFP_KERNEL);
		if (!vq->buflen)
			goto err_buflen;
	} else {
		vq->buflen = NULL;
	}

	/* No callback?  Tell other side not to bother us. */
	if (!callback) {
		vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
		vq->packed.vring.driver->flags =
			cpu_to_le16(vq->packed.event_flags_shadow);
	}

	spin_lock(&vdev->vqs_list_lock);
	list_add_tail(&vq->vq.list, &vdev->vqs);
	spin_unlock(&vdev->vqs_list_lock);
	return &vq->vq;

err_buflen:
	kfree(vq->packed.desc_extra);
err_desc_extra:
	kfree(vq->packed.desc_state);
err_desc_state:
	kfree(vq);
err_vq:
	vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr);
err_device:
	vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr);
err_driver:
	vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr);
err_ring:
	return NULL;
}


/*
 * Generic functions and exported symbols.
 */

static inline int virtqueue_add(struct virtqueue *_vq,
				struct scatterlist *sgs[],
				unsigned int total_sg,
				unsigned int out_sgs,
				unsigned int in_sgs,
				void *data,
				void *ctx,
				gfp_t gfp)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
					out_sgs, in_sgs, data, ctx, gfp) :
				 virtqueue_add_split(_vq, sgs, total_sg,
					out_sgs, in_sgs, data, ctx, gfp);
}

/**
 * virtqueue_add_sgs - expose buffers to other end
 * @_vq: the struct virtqueue we're talking about.
 * @sgs: array of terminated scatterlists.
 * @out_sgs: the number of scatterlists readable by other side
 * @in_sgs: the number of scatterlists which are writable (after readable ones)
 * @data: the token identifying the buffer.
 * @gfp: how to do memory allocations (if necessary).
 *
 * Caller must ensure we don't call this with other virtqueue operations
 * at the same time (except where noted).
 *
 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
 */
int virtqueue_add_sgs(struct virtqueue *_vq,
		      struct scatterlist *sgs[],
		      unsigned int out_sgs,
		      unsigned int in_sgs,
		      void *data,
		      gfp_t gfp)
{
	unsigned int i, total_sg = 0;

	/* Count them first. */
	for (i = 0; i < out_sgs + in_sgs; i++) {
		struct scatterlist *sg;

		for (sg = sgs[i]; sg; sg = sg_next(sg))
			total_sg++;
	}
	return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
			     data, NULL, gfp);
}
EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
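
/*
 * Illustrative usage sketch (not part of the original file): a driver with
 * one readable header and one writable status buffer might fill two
 * scatterlists and hand them to virtqueue_add_sgs() roughly like this;
 * "hdr", "status" and "req" are hypothetical driver-side objects.
 *
 *	struct scatterlist hdr_sg, status_sg, *sgs[2];
 *
 *	sg_init_one(&hdr_sg, &req->hdr, sizeof(req->hdr));
 *	sg_init_one(&status_sg, &req->status, sizeof(req->status));
 *	sgs[0] = &hdr_sg;		// out_sgs = 1 (device reads)
 *	sgs[1] = &status_sg;		// in_sgs = 1 (device writes)
 *
 *	err = virtqueue_add_sgs(vq, sgs, 1, 1, req, GFP_ATOMIC);
 *	if (!err)
 *		virtqueue_kick(vq);
 */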

/**
 * virtqueue_add_outbuf - expose output buffers to other end
 * @vq: the struct virtqueue we're talking about.
 * @sg: scatterlist (must be well-formed and terminated!)
 * @num: the number of entries in @sg readable by other side
 * @data: the token identifying the buffer.
 * @gfp: how to do memory allocations (if necessary).
 *
 * Caller must ensure we don't call this with other virtqueue operations
 * at the same time (except where noted).
 *
 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
 */
int virtqueue_add_outbuf(struct virtqueue *vq,
			 struct scatterlist *sg, unsigned int num,
			 void *data,
			 gfp_t gfp)
{
	return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp);
}
EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);

/**
 * virtqueue_add_inbuf - expose input buffers to other end
 * @vq: the struct virtqueue we're talking about.
 * @sg: scatterlist (must be well-formed and terminated!)
 * @num: the number of entries in @sg writable by other side
 * @data: the token identifying the buffer.
 * @gfp: how to do memory allocations (if necessary).
 *
 * Caller must ensure we don't call this with other virtqueue operations
 * at the same time (except where noted).
 *
 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
 */
int virtqueue_add_inbuf(struct virtqueue *vq,
			struct scatterlist *sg, unsigned int num,
			void *data,
			gfp_t gfp)
{
	return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp);
}
EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);

/**
 * virtqueue_add_inbuf_ctx - expose input buffers to other end
 * @vq: the struct virtqueue we're talking about.
 * @sg: scatterlist (must be well-formed and terminated!)
 * @num: the number of entries in @sg writable by other side
 * @data: the token identifying the buffer.
 * @ctx: extra context for the token
 * @gfp: how to do memory allocations (if necessary).
 *
 * Caller must ensure we don't call this with other virtqueue operations
 * at the same time (except where noted).
 *
 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
 */
int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
			    struct scatterlist *sg, unsigned int num,
			    void *data,
			    void *ctx,
			    gfp_t gfp)
{
	return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp);
}
EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);

/**
 * virtqueue_kick_prepare - first half of split virtqueue_kick call.
 * @_vq: the struct virtqueue
 *
 * Instead of virtqueue_kick(), you can do:
 *	if (virtqueue_kick_prepare(vq))
 *		virtqueue_notify(vq);
 *
 * This is sometimes useful because the virtqueue_kick_prepare() needs
 * to be serialized, but the actual virtqueue_notify() call does not.
 */
bool virtqueue_kick_prepare(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) :
				 virtqueue_kick_prepare_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);

/**
 * virtqueue_notify - second half of split virtqueue_kick call.
 * @_vq: the struct virtqueue
 *
 * This does not need to be serialized.
 *
 * Returns false if host notify failed or queue is broken, otherwise true.
 */
bool virtqueue_notify(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (unlikely(vq->broken))
		return false;

	/* Prod other side to tell it about changes. */
	if (!vq->notify(_vq)) {
		vq->broken = true;
		return false;
	}
	return true;
}
EXPORT_SYMBOL_GPL(virtqueue_notify);
*/ 1995 if (!vq->notify(_vq)) { 1996 vq->broken = true; 1997 return false; 1998 } 1999 return true; 2000 } 2001 EXPORT_SYMBOL_GPL(virtqueue_notify); 2002 2003 /** 2004 * virtqueue_kick - update after add_buf 2005 * @vq: the struct virtqueue 2006 * 2007 * After one or more virtqueue_add_* calls, invoke this to kick 2008 * the other side. 2009 * 2010 * Caller must ensure we don't call this with other virtqueue 2011 * operations at the same time (except where noted). 2012 * 2013 * Returns false if kick failed, otherwise true. 2014 */ 2015 bool virtqueue_kick(struct virtqueue *vq) 2016 { 2017 if (virtqueue_kick_prepare(vq)) 2018 return virtqueue_notify(vq); 2019 return true; 2020 } 2021 EXPORT_SYMBOL_GPL(virtqueue_kick); 2022 2023 /** 2024 * virtqueue_get_buf_ctx - get the next used buffer 2025 * @_vq: the struct virtqueue we're talking about. 2026 * @len: the length written into the buffer 2027 * @ctx: extra context for the token 2028 * 2029 * If the device wrote data into the buffer, @len will be set to the 2030 * amount written. This means you don't need to clear the buffer 2031 * beforehand to ensure there's no data leakage in the case of short 2032 * writes. 2033 * 2034 * Caller must ensure we don't call this with other virtqueue 2035 * operations at the same time (except where noted). 2036 * 2037 * Returns NULL if there are no used buffers, or the "data" token 2038 * handed to virtqueue_add_*(). 2039 */ 2040 void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, 2041 void **ctx) 2042 { 2043 struct vring_virtqueue *vq = to_vvq(_vq); 2044 2045 return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) : 2046 virtqueue_get_buf_ctx_split(_vq, len, ctx); 2047 } 2048 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx); 2049 2050 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) 2051 { 2052 return virtqueue_get_buf_ctx(_vq, len, NULL); 2053 } 2054 EXPORT_SYMBOL_GPL(virtqueue_get_buf); 2055 /** 2056 * virtqueue_disable_cb - disable callbacks 2057 * @_vq: the struct virtqueue we're talking about. 2058 * 2059 * Note that this is not necessarily synchronous, hence unreliable and only 2060 * useful as an optimization. 2061 * 2062 * Unlike other operations, this need not be serialized. 2063 */ 2064 void virtqueue_disable_cb(struct virtqueue *_vq) 2065 { 2066 struct vring_virtqueue *vq = to_vvq(_vq); 2067 2068 /* If device triggered an event already it won't trigger one again: 2069 * no need to disable. 2070 */ 2071 if (vq->event_triggered) 2072 return; 2073 2074 if (vq->packed_ring) 2075 virtqueue_disable_cb_packed(_vq); 2076 else 2077 virtqueue_disable_cb_split(_vq); 2078 } 2079 EXPORT_SYMBOL_GPL(virtqueue_disable_cb); 2080 2081 /** 2082 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb 2083 * @_vq: the struct virtqueue we're talking about. 2084 * 2085 * This re-enables callbacks; it returns current queue state 2086 * in an opaque unsigned value. This value should be later tested by 2087 * virtqueue_poll, to detect a possible race between the driver checking for 2088 * more work, and enabling callbacks. 2089 * 2090 * Caller must ensure we don't call this with other virtqueue 2091 * operations at the same time (except where noted). 2092 */ 2093 unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq) 2094 { 2095 struct vring_virtqueue *vq = to_vvq(_vq); 2096 2097 if (vq->event_triggered) 2098 vq->event_triggered = false; 2099 2100 return vq->packed_ring ? 
virtqueue_enable_cb_prepare_packed(_vq) : 2101 virtqueue_enable_cb_prepare_split(_vq); 2102 } 2103 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); 2104 2105 /** 2106 * virtqueue_poll - query pending used buffers 2107 * @_vq: the struct virtqueue we're talking about. 2108 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare). 2109 * 2110 * Returns "true" if there are pending used buffers in the queue. 2111 * 2112 * This does not need to be serialized. 2113 */ 2114 bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx) 2115 { 2116 struct vring_virtqueue *vq = to_vvq(_vq); 2117 2118 if (unlikely(vq->broken)) 2119 return false; 2120 2121 virtio_mb(vq->weak_barriers); 2122 return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) : 2123 virtqueue_poll_split(_vq, last_used_idx); 2124 } 2125 EXPORT_SYMBOL_GPL(virtqueue_poll); 2126 2127 /** 2128 * virtqueue_enable_cb - restart callbacks after disable_cb. 2129 * @_vq: the struct virtqueue we're talking about. 2130 * 2131 * This re-enables callbacks; it returns "false" if there are pending 2132 * buffers in the queue, to detect a possible race between the driver 2133 * checking for more work, and enabling callbacks. 2134 * 2135 * Caller must ensure we don't call this with other virtqueue 2136 * operations at the same time (except where noted). 2137 */ 2138 bool virtqueue_enable_cb(struct virtqueue *_vq) 2139 { 2140 unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq); 2141 2142 return !virtqueue_poll(_vq, last_used_idx); 2143 } 2144 EXPORT_SYMBOL_GPL(virtqueue_enable_cb); 2145 2146 /** 2147 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb. 2148 * @_vq: the struct virtqueue we're talking about. 2149 * 2150 * This re-enables callbacks but hints to the other side to delay 2151 * interrupts until most of the available buffers have been processed; 2152 * it returns "false" if there are many pending buffers in the queue, 2153 * to detect a possible race between the driver checking for more work, 2154 * and enabling callbacks. 2155 * 2156 * Caller must ensure we don't call this with other virtqueue 2157 * operations at the same time (except where noted). 2158 */ 2159 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) 2160 { 2161 struct vring_virtqueue *vq = to_vvq(_vq); 2162 2163 if (vq->event_triggered) 2164 vq->event_triggered = false; 2165 2166 return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) : 2167 virtqueue_enable_cb_delayed_split(_vq); 2168 } 2169 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); 2170 2171 /** 2172 * virtqueue_detach_unused_buf - detach first unused buffer 2173 * @_vq: the struct virtqueue we're talking about. 2174 * 2175 * Returns NULL or the "data" token handed to virtqueue_add_*(). 2176 * This is not valid on an active queue; it is useful only for device 2177 * shutdown. 2178 */ 2179 void *virtqueue_detach_unused_buf(struct virtqueue *_vq) 2180 { 2181 struct vring_virtqueue *vq = to_vvq(_vq); 2182 2183 return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) : 2184 virtqueue_detach_unused_buf_split(_vq); 2185 } 2186 EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf); 2187 2188 static inline bool more_used(const struct vring_virtqueue *vq) 2189 { 2190 return vq->packed_ring ? 
more_used_packed(vq) : more_used_split(vq); 2191 } 2192 2193 irqreturn_t vring_interrupt(int irq, void *_vq) 2194 { 2195 struct vring_virtqueue *vq = to_vvq(_vq); 2196 2197 if (!more_used(vq)) { 2198 pr_debug("virtqueue interrupt with no work for %p\n", vq); 2199 return IRQ_NONE; 2200 } 2201 2202 if (unlikely(vq->broken)) 2203 return IRQ_HANDLED; 2204 2205 /* Just a hint for performance: so it's ok that this can be racy! */ 2206 if (vq->event) 2207 vq->event_triggered = true; 2208 2209 pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback); 2210 if (vq->vq.callback) 2211 vq->vq.callback(&vq->vq); 2212 2213 return IRQ_HANDLED; 2214 } 2215 EXPORT_SYMBOL_GPL(vring_interrupt); 2216 2217 /* Only available for split ring */ 2218 struct virtqueue *__vring_new_virtqueue(unsigned int index, 2219 struct vring vring, 2220 struct virtio_device *vdev, 2221 bool weak_barriers, 2222 bool context, 2223 bool (*notify)(struct virtqueue *), 2224 void (*callback)(struct virtqueue *), 2225 const char *name) 2226 { 2227 struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver); 2228 struct vring_virtqueue *vq; 2229 2230 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 2231 return NULL; 2232 2233 vq = kmalloc(sizeof(*vq), GFP_KERNEL); 2234 if (!vq) 2235 return NULL; 2236 2237 vq->packed_ring = false; 2238 vq->vq.callback = callback; 2239 vq->vq.vdev = vdev; 2240 vq->vq.name = name; 2241 vq->vq.num_free = vring.num; 2242 vq->vq.index = index; 2243 vq->we_own_ring = false; 2244 vq->notify = notify; 2245 vq->weak_barriers = weak_barriers; 2246 vq->broken = false; 2247 vq->last_used_idx = 0; 2248 vq->event_triggered = false; 2249 vq->num_added = 0; 2250 vq->use_dma_api = vring_use_dma_api(vdev); 2251 #ifdef DEBUG 2252 vq->in_use = false; 2253 vq->last_add_time_valid = false; 2254 #endif 2255 2256 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && 2257 !context; 2258 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 2259 2260 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) 2261 vq->weak_barriers = false; 2262 2263 vq->split.queue_dma_addr = 0; 2264 vq->split.queue_size_in_bytes = 0; 2265 2266 vq->split.vring = vring; 2267 vq->split.avail_flags_shadow = 0; 2268 vq->split.avail_idx_shadow = 0; 2269 2270 /* No callback? Tell other side not to bother us. */ 2271 if (!callback) { 2272 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; 2273 if (!vq->event) 2274 vq->split.vring.avail->flags = cpu_to_virtio16(vdev, 2275 vq->split.avail_flags_shadow); 2276 } 2277 2278 vq->split.desc_state = kmalloc_array(vring.num, 2279 sizeof(struct vring_desc_state_split), GFP_KERNEL); 2280 if (!vq->split.desc_state) 2281 goto err_state; 2282 2283 vq->split.desc_extra = vring_alloc_desc_extra(vq, vring.num); 2284 if (!vq->split.desc_extra) 2285 goto err_extra; 2286 2287 if (!drv->suppress_used_validation || force_used_validation) { 2288 vq->buflen = kmalloc_array(vring.num, sizeof(*vq->buflen), 2289 GFP_KERNEL); 2290 if (!vq->buflen) 2291 goto err_buflen; 2292 } else { 2293 vq->buflen = NULL; 2294 } 2295 2296 /* Put everything in free lists. 
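	 *
	 * vring_alloc_desc_extra() above has already chained
	 * desc_extra[i].next to i + 1, so starting with free_head = 0 the
	 * whole ring forms a single free list.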
*/ 2297 vq->free_head = 0; 2298 memset(vq->split.desc_state, 0, vring.num * 2299 sizeof(struct vring_desc_state_split)); 2300 2301 spin_lock(&vdev->vqs_list_lock); 2302 list_add_tail(&vq->vq.list, &vdev->vqs); 2303 spin_unlock(&vdev->vqs_list_lock); 2304 return &vq->vq; 2305 2306 err_buflen: 2307 kfree(vq->split.desc_extra); 2308 err_extra: 2309 kfree(vq->split.desc_state); 2310 err_state: 2311 kfree(vq); 2312 return NULL; 2313 } 2314 EXPORT_SYMBOL_GPL(__vring_new_virtqueue); 2315 2316 struct virtqueue *vring_create_virtqueue( 2317 unsigned int index, 2318 unsigned int num, 2319 unsigned int vring_align, 2320 struct virtio_device *vdev, 2321 bool weak_barriers, 2322 bool may_reduce_num, 2323 bool context, 2324 bool (*notify)(struct virtqueue *), 2325 void (*callback)(struct virtqueue *), 2326 const char *name) 2327 { 2328 2329 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 2330 return vring_create_virtqueue_packed(index, num, vring_align, 2331 vdev, weak_barriers, may_reduce_num, 2332 context, notify, callback, name); 2333 2334 return vring_create_virtqueue_split(index, num, vring_align, 2335 vdev, weak_barriers, may_reduce_num, 2336 context, notify, callback, name); 2337 } 2338 EXPORT_SYMBOL_GPL(vring_create_virtqueue); 2339 2340 /* Only available for split ring */ 2341 struct virtqueue *vring_new_virtqueue(unsigned int index, 2342 unsigned int num, 2343 unsigned int vring_align, 2344 struct virtio_device *vdev, 2345 bool weak_barriers, 2346 bool context, 2347 void *pages, 2348 bool (*notify)(struct virtqueue *vq), 2349 void (*callback)(struct virtqueue *vq), 2350 const char *name) 2351 { 2352 struct vring vring; 2353 2354 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 2355 return NULL; 2356 2357 vring_init(&vring, num, pages, vring_align); 2358 return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context, 2359 notify, callback, name); 2360 } 2361 EXPORT_SYMBOL_GPL(vring_new_virtqueue); 2362 2363 void vring_del_virtqueue(struct virtqueue *_vq) 2364 { 2365 struct vring_virtqueue *vq = to_vvq(_vq); 2366 2367 spin_lock(&vq->vq.vdev->vqs_list_lock); 2368 list_del(&_vq->list); 2369 spin_unlock(&vq->vq.vdev->vqs_list_lock); 2370 2371 if (vq->we_own_ring) { 2372 if (vq->packed_ring) { 2373 vring_free_queue(vq->vq.vdev, 2374 vq->packed.ring_size_in_bytes, 2375 vq->packed.vring.desc, 2376 vq->packed.ring_dma_addr); 2377 2378 vring_free_queue(vq->vq.vdev, 2379 vq->packed.event_size_in_bytes, 2380 vq->packed.vring.driver, 2381 vq->packed.driver_event_dma_addr); 2382 2383 vring_free_queue(vq->vq.vdev, 2384 vq->packed.event_size_in_bytes, 2385 vq->packed.vring.device, 2386 vq->packed.device_event_dma_addr); 2387 2388 kfree(vq->packed.desc_state); 2389 kfree(vq->packed.desc_extra); 2390 } else { 2391 vring_free_queue(vq->vq.vdev, 2392 vq->split.queue_size_in_bytes, 2393 vq->split.vring.desc, 2394 vq->split.queue_dma_addr); 2395 } 2396 } 2397 if (!vq->packed_ring) { 2398 kfree(vq->split.desc_state); 2399 kfree(vq->split.desc_extra); 2400 } 2401 kfree(vq); 2402 } 2403 EXPORT_SYMBOL_GPL(vring_del_virtqueue); 2404 2405 /* Manipulates transport-specific feature bits. 
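 *
 * Any transport feature bit this ring implementation does not recognise is
 * cleared, so feature negotiation never acks something the ring code cannot
 * honour.  Transports call this from their ->finalize_features() hook; a
 * minimal sketch (hypothetical transport, not from this file):
 *
 *	static int foo_finalize_features(struct virtio_device *vdev)
 *	{
 *		vring_transport_features(vdev);
 *		return 0;
 *	}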
*/ 2406 void vring_transport_features(struct virtio_device *vdev) 2407 { 2408 unsigned int i; 2409 2410 for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) { 2411 switch (i) { 2412 case VIRTIO_RING_F_INDIRECT_DESC: 2413 break; 2414 case VIRTIO_RING_F_EVENT_IDX: 2415 break; 2416 case VIRTIO_F_VERSION_1: 2417 break; 2418 case VIRTIO_F_ACCESS_PLATFORM: 2419 break; 2420 case VIRTIO_F_RING_PACKED: 2421 break; 2422 case VIRTIO_F_ORDER_PLATFORM: 2423 break; 2424 default: 2425 /* We don't understand this bit. */ 2426 __virtio_clear_bit(vdev, i); 2427 } 2428 } 2429 } 2430 EXPORT_SYMBOL_GPL(vring_transport_features); 2431 2432 /** 2433 * virtqueue_get_vring_size - return the size of the virtqueue's vring 2434 * @_vq: the struct virtqueue containing the vring of interest. 2435 * 2436 * Returns the size of the vring. This is mainly used for boasting to 2437 * userspace. Unlike other operations, this need not be serialized. 2438 */ 2439 unsigned int virtqueue_get_vring_size(struct virtqueue *_vq) 2440 { 2441 2442 struct vring_virtqueue *vq = to_vvq(_vq); 2443 2444 return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num; 2445 } 2446 EXPORT_SYMBOL_GPL(virtqueue_get_vring_size); 2447 2448 bool virtqueue_is_broken(struct virtqueue *_vq) 2449 { 2450 struct vring_virtqueue *vq = to_vvq(_vq); 2451 2452 return READ_ONCE(vq->broken); 2453 } 2454 EXPORT_SYMBOL_GPL(virtqueue_is_broken); 2455 2456 /* 2457 * This should prevent the device from being used, allowing drivers to 2458 * recover. You may need to grab appropriate locks to flush. 2459 */ 2460 void virtio_break_device(struct virtio_device *dev) 2461 { 2462 struct virtqueue *_vq; 2463 2464 spin_lock(&dev->vqs_list_lock); 2465 list_for_each_entry(_vq, &dev->vqs, list) { 2466 struct vring_virtqueue *vq = to_vvq(_vq); 2467 2468 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ 2469 WRITE_ONCE(vq->broken, true); 2470 } 2471 spin_unlock(&dev->vqs_list_lock); 2472 } 2473 EXPORT_SYMBOL_GPL(virtio_break_device); 2474 2475 dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq) 2476 { 2477 struct vring_virtqueue *vq = to_vvq(_vq); 2478 2479 BUG_ON(!vq->we_own_ring); 2480 2481 if (vq->packed_ring) 2482 return vq->packed.ring_dma_addr; 2483 2484 return vq->split.queue_dma_addr; 2485 } 2486 EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr); 2487 2488 dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq) 2489 { 2490 struct vring_virtqueue *vq = to_vvq(_vq); 2491 2492 BUG_ON(!vq->we_own_ring); 2493 2494 if (vq->packed_ring) 2495 return vq->packed.driver_event_dma_addr; 2496 2497 return vq->split.queue_dma_addr + 2498 ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc); 2499 } 2500 EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr); 2501 2502 dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq) 2503 { 2504 struct vring_virtqueue *vq = to_vvq(_vq); 2505 2506 BUG_ON(!vq->we_own_ring); 2507 2508 if (vq->packed_ring) 2509 return vq->packed.device_event_dma_addr; 2510 2511 return vq->split.queue_dma_addr + 2512 ((char *)vq->split.vring.used - (char *)vq->split.vring.desc); 2513 } 2514 EXPORT_SYMBOL_GPL(virtqueue_get_used_addr); 2515 2516 /* Only available for split ring */ 2517 const struct vring *virtqueue_get_vring(struct virtqueue *vq) 2518 { 2519 return &to_vvq(vq)->split.vring; 2520 } 2521 EXPORT_SYMBOL_GPL(virtqueue_get_vring); 2522 2523 MODULE_LICENSE("GPL"); 2524
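/*
 * Hypothetical usage sketch (driver-side code, not part of this file),
 * tying the exported API above together: a buffer's life cycle is
 * add -> kick -> callback -> get_buf.  'vq', 'req' and complete_request()
 * are illustrative assumptions; the calls themselves are the exported
 * functions defined above.
 *
 * Submission path:
 *
 *	struct scatterlist hdr, status, *sgs[2];
 *	int err;
 *
 *	sg_init_one(&hdr, &req->hdr, sizeof(req->hdr));
 *	sg_init_one(&status, &req->status, sizeof(req->status));
 *	sgs[0] = &hdr;		// readable by the device
 *	sgs[1] = &status;	// writable by the device
 *
 *	err = virtqueue_add_sgs(vq, sgs, 1, 1, req, GFP_ATOMIC);
 *	if (err)
 *		return err;	// e.g. -ENOSPC when the ring is full
 *	virtqueue_kick(vq);
 *
 * Completion path (the virtqueue callback), using the disable/enable dance
 * to close the race between "no more used buffers" and callback
 * re-enabling:
 *
 *	unsigned int len;
 *	void *token;
 *
 *	do {
 *		virtqueue_disable_cb(vq);
 *		while ((token = virtqueue_get_buf(vq, &len)) != NULL)
 *			complete_request(token, len);
 *	} while (!virtqueue_enable_cb(vq));
 */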