1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* Virtio ring implementation. 3 * 4 * Copyright 2007 Rusty Russell IBM Corporation 5 */ 6 #include <linux/virtio.h> 7 #include <linux/virtio_ring.h> 8 #include <linux/virtio_config.h> 9 #include <linux/device.h> 10 #include <linux/slab.h> 11 #include <linux/module.h> 12 #include <linux/hrtimer.h> 13 #include <linux/dma-mapping.h> 14 #include <xen/xen.h> 15 16 #ifdef DEBUG 17 /* For development, we want to crash whenever the ring is screwed. */ 18 #define BAD_RING(_vq, fmt, args...) \ 19 do { \ 20 dev_err(&(_vq)->vq.vdev->dev, \ 21 "%s:"fmt, (_vq)->vq.name, ##args); \ 22 BUG(); \ 23 } while (0) 24 /* Caller is supposed to guarantee no reentry. */ 25 #define START_USE(_vq) \ 26 do { \ 27 if ((_vq)->in_use) \ 28 panic("%s:in_use = %i\n", \ 29 (_vq)->vq.name, (_vq)->in_use); \ 30 (_vq)->in_use = __LINE__; \ 31 } while (0) 32 #define END_USE(_vq) \ 33 do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0) 34 #define LAST_ADD_TIME_UPDATE(_vq) \ 35 do { \ 36 ktime_t now = ktime_get(); \ 37 \ 38 /* No kick or get, with .1 second between? Warn. */ \ 39 if ((_vq)->last_add_time_valid) \ 40 WARN_ON(ktime_to_ms(ktime_sub(now, \ 41 (_vq)->last_add_time)) > 100); \ 42 (_vq)->last_add_time = now; \ 43 (_vq)->last_add_time_valid = true; \ 44 } while (0) 45 #define LAST_ADD_TIME_CHECK(_vq) \ 46 do { \ 47 if ((_vq)->last_add_time_valid) { \ 48 WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \ 49 (_vq)->last_add_time)) > 100); \ 50 } \ 51 } while (0) 52 #define LAST_ADD_TIME_INVALID(_vq) \ 53 ((_vq)->last_add_time_valid = false) 54 #else 55 #define BAD_RING(_vq, fmt, args...) \ 56 do { \ 57 dev_err(&_vq->vq.vdev->dev, \ 58 "%s:"fmt, (_vq)->vq.name, ##args); \ 59 (_vq)->broken = true; \ 60 } while (0) 61 #define START_USE(vq) 62 #define END_USE(vq) 63 #define LAST_ADD_TIME_UPDATE(vq) 64 #define LAST_ADD_TIME_CHECK(vq) 65 #define LAST_ADD_TIME_INVALID(vq) 66 #endif 67 68 struct vring_desc_state_split { 69 void *data; /* Data for callback. */ 70 struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ 71 }; 72 73 struct vring_desc_state_packed { 74 void *data; /* Data for callback. */ 75 struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */ 76 u16 num; /* Descriptor list length. */ 77 u16 last; /* The last desc state in a list. */ 78 }; 79 80 struct vring_desc_extra { 81 dma_addr_t addr; /* Buffer DMA addr. */ 82 u32 len; /* Buffer length. */ 83 u16 flags; /* Descriptor flags. */ 84 u16 next; /* The next desc state in a list. */ 85 }; 86 87 struct vring_virtqueue { 88 struct virtqueue vq; 89 90 /* Is this a packed ring? */ 91 bool packed_ring; 92 93 /* Is DMA API used? */ 94 bool use_dma_api; 95 96 /* Can we use weak barriers? */ 97 bool weak_barriers; 98 99 /* Other side has made a mess, don't try any more. */ 100 bool broken; 101 102 /* Host supports indirect buffers */ 103 bool indirect; 104 105 /* Host publishes avail event idx */ 106 bool event; 107 108 /* Head of free buffer list. */ 109 unsigned int free_head; 110 /* Number we've added since last sync. */ 111 unsigned int num_added; 112 113 /* Last used index we've seen. */ 114 u16 last_used_idx; 115 116 /* Hint for event idx: already triggered no need to disable. */ 117 bool event_triggered; 118 119 union { 120 /* Available for split ring */ 121 struct { 122 /* Actual memory layout for this queue. */ 123 struct vring vring; 124 125 /* Last written value to avail->flags */ 126 u16 avail_flags_shadow; 127 128 /* 129 * Last written value to avail->idx in 130 * guest byte order. 
131 */ 132 u16 avail_idx_shadow; 133 134 /* Per-descriptor state. */ 135 struct vring_desc_state_split *desc_state; 136 struct vring_desc_extra *desc_extra; 137 138 /* DMA address and size information */ 139 dma_addr_t queue_dma_addr; 140 size_t queue_size_in_bytes; 141 } split; 142 143 /* Available for packed ring */ 144 struct { 145 /* Actual memory layout for this queue. */ 146 struct { 147 unsigned int num; 148 struct vring_packed_desc *desc; 149 struct vring_packed_desc_event *driver; 150 struct vring_packed_desc_event *device; 151 } vring; 152 153 /* Driver ring wrap counter. */ 154 bool avail_wrap_counter; 155 156 /* Device ring wrap counter. */ 157 bool used_wrap_counter; 158 159 /* Avail used flags. */ 160 u16 avail_used_flags; 161 162 /* Index of the next avail descriptor. */ 163 u16 next_avail_idx; 164 165 /* 166 * Last written value to driver->flags in 167 * guest byte order. 168 */ 169 u16 event_flags_shadow; 170 171 /* Per-descriptor state. */ 172 struct vring_desc_state_packed *desc_state; 173 struct vring_desc_extra *desc_extra; 174 175 /* DMA address and size information */ 176 dma_addr_t ring_dma_addr; 177 dma_addr_t driver_event_dma_addr; 178 dma_addr_t device_event_dma_addr; 179 size_t ring_size_in_bytes; 180 size_t event_size_in_bytes; 181 } packed; 182 }; 183 184 /* How to notify other side. FIXME: commonalize hcalls! */ 185 bool (*notify)(struct virtqueue *vq); 186 187 /* DMA, allocation, and size information */ 188 bool we_own_ring; 189 190 #ifdef DEBUG 191 /* They're supposed to lock for us. */ 192 unsigned int in_use; 193 194 /* Figure out if their kicks are too delayed. */ 195 bool last_add_time_valid; 196 ktime_t last_add_time; 197 #endif 198 }; 199 200 201 /* 202 * Helpers. 203 */ 204 205 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) 206 207 static inline bool virtqueue_use_indirect(struct virtqueue *_vq, 208 unsigned int total_sg) 209 { 210 struct vring_virtqueue *vq = to_vvq(_vq); 211 212 /* 213 * If the host supports indirect descriptor tables, and we have multiple 214 * buffers, then go indirect. FIXME: tune this threshold 215 */ 216 return (vq->indirect && total_sg > 1 && vq->vq.num_free); 217 } 218 219 /* 220 * Modern virtio devices have feature bits to specify whether they need a 221 * quirk and bypass the IOMMU. If not there, just use the DMA API. 222 * 223 * If there, the interaction between virtio and DMA API is messy. 224 * 225 * On most systems with virtio, physical addresses match bus addresses, 226 * and it doesn't particularly matter whether we use the DMA API. 227 * 228 * On some systems, including Xen and any system with a physical device 229 * that speaks virtio behind a physical IOMMU, we must use the DMA API 230 * for virtio DMA to work at all. 231 * 232 * On other systems, including SPARC and PPC64, virtio-pci devices are 233 * enumerated as though they are behind an IOMMU, but the virtio host 234 * ignores the IOMMU, so we must either pretend that the IOMMU isn't 235 * there or somehow map everything as the identity. 236 * 237 * For the time being, we preserve historic behavior and bypass the DMA 238 * API. 239 * 240 * TODO: install a per-device DMA ops structure that does the right thing 241 * taking into account all the above quirks, and use the DMA API 242 * unconditionally on data path. 243 */ 244 245 static bool vring_use_dma_api(struct virtio_device *vdev) 246 { 247 if (!virtio_has_dma_quirk(vdev)) 248 return true; 249 250 /* Otherwise, we are left to guess. 
 */
	/*
	 * In theory, it's possible to have a buggy QEMU-purposed
	 * emulated Q35 IOMMU and Xen enabled at the same time.  On
	 * such a configuration, virtio has never worked and will
	 * not work without an even larger kludge.  Instead, enable
	 * the DMA API if we're a Xen guest, which at least allows
	 * all of the sensible Xen configurations to work correctly.
	 */
	if (xen_domain())
		return true;

	return false;
}

size_t virtio_max_dma_size(struct virtio_device *vdev)
{
	size_t max_segment_size = SIZE_MAX;

	if (vring_use_dma_api(vdev))
		max_segment_size = dma_max_mapping_size(&vdev->dev);

	return max_segment_size;
}
EXPORT_SYMBOL_GPL(virtio_max_dma_size);

static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
			       dma_addr_t *dma_handle, gfp_t flag)
{
	if (vring_use_dma_api(vdev)) {
		return dma_alloc_coherent(vdev->dev.parent, size,
					  dma_handle, flag);
	} else {
		void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);

		if (queue) {
			phys_addr_t phys_addr = virt_to_phys(queue);
			*dma_handle = (dma_addr_t)phys_addr;

			/*
			 * Sanity check: make sure we didn't truncate
			 * the address.  The only arches I can find that
			 * have 64-bit phys_addr_t but 32-bit dma_addr_t
			 * are certain non-highmem MIPS and x86
			 * configurations, but these configurations
			 * should never allocate physical pages above 32
			 * bits, so this is fine.  Just in case, throw a
			 * warning and abort if we end up with an
			 * unrepresentable address.
			 */
			if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
				free_pages_exact(queue, PAGE_ALIGN(size));
				return NULL;
			}
		}
		return queue;
	}
}

static void vring_free_queue(struct virtio_device *vdev, size_t size,
			     void *queue, dma_addr_t dma_handle)
{
	if (vring_use_dma_api(vdev))
		dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
	else
		free_pages_exact(queue, PAGE_ALIGN(size));
}

/*
 * The DMA ops on various arches are rather gnarly right now, and
 * making all of the arch DMA ops work on the vring device itself
 * is a mess.  For now, we use the parent device for DMA ops.
 */
static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq)
{
	return vq->vq.vdev->dev.parent;
}

/* Map one sg entry. */
static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
				   struct scatterlist *sg,
				   enum dma_data_direction direction)
{
	if (!vq->use_dma_api)
		return (dma_addr_t)sg_phys(sg);

	/*
	 * We can't use dma_map_sg, because we don't use scatterlists in
	 * the way it expects (we don't guarantee that the scatterlist
	 * will exist for the lifetime of the mapping).
	 */
	return dma_map_page(vring_dma_dev(vq),
			    sg_page(sg), sg->offset, sg->length,
			    direction);
}

static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
				   void *cpu_addr, size_t size,
				   enum dma_data_direction direction)
{
	if (!vq->use_dma_api)
		return (dma_addr_t)virt_to_phys(cpu_addr);

	return dma_map_single(vring_dma_dev(vq),
			      cpu_addr, size, direction);
}

static int vring_mapping_error(const struct vring_virtqueue *vq,
			       dma_addr_t addr)
{
	if (!vq->use_dma_api)
		return 0;

	return dma_mapping_error(vring_dma_dev(vq), addr);
}


/*
 * Split ring specific functions - *_split().
 */
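
/*
 * For orientation, a rough sketch of the split ring that the *_split()
 * helpers below operate on.  It is one contiguous allocation, laid out
 * by vring_init()/vring_size() (include/uapi/linux/virtio_ring.h)
 * roughly as follows, with num entries per area:
 *
 *	+---------------------------------+  <- queue base
 *	| desc[num]                       |  descriptor table, 16 bytes each
 *	+---------------------------------+
 *	| avail: flags, idx, ring[num],   |  written by the driver,
 *	|        used_event               |  read by the device
 *	+---------------------------------+  <- rounded up to vring_align
 *	| used: flags, idx,               |  written by the device,
 *	|       ring[num] of {id, len},   |  read by the driver
 *	|       avail_event               |
 *	+---------------------------------+
 *
 * (used_event/avail_event are only meaningful when
 * VIRTIO_RING_F_EVENT_IDX is negotiated.)  Free descriptors are kept as
 * a singly linked list threaded through split.desc_extra[].next,
 * starting at vq->free_head.
 */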

static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
					   struct vring_desc *desc)
{
	u16 flags;

	if (!vq->use_dma_api)
		return;

	flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);

	if (flags & VRING_DESC_F_INDIRECT) {
		dma_unmap_single(vring_dma_dev(vq),
				 virtio64_to_cpu(vq->vq.vdev, desc->addr),
				 virtio32_to_cpu(vq->vq.vdev, desc->len),
				 (flags & VRING_DESC_F_WRITE) ?
				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
	} else {
		dma_unmap_page(vring_dma_dev(vq),
			       virtio64_to_cpu(vq->vq.vdev, desc->addr),
			       virtio32_to_cpu(vq->vq.vdev, desc->len),
			       (flags & VRING_DESC_F_WRITE) ?
			       DMA_FROM_DEVICE : DMA_TO_DEVICE);
	}
}

static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
					  unsigned int i)
{
	struct vring_desc_extra *extra = vq->split.desc_extra;
	u16 flags;

	if (!vq->use_dma_api)
		goto out;

	flags = extra[i].flags;

	if (flags & VRING_DESC_F_INDIRECT) {
		dma_unmap_single(vring_dma_dev(vq),
				 extra[i].addr,
				 extra[i].len,
				 (flags & VRING_DESC_F_WRITE) ?
				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
	} else {
		dma_unmap_page(vring_dma_dev(vq),
			       extra[i].addr,
			       extra[i].len,
			       (flags & VRING_DESC_F_WRITE) ?
			       DMA_FROM_DEVICE : DMA_TO_DEVICE);
	}

out:
	return extra[i].next;
}

static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
					       unsigned int total_sg,
					       gfp_t gfp)
{
	struct vring_desc *desc;
	unsigned int i;

	/*
	 * We require lowmem mappings for the descriptors because
	 * otherwise virt_to_phys will give us bogus addresses in the
	 * virtqueue.
	 */
	gfp &= ~__GFP_HIGHMEM;

	desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp);
	if (!desc)
		return NULL;

	for (i = 0; i < total_sg; i++)
		desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
	return desc;
}

static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
						    struct vring_desc *desc,
						    unsigned int i,
						    dma_addr_t addr,
						    unsigned int len,
						    u16 flags,
						    bool indirect)
{
	struct vring_virtqueue *vring = to_vvq(vq);
	struct vring_desc_extra *extra = vring->split.desc_extra;
	u16 next;

	desc[i].flags = cpu_to_virtio16(vq->vdev, flags);
	desc[i].addr = cpu_to_virtio64(vq->vdev, addr);
	desc[i].len = cpu_to_virtio32(vq->vdev, len);

	if (!indirect) {
		next = extra[i].next;
		desc[i].next = cpu_to_virtio16(vq->vdev, next);

		extra[i].addr = addr;
		extra[i].len = len;
		extra[i].flags = flags;
	} else
		next = virtio16_to_cpu(vq->vdev, desc[i].next);

	return next;
}
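
/*
 * The split add path below is normally reached through the
 * virtqueue_add_*() wrappers near the end of this file.  A minimal,
 * illustrative driver-side use (not compiled here; buf, len and vq are
 * placeholders) looks roughly like:
 *
 *	struct scatterlist sg;
 *
 *	sg_init_one(&sg, buf, len);
 *	if (!virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC))
 *		virtqueue_kick(vq);
 *
 * virtqueue_add_outbuf() hands the scatterlist to virtqueue_add(),
 * which picks the packed or split implementation; for the split ring
 * that is virtqueue_add_split() below.
 */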

static inline int virtqueue_add_split(struct virtqueue *_vq,
				      struct scatterlist *sgs[],
				      unsigned int total_sg,
				      unsigned int out_sgs,
				      unsigned int in_sgs,
				      void *data,
				      void *ctx,
				      gfp_t gfp)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	struct scatterlist *sg;
	struct vring_desc *desc;
	unsigned int i, n, avail, descs_used, prev, err_idx;
	int head;
	bool indirect;

	START_USE(vq);

	BUG_ON(data == NULL);
	BUG_ON(ctx && vq->indirect);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return -EIO;
	}

	LAST_ADD_TIME_UPDATE(vq);

	BUG_ON(total_sg == 0);

	head = vq->free_head;

	if (virtqueue_use_indirect(_vq, total_sg))
		desc = alloc_indirect_split(_vq, total_sg, gfp);
	else {
		desc = NULL;
		WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
	}

	if (desc) {
		/* Use a single buffer which doesn't continue */
		indirect = true;
		/* Set up rest to use this indirect table. */
		i = 0;
		descs_used = 1;
	} else {
		indirect = false;
		desc = vq->split.vring.desc;
		i = head;
		descs_used = total_sg;
	}

	if (vq->vq.num_free < descs_used) {
		pr_debug("Can't add buf len %i - avail = %i\n",
			 descs_used, vq->vq.num_free);
		/* FIXME: for historical reasons, we force a notify here if
		 * there are outgoing parts to the buffer.  Presumably the
		 * host should service the ring ASAP. */
		if (out_sgs)
			vq->notify(&vq->vq);
		if (indirect)
			kfree(desc);
		END_USE(vq);
		return -ENOSPC;
	}

	for (n = 0; n < out_sgs; n++) {
		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
			if (vring_mapping_error(vq, addr))
				goto unmap_release;

			prev = i;
			/* Note that we trust the indirect descriptor
			 * table since it uses streaming DMA mappings.
			 */
			i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
						     VRING_DESC_F_NEXT,
						     indirect);
		}
	}
	for (; n < (out_sgs + in_sgs); n++) {
		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
			if (vring_mapping_error(vq, addr))
				goto unmap_release;

			prev = i;
			/* Note that we trust the indirect descriptor
			 * table since it uses streaming DMA mappings.
			 */
			i = virtqueue_add_desc_split(_vq, desc, i, addr,
						     sg->length,
						     VRING_DESC_F_NEXT |
						     VRING_DESC_F_WRITE,
						     indirect);
		}
	}
	/* Last one doesn't continue. */
	desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
	if (!indirect && vq->use_dma_api)
		vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &=
			~VRING_DESC_F_NEXT;

	if (indirect) {
		/* Now that the indirect table is filled in, map it. */
		dma_addr_t addr = vring_map_single(
			vq, desc, total_sg * sizeof(struct vring_desc),
			DMA_TO_DEVICE);
		if (vring_mapping_error(vq, addr))
			goto unmap_release;

		virtqueue_add_desc_split(_vq, vq->split.vring.desc,
					 head, addr,
					 total_sg * sizeof(struct vring_desc),
					 VRING_DESC_F_INDIRECT,
					 false);
	}

	/* We're using some buffers from the free list. */
	vq->vq.num_free -= descs_used;

	/* Update free pointer */
	if (indirect)
		vq->free_head = vq->split.desc_extra[head].next;
	else
		vq->free_head = i;

	/* Store token and indirect buffer state. */
	vq->split.desc_state[head].data = data;
	if (indirect)
		vq->split.desc_state[head].indir_desc = desc;
	else
		vq->split.desc_state[head].indir_desc = ctx;

	/* Put entry in available array (but don't update avail->idx until they
	 * do sync). */
	avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
	vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);

	/* Descriptors and available array need to be set before we expose the
	 * new available array entries. */
	virtio_wmb(vq->weak_barriers);
	vq->split.avail_idx_shadow++;
	vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
						vq->split.avail_idx_shadow);
	vq->num_added++;

	pr_debug("Added buffer head %i to %p\n", head, vq);
	END_USE(vq);

	/* This is very unlikely, but theoretically possible.  Kick
	 * just in case.
*/ 630 if (unlikely(vq->num_added == (1 << 16) - 1)) 631 virtqueue_kick(_vq); 632 633 return 0; 634 635 unmap_release: 636 err_idx = i; 637 638 if (indirect) 639 i = 0; 640 else 641 i = head; 642 643 for (n = 0; n < total_sg; n++) { 644 if (i == err_idx) 645 break; 646 if (indirect) { 647 vring_unmap_one_split_indirect(vq, &desc[i]); 648 i = virtio16_to_cpu(_vq->vdev, desc[i].next); 649 } else 650 i = vring_unmap_one_split(vq, i); 651 } 652 653 if (indirect) 654 kfree(desc); 655 656 END_USE(vq); 657 return -ENOMEM; 658 } 659 660 static bool virtqueue_kick_prepare_split(struct virtqueue *_vq) 661 { 662 struct vring_virtqueue *vq = to_vvq(_vq); 663 u16 new, old; 664 bool needs_kick; 665 666 START_USE(vq); 667 /* We need to expose available array entries before checking avail 668 * event. */ 669 virtio_mb(vq->weak_barriers); 670 671 old = vq->split.avail_idx_shadow - vq->num_added; 672 new = vq->split.avail_idx_shadow; 673 vq->num_added = 0; 674 675 LAST_ADD_TIME_CHECK(vq); 676 LAST_ADD_TIME_INVALID(vq); 677 678 if (vq->event) { 679 needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev, 680 vring_avail_event(&vq->split.vring)), 681 new, old); 682 } else { 683 needs_kick = !(vq->split.vring.used->flags & 684 cpu_to_virtio16(_vq->vdev, 685 VRING_USED_F_NO_NOTIFY)); 686 } 687 END_USE(vq); 688 return needs_kick; 689 } 690 691 static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, 692 void **ctx) 693 { 694 unsigned int i, j; 695 __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT); 696 697 /* Clear data ptr. */ 698 vq->split.desc_state[head].data = NULL; 699 700 /* Put back on free list: unmap first-level descriptors and find end */ 701 i = head; 702 703 while (vq->split.vring.desc[i].flags & nextflag) { 704 vring_unmap_one_split(vq, i); 705 i = vq->split.desc_extra[i].next; 706 vq->vq.num_free++; 707 } 708 709 vring_unmap_one_split(vq, i); 710 vq->split.desc_extra[i].next = vq->free_head; 711 vq->free_head = head; 712 713 /* Plus final descriptor */ 714 vq->vq.num_free++; 715 716 if (vq->indirect) { 717 struct vring_desc *indir_desc = 718 vq->split.desc_state[head].indir_desc; 719 u32 len; 720 721 /* Free the indirect table, if any, now that it's unmapped. */ 722 if (!indir_desc) 723 return; 724 725 len = vq->split.desc_extra[head].len; 726 727 BUG_ON(!(vq->split.desc_extra[head].flags & 728 VRING_DESC_F_INDIRECT)); 729 BUG_ON(len == 0 || len % sizeof(struct vring_desc)); 730 731 for (j = 0; j < len / sizeof(struct vring_desc); j++) 732 vring_unmap_one_split_indirect(vq, &indir_desc[j]); 733 734 kfree(indir_desc); 735 vq->split.desc_state[head].indir_desc = NULL; 736 } else if (ctx) { 737 *ctx = vq->split.desc_state[head].indir_desc; 738 } 739 } 740 741 static inline bool more_used_split(const struct vring_virtqueue *vq) 742 { 743 return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, 744 vq->split.vring.used->idx); 745 } 746 747 static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq, 748 unsigned int *len, 749 void **ctx) 750 { 751 struct vring_virtqueue *vq = to_vvq(_vq); 752 void *ret; 753 unsigned int i; 754 u16 last_used; 755 756 START_USE(vq); 757 758 if (unlikely(vq->broken)) { 759 END_USE(vq); 760 return NULL; 761 } 762 763 if (!more_used_split(vq)) { 764 pr_debug("No more buffers in queue\n"); 765 END_USE(vq); 766 return NULL; 767 } 768 769 /* Only get used array entries after they have been exposed by host. 
*/ 770 virtio_rmb(vq->weak_barriers); 771 772 last_used = (vq->last_used_idx & (vq->split.vring.num - 1)); 773 i = virtio32_to_cpu(_vq->vdev, 774 vq->split.vring.used->ring[last_used].id); 775 *len = virtio32_to_cpu(_vq->vdev, 776 vq->split.vring.used->ring[last_used].len); 777 778 if (unlikely(i >= vq->split.vring.num)) { 779 BAD_RING(vq, "id %u out of range\n", i); 780 return NULL; 781 } 782 if (unlikely(!vq->split.desc_state[i].data)) { 783 BAD_RING(vq, "id %u is not a head!\n", i); 784 return NULL; 785 } 786 787 /* detach_buf_split clears data, so grab it now. */ 788 ret = vq->split.desc_state[i].data; 789 detach_buf_split(vq, i, ctx); 790 vq->last_used_idx++; 791 /* If we expect an interrupt for the next entry, tell host 792 * by writing event index and flush out the write before 793 * the read in the next get_buf call. */ 794 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) 795 virtio_store_mb(vq->weak_barriers, 796 &vring_used_event(&vq->split.vring), 797 cpu_to_virtio16(_vq->vdev, vq->last_used_idx)); 798 799 LAST_ADD_TIME_INVALID(vq); 800 801 END_USE(vq); 802 return ret; 803 } 804 805 static void virtqueue_disable_cb_split(struct virtqueue *_vq) 806 { 807 struct vring_virtqueue *vq = to_vvq(_vq); 808 809 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) { 810 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; 811 if (vq->event) 812 /* TODO: this is a hack. Figure out a cleaner value to write. */ 813 vring_used_event(&vq->split.vring) = 0x0; 814 else 815 vq->split.vring.avail->flags = 816 cpu_to_virtio16(_vq->vdev, 817 vq->split.avail_flags_shadow); 818 } 819 } 820 821 static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq) 822 { 823 struct vring_virtqueue *vq = to_vvq(_vq); 824 u16 last_used_idx; 825 826 START_USE(vq); 827 828 /* We optimistically turn back on interrupts, then check if there was 829 * more to do. */ 830 /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to 831 * either clear the flags bit or point the event index at the next 832 * entry. Always do both to keep code simple. */ 833 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 834 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 835 if (!vq->event) 836 vq->split.vring.avail->flags = 837 cpu_to_virtio16(_vq->vdev, 838 vq->split.avail_flags_shadow); 839 } 840 vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev, 841 last_used_idx = vq->last_used_idx); 842 END_USE(vq); 843 return last_used_idx; 844 } 845 846 static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx) 847 { 848 struct vring_virtqueue *vq = to_vvq(_vq); 849 850 return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev, 851 vq->split.vring.used->idx); 852 } 853 854 static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq) 855 { 856 struct vring_virtqueue *vq = to_vvq(_vq); 857 u16 bufs; 858 859 START_USE(vq); 860 861 /* We optimistically turn back on interrupts, then check if there was 862 * more to do. */ 863 /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to 864 * either clear the flags bit or point the event index at the next 865 * entry. Always update the event index to keep code simple. 
*/ 866 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 867 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 868 if (!vq->event) 869 vq->split.vring.avail->flags = 870 cpu_to_virtio16(_vq->vdev, 871 vq->split.avail_flags_shadow); 872 } 873 /* TODO: tune this threshold */ 874 bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4; 875 876 virtio_store_mb(vq->weak_barriers, 877 &vring_used_event(&vq->split.vring), 878 cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs)); 879 880 if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx) 881 - vq->last_used_idx) > bufs)) { 882 END_USE(vq); 883 return false; 884 } 885 886 END_USE(vq); 887 return true; 888 } 889 890 static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq) 891 { 892 struct vring_virtqueue *vq = to_vvq(_vq); 893 unsigned int i; 894 void *buf; 895 896 START_USE(vq); 897 898 for (i = 0; i < vq->split.vring.num; i++) { 899 if (!vq->split.desc_state[i].data) 900 continue; 901 /* detach_buf_split clears data, so grab it now. */ 902 buf = vq->split.desc_state[i].data; 903 detach_buf_split(vq, i, NULL); 904 vq->split.avail_idx_shadow--; 905 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, 906 vq->split.avail_idx_shadow); 907 END_USE(vq); 908 return buf; 909 } 910 /* That should have freed everything. */ 911 BUG_ON(vq->vq.num_free != vq->split.vring.num); 912 913 END_USE(vq); 914 return NULL; 915 } 916 917 static struct virtqueue *vring_create_virtqueue_split( 918 unsigned int index, 919 unsigned int num, 920 unsigned int vring_align, 921 struct virtio_device *vdev, 922 bool weak_barriers, 923 bool may_reduce_num, 924 bool context, 925 bool (*notify)(struct virtqueue *), 926 void (*callback)(struct virtqueue *), 927 const char *name) 928 { 929 struct virtqueue *vq; 930 void *queue = NULL; 931 dma_addr_t dma_addr; 932 size_t queue_size_in_bytes; 933 struct vring vring; 934 935 /* We assume num is a power of 2. */ 936 if (num & (num - 1)) { 937 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num); 938 return NULL; 939 } 940 941 /* TODO: allocate each queue chunk individually */ 942 for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) { 943 queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 944 &dma_addr, 945 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 946 if (queue) 947 break; 948 if (!may_reduce_num) 949 return NULL; 950 } 951 952 if (!num) 953 return NULL; 954 955 if (!queue) { 956 /* Try to get a single page. You are my only hope! */ 957 queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 958 &dma_addr, GFP_KERNEL|__GFP_ZERO); 959 } 960 if (!queue) 961 return NULL; 962 963 queue_size_in_bytes = vring_size(num, vring_align); 964 vring_init(&vring, num, queue, vring_align); 965 966 vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context, 967 notify, callback, name); 968 if (!vq) { 969 vring_free_queue(vdev, queue_size_in_bytes, queue, 970 dma_addr); 971 return NULL; 972 } 973 974 to_vvq(vq)->split.queue_dma_addr = dma_addr; 975 to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes; 976 to_vvq(vq)->we_own_ring = true; 977 978 return vq; 979 } 980 981 982 /* 983 * Packed ring specific functions - *_packed(). 
984 */ 985 986 static void vring_unmap_state_packed(const struct vring_virtqueue *vq, 987 struct vring_desc_extra *state) 988 { 989 u16 flags; 990 991 if (!vq->use_dma_api) 992 return; 993 994 flags = state->flags; 995 996 if (flags & VRING_DESC_F_INDIRECT) { 997 dma_unmap_single(vring_dma_dev(vq), 998 state->addr, state->len, 999 (flags & VRING_DESC_F_WRITE) ? 1000 DMA_FROM_DEVICE : DMA_TO_DEVICE); 1001 } else { 1002 dma_unmap_page(vring_dma_dev(vq), 1003 state->addr, state->len, 1004 (flags & VRING_DESC_F_WRITE) ? 1005 DMA_FROM_DEVICE : DMA_TO_DEVICE); 1006 } 1007 } 1008 1009 static void vring_unmap_desc_packed(const struct vring_virtqueue *vq, 1010 struct vring_packed_desc *desc) 1011 { 1012 u16 flags; 1013 1014 if (!vq->use_dma_api) 1015 return; 1016 1017 flags = le16_to_cpu(desc->flags); 1018 1019 if (flags & VRING_DESC_F_INDIRECT) { 1020 dma_unmap_single(vring_dma_dev(vq), 1021 le64_to_cpu(desc->addr), 1022 le32_to_cpu(desc->len), 1023 (flags & VRING_DESC_F_WRITE) ? 1024 DMA_FROM_DEVICE : DMA_TO_DEVICE); 1025 } else { 1026 dma_unmap_page(vring_dma_dev(vq), 1027 le64_to_cpu(desc->addr), 1028 le32_to_cpu(desc->len), 1029 (flags & VRING_DESC_F_WRITE) ? 1030 DMA_FROM_DEVICE : DMA_TO_DEVICE); 1031 } 1032 } 1033 1034 static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg, 1035 gfp_t gfp) 1036 { 1037 struct vring_packed_desc *desc; 1038 1039 /* 1040 * We require lowmem mappings for the descriptors because 1041 * otherwise virt_to_phys will give us bogus addresses in the 1042 * virtqueue. 1043 */ 1044 gfp &= ~__GFP_HIGHMEM; 1045 1046 desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp); 1047 1048 return desc; 1049 } 1050 1051 static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, 1052 struct scatterlist *sgs[], 1053 unsigned int total_sg, 1054 unsigned int out_sgs, 1055 unsigned int in_sgs, 1056 void *data, 1057 gfp_t gfp) 1058 { 1059 struct vring_packed_desc *desc; 1060 struct scatterlist *sg; 1061 unsigned int i, n, err_idx; 1062 u16 head, id; 1063 dma_addr_t addr; 1064 1065 head = vq->packed.next_avail_idx; 1066 desc = alloc_indirect_packed(total_sg, gfp); 1067 1068 if (unlikely(vq->vq.num_free < 1)) { 1069 pr_debug("Can't add buf len 1 - avail = 0\n"); 1070 kfree(desc); 1071 END_USE(vq); 1072 return -ENOSPC; 1073 } 1074 1075 i = 0; 1076 id = vq->free_head; 1077 BUG_ON(id == vq->packed.vring.num); 1078 1079 for (n = 0; n < out_sgs + in_sgs; n++) { 1080 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 1081 addr = vring_map_one_sg(vq, sg, n < out_sgs ? 1082 DMA_TO_DEVICE : DMA_FROM_DEVICE); 1083 if (vring_mapping_error(vq, addr)) 1084 goto unmap_release; 1085 1086 desc[i].flags = cpu_to_le16(n < out_sgs ? 1087 0 : VRING_DESC_F_WRITE); 1088 desc[i].addr = cpu_to_le64(addr); 1089 desc[i].len = cpu_to_le32(sg->length); 1090 i++; 1091 } 1092 } 1093 1094 /* Now that the indirect table is filled in, map it. 
*/ 1095 addr = vring_map_single(vq, desc, 1096 total_sg * sizeof(struct vring_packed_desc), 1097 DMA_TO_DEVICE); 1098 if (vring_mapping_error(vq, addr)) 1099 goto unmap_release; 1100 1101 vq->packed.vring.desc[head].addr = cpu_to_le64(addr); 1102 vq->packed.vring.desc[head].len = cpu_to_le32(total_sg * 1103 sizeof(struct vring_packed_desc)); 1104 vq->packed.vring.desc[head].id = cpu_to_le16(id); 1105 1106 if (vq->use_dma_api) { 1107 vq->packed.desc_extra[id].addr = addr; 1108 vq->packed.desc_extra[id].len = total_sg * 1109 sizeof(struct vring_packed_desc); 1110 vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT | 1111 vq->packed.avail_used_flags; 1112 } 1113 1114 /* 1115 * A driver MUST NOT make the first descriptor in the list 1116 * available before all subsequent descriptors comprising 1117 * the list are made available. 1118 */ 1119 virtio_wmb(vq->weak_barriers); 1120 vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT | 1121 vq->packed.avail_used_flags); 1122 1123 /* We're using some buffers from the free list. */ 1124 vq->vq.num_free -= 1; 1125 1126 /* Update free pointer */ 1127 n = head + 1; 1128 if (n >= vq->packed.vring.num) { 1129 n = 0; 1130 vq->packed.avail_wrap_counter ^= 1; 1131 vq->packed.avail_used_flags ^= 1132 1 << VRING_PACKED_DESC_F_AVAIL | 1133 1 << VRING_PACKED_DESC_F_USED; 1134 } 1135 vq->packed.next_avail_idx = n; 1136 vq->free_head = vq->packed.desc_extra[id].next; 1137 1138 /* Store token and indirect buffer state. */ 1139 vq->packed.desc_state[id].num = 1; 1140 vq->packed.desc_state[id].data = data; 1141 vq->packed.desc_state[id].indir_desc = desc; 1142 vq->packed.desc_state[id].last = id; 1143 1144 vq->num_added += 1; 1145 1146 pr_debug("Added buffer head %i to %p\n", head, vq); 1147 END_USE(vq); 1148 1149 return 0; 1150 1151 unmap_release: 1152 err_idx = i; 1153 1154 for (i = 0; i < err_idx; i++) 1155 vring_unmap_desc_packed(vq, &desc[i]); 1156 1157 kfree(desc); 1158 1159 END_USE(vq); 1160 return -ENOMEM; 1161 } 1162 1163 static inline int virtqueue_add_packed(struct virtqueue *_vq, 1164 struct scatterlist *sgs[], 1165 unsigned int total_sg, 1166 unsigned int out_sgs, 1167 unsigned int in_sgs, 1168 void *data, 1169 void *ctx, 1170 gfp_t gfp) 1171 { 1172 struct vring_virtqueue *vq = to_vvq(_vq); 1173 struct vring_packed_desc *desc; 1174 struct scatterlist *sg; 1175 unsigned int i, n, c, descs_used, err_idx; 1176 __le16 head_flags, flags; 1177 u16 head, id, prev, curr, avail_used_flags; 1178 1179 START_USE(vq); 1180 1181 BUG_ON(data == NULL); 1182 BUG_ON(ctx && vq->indirect); 1183 1184 if (unlikely(vq->broken)) { 1185 END_USE(vq); 1186 return -EIO; 1187 } 1188 1189 LAST_ADD_TIME_UPDATE(vq); 1190 1191 BUG_ON(total_sg == 0); 1192 1193 if (virtqueue_use_indirect(_vq, total_sg)) 1194 return virtqueue_add_indirect_packed(vq, sgs, total_sg, 1195 out_sgs, in_sgs, data, gfp); 1196 1197 head = vq->packed.next_avail_idx; 1198 avail_used_flags = vq->packed.avail_used_flags; 1199 1200 WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect); 1201 1202 desc = vq->packed.vring.desc; 1203 i = head; 1204 descs_used = total_sg; 1205 1206 if (unlikely(vq->vq.num_free < descs_used)) { 1207 pr_debug("Can't add buf len %i - avail = %i\n", 1208 descs_used, vq->vq.num_free); 1209 END_USE(vq); 1210 return -ENOSPC; 1211 } 1212 1213 id = vq->free_head; 1214 BUG_ON(id == vq->packed.vring.num); 1215 1216 curr = id; 1217 c = 0; 1218 for (n = 0; n < out_sgs + in_sgs; n++) { 1219 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 1220 dma_addr_t addr = vring_map_one_sg(vq, 
sg, n < out_sgs ? 1221 DMA_TO_DEVICE : DMA_FROM_DEVICE); 1222 if (vring_mapping_error(vq, addr)) 1223 goto unmap_release; 1224 1225 flags = cpu_to_le16(vq->packed.avail_used_flags | 1226 (++c == total_sg ? 0 : VRING_DESC_F_NEXT) | 1227 (n < out_sgs ? 0 : VRING_DESC_F_WRITE)); 1228 if (i == head) 1229 head_flags = flags; 1230 else 1231 desc[i].flags = flags; 1232 1233 desc[i].addr = cpu_to_le64(addr); 1234 desc[i].len = cpu_to_le32(sg->length); 1235 desc[i].id = cpu_to_le16(id); 1236 1237 if (unlikely(vq->use_dma_api)) { 1238 vq->packed.desc_extra[curr].addr = addr; 1239 vq->packed.desc_extra[curr].len = sg->length; 1240 vq->packed.desc_extra[curr].flags = 1241 le16_to_cpu(flags); 1242 } 1243 prev = curr; 1244 curr = vq->packed.desc_extra[curr].next; 1245 1246 if ((unlikely(++i >= vq->packed.vring.num))) { 1247 i = 0; 1248 vq->packed.avail_used_flags ^= 1249 1 << VRING_PACKED_DESC_F_AVAIL | 1250 1 << VRING_PACKED_DESC_F_USED; 1251 } 1252 } 1253 } 1254 1255 if (i < head) 1256 vq->packed.avail_wrap_counter ^= 1; 1257 1258 /* We're using some buffers from the free list. */ 1259 vq->vq.num_free -= descs_used; 1260 1261 /* Update free pointer */ 1262 vq->packed.next_avail_idx = i; 1263 vq->free_head = curr; 1264 1265 /* Store token. */ 1266 vq->packed.desc_state[id].num = descs_used; 1267 vq->packed.desc_state[id].data = data; 1268 vq->packed.desc_state[id].indir_desc = ctx; 1269 vq->packed.desc_state[id].last = prev; 1270 1271 /* 1272 * A driver MUST NOT make the first descriptor in the list 1273 * available before all subsequent descriptors comprising 1274 * the list are made available. 1275 */ 1276 virtio_wmb(vq->weak_barriers); 1277 vq->packed.vring.desc[head].flags = head_flags; 1278 vq->num_added += descs_used; 1279 1280 pr_debug("Added buffer head %i to %p\n", head, vq); 1281 END_USE(vq); 1282 1283 return 0; 1284 1285 unmap_release: 1286 err_idx = i; 1287 i = head; 1288 curr = vq->free_head; 1289 1290 vq->packed.avail_used_flags = avail_used_flags; 1291 1292 for (n = 0; n < total_sg; n++) { 1293 if (i == err_idx) 1294 break; 1295 vring_unmap_state_packed(vq, 1296 &vq->packed.desc_extra[curr]); 1297 curr = vq->packed.desc_extra[curr].next; 1298 i++; 1299 if (i >= vq->packed.vring.num) 1300 i = 0; 1301 } 1302 1303 END_USE(vq); 1304 return -EIO; 1305 } 1306 1307 static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq) 1308 { 1309 struct vring_virtqueue *vq = to_vvq(_vq); 1310 u16 new, old, off_wrap, flags, wrap_counter, event_idx; 1311 bool needs_kick; 1312 union { 1313 struct { 1314 __le16 off_wrap; 1315 __le16 flags; 1316 }; 1317 u32 u32; 1318 } snapshot; 1319 1320 START_USE(vq); 1321 1322 /* 1323 * We need to expose the new flags value before checking notification 1324 * suppressions. 
1325 */ 1326 virtio_mb(vq->weak_barriers); 1327 1328 old = vq->packed.next_avail_idx - vq->num_added; 1329 new = vq->packed.next_avail_idx; 1330 vq->num_added = 0; 1331 1332 snapshot.u32 = *(u32 *)vq->packed.vring.device; 1333 flags = le16_to_cpu(snapshot.flags); 1334 1335 LAST_ADD_TIME_CHECK(vq); 1336 LAST_ADD_TIME_INVALID(vq); 1337 1338 if (flags != VRING_PACKED_EVENT_FLAG_DESC) { 1339 needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE); 1340 goto out; 1341 } 1342 1343 off_wrap = le16_to_cpu(snapshot.off_wrap); 1344 1345 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; 1346 event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); 1347 if (wrap_counter != vq->packed.avail_wrap_counter) 1348 event_idx -= vq->packed.vring.num; 1349 1350 needs_kick = vring_need_event(event_idx, new, old); 1351 out: 1352 END_USE(vq); 1353 return needs_kick; 1354 } 1355 1356 static void detach_buf_packed(struct vring_virtqueue *vq, 1357 unsigned int id, void **ctx) 1358 { 1359 struct vring_desc_state_packed *state = NULL; 1360 struct vring_packed_desc *desc; 1361 unsigned int i, curr; 1362 1363 state = &vq->packed.desc_state[id]; 1364 1365 /* Clear data ptr. */ 1366 state->data = NULL; 1367 1368 vq->packed.desc_extra[state->last].next = vq->free_head; 1369 vq->free_head = id; 1370 vq->vq.num_free += state->num; 1371 1372 if (unlikely(vq->use_dma_api)) { 1373 curr = id; 1374 for (i = 0; i < state->num; i++) { 1375 vring_unmap_state_packed(vq, 1376 &vq->packed.desc_extra[curr]); 1377 curr = vq->packed.desc_extra[curr].next; 1378 } 1379 } 1380 1381 if (vq->indirect) { 1382 u32 len; 1383 1384 /* Free the indirect table, if any, now that it's unmapped. */ 1385 desc = state->indir_desc; 1386 if (!desc) 1387 return; 1388 1389 if (vq->use_dma_api) { 1390 len = vq->packed.desc_extra[id].len; 1391 for (i = 0; i < len / sizeof(struct vring_packed_desc); 1392 i++) 1393 vring_unmap_desc_packed(vq, &desc[i]); 1394 } 1395 kfree(desc); 1396 state->indir_desc = NULL; 1397 } else if (ctx) { 1398 *ctx = state->indir_desc; 1399 } 1400 } 1401 1402 static inline bool is_used_desc_packed(const struct vring_virtqueue *vq, 1403 u16 idx, bool used_wrap_counter) 1404 { 1405 bool avail, used; 1406 u16 flags; 1407 1408 flags = le16_to_cpu(vq->packed.vring.desc[idx].flags); 1409 avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL)); 1410 used = !!(flags & (1 << VRING_PACKED_DESC_F_USED)); 1411 1412 return avail == used && used == used_wrap_counter; 1413 } 1414 1415 static inline bool more_used_packed(const struct vring_virtqueue *vq) 1416 { 1417 return is_used_desc_packed(vq, vq->last_used_idx, 1418 vq->packed.used_wrap_counter); 1419 } 1420 1421 static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, 1422 unsigned int *len, 1423 void **ctx) 1424 { 1425 struct vring_virtqueue *vq = to_vvq(_vq); 1426 u16 last_used, id; 1427 void *ret; 1428 1429 START_USE(vq); 1430 1431 if (unlikely(vq->broken)) { 1432 END_USE(vq); 1433 return NULL; 1434 } 1435 1436 if (!more_used_packed(vq)) { 1437 pr_debug("No more buffers in queue\n"); 1438 END_USE(vq); 1439 return NULL; 1440 } 1441 1442 /* Only get used elements after they have been exposed by host. 
*/ 1443 virtio_rmb(vq->weak_barriers); 1444 1445 last_used = vq->last_used_idx; 1446 id = le16_to_cpu(vq->packed.vring.desc[last_used].id); 1447 *len = le32_to_cpu(vq->packed.vring.desc[last_used].len); 1448 1449 if (unlikely(id >= vq->packed.vring.num)) { 1450 BAD_RING(vq, "id %u out of range\n", id); 1451 return NULL; 1452 } 1453 if (unlikely(!vq->packed.desc_state[id].data)) { 1454 BAD_RING(vq, "id %u is not a head!\n", id); 1455 return NULL; 1456 } 1457 1458 /* detach_buf_packed clears data, so grab it now. */ 1459 ret = vq->packed.desc_state[id].data; 1460 detach_buf_packed(vq, id, ctx); 1461 1462 vq->last_used_idx += vq->packed.desc_state[id].num; 1463 if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) { 1464 vq->last_used_idx -= vq->packed.vring.num; 1465 vq->packed.used_wrap_counter ^= 1; 1466 } 1467 1468 /* 1469 * If we expect an interrupt for the next entry, tell host 1470 * by writing event index and flush out the write before 1471 * the read in the next get_buf call. 1472 */ 1473 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC) 1474 virtio_store_mb(vq->weak_barriers, 1475 &vq->packed.vring.driver->off_wrap, 1476 cpu_to_le16(vq->last_used_idx | 1477 (vq->packed.used_wrap_counter << 1478 VRING_PACKED_EVENT_F_WRAP_CTR))); 1479 1480 LAST_ADD_TIME_INVALID(vq); 1481 1482 END_USE(vq); 1483 return ret; 1484 } 1485 1486 static void virtqueue_disable_cb_packed(struct virtqueue *_vq) 1487 { 1488 struct vring_virtqueue *vq = to_vvq(_vq); 1489 1490 if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) { 1491 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; 1492 vq->packed.vring.driver->flags = 1493 cpu_to_le16(vq->packed.event_flags_shadow); 1494 } 1495 } 1496 1497 static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq) 1498 { 1499 struct vring_virtqueue *vq = to_vvq(_vq); 1500 1501 START_USE(vq); 1502 1503 /* 1504 * We optimistically turn back on interrupts, then check if there was 1505 * more to do. 1506 */ 1507 1508 if (vq->event) { 1509 vq->packed.vring.driver->off_wrap = 1510 cpu_to_le16(vq->last_used_idx | 1511 (vq->packed.used_wrap_counter << 1512 VRING_PACKED_EVENT_F_WRAP_CTR)); 1513 /* 1514 * We need to update event offset and event wrap 1515 * counter first before updating event flags. 1516 */ 1517 virtio_wmb(vq->weak_barriers); 1518 } 1519 1520 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 1521 vq->packed.event_flags_shadow = vq->event ? 1522 VRING_PACKED_EVENT_FLAG_DESC : 1523 VRING_PACKED_EVENT_FLAG_ENABLE; 1524 vq->packed.vring.driver->flags = 1525 cpu_to_le16(vq->packed.event_flags_shadow); 1526 } 1527 1528 END_USE(vq); 1529 return vq->last_used_idx | ((u16)vq->packed.used_wrap_counter << 1530 VRING_PACKED_EVENT_F_WRAP_CTR); 1531 } 1532 1533 static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap) 1534 { 1535 struct vring_virtqueue *vq = to_vvq(_vq); 1536 bool wrap_counter; 1537 u16 used_idx; 1538 1539 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; 1540 used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); 1541 1542 return is_used_desc_packed(vq, used_idx, wrap_counter); 1543 } 1544 1545 static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq) 1546 { 1547 struct vring_virtqueue *vq = to_vvq(_vq); 1548 u16 used_idx, wrap_counter; 1549 u16 bufs; 1550 1551 START_USE(vq); 1552 1553 /* 1554 * We optimistically turn back on interrupts, then check if there was 1555 * more to do. 
1556 */ 1557 1558 if (vq->event) { 1559 /* TODO: tune this threshold */ 1560 bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4; 1561 wrap_counter = vq->packed.used_wrap_counter; 1562 1563 used_idx = vq->last_used_idx + bufs; 1564 if (used_idx >= vq->packed.vring.num) { 1565 used_idx -= vq->packed.vring.num; 1566 wrap_counter ^= 1; 1567 } 1568 1569 vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx | 1570 (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR)); 1571 1572 /* 1573 * We need to update event offset and event wrap 1574 * counter first before updating event flags. 1575 */ 1576 virtio_wmb(vq->weak_barriers); 1577 } 1578 1579 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 1580 vq->packed.event_flags_shadow = vq->event ? 1581 VRING_PACKED_EVENT_FLAG_DESC : 1582 VRING_PACKED_EVENT_FLAG_ENABLE; 1583 vq->packed.vring.driver->flags = 1584 cpu_to_le16(vq->packed.event_flags_shadow); 1585 } 1586 1587 /* 1588 * We need to update event suppression structure first 1589 * before re-checking for more used buffers. 1590 */ 1591 virtio_mb(vq->weak_barriers); 1592 1593 if (is_used_desc_packed(vq, 1594 vq->last_used_idx, 1595 vq->packed.used_wrap_counter)) { 1596 END_USE(vq); 1597 return false; 1598 } 1599 1600 END_USE(vq); 1601 return true; 1602 } 1603 1604 static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq) 1605 { 1606 struct vring_virtqueue *vq = to_vvq(_vq); 1607 unsigned int i; 1608 void *buf; 1609 1610 START_USE(vq); 1611 1612 for (i = 0; i < vq->packed.vring.num; i++) { 1613 if (!vq->packed.desc_state[i].data) 1614 continue; 1615 /* detach_buf clears data, so grab it now. */ 1616 buf = vq->packed.desc_state[i].data; 1617 detach_buf_packed(vq, i, NULL); 1618 END_USE(vq); 1619 return buf; 1620 } 1621 /* That should have freed everything. 
*/ 1622 BUG_ON(vq->vq.num_free != vq->packed.vring.num); 1623 1624 END_USE(vq); 1625 return NULL; 1626 } 1627 1628 static struct vring_desc_extra *vring_alloc_desc_extra(struct vring_virtqueue *vq, 1629 unsigned int num) 1630 { 1631 struct vring_desc_extra *desc_extra; 1632 unsigned int i; 1633 1634 desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra), 1635 GFP_KERNEL); 1636 if (!desc_extra) 1637 return NULL; 1638 1639 memset(desc_extra, 0, num * sizeof(struct vring_desc_extra)); 1640 1641 for (i = 0; i < num - 1; i++) 1642 desc_extra[i].next = i + 1; 1643 1644 return desc_extra; 1645 } 1646 1647 static struct virtqueue *vring_create_virtqueue_packed( 1648 unsigned int index, 1649 unsigned int num, 1650 unsigned int vring_align, 1651 struct virtio_device *vdev, 1652 bool weak_barriers, 1653 bool may_reduce_num, 1654 bool context, 1655 bool (*notify)(struct virtqueue *), 1656 void (*callback)(struct virtqueue *), 1657 const char *name) 1658 { 1659 struct vring_virtqueue *vq; 1660 struct vring_packed_desc *ring; 1661 struct vring_packed_desc_event *driver, *device; 1662 dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr; 1663 size_t ring_size_in_bytes, event_size_in_bytes; 1664 1665 ring_size_in_bytes = num * sizeof(struct vring_packed_desc); 1666 1667 ring = vring_alloc_queue(vdev, ring_size_in_bytes, 1668 &ring_dma_addr, 1669 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1670 if (!ring) 1671 goto err_ring; 1672 1673 event_size_in_bytes = sizeof(struct vring_packed_desc_event); 1674 1675 driver = vring_alloc_queue(vdev, event_size_in_bytes, 1676 &driver_event_dma_addr, 1677 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1678 if (!driver) 1679 goto err_driver; 1680 1681 device = vring_alloc_queue(vdev, event_size_in_bytes, 1682 &device_event_dma_addr, 1683 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1684 if (!device) 1685 goto err_device; 1686 1687 vq = kmalloc(sizeof(*vq), GFP_KERNEL); 1688 if (!vq) 1689 goto err_vq; 1690 1691 vq->vq.callback = callback; 1692 vq->vq.vdev = vdev; 1693 vq->vq.name = name; 1694 vq->vq.num_free = num; 1695 vq->vq.index = index; 1696 vq->we_own_ring = true; 1697 vq->notify = notify; 1698 vq->weak_barriers = weak_barriers; 1699 vq->broken = false; 1700 vq->last_used_idx = 0; 1701 vq->event_triggered = false; 1702 vq->num_added = 0; 1703 vq->packed_ring = true; 1704 vq->use_dma_api = vring_use_dma_api(vdev); 1705 #ifdef DEBUG 1706 vq->in_use = false; 1707 vq->last_add_time_valid = false; 1708 #endif 1709 1710 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && 1711 !context; 1712 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 1713 1714 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) 1715 vq->weak_barriers = false; 1716 1717 vq->packed.ring_dma_addr = ring_dma_addr; 1718 vq->packed.driver_event_dma_addr = driver_event_dma_addr; 1719 vq->packed.device_event_dma_addr = device_event_dma_addr; 1720 1721 vq->packed.ring_size_in_bytes = ring_size_in_bytes; 1722 vq->packed.event_size_in_bytes = event_size_in_bytes; 1723 1724 vq->packed.vring.num = num; 1725 vq->packed.vring.desc = ring; 1726 vq->packed.vring.driver = driver; 1727 vq->packed.vring.device = device; 1728 1729 vq->packed.next_avail_idx = 0; 1730 vq->packed.avail_wrap_counter = 1; 1731 vq->packed.used_wrap_counter = 1; 1732 vq->packed.event_flags_shadow = 0; 1733 vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL; 1734 1735 vq->packed.desc_state = kmalloc_array(num, 1736 sizeof(struct vring_desc_state_packed), 1737 GFP_KERNEL); 1738 if 
(!vq->packed.desc_state) 1739 goto err_desc_state; 1740 1741 memset(vq->packed.desc_state, 0, 1742 num * sizeof(struct vring_desc_state_packed)); 1743 1744 /* Put everything in free lists. */ 1745 vq->free_head = 0; 1746 1747 vq->packed.desc_extra = vring_alloc_desc_extra(vq, num); 1748 if (!vq->packed.desc_extra) 1749 goto err_desc_extra; 1750 1751 /* No callback? Tell other side not to bother us. */ 1752 if (!callback) { 1753 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; 1754 vq->packed.vring.driver->flags = 1755 cpu_to_le16(vq->packed.event_flags_shadow); 1756 } 1757 1758 list_add_tail(&vq->vq.list, &vdev->vqs); 1759 return &vq->vq; 1760 1761 err_desc_extra: 1762 kfree(vq->packed.desc_state); 1763 err_desc_state: 1764 kfree(vq); 1765 err_vq: 1766 vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr); 1767 err_device: 1768 vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr); 1769 err_driver: 1770 vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr); 1771 err_ring: 1772 return NULL; 1773 } 1774 1775 1776 /* 1777 * Generic functions and exported symbols. 1778 */ 1779 1780 static inline int virtqueue_add(struct virtqueue *_vq, 1781 struct scatterlist *sgs[], 1782 unsigned int total_sg, 1783 unsigned int out_sgs, 1784 unsigned int in_sgs, 1785 void *data, 1786 void *ctx, 1787 gfp_t gfp) 1788 { 1789 struct vring_virtqueue *vq = to_vvq(_vq); 1790 1791 return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg, 1792 out_sgs, in_sgs, data, ctx, gfp) : 1793 virtqueue_add_split(_vq, sgs, total_sg, 1794 out_sgs, in_sgs, data, ctx, gfp); 1795 } 1796 1797 /** 1798 * virtqueue_add_sgs - expose buffers to other end 1799 * @_vq: the struct virtqueue we're talking about. 1800 * @sgs: array of terminated scatterlists. 1801 * @out_sgs: the number of scatterlists readable by other side 1802 * @in_sgs: the number of scatterlists which are writable (after readable ones) 1803 * @data: the token identifying the buffer. 1804 * @gfp: how to do memory allocations (if necessary). 1805 * 1806 * Caller must ensure we don't call this with other virtqueue operations 1807 * at the same time (except where noted). 1808 * 1809 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1810 */ 1811 int virtqueue_add_sgs(struct virtqueue *_vq, 1812 struct scatterlist *sgs[], 1813 unsigned int out_sgs, 1814 unsigned int in_sgs, 1815 void *data, 1816 gfp_t gfp) 1817 { 1818 unsigned int i, total_sg = 0; 1819 1820 /* Count them first. */ 1821 for (i = 0; i < out_sgs + in_sgs; i++) { 1822 struct scatterlist *sg; 1823 1824 for (sg = sgs[i]; sg; sg = sg_next(sg)) 1825 total_sg++; 1826 } 1827 return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, 1828 data, NULL, gfp); 1829 } 1830 EXPORT_SYMBOL_GPL(virtqueue_add_sgs); 1831 1832 /** 1833 * virtqueue_add_outbuf - expose output buffers to other end 1834 * @vq: the struct virtqueue we're talking about. 1835 * @sg: scatterlist (must be well-formed and terminated!) 1836 * @num: the number of entries in @sg readable by other side 1837 * @data: the token identifying the buffer. 1838 * @gfp: how to do memory allocations (if necessary). 1839 * 1840 * Caller must ensure we don't call this with other virtqueue operations 1841 * at the same time (except where noted). 1842 * 1843 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 
1844 */ 1845 int virtqueue_add_outbuf(struct virtqueue *vq, 1846 struct scatterlist *sg, unsigned int num, 1847 void *data, 1848 gfp_t gfp) 1849 { 1850 return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp); 1851 } 1852 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); 1853 1854 /** 1855 * virtqueue_add_inbuf - expose input buffers to other end 1856 * @vq: the struct virtqueue we're talking about. 1857 * @sg: scatterlist (must be well-formed and terminated!) 1858 * @num: the number of entries in @sg writable by other side 1859 * @data: the token identifying the buffer. 1860 * @gfp: how to do memory allocations (if necessary). 1861 * 1862 * Caller must ensure we don't call this with other virtqueue operations 1863 * at the same time (except where noted). 1864 * 1865 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1866 */ 1867 int virtqueue_add_inbuf(struct virtqueue *vq, 1868 struct scatterlist *sg, unsigned int num, 1869 void *data, 1870 gfp_t gfp) 1871 { 1872 return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp); 1873 } 1874 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); 1875 1876 /** 1877 * virtqueue_add_inbuf_ctx - expose input buffers to other end 1878 * @vq: the struct virtqueue we're talking about. 1879 * @sg: scatterlist (must be well-formed and terminated!) 1880 * @num: the number of entries in @sg writable by other side 1881 * @data: the token identifying the buffer. 1882 * @ctx: extra context for the token 1883 * @gfp: how to do memory allocations (if necessary). 1884 * 1885 * Caller must ensure we don't call this with other virtqueue operations 1886 * at the same time (except where noted). 1887 * 1888 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1889 */ 1890 int virtqueue_add_inbuf_ctx(struct virtqueue *vq, 1891 struct scatterlist *sg, unsigned int num, 1892 void *data, 1893 void *ctx, 1894 gfp_t gfp) 1895 { 1896 return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp); 1897 } 1898 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx); 1899 1900 /** 1901 * virtqueue_kick_prepare - first half of split virtqueue_kick call. 1902 * @_vq: the struct virtqueue 1903 * 1904 * Instead of virtqueue_kick(), you can do: 1905 * if (virtqueue_kick_prepare(vq)) 1906 * virtqueue_notify(vq); 1907 * 1908 * This is sometimes useful because the virtqueue_kick_prepare() needs 1909 * to be serialized, but the actual virtqueue_notify() call does not. 1910 */ 1911 bool virtqueue_kick_prepare(struct virtqueue *_vq) 1912 { 1913 struct vring_virtqueue *vq = to_vvq(_vq); 1914 1915 return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) : 1916 virtqueue_kick_prepare_split(_vq); 1917 } 1918 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare); 1919 1920 /** 1921 * virtqueue_notify - second half of split virtqueue_kick call. 1922 * @_vq: the struct virtqueue 1923 * 1924 * This does not need to be serialized. 1925 * 1926 * Returns false if host notify failed or queue is broken, otherwise true. 1927 */ 1928 bool virtqueue_notify(struct virtqueue *_vq) 1929 { 1930 struct vring_virtqueue *vq = to_vvq(_vq); 1931 1932 if (unlikely(vq->broken)) 1933 return false; 1934 1935 /* Prod other side to tell it about changes. */ 1936 if (!vq->notify(_vq)) { 1937 vq->broken = true; 1938 return false; 1939 } 1940 return true; 1941 } 1942 EXPORT_SYMBOL_GPL(virtqueue_notify); 1943 1944 /** 1945 * virtqueue_kick - update after add_buf 1946 * @vq: the struct virtqueue 1947 * 1948 * After one or more virtqueue_add_* calls, invoke this to kick 1949 * the other side. 
1950 * 1951 * Caller must ensure we don't call this with other virtqueue 1952 * operations at the same time (except where noted). 1953 * 1954 * Returns false if kick failed, otherwise true. 1955 */ 1956 bool virtqueue_kick(struct virtqueue *vq) 1957 { 1958 if (virtqueue_kick_prepare(vq)) 1959 return virtqueue_notify(vq); 1960 return true; 1961 } 1962 EXPORT_SYMBOL_GPL(virtqueue_kick); 1963 1964 /** 1965 * virtqueue_get_buf_ctx - get the next used buffer 1966 * @_vq: the struct virtqueue we're talking about. 1967 * @len: the length written into the buffer 1968 * @ctx: extra context for the token 1969 * 1970 * If the device wrote data into the buffer, @len will be set to the 1971 * amount written. This means you don't need to clear the buffer 1972 * beforehand to ensure there's no data leakage in the case of short 1973 * writes. 1974 * 1975 * Caller must ensure we don't call this with other virtqueue 1976 * operations at the same time (except where noted). 1977 * 1978 * Returns NULL if there are no used buffers, or the "data" token 1979 * handed to virtqueue_add_*(). 1980 */ 1981 void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, 1982 void **ctx) 1983 { 1984 struct vring_virtqueue *vq = to_vvq(_vq); 1985 1986 return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) : 1987 virtqueue_get_buf_ctx_split(_vq, len, ctx); 1988 } 1989 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx); 1990 1991 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) 1992 { 1993 return virtqueue_get_buf_ctx(_vq, len, NULL); 1994 } 1995 EXPORT_SYMBOL_GPL(virtqueue_get_buf); 1996 /** 1997 * virtqueue_disable_cb - disable callbacks 1998 * @_vq: the struct virtqueue we're talking about. 1999 * 2000 * Note that this is not necessarily synchronous, hence unreliable and only 2001 * useful as an optimization. 2002 * 2003 * Unlike other operations, this need not be serialized. 2004 */ 2005 void virtqueue_disable_cb(struct virtqueue *_vq) 2006 { 2007 struct vring_virtqueue *vq = to_vvq(_vq); 2008 2009 /* If device triggered an event already it won't trigger one again: 2010 * no need to disable. 2011 */ 2012 if (vq->event_triggered) 2013 return; 2014 2015 if (vq->packed_ring) 2016 virtqueue_disable_cb_packed(_vq); 2017 else 2018 virtqueue_disable_cb_split(_vq); 2019 } 2020 EXPORT_SYMBOL_GPL(virtqueue_disable_cb); 2021 2022 /** 2023 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb 2024 * @_vq: the struct virtqueue we're talking about. 2025 * 2026 * This re-enables callbacks; it returns current queue state 2027 * in an opaque unsigned value. This value should be later tested by 2028 * virtqueue_poll, to detect a possible race between the driver checking for 2029 * more work, and enabling callbacks. 2030 * 2031 * Caller must ensure we don't call this with other virtqueue 2032 * operations at the same time (except where noted). 2033 */ 2034 unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq) 2035 { 2036 struct vring_virtqueue *vq = to_vvq(_vq); 2037 2038 if (vq->event_triggered) 2039 vq->event_triggered = false; 2040 2041 return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) : 2042 virtqueue_enable_cb_prepare_split(_vq); 2043 } 2044 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); 2045 2046 /** 2047 * virtqueue_poll - query pending used buffers 2048 * @_vq: the struct virtqueue we're talking about. 2049 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare). 2050 * 2051 * Returns "true" if there are pending used buffers in the queue. 
/**
 * virtqueue_poll - query pending used buffers
 * @_vq: the struct virtqueue we're talking about.
 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
 *
 * Returns "true" if there are pending used buffers in the queue.
 *
 * This does not need to be serialized.
 */
bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        if (unlikely(vq->broken))
                return false;

        virtio_mb(vq->weak_barriers);
        return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
                                 virtqueue_poll_split(_vq, last_used_idx);
}
EXPORT_SYMBOL_GPL(virtqueue_poll);

/**
 * virtqueue_enable_cb - restart callbacks after disable_cb.
 * @_vq: the struct virtqueue we're talking about.
 *
 * This re-enables callbacks; it returns "false" if there are pending
 * buffers in the queue, to detect a possible race between the driver
 * checking for more work, and enabling callbacks.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 */
bool virtqueue_enable_cb(struct virtqueue *_vq)
{
        unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq);

        return !virtqueue_poll(_vq, last_used_idx);
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb);

/**
 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
 * @_vq: the struct virtqueue we're talking about.
 *
 * This re-enables callbacks but hints to the other side to delay
 * interrupts until most of the available buffers have been processed;
 * it returns "false" if there are many pending buffers in the queue,
 * to detect a possible race between the driver checking for more work,
 * and enabling callbacks.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 */
bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        if (vq->event_triggered)
                vq->event_triggered = false;

        return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
                                 virtqueue_enable_cb_delayed_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);

/**
 * virtqueue_detach_unused_buf - detach first unused buffer
 * @_vq: the struct virtqueue we're talking about.
 *
 * Returns NULL or the "data" token handed to virtqueue_add_*().
 * This is not valid on an active queue; it is useful only for device
 * shutdown.
 */
void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
                                 virtqueue_detach_unused_buf_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
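
/*
 * Illustrative teardown sketch (not part of this file; vq is hypothetical
 * and the tokens are assumed to have been kmalloc'ed by the driver): once
 * the device has been reset so the queue is no longer active, a driver
 * drains the tokens it still owns before deleting the virtqueue:
 *
 *      void *buf;
 *
 *      while ((buf = virtqueue_detach_unused_buf(vq)))
 *              kfree(buf);
 */
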
static inline bool more_used(const struct vring_virtqueue *vq)
{
        return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
}

irqreturn_t vring_interrupt(int irq, void *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        if (!more_used(vq)) {
                pr_debug("virtqueue interrupt with no work for %p\n", vq);
                return IRQ_NONE;
        }

        if (unlikely(vq->broken))
                return IRQ_HANDLED;

        /* Just a hint for performance: so it's ok that this can be racy! */
        if (vq->event)
                vq->event_triggered = true;

        pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
        if (vq->vq.callback)
                vq->vq.callback(&vq->vq);

        return IRQ_HANDLED;
}
EXPORT_SYMBOL_GPL(vring_interrupt);
/* Only available for split ring */
struct virtqueue *__vring_new_virtqueue(unsigned int index,
                                        struct vring vring,
                                        struct virtio_device *vdev,
                                        bool weak_barriers,
                                        bool context,
                                        bool (*notify)(struct virtqueue *),
                                        void (*callback)(struct virtqueue *),
                                        const char *name)
{
        struct vring_virtqueue *vq;

        if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
                return NULL;

        vq = kmalloc(sizeof(*vq), GFP_KERNEL);
        if (!vq)
                return NULL;

        vq->packed_ring = false;
        vq->vq.callback = callback;
        vq->vq.vdev = vdev;
        vq->vq.name = name;
        vq->vq.num_free = vring.num;
        vq->vq.index = index;
        vq->we_own_ring = false;
        vq->notify = notify;
        vq->weak_barriers = weak_barriers;
        vq->broken = false;
        vq->last_used_idx = 0;
        vq->event_triggered = false;
        vq->num_added = 0;
        vq->use_dma_api = vring_use_dma_api(vdev);
#ifdef DEBUG
        vq->in_use = false;
        vq->last_add_time_valid = false;
#endif

        vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
                !context;
        vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);

        if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
                vq->weak_barriers = false;

        vq->split.queue_dma_addr = 0;
        vq->split.queue_size_in_bytes = 0;

        vq->split.vring = vring;
        vq->split.avail_flags_shadow = 0;
        vq->split.avail_idx_shadow = 0;

        /* No callback?  Tell other side not to bother us. */
        if (!callback) {
                vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
                if (!vq->event)
                        vq->split.vring.avail->flags = cpu_to_virtio16(vdev,
                                        vq->split.avail_flags_shadow);
        }

        vq->split.desc_state = kmalloc_array(vring.num,
                        sizeof(struct vring_desc_state_split), GFP_KERNEL);
        if (!vq->split.desc_state)
                goto err_state;

        vq->split.desc_extra = vring_alloc_desc_extra(vq, vring.num);
        if (!vq->split.desc_extra)
                goto err_extra;

        /* Put everything in free lists. */
        vq->free_head = 0;
        memset(vq->split.desc_state, 0, vring.num *
                        sizeof(struct vring_desc_state_split));

        list_add_tail(&vq->vq.list, &vdev->vqs);
        return &vq->vq;

err_extra:
        kfree(vq->split.desc_state);
err_state:
        kfree(vq);
        return NULL;
}
EXPORT_SYMBOL_GPL(__vring_new_virtqueue);

struct virtqueue *vring_create_virtqueue(
        unsigned int index,
        unsigned int num,
        unsigned int vring_align,
        struct virtio_device *vdev,
        bool weak_barriers,
        bool may_reduce_num,
        bool context,
        bool (*notify)(struct virtqueue *),
        void (*callback)(struct virtqueue *),
        const char *name)
{
        if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
                return vring_create_virtqueue_packed(index, num, vring_align,
                                vdev, weak_barriers, may_reduce_num,
                                context, notify, callback, name);

        return vring_create_virtqueue_split(index, num, vring_align,
                        vdev, weak_barriers, may_reduce_num,
                        context, notify, callback, name);
}
EXPORT_SYMBOL_GPL(vring_create_virtqueue);

/* Only available for split ring */
struct virtqueue *vring_new_virtqueue(unsigned int index,
                                      unsigned int num,
                                      unsigned int vring_align,
                                      struct virtio_device *vdev,
                                      bool weak_barriers,
                                      bool context,
                                      void *pages,
                                      bool (*notify)(struct virtqueue *vq),
                                      void (*callback)(struct virtqueue *vq),
                                      const char *name)
{
        struct vring vring;

        if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
                return NULL;

        vring_init(&vring, num, pages, vring_align);
        return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
                                     notify, callback, name);
}
EXPORT_SYMBOL_GPL(vring_new_virtqueue);

void vring_del_virtqueue(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        if (vq->we_own_ring) {
                if (vq->packed_ring) {
                        vring_free_queue(vq->vq.vdev,
                                         vq->packed.ring_size_in_bytes,
                                         vq->packed.vring.desc,
                                         vq->packed.ring_dma_addr);

                        vring_free_queue(vq->vq.vdev,
                                         vq->packed.event_size_in_bytes,
                                         vq->packed.vring.driver,
                                         vq->packed.driver_event_dma_addr);

                        vring_free_queue(vq->vq.vdev,
                                         vq->packed.event_size_in_bytes,
                                         vq->packed.vring.device,
                                         vq->packed.device_event_dma_addr);

                        kfree(vq->packed.desc_state);
                        kfree(vq->packed.desc_extra);
                } else {
                        vring_free_queue(vq->vq.vdev,
                                         vq->split.queue_size_in_bytes,
                                         vq->split.vring.desc,
                                         vq->split.queue_dma_addr);
                }
        }
        if (!vq->packed_ring) {
                kfree(vq->split.desc_state);
                kfree(vq->split.desc_extra);
        }
        list_del(&_vq->list);
        kfree(vq);
}
EXPORT_SYMBOL_GPL(vring_del_virtqueue);
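
/*
 * Illustrative transport sketch (not part of this file; the index,
 * my_notify and my_callback names and the parameter choices are
 * hypothetical): a transport that lets this code allocate the ring pairs
 * vring_create_virtqueue() with vring_del_virtqueue():
 *
 *      vq = vring_create_virtqueue(index, 256, SMP_CACHE_BYTES, vdev,
 *                                  true, true, false,
 *                                  my_notify, my_callback, "requests");
 *      if (!vq)
 *              return -ENOMEM;
 *
 *      vring_del_virtqueue(vq);
 */
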
/* Manipulates transport-specific feature bits. */
void vring_transport_features(struct virtio_device *vdev)
{
        unsigned int i;

        for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
                switch (i) {
                case VIRTIO_RING_F_INDIRECT_DESC:
                        break;
                case VIRTIO_RING_F_EVENT_IDX:
                        break;
                case VIRTIO_F_VERSION_1:
                        break;
                case VIRTIO_F_ACCESS_PLATFORM:
                        break;
                case VIRTIO_F_RING_PACKED:
                        break;
                case VIRTIO_F_ORDER_PLATFORM:
                        break;
                default:
                        /* We don't understand this bit. */
                        __virtio_clear_bit(vdev, i);
                }
        }
}
EXPORT_SYMBOL_GPL(vring_transport_features);

/**
 * virtqueue_get_vring_size - return the size of the virtqueue's vring
 * @_vq: the struct virtqueue containing the vring of interest.
 *
 * Returns the size of the vring.  This is mainly used for boasting to
 * userspace.  Unlike other operations, this need not be serialized.
 */
unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
}
EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);

bool virtqueue_is_broken(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        return vq->broken;
}
EXPORT_SYMBOL_GPL(virtqueue_is_broken);

/*
 * This should prevent the device from being used, allowing drivers to
 * recover.  You may need to grab appropriate locks to flush.
 */
void virtio_break_device(struct virtio_device *dev)
{
        struct virtqueue *_vq;

        list_for_each_entry(_vq, &dev->vqs, list) {
                struct vring_virtqueue *vq = to_vvq(_vq);
                vq->broken = true;
        }
}
EXPORT_SYMBOL_GPL(virtio_break_device);

dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        BUG_ON(!vq->we_own_ring);

        if (vq->packed_ring)
                return vq->packed.ring_dma_addr;

        return vq->split.queue_dma_addr;
}
EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);

dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        BUG_ON(!vq->we_own_ring);

        if (vq->packed_ring)
                return vq->packed.driver_event_dma_addr;

        return vq->split.queue_dma_addr +
                ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
}
EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);

dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        BUG_ON(!vq->we_own_ring);

        if (vq->packed_ring)
                return vq->packed.device_event_dma_addr;

        return vq->split.queue_dma_addr +
                ((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
}
EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);

/* Only available for split ring */
const struct vring *virtqueue_get_vring(struct virtqueue *vq)
{
        return &to_vvq(vq)->split.vring;
}
EXPORT_SYMBOL_GPL(virtqueue_get_vring);

MODULE_LICENSE("GPL");
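
/*
 * Illustrative transport sketch (not part of this file; base and the
 * QUEUE_* register offsets are hypothetical and stand in for a real
 * transport's register layout): after creating a ring it owns, a transport
 * typically reports the ring geometry to the device using the accessors
 * above:
 *
 *      writel(virtqueue_get_vring_size(vq), base + QUEUE_NUM);
 *      writeq(virtqueue_get_desc_addr(vq), base + QUEUE_DESC);
 *      writeq(virtqueue_get_avail_addr(vq), base + QUEUE_DRIVER);
 *      writeq(virtqueue_get_used_addr(vq), base + QUEUE_DEVICE);
 */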