1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* Virtio ring implementation. 3 * 4 * Copyright 2007 Rusty Russell IBM Corporation 5 */ 6 #include <linux/virtio.h> 7 #include <linux/virtio_ring.h> 8 #include <linux/virtio_config.h> 9 #include <linux/device.h> 10 #include <linux/slab.h> 11 #include <linux/module.h> 12 #include <linux/hrtimer.h> 13 #include <linux/dma-mapping.h> 14 #include <xen/xen.h> 15 16 #ifdef DEBUG 17 /* For development, we want to crash whenever the ring is screwed. */ 18 #define BAD_RING(_vq, fmt, args...) \ 19 do { \ 20 dev_err(&(_vq)->vq.vdev->dev, \ 21 "%s:"fmt, (_vq)->vq.name, ##args); \ 22 BUG(); \ 23 } while (0) 24 /* Caller is supposed to guarantee no reentry. */ 25 #define START_USE(_vq) \ 26 do { \ 27 if ((_vq)->in_use) \ 28 panic("%s:in_use = %i\n", \ 29 (_vq)->vq.name, (_vq)->in_use); \ 30 (_vq)->in_use = __LINE__; \ 31 } while (0) 32 #define END_USE(_vq) \ 33 do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0) 34 #define LAST_ADD_TIME_UPDATE(_vq) \ 35 do { \ 36 ktime_t now = ktime_get(); \ 37 \ 38 /* No kick or get, with .1 second between? Warn. */ \ 39 if ((_vq)->last_add_time_valid) \ 40 WARN_ON(ktime_to_ms(ktime_sub(now, \ 41 (_vq)->last_add_time)) > 100); \ 42 (_vq)->last_add_time = now; \ 43 (_vq)->last_add_time_valid = true; \ 44 } while (0) 45 #define LAST_ADD_TIME_CHECK(_vq) \ 46 do { \ 47 if ((_vq)->last_add_time_valid) { \ 48 WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \ 49 (_vq)->last_add_time)) > 100); \ 50 } \ 51 } while (0) 52 #define LAST_ADD_TIME_INVALID(_vq) \ 53 ((_vq)->last_add_time_valid = false) 54 #else 55 #define BAD_RING(_vq, fmt, args...) \ 56 do { \ 57 dev_err(&_vq->vq.vdev->dev, \ 58 "%s:"fmt, (_vq)->vq.name, ##args); \ 59 (_vq)->broken = true; \ 60 } while (0) 61 #define START_USE(vq) 62 #define END_USE(vq) 63 #define LAST_ADD_TIME_UPDATE(vq) 64 #define LAST_ADD_TIME_CHECK(vq) 65 #define LAST_ADD_TIME_INVALID(vq) 66 #endif 67 68 struct vring_desc_state_split { 69 void *data; /* Data for callback. */ 70 struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ 71 }; 72 73 struct vring_desc_state_packed { 74 void *data; /* Data for callback. */ 75 struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */ 76 u16 num; /* Descriptor list length. */ 77 u16 next; /* The next desc state in a list. */ 78 u16 last; /* The last desc state in a list. */ 79 }; 80 81 struct vring_desc_extra_packed { 82 dma_addr_t addr; /* Buffer DMA addr. */ 83 u32 len; /* Buffer length. */ 84 u16 flags; /* Descriptor flags. */ 85 }; 86 87 struct vring_virtqueue { 88 struct virtqueue vq; 89 90 /* Is this a packed ring? */ 91 bool packed_ring; 92 93 /* Is DMA API used? */ 94 bool use_dma_api; 95 96 /* Can we use weak barriers? */ 97 bool weak_barriers; 98 99 /* Other side has made a mess, don't try any more. */ 100 bool broken; 101 102 /* Host supports indirect buffers */ 103 bool indirect; 104 105 /* Host publishes avail event idx */ 106 bool event; 107 108 /* Head of free buffer list. */ 109 unsigned int free_head; 110 /* Number we've added since last sync. */ 111 unsigned int num_added; 112 113 /* Last used index we've seen. */ 114 u16 last_used_idx; 115 116 union { 117 /* Available for split ring */ 118 struct { 119 /* Actual memory layout for this queue. */ 120 struct vring vring; 121 122 /* Last written value to avail->flags */ 123 u16 avail_flags_shadow; 124 125 /* 126 * Last written value to avail->idx in 127 * guest byte order. 128 */ 129 u16 avail_idx_shadow; 130 131 /* Per-descriptor state. 
*/ 132 struct vring_desc_state_split *desc_state; 133 134 /* DMA address and size information */ 135 dma_addr_t queue_dma_addr; 136 size_t queue_size_in_bytes; 137 } split; 138 139 /* Available for packed ring */ 140 struct { 141 /* Actual memory layout for this queue. */ 142 struct { 143 unsigned int num; 144 struct vring_packed_desc *desc; 145 struct vring_packed_desc_event *driver; 146 struct vring_packed_desc_event *device; 147 } vring; 148 149 /* Driver ring wrap counter. */ 150 bool avail_wrap_counter; 151 152 /* Device ring wrap counter. */ 153 bool used_wrap_counter; 154 155 /* Avail used flags. */ 156 u16 avail_used_flags; 157 158 /* Index of the next avail descriptor. */ 159 u16 next_avail_idx; 160 161 /* 162 * Last written value to driver->flags in 163 * guest byte order. 164 */ 165 u16 event_flags_shadow; 166 167 /* Per-descriptor state. */ 168 struct vring_desc_state_packed *desc_state; 169 struct vring_desc_extra_packed *desc_extra; 170 171 /* DMA address and size information */ 172 dma_addr_t ring_dma_addr; 173 dma_addr_t driver_event_dma_addr; 174 dma_addr_t device_event_dma_addr; 175 size_t ring_size_in_bytes; 176 size_t event_size_in_bytes; 177 } packed; 178 }; 179 180 /* How to notify other side. FIXME: commonalize hcalls! */ 181 bool (*notify)(struct virtqueue *vq); 182 183 /* DMA, allocation, and size information */ 184 bool we_own_ring; 185 186 #ifdef DEBUG 187 /* They're supposed to lock for us. */ 188 unsigned int in_use; 189 190 /* Figure out if their kicks are too delayed. */ 191 bool last_add_time_valid; 192 ktime_t last_add_time; 193 #endif 194 }; 195 196 197 /* 198 * Helpers. 199 */ 200 201 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) 202 203 static inline bool virtqueue_use_indirect(struct virtqueue *_vq, 204 unsigned int total_sg) 205 { 206 struct vring_virtqueue *vq = to_vvq(_vq); 207 208 /* 209 * If the host supports indirect descriptor tables, and we have multiple 210 * buffers, then go indirect. FIXME: tune this threshold 211 */ 212 return (vq->indirect && total_sg > 1 && vq->vq.num_free); 213 } 214 215 /* 216 * Modern virtio devices have feature bits to specify whether they need a 217 * quirk and bypass the IOMMU. If not there, just use the DMA API. 218 * 219 * If there, the interaction between virtio and DMA API is messy. 220 * 221 * On most systems with virtio, physical addresses match bus addresses, 222 * and it doesn't particularly matter whether we use the DMA API. 223 * 224 * On some systems, including Xen and any system with a physical device 225 * that speaks virtio behind a physical IOMMU, we must use the DMA API 226 * for virtio DMA to work at all. 227 * 228 * On other systems, including SPARC and PPC64, virtio-pci devices are 229 * enumerated as though they are behind an IOMMU, but the virtio host 230 * ignores the IOMMU, so we must either pretend that the IOMMU isn't 231 * there or somehow map everything as the identity. 232 * 233 * For the time being, we preserve historic behavior and bypass the DMA 234 * API. 235 * 236 * TODO: install a per-device DMA ops structure that does the right thing 237 * taking into account all the above quirks, and use the DMA API 238 * unconditionally on data path. 239 */ 240 241 static bool vring_use_dma_api(struct virtio_device *vdev) 242 { 243 if (!virtio_has_iommu_quirk(vdev)) 244 return true; 245 246 /* Otherwise, we are left to guess. */ 247 /* 248 * In theory, it's possible to have a buggy QEMU-supposed 249 * emulated Q35 IOMMU and Xen enabled at the same time. 
 * On such a configuration, virtio has never worked and will
 * not work without an even larger kludge. Instead, enable
 * the DMA API if we're a Xen guest, which at least allows
 * all of the sensible Xen configurations to work correctly.
 */
	if (xen_domain())
		return true;

	return false;
}

size_t virtio_max_dma_size(struct virtio_device *vdev)
{
	size_t max_segment_size = SIZE_MAX;

	if (vring_use_dma_api(vdev))
		max_segment_size = dma_max_mapping_size(&vdev->dev);

	return max_segment_size;
}
EXPORT_SYMBOL_GPL(virtio_max_dma_size);

static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
			       dma_addr_t *dma_handle, gfp_t flag)
{
	if (vring_use_dma_api(vdev)) {
		return dma_alloc_coherent(vdev->dev.parent, size,
					  dma_handle, flag);
	} else {
		void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);

		if (queue) {
			phys_addr_t phys_addr = virt_to_phys(queue);
			*dma_handle = (dma_addr_t)phys_addr;

			/*
			 * Sanity check: make sure we didn't truncate
			 * the address. The only arches I can find that
			 * have 64-bit phys_addr_t but 32-bit dma_addr_t
			 * are certain non-highmem MIPS and x86
			 * configurations, but these configurations
			 * should never allocate physical pages above 32
			 * bits, so this is fine. Just in case, throw a
			 * warning and abort if we end up with an
			 * unrepresentable address.
			 */
			if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
				free_pages_exact(queue, PAGE_ALIGN(size));
				return NULL;
			}
		}
		return queue;
	}
}

static void vring_free_queue(struct virtio_device *vdev, size_t size,
			     void *queue, dma_addr_t dma_handle)
{
	if (vring_use_dma_api(vdev))
		dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
	else
		free_pages_exact(queue, PAGE_ALIGN(size));
}

/*
 * The DMA ops on various arches are rather gnarly right now, and
 * making all of the arch DMA ops work on the vring device itself
 * is a mess. For now, we use the parent device for DMA ops.
 */
static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq)
{
	return vq->vq.vdev->dev.parent;
}

/* Map one sg entry. */
static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
				   struct scatterlist *sg,
				   enum dma_data_direction direction)
{
	if (!vq->use_dma_api)
		return (dma_addr_t)sg_phys(sg);

	/*
	 * We can't use dma_map_sg, because we don't use scatterlists in
	 * the way it expects (we don't guarantee that the scatterlist
	 * will exist for the lifetime of the mapping).
	 */
	return dma_map_page(vring_dma_dev(vq),
			    sg_page(sg), sg->offset, sg->length,
			    direction);
}

static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
				   void *cpu_addr, size_t size,
				   enum dma_data_direction direction)
{
	if (!vq->use_dma_api)
		return (dma_addr_t)virt_to_phys(cpu_addr);

	return dma_map_single(vring_dma_dev(vq),
			      cpu_addr, size, direction);
}

static int vring_mapping_error(const struct vring_virtqueue *vq,
			       dma_addr_t addr)
{
	if (!vq->use_dma_api)
		return 0;

	return dma_mapping_error(vring_dma_dev(vq), addr);
}


/*
 * Split ring specific functions - *_split().
365 */ 366 367 static void vring_unmap_one_split(const struct vring_virtqueue *vq, 368 struct vring_desc *desc) 369 { 370 u16 flags; 371 372 if (!vq->use_dma_api) 373 return; 374 375 flags = virtio16_to_cpu(vq->vq.vdev, desc->flags); 376 377 if (flags & VRING_DESC_F_INDIRECT) { 378 dma_unmap_single(vring_dma_dev(vq), 379 virtio64_to_cpu(vq->vq.vdev, desc->addr), 380 virtio32_to_cpu(vq->vq.vdev, desc->len), 381 (flags & VRING_DESC_F_WRITE) ? 382 DMA_FROM_DEVICE : DMA_TO_DEVICE); 383 } else { 384 dma_unmap_page(vring_dma_dev(vq), 385 virtio64_to_cpu(vq->vq.vdev, desc->addr), 386 virtio32_to_cpu(vq->vq.vdev, desc->len), 387 (flags & VRING_DESC_F_WRITE) ? 388 DMA_FROM_DEVICE : DMA_TO_DEVICE); 389 } 390 } 391 392 static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq, 393 unsigned int total_sg, 394 gfp_t gfp) 395 { 396 struct vring_desc *desc; 397 unsigned int i; 398 399 /* 400 * We require lowmem mappings for the descriptors because 401 * otherwise virt_to_phys will give us bogus addresses in the 402 * virtqueue. 403 */ 404 gfp &= ~__GFP_HIGHMEM; 405 406 desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp); 407 if (!desc) 408 return NULL; 409 410 for (i = 0; i < total_sg; i++) 411 desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1); 412 return desc; 413 } 414 415 static inline int virtqueue_add_split(struct virtqueue *_vq, 416 struct scatterlist *sgs[], 417 unsigned int total_sg, 418 unsigned int out_sgs, 419 unsigned int in_sgs, 420 void *data, 421 void *ctx, 422 gfp_t gfp) 423 { 424 struct vring_virtqueue *vq = to_vvq(_vq); 425 struct scatterlist *sg; 426 struct vring_desc *desc; 427 unsigned int i, n, avail, descs_used, uninitialized_var(prev), err_idx; 428 int head; 429 bool indirect; 430 431 START_USE(vq); 432 433 BUG_ON(data == NULL); 434 BUG_ON(ctx && vq->indirect); 435 436 if (unlikely(vq->broken)) { 437 END_USE(vq); 438 return -EIO; 439 } 440 441 LAST_ADD_TIME_UPDATE(vq); 442 443 BUG_ON(total_sg == 0); 444 445 head = vq->free_head; 446 447 if (virtqueue_use_indirect(_vq, total_sg)) 448 desc = alloc_indirect_split(_vq, total_sg, gfp); 449 else { 450 desc = NULL; 451 WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect); 452 } 453 454 if (desc) { 455 /* Use a single buffer which doesn't continue */ 456 indirect = true; 457 /* Set up rest to use this indirect table. */ 458 i = 0; 459 descs_used = 1; 460 } else { 461 indirect = false; 462 desc = vq->split.vring.desc; 463 i = head; 464 descs_used = total_sg; 465 } 466 467 if (vq->vq.num_free < descs_used) { 468 pr_debug("Can't add buf len %i - avail = %i\n", 469 descs_used, vq->vq.num_free); 470 /* FIXME: for historical reasons, we force a notify here if 471 * there are outgoing parts to the buffer. Presumably the 472 * host should service the ring ASAP. 
*/ 473 if (out_sgs) 474 vq->notify(&vq->vq); 475 if (indirect) 476 kfree(desc); 477 END_USE(vq); 478 return -ENOSPC; 479 } 480 481 for (n = 0; n < out_sgs; n++) { 482 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 483 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE); 484 if (vring_mapping_error(vq, addr)) 485 goto unmap_release; 486 487 desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT); 488 desc[i].addr = cpu_to_virtio64(_vq->vdev, addr); 489 desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length); 490 prev = i; 491 i = virtio16_to_cpu(_vq->vdev, desc[i].next); 492 } 493 } 494 for (; n < (out_sgs + in_sgs); n++) { 495 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 496 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE); 497 if (vring_mapping_error(vq, addr)) 498 goto unmap_release; 499 500 desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE); 501 desc[i].addr = cpu_to_virtio64(_vq->vdev, addr); 502 desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length); 503 prev = i; 504 i = virtio16_to_cpu(_vq->vdev, desc[i].next); 505 } 506 } 507 /* Last one doesn't continue. */ 508 desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); 509 510 if (indirect) { 511 /* Now that the indirect table is filled in, map it. */ 512 dma_addr_t addr = vring_map_single( 513 vq, desc, total_sg * sizeof(struct vring_desc), 514 DMA_TO_DEVICE); 515 if (vring_mapping_error(vq, addr)) 516 goto unmap_release; 517 518 vq->split.vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, 519 VRING_DESC_F_INDIRECT); 520 vq->split.vring.desc[head].addr = cpu_to_virtio64(_vq->vdev, 521 addr); 522 523 vq->split.vring.desc[head].len = cpu_to_virtio32(_vq->vdev, 524 total_sg * sizeof(struct vring_desc)); 525 } 526 527 /* We're using some buffers from the free list. */ 528 vq->vq.num_free -= descs_used; 529 530 /* Update free pointer */ 531 if (indirect) 532 vq->free_head = virtio16_to_cpu(_vq->vdev, 533 vq->split.vring.desc[head].next); 534 else 535 vq->free_head = i; 536 537 /* Store token and indirect buffer state. */ 538 vq->split.desc_state[head].data = data; 539 if (indirect) 540 vq->split.desc_state[head].indir_desc = desc; 541 else 542 vq->split.desc_state[head].indir_desc = ctx; 543 544 /* Put entry in available array (but don't update avail->idx until they 545 * do sync). */ 546 avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1); 547 vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head); 548 549 /* Descriptors and available array need to be set before we expose the 550 * new available array entries. */ 551 virtio_wmb(vq->weak_barriers); 552 vq->split.avail_idx_shadow++; 553 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, 554 vq->split.avail_idx_shadow); 555 vq->num_added++; 556 557 pr_debug("Added buffer head %i to %p\n", head, vq); 558 END_USE(vq); 559 560 /* This is very unlikely, but theoretically possible. Kick 561 * just in case. 
*/ 562 if (unlikely(vq->num_added == (1 << 16) - 1)) 563 virtqueue_kick(_vq); 564 565 return 0; 566 567 unmap_release: 568 err_idx = i; 569 i = head; 570 571 for (n = 0; n < total_sg; n++) { 572 if (i == err_idx) 573 break; 574 vring_unmap_one_split(vq, &desc[i]); 575 i = virtio16_to_cpu(_vq->vdev, vq->split.vring.desc[i].next); 576 } 577 578 if (indirect) 579 kfree(desc); 580 581 END_USE(vq); 582 return -EIO; 583 } 584 585 static bool virtqueue_kick_prepare_split(struct virtqueue *_vq) 586 { 587 struct vring_virtqueue *vq = to_vvq(_vq); 588 u16 new, old; 589 bool needs_kick; 590 591 START_USE(vq); 592 /* We need to expose available array entries before checking avail 593 * event. */ 594 virtio_mb(vq->weak_barriers); 595 596 old = vq->split.avail_idx_shadow - vq->num_added; 597 new = vq->split.avail_idx_shadow; 598 vq->num_added = 0; 599 600 LAST_ADD_TIME_CHECK(vq); 601 LAST_ADD_TIME_INVALID(vq); 602 603 if (vq->event) { 604 needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev, 605 vring_avail_event(&vq->split.vring)), 606 new, old); 607 } else { 608 needs_kick = !(vq->split.vring.used->flags & 609 cpu_to_virtio16(_vq->vdev, 610 VRING_USED_F_NO_NOTIFY)); 611 } 612 END_USE(vq); 613 return needs_kick; 614 } 615 616 static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, 617 void **ctx) 618 { 619 unsigned int i, j; 620 __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT); 621 622 /* Clear data ptr. */ 623 vq->split.desc_state[head].data = NULL; 624 625 /* Put back on free list: unmap first-level descriptors and find end */ 626 i = head; 627 628 while (vq->split.vring.desc[i].flags & nextflag) { 629 vring_unmap_one_split(vq, &vq->split.vring.desc[i]); 630 i = virtio16_to_cpu(vq->vq.vdev, vq->split.vring.desc[i].next); 631 vq->vq.num_free++; 632 } 633 634 vring_unmap_one_split(vq, &vq->split.vring.desc[i]); 635 vq->split.vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, 636 vq->free_head); 637 vq->free_head = head; 638 639 /* Plus final descriptor */ 640 vq->vq.num_free++; 641 642 if (vq->indirect) { 643 struct vring_desc *indir_desc = 644 vq->split.desc_state[head].indir_desc; 645 u32 len; 646 647 /* Free the indirect table, if any, now that it's unmapped. */ 648 if (!indir_desc) 649 return; 650 651 len = virtio32_to_cpu(vq->vq.vdev, 652 vq->split.vring.desc[head].len); 653 654 BUG_ON(!(vq->split.vring.desc[head].flags & 655 cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT))); 656 BUG_ON(len == 0 || len % sizeof(struct vring_desc)); 657 658 for (j = 0; j < len / sizeof(struct vring_desc); j++) 659 vring_unmap_one_split(vq, &indir_desc[j]); 660 661 kfree(indir_desc); 662 vq->split.desc_state[head].indir_desc = NULL; 663 } else if (ctx) { 664 *ctx = vq->split.desc_state[head].indir_desc; 665 } 666 } 667 668 static inline bool more_used_split(const struct vring_virtqueue *vq) 669 { 670 return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, 671 vq->split.vring.used->idx); 672 } 673 674 static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq, 675 unsigned int *len, 676 void **ctx) 677 { 678 struct vring_virtqueue *vq = to_vvq(_vq); 679 void *ret; 680 unsigned int i; 681 u16 last_used; 682 683 START_USE(vq); 684 685 if (unlikely(vq->broken)) { 686 END_USE(vq); 687 return NULL; 688 } 689 690 if (!more_used_split(vq)) { 691 pr_debug("No more buffers in queue\n"); 692 END_USE(vq); 693 return NULL; 694 } 695 696 /* Only get used array entries after they have been exposed by host. 
*/ 697 virtio_rmb(vq->weak_barriers); 698 699 last_used = (vq->last_used_idx & (vq->split.vring.num - 1)); 700 i = virtio32_to_cpu(_vq->vdev, 701 vq->split.vring.used->ring[last_used].id); 702 *len = virtio32_to_cpu(_vq->vdev, 703 vq->split.vring.used->ring[last_used].len); 704 705 if (unlikely(i >= vq->split.vring.num)) { 706 BAD_RING(vq, "id %u out of range\n", i); 707 return NULL; 708 } 709 if (unlikely(!vq->split.desc_state[i].data)) { 710 BAD_RING(vq, "id %u is not a head!\n", i); 711 return NULL; 712 } 713 714 /* detach_buf_split clears data, so grab it now. */ 715 ret = vq->split.desc_state[i].data; 716 detach_buf_split(vq, i, ctx); 717 vq->last_used_idx++; 718 /* If we expect an interrupt for the next entry, tell host 719 * by writing event index and flush out the write before 720 * the read in the next get_buf call. */ 721 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) 722 virtio_store_mb(vq->weak_barriers, 723 &vring_used_event(&vq->split.vring), 724 cpu_to_virtio16(_vq->vdev, vq->last_used_idx)); 725 726 LAST_ADD_TIME_INVALID(vq); 727 728 END_USE(vq); 729 return ret; 730 } 731 732 static void virtqueue_disable_cb_split(struct virtqueue *_vq) 733 { 734 struct vring_virtqueue *vq = to_vvq(_vq); 735 736 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) { 737 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; 738 if (!vq->event) 739 vq->split.vring.avail->flags = 740 cpu_to_virtio16(_vq->vdev, 741 vq->split.avail_flags_shadow); 742 } 743 } 744 745 static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq) 746 { 747 struct vring_virtqueue *vq = to_vvq(_vq); 748 u16 last_used_idx; 749 750 START_USE(vq); 751 752 /* We optimistically turn back on interrupts, then check if there was 753 * more to do. */ 754 /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to 755 * either clear the flags bit or point the event index at the next 756 * entry. Always do both to keep code simple. */ 757 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 758 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 759 if (!vq->event) 760 vq->split.vring.avail->flags = 761 cpu_to_virtio16(_vq->vdev, 762 vq->split.avail_flags_shadow); 763 } 764 vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev, 765 last_used_idx = vq->last_used_idx); 766 END_USE(vq); 767 return last_used_idx; 768 } 769 770 static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx) 771 { 772 struct vring_virtqueue *vq = to_vvq(_vq); 773 774 return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev, 775 vq->split.vring.used->idx); 776 } 777 778 static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq) 779 { 780 struct vring_virtqueue *vq = to_vvq(_vq); 781 u16 bufs; 782 783 START_USE(vq); 784 785 /* We optimistically turn back on interrupts, then check if there was 786 * more to do. */ 787 /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to 788 * either clear the flags bit or point the event index at the next 789 * entry. Always update the event index to keep code simple. 
*/ 790 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 791 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 792 if (!vq->event) 793 vq->split.vring.avail->flags = 794 cpu_to_virtio16(_vq->vdev, 795 vq->split.avail_flags_shadow); 796 } 797 /* TODO: tune this threshold */ 798 bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4; 799 800 virtio_store_mb(vq->weak_barriers, 801 &vring_used_event(&vq->split.vring), 802 cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs)); 803 804 if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx) 805 - vq->last_used_idx) > bufs)) { 806 END_USE(vq); 807 return false; 808 } 809 810 END_USE(vq); 811 return true; 812 } 813 814 static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq) 815 { 816 struct vring_virtqueue *vq = to_vvq(_vq); 817 unsigned int i; 818 void *buf; 819 820 START_USE(vq); 821 822 for (i = 0; i < vq->split.vring.num; i++) { 823 if (!vq->split.desc_state[i].data) 824 continue; 825 /* detach_buf_split clears data, so grab it now. */ 826 buf = vq->split.desc_state[i].data; 827 detach_buf_split(vq, i, NULL); 828 vq->split.avail_idx_shadow--; 829 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, 830 vq->split.avail_idx_shadow); 831 END_USE(vq); 832 return buf; 833 } 834 /* That should have freed everything. */ 835 BUG_ON(vq->vq.num_free != vq->split.vring.num); 836 837 END_USE(vq); 838 return NULL; 839 } 840 841 static struct virtqueue *vring_create_virtqueue_split( 842 unsigned int index, 843 unsigned int num, 844 unsigned int vring_align, 845 struct virtio_device *vdev, 846 bool weak_barriers, 847 bool may_reduce_num, 848 bool context, 849 bool (*notify)(struct virtqueue *), 850 void (*callback)(struct virtqueue *), 851 const char *name) 852 { 853 struct virtqueue *vq; 854 void *queue = NULL; 855 dma_addr_t dma_addr; 856 size_t queue_size_in_bytes; 857 struct vring vring; 858 859 /* We assume num is a power of 2. */ 860 if (num & (num - 1)) { 861 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num); 862 return NULL; 863 } 864 865 /* TODO: allocate each queue chunk individually */ 866 for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) { 867 queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 868 &dma_addr, 869 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 870 if (queue) 871 break; 872 if (!may_reduce_num) 873 return NULL; 874 } 875 876 if (!num) 877 return NULL; 878 879 if (!queue) { 880 /* Try to get a single page. You are my only hope! */ 881 queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 882 &dma_addr, GFP_KERNEL|__GFP_ZERO); 883 } 884 if (!queue) 885 return NULL; 886 887 queue_size_in_bytes = vring_size(num, vring_align); 888 vring_init(&vring, num, queue, vring_align); 889 890 vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context, 891 notify, callback, name); 892 if (!vq) { 893 vring_free_queue(vdev, queue_size_in_bytes, queue, 894 dma_addr); 895 return NULL; 896 } 897 898 to_vvq(vq)->split.queue_dma_addr = dma_addr; 899 to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes; 900 to_vvq(vq)->we_own_ring = true; 901 902 return vq; 903 } 904 905 906 /* 907 * Packed ring specific functions - *_packed(). 
908 */ 909 910 static void vring_unmap_state_packed(const struct vring_virtqueue *vq, 911 struct vring_desc_extra_packed *state) 912 { 913 u16 flags; 914 915 if (!vq->use_dma_api) 916 return; 917 918 flags = state->flags; 919 920 if (flags & VRING_DESC_F_INDIRECT) { 921 dma_unmap_single(vring_dma_dev(vq), 922 state->addr, state->len, 923 (flags & VRING_DESC_F_WRITE) ? 924 DMA_FROM_DEVICE : DMA_TO_DEVICE); 925 } else { 926 dma_unmap_page(vring_dma_dev(vq), 927 state->addr, state->len, 928 (flags & VRING_DESC_F_WRITE) ? 929 DMA_FROM_DEVICE : DMA_TO_DEVICE); 930 } 931 } 932 933 static void vring_unmap_desc_packed(const struct vring_virtqueue *vq, 934 struct vring_packed_desc *desc) 935 { 936 u16 flags; 937 938 if (!vq->use_dma_api) 939 return; 940 941 flags = le16_to_cpu(desc->flags); 942 943 if (flags & VRING_DESC_F_INDIRECT) { 944 dma_unmap_single(vring_dma_dev(vq), 945 le64_to_cpu(desc->addr), 946 le32_to_cpu(desc->len), 947 (flags & VRING_DESC_F_WRITE) ? 948 DMA_FROM_DEVICE : DMA_TO_DEVICE); 949 } else { 950 dma_unmap_page(vring_dma_dev(vq), 951 le64_to_cpu(desc->addr), 952 le32_to_cpu(desc->len), 953 (flags & VRING_DESC_F_WRITE) ? 954 DMA_FROM_DEVICE : DMA_TO_DEVICE); 955 } 956 } 957 958 static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg, 959 gfp_t gfp) 960 { 961 struct vring_packed_desc *desc; 962 963 /* 964 * We require lowmem mappings for the descriptors because 965 * otherwise virt_to_phys will give us bogus addresses in the 966 * virtqueue. 967 */ 968 gfp &= ~__GFP_HIGHMEM; 969 970 desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp); 971 972 return desc; 973 } 974 975 static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, 976 struct scatterlist *sgs[], 977 unsigned int total_sg, 978 unsigned int out_sgs, 979 unsigned int in_sgs, 980 void *data, 981 gfp_t gfp) 982 { 983 struct vring_packed_desc *desc; 984 struct scatterlist *sg; 985 unsigned int i, n, err_idx; 986 u16 head, id; 987 dma_addr_t addr; 988 989 head = vq->packed.next_avail_idx; 990 desc = alloc_indirect_packed(total_sg, gfp); 991 992 if (unlikely(vq->vq.num_free < 1)) { 993 pr_debug("Can't add buf len 1 - avail = 0\n"); 994 kfree(desc); 995 END_USE(vq); 996 return -ENOSPC; 997 } 998 999 i = 0; 1000 id = vq->free_head; 1001 BUG_ON(id == vq->packed.vring.num); 1002 1003 for (n = 0; n < out_sgs + in_sgs; n++) { 1004 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 1005 addr = vring_map_one_sg(vq, sg, n < out_sgs ? 1006 DMA_TO_DEVICE : DMA_FROM_DEVICE); 1007 if (vring_mapping_error(vq, addr)) 1008 goto unmap_release; 1009 1010 desc[i].flags = cpu_to_le16(n < out_sgs ? 1011 0 : VRING_DESC_F_WRITE); 1012 desc[i].addr = cpu_to_le64(addr); 1013 desc[i].len = cpu_to_le32(sg->length); 1014 i++; 1015 } 1016 } 1017 1018 /* Now that the indirect table is filled in, map it. 
*/ 1019 addr = vring_map_single(vq, desc, 1020 total_sg * sizeof(struct vring_packed_desc), 1021 DMA_TO_DEVICE); 1022 if (vring_mapping_error(vq, addr)) 1023 goto unmap_release; 1024 1025 vq->packed.vring.desc[head].addr = cpu_to_le64(addr); 1026 vq->packed.vring.desc[head].len = cpu_to_le32(total_sg * 1027 sizeof(struct vring_packed_desc)); 1028 vq->packed.vring.desc[head].id = cpu_to_le16(id); 1029 1030 if (vq->use_dma_api) { 1031 vq->packed.desc_extra[id].addr = addr; 1032 vq->packed.desc_extra[id].len = total_sg * 1033 sizeof(struct vring_packed_desc); 1034 vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT | 1035 vq->packed.avail_used_flags; 1036 } 1037 1038 /* 1039 * A driver MUST NOT make the first descriptor in the list 1040 * available before all subsequent descriptors comprising 1041 * the list are made available. 1042 */ 1043 virtio_wmb(vq->weak_barriers); 1044 vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT | 1045 vq->packed.avail_used_flags); 1046 1047 /* We're using some buffers from the free list. */ 1048 vq->vq.num_free -= 1; 1049 1050 /* Update free pointer */ 1051 n = head + 1; 1052 if (n >= vq->packed.vring.num) { 1053 n = 0; 1054 vq->packed.avail_wrap_counter ^= 1; 1055 vq->packed.avail_used_flags ^= 1056 1 << VRING_PACKED_DESC_F_AVAIL | 1057 1 << VRING_PACKED_DESC_F_USED; 1058 } 1059 vq->packed.next_avail_idx = n; 1060 vq->free_head = vq->packed.desc_state[id].next; 1061 1062 /* Store token and indirect buffer state. */ 1063 vq->packed.desc_state[id].num = 1; 1064 vq->packed.desc_state[id].data = data; 1065 vq->packed.desc_state[id].indir_desc = desc; 1066 vq->packed.desc_state[id].last = id; 1067 1068 vq->num_added += 1; 1069 1070 pr_debug("Added buffer head %i to %p\n", head, vq); 1071 END_USE(vq); 1072 1073 return 0; 1074 1075 unmap_release: 1076 err_idx = i; 1077 1078 for (i = 0; i < err_idx; i++) 1079 vring_unmap_desc_packed(vq, &desc[i]); 1080 1081 kfree(desc); 1082 1083 END_USE(vq); 1084 return -EIO; 1085 } 1086 1087 static inline int virtqueue_add_packed(struct virtqueue *_vq, 1088 struct scatterlist *sgs[], 1089 unsigned int total_sg, 1090 unsigned int out_sgs, 1091 unsigned int in_sgs, 1092 void *data, 1093 void *ctx, 1094 gfp_t gfp) 1095 { 1096 struct vring_virtqueue *vq = to_vvq(_vq); 1097 struct vring_packed_desc *desc; 1098 struct scatterlist *sg; 1099 unsigned int i, n, c, descs_used, err_idx; 1100 __le16 uninitialized_var(head_flags), flags; 1101 u16 head, id, uninitialized_var(prev), curr, avail_used_flags; 1102 1103 START_USE(vq); 1104 1105 BUG_ON(data == NULL); 1106 BUG_ON(ctx && vq->indirect); 1107 1108 if (unlikely(vq->broken)) { 1109 END_USE(vq); 1110 return -EIO; 1111 } 1112 1113 LAST_ADD_TIME_UPDATE(vq); 1114 1115 BUG_ON(total_sg == 0); 1116 1117 if (virtqueue_use_indirect(_vq, total_sg)) 1118 return virtqueue_add_indirect_packed(vq, sgs, total_sg, 1119 out_sgs, in_sgs, data, gfp); 1120 1121 head = vq->packed.next_avail_idx; 1122 avail_used_flags = vq->packed.avail_used_flags; 1123 1124 WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect); 1125 1126 desc = vq->packed.vring.desc; 1127 i = head; 1128 descs_used = total_sg; 1129 1130 if (unlikely(vq->vq.num_free < descs_used)) { 1131 pr_debug("Can't add buf len %i - avail = %i\n", 1132 descs_used, vq->vq.num_free); 1133 END_USE(vq); 1134 return -ENOSPC; 1135 } 1136 1137 id = vq->free_head; 1138 BUG_ON(id == vq->packed.vring.num); 1139 1140 curr = id; 1141 c = 0; 1142 for (n = 0; n < out_sgs + in_sgs; n++) { 1143 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 1144 
dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ? 1145 DMA_TO_DEVICE : DMA_FROM_DEVICE); 1146 if (vring_mapping_error(vq, addr)) 1147 goto unmap_release; 1148 1149 flags = cpu_to_le16(vq->packed.avail_used_flags | 1150 (++c == total_sg ? 0 : VRING_DESC_F_NEXT) | 1151 (n < out_sgs ? 0 : VRING_DESC_F_WRITE)); 1152 if (i == head) 1153 head_flags = flags; 1154 else 1155 desc[i].flags = flags; 1156 1157 desc[i].addr = cpu_to_le64(addr); 1158 desc[i].len = cpu_to_le32(sg->length); 1159 desc[i].id = cpu_to_le16(id); 1160 1161 if (unlikely(vq->use_dma_api)) { 1162 vq->packed.desc_extra[curr].addr = addr; 1163 vq->packed.desc_extra[curr].len = sg->length; 1164 vq->packed.desc_extra[curr].flags = 1165 le16_to_cpu(flags); 1166 } 1167 prev = curr; 1168 curr = vq->packed.desc_state[curr].next; 1169 1170 if ((unlikely(++i >= vq->packed.vring.num))) { 1171 i = 0; 1172 vq->packed.avail_used_flags ^= 1173 1 << VRING_PACKED_DESC_F_AVAIL | 1174 1 << VRING_PACKED_DESC_F_USED; 1175 } 1176 } 1177 } 1178 1179 if (i < head) 1180 vq->packed.avail_wrap_counter ^= 1; 1181 1182 /* We're using some buffers from the free list. */ 1183 vq->vq.num_free -= descs_used; 1184 1185 /* Update free pointer */ 1186 vq->packed.next_avail_idx = i; 1187 vq->free_head = curr; 1188 1189 /* Store token. */ 1190 vq->packed.desc_state[id].num = descs_used; 1191 vq->packed.desc_state[id].data = data; 1192 vq->packed.desc_state[id].indir_desc = ctx; 1193 vq->packed.desc_state[id].last = prev; 1194 1195 /* 1196 * A driver MUST NOT make the first descriptor in the list 1197 * available before all subsequent descriptors comprising 1198 * the list are made available. 1199 */ 1200 virtio_wmb(vq->weak_barriers); 1201 vq->packed.vring.desc[head].flags = head_flags; 1202 vq->num_added += descs_used; 1203 1204 pr_debug("Added buffer head %i to %p\n", head, vq); 1205 END_USE(vq); 1206 1207 return 0; 1208 1209 unmap_release: 1210 err_idx = i; 1211 i = head; 1212 1213 vq->packed.avail_used_flags = avail_used_flags; 1214 1215 for (n = 0; n < total_sg; n++) { 1216 if (i == err_idx) 1217 break; 1218 vring_unmap_desc_packed(vq, &desc[i]); 1219 i++; 1220 if (i >= vq->packed.vring.num) 1221 i = 0; 1222 } 1223 1224 END_USE(vq); 1225 return -EIO; 1226 } 1227 1228 static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq) 1229 { 1230 struct vring_virtqueue *vq = to_vvq(_vq); 1231 u16 new, old, off_wrap, flags, wrap_counter, event_idx; 1232 bool needs_kick; 1233 union { 1234 struct { 1235 __le16 off_wrap; 1236 __le16 flags; 1237 }; 1238 u32 u32; 1239 } snapshot; 1240 1241 START_USE(vq); 1242 1243 /* 1244 * We need to expose the new flags value before checking notification 1245 * suppressions. 
1246 */ 1247 virtio_mb(vq->weak_barriers); 1248 1249 old = vq->packed.next_avail_idx - vq->num_added; 1250 new = vq->packed.next_avail_idx; 1251 vq->num_added = 0; 1252 1253 snapshot.u32 = *(u32 *)vq->packed.vring.device; 1254 flags = le16_to_cpu(snapshot.flags); 1255 1256 LAST_ADD_TIME_CHECK(vq); 1257 LAST_ADD_TIME_INVALID(vq); 1258 1259 if (flags != VRING_PACKED_EVENT_FLAG_DESC) { 1260 needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE); 1261 goto out; 1262 } 1263 1264 off_wrap = le16_to_cpu(snapshot.off_wrap); 1265 1266 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; 1267 event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); 1268 if (wrap_counter != vq->packed.avail_wrap_counter) 1269 event_idx -= vq->packed.vring.num; 1270 1271 needs_kick = vring_need_event(event_idx, new, old); 1272 out: 1273 END_USE(vq); 1274 return needs_kick; 1275 } 1276 1277 static void detach_buf_packed(struct vring_virtqueue *vq, 1278 unsigned int id, void **ctx) 1279 { 1280 struct vring_desc_state_packed *state = NULL; 1281 struct vring_packed_desc *desc; 1282 unsigned int i, curr; 1283 1284 state = &vq->packed.desc_state[id]; 1285 1286 /* Clear data ptr. */ 1287 state->data = NULL; 1288 1289 vq->packed.desc_state[state->last].next = vq->free_head; 1290 vq->free_head = id; 1291 vq->vq.num_free += state->num; 1292 1293 if (unlikely(vq->use_dma_api)) { 1294 curr = id; 1295 for (i = 0; i < state->num; i++) { 1296 vring_unmap_state_packed(vq, 1297 &vq->packed.desc_extra[curr]); 1298 curr = vq->packed.desc_state[curr].next; 1299 } 1300 } 1301 1302 if (vq->indirect) { 1303 u32 len; 1304 1305 /* Free the indirect table, if any, now that it's unmapped. */ 1306 desc = state->indir_desc; 1307 if (!desc) 1308 return; 1309 1310 if (vq->use_dma_api) { 1311 len = vq->packed.desc_extra[id].len; 1312 for (i = 0; i < len / sizeof(struct vring_packed_desc); 1313 i++) 1314 vring_unmap_desc_packed(vq, &desc[i]); 1315 } 1316 kfree(desc); 1317 state->indir_desc = NULL; 1318 } else if (ctx) { 1319 *ctx = state->indir_desc; 1320 } 1321 } 1322 1323 static inline bool is_used_desc_packed(const struct vring_virtqueue *vq, 1324 u16 idx, bool used_wrap_counter) 1325 { 1326 bool avail, used; 1327 u16 flags; 1328 1329 flags = le16_to_cpu(vq->packed.vring.desc[idx].flags); 1330 avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL)); 1331 used = !!(flags & (1 << VRING_PACKED_DESC_F_USED)); 1332 1333 return avail == used && used == used_wrap_counter; 1334 } 1335 1336 static inline bool more_used_packed(const struct vring_virtqueue *vq) 1337 { 1338 return is_used_desc_packed(vq, vq->last_used_idx, 1339 vq->packed.used_wrap_counter); 1340 } 1341 1342 static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, 1343 unsigned int *len, 1344 void **ctx) 1345 { 1346 struct vring_virtqueue *vq = to_vvq(_vq); 1347 u16 last_used, id; 1348 void *ret; 1349 1350 START_USE(vq); 1351 1352 if (unlikely(vq->broken)) { 1353 END_USE(vq); 1354 return NULL; 1355 } 1356 1357 if (!more_used_packed(vq)) { 1358 pr_debug("No more buffers in queue\n"); 1359 END_USE(vq); 1360 return NULL; 1361 } 1362 1363 /* Only get used elements after they have been exposed by host. 
*/ 1364 virtio_rmb(vq->weak_barriers); 1365 1366 last_used = vq->last_used_idx; 1367 id = le16_to_cpu(vq->packed.vring.desc[last_used].id); 1368 *len = le32_to_cpu(vq->packed.vring.desc[last_used].len); 1369 1370 if (unlikely(id >= vq->packed.vring.num)) { 1371 BAD_RING(vq, "id %u out of range\n", id); 1372 return NULL; 1373 } 1374 if (unlikely(!vq->packed.desc_state[id].data)) { 1375 BAD_RING(vq, "id %u is not a head!\n", id); 1376 return NULL; 1377 } 1378 1379 /* detach_buf_packed clears data, so grab it now. */ 1380 ret = vq->packed.desc_state[id].data; 1381 detach_buf_packed(vq, id, ctx); 1382 1383 vq->last_used_idx += vq->packed.desc_state[id].num; 1384 if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) { 1385 vq->last_used_idx -= vq->packed.vring.num; 1386 vq->packed.used_wrap_counter ^= 1; 1387 } 1388 1389 /* 1390 * If we expect an interrupt for the next entry, tell host 1391 * by writing event index and flush out the write before 1392 * the read in the next get_buf call. 1393 */ 1394 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC) 1395 virtio_store_mb(vq->weak_barriers, 1396 &vq->packed.vring.driver->off_wrap, 1397 cpu_to_le16(vq->last_used_idx | 1398 (vq->packed.used_wrap_counter << 1399 VRING_PACKED_EVENT_F_WRAP_CTR))); 1400 1401 LAST_ADD_TIME_INVALID(vq); 1402 1403 END_USE(vq); 1404 return ret; 1405 } 1406 1407 static void virtqueue_disable_cb_packed(struct virtqueue *_vq) 1408 { 1409 struct vring_virtqueue *vq = to_vvq(_vq); 1410 1411 if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) { 1412 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; 1413 vq->packed.vring.driver->flags = 1414 cpu_to_le16(vq->packed.event_flags_shadow); 1415 } 1416 } 1417 1418 static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq) 1419 { 1420 struct vring_virtqueue *vq = to_vvq(_vq); 1421 1422 START_USE(vq); 1423 1424 /* 1425 * We optimistically turn back on interrupts, then check if there was 1426 * more to do. 1427 */ 1428 1429 if (vq->event) { 1430 vq->packed.vring.driver->off_wrap = 1431 cpu_to_le16(vq->last_used_idx | 1432 (vq->packed.used_wrap_counter << 1433 VRING_PACKED_EVENT_F_WRAP_CTR)); 1434 /* 1435 * We need to update event offset and event wrap 1436 * counter first before updating event flags. 1437 */ 1438 virtio_wmb(vq->weak_barriers); 1439 } 1440 1441 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 1442 vq->packed.event_flags_shadow = vq->event ? 1443 VRING_PACKED_EVENT_FLAG_DESC : 1444 VRING_PACKED_EVENT_FLAG_ENABLE; 1445 vq->packed.vring.driver->flags = 1446 cpu_to_le16(vq->packed.event_flags_shadow); 1447 } 1448 1449 END_USE(vq); 1450 return vq->last_used_idx | ((u16)vq->packed.used_wrap_counter << 1451 VRING_PACKED_EVENT_F_WRAP_CTR); 1452 } 1453 1454 static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap) 1455 { 1456 struct vring_virtqueue *vq = to_vvq(_vq); 1457 bool wrap_counter; 1458 u16 used_idx; 1459 1460 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; 1461 used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); 1462 1463 return is_used_desc_packed(vq, used_idx, wrap_counter); 1464 } 1465 1466 static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq) 1467 { 1468 struct vring_virtqueue *vq = to_vvq(_vq); 1469 u16 used_idx, wrap_counter; 1470 u16 bufs; 1471 1472 START_USE(vq); 1473 1474 /* 1475 * We optimistically turn back on interrupts, then check if there was 1476 * more to do. 
1477 */ 1478 1479 if (vq->event) { 1480 /* TODO: tune this threshold */ 1481 bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4; 1482 wrap_counter = vq->packed.used_wrap_counter; 1483 1484 used_idx = vq->last_used_idx + bufs; 1485 if (used_idx >= vq->packed.vring.num) { 1486 used_idx -= vq->packed.vring.num; 1487 wrap_counter ^= 1; 1488 } 1489 1490 vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx | 1491 (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR)); 1492 1493 /* 1494 * We need to update event offset and event wrap 1495 * counter first before updating event flags. 1496 */ 1497 virtio_wmb(vq->weak_barriers); 1498 } else { 1499 used_idx = vq->last_used_idx; 1500 wrap_counter = vq->packed.used_wrap_counter; 1501 } 1502 1503 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 1504 vq->packed.event_flags_shadow = vq->event ? 1505 VRING_PACKED_EVENT_FLAG_DESC : 1506 VRING_PACKED_EVENT_FLAG_ENABLE; 1507 vq->packed.vring.driver->flags = 1508 cpu_to_le16(vq->packed.event_flags_shadow); 1509 } 1510 1511 /* 1512 * We need to update event suppression structure first 1513 * before re-checking for more used buffers. 1514 */ 1515 virtio_mb(vq->weak_barriers); 1516 1517 if (is_used_desc_packed(vq, used_idx, wrap_counter)) { 1518 END_USE(vq); 1519 return false; 1520 } 1521 1522 END_USE(vq); 1523 return true; 1524 } 1525 1526 static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq) 1527 { 1528 struct vring_virtqueue *vq = to_vvq(_vq); 1529 unsigned int i; 1530 void *buf; 1531 1532 START_USE(vq); 1533 1534 for (i = 0; i < vq->packed.vring.num; i++) { 1535 if (!vq->packed.desc_state[i].data) 1536 continue; 1537 /* detach_buf clears data, so grab it now. */ 1538 buf = vq->packed.desc_state[i].data; 1539 detach_buf_packed(vq, i, NULL); 1540 END_USE(vq); 1541 return buf; 1542 } 1543 /* That should have freed everything. 
*/ 1544 BUG_ON(vq->vq.num_free != vq->packed.vring.num); 1545 1546 END_USE(vq); 1547 return NULL; 1548 } 1549 1550 static struct virtqueue *vring_create_virtqueue_packed( 1551 unsigned int index, 1552 unsigned int num, 1553 unsigned int vring_align, 1554 struct virtio_device *vdev, 1555 bool weak_barriers, 1556 bool may_reduce_num, 1557 bool context, 1558 bool (*notify)(struct virtqueue *), 1559 void (*callback)(struct virtqueue *), 1560 const char *name) 1561 { 1562 struct vring_virtqueue *vq; 1563 struct vring_packed_desc *ring; 1564 struct vring_packed_desc_event *driver, *device; 1565 dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr; 1566 size_t ring_size_in_bytes, event_size_in_bytes; 1567 unsigned int i; 1568 1569 ring_size_in_bytes = num * sizeof(struct vring_packed_desc); 1570 1571 ring = vring_alloc_queue(vdev, ring_size_in_bytes, 1572 &ring_dma_addr, 1573 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1574 if (!ring) 1575 goto err_ring; 1576 1577 event_size_in_bytes = sizeof(struct vring_packed_desc_event); 1578 1579 driver = vring_alloc_queue(vdev, event_size_in_bytes, 1580 &driver_event_dma_addr, 1581 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1582 if (!driver) 1583 goto err_driver; 1584 1585 device = vring_alloc_queue(vdev, event_size_in_bytes, 1586 &device_event_dma_addr, 1587 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1588 if (!device) 1589 goto err_device; 1590 1591 vq = kmalloc(sizeof(*vq), GFP_KERNEL); 1592 if (!vq) 1593 goto err_vq; 1594 1595 vq->vq.callback = callback; 1596 vq->vq.vdev = vdev; 1597 vq->vq.name = name; 1598 vq->vq.num_free = num; 1599 vq->vq.index = index; 1600 vq->we_own_ring = true; 1601 vq->notify = notify; 1602 vq->weak_barriers = weak_barriers; 1603 vq->broken = false; 1604 vq->last_used_idx = 0; 1605 vq->num_added = 0; 1606 vq->packed_ring = true; 1607 vq->use_dma_api = vring_use_dma_api(vdev); 1608 list_add_tail(&vq->vq.list, &vdev->vqs); 1609 #ifdef DEBUG 1610 vq->in_use = false; 1611 vq->last_add_time_valid = false; 1612 #endif 1613 1614 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && 1615 !context; 1616 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 1617 1618 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) 1619 vq->weak_barriers = false; 1620 1621 vq->packed.ring_dma_addr = ring_dma_addr; 1622 vq->packed.driver_event_dma_addr = driver_event_dma_addr; 1623 vq->packed.device_event_dma_addr = device_event_dma_addr; 1624 1625 vq->packed.ring_size_in_bytes = ring_size_in_bytes; 1626 vq->packed.event_size_in_bytes = event_size_in_bytes; 1627 1628 vq->packed.vring.num = num; 1629 vq->packed.vring.desc = ring; 1630 vq->packed.vring.driver = driver; 1631 vq->packed.vring.device = device; 1632 1633 vq->packed.next_avail_idx = 0; 1634 vq->packed.avail_wrap_counter = 1; 1635 vq->packed.used_wrap_counter = 1; 1636 vq->packed.event_flags_shadow = 0; 1637 vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL; 1638 1639 vq->packed.desc_state = kmalloc_array(num, 1640 sizeof(struct vring_desc_state_packed), 1641 GFP_KERNEL); 1642 if (!vq->packed.desc_state) 1643 goto err_desc_state; 1644 1645 memset(vq->packed.desc_state, 0, 1646 num * sizeof(struct vring_desc_state_packed)); 1647 1648 /* Put everything in free lists. 
 */
	vq->free_head = 0;
	for (i = 0; i < num-1; i++)
		vq->packed.desc_state[i].next = i + 1;

	vq->packed.desc_extra = kmalloc_array(num,
			sizeof(struct vring_desc_extra_packed),
			GFP_KERNEL);
	if (!vq->packed.desc_extra)
		goto err_desc_extra;

	memset(vq->packed.desc_extra, 0,
		num * sizeof(struct vring_desc_extra_packed));

	/* No callback? Tell other side not to bother us. */
	if (!callback) {
		vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
		vq->packed.vring.driver->flags =
			cpu_to_le16(vq->packed.event_flags_shadow);
	}

	return &vq->vq;

err_desc_extra:
	kfree(vq->packed.desc_state);
err_desc_state:
	kfree(vq);
err_vq:
	vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr);
err_device:
	vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr);
err_driver:
	vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr);
err_ring:
	return NULL;
}


/*
 * Generic functions and exported symbols.
 */

static inline int virtqueue_add(struct virtqueue *_vq,
				struct scatterlist *sgs[],
				unsigned int total_sg,
				unsigned int out_sgs,
				unsigned int in_sgs,
				void *data,
				void *ctx,
				gfp_t gfp)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
					out_sgs, in_sgs, data, ctx, gfp) :
				 virtqueue_add_split(_vq, sgs, total_sg,
					out_sgs, in_sgs, data, ctx, gfp);
}

/**
 * virtqueue_add_sgs - expose buffers to other end
 * @_vq: the struct virtqueue we're talking about.
 * @sgs: array of terminated scatterlists.
 * @out_sgs: the number of scatterlists readable by other side
 * @in_sgs: the number of scatterlists which are writable (after readable ones)
 * @data: the token identifying the buffer.
 * @gfp: how to do memory allocations (if necessary).
 *
 * Caller must ensure we don't call this with other virtqueue operations
 * at the same time (except where noted).
 *
 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
 */
int virtqueue_add_sgs(struct virtqueue *_vq,
		      struct scatterlist *sgs[],
		      unsigned int out_sgs,
		      unsigned int in_sgs,
		      void *data,
		      gfp_t gfp)
{
	unsigned int i, total_sg = 0;

	/* Count them first. */
	for (i = 0; i < out_sgs + in_sgs; i++) {
		struct scatterlist *sg;

		for (sg = sgs[i]; sg; sg = sg_next(sg))
			total_sg++;
	}
	return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
			     data, NULL, gfp);
}
EXPORT_SYMBOL_GPL(virtqueue_add_sgs);

/**
 * virtqueue_add_outbuf - expose output buffers to other end
 * @vq: the struct virtqueue we're talking about.
 * @sg: scatterlist (must be well-formed and terminated!)
 * @num: the number of entries in @sg readable by other side
 * @data: the token identifying the buffer.
 * @gfp: how to do memory allocations (if necessary).
 *
 * Caller must ensure we don't call this with other virtqueue operations
 * at the same time (except where noted).
 *
 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
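 *
 * A minimal usage sketch (hypothetical driver code, not taken from this
 * file; @buf and @buf_len are placeholders) would look like:
 *	struct scatterlist sg;
 *
 *	sg_init_one(&sg, buf, buf_len);
 *	if (!virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC))
 *		virtqueue_kick(vq);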
1754 */ 1755 int virtqueue_add_outbuf(struct virtqueue *vq, 1756 struct scatterlist *sg, unsigned int num, 1757 void *data, 1758 gfp_t gfp) 1759 { 1760 return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp); 1761 } 1762 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); 1763 1764 /** 1765 * virtqueue_add_inbuf - expose input buffers to other end 1766 * @vq: the struct virtqueue we're talking about. 1767 * @sg: scatterlist (must be well-formed and terminated!) 1768 * @num: the number of entries in @sg writable by other side 1769 * @data: the token identifying the buffer. 1770 * @gfp: how to do memory allocations (if necessary). 1771 * 1772 * Caller must ensure we don't call this with other virtqueue operations 1773 * at the same time (except where noted). 1774 * 1775 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1776 */ 1777 int virtqueue_add_inbuf(struct virtqueue *vq, 1778 struct scatterlist *sg, unsigned int num, 1779 void *data, 1780 gfp_t gfp) 1781 { 1782 return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp); 1783 } 1784 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); 1785 1786 /** 1787 * virtqueue_add_inbuf_ctx - expose input buffers to other end 1788 * @vq: the struct virtqueue we're talking about. 1789 * @sg: scatterlist (must be well-formed and terminated!) 1790 * @num: the number of entries in @sg writable by other side 1791 * @data: the token identifying the buffer. 1792 * @ctx: extra context for the token 1793 * @gfp: how to do memory allocations (if necessary). 1794 * 1795 * Caller must ensure we don't call this with other virtqueue operations 1796 * at the same time (except where noted). 1797 * 1798 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1799 */ 1800 int virtqueue_add_inbuf_ctx(struct virtqueue *vq, 1801 struct scatterlist *sg, unsigned int num, 1802 void *data, 1803 void *ctx, 1804 gfp_t gfp) 1805 { 1806 return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp); 1807 } 1808 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx); 1809 1810 /** 1811 * virtqueue_kick_prepare - first half of split virtqueue_kick call. 1812 * @_vq: the struct virtqueue 1813 * 1814 * Instead of virtqueue_kick(), you can do: 1815 * if (virtqueue_kick_prepare(vq)) 1816 * virtqueue_notify(vq); 1817 * 1818 * This is sometimes useful because the virtqueue_kick_prepare() needs 1819 * to be serialized, but the actual virtqueue_notify() call does not. 1820 */ 1821 bool virtqueue_kick_prepare(struct virtqueue *_vq) 1822 { 1823 struct vring_virtqueue *vq = to_vvq(_vq); 1824 1825 return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) : 1826 virtqueue_kick_prepare_split(_vq); 1827 } 1828 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare); 1829 1830 /** 1831 * virtqueue_notify - second half of split virtqueue_kick call. 1832 * @_vq: the struct virtqueue 1833 * 1834 * This does not need to be serialized. 1835 * 1836 * Returns false if host notify failed or queue is broken, otherwise true. 1837 */ 1838 bool virtqueue_notify(struct virtqueue *_vq) 1839 { 1840 struct vring_virtqueue *vq = to_vvq(_vq); 1841 1842 if (unlikely(vq->broken)) 1843 return false; 1844 1845 /* Prod other side to tell it about changes. */ 1846 if (!vq->notify(_vq)) { 1847 vq->broken = true; 1848 return false; 1849 } 1850 return true; 1851 } 1852 EXPORT_SYMBOL_GPL(virtqueue_notify); 1853 1854 /** 1855 * virtqueue_kick - update after add_buf 1856 * @vq: the struct virtqueue 1857 * 1858 * After one or more virtqueue_add_* calls, invoke this to kick 1859 * the other side. 
1860 * 1861 * Caller must ensure we don't call this with other virtqueue 1862 * operations at the same time (except where noted). 1863 * 1864 * Returns false if kick failed, otherwise true. 1865 */ 1866 bool virtqueue_kick(struct virtqueue *vq) 1867 { 1868 if (virtqueue_kick_prepare(vq)) 1869 return virtqueue_notify(vq); 1870 return true; 1871 } 1872 EXPORT_SYMBOL_GPL(virtqueue_kick); 1873 1874 /** 1875 * virtqueue_get_buf - get the next used buffer 1876 * @_vq: the struct virtqueue we're talking about. 1877 * @len: the length written into the buffer 1878 * @ctx: extra context for the token 1879 * 1880 * If the device wrote data into the buffer, @len will be set to the 1881 * amount written. This means you don't need to clear the buffer 1882 * beforehand to ensure there's no data leakage in the case of short 1883 * writes. 1884 * 1885 * Caller must ensure we don't call this with other virtqueue 1886 * operations at the same time (except where noted). 1887 * 1888 * Returns NULL if there are no used buffers, or the "data" token 1889 * handed to virtqueue_add_*(). 1890 */ 1891 void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, 1892 void **ctx) 1893 { 1894 struct vring_virtqueue *vq = to_vvq(_vq); 1895 1896 return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) : 1897 virtqueue_get_buf_ctx_split(_vq, len, ctx); 1898 } 1899 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx); 1900 1901 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) 1902 { 1903 return virtqueue_get_buf_ctx(_vq, len, NULL); 1904 } 1905 EXPORT_SYMBOL_GPL(virtqueue_get_buf); 1906 /** 1907 * virtqueue_disable_cb - disable callbacks 1908 * @_vq: the struct virtqueue we're talking about. 1909 * 1910 * Note that this is not necessarily synchronous, hence unreliable and only 1911 * useful as an optimization. 1912 * 1913 * Unlike other operations, this need not be serialized. 1914 */ 1915 void virtqueue_disable_cb(struct virtqueue *_vq) 1916 { 1917 struct vring_virtqueue *vq = to_vvq(_vq); 1918 1919 if (vq->packed_ring) 1920 virtqueue_disable_cb_packed(_vq); 1921 else 1922 virtqueue_disable_cb_split(_vq); 1923 } 1924 EXPORT_SYMBOL_GPL(virtqueue_disable_cb); 1925 1926 /** 1927 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb 1928 * @_vq: the struct virtqueue we're talking about. 1929 * 1930 * This re-enables callbacks; it returns current queue state 1931 * in an opaque unsigned value. This value should be later tested by 1932 * virtqueue_poll, to detect a possible race between the driver checking for 1933 * more work, and enabling callbacks. 1934 * 1935 * Caller must ensure we don't call this with other virtqueue 1936 * operations at the same time (except where noted). 1937 */ 1938 unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq) 1939 { 1940 struct vring_virtqueue *vq = to_vvq(_vq); 1941 1942 return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) : 1943 virtqueue_enable_cb_prepare_split(_vq); 1944 } 1945 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); 1946 1947 /** 1948 * virtqueue_poll - query pending used buffers 1949 * @_vq: the struct virtqueue we're talking about. 1950 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare). 1951 * 1952 * Returns "true" if there are pending used buffers in the queue. 1953 * 1954 * This does not need to be serialized. 
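 *
 * A typical caller-side pattern (hypothetical, not taken from this file)
 * pairs it with virtqueue_enable_cb_prepare():
 *	unsigned int opaque = virtqueue_enable_cb_prepare(vq);
 *
 *	if (virtqueue_poll(vq, opaque))
 *		virtqueue_disable_cb(vq);
 * i.e. if used buffers arrived in the race window, disable callbacks
 * again and keep processing instead of waiting for an interrupt.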
1955 */ 1956 bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx) 1957 { 1958 struct vring_virtqueue *vq = to_vvq(_vq); 1959 1960 virtio_mb(vq->weak_barriers); 1961 return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) : 1962 virtqueue_poll_split(_vq, last_used_idx); 1963 } 1964 EXPORT_SYMBOL_GPL(virtqueue_poll); 1965 1966 /** 1967 * virtqueue_enable_cb - restart callbacks after disable_cb. 1968 * @_vq: the struct virtqueue we're talking about. 1969 * 1970 * This re-enables callbacks; it returns "false" if there are pending 1971 * buffers in the queue, to detect a possible race between the driver 1972 * checking for more work, and enabling callbacks. 1973 * 1974 * Caller must ensure we don't call this with other virtqueue 1975 * operations at the same time (except where noted). 1976 */ 1977 bool virtqueue_enable_cb(struct virtqueue *_vq) 1978 { 1979 unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq); 1980 1981 return !virtqueue_poll(_vq, last_used_idx); 1982 } 1983 EXPORT_SYMBOL_GPL(virtqueue_enable_cb); 1984 1985 /** 1986 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb. 1987 * @_vq: the struct virtqueue we're talking about. 1988 * 1989 * This re-enables callbacks but hints to the other side to delay 1990 * interrupts until most of the available buffers have been processed; 1991 * it returns "false" if there are many pending buffers in the queue, 1992 * to detect a possible race between the driver checking for more work, 1993 * and enabling callbacks. 1994 * 1995 * Caller must ensure we don't call this with other virtqueue 1996 * operations at the same time (except where noted). 1997 */ 1998 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) 1999 { 2000 struct vring_virtqueue *vq = to_vvq(_vq); 2001 2002 return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) : 2003 virtqueue_enable_cb_delayed_split(_vq); 2004 } 2005 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); 2006 2007 /** 2008 * virtqueue_detach_unused_buf - detach first unused buffer 2009 * @_vq: the struct virtqueue we're talking about. 2010 * 2011 * Returns NULL or the "data" token handed to virtqueue_add_*(). 2012 * This is not valid on an active queue; it is useful only for device 2013 * shutdown. 2014 */ 2015 void *virtqueue_detach_unused_buf(struct virtqueue *_vq) 2016 { 2017 struct vring_virtqueue *vq = to_vvq(_vq); 2018 2019 return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) : 2020 virtqueue_detach_unused_buf_split(_vq); 2021 } 2022 EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf); 2023 2024 static inline bool more_used(const struct vring_virtqueue *vq) 2025 { 2026 return vq->packed_ring ? 
more_used_packed(vq) : more_used_split(vq); 2027 } 2028 2029 irqreturn_t vring_interrupt(int irq, void *_vq) 2030 { 2031 struct vring_virtqueue *vq = to_vvq(_vq); 2032 2033 if (!more_used(vq)) { 2034 pr_debug("virtqueue interrupt with no work for %p\n", vq); 2035 return IRQ_NONE; 2036 } 2037 2038 if (unlikely(vq->broken)) 2039 return IRQ_HANDLED; 2040 2041 pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback); 2042 if (vq->vq.callback) 2043 vq->vq.callback(&vq->vq); 2044 2045 return IRQ_HANDLED; 2046 } 2047 EXPORT_SYMBOL_GPL(vring_interrupt); 2048 2049 /* Only available for split ring */ 2050 struct virtqueue *__vring_new_virtqueue(unsigned int index, 2051 struct vring vring, 2052 struct virtio_device *vdev, 2053 bool weak_barriers, 2054 bool context, 2055 bool (*notify)(struct virtqueue *), 2056 void (*callback)(struct virtqueue *), 2057 const char *name) 2058 { 2059 unsigned int i; 2060 struct vring_virtqueue *vq; 2061 2062 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 2063 return NULL; 2064 2065 vq = kmalloc(sizeof(*vq), GFP_KERNEL); 2066 if (!vq) 2067 return NULL; 2068 2069 vq->packed_ring = false; 2070 vq->vq.callback = callback; 2071 vq->vq.vdev = vdev; 2072 vq->vq.name = name; 2073 vq->vq.num_free = vring.num; 2074 vq->vq.index = index; 2075 vq->we_own_ring = false; 2076 vq->notify = notify; 2077 vq->weak_barriers = weak_barriers; 2078 vq->broken = false; 2079 vq->last_used_idx = 0; 2080 vq->num_added = 0; 2081 vq->use_dma_api = vring_use_dma_api(vdev); 2082 list_add_tail(&vq->vq.list, &vdev->vqs); 2083 #ifdef DEBUG 2084 vq->in_use = false; 2085 vq->last_add_time_valid = false; 2086 #endif 2087 2088 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && 2089 !context; 2090 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 2091 2092 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) 2093 vq->weak_barriers = false; 2094 2095 vq->split.queue_dma_addr = 0; 2096 vq->split.queue_size_in_bytes = 0; 2097 2098 vq->split.vring = vring; 2099 vq->split.avail_flags_shadow = 0; 2100 vq->split.avail_idx_shadow = 0; 2101 2102 /* No callback? Tell other side not to bother us. */ 2103 if (!callback) { 2104 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; 2105 if (!vq->event) 2106 vq->split.vring.avail->flags = cpu_to_virtio16(vdev, 2107 vq->split.avail_flags_shadow); 2108 } 2109 2110 vq->split.desc_state = kmalloc_array(vring.num, 2111 sizeof(struct vring_desc_state_split), GFP_KERNEL); 2112 if (!vq->split.desc_state) { 2113 kfree(vq); 2114 return NULL; 2115 } 2116 2117 /* Put everything in free lists. 
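 * The loop below chains desc[i].next to i + 1 for the first num - 1
 * descriptors, so the whole descriptor table starts out as a single
 * free chain headed at free_head == 0.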
*/ 2118 vq->free_head = 0; 2119 for (i = 0; i < vring.num-1; i++) 2120 vq->split.vring.desc[i].next = cpu_to_virtio16(vdev, i + 1); 2121 memset(vq->split.desc_state, 0, vring.num * 2122 sizeof(struct vring_desc_state_split)); 2123 2124 return &vq->vq; 2125 } 2126 EXPORT_SYMBOL_GPL(__vring_new_virtqueue); 2127 2128 struct virtqueue *vring_create_virtqueue( 2129 unsigned int index, 2130 unsigned int num, 2131 unsigned int vring_align, 2132 struct virtio_device *vdev, 2133 bool weak_barriers, 2134 bool may_reduce_num, 2135 bool context, 2136 bool (*notify)(struct virtqueue *), 2137 void (*callback)(struct virtqueue *), 2138 const char *name) 2139 { 2140 2141 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 2142 return vring_create_virtqueue_packed(index, num, vring_align, 2143 vdev, weak_barriers, may_reduce_num, 2144 context, notify, callback, name); 2145 2146 return vring_create_virtqueue_split(index, num, vring_align, 2147 vdev, weak_barriers, may_reduce_num, 2148 context, notify, callback, name); 2149 } 2150 EXPORT_SYMBOL_GPL(vring_create_virtqueue); 2151 2152 /* Only available for split ring */ 2153 struct virtqueue *vring_new_virtqueue(unsigned int index, 2154 unsigned int num, 2155 unsigned int vring_align, 2156 struct virtio_device *vdev, 2157 bool weak_barriers, 2158 bool context, 2159 void *pages, 2160 bool (*notify)(struct virtqueue *vq), 2161 void (*callback)(struct virtqueue *vq), 2162 const char *name) 2163 { 2164 struct vring vring; 2165 2166 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 2167 return NULL; 2168 2169 vring_init(&vring, num, pages, vring_align); 2170 return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context, 2171 notify, callback, name); 2172 } 2173 EXPORT_SYMBOL_GPL(vring_new_virtqueue); 2174 2175 void vring_del_virtqueue(struct virtqueue *_vq) 2176 { 2177 struct vring_virtqueue *vq = to_vvq(_vq); 2178 2179 if (vq->we_own_ring) { 2180 if (vq->packed_ring) { 2181 vring_free_queue(vq->vq.vdev, 2182 vq->packed.ring_size_in_bytes, 2183 vq->packed.vring.desc, 2184 vq->packed.ring_dma_addr); 2185 2186 vring_free_queue(vq->vq.vdev, 2187 vq->packed.event_size_in_bytes, 2188 vq->packed.vring.driver, 2189 vq->packed.driver_event_dma_addr); 2190 2191 vring_free_queue(vq->vq.vdev, 2192 vq->packed.event_size_in_bytes, 2193 vq->packed.vring.device, 2194 vq->packed.device_event_dma_addr); 2195 2196 kfree(vq->packed.desc_state); 2197 kfree(vq->packed.desc_extra); 2198 } else { 2199 vring_free_queue(vq->vq.vdev, 2200 vq->split.queue_size_in_bytes, 2201 vq->split.vring.desc, 2202 vq->split.queue_dma_addr); 2203 2204 kfree(vq->split.desc_state); 2205 } 2206 } 2207 list_del(&_vq->list); 2208 kfree(vq); 2209 } 2210 EXPORT_SYMBOL_GPL(vring_del_virtqueue); 2211 2212 /* Manipulates transport-specific feature bits. */ 2213 void vring_transport_features(struct virtio_device *vdev) 2214 { 2215 unsigned int i; 2216 2217 for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) { 2218 switch (i) { 2219 case VIRTIO_RING_F_INDIRECT_DESC: 2220 break; 2221 case VIRTIO_RING_F_EVENT_IDX: 2222 break; 2223 case VIRTIO_F_VERSION_1: 2224 break; 2225 case VIRTIO_F_IOMMU_PLATFORM: 2226 break; 2227 case VIRTIO_F_RING_PACKED: 2228 break; 2229 case VIRTIO_F_ORDER_PLATFORM: 2230 break; 2231 default: 2232 /* We don't understand this bit. 
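 * Clear it, so the ring code never ends up accepting a transport
 * feature it cannot handle.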
*/ 2233 __virtio_clear_bit(vdev, i); 2234 } 2235 } 2236 } 2237 EXPORT_SYMBOL_GPL(vring_transport_features); 2238 2239 /** 2240 * virtqueue_get_vring_size - return the size of the virtqueue's vring 2241 * @_vq: the struct virtqueue containing the vring of interest. 2242 * 2243 * Returns the size of the vring. This is mainly used for boasting to 2244 * userspace. Unlike other operations, this need not be serialized. 2245 */ 2246 unsigned int virtqueue_get_vring_size(struct virtqueue *_vq) 2247 { 2248 2249 struct vring_virtqueue *vq = to_vvq(_vq); 2250 2251 return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num; 2252 } 2253 EXPORT_SYMBOL_GPL(virtqueue_get_vring_size); 2254 2255 bool virtqueue_is_broken(struct virtqueue *_vq) 2256 { 2257 struct vring_virtqueue *vq = to_vvq(_vq); 2258 2259 return vq->broken; 2260 } 2261 EXPORT_SYMBOL_GPL(virtqueue_is_broken); 2262 2263 /* 2264 * This should prevent the device from being used, allowing drivers to 2265 * recover. You may need to grab appropriate locks to flush. 2266 */ 2267 void virtio_break_device(struct virtio_device *dev) 2268 { 2269 struct virtqueue *_vq; 2270 2271 list_for_each_entry(_vq, &dev->vqs, list) { 2272 struct vring_virtqueue *vq = to_vvq(_vq); 2273 vq->broken = true; 2274 } 2275 } 2276 EXPORT_SYMBOL_GPL(virtio_break_device); 2277 2278 dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq) 2279 { 2280 struct vring_virtqueue *vq = to_vvq(_vq); 2281 2282 BUG_ON(!vq->we_own_ring); 2283 2284 if (vq->packed_ring) 2285 return vq->packed.ring_dma_addr; 2286 2287 return vq->split.queue_dma_addr; 2288 } 2289 EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr); 2290 2291 dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq) 2292 { 2293 struct vring_virtqueue *vq = to_vvq(_vq); 2294 2295 BUG_ON(!vq->we_own_ring); 2296 2297 if (vq->packed_ring) 2298 return vq->packed.driver_event_dma_addr; 2299 2300 return vq->split.queue_dma_addr + 2301 ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc); 2302 } 2303 EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr); 2304 2305 dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq) 2306 { 2307 struct vring_virtqueue *vq = to_vvq(_vq); 2308 2309 BUG_ON(!vq->we_own_ring); 2310 2311 if (vq->packed_ring) 2312 return vq->packed.device_event_dma_addr; 2313 2314 return vq->split.queue_dma_addr + 2315 ((char *)vq->split.vring.used - (char *)vq->split.vring.desc); 2316 } 2317 EXPORT_SYMBOL_GPL(virtqueue_get_used_addr); 2318 2319 /* Only available for split ring */ 2320 const struct vring *virtqueue_get_vring(struct virtqueue *vq) 2321 { 2322 return &to_vvq(vq)->split.vring; 2323 } 2324 EXPORT_SYMBOL_GPL(virtqueue_get_vring); 2325 2326 MODULE_LICENSE("GPL"); 2327
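
/*
 * Editor's usage sketch (illustration only, not part of the driver API):
 * a typical virtqueue callback drains used buffers and then re-arms
 * callbacks, re-checking to close the race window described above for
 * virtqueue_enable_cb(). "my_vq_callback" and "consume()" are hypothetical
 * driver functions; the token returned by virtqueue_get_buf() is whatever
 * "data" pointer was passed to virtqueue_add_*().
 *
 *	static void my_vq_callback(struct virtqueue *vq)
 *	{
 *		unsigned int len;
 *		void *token;
 *
 *		virtqueue_disable_cb(vq);
 *		do {
 *			while ((token = virtqueue_get_buf(vq, &len)))
 *				consume(token, len);
 *		} while (!virtqueue_enable_cb(vq));
 *	}
 */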