1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* Virtio ring implementation. 3 * 4 * Copyright 2007 Rusty Russell IBM Corporation 5 */ 6 #include <linux/virtio.h> 7 #include <linux/virtio_ring.h> 8 #include <linux/virtio_config.h> 9 #include <linux/device.h> 10 #include <linux/slab.h> 11 #include <linux/module.h> 12 #include <linux/hrtimer.h> 13 #include <linux/dma-mapping.h> 14 #include <xen/xen.h> 15 16 #ifdef DEBUG 17 /* For development, we want to crash whenever the ring is screwed. */ 18 #define BAD_RING(_vq, fmt, args...) \ 19 do { \ 20 dev_err(&(_vq)->vq.vdev->dev, \ 21 "%s:"fmt, (_vq)->vq.name, ##args); \ 22 BUG(); \ 23 } while (0) 24 /* Caller is supposed to guarantee no reentry. */ 25 #define START_USE(_vq) \ 26 do { \ 27 if ((_vq)->in_use) \ 28 panic("%s:in_use = %i\n", \ 29 (_vq)->vq.name, (_vq)->in_use); \ 30 (_vq)->in_use = __LINE__; \ 31 } while (0) 32 #define END_USE(_vq) \ 33 do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0) 34 #define LAST_ADD_TIME_UPDATE(_vq) \ 35 do { \ 36 ktime_t now = ktime_get(); \ 37 \ 38 /* No kick or get, with .1 second between? Warn. */ \ 39 if ((_vq)->last_add_time_valid) \ 40 WARN_ON(ktime_to_ms(ktime_sub(now, \ 41 (_vq)->last_add_time)) > 100); \ 42 (_vq)->last_add_time = now; \ 43 (_vq)->last_add_time_valid = true; \ 44 } while (0) 45 #define LAST_ADD_TIME_CHECK(_vq) \ 46 do { \ 47 if ((_vq)->last_add_time_valid) { \ 48 WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \ 49 (_vq)->last_add_time)) > 100); \ 50 } \ 51 } while (0) 52 #define LAST_ADD_TIME_INVALID(_vq) \ 53 ((_vq)->last_add_time_valid = false) 54 #else 55 #define BAD_RING(_vq, fmt, args...) \ 56 do { \ 57 dev_err(&_vq->vq.vdev->dev, \ 58 "%s:"fmt, (_vq)->vq.name, ##args); \ 59 (_vq)->broken = true; \ 60 } while (0) 61 #define START_USE(vq) 62 #define END_USE(vq) 63 #define LAST_ADD_TIME_UPDATE(vq) 64 #define LAST_ADD_TIME_CHECK(vq) 65 #define LAST_ADD_TIME_INVALID(vq) 66 #endif 67 68 struct vring_desc_state_split { 69 void *data; /* Data for callback. */ 70 struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ 71 }; 72 73 struct vring_desc_state_packed { 74 void *data; /* Data for callback. */ 75 struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */ 76 u16 num; /* Descriptor list length. */ 77 u16 next; /* The next desc state in a list. */ 78 u16 last; /* The last desc state in a list. */ 79 }; 80 81 struct vring_desc_extra_packed { 82 dma_addr_t addr; /* Buffer DMA addr. */ 83 u32 len; /* Buffer length. */ 84 u16 flags; /* Descriptor flags. */ 85 }; 86 87 struct vring_virtqueue { 88 struct virtqueue vq; 89 90 /* Is this a packed ring? */ 91 bool packed_ring; 92 93 /* Is DMA API used? */ 94 bool use_dma_api; 95 96 /* Can we use weak barriers? */ 97 bool weak_barriers; 98 99 /* Other side has made a mess, don't try any more. */ 100 bool broken; 101 102 /* Host supports indirect buffers */ 103 bool indirect; 104 105 /* Host publishes avail event idx */ 106 bool event; 107 108 /* Head of free buffer list. */ 109 unsigned int free_head; 110 /* Number we've added since last sync. */ 111 unsigned int num_added; 112 113 /* Last used index we've seen. */ 114 u16 last_used_idx; 115 116 union { 117 /* Available for split ring */ 118 struct { 119 /* Actual memory layout for this queue. */ 120 struct vring vring; 121 122 /* Last written value to avail->flags */ 123 u16 avail_flags_shadow; 124 125 /* 126 * Last written value to avail->idx in 127 * guest byte order. 128 */ 129 u16 avail_idx_shadow; 130 131 /* Per-descriptor state. */ 132 struct vring_desc_state_split *desc_state; 133 134 /* DMA address and size information */ 135 dma_addr_t queue_dma_addr; 136 size_t queue_size_in_bytes; 137 } split; 138 139 /* Available for packed ring */ 140 struct { 141 /* Actual memory layout for this queue. */ 142 struct { 143 unsigned int num; 144 struct vring_packed_desc *desc; 145 struct vring_packed_desc_event *driver; 146 struct vring_packed_desc_event *device; 147 } vring; 148 149 /* Driver ring wrap counter. */ 150 bool avail_wrap_counter; 151 152 /* Device ring wrap counter. */ 153 bool used_wrap_counter; 154 155 /* Avail used flags. */ 156 u16 avail_used_flags; 157 158 /* Index of the next avail descriptor. */ 159 u16 next_avail_idx; 160 161 /* 162 * Last written value to driver->flags in 163 * guest byte order. 164 */ 165 u16 event_flags_shadow; 166 167 /* Per-descriptor state. */ 168 struct vring_desc_state_packed *desc_state; 169 struct vring_desc_extra_packed *desc_extra; 170 171 /* DMA address and size information */ 172 dma_addr_t ring_dma_addr; 173 dma_addr_t driver_event_dma_addr; 174 dma_addr_t device_event_dma_addr; 175 size_t ring_size_in_bytes; 176 size_t event_size_in_bytes; 177 } packed; 178 }; 179 180 /* How to notify other side. FIXME: commonalize hcalls! */ 181 bool (*notify)(struct virtqueue *vq); 182 183 /* DMA, allocation, and size information */ 184 bool we_own_ring; 185 186 #ifdef DEBUG 187 /* They're supposed to lock for us. */ 188 unsigned int in_use; 189 190 /* Figure out if their kicks are too delayed. */ 191 bool last_add_time_valid; 192 ktime_t last_add_time; 193 #endif 194 }; 195 196 197 /* 198 * Helpers. 199 */ 200 201 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) 202 203 static inline bool virtqueue_use_indirect(struct virtqueue *_vq, 204 unsigned int total_sg) 205 { 206 struct vring_virtqueue *vq = to_vvq(_vq); 207 208 /* 209 * If the host supports indirect descriptor tables, and we have multiple 210 * buffers, then go indirect. FIXME: tune this threshold 211 */ 212 return (vq->indirect && total_sg > 1 && vq->vq.num_free); 213 } 214 215 /* 216 * Modern virtio devices have feature bits to specify whether they need a 217 * quirk and bypass the IOMMU. If not there, just use the DMA API. 218 * 219 * If there, the interaction between virtio and DMA API is messy. 220 * 221 * On most systems with virtio, physical addresses match bus addresses, 222 * and it doesn't particularly matter whether we use the DMA API. 223 * 224 * On some systems, including Xen and any system with a physical device 225 * that speaks virtio behind a physical IOMMU, we must use the DMA API 226 * for virtio DMA to work at all. 227 * 228 * On other systems, including SPARC and PPC64, virtio-pci devices are 229 * enumerated as though they are behind an IOMMU, but the virtio host 230 * ignores the IOMMU, so we must either pretend that the IOMMU isn't 231 * there or somehow map everything as the identity. 232 * 233 * For the time being, we preserve historic behavior and bypass the DMA 234 * API. 235 * 236 * TODO: install a per-device DMA ops structure that does the right thing 237 * taking into account all the above quirks, and use the DMA API 238 * unconditionally on data path. 239 */ 240 241 static bool vring_use_dma_api(struct virtio_device *vdev) 242 { 243 if (!virtio_has_iommu_quirk(vdev)) 244 return true; 245 246 /* Otherwise, we are left to guess. */ 247 /* 248 * In theory, it's possible to have a buggy QEMU-supposed 249 * emulated Q35 IOMMU and Xen enabled at the same time. On 250 * such a configuration, virtio has never worked and will 251 * not work without an even larger kludge. Instead, enable 252 * the DMA API if we're a Xen guest, which at least allows 253 * all of the sensible Xen configurations to work correctly. 254 */ 255 if (xen_domain()) 256 return true; 257 258 return false; 259 } 260 261 size_t virtio_max_dma_size(struct virtio_device *vdev) 262 { 263 size_t max_segment_size = SIZE_MAX; 264 265 if (vring_use_dma_api(vdev)) 266 max_segment_size = dma_max_mapping_size(&vdev->dev); 267 268 return max_segment_size; 269 } 270 EXPORT_SYMBOL_GPL(virtio_max_dma_size); 271 272 static void *vring_alloc_queue(struct virtio_device *vdev, size_t size, 273 dma_addr_t *dma_handle, gfp_t flag) 274 { 275 if (vring_use_dma_api(vdev)) { 276 return dma_alloc_coherent(vdev->dev.parent, size, 277 dma_handle, flag); 278 } else { 279 void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag); 280 281 if (queue) { 282 phys_addr_t phys_addr = virt_to_phys(queue); 283 *dma_handle = (dma_addr_t)phys_addr; 284 285 /* 286 * Sanity check: make sure we dind't truncate 287 * the address. The only arches I can find that 288 * have 64-bit phys_addr_t but 32-bit dma_addr_t 289 * are certain non-highmem MIPS and x86 290 * configurations, but these configurations 291 * should never allocate physical pages above 32 292 * bits, so this is fine. Just in case, throw a 293 * warning and abort if we end up with an 294 * unrepresentable address. 295 */ 296 if (WARN_ON_ONCE(*dma_handle != phys_addr)) { 297 free_pages_exact(queue, PAGE_ALIGN(size)); 298 return NULL; 299 } 300 } 301 return queue; 302 } 303 } 304 305 static void vring_free_queue(struct virtio_device *vdev, size_t size, 306 void *queue, dma_addr_t dma_handle) 307 { 308 if (vring_use_dma_api(vdev)) 309 dma_free_coherent(vdev->dev.parent, size, queue, dma_handle); 310 else 311 free_pages_exact(queue, PAGE_ALIGN(size)); 312 } 313 314 /* 315 * The DMA ops on various arches are rather gnarly right now, and 316 * making all of the arch DMA ops work on the vring device itself 317 * is a mess. For now, we use the parent device for DMA ops. 318 */ 319 static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq) 320 { 321 return vq->vq.vdev->dev.parent; 322 } 323 324 /* Map one sg entry. */ 325 static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq, 326 struct scatterlist *sg, 327 enum dma_data_direction direction) 328 { 329 if (!vq->use_dma_api) 330 return (dma_addr_t)sg_phys(sg); 331 332 /* 333 * We can't use dma_map_sg, because we don't use scatterlists in 334 * the way it expects (we don't guarantee that the scatterlist 335 * will exist for the lifetime of the mapping). 336 */ 337 return dma_map_page(vring_dma_dev(vq), 338 sg_page(sg), sg->offset, sg->length, 339 direction); 340 } 341 342 static dma_addr_t vring_map_single(const struct vring_virtqueue *vq, 343 void *cpu_addr, size_t size, 344 enum dma_data_direction direction) 345 { 346 if (!vq->use_dma_api) 347 return (dma_addr_t)virt_to_phys(cpu_addr); 348 349 return dma_map_single(vring_dma_dev(vq), 350 cpu_addr, size, direction); 351 } 352 353 static int vring_mapping_error(const struct vring_virtqueue *vq, 354 dma_addr_t addr) 355 { 356 if (!vq->use_dma_api) 357 return 0; 358 359 return dma_mapping_error(vring_dma_dev(vq), addr); 360 } 361 362 363 /* 364 * Split ring specific functions - *_split(). 365 */ 366 367 static void vring_unmap_one_split(const struct vring_virtqueue *vq, 368 struct vring_desc *desc) 369 { 370 u16 flags; 371 372 if (!vq->use_dma_api) 373 return; 374 375 flags = virtio16_to_cpu(vq->vq.vdev, desc->flags); 376 377 if (flags & VRING_DESC_F_INDIRECT) { 378 dma_unmap_single(vring_dma_dev(vq), 379 virtio64_to_cpu(vq->vq.vdev, desc->addr), 380 virtio32_to_cpu(vq->vq.vdev, desc->len), 381 (flags & VRING_DESC_F_WRITE) ? 382 DMA_FROM_DEVICE : DMA_TO_DEVICE); 383 } else { 384 dma_unmap_page(vring_dma_dev(vq), 385 virtio64_to_cpu(vq->vq.vdev, desc->addr), 386 virtio32_to_cpu(vq->vq.vdev, desc->len), 387 (flags & VRING_DESC_F_WRITE) ? 388 DMA_FROM_DEVICE : DMA_TO_DEVICE); 389 } 390 } 391 392 static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq, 393 unsigned int total_sg, 394 gfp_t gfp) 395 { 396 struct vring_desc *desc; 397 unsigned int i; 398 399 /* 400 * We require lowmem mappings for the descriptors because 401 * otherwise virt_to_phys will give us bogus addresses in the 402 * virtqueue. 403 */ 404 gfp &= ~__GFP_HIGHMEM; 405 406 desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp); 407 if (!desc) 408 return NULL; 409 410 for (i = 0; i < total_sg; i++) 411 desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1); 412 return desc; 413 } 414 415 static inline int virtqueue_add_split(struct virtqueue *_vq, 416 struct scatterlist *sgs[], 417 unsigned int total_sg, 418 unsigned int out_sgs, 419 unsigned int in_sgs, 420 void *data, 421 void *ctx, 422 gfp_t gfp) 423 { 424 struct vring_virtqueue *vq = to_vvq(_vq); 425 struct scatterlist *sg; 426 struct vring_desc *desc; 427 unsigned int i, n, avail, descs_used, uninitialized_var(prev), err_idx; 428 int head; 429 bool indirect; 430 431 START_USE(vq); 432 433 BUG_ON(data == NULL); 434 BUG_ON(ctx && vq->indirect); 435 436 if (unlikely(vq->broken)) { 437 END_USE(vq); 438 return -EIO; 439 } 440 441 LAST_ADD_TIME_UPDATE(vq); 442 443 BUG_ON(total_sg == 0); 444 445 head = vq->free_head; 446 447 if (virtqueue_use_indirect(_vq, total_sg)) 448 desc = alloc_indirect_split(_vq, total_sg, gfp); 449 else { 450 desc = NULL; 451 WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect); 452 } 453 454 if (desc) { 455 /* Use a single buffer which doesn't continue */ 456 indirect = true; 457 /* Set up rest to use this indirect table. */ 458 i = 0; 459 descs_used = 1; 460 } else { 461 indirect = false; 462 desc = vq->split.vring.desc; 463 i = head; 464 descs_used = total_sg; 465 } 466 467 if (vq->vq.num_free < descs_used) { 468 pr_debug("Can't add buf len %i - avail = %i\n", 469 descs_used, vq->vq.num_free); 470 /* FIXME: for historical reasons, we force a notify here if 471 * there are outgoing parts to the buffer. Presumably the 472 * host should service the ring ASAP. */ 473 if (out_sgs) 474 vq->notify(&vq->vq); 475 if (indirect) 476 kfree(desc); 477 END_USE(vq); 478 return -ENOSPC; 479 } 480 481 for (n = 0; n < out_sgs; n++) { 482 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 483 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE); 484 if (vring_mapping_error(vq, addr)) 485 goto unmap_release; 486 487 desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT); 488 desc[i].addr = cpu_to_virtio64(_vq->vdev, addr); 489 desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length); 490 prev = i; 491 i = virtio16_to_cpu(_vq->vdev, desc[i].next); 492 } 493 } 494 for (; n < (out_sgs + in_sgs); n++) { 495 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 496 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE); 497 if (vring_mapping_error(vq, addr)) 498 goto unmap_release; 499 500 desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE); 501 desc[i].addr = cpu_to_virtio64(_vq->vdev, addr); 502 desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length); 503 prev = i; 504 i = virtio16_to_cpu(_vq->vdev, desc[i].next); 505 } 506 } 507 /* Last one doesn't continue. */ 508 desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); 509 510 if (indirect) { 511 /* Now that the indirect table is filled in, map it. */ 512 dma_addr_t addr = vring_map_single( 513 vq, desc, total_sg * sizeof(struct vring_desc), 514 DMA_TO_DEVICE); 515 if (vring_mapping_error(vq, addr)) 516 goto unmap_release; 517 518 vq->split.vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, 519 VRING_DESC_F_INDIRECT); 520 vq->split.vring.desc[head].addr = cpu_to_virtio64(_vq->vdev, 521 addr); 522 523 vq->split.vring.desc[head].len = cpu_to_virtio32(_vq->vdev, 524 total_sg * sizeof(struct vring_desc)); 525 } 526 527 /* We're using some buffers from the free list. */ 528 vq->vq.num_free -= descs_used; 529 530 /* Update free pointer */ 531 if (indirect) 532 vq->free_head = virtio16_to_cpu(_vq->vdev, 533 vq->split.vring.desc[head].next); 534 else 535 vq->free_head = i; 536 537 /* Store token and indirect buffer state. */ 538 vq->split.desc_state[head].data = data; 539 if (indirect) 540 vq->split.desc_state[head].indir_desc = desc; 541 else 542 vq->split.desc_state[head].indir_desc = ctx; 543 544 /* Put entry in available array (but don't update avail->idx until they 545 * do sync). */ 546 avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1); 547 vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head); 548 549 /* Descriptors and available array need to be set before we expose the 550 * new available array entries. */ 551 virtio_wmb(vq->weak_barriers); 552 vq->split.avail_idx_shadow++; 553 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, 554 vq->split.avail_idx_shadow); 555 vq->num_added++; 556 557 pr_debug("Added buffer head %i to %p\n", head, vq); 558 END_USE(vq); 559 560 /* This is very unlikely, but theoretically possible. Kick 561 * just in case. */ 562 if (unlikely(vq->num_added == (1 << 16) - 1)) 563 virtqueue_kick(_vq); 564 565 return 0; 566 567 unmap_release: 568 err_idx = i; 569 570 if (indirect) 571 i = 0; 572 else 573 i = head; 574 575 for (n = 0; n < total_sg; n++) { 576 if (i == err_idx) 577 break; 578 vring_unmap_one_split(vq, &desc[i]); 579 i = virtio16_to_cpu(_vq->vdev, desc[i].next); 580 } 581 582 if (indirect) 583 kfree(desc); 584 585 END_USE(vq); 586 return -ENOMEM; 587 } 588 589 static bool virtqueue_kick_prepare_split(struct virtqueue *_vq) 590 { 591 struct vring_virtqueue *vq = to_vvq(_vq); 592 u16 new, old; 593 bool needs_kick; 594 595 START_USE(vq); 596 /* We need to expose available array entries before checking avail 597 * event. */ 598 virtio_mb(vq->weak_barriers); 599 600 old = vq->split.avail_idx_shadow - vq->num_added; 601 new = vq->split.avail_idx_shadow; 602 vq->num_added = 0; 603 604 LAST_ADD_TIME_CHECK(vq); 605 LAST_ADD_TIME_INVALID(vq); 606 607 if (vq->event) { 608 needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev, 609 vring_avail_event(&vq->split.vring)), 610 new, old); 611 } else { 612 needs_kick = !(vq->split.vring.used->flags & 613 cpu_to_virtio16(_vq->vdev, 614 VRING_USED_F_NO_NOTIFY)); 615 } 616 END_USE(vq); 617 return needs_kick; 618 } 619 620 static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, 621 void **ctx) 622 { 623 unsigned int i, j; 624 __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT); 625 626 /* Clear data ptr. */ 627 vq->split.desc_state[head].data = NULL; 628 629 /* Put back on free list: unmap first-level descriptors and find end */ 630 i = head; 631 632 while (vq->split.vring.desc[i].flags & nextflag) { 633 vring_unmap_one_split(vq, &vq->split.vring.desc[i]); 634 i = virtio16_to_cpu(vq->vq.vdev, vq->split.vring.desc[i].next); 635 vq->vq.num_free++; 636 } 637 638 vring_unmap_one_split(vq, &vq->split.vring.desc[i]); 639 vq->split.vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, 640 vq->free_head); 641 vq->free_head = head; 642 643 /* Plus final descriptor */ 644 vq->vq.num_free++; 645 646 if (vq->indirect) { 647 struct vring_desc *indir_desc = 648 vq->split.desc_state[head].indir_desc; 649 u32 len; 650 651 /* Free the indirect table, if any, now that it's unmapped. */ 652 if (!indir_desc) 653 return; 654 655 len = virtio32_to_cpu(vq->vq.vdev, 656 vq->split.vring.desc[head].len); 657 658 BUG_ON(!(vq->split.vring.desc[head].flags & 659 cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT))); 660 BUG_ON(len == 0 || len % sizeof(struct vring_desc)); 661 662 for (j = 0; j < len / sizeof(struct vring_desc); j++) 663 vring_unmap_one_split(vq, &indir_desc[j]); 664 665 kfree(indir_desc); 666 vq->split.desc_state[head].indir_desc = NULL; 667 } else if (ctx) { 668 *ctx = vq->split.desc_state[head].indir_desc; 669 } 670 } 671 672 static inline bool more_used_split(const struct vring_virtqueue *vq) 673 { 674 return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, 675 vq->split.vring.used->idx); 676 } 677 678 static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq, 679 unsigned int *len, 680 void **ctx) 681 { 682 struct vring_virtqueue *vq = to_vvq(_vq); 683 void *ret; 684 unsigned int i; 685 u16 last_used; 686 687 START_USE(vq); 688 689 if (unlikely(vq->broken)) { 690 END_USE(vq); 691 return NULL; 692 } 693 694 if (!more_used_split(vq)) { 695 pr_debug("No more buffers in queue\n"); 696 END_USE(vq); 697 return NULL; 698 } 699 700 /* Only get used array entries after they have been exposed by host. */ 701 virtio_rmb(vq->weak_barriers); 702 703 last_used = (vq->last_used_idx & (vq->split.vring.num - 1)); 704 i = virtio32_to_cpu(_vq->vdev, 705 vq->split.vring.used->ring[last_used].id); 706 *len = virtio32_to_cpu(_vq->vdev, 707 vq->split.vring.used->ring[last_used].len); 708 709 if (unlikely(i >= vq->split.vring.num)) { 710 BAD_RING(vq, "id %u out of range\n", i); 711 return NULL; 712 } 713 if (unlikely(!vq->split.desc_state[i].data)) { 714 BAD_RING(vq, "id %u is not a head!\n", i); 715 return NULL; 716 } 717 718 /* detach_buf_split clears data, so grab it now. */ 719 ret = vq->split.desc_state[i].data; 720 detach_buf_split(vq, i, ctx); 721 vq->last_used_idx++; 722 /* If we expect an interrupt for the next entry, tell host 723 * by writing event index and flush out the write before 724 * the read in the next get_buf call. */ 725 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) 726 virtio_store_mb(vq->weak_barriers, 727 &vring_used_event(&vq->split.vring), 728 cpu_to_virtio16(_vq->vdev, vq->last_used_idx)); 729 730 LAST_ADD_TIME_INVALID(vq); 731 732 END_USE(vq); 733 return ret; 734 } 735 736 static void virtqueue_disable_cb_split(struct virtqueue *_vq) 737 { 738 struct vring_virtqueue *vq = to_vvq(_vq); 739 740 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) { 741 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; 742 if (!vq->event) 743 vq->split.vring.avail->flags = 744 cpu_to_virtio16(_vq->vdev, 745 vq->split.avail_flags_shadow); 746 } 747 } 748 749 static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq) 750 { 751 struct vring_virtqueue *vq = to_vvq(_vq); 752 u16 last_used_idx; 753 754 START_USE(vq); 755 756 /* We optimistically turn back on interrupts, then check if there was 757 * more to do. */ 758 /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to 759 * either clear the flags bit or point the event index at the next 760 * entry. Always do both to keep code simple. */ 761 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 762 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 763 if (!vq->event) 764 vq->split.vring.avail->flags = 765 cpu_to_virtio16(_vq->vdev, 766 vq->split.avail_flags_shadow); 767 } 768 vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev, 769 last_used_idx = vq->last_used_idx); 770 END_USE(vq); 771 return last_used_idx; 772 } 773 774 static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx) 775 { 776 struct vring_virtqueue *vq = to_vvq(_vq); 777 778 return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev, 779 vq->split.vring.used->idx); 780 } 781 782 static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq) 783 { 784 struct vring_virtqueue *vq = to_vvq(_vq); 785 u16 bufs; 786 787 START_USE(vq); 788 789 /* We optimistically turn back on interrupts, then check if there was 790 * more to do. */ 791 /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to 792 * either clear the flags bit or point the event index at the next 793 * entry. Always update the event index to keep code simple. */ 794 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 795 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 796 if (!vq->event) 797 vq->split.vring.avail->flags = 798 cpu_to_virtio16(_vq->vdev, 799 vq->split.avail_flags_shadow); 800 } 801 /* TODO: tune this threshold */ 802 bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4; 803 804 virtio_store_mb(vq->weak_barriers, 805 &vring_used_event(&vq->split.vring), 806 cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs)); 807 808 if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx) 809 - vq->last_used_idx) > bufs)) { 810 END_USE(vq); 811 return false; 812 } 813 814 END_USE(vq); 815 return true; 816 } 817 818 static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq) 819 { 820 struct vring_virtqueue *vq = to_vvq(_vq); 821 unsigned int i; 822 void *buf; 823 824 START_USE(vq); 825 826 for (i = 0; i < vq->split.vring.num; i++) { 827 if (!vq->split.desc_state[i].data) 828 continue; 829 /* detach_buf_split clears data, so grab it now. */ 830 buf = vq->split.desc_state[i].data; 831 detach_buf_split(vq, i, NULL); 832 vq->split.avail_idx_shadow--; 833 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, 834 vq->split.avail_idx_shadow); 835 END_USE(vq); 836 return buf; 837 } 838 /* That should have freed everything. */ 839 BUG_ON(vq->vq.num_free != vq->split.vring.num); 840 841 END_USE(vq); 842 return NULL; 843 } 844 845 static struct virtqueue *vring_create_virtqueue_split( 846 unsigned int index, 847 unsigned int num, 848 unsigned int vring_align, 849 struct virtio_device *vdev, 850 bool weak_barriers, 851 bool may_reduce_num, 852 bool context, 853 bool (*notify)(struct virtqueue *), 854 void (*callback)(struct virtqueue *), 855 const char *name) 856 { 857 struct virtqueue *vq; 858 void *queue = NULL; 859 dma_addr_t dma_addr; 860 size_t queue_size_in_bytes; 861 struct vring vring; 862 863 /* We assume num is a power of 2. */ 864 if (num & (num - 1)) { 865 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num); 866 return NULL; 867 } 868 869 /* TODO: allocate each queue chunk individually */ 870 for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) { 871 queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 872 &dma_addr, 873 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 874 if (queue) 875 break; 876 if (!may_reduce_num) 877 return NULL; 878 } 879 880 if (!num) 881 return NULL; 882 883 if (!queue) { 884 /* Try to get a single page. You are my only hope! */ 885 queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 886 &dma_addr, GFP_KERNEL|__GFP_ZERO); 887 } 888 if (!queue) 889 return NULL; 890 891 queue_size_in_bytes = vring_size(num, vring_align); 892 vring_init(&vring, num, queue, vring_align); 893 894 vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context, 895 notify, callback, name); 896 if (!vq) { 897 vring_free_queue(vdev, queue_size_in_bytes, queue, 898 dma_addr); 899 return NULL; 900 } 901 902 to_vvq(vq)->split.queue_dma_addr = dma_addr; 903 to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes; 904 to_vvq(vq)->we_own_ring = true; 905 906 return vq; 907 } 908 909 910 /* 911 * Packed ring specific functions - *_packed(). 912 */ 913 914 static void vring_unmap_state_packed(const struct vring_virtqueue *vq, 915 struct vring_desc_extra_packed *state) 916 { 917 u16 flags; 918 919 if (!vq->use_dma_api) 920 return; 921 922 flags = state->flags; 923 924 if (flags & VRING_DESC_F_INDIRECT) { 925 dma_unmap_single(vring_dma_dev(vq), 926 state->addr, state->len, 927 (flags & VRING_DESC_F_WRITE) ? 928 DMA_FROM_DEVICE : DMA_TO_DEVICE); 929 } else { 930 dma_unmap_page(vring_dma_dev(vq), 931 state->addr, state->len, 932 (flags & VRING_DESC_F_WRITE) ? 933 DMA_FROM_DEVICE : DMA_TO_DEVICE); 934 } 935 } 936 937 static void vring_unmap_desc_packed(const struct vring_virtqueue *vq, 938 struct vring_packed_desc *desc) 939 { 940 u16 flags; 941 942 if (!vq->use_dma_api) 943 return; 944 945 flags = le16_to_cpu(desc->flags); 946 947 if (flags & VRING_DESC_F_INDIRECT) { 948 dma_unmap_single(vring_dma_dev(vq), 949 le64_to_cpu(desc->addr), 950 le32_to_cpu(desc->len), 951 (flags & VRING_DESC_F_WRITE) ? 952 DMA_FROM_DEVICE : DMA_TO_DEVICE); 953 } else { 954 dma_unmap_page(vring_dma_dev(vq), 955 le64_to_cpu(desc->addr), 956 le32_to_cpu(desc->len), 957 (flags & VRING_DESC_F_WRITE) ? 958 DMA_FROM_DEVICE : DMA_TO_DEVICE); 959 } 960 } 961 962 static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg, 963 gfp_t gfp) 964 { 965 struct vring_packed_desc *desc; 966 967 /* 968 * We require lowmem mappings for the descriptors because 969 * otherwise virt_to_phys will give us bogus addresses in the 970 * virtqueue. 971 */ 972 gfp &= ~__GFP_HIGHMEM; 973 974 desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp); 975 976 return desc; 977 } 978 979 static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, 980 struct scatterlist *sgs[], 981 unsigned int total_sg, 982 unsigned int out_sgs, 983 unsigned int in_sgs, 984 void *data, 985 gfp_t gfp) 986 { 987 struct vring_packed_desc *desc; 988 struct scatterlist *sg; 989 unsigned int i, n, err_idx; 990 u16 head, id; 991 dma_addr_t addr; 992 993 head = vq->packed.next_avail_idx; 994 desc = alloc_indirect_packed(total_sg, gfp); 995 996 if (unlikely(vq->vq.num_free < 1)) { 997 pr_debug("Can't add buf len 1 - avail = 0\n"); 998 kfree(desc); 999 END_USE(vq); 1000 return -ENOSPC; 1001 } 1002 1003 i = 0; 1004 id = vq->free_head; 1005 BUG_ON(id == vq->packed.vring.num); 1006 1007 for (n = 0; n < out_sgs + in_sgs; n++) { 1008 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 1009 addr = vring_map_one_sg(vq, sg, n < out_sgs ? 1010 DMA_TO_DEVICE : DMA_FROM_DEVICE); 1011 if (vring_mapping_error(vq, addr)) 1012 goto unmap_release; 1013 1014 desc[i].flags = cpu_to_le16(n < out_sgs ? 1015 0 : VRING_DESC_F_WRITE); 1016 desc[i].addr = cpu_to_le64(addr); 1017 desc[i].len = cpu_to_le32(sg->length); 1018 i++; 1019 } 1020 } 1021 1022 /* Now that the indirect table is filled in, map it. */ 1023 addr = vring_map_single(vq, desc, 1024 total_sg * sizeof(struct vring_packed_desc), 1025 DMA_TO_DEVICE); 1026 if (vring_mapping_error(vq, addr)) 1027 goto unmap_release; 1028 1029 vq->packed.vring.desc[head].addr = cpu_to_le64(addr); 1030 vq->packed.vring.desc[head].len = cpu_to_le32(total_sg * 1031 sizeof(struct vring_packed_desc)); 1032 vq->packed.vring.desc[head].id = cpu_to_le16(id); 1033 1034 if (vq->use_dma_api) { 1035 vq->packed.desc_extra[id].addr = addr; 1036 vq->packed.desc_extra[id].len = total_sg * 1037 sizeof(struct vring_packed_desc); 1038 vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT | 1039 vq->packed.avail_used_flags; 1040 } 1041 1042 /* 1043 * A driver MUST NOT make the first descriptor in the list 1044 * available before all subsequent descriptors comprising 1045 * the list are made available. 1046 */ 1047 virtio_wmb(vq->weak_barriers); 1048 vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT | 1049 vq->packed.avail_used_flags); 1050 1051 /* We're using some buffers from the free list. */ 1052 vq->vq.num_free -= 1; 1053 1054 /* Update free pointer */ 1055 n = head + 1; 1056 if (n >= vq->packed.vring.num) { 1057 n = 0; 1058 vq->packed.avail_wrap_counter ^= 1; 1059 vq->packed.avail_used_flags ^= 1060 1 << VRING_PACKED_DESC_F_AVAIL | 1061 1 << VRING_PACKED_DESC_F_USED; 1062 } 1063 vq->packed.next_avail_idx = n; 1064 vq->free_head = vq->packed.desc_state[id].next; 1065 1066 /* Store token and indirect buffer state. */ 1067 vq->packed.desc_state[id].num = 1; 1068 vq->packed.desc_state[id].data = data; 1069 vq->packed.desc_state[id].indir_desc = desc; 1070 vq->packed.desc_state[id].last = id; 1071 1072 vq->num_added += 1; 1073 1074 pr_debug("Added buffer head %i to %p\n", head, vq); 1075 END_USE(vq); 1076 1077 return 0; 1078 1079 unmap_release: 1080 err_idx = i; 1081 1082 for (i = 0; i < err_idx; i++) 1083 vring_unmap_desc_packed(vq, &desc[i]); 1084 1085 kfree(desc); 1086 1087 END_USE(vq); 1088 return -ENOMEM; 1089 } 1090 1091 static inline int virtqueue_add_packed(struct virtqueue *_vq, 1092 struct scatterlist *sgs[], 1093 unsigned int total_sg, 1094 unsigned int out_sgs, 1095 unsigned int in_sgs, 1096 void *data, 1097 void *ctx, 1098 gfp_t gfp) 1099 { 1100 struct vring_virtqueue *vq = to_vvq(_vq); 1101 struct vring_packed_desc *desc; 1102 struct scatterlist *sg; 1103 unsigned int i, n, c, descs_used, err_idx; 1104 __le16 uninitialized_var(head_flags), flags; 1105 u16 head, id, uninitialized_var(prev), curr, avail_used_flags; 1106 1107 START_USE(vq); 1108 1109 BUG_ON(data == NULL); 1110 BUG_ON(ctx && vq->indirect); 1111 1112 if (unlikely(vq->broken)) { 1113 END_USE(vq); 1114 return -EIO; 1115 } 1116 1117 LAST_ADD_TIME_UPDATE(vq); 1118 1119 BUG_ON(total_sg == 0); 1120 1121 if (virtqueue_use_indirect(_vq, total_sg)) 1122 return virtqueue_add_indirect_packed(vq, sgs, total_sg, 1123 out_sgs, in_sgs, data, gfp); 1124 1125 head = vq->packed.next_avail_idx; 1126 avail_used_flags = vq->packed.avail_used_flags; 1127 1128 WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect); 1129 1130 desc = vq->packed.vring.desc; 1131 i = head; 1132 descs_used = total_sg; 1133 1134 if (unlikely(vq->vq.num_free < descs_used)) { 1135 pr_debug("Can't add buf len %i - avail = %i\n", 1136 descs_used, vq->vq.num_free); 1137 END_USE(vq); 1138 return -ENOSPC; 1139 } 1140 1141 id = vq->free_head; 1142 BUG_ON(id == vq->packed.vring.num); 1143 1144 curr = id; 1145 c = 0; 1146 for (n = 0; n < out_sgs + in_sgs; n++) { 1147 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 1148 dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ? 1149 DMA_TO_DEVICE : DMA_FROM_DEVICE); 1150 if (vring_mapping_error(vq, addr)) 1151 goto unmap_release; 1152 1153 flags = cpu_to_le16(vq->packed.avail_used_flags | 1154 (++c == total_sg ? 0 : VRING_DESC_F_NEXT) | 1155 (n < out_sgs ? 0 : VRING_DESC_F_WRITE)); 1156 if (i == head) 1157 head_flags = flags; 1158 else 1159 desc[i].flags = flags; 1160 1161 desc[i].addr = cpu_to_le64(addr); 1162 desc[i].len = cpu_to_le32(sg->length); 1163 desc[i].id = cpu_to_le16(id); 1164 1165 if (unlikely(vq->use_dma_api)) { 1166 vq->packed.desc_extra[curr].addr = addr; 1167 vq->packed.desc_extra[curr].len = sg->length; 1168 vq->packed.desc_extra[curr].flags = 1169 le16_to_cpu(flags); 1170 } 1171 prev = curr; 1172 curr = vq->packed.desc_state[curr].next; 1173 1174 if ((unlikely(++i >= vq->packed.vring.num))) { 1175 i = 0; 1176 vq->packed.avail_used_flags ^= 1177 1 << VRING_PACKED_DESC_F_AVAIL | 1178 1 << VRING_PACKED_DESC_F_USED; 1179 } 1180 } 1181 } 1182 1183 if (i < head) 1184 vq->packed.avail_wrap_counter ^= 1; 1185 1186 /* We're using some buffers from the free list. */ 1187 vq->vq.num_free -= descs_used; 1188 1189 /* Update free pointer */ 1190 vq->packed.next_avail_idx = i; 1191 vq->free_head = curr; 1192 1193 /* Store token. */ 1194 vq->packed.desc_state[id].num = descs_used; 1195 vq->packed.desc_state[id].data = data; 1196 vq->packed.desc_state[id].indir_desc = ctx; 1197 vq->packed.desc_state[id].last = prev; 1198 1199 /* 1200 * A driver MUST NOT make the first descriptor in the list 1201 * available before all subsequent descriptors comprising 1202 * the list are made available. 1203 */ 1204 virtio_wmb(vq->weak_barriers); 1205 vq->packed.vring.desc[head].flags = head_flags; 1206 vq->num_added += descs_used; 1207 1208 pr_debug("Added buffer head %i to %p\n", head, vq); 1209 END_USE(vq); 1210 1211 return 0; 1212 1213 unmap_release: 1214 err_idx = i; 1215 i = head; 1216 1217 vq->packed.avail_used_flags = avail_used_flags; 1218 1219 for (n = 0; n < total_sg; n++) { 1220 if (i == err_idx) 1221 break; 1222 vring_unmap_desc_packed(vq, &desc[i]); 1223 i++; 1224 if (i >= vq->packed.vring.num) 1225 i = 0; 1226 } 1227 1228 END_USE(vq); 1229 return -EIO; 1230 } 1231 1232 static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq) 1233 { 1234 struct vring_virtqueue *vq = to_vvq(_vq); 1235 u16 new, old, off_wrap, flags, wrap_counter, event_idx; 1236 bool needs_kick; 1237 union { 1238 struct { 1239 __le16 off_wrap; 1240 __le16 flags; 1241 }; 1242 u32 u32; 1243 } snapshot; 1244 1245 START_USE(vq); 1246 1247 /* 1248 * We need to expose the new flags value before checking notification 1249 * suppressions. 1250 */ 1251 virtio_mb(vq->weak_barriers); 1252 1253 old = vq->packed.next_avail_idx - vq->num_added; 1254 new = vq->packed.next_avail_idx; 1255 vq->num_added = 0; 1256 1257 snapshot.u32 = *(u32 *)vq->packed.vring.device; 1258 flags = le16_to_cpu(snapshot.flags); 1259 1260 LAST_ADD_TIME_CHECK(vq); 1261 LAST_ADD_TIME_INVALID(vq); 1262 1263 if (flags != VRING_PACKED_EVENT_FLAG_DESC) { 1264 needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE); 1265 goto out; 1266 } 1267 1268 off_wrap = le16_to_cpu(snapshot.off_wrap); 1269 1270 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; 1271 event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); 1272 if (wrap_counter != vq->packed.avail_wrap_counter) 1273 event_idx -= vq->packed.vring.num; 1274 1275 needs_kick = vring_need_event(event_idx, new, old); 1276 out: 1277 END_USE(vq); 1278 return needs_kick; 1279 } 1280 1281 static void detach_buf_packed(struct vring_virtqueue *vq, 1282 unsigned int id, void **ctx) 1283 { 1284 struct vring_desc_state_packed *state = NULL; 1285 struct vring_packed_desc *desc; 1286 unsigned int i, curr; 1287 1288 state = &vq->packed.desc_state[id]; 1289 1290 /* Clear data ptr. */ 1291 state->data = NULL; 1292 1293 vq->packed.desc_state[state->last].next = vq->free_head; 1294 vq->free_head = id; 1295 vq->vq.num_free += state->num; 1296 1297 if (unlikely(vq->use_dma_api)) { 1298 curr = id; 1299 for (i = 0; i < state->num; i++) { 1300 vring_unmap_state_packed(vq, 1301 &vq->packed.desc_extra[curr]); 1302 curr = vq->packed.desc_state[curr].next; 1303 } 1304 } 1305 1306 if (vq->indirect) { 1307 u32 len; 1308 1309 /* Free the indirect table, if any, now that it's unmapped. */ 1310 desc = state->indir_desc; 1311 if (!desc) 1312 return; 1313 1314 if (vq->use_dma_api) { 1315 len = vq->packed.desc_extra[id].len; 1316 for (i = 0; i < len / sizeof(struct vring_packed_desc); 1317 i++) 1318 vring_unmap_desc_packed(vq, &desc[i]); 1319 } 1320 kfree(desc); 1321 state->indir_desc = NULL; 1322 } else if (ctx) { 1323 *ctx = state->indir_desc; 1324 } 1325 } 1326 1327 static inline bool is_used_desc_packed(const struct vring_virtqueue *vq, 1328 u16 idx, bool used_wrap_counter) 1329 { 1330 bool avail, used; 1331 u16 flags; 1332 1333 flags = le16_to_cpu(vq->packed.vring.desc[idx].flags); 1334 avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL)); 1335 used = !!(flags & (1 << VRING_PACKED_DESC_F_USED)); 1336 1337 return avail == used && used == used_wrap_counter; 1338 } 1339 1340 static inline bool more_used_packed(const struct vring_virtqueue *vq) 1341 { 1342 return is_used_desc_packed(vq, vq->last_used_idx, 1343 vq->packed.used_wrap_counter); 1344 } 1345 1346 static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, 1347 unsigned int *len, 1348 void **ctx) 1349 { 1350 struct vring_virtqueue *vq = to_vvq(_vq); 1351 u16 last_used, id; 1352 void *ret; 1353 1354 START_USE(vq); 1355 1356 if (unlikely(vq->broken)) { 1357 END_USE(vq); 1358 return NULL; 1359 } 1360 1361 if (!more_used_packed(vq)) { 1362 pr_debug("No more buffers in queue\n"); 1363 END_USE(vq); 1364 return NULL; 1365 } 1366 1367 /* Only get used elements after they have been exposed by host. */ 1368 virtio_rmb(vq->weak_barriers); 1369 1370 last_used = vq->last_used_idx; 1371 id = le16_to_cpu(vq->packed.vring.desc[last_used].id); 1372 *len = le32_to_cpu(vq->packed.vring.desc[last_used].len); 1373 1374 if (unlikely(id >= vq->packed.vring.num)) { 1375 BAD_RING(vq, "id %u out of range\n", id); 1376 return NULL; 1377 } 1378 if (unlikely(!vq->packed.desc_state[id].data)) { 1379 BAD_RING(vq, "id %u is not a head!\n", id); 1380 return NULL; 1381 } 1382 1383 /* detach_buf_packed clears data, so grab it now. */ 1384 ret = vq->packed.desc_state[id].data; 1385 detach_buf_packed(vq, id, ctx); 1386 1387 vq->last_used_idx += vq->packed.desc_state[id].num; 1388 if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) { 1389 vq->last_used_idx -= vq->packed.vring.num; 1390 vq->packed.used_wrap_counter ^= 1; 1391 } 1392 1393 /* 1394 * If we expect an interrupt for the next entry, tell host 1395 * by writing event index and flush out the write before 1396 * the read in the next get_buf call. 1397 */ 1398 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC) 1399 virtio_store_mb(vq->weak_barriers, 1400 &vq->packed.vring.driver->off_wrap, 1401 cpu_to_le16(vq->last_used_idx | 1402 (vq->packed.used_wrap_counter << 1403 VRING_PACKED_EVENT_F_WRAP_CTR))); 1404 1405 LAST_ADD_TIME_INVALID(vq); 1406 1407 END_USE(vq); 1408 return ret; 1409 } 1410 1411 static void virtqueue_disable_cb_packed(struct virtqueue *_vq) 1412 { 1413 struct vring_virtqueue *vq = to_vvq(_vq); 1414 1415 if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) { 1416 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; 1417 vq->packed.vring.driver->flags = 1418 cpu_to_le16(vq->packed.event_flags_shadow); 1419 } 1420 } 1421 1422 static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq) 1423 { 1424 struct vring_virtqueue *vq = to_vvq(_vq); 1425 1426 START_USE(vq); 1427 1428 /* 1429 * We optimistically turn back on interrupts, then check if there was 1430 * more to do. 1431 */ 1432 1433 if (vq->event) { 1434 vq->packed.vring.driver->off_wrap = 1435 cpu_to_le16(vq->last_used_idx | 1436 (vq->packed.used_wrap_counter << 1437 VRING_PACKED_EVENT_F_WRAP_CTR)); 1438 /* 1439 * We need to update event offset and event wrap 1440 * counter first before updating event flags. 1441 */ 1442 virtio_wmb(vq->weak_barriers); 1443 } 1444 1445 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 1446 vq->packed.event_flags_shadow = vq->event ? 1447 VRING_PACKED_EVENT_FLAG_DESC : 1448 VRING_PACKED_EVENT_FLAG_ENABLE; 1449 vq->packed.vring.driver->flags = 1450 cpu_to_le16(vq->packed.event_flags_shadow); 1451 } 1452 1453 END_USE(vq); 1454 return vq->last_used_idx | ((u16)vq->packed.used_wrap_counter << 1455 VRING_PACKED_EVENT_F_WRAP_CTR); 1456 } 1457 1458 static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap) 1459 { 1460 struct vring_virtqueue *vq = to_vvq(_vq); 1461 bool wrap_counter; 1462 u16 used_idx; 1463 1464 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; 1465 used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); 1466 1467 return is_used_desc_packed(vq, used_idx, wrap_counter); 1468 } 1469 1470 static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq) 1471 { 1472 struct vring_virtqueue *vq = to_vvq(_vq); 1473 u16 used_idx, wrap_counter; 1474 u16 bufs; 1475 1476 START_USE(vq); 1477 1478 /* 1479 * We optimistically turn back on interrupts, then check if there was 1480 * more to do. 1481 */ 1482 1483 if (vq->event) { 1484 /* TODO: tune this threshold */ 1485 bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4; 1486 wrap_counter = vq->packed.used_wrap_counter; 1487 1488 used_idx = vq->last_used_idx + bufs; 1489 if (used_idx >= vq->packed.vring.num) { 1490 used_idx -= vq->packed.vring.num; 1491 wrap_counter ^= 1; 1492 } 1493 1494 vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx | 1495 (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR)); 1496 1497 /* 1498 * We need to update event offset and event wrap 1499 * counter first before updating event flags. 1500 */ 1501 virtio_wmb(vq->weak_barriers); 1502 } 1503 1504 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 1505 vq->packed.event_flags_shadow = vq->event ? 1506 VRING_PACKED_EVENT_FLAG_DESC : 1507 VRING_PACKED_EVENT_FLAG_ENABLE; 1508 vq->packed.vring.driver->flags = 1509 cpu_to_le16(vq->packed.event_flags_shadow); 1510 } 1511 1512 /* 1513 * We need to update event suppression structure first 1514 * before re-checking for more used buffers. 1515 */ 1516 virtio_mb(vq->weak_barriers); 1517 1518 if (is_used_desc_packed(vq, 1519 vq->last_used_idx, 1520 vq->packed.used_wrap_counter)) { 1521 END_USE(vq); 1522 return false; 1523 } 1524 1525 END_USE(vq); 1526 return true; 1527 } 1528 1529 static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq) 1530 { 1531 struct vring_virtqueue *vq = to_vvq(_vq); 1532 unsigned int i; 1533 void *buf; 1534 1535 START_USE(vq); 1536 1537 for (i = 0; i < vq->packed.vring.num; i++) { 1538 if (!vq->packed.desc_state[i].data) 1539 continue; 1540 /* detach_buf clears data, so grab it now. */ 1541 buf = vq->packed.desc_state[i].data; 1542 detach_buf_packed(vq, i, NULL); 1543 END_USE(vq); 1544 return buf; 1545 } 1546 /* That should have freed everything. */ 1547 BUG_ON(vq->vq.num_free != vq->packed.vring.num); 1548 1549 END_USE(vq); 1550 return NULL; 1551 } 1552 1553 static struct virtqueue *vring_create_virtqueue_packed( 1554 unsigned int index, 1555 unsigned int num, 1556 unsigned int vring_align, 1557 struct virtio_device *vdev, 1558 bool weak_barriers, 1559 bool may_reduce_num, 1560 bool context, 1561 bool (*notify)(struct virtqueue *), 1562 void (*callback)(struct virtqueue *), 1563 const char *name) 1564 { 1565 struct vring_virtqueue *vq; 1566 struct vring_packed_desc *ring; 1567 struct vring_packed_desc_event *driver, *device; 1568 dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr; 1569 size_t ring_size_in_bytes, event_size_in_bytes; 1570 unsigned int i; 1571 1572 ring_size_in_bytes = num * sizeof(struct vring_packed_desc); 1573 1574 ring = vring_alloc_queue(vdev, ring_size_in_bytes, 1575 &ring_dma_addr, 1576 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1577 if (!ring) 1578 goto err_ring; 1579 1580 event_size_in_bytes = sizeof(struct vring_packed_desc_event); 1581 1582 driver = vring_alloc_queue(vdev, event_size_in_bytes, 1583 &driver_event_dma_addr, 1584 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1585 if (!driver) 1586 goto err_driver; 1587 1588 device = vring_alloc_queue(vdev, event_size_in_bytes, 1589 &device_event_dma_addr, 1590 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1591 if (!device) 1592 goto err_device; 1593 1594 vq = kmalloc(sizeof(*vq), GFP_KERNEL); 1595 if (!vq) 1596 goto err_vq; 1597 1598 vq->vq.callback = callback; 1599 vq->vq.vdev = vdev; 1600 vq->vq.name = name; 1601 vq->vq.num_free = num; 1602 vq->vq.index = index; 1603 vq->we_own_ring = true; 1604 vq->notify = notify; 1605 vq->weak_barriers = weak_barriers; 1606 vq->broken = false; 1607 vq->last_used_idx = 0; 1608 vq->num_added = 0; 1609 vq->packed_ring = true; 1610 vq->use_dma_api = vring_use_dma_api(vdev); 1611 list_add_tail(&vq->vq.list, &vdev->vqs); 1612 #ifdef DEBUG 1613 vq->in_use = false; 1614 vq->last_add_time_valid = false; 1615 #endif 1616 1617 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && 1618 !context; 1619 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 1620 1621 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) 1622 vq->weak_barriers = false; 1623 1624 vq->packed.ring_dma_addr = ring_dma_addr; 1625 vq->packed.driver_event_dma_addr = driver_event_dma_addr; 1626 vq->packed.device_event_dma_addr = device_event_dma_addr; 1627 1628 vq->packed.ring_size_in_bytes = ring_size_in_bytes; 1629 vq->packed.event_size_in_bytes = event_size_in_bytes; 1630 1631 vq->packed.vring.num = num; 1632 vq->packed.vring.desc = ring; 1633 vq->packed.vring.driver = driver; 1634 vq->packed.vring.device = device; 1635 1636 vq->packed.next_avail_idx = 0; 1637 vq->packed.avail_wrap_counter = 1; 1638 vq->packed.used_wrap_counter = 1; 1639 vq->packed.event_flags_shadow = 0; 1640 vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL; 1641 1642 vq->packed.desc_state = kmalloc_array(num, 1643 sizeof(struct vring_desc_state_packed), 1644 GFP_KERNEL); 1645 if (!vq->packed.desc_state) 1646 goto err_desc_state; 1647 1648 memset(vq->packed.desc_state, 0, 1649 num * sizeof(struct vring_desc_state_packed)); 1650 1651 /* Put everything in free lists. */ 1652 vq->free_head = 0; 1653 for (i = 0; i < num-1; i++) 1654 vq->packed.desc_state[i].next = i + 1; 1655 1656 vq->packed.desc_extra = kmalloc_array(num, 1657 sizeof(struct vring_desc_extra_packed), 1658 GFP_KERNEL); 1659 if (!vq->packed.desc_extra) 1660 goto err_desc_extra; 1661 1662 memset(vq->packed.desc_extra, 0, 1663 num * sizeof(struct vring_desc_extra_packed)); 1664 1665 /* No callback? Tell other side not to bother us. */ 1666 if (!callback) { 1667 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; 1668 vq->packed.vring.driver->flags = 1669 cpu_to_le16(vq->packed.event_flags_shadow); 1670 } 1671 1672 return &vq->vq; 1673 1674 err_desc_extra: 1675 kfree(vq->packed.desc_state); 1676 err_desc_state: 1677 kfree(vq); 1678 err_vq: 1679 vring_free_queue(vdev, event_size_in_bytes, device, ring_dma_addr); 1680 err_device: 1681 vring_free_queue(vdev, event_size_in_bytes, driver, ring_dma_addr); 1682 err_driver: 1683 vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr); 1684 err_ring: 1685 return NULL; 1686 } 1687 1688 1689 /* 1690 * Generic functions and exported symbols. 1691 */ 1692 1693 static inline int virtqueue_add(struct virtqueue *_vq, 1694 struct scatterlist *sgs[], 1695 unsigned int total_sg, 1696 unsigned int out_sgs, 1697 unsigned int in_sgs, 1698 void *data, 1699 void *ctx, 1700 gfp_t gfp) 1701 { 1702 struct vring_virtqueue *vq = to_vvq(_vq); 1703 1704 return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg, 1705 out_sgs, in_sgs, data, ctx, gfp) : 1706 virtqueue_add_split(_vq, sgs, total_sg, 1707 out_sgs, in_sgs, data, ctx, gfp); 1708 } 1709 1710 /** 1711 * virtqueue_add_sgs - expose buffers to other end 1712 * @_vq: the struct virtqueue we're talking about. 1713 * @sgs: array of terminated scatterlists. 1714 * @out_sgs: the number of scatterlists readable by other side 1715 * @in_sgs: the number of scatterlists which are writable (after readable ones) 1716 * @data: the token identifying the buffer. 1717 * @gfp: how to do memory allocations (if necessary). 1718 * 1719 * Caller must ensure we don't call this with other virtqueue operations 1720 * at the same time (except where noted). 1721 * 1722 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1723 */ 1724 int virtqueue_add_sgs(struct virtqueue *_vq, 1725 struct scatterlist *sgs[], 1726 unsigned int out_sgs, 1727 unsigned int in_sgs, 1728 void *data, 1729 gfp_t gfp) 1730 { 1731 unsigned int i, total_sg = 0; 1732 1733 /* Count them first. */ 1734 for (i = 0; i < out_sgs + in_sgs; i++) { 1735 struct scatterlist *sg; 1736 1737 for (sg = sgs[i]; sg; sg = sg_next(sg)) 1738 total_sg++; 1739 } 1740 return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, 1741 data, NULL, gfp); 1742 } 1743 EXPORT_SYMBOL_GPL(virtqueue_add_sgs); 1744 1745 /** 1746 * virtqueue_add_outbuf - expose output buffers to other end 1747 * @vq: the struct virtqueue we're talking about. 1748 * @sg: scatterlist (must be well-formed and terminated!) 1749 * @num: the number of entries in @sg readable by other side 1750 * @data: the token identifying the buffer. 1751 * @gfp: how to do memory allocations (if necessary). 1752 * 1753 * Caller must ensure we don't call this with other virtqueue operations 1754 * at the same time (except where noted). 1755 * 1756 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1757 */ 1758 int virtqueue_add_outbuf(struct virtqueue *vq, 1759 struct scatterlist *sg, unsigned int num, 1760 void *data, 1761 gfp_t gfp) 1762 { 1763 return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp); 1764 } 1765 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); 1766 1767 /** 1768 * virtqueue_add_inbuf - expose input buffers to other end 1769 * @vq: the struct virtqueue we're talking about. 1770 * @sg: scatterlist (must be well-formed and terminated!) 1771 * @num: the number of entries in @sg writable by other side 1772 * @data: the token identifying the buffer. 1773 * @gfp: how to do memory allocations (if necessary). 1774 * 1775 * Caller must ensure we don't call this with other virtqueue operations 1776 * at the same time (except where noted). 1777 * 1778 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1779 */ 1780 int virtqueue_add_inbuf(struct virtqueue *vq, 1781 struct scatterlist *sg, unsigned int num, 1782 void *data, 1783 gfp_t gfp) 1784 { 1785 return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp); 1786 } 1787 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); 1788 1789 /** 1790 * virtqueue_add_inbuf_ctx - expose input buffers to other end 1791 * @vq: the struct virtqueue we're talking about. 1792 * @sg: scatterlist (must be well-formed and terminated!) 1793 * @num: the number of entries in @sg writable by other side 1794 * @data: the token identifying the buffer. 1795 * @ctx: extra context for the token 1796 * @gfp: how to do memory allocations (if necessary). 1797 * 1798 * Caller must ensure we don't call this with other virtqueue operations 1799 * at the same time (except where noted). 1800 * 1801 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1802 */ 1803 int virtqueue_add_inbuf_ctx(struct virtqueue *vq, 1804 struct scatterlist *sg, unsigned int num, 1805 void *data, 1806 void *ctx, 1807 gfp_t gfp) 1808 { 1809 return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp); 1810 } 1811 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx); 1812 1813 /** 1814 * virtqueue_kick_prepare - first half of split virtqueue_kick call. 1815 * @_vq: the struct virtqueue 1816 * 1817 * Instead of virtqueue_kick(), you can do: 1818 * if (virtqueue_kick_prepare(vq)) 1819 * virtqueue_notify(vq); 1820 * 1821 * This is sometimes useful because the virtqueue_kick_prepare() needs 1822 * to be serialized, but the actual virtqueue_notify() call does not. 1823 */ 1824 bool virtqueue_kick_prepare(struct virtqueue *_vq) 1825 { 1826 struct vring_virtqueue *vq = to_vvq(_vq); 1827 1828 return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) : 1829 virtqueue_kick_prepare_split(_vq); 1830 } 1831 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare); 1832 1833 /** 1834 * virtqueue_notify - second half of split virtqueue_kick call. 1835 * @_vq: the struct virtqueue 1836 * 1837 * This does not need to be serialized. 1838 * 1839 * Returns false if host notify failed or queue is broken, otherwise true. 1840 */ 1841 bool virtqueue_notify(struct virtqueue *_vq) 1842 { 1843 struct vring_virtqueue *vq = to_vvq(_vq); 1844 1845 if (unlikely(vq->broken)) 1846 return false; 1847 1848 /* Prod other side to tell it about changes. */ 1849 if (!vq->notify(_vq)) { 1850 vq->broken = true; 1851 return false; 1852 } 1853 return true; 1854 } 1855 EXPORT_SYMBOL_GPL(virtqueue_notify); 1856 1857 /** 1858 * virtqueue_kick - update after add_buf 1859 * @vq: the struct virtqueue 1860 * 1861 * After one or more virtqueue_add_* calls, invoke this to kick 1862 * the other side. 1863 * 1864 * Caller must ensure we don't call this with other virtqueue 1865 * operations at the same time (except where noted). 1866 * 1867 * Returns false if kick failed, otherwise true. 1868 */ 1869 bool virtqueue_kick(struct virtqueue *vq) 1870 { 1871 if (virtqueue_kick_prepare(vq)) 1872 return virtqueue_notify(vq); 1873 return true; 1874 } 1875 EXPORT_SYMBOL_GPL(virtqueue_kick); 1876 1877 /** 1878 * virtqueue_get_buf - get the next used buffer 1879 * @_vq: the struct virtqueue we're talking about. 1880 * @len: the length written into the buffer 1881 * @ctx: extra context for the token 1882 * 1883 * If the device wrote data into the buffer, @len will be set to the 1884 * amount written. This means you don't need to clear the buffer 1885 * beforehand to ensure there's no data leakage in the case of short 1886 * writes. 1887 * 1888 * Caller must ensure we don't call this with other virtqueue 1889 * operations at the same time (except where noted). 1890 * 1891 * Returns NULL if there are no used buffers, or the "data" token 1892 * handed to virtqueue_add_*(). 1893 */ 1894 void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, 1895 void **ctx) 1896 { 1897 struct vring_virtqueue *vq = to_vvq(_vq); 1898 1899 return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) : 1900 virtqueue_get_buf_ctx_split(_vq, len, ctx); 1901 } 1902 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx); 1903 1904 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) 1905 { 1906 return virtqueue_get_buf_ctx(_vq, len, NULL); 1907 } 1908 EXPORT_SYMBOL_GPL(virtqueue_get_buf); 1909 /** 1910 * virtqueue_disable_cb - disable callbacks 1911 * @_vq: the struct virtqueue we're talking about. 1912 * 1913 * Note that this is not necessarily synchronous, hence unreliable and only 1914 * useful as an optimization. 1915 * 1916 * Unlike other operations, this need not be serialized. 1917 */ 1918 void virtqueue_disable_cb(struct virtqueue *_vq) 1919 { 1920 struct vring_virtqueue *vq = to_vvq(_vq); 1921 1922 if (vq->packed_ring) 1923 virtqueue_disable_cb_packed(_vq); 1924 else 1925 virtqueue_disable_cb_split(_vq); 1926 } 1927 EXPORT_SYMBOL_GPL(virtqueue_disable_cb); 1928 1929 /** 1930 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb 1931 * @_vq: the struct virtqueue we're talking about. 1932 * 1933 * This re-enables callbacks; it returns current queue state 1934 * in an opaque unsigned value. This value should be later tested by 1935 * virtqueue_poll, to detect a possible race between the driver checking for 1936 * more work, and enabling callbacks. 1937 * 1938 * Caller must ensure we don't call this with other virtqueue 1939 * operations at the same time (except where noted). 1940 */ 1941 unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq) 1942 { 1943 struct vring_virtqueue *vq = to_vvq(_vq); 1944 1945 return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) : 1946 virtqueue_enable_cb_prepare_split(_vq); 1947 } 1948 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); 1949 1950 /** 1951 * virtqueue_poll - query pending used buffers 1952 * @_vq: the struct virtqueue we're talking about. 1953 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare). 1954 * 1955 * Returns "true" if there are pending used buffers in the queue. 1956 * 1957 * This does not need to be serialized. 1958 */ 1959 bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx) 1960 { 1961 struct vring_virtqueue *vq = to_vvq(_vq); 1962 1963 virtio_mb(vq->weak_barriers); 1964 return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) : 1965 virtqueue_poll_split(_vq, last_used_idx); 1966 } 1967 EXPORT_SYMBOL_GPL(virtqueue_poll); 1968 1969 /** 1970 * virtqueue_enable_cb - restart callbacks after disable_cb. 1971 * @_vq: the struct virtqueue we're talking about. 1972 * 1973 * This re-enables callbacks; it returns "false" if there are pending 1974 * buffers in the queue, to detect a possible race between the driver 1975 * checking for more work, and enabling callbacks. 1976 * 1977 * Caller must ensure we don't call this with other virtqueue 1978 * operations at the same time (except where noted). 1979 */ 1980 bool virtqueue_enable_cb(struct virtqueue *_vq) 1981 { 1982 unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq); 1983 1984 return !virtqueue_poll(_vq, last_used_idx); 1985 } 1986 EXPORT_SYMBOL_GPL(virtqueue_enable_cb); 1987 1988 /** 1989 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb. 1990 * @_vq: the struct virtqueue we're talking about. 1991 * 1992 * This re-enables callbacks but hints to the other side to delay 1993 * interrupts until most of the available buffers have been processed; 1994 * it returns "false" if there are many pending buffers in the queue, 1995 * to detect a possible race between the driver checking for more work, 1996 * and enabling callbacks. 1997 * 1998 * Caller must ensure we don't call this with other virtqueue 1999 * operations at the same time (except where noted). 2000 */ 2001 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) 2002 { 2003 struct vring_virtqueue *vq = to_vvq(_vq); 2004 2005 return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) : 2006 virtqueue_enable_cb_delayed_split(_vq); 2007 } 2008 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); 2009 2010 /** 2011 * virtqueue_detach_unused_buf - detach first unused buffer 2012 * @_vq: the struct virtqueue we're talking about. 2013 * 2014 * Returns NULL or the "data" token handed to virtqueue_add_*(). 2015 * This is not valid on an active queue; it is useful only for device 2016 * shutdown. 2017 */ 2018 void *virtqueue_detach_unused_buf(struct virtqueue *_vq) 2019 { 2020 struct vring_virtqueue *vq = to_vvq(_vq); 2021 2022 return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) : 2023 virtqueue_detach_unused_buf_split(_vq); 2024 } 2025 EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf); 2026 2027 static inline bool more_used(const struct vring_virtqueue *vq) 2028 { 2029 return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq); 2030 } 2031 2032 irqreturn_t vring_interrupt(int irq, void *_vq) 2033 { 2034 struct vring_virtqueue *vq = to_vvq(_vq); 2035 2036 if (!more_used(vq)) { 2037 pr_debug("virtqueue interrupt with no work for %p\n", vq); 2038 return IRQ_NONE; 2039 } 2040 2041 if (unlikely(vq->broken)) 2042 return IRQ_HANDLED; 2043 2044 pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback); 2045 if (vq->vq.callback) 2046 vq->vq.callback(&vq->vq); 2047 2048 return IRQ_HANDLED; 2049 } 2050 EXPORT_SYMBOL_GPL(vring_interrupt); 2051 2052 /* Only available for split ring */ 2053 struct virtqueue *__vring_new_virtqueue(unsigned int index, 2054 struct vring vring, 2055 struct virtio_device *vdev, 2056 bool weak_barriers, 2057 bool context, 2058 bool (*notify)(struct virtqueue *), 2059 void (*callback)(struct virtqueue *), 2060 const char *name) 2061 { 2062 unsigned int i; 2063 struct vring_virtqueue *vq; 2064 2065 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 2066 return NULL; 2067 2068 vq = kmalloc(sizeof(*vq), GFP_KERNEL); 2069 if (!vq) 2070 return NULL; 2071 2072 vq->packed_ring = false; 2073 vq->vq.callback = callback; 2074 vq->vq.vdev = vdev; 2075 vq->vq.name = name; 2076 vq->vq.num_free = vring.num; 2077 vq->vq.index = index; 2078 vq->we_own_ring = false; 2079 vq->notify = notify; 2080 vq->weak_barriers = weak_barriers; 2081 vq->broken = false; 2082 vq->last_used_idx = 0; 2083 vq->num_added = 0; 2084 vq->use_dma_api = vring_use_dma_api(vdev); 2085 list_add_tail(&vq->vq.list, &vdev->vqs); 2086 #ifdef DEBUG 2087 vq->in_use = false; 2088 vq->last_add_time_valid = false; 2089 #endif 2090 2091 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && 2092 !context; 2093 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 2094 2095 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) 2096 vq->weak_barriers = false; 2097 2098 vq->split.queue_dma_addr = 0; 2099 vq->split.queue_size_in_bytes = 0; 2100 2101 vq->split.vring = vring; 2102 vq->split.avail_flags_shadow = 0; 2103 vq->split.avail_idx_shadow = 0; 2104 2105 /* No callback? Tell other side not to bother us. */ 2106 if (!callback) { 2107 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; 2108 if (!vq->event) 2109 vq->split.vring.avail->flags = cpu_to_virtio16(vdev, 2110 vq->split.avail_flags_shadow); 2111 } 2112 2113 vq->split.desc_state = kmalloc_array(vring.num, 2114 sizeof(struct vring_desc_state_split), GFP_KERNEL); 2115 if (!vq->split.desc_state) { 2116 kfree(vq); 2117 return NULL; 2118 } 2119 2120 /* Put everything in free lists. */ 2121 vq->free_head = 0; 2122 for (i = 0; i < vring.num-1; i++) 2123 vq->split.vring.desc[i].next = cpu_to_virtio16(vdev, i + 1); 2124 memset(vq->split.desc_state, 0, vring.num * 2125 sizeof(struct vring_desc_state_split)); 2126 2127 return &vq->vq; 2128 } 2129 EXPORT_SYMBOL_GPL(__vring_new_virtqueue); 2130 2131 struct virtqueue *vring_create_virtqueue( 2132 unsigned int index, 2133 unsigned int num, 2134 unsigned int vring_align, 2135 struct virtio_device *vdev, 2136 bool weak_barriers, 2137 bool may_reduce_num, 2138 bool context, 2139 bool (*notify)(struct virtqueue *), 2140 void (*callback)(struct virtqueue *), 2141 const char *name) 2142 { 2143 2144 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 2145 return vring_create_virtqueue_packed(index, num, vring_align, 2146 vdev, weak_barriers, may_reduce_num, 2147 context, notify, callback, name); 2148 2149 return vring_create_virtqueue_split(index, num, vring_align, 2150 vdev, weak_barriers, may_reduce_num, 2151 context, notify, callback, name); 2152 } 2153 EXPORT_SYMBOL_GPL(vring_create_virtqueue); 2154 2155 /* Only available for split ring */ 2156 struct virtqueue *vring_new_virtqueue(unsigned int index, 2157 unsigned int num, 2158 unsigned int vring_align, 2159 struct virtio_device *vdev, 2160 bool weak_barriers, 2161 bool context, 2162 void *pages, 2163 bool (*notify)(struct virtqueue *vq), 2164 void (*callback)(struct virtqueue *vq), 2165 const char *name) 2166 { 2167 struct vring vring; 2168 2169 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 2170 return NULL; 2171 2172 vring_init(&vring, num, pages, vring_align); 2173 return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context, 2174 notify, callback, name); 2175 } 2176 EXPORT_SYMBOL_GPL(vring_new_virtqueue); 2177 2178 void vring_del_virtqueue(struct virtqueue *_vq) 2179 { 2180 struct vring_virtqueue *vq = to_vvq(_vq); 2181 2182 if (vq->we_own_ring) { 2183 if (vq->packed_ring) { 2184 vring_free_queue(vq->vq.vdev, 2185 vq->packed.ring_size_in_bytes, 2186 vq->packed.vring.desc, 2187 vq->packed.ring_dma_addr); 2188 2189 vring_free_queue(vq->vq.vdev, 2190 vq->packed.event_size_in_bytes, 2191 vq->packed.vring.driver, 2192 vq->packed.driver_event_dma_addr); 2193 2194 vring_free_queue(vq->vq.vdev, 2195 vq->packed.event_size_in_bytes, 2196 vq->packed.vring.device, 2197 vq->packed.device_event_dma_addr); 2198 2199 kfree(vq->packed.desc_state); 2200 kfree(vq->packed.desc_extra); 2201 } else { 2202 vring_free_queue(vq->vq.vdev, 2203 vq->split.queue_size_in_bytes, 2204 vq->split.vring.desc, 2205 vq->split.queue_dma_addr); 2206 } 2207 } 2208 if (!vq->packed_ring) 2209 kfree(vq->split.desc_state); 2210 list_del(&_vq->list); 2211 kfree(vq); 2212 } 2213 EXPORT_SYMBOL_GPL(vring_del_virtqueue); 2214 2215 /* Manipulates transport-specific feature bits. */ 2216 void vring_transport_features(struct virtio_device *vdev) 2217 { 2218 unsigned int i; 2219 2220 for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) { 2221 switch (i) { 2222 case VIRTIO_RING_F_INDIRECT_DESC: 2223 break; 2224 case VIRTIO_RING_F_EVENT_IDX: 2225 break; 2226 case VIRTIO_F_VERSION_1: 2227 break; 2228 case VIRTIO_F_IOMMU_PLATFORM: 2229 break; 2230 case VIRTIO_F_RING_PACKED: 2231 break; 2232 case VIRTIO_F_ORDER_PLATFORM: 2233 break; 2234 default: 2235 /* We don't understand this bit. */ 2236 __virtio_clear_bit(vdev, i); 2237 } 2238 } 2239 } 2240 EXPORT_SYMBOL_GPL(vring_transport_features); 2241 2242 /** 2243 * virtqueue_get_vring_size - return the size of the virtqueue's vring 2244 * @_vq: the struct virtqueue containing the vring of interest. 2245 * 2246 * Returns the size of the vring. This is mainly used for boasting to 2247 * userspace. Unlike other operations, this need not be serialized. 2248 */ 2249 unsigned int virtqueue_get_vring_size(struct virtqueue *_vq) 2250 { 2251 2252 struct vring_virtqueue *vq = to_vvq(_vq); 2253 2254 return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num; 2255 } 2256 EXPORT_SYMBOL_GPL(virtqueue_get_vring_size); 2257 2258 bool virtqueue_is_broken(struct virtqueue *_vq) 2259 { 2260 struct vring_virtqueue *vq = to_vvq(_vq); 2261 2262 return vq->broken; 2263 } 2264 EXPORT_SYMBOL_GPL(virtqueue_is_broken); 2265 2266 /* 2267 * This should prevent the device from being used, allowing drivers to 2268 * recover. You may need to grab appropriate locks to flush. 2269 */ 2270 void virtio_break_device(struct virtio_device *dev) 2271 { 2272 struct virtqueue *_vq; 2273 2274 list_for_each_entry(_vq, &dev->vqs, list) { 2275 struct vring_virtqueue *vq = to_vvq(_vq); 2276 vq->broken = true; 2277 } 2278 } 2279 EXPORT_SYMBOL_GPL(virtio_break_device); 2280 2281 dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq) 2282 { 2283 struct vring_virtqueue *vq = to_vvq(_vq); 2284 2285 BUG_ON(!vq->we_own_ring); 2286 2287 if (vq->packed_ring) 2288 return vq->packed.ring_dma_addr; 2289 2290 return vq->split.queue_dma_addr; 2291 } 2292 EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr); 2293 2294 dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq) 2295 { 2296 struct vring_virtqueue *vq = to_vvq(_vq); 2297 2298 BUG_ON(!vq->we_own_ring); 2299 2300 if (vq->packed_ring) 2301 return vq->packed.driver_event_dma_addr; 2302 2303 return vq->split.queue_dma_addr + 2304 ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc); 2305 } 2306 EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr); 2307 2308 dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq) 2309 { 2310 struct vring_virtqueue *vq = to_vvq(_vq); 2311 2312 BUG_ON(!vq->we_own_ring); 2313 2314 if (vq->packed_ring) 2315 return vq->packed.device_event_dma_addr; 2316 2317 return vq->split.queue_dma_addr + 2318 ((char *)vq->split.vring.used - (char *)vq->split.vring.desc); 2319 } 2320 EXPORT_SYMBOL_GPL(virtqueue_get_used_addr); 2321 2322 /* Only available for split ring */ 2323 const struct vring *virtqueue_get_vring(struct virtqueue *vq) 2324 { 2325 return &to_vvq(vq)->split.vring; 2326 } 2327 EXPORT_SYMBOL_GPL(virtqueue_get_vring); 2328 2329 MODULE_LICENSE("GPL"); 2330