1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* Virtio ring implementation. 3 * 4 * Copyright 2007 Rusty Russell IBM Corporation 5 */ 6 #include <linux/virtio.h> 7 #include <linux/virtio_ring.h> 8 #include <linux/virtio_config.h> 9 #include <linux/device.h> 10 #include <linux/slab.h> 11 #include <linux/module.h> 12 #include <linux/hrtimer.h> 13 #include <linux/dma-mapping.h> 14 #include <xen/xen.h> 15 16 #ifdef DEBUG 17 /* For development, we want to crash whenever the ring is screwed. */ 18 #define BAD_RING(_vq, fmt, args...) \ 19 do { \ 20 dev_err(&(_vq)->vq.vdev->dev, \ 21 "%s:"fmt, (_vq)->vq.name, ##args); \ 22 BUG(); \ 23 } while (0) 24 /* Caller is supposed to guarantee no reentry. */ 25 #define START_USE(_vq) \ 26 do { \ 27 if ((_vq)->in_use) \ 28 panic("%s:in_use = %i\n", \ 29 (_vq)->vq.name, (_vq)->in_use); \ 30 (_vq)->in_use = __LINE__; \ 31 } while (0) 32 #define END_USE(_vq) \ 33 do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0) 34 #define LAST_ADD_TIME_UPDATE(_vq) \ 35 do { \ 36 ktime_t now = ktime_get(); \ 37 \ 38 /* No kick or get, with .1 second between? Warn. */ \ 39 if ((_vq)->last_add_time_valid) \ 40 WARN_ON(ktime_to_ms(ktime_sub(now, \ 41 (_vq)->last_add_time)) > 100); \ 42 (_vq)->last_add_time = now; \ 43 (_vq)->last_add_time_valid = true; \ 44 } while (0) 45 #define LAST_ADD_TIME_CHECK(_vq) \ 46 do { \ 47 if ((_vq)->last_add_time_valid) { \ 48 WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \ 49 (_vq)->last_add_time)) > 100); \ 50 } \ 51 } while (0) 52 #define LAST_ADD_TIME_INVALID(_vq) \ 53 ((_vq)->last_add_time_valid = false) 54 #else 55 #define BAD_RING(_vq, fmt, args...) \ 56 do { \ 57 dev_err(&_vq->vq.vdev->dev, \ 58 "%s:"fmt, (_vq)->vq.name, ##args); \ 59 (_vq)->broken = true; \ 60 } while (0) 61 #define START_USE(vq) 62 #define END_USE(vq) 63 #define LAST_ADD_TIME_UPDATE(vq) 64 #define LAST_ADD_TIME_CHECK(vq) 65 #define LAST_ADD_TIME_INVALID(vq) 66 #endif 67 68 struct vring_desc_state_split { 69 void *data; /* Data for callback. */ 70 struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ 71 }; 72 73 struct vring_desc_state_packed { 74 void *data; /* Data for callback. */ 75 struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */ 76 u16 num; /* Descriptor list length. */ 77 u16 next; /* The next desc state in a list. */ 78 u16 last; /* The last desc state in a list. */ 79 }; 80 81 struct vring_desc_extra_packed { 82 dma_addr_t addr; /* Buffer DMA addr. */ 83 u32 len; /* Buffer length. */ 84 u16 flags; /* Descriptor flags. */ 85 }; 86 87 struct vring_virtqueue { 88 struct virtqueue vq; 89 90 /* Is this a packed ring? */ 91 bool packed_ring; 92 93 /* Is DMA API used? */ 94 bool use_dma_api; 95 96 /* Can we use weak barriers? */ 97 bool weak_barriers; 98 99 /* Other side has made a mess, don't try any more. */ 100 bool broken; 101 102 /* Host supports indirect buffers */ 103 bool indirect; 104 105 /* Host publishes avail event idx */ 106 bool event; 107 108 /* Head of free buffer list. */ 109 unsigned int free_head; 110 /* Number we've added since last sync. */ 111 unsigned int num_added; 112 113 /* Last used index we've seen. */ 114 u16 last_used_idx; 115 116 /* Hint for event idx: already triggered no need to disable. */ 117 bool event_triggered; 118 119 union { 120 /* Available for split ring */ 121 struct { 122 /* Actual memory layout for this queue. 
*/ 123 struct vring vring; 124 125 /* Last written value to avail->flags */ 126 u16 avail_flags_shadow; 127 128 /* 129 * Last written value to avail->idx in 130 * guest byte order. 131 */ 132 u16 avail_idx_shadow; 133 134 /* Per-descriptor state. */ 135 struct vring_desc_state_split *desc_state; 136 137 /* DMA address and size information */ 138 dma_addr_t queue_dma_addr; 139 size_t queue_size_in_bytes; 140 } split; 141 142 /* Available for packed ring */ 143 struct { 144 /* Actual memory layout for this queue. */ 145 struct { 146 unsigned int num; 147 struct vring_packed_desc *desc; 148 struct vring_packed_desc_event *driver; 149 struct vring_packed_desc_event *device; 150 } vring; 151 152 /* Driver ring wrap counter. */ 153 bool avail_wrap_counter; 154 155 /* Device ring wrap counter. */ 156 bool used_wrap_counter; 157 158 /* Avail used flags. */ 159 u16 avail_used_flags; 160 161 /* Index of the next avail descriptor. */ 162 u16 next_avail_idx; 163 164 /* 165 * Last written value to driver->flags in 166 * guest byte order. 167 */ 168 u16 event_flags_shadow; 169 170 /* Per-descriptor state. */ 171 struct vring_desc_state_packed *desc_state; 172 struct vring_desc_extra_packed *desc_extra; 173 174 /* DMA address and size information */ 175 dma_addr_t ring_dma_addr; 176 dma_addr_t driver_event_dma_addr; 177 dma_addr_t device_event_dma_addr; 178 size_t ring_size_in_bytes; 179 size_t event_size_in_bytes; 180 } packed; 181 }; 182 183 /* How to notify other side. FIXME: commonalize hcalls! */ 184 bool (*notify)(struct virtqueue *vq); 185 186 /* DMA, allocation, and size information */ 187 bool we_own_ring; 188 189 #ifdef DEBUG 190 /* They're supposed to lock for us. */ 191 unsigned int in_use; 192 193 /* Figure out if their kicks are too delayed. */ 194 bool last_add_time_valid; 195 ktime_t last_add_time; 196 #endif 197 }; 198 199 200 /* 201 * Helpers. 202 */ 203 204 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) 205 206 static inline bool virtqueue_use_indirect(struct virtqueue *_vq, 207 unsigned int total_sg) 208 { 209 struct vring_virtqueue *vq = to_vvq(_vq); 210 211 /* 212 * If the host supports indirect descriptor tables, and we have multiple 213 * buffers, then go indirect. FIXME: tune this threshold 214 */ 215 return (vq->indirect && total_sg > 1 && vq->vq.num_free); 216 } 217 218 /* 219 * Modern virtio devices have feature bits to specify whether they need a 220 * quirk and bypass the IOMMU. If not there, just use the DMA API. 221 * 222 * If there, the interaction between virtio and DMA API is messy. 223 * 224 * On most systems with virtio, physical addresses match bus addresses, 225 * and it doesn't particularly matter whether we use the DMA API. 226 * 227 * On some systems, including Xen and any system with a physical device 228 * that speaks virtio behind a physical IOMMU, we must use the DMA API 229 * for virtio DMA to work at all. 230 * 231 * On other systems, including SPARC and PPC64, virtio-pci devices are 232 * enumerated as though they are behind an IOMMU, but the virtio host 233 * ignores the IOMMU, so we must either pretend that the IOMMU isn't 234 * there or somehow map everything as the identity. 235 * 236 * For the time being, we preserve historic behavior and bypass the DMA 237 * API. 238 * 239 * TODO: install a per-device DMA ops structure that does the right thing 240 * taking into account all the above quirks, and use the DMA API 241 * unconditionally on data path. 
242 */ 243 244 static bool vring_use_dma_api(struct virtio_device *vdev) 245 { 246 if (!virtio_has_dma_quirk(vdev)) 247 return true; 248 249 /* Otherwise, we are left to guess. */ 250 /* 251 * In theory, it's possible to have a buggy QEMU-supposed 252 * emulated Q35 IOMMU and Xen enabled at the same time. On 253 * such a configuration, virtio has never worked and will 254 * not work without an even larger kludge. Instead, enable 255 * the DMA API if we're a Xen guest, which at least allows 256 * all of the sensible Xen configurations to work correctly. 257 */ 258 if (xen_domain()) 259 return true; 260 261 return false; 262 } 263 264 size_t virtio_max_dma_size(struct virtio_device *vdev) 265 { 266 size_t max_segment_size = SIZE_MAX; 267 268 if (vring_use_dma_api(vdev)) 269 max_segment_size = dma_max_mapping_size(&vdev->dev); 270 271 return max_segment_size; 272 } 273 EXPORT_SYMBOL_GPL(virtio_max_dma_size); 274 275 static void *vring_alloc_queue(struct virtio_device *vdev, size_t size, 276 dma_addr_t *dma_handle, gfp_t flag) 277 { 278 if (vring_use_dma_api(vdev)) { 279 return dma_alloc_coherent(vdev->dev.parent, size, 280 dma_handle, flag); 281 } else { 282 void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag); 283 284 if (queue) { 285 phys_addr_t phys_addr = virt_to_phys(queue); 286 *dma_handle = (dma_addr_t)phys_addr; 287 288 /* 289 * Sanity check: make sure we dind't truncate 290 * the address. The only arches I can find that 291 * have 64-bit phys_addr_t but 32-bit dma_addr_t 292 * are certain non-highmem MIPS and x86 293 * configurations, but these configurations 294 * should never allocate physical pages above 32 295 * bits, so this is fine. Just in case, throw a 296 * warning and abort if we end up with an 297 * unrepresentable address. 298 */ 299 if (WARN_ON_ONCE(*dma_handle != phys_addr)) { 300 free_pages_exact(queue, PAGE_ALIGN(size)); 301 return NULL; 302 } 303 } 304 return queue; 305 } 306 } 307 308 static void vring_free_queue(struct virtio_device *vdev, size_t size, 309 void *queue, dma_addr_t dma_handle) 310 { 311 if (vring_use_dma_api(vdev)) 312 dma_free_coherent(vdev->dev.parent, size, queue, dma_handle); 313 else 314 free_pages_exact(queue, PAGE_ALIGN(size)); 315 } 316 317 /* 318 * The DMA ops on various arches are rather gnarly right now, and 319 * making all of the arch DMA ops work on the vring device itself 320 * is a mess. For now, we use the parent device for DMA ops. 321 */ 322 static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq) 323 { 324 return vq->vq.vdev->dev.parent; 325 } 326 327 /* Map one sg entry. */ 328 static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq, 329 struct scatterlist *sg, 330 enum dma_data_direction direction) 331 { 332 if (!vq->use_dma_api) 333 return (dma_addr_t)sg_phys(sg); 334 335 /* 336 * We can't use dma_map_sg, because we don't use scatterlists in 337 * the way it expects (we don't guarantee that the scatterlist 338 * will exist for the lifetime of the mapping). 
339 */ 340 return dma_map_page(vring_dma_dev(vq), 341 sg_page(sg), sg->offset, sg->length, 342 direction); 343 } 344 345 static dma_addr_t vring_map_single(const struct vring_virtqueue *vq, 346 void *cpu_addr, size_t size, 347 enum dma_data_direction direction) 348 { 349 if (!vq->use_dma_api) 350 return (dma_addr_t)virt_to_phys(cpu_addr); 351 352 return dma_map_single(vring_dma_dev(vq), 353 cpu_addr, size, direction); 354 } 355 356 static int vring_mapping_error(const struct vring_virtqueue *vq, 357 dma_addr_t addr) 358 { 359 if (!vq->use_dma_api) 360 return 0; 361 362 return dma_mapping_error(vring_dma_dev(vq), addr); 363 } 364 365 366 /* 367 * Split ring specific functions - *_split(). 368 */ 369 370 static void vring_unmap_one_split(const struct vring_virtqueue *vq, 371 struct vring_desc *desc) 372 { 373 u16 flags; 374 375 if (!vq->use_dma_api) 376 return; 377 378 flags = virtio16_to_cpu(vq->vq.vdev, desc->flags); 379 380 if (flags & VRING_DESC_F_INDIRECT) { 381 dma_unmap_single(vring_dma_dev(vq), 382 virtio64_to_cpu(vq->vq.vdev, desc->addr), 383 virtio32_to_cpu(vq->vq.vdev, desc->len), 384 (flags & VRING_DESC_F_WRITE) ? 385 DMA_FROM_DEVICE : DMA_TO_DEVICE); 386 } else { 387 dma_unmap_page(vring_dma_dev(vq), 388 virtio64_to_cpu(vq->vq.vdev, desc->addr), 389 virtio32_to_cpu(vq->vq.vdev, desc->len), 390 (flags & VRING_DESC_F_WRITE) ? 391 DMA_FROM_DEVICE : DMA_TO_DEVICE); 392 } 393 } 394 395 static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq, 396 unsigned int total_sg, 397 gfp_t gfp) 398 { 399 struct vring_desc *desc; 400 unsigned int i; 401 402 /* 403 * We require lowmem mappings for the descriptors because 404 * otherwise virt_to_phys will give us bogus addresses in the 405 * virtqueue. 406 */ 407 gfp &= ~__GFP_HIGHMEM; 408 409 desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp); 410 if (!desc) 411 return NULL; 412 413 for (i = 0; i < total_sg; i++) 414 desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1); 415 return desc; 416 } 417 418 static inline int virtqueue_add_split(struct virtqueue *_vq, 419 struct scatterlist *sgs[], 420 unsigned int total_sg, 421 unsigned int out_sgs, 422 unsigned int in_sgs, 423 void *data, 424 void *ctx, 425 gfp_t gfp) 426 { 427 struct vring_virtqueue *vq = to_vvq(_vq); 428 struct scatterlist *sg; 429 struct vring_desc *desc; 430 unsigned int i, n, avail, descs_used, prev, err_idx; 431 int head; 432 bool indirect; 433 434 START_USE(vq); 435 436 BUG_ON(data == NULL); 437 BUG_ON(ctx && vq->indirect); 438 439 if (unlikely(vq->broken)) { 440 END_USE(vq); 441 return -EIO; 442 } 443 444 LAST_ADD_TIME_UPDATE(vq); 445 446 BUG_ON(total_sg == 0); 447 448 head = vq->free_head; 449 450 if (virtqueue_use_indirect(_vq, total_sg)) 451 desc = alloc_indirect_split(_vq, total_sg, gfp); 452 else { 453 desc = NULL; 454 WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect); 455 } 456 457 if (desc) { 458 /* Use a single buffer which doesn't continue */ 459 indirect = true; 460 /* Set up rest to use this indirect table. */ 461 i = 0; 462 descs_used = 1; 463 } else { 464 indirect = false; 465 desc = vq->split.vring.desc; 466 i = head; 467 descs_used = total_sg; 468 } 469 470 if (vq->vq.num_free < descs_used) { 471 pr_debug("Can't add buf len %i - avail = %i\n", 472 descs_used, vq->vq.num_free); 473 /* FIXME: for historical reasons, we force a notify here if 474 * there are outgoing parts to the buffer. Presumably the 475 * host should service the ring ASAP. 
*/ 476 if (out_sgs) 477 vq->notify(&vq->vq); 478 if (indirect) 479 kfree(desc); 480 END_USE(vq); 481 return -ENOSPC; 482 } 483 484 for (n = 0; n < out_sgs; n++) { 485 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 486 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE); 487 if (vring_mapping_error(vq, addr)) 488 goto unmap_release; 489 490 desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT); 491 desc[i].addr = cpu_to_virtio64(_vq->vdev, addr); 492 desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length); 493 prev = i; 494 i = virtio16_to_cpu(_vq->vdev, desc[i].next); 495 } 496 } 497 for (; n < (out_sgs + in_sgs); n++) { 498 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 499 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE); 500 if (vring_mapping_error(vq, addr)) 501 goto unmap_release; 502 503 desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE); 504 desc[i].addr = cpu_to_virtio64(_vq->vdev, addr); 505 desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length); 506 prev = i; 507 i = virtio16_to_cpu(_vq->vdev, desc[i].next); 508 } 509 } 510 /* Last one doesn't continue. */ 511 desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); 512 513 if (indirect) { 514 /* Now that the indirect table is filled in, map it. */ 515 dma_addr_t addr = vring_map_single( 516 vq, desc, total_sg * sizeof(struct vring_desc), 517 DMA_TO_DEVICE); 518 if (vring_mapping_error(vq, addr)) 519 goto unmap_release; 520 521 vq->split.vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, 522 VRING_DESC_F_INDIRECT); 523 vq->split.vring.desc[head].addr = cpu_to_virtio64(_vq->vdev, 524 addr); 525 526 vq->split.vring.desc[head].len = cpu_to_virtio32(_vq->vdev, 527 total_sg * sizeof(struct vring_desc)); 528 } 529 530 /* We're using some buffers from the free list. */ 531 vq->vq.num_free -= descs_used; 532 533 /* Update free pointer */ 534 if (indirect) 535 vq->free_head = virtio16_to_cpu(_vq->vdev, 536 vq->split.vring.desc[head].next); 537 else 538 vq->free_head = i; 539 540 /* Store token and indirect buffer state. */ 541 vq->split.desc_state[head].data = data; 542 if (indirect) 543 vq->split.desc_state[head].indir_desc = desc; 544 else 545 vq->split.desc_state[head].indir_desc = ctx; 546 547 /* Put entry in available array (but don't update avail->idx until they 548 * do sync). */ 549 avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1); 550 vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head); 551 552 /* Descriptors and available array need to be set before we expose the 553 * new available array entries. */ 554 virtio_wmb(vq->weak_barriers); 555 vq->split.avail_idx_shadow++; 556 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, 557 vq->split.avail_idx_shadow); 558 vq->num_added++; 559 560 pr_debug("Added buffer head %i to %p\n", head, vq); 561 END_USE(vq); 562 563 /* This is very unlikely, but theoretically possible. Kick 564 * just in case. 
*/ 565 if (unlikely(vq->num_added == (1 << 16) - 1)) 566 virtqueue_kick(_vq); 567 568 return 0; 569 570 unmap_release: 571 err_idx = i; 572 573 if (indirect) 574 i = 0; 575 else 576 i = head; 577 578 for (n = 0; n < total_sg; n++) { 579 if (i == err_idx) 580 break; 581 vring_unmap_one_split(vq, &desc[i]); 582 i = virtio16_to_cpu(_vq->vdev, desc[i].next); 583 } 584 585 if (indirect) 586 kfree(desc); 587 588 END_USE(vq); 589 return -ENOMEM; 590 } 591 592 static bool virtqueue_kick_prepare_split(struct virtqueue *_vq) 593 { 594 struct vring_virtqueue *vq = to_vvq(_vq); 595 u16 new, old; 596 bool needs_kick; 597 598 START_USE(vq); 599 /* We need to expose available array entries before checking avail 600 * event. */ 601 virtio_mb(vq->weak_barriers); 602 603 old = vq->split.avail_idx_shadow - vq->num_added; 604 new = vq->split.avail_idx_shadow; 605 vq->num_added = 0; 606 607 LAST_ADD_TIME_CHECK(vq); 608 LAST_ADD_TIME_INVALID(vq); 609 610 if (vq->event) { 611 needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev, 612 vring_avail_event(&vq->split.vring)), 613 new, old); 614 } else { 615 needs_kick = !(vq->split.vring.used->flags & 616 cpu_to_virtio16(_vq->vdev, 617 VRING_USED_F_NO_NOTIFY)); 618 } 619 END_USE(vq); 620 return needs_kick; 621 } 622 623 static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, 624 void **ctx) 625 { 626 unsigned int i, j; 627 __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT); 628 629 /* Clear data ptr. */ 630 vq->split.desc_state[head].data = NULL; 631 632 /* Put back on free list: unmap first-level descriptors and find end */ 633 i = head; 634 635 while (vq->split.vring.desc[i].flags & nextflag) { 636 vring_unmap_one_split(vq, &vq->split.vring.desc[i]); 637 i = virtio16_to_cpu(vq->vq.vdev, vq->split.vring.desc[i].next); 638 vq->vq.num_free++; 639 } 640 641 vring_unmap_one_split(vq, &vq->split.vring.desc[i]); 642 vq->split.vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, 643 vq->free_head); 644 vq->free_head = head; 645 646 /* Plus final descriptor */ 647 vq->vq.num_free++; 648 649 if (vq->indirect) { 650 struct vring_desc *indir_desc = 651 vq->split.desc_state[head].indir_desc; 652 u32 len; 653 654 /* Free the indirect table, if any, now that it's unmapped. */ 655 if (!indir_desc) 656 return; 657 658 len = virtio32_to_cpu(vq->vq.vdev, 659 vq->split.vring.desc[head].len); 660 661 BUG_ON(!(vq->split.vring.desc[head].flags & 662 cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT))); 663 BUG_ON(len == 0 || len % sizeof(struct vring_desc)); 664 665 for (j = 0; j < len / sizeof(struct vring_desc); j++) 666 vring_unmap_one_split(vq, &indir_desc[j]); 667 668 kfree(indir_desc); 669 vq->split.desc_state[head].indir_desc = NULL; 670 } else if (ctx) { 671 *ctx = vq->split.desc_state[head].indir_desc; 672 } 673 } 674 675 static inline bool more_used_split(const struct vring_virtqueue *vq) 676 { 677 return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, 678 vq->split.vring.used->idx); 679 } 680 681 static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq, 682 unsigned int *len, 683 void **ctx) 684 { 685 struct vring_virtqueue *vq = to_vvq(_vq); 686 void *ret; 687 unsigned int i; 688 u16 last_used; 689 690 START_USE(vq); 691 692 if (unlikely(vq->broken)) { 693 END_USE(vq); 694 return NULL; 695 } 696 697 if (!more_used_split(vq)) { 698 pr_debug("No more buffers in queue\n"); 699 END_USE(vq); 700 return NULL; 701 } 702 703 /* Only get used array entries after they have been exposed by host. 
*/ 704 virtio_rmb(vq->weak_barriers); 705 706 last_used = (vq->last_used_idx & (vq->split.vring.num - 1)); 707 i = virtio32_to_cpu(_vq->vdev, 708 vq->split.vring.used->ring[last_used].id); 709 *len = virtio32_to_cpu(_vq->vdev, 710 vq->split.vring.used->ring[last_used].len); 711 712 if (unlikely(i >= vq->split.vring.num)) { 713 BAD_RING(vq, "id %u out of range\n", i); 714 return NULL; 715 } 716 if (unlikely(!vq->split.desc_state[i].data)) { 717 BAD_RING(vq, "id %u is not a head!\n", i); 718 return NULL; 719 } 720 721 /* detach_buf_split clears data, so grab it now. */ 722 ret = vq->split.desc_state[i].data; 723 detach_buf_split(vq, i, ctx); 724 vq->last_used_idx++; 725 /* If we expect an interrupt for the next entry, tell host 726 * by writing event index and flush out the write before 727 * the read in the next get_buf call. */ 728 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) 729 virtio_store_mb(vq->weak_barriers, 730 &vring_used_event(&vq->split.vring), 731 cpu_to_virtio16(_vq->vdev, vq->last_used_idx)); 732 733 LAST_ADD_TIME_INVALID(vq); 734 735 END_USE(vq); 736 return ret; 737 } 738 739 static void virtqueue_disable_cb_split(struct virtqueue *_vq) 740 { 741 struct vring_virtqueue *vq = to_vvq(_vq); 742 743 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) { 744 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; 745 if (vq->event) 746 /* TODO: this is a hack. Figure out a cleaner value to write. */ 747 vring_used_event(&vq->split.vring) = 0x0; 748 else 749 vq->split.vring.avail->flags = 750 cpu_to_virtio16(_vq->vdev, 751 vq->split.avail_flags_shadow); 752 } 753 } 754 755 static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq) 756 { 757 struct vring_virtqueue *vq = to_vvq(_vq); 758 u16 last_used_idx; 759 760 START_USE(vq); 761 762 /* We optimistically turn back on interrupts, then check if there was 763 * more to do. */ 764 /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to 765 * either clear the flags bit or point the event index at the next 766 * entry. Always do both to keep code simple. */ 767 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 768 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 769 if (!vq->event) 770 vq->split.vring.avail->flags = 771 cpu_to_virtio16(_vq->vdev, 772 vq->split.avail_flags_shadow); 773 } 774 vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev, 775 last_used_idx = vq->last_used_idx); 776 END_USE(vq); 777 return last_used_idx; 778 } 779 780 static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx) 781 { 782 struct vring_virtqueue *vq = to_vvq(_vq); 783 784 return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev, 785 vq->split.vring.used->idx); 786 } 787 788 static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq) 789 { 790 struct vring_virtqueue *vq = to_vvq(_vq); 791 u16 bufs; 792 793 START_USE(vq); 794 795 /* We optimistically turn back on interrupts, then check if there was 796 * more to do. */ 797 /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to 798 * either clear the flags bit or point the event index at the next 799 * entry. Always update the event index to keep code simple. 
*/ 800 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 801 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 802 if (!vq->event) 803 vq->split.vring.avail->flags = 804 cpu_to_virtio16(_vq->vdev, 805 vq->split.avail_flags_shadow); 806 } 807 /* TODO: tune this threshold */ 808 bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4; 809 810 virtio_store_mb(vq->weak_barriers, 811 &vring_used_event(&vq->split.vring), 812 cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs)); 813 814 if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx) 815 - vq->last_used_idx) > bufs)) { 816 END_USE(vq); 817 return false; 818 } 819 820 END_USE(vq); 821 return true; 822 } 823 824 static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq) 825 { 826 struct vring_virtqueue *vq = to_vvq(_vq); 827 unsigned int i; 828 void *buf; 829 830 START_USE(vq); 831 832 for (i = 0; i < vq->split.vring.num; i++) { 833 if (!vq->split.desc_state[i].data) 834 continue; 835 /* detach_buf_split clears data, so grab it now. */ 836 buf = vq->split.desc_state[i].data; 837 detach_buf_split(vq, i, NULL); 838 vq->split.avail_idx_shadow--; 839 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, 840 vq->split.avail_idx_shadow); 841 END_USE(vq); 842 return buf; 843 } 844 /* That should have freed everything. */ 845 BUG_ON(vq->vq.num_free != vq->split.vring.num); 846 847 END_USE(vq); 848 return NULL; 849 } 850 851 static struct virtqueue *vring_create_virtqueue_split( 852 unsigned int index, 853 unsigned int num, 854 unsigned int vring_align, 855 struct virtio_device *vdev, 856 bool weak_barriers, 857 bool may_reduce_num, 858 bool context, 859 bool (*notify)(struct virtqueue *), 860 void (*callback)(struct virtqueue *), 861 const char *name) 862 { 863 struct virtqueue *vq; 864 void *queue = NULL; 865 dma_addr_t dma_addr; 866 size_t queue_size_in_bytes; 867 struct vring vring; 868 869 /* We assume num is a power of 2. */ 870 if (num & (num - 1)) { 871 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num); 872 return NULL; 873 } 874 875 /* TODO: allocate each queue chunk individually */ 876 for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) { 877 queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 878 &dma_addr, 879 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 880 if (queue) 881 break; 882 if (!may_reduce_num) 883 return NULL; 884 } 885 886 if (!num) 887 return NULL; 888 889 if (!queue) { 890 /* Try to get a single page. You are my only hope! */ 891 queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 892 &dma_addr, GFP_KERNEL|__GFP_ZERO); 893 } 894 if (!queue) 895 return NULL; 896 897 queue_size_in_bytes = vring_size(num, vring_align); 898 vring_init(&vring, num, queue, vring_align); 899 900 vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context, 901 notify, callback, name); 902 if (!vq) { 903 vring_free_queue(vdev, queue_size_in_bytes, queue, 904 dma_addr); 905 return NULL; 906 } 907 908 to_vvq(vq)->split.queue_dma_addr = dma_addr; 909 to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes; 910 to_vvq(vq)->we_own_ring = true; 911 912 return vq; 913 } 914 915 916 /* 917 * Packed ring specific functions - *_packed(). 
918 */ 919 920 static void vring_unmap_state_packed(const struct vring_virtqueue *vq, 921 struct vring_desc_extra_packed *state) 922 { 923 u16 flags; 924 925 if (!vq->use_dma_api) 926 return; 927 928 flags = state->flags; 929 930 if (flags & VRING_DESC_F_INDIRECT) { 931 dma_unmap_single(vring_dma_dev(vq), 932 state->addr, state->len, 933 (flags & VRING_DESC_F_WRITE) ? 934 DMA_FROM_DEVICE : DMA_TO_DEVICE); 935 } else { 936 dma_unmap_page(vring_dma_dev(vq), 937 state->addr, state->len, 938 (flags & VRING_DESC_F_WRITE) ? 939 DMA_FROM_DEVICE : DMA_TO_DEVICE); 940 } 941 } 942 943 static void vring_unmap_desc_packed(const struct vring_virtqueue *vq, 944 struct vring_packed_desc *desc) 945 { 946 u16 flags; 947 948 if (!vq->use_dma_api) 949 return; 950 951 flags = le16_to_cpu(desc->flags); 952 953 if (flags & VRING_DESC_F_INDIRECT) { 954 dma_unmap_single(vring_dma_dev(vq), 955 le64_to_cpu(desc->addr), 956 le32_to_cpu(desc->len), 957 (flags & VRING_DESC_F_WRITE) ? 958 DMA_FROM_DEVICE : DMA_TO_DEVICE); 959 } else { 960 dma_unmap_page(vring_dma_dev(vq), 961 le64_to_cpu(desc->addr), 962 le32_to_cpu(desc->len), 963 (flags & VRING_DESC_F_WRITE) ? 964 DMA_FROM_DEVICE : DMA_TO_DEVICE); 965 } 966 } 967 968 static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg, 969 gfp_t gfp) 970 { 971 struct vring_packed_desc *desc; 972 973 /* 974 * We require lowmem mappings for the descriptors because 975 * otherwise virt_to_phys will give us bogus addresses in the 976 * virtqueue. 977 */ 978 gfp &= ~__GFP_HIGHMEM; 979 980 desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp); 981 982 return desc; 983 } 984 985 static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, 986 struct scatterlist *sgs[], 987 unsigned int total_sg, 988 unsigned int out_sgs, 989 unsigned int in_sgs, 990 void *data, 991 gfp_t gfp) 992 { 993 struct vring_packed_desc *desc; 994 struct scatterlist *sg; 995 unsigned int i, n, err_idx; 996 u16 head, id; 997 dma_addr_t addr; 998 999 head = vq->packed.next_avail_idx; 1000 desc = alloc_indirect_packed(total_sg, gfp); 1001 1002 if (unlikely(vq->vq.num_free < 1)) { 1003 pr_debug("Can't add buf len 1 - avail = 0\n"); 1004 kfree(desc); 1005 END_USE(vq); 1006 return -ENOSPC; 1007 } 1008 1009 i = 0; 1010 id = vq->free_head; 1011 BUG_ON(id == vq->packed.vring.num); 1012 1013 for (n = 0; n < out_sgs + in_sgs; n++) { 1014 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 1015 addr = vring_map_one_sg(vq, sg, n < out_sgs ? 1016 DMA_TO_DEVICE : DMA_FROM_DEVICE); 1017 if (vring_mapping_error(vq, addr)) 1018 goto unmap_release; 1019 1020 desc[i].flags = cpu_to_le16(n < out_sgs ? 1021 0 : VRING_DESC_F_WRITE); 1022 desc[i].addr = cpu_to_le64(addr); 1023 desc[i].len = cpu_to_le32(sg->length); 1024 i++; 1025 } 1026 } 1027 1028 /* Now that the indirect table is filled in, map it. 
*/ 1029 addr = vring_map_single(vq, desc, 1030 total_sg * sizeof(struct vring_packed_desc), 1031 DMA_TO_DEVICE); 1032 if (vring_mapping_error(vq, addr)) 1033 goto unmap_release; 1034 1035 vq->packed.vring.desc[head].addr = cpu_to_le64(addr); 1036 vq->packed.vring.desc[head].len = cpu_to_le32(total_sg * 1037 sizeof(struct vring_packed_desc)); 1038 vq->packed.vring.desc[head].id = cpu_to_le16(id); 1039 1040 if (vq->use_dma_api) { 1041 vq->packed.desc_extra[id].addr = addr; 1042 vq->packed.desc_extra[id].len = total_sg * 1043 sizeof(struct vring_packed_desc); 1044 vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT | 1045 vq->packed.avail_used_flags; 1046 } 1047 1048 /* 1049 * A driver MUST NOT make the first descriptor in the list 1050 * available before all subsequent descriptors comprising 1051 * the list are made available. 1052 */ 1053 virtio_wmb(vq->weak_barriers); 1054 vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT | 1055 vq->packed.avail_used_flags); 1056 1057 /* We're using some buffers from the free list. */ 1058 vq->vq.num_free -= 1; 1059 1060 /* Update free pointer */ 1061 n = head + 1; 1062 if (n >= vq->packed.vring.num) { 1063 n = 0; 1064 vq->packed.avail_wrap_counter ^= 1; 1065 vq->packed.avail_used_flags ^= 1066 1 << VRING_PACKED_DESC_F_AVAIL | 1067 1 << VRING_PACKED_DESC_F_USED; 1068 } 1069 vq->packed.next_avail_idx = n; 1070 vq->free_head = vq->packed.desc_state[id].next; 1071 1072 /* Store token and indirect buffer state. */ 1073 vq->packed.desc_state[id].num = 1; 1074 vq->packed.desc_state[id].data = data; 1075 vq->packed.desc_state[id].indir_desc = desc; 1076 vq->packed.desc_state[id].last = id; 1077 1078 vq->num_added += 1; 1079 1080 pr_debug("Added buffer head %i to %p\n", head, vq); 1081 END_USE(vq); 1082 1083 return 0; 1084 1085 unmap_release: 1086 err_idx = i; 1087 1088 for (i = 0; i < err_idx; i++) 1089 vring_unmap_desc_packed(vq, &desc[i]); 1090 1091 kfree(desc); 1092 1093 END_USE(vq); 1094 return -ENOMEM; 1095 } 1096 1097 static inline int virtqueue_add_packed(struct virtqueue *_vq, 1098 struct scatterlist *sgs[], 1099 unsigned int total_sg, 1100 unsigned int out_sgs, 1101 unsigned int in_sgs, 1102 void *data, 1103 void *ctx, 1104 gfp_t gfp) 1105 { 1106 struct vring_virtqueue *vq = to_vvq(_vq); 1107 struct vring_packed_desc *desc; 1108 struct scatterlist *sg; 1109 unsigned int i, n, c, descs_used, err_idx; 1110 __le16 head_flags, flags; 1111 u16 head, id, prev, curr, avail_used_flags; 1112 1113 START_USE(vq); 1114 1115 BUG_ON(data == NULL); 1116 BUG_ON(ctx && vq->indirect); 1117 1118 if (unlikely(vq->broken)) { 1119 END_USE(vq); 1120 return -EIO; 1121 } 1122 1123 LAST_ADD_TIME_UPDATE(vq); 1124 1125 BUG_ON(total_sg == 0); 1126 1127 if (virtqueue_use_indirect(_vq, total_sg)) 1128 return virtqueue_add_indirect_packed(vq, sgs, total_sg, 1129 out_sgs, in_sgs, data, gfp); 1130 1131 head = vq->packed.next_avail_idx; 1132 avail_used_flags = vq->packed.avail_used_flags; 1133 1134 WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect); 1135 1136 desc = vq->packed.vring.desc; 1137 i = head; 1138 descs_used = total_sg; 1139 1140 if (unlikely(vq->vq.num_free < descs_used)) { 1141 pr_debug("Can't add buf len %i - avail = %i\n", 1142 descs_used, vq->vq.num_free); 1143 END_USE(vq); 1144 return -ENOSPC; 1145 } 1146 1147 id = vq->free_head; 1148 BUG_ON(id == vq->packed.vring.num); 1149 1150 curr = id; 1151 c = 0; 1152 for (n = 0; n < out_sgs + in_sgs; n++) { 1153 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 1154 dma_addr_t addr = vring_map_one_sg(vq, 
sg, n < out_sgs ? 1155 DMA_TO_DEVICE : DMA_FROM_DEVICE); 1156 if (vring_mapping_error(vq, addr)) 1157 goto unmap_release; 1158 1159 flags = cpu_to_le16(vq->packed.avail_used_flags | 1160 (++c == total_sg ? 0 : VRING_DESC_F_NEXT) | 1161 (n < out_sgs ? 0 : VRING_DESC_F_WRITE)); 1162 if (i == head) 1163 head_flags = flags; 1164 else 1165 desc[i].flags = flags; 1166 1167 desc[i].addr = cpu_to_le64(addr); 1168 desc[i].len = cpu_to_le32(sg->length); 1169 desc[i].id = cpu_to_le16(id); 1170 1171 if (unlikely(vq->use_dma_api)) { 1172 vq->packed.desc_extra[curr].addr = addr; 1173 vq->packed.desc_extra[curr].len = sg->length; 1174 vq->packed.desc_extra[curr].flags = 1175 le16_to_cpu(flags); 1176 } 1177 prev = curr; 1178 curr = vq->packed.desc_state[curr].next; 1179 1180 if ((unlikely(++i >= vq->packed.vring.num))) { 1181 i = 0; 1182 vq->packed.avail_used_flags ^= 1183 1 << VRING_PACKED_DESC_F_AVAIL | 1184 1 << VRING_PACKED_DESC_F_USED; 1185 } 1186 } 1187 } 1188 1189 if (i < head) 1190 vq->packed.avail_wrap_counter ^= 1; 1191 1192 /* We're using some buffers from the free list. */ 1193 vq->vq.num_free -= descs_used; 1194 1195 /* Update free pointer */ 1196 vq->packed.next_avail_idx = i; 1197 vq->free_head = curr; 1198 1199 /* Store token. */ 1200 vq->packed.desc_state[id].num = descs_used; 1201 vq->packed.desc_state[id].data = data; 1202 vq->packed.desc_state[id].indir_desc = ctx; 1203 vq->packed.desc_state[id].last = prev; 1204 1205 /* 1206 * A driver MUST NOT make the first descriptor in the list 1207 * available before all subsequent descriptors comprising 1208 * the list are made available. 1209 */ 1210 virtio_wmb(vq->weak_barriers); 1211 vq->packed.vring.desc[head].flags = head_flags; 1212 vq->num_added += descs_used; 1213 1214 pr_debug("Added buffer head %i to %p\n", head, vq); 1215 END_USE(vq); 1216 1217 return 0; 1218 1219 unmap_release: 1220 err_idx = i; 1221 i = head; 1222 1223 vq->packed.avail_used_flags = avail_used_flags; 1224 1225 for (n = 0; n < total_sg; n++) { 1226 if (i == err_idx) 1227 break; 1228 vring_unmap_desc_packed(vq, &desc[i]); 1229 i++; 1230 if (i >= vq->packed.vring.num) 1231 i = 0; 1232 } 1233 1234 END_USE(vq); 1235 return -EIO; 1236 } 1237 1238 static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq) 1239 { 1240 struct vring_virtqueue *vq = to_vvq(_vq); 1241 u16 new, old, off_wrap, flags, wrap_counter, event_idx; 1242 bool needs_kick; 1243 union { 1244 struct { 1245 __le16 off_wrap; 1246 __le16 flags; 1247 }; 1248 u32 u32; 1249 } snapshot; 1250 1251 START_USE(vq); 1252 1253 /* 1254 * We need to expose the new flags value before checking notification 1255 * suppressions. 
1256 */ 1257 virtio_mb(vq->weak_barriers); 1258 1259 old = vq->packed.next_avail_idx - vq->num_added; 1260 new = vq->packed.next_avail_idx; 1261 vq->num_added = 0; 1262 1263 snapshot.u32 = *(u32 *)vq->packed.vring.device; 1264 flags = le16_to_cpu(snapshot.flags); 1265 1266 LAST_ADD_TIME_CHECK(vq); 1267 LAST_ADD_TIME_INVALID(vq); 1268 1269 if (flags != VRING_PACKED_EVENT_FLAG_DESC) { 1270 needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE); 1271 goto out; 1272 } 1273 1274 off_wrap = le16_to_cpu(snapshot.off_wrap); 1275 1276 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; 1277 event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); 1278 if (wrap_counter != vq->packed.avail_wrap_counter) 1279 event_idx -= vq->packed.vring.num; 1280 1281 needs_kick = vring_need_event(event_idx, new, old); 1282 out: 1283 END_USE(vq); 1284 return needs_kick; 1285 } 1286 1287 static void detach_buf_packed(struct vring_virtqueue *vq, 1288 unsigned int id, void **ctx) 1289 { 1290 struct vring_desc_state_packed *state = NULL; 1291 struct vring_packed_desc *desc; 1292 unsigned int i, curr; 1293 1294 state = &vq->packed.desc_state[id]; 1295 1296 /* Clear data ptr. */ 1297 state->data = NULL; 1298 1299 vq->packed.desc_state[state->last].next = vq->free_head; 1300 vq->free_head = id; 1301 vq->vq.num_free += state->num; 1302 1303 if (unlikely(vq->use_dma_api)) { 1304 curr = id; 1305 for (i = 0; i < state->num; i++) { 1306 vring_unmap_state_packed(vq, 1307 &vq->packed.desc_extra[curr]); 1308 curr = vq->packed.desc_state[curr].next; 1309 } 1310 } 1311 1312 if (vq->indirect) { 1313 u32 len; 1314 1315 /* Free the indirect table, if any, now that it's unmapped. */ 1316 desc = state->indir_desc; 1317 if (!desc) 1318 return; 1319 1320 if (vq->use_dma_api) { 1321 len = vq->packed.desc_extra[id].len; 1322 for (i = 0; i < len / sizeof(struct vring_packed_desc); 1323 i++) 1324 vring_unmap_desc_packed(vq, &desc[i]); 1325 } 1326 kfree(desc); 1327 state->indir_desc = NULL; 1328 } else if (ctx) { 1329 *ctx = state->indir_desc; 1330 } 1331 } 1332 1333 static inline bool is_used_desc_packed(const struct vring_virtqueue *vq, 1334 u16 idx, bool used_wrap_counter) 1335 { 1336 bool avail, used; 1337 u16 flags; 1338 1339 flags = le16_to_cpu(vq->packed.vring.desc[idx].flags); 1340 avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL)); 1341 used = !!(flags & (1 << VRING_PACKED_DESC_F_USED)); 1342 1343 return avail == used && used == used_wrap_counter; 1344 } 1345 1346 static inline bool more_used_packed(const struct vring_virtqueue *vq) 1347 { 1348 return is_used_desc_packed(vq, vq->last_used_idx, 1349 vq->packed.used_wrap_counter); 1350 } 1351 1352 static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, 1353 unsigned int *len, 1354 void **ctx) 1355 { 1356 struct vring_virtqueue *vq = to_vvq(_vq); 1357 u16 last_used, id; 1358 void *ret; 1359 1360 START_USE(vq); 1361 1362 if (unlikely(vq->broken)) { 1363 END_USE(vq); 1364 return NULL; 1365 } 1366 1367 if (!more_used_packed(vq)) { 1368 pr_debug("No more buffers in queue\n"); 1369 END_USE(vq); 1370 return NULL; 1371 } 1372 1373 /* Only get used elements after they have been exposed by host. 
*/ 1374 virtio_rmb(vq->weak_barriers); 1375 1376 last_used = vq->last_used_idx; 1377 id = le16_to_cpu(vq->packed.vring.desc[last_used].id); 1378 *len = le32_to_cpu(vq->packed.vring.desc[last_used].len); 1379 1380 if (unlikely(id >= vq->packed.vring.num)) { 1381 BAD_RING(vq, "id %u out of range\n", id); 1382 return NULL; 1383 } 1384 if (unlikely(!vq->packed.desc_state[id].data)) { 1385 BAD_RING(vq, "id %u is not a head!\n", id); 1386 return NULL; 1387 } 1388 1389 /* detach_buf_packed clears data, so grab it now. */ 1390 ret = vq->packed.desc_state[id].data; 1391 detach_buf_packed(vq, id, ctx); 1392 1393 vq->last_used_idx += vq->packed.desc_state[id].num; 1394 if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) { 1395 vq->last_used_idx -= vq->packed.vring.num; 1396 vq->packed.used_wrap_counter ^= 1; 1397 } 1398 1399 /* 1400 * If we expect an interrupt for the next entry, tell host 1401 * by writing event index and flush out the write before 1402 * the read in the next get_buf call. 1403 */ 1404 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC) 1405 virtio_store_mb(vq->weak_barriers, 1406 &vq->packed.vring.driver->off_wrap, 1407 cpu_to_le16(vq->last_used_idx | 1408 (vq->packed.used_wrap_counter << 1409 VRING_PACKED_EVENT_F_WRAP_CTR))); 1410 1411 LAST_ADD_TIME_INVALID(vq); 1412 1413 END_USE(vq); 1414 return ret; 1415 } 1416 1417 static void virtqueue_disable_cb_packed(struct virtqueue *_vq) 1418 { 1419 struct vring_virtqueue *vq = to_vvq(_vq); 1420 1421 if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) { 1422 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; 1423 vq->packed.vring.driver->flags = 1424 cpu_to_le16(vq->packed.event_flags_shadow); 1425 } 1426 } 1427 1428 static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq) 1429 { 1430 struct vring_virtqueue *vq = to_vvq(_vq); 1431 1432 START_USE(vq); 1433 1434 /* 1435 * We optimistically turn back on interrupts, then check if there was 1436 * more to do. 1437 */ 1438 1439 if (vq->event) { 1440 vq->packed.vring.driver->off_wrap = 1441 cpu_to_le16(vq->last_used_idx | 1442 (vq->packed.used_wrap_counter << 1443 VRING_PACKED_EVENT_F_WRAP_CTR)); 1444 /* 1445 * We need to update event offset and event wrap 1446 * counter first before updating event flags. 1447 */ 1448 virtio_wmb(vq->weak_barriers); 1449 } 1450 1451 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 1452 vq->packed.event_flags_shadow = vq->event ? 1453 VRING_PACKED_EVENT_FLAG_DESC : 1454 VRING_PACKED_EVENT_FLAG_ENABLE; 1455 vq->packed.vring.driver->flags = 1456 cpu_to_le16(vq->packed.event_flags_shadow); 1457 } 1458 1459 END_USE(vq); 1460 return vq->last_used_idx | ((u16)vq->packed.used_wrap_counter << 1461 VRING_PACKED_EVENT_F_WRAP_CTR); 1462 } 1463 1464 static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap) 1465 { 1466 struct vring_virtqueue *vq = to_vvq(_vq); 1467 bool wrap_counter; 1468 u16 used_idx; 1469 1470 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; 1471 used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); 1472 1473 return is_used_desc_packed(vq, used_idx, wrap_counter); 1474 } 1475 1476 static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq) 1477 { 1478 struct vring_virtqueue *vq = to_vvq(_vq); 1479 u16 used_idx, wrap_counter; 1480 u16 bufs; 1481 1482 START_USE(vq); 1483 1484 /* 1485 * We optimistically turn back on interrupts, then check if there was 1486 * more to do. 
1487 */ 1488 1489 if (vq->event) { 1490 /* TODO: tune this threshold */ 1491 bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4; 1492 wrap_counter = vq->packed.used_wrap_counter; 1493 1494 used_idx = vq->last_used_idx + bufs; 1495 if (used_idx >= vq->packed.vring.num) { 1496 used_idx -= vq->packed.vring.num; 1497 wrap_counter ^= 1; 1498 } 1499 1500 vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx | 1501 (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR)); 1502 1503 /* 1504 * We need to update event offset and event wrap 1505 * counter first before updating event flags. 1506 */ 1507 virtio_wmb(vq->weak_barriers); 1508 } 1509 1510 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 1511 vq->packed.event_flags_shadow = vq->event ? 1512 VRING_PACKED_EVENT_FLAG_DESC : 1513 VRING_PACKED_EVENT_FLAG_ENABLE; 1514 vq->packed.vring.driver->flags = 1515 cpu_to_le16(vq->packed.event_flags_shadow); 1516 } 1517 1518 /* 1519 * We need to update event suppression structure first 1520 * before re-checking for more used buffers. 1521 */ 1522 virtio_mb(vq->weak_barriers); 1523 1524 if (is_used_desc_packed(vq, 1525 vq->last_used_idx, 1526 vq->packed.used_wrap_counter)) { 1527 END_USE(vq); 1528 return false; 1529 } 1530 1531 END_USE(vq); 1532 return true; 1533 } 1534 1535 static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq) 1536 { 1537 struct vring_virtqueue *vq = to_vvq(_vq); 1538 unsigned int i; 1539 void *buf; 1540 1541 START_USE(vq); 1542 1543 for (i = 0; i < vq->packed.vring.num; i++) { 1544 if (!vq->packed.desc_state[i].data) 1545 continue; 1546 /* detach_buf clears data, so grab it now. */ 1547 buf = vq->packed.desc_state[i].data; 1548 detach_buf_packed(vq, i, NULL); 1549 END_USE(vq); 1550 return buf; 1551 } 1552 /* That should have freed everything. 
*/ 1553 BUG_ON(vq->vq.num_free != vq->packed.vring.num); 1554 1555 END_USE(vq); 1556 return NULL; 1557 } 1558 1559 static struct virtqueue *vring_create_virtqueue_packed( 1560 unsigned int index, 1561 unsigned int num, 1562 unsigned int vring_align, 1563 struct virtio_device *vdev, 1564 bool weak_barriers, 1565 bool may_reduce_num, 1566 bool context, 1567 bool (*notify)(struct virtqueue *), 1568 void (*callback)(struct virtqueue *), 1569 const char *name) 1570 { 1571 struct vring_virtqueue *vq; 1572 struct vring_packed_desc *ring; 1573 struct vring_packed_desc_event *driver, *device; 1574 dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr; 1575 size_t ring_size_in_bytes, event_size_in_bytes; 1576 unsigned int i; 1577 1578 ring_size_in_bytes = num * sizeof(struct vring_packed_desc); 1579 1580 ring = vring_alloc_queue(vdev, ring_size_in_bytes, 1581 &ring_dma_addr, 1582 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1583 if (!ring) 1584 goto err_ring; 1585 1586 event_size_in_bytes = sizeof(struct vring_packed_desc_event); 1587 1588 driver = vring_alloc_queue(vdev, event_size_in_bytes, 1589 &driver_event_dma_addr, 1590 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1591 if (!driver) 1592 goto err_driver; 1593 1594 device = vring_alloc_queue(vdev, event_size_in_bytes, 1595 &device_event_dma_addr, 1596 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1597 if (!device) 1598 goto err_device; 1599 1600 vq = kmalloc(sizeof(*vq), GFP_KERNEL); 1601 if (!vq) 1602 goto err_vq; 1603 1604 vq->vq.callback = callback; 1605 vq->vq.vdev = vdev; 1606 vq->vq.name = name; 1607 vq->vq.num_free = num; 1608 vq->vq.index = index; 1609 vq->we_own_ring = true; 1610 vq->notify = notify; 1611 vq->weak_barriers = weak_barriers; 1612 vq->broken = false; 1613 vq->last_used_idx = 0; 1614 vq->event_triggered = false; 1615 vq->num_added = 0; 1616 vq->packed_ring = true; 1617 vq->use_dma_api = vring_use_dma_api(vdev); 1618 #ifdef DEBUG 1619 vq->in_use = false; 1620 vq->last_add_time_valid = false; 1621 #endif 1622 1623 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && 1624 !context; 1625 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 1626 1627 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) 1628 vq->weak_barriers = false; 1629 1630 vq->packed.ring_dma_addr = ring_dma_addr; 1631 vq->packed.driver_event_dma_addr = driver_event_dma_addr; 1632 vq->packed.device_event_dma_addr = device_event_dma_addr; 1633 1634 vq->packed.ring_size_in_bytes = ring_size_in_bytes; 1635 vq->packed.event_size_in_bytes = event_size_in_bytes; 1636 1637 vq->packed.vring.num = num; 1638 vq->packed.vring.desc = ring; 1639 vq->packed.vring.driver = driver; 1640 vq->packed.vring.device = device; 1641 1642 vq->packed.next_avail_idx = 0; 1643 vq->packed.avail_wrap_counter = 1; 1644 vq->packed.used_wrap_counter = 1; 1645 vq->packed.event_flags_shadow = 0; 1646 vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL; 1647 1648 vq->packed.desc_state = kmalloc_array(num, 1649 sizeof(struct vring_desc_state_packed), 1650 GFP_KERNEL); 1651 if (!vq->packed.desc_state) 1652 goto err_desc_state; 1653 1654 memset(vq->packed.desc_state, 0, 1655 num * sizeof(struct vring_desc_state_packed)); 1656 1657 /* Put everything in free lists. 
*/ 1658 vq->free_head = 0; 1659 for (i = 0; i < num-1; i++) 1660 vq->packed.desc_state[i].next = i + 1; 1661 1662 vq->packed.desc_extra = kmalloc_array(num, 1663 sizeof(struct vring_desc_extra_packed), 1664 GFP_KERNEL); 1665 if (!vq->packed.desc_extra) 1666 goto err_desc_extra; 1667 1668 memset(vq->packed.desc_extra, 0, 1669 num * sizeof(struct vring_desc_extra_packed)); 1670 1671 /* No callback? Tell other side not to bother us. */ 1672 if (!callback) { 1673 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; 1674 vq->packed.vring.driver->flags = 1675 cpu_to_le16(vq->packed.event_flags_shadow); 1676 } 1677 1678 list_add_tail(&vq->vq.list, &vdev->vqs); 1679 return &vq->vq; 1680 1681 err_desc_extra: 1682 kfree(vq->packed.desc_state); 1683 err_desc_state: 1684 kfree(vq); 1685 err_vq: 1686 vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr); 1687 err_device: 1688 vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr); 1689 err_driver: 1690 vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr); 1691 err_ring: 1692 return NULL; 1693 } 1694 1695 1696 /* 1697 * Generic functions and exported symbols. 1698 */ 1699 1700 static inline int virtqueue_add(struct virtqueue *_vq, 1701 struct scatterlist *sgs[], 1702 unsigned int total_sg, 1703 unsigned int out_sgs, 1704 unsigned int in_sgs, 1705 void *data, 1706 void *ctx, 1707 gfp_t gfp) 1708 { 1709 struct vring_virtqueue *vq = to_vvq(_vq); 1710 1711 return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg, 1712 out_sgs, in_sgs, data, ctx, gfp) : 1713 virtqueue_add_split(_vq, sgs, total_sg, 1714 out_sgs, in_sgs, data, ctx, gfp); 1715 } 1716 1717 /** 1718 * virtqueue_add_sgs - expose buffers to other end 1719 * @_vq: the struct virtqueue we're talking about. 1720 * @sgs: array of terminated scatterlists. 1721 * @out_sgs: the number of scatterlists readable by other side 1722 * @in_sgs: the number of scatterlists which are writable (after readable ones) 1723 * @data: the token identifying the buffer. 1724 * @gfp: how to do memory allocations (if necessary). 1725 * 1726 * Caller must ensure we don't call this with other virtqueue operations 1727 * at the same time (except where noted). 1728 * 1729 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1730 */ 1731 int virtqueue_add_sgs(struct virtqueue *_vq, 1732 struct scatterlist *sgs[], 1733 unsigned int out_sgs, 1734 unsigned int in_sgs, 1735 void *data, 1736 gfp_t gfp) 1737 { 1738 unsigned int i, total_sg = 0; 1739 1740 /* Count them first. */ 1741 for (i = 0; i < out_sgs + in_sgs; i++) { 1742 struct scatterlist *sg; 1743 1744 for (sg = sgs[i]; sg; sg = sg_next(sg)) 1745 total_sg++; 1746 } 1747 return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, 1748 data, NULL, gfp); 1749 } 1750 EXPORT_SYMBOL_GPL(virtqueue_add_sgs); 1751 1752 /** 1753 * virtqueue_add_outbuf - expose output buffers to other end 1754 * @vq: the struct virtqueue we're talking about. 1755 * @sg: scatterlist (must be well-formed and terminated!) 1756 * @num: the number of entries in @sg readable by other side 1757 * @data: the token identifying the buffer. 1758 * @gfp: how to do memory allocations (if necessary). 1759 * 1760 * Caller must ensure we don't call this with other virtqueue operations 1761 * at the same time (except where noted). 1762 * 1763 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 
1764 */ 1765 int virtqueue_add_outbuf(struct virtqueue *vq, 1766 struct scatterlist *sg, unsigned int num, 1767 void *data, 1768 gfp_t gfp) 1769 { 1770 return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp); 1771 } 1772 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); 1773 1774 /** 1775 * virtqueue_add_inbuf - expose input buffers to other end 1776 * @vq: the struct virtqueue we're talking about. 1777 * @sg: scatterlist (must be well-formed and terminated!) 1778 * @num: the number of entries in @sg writable by other side 1779 * @data: the token identifying the buffer. 1780 * @gfp: how to do memory allocations (if necessary). 1781 * 1782 * Caller must ensure we don't call this with other virtqueue operations 1783 * at the same time (except where noted). 1784 * 1785 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1786 */ 1787 int virtqueue_add_inbuf(struct virtqueue *vq, 1788 struct scatterlist *sg, unsigned int num, 1789 void *data, 1790 gfp_t gfp) 1791 { 1792 return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp); 1793 } 1794 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); 1795 1796 /** 1797 * virtqueue_add_inbuf_ctx - expose input buffers to other end 1798 * @vq: the struct virtqueue we're talking about. 1799 * @sg: scatterlist (must be well-formed and terminated!) 1800 * @num: the number of entries in @sg writable by other side 1801 * @data: the token identifying the buffer. 1802 * @ctx: extra context for the token 1803 * @gfp: how to do memory allocations (if necessary). 1804 * 1805 * Caller must ensure we don't call this with other virtqueue operations 1806 * at the same time (except where noted). 1807 * 1808 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1809 */ 1810 int virtqueue_add_inbuf_ctx(struct virtqueue *vq, 1811 struct scatterlist *sg, unsigned int num, 1812 void *data, 1813 void *ctx, 1814 gfp_t gfp) 1815 { 1816 return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp); 1817 } 1818 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx); 1819 1820 /** 1821 * virtqueue_kick_prepare - first half of split virtqueue_kick call. 1822 * @_vq: the struct virtqueue 1823 * 1824 * Instead of virtqueue_kick(), you can do: 1825 * if (virtqueue_kick_prepare(vq)) 1826 * virtqueue_notify(vq); 1827 * 1828 * This is sometimes useful because the virtqueue_kick_prepare() needs 1829 * to be serialized, but the actual virtqueue_notify() call does not. 1830 */ 1831 bool virtqueue_kick_prepare(struct virtqueue *_vq) 1832 { 1833 struct vring_virtqueue *vq = to_vvq(_vq); 1834 1835 return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) : 1836 virtqueue_kick_prepare_split(_vq); 1837 } 1838 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare); 1839 1840 /** 1841 * virtqueue_notify - second half of split virtqueue_kick call. 1842 * @_vq: the struct virtqueue 1843 * 1844 * This does not need to be serialized. 1845 * 1846 * Returns false if host notify failed or queue is broken, otherwise true. 1847 */ 1848 bool virtqueue_notify(struct virtqueue *_vq) 1849 { 1850 struct vring_virtqueue *vq = to_vvq(_vq); 1851 1852 if (unlikely(vq->broken)) 1853 return false; 1854 1855 /* Prod other side to tell it about changes. */ 1856 if (!vq->notify(_vq)) { 1857 vq->broken = true; 1858 return false; 1859 } 1860 return true; 1861 } 1862 EXPORT_SYMBOL_GPL(virtqueue_notify); 1863 1864 /** 1865 * virtqueue_kick - update after add_buf 1866 * @vq: the struct virtqueue 1867 * 1868 * After one or more virtqueue_add_* calls, invoke this to kick 1869 * the other side. 
1870 * 1871 * Caller must ensure we don't call this with other virtqueue 1872 * operations at the same time (except where noted). 1873 * 1874 * Returns false if kick failed, otherwise true. 1875 */ 1876 bool virtqueue_kick(struct virtqueue *vq) 1877 { 1878 if (virtqueue_kick_prepare(vq)) 1879 return virtqueue_notify(vq); 1880 return true; 1881 } 1882 EXPORT_SYMBOL_GPL(virtqueue_kick); 1883 1884 /** 1885 * virtqueue_get_buf_ctx - get the next used buffer 1886 * @_vq: the struct virtqueue we're talking about. 1887 * @len: the length written into the buffer 1888 * @ctx: extra context for the token 1889 * 1890 * If the device wrote data into the buffer, @len will be set to the 1891 * amount written. This means you don't need to clear the buffer 1892 * beforehand to ensure there's no data leakage in the case of short 1893 * writes. 1894 * 1895 * Caller must ensure we don't call this with other virtqueue 1896 * operations at the same time (except where noted). 1897 * 1898 * Returns NULL if there are no used buffers, or the "data" token 1899 * handed to virtqueue_add_*(). 1900 */ 1901 void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, 1902 void **ctx) 1903 { 1904 struct vring_virtqueue *vq = to_vvq(_vq); 1905 1906 return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) : 1907 virtqueue_get_buf_ctx_split(_vq, len, ctx); 1908 } 1909 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx); 1910 1911 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) 1912 { 1913 return virtqueue_get_buf_ctx(_vq, len, NULL); 1914 } 1915 EXPORT_SYMBOL_GPL(virtqueue_get_buf); 1916 /** 1917 * virtqueue_disable_cb - disable callbacks 1918 * @_vq: the struct virtqueue we're talking about. 1919 * 1920 * Note that this is not necessarily synchronous, hence unreliable and only 1921 * useful as an optimization. 1922 * 1923 * Unlike other operations, this need not be serialized. 1924 */ 1925 void virtqueue_disable_cb(struct virtqueue *_vq) 1926 { 1927 struct vring_virtqueue *vq = to_vvq(_vq); 1928 1929 /* If device triggered an event already it won't trigger one again: 1930 * no need to disable. 1931 */ 1932 if (vq->event_triggered) 1933 return; 1934 1935 if (vq->packed_ring) 1936 virtqueue_disable_cb_packed(_vq); 1937 else 1938 virtqueue_disable_cb_split(_vq); 1939 } 1940 EXPORT_SYMBOL_GPL(virtqueue_disable_cb); 1941 1942 /** 1943 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb 1944 * @_vq: the struct virtqueue we're talking about. 1945 * 1946 * This re-enables callbacks; it returns current queue state 1947 * in an opaque unsigned value. This value should be later tested by 1948 * virtqueue_poll, to detect a possible race between the driver checking for 1949 * more work, and enabling callbacks. 1950 * 1951 * Caller must ensure we don't call this with other virtqueue 1952 * operations at the same time (except where noted). 1953 */ 1954 unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq) 1955 { 1956 struct vring_virtqueue *vq = to_vvq(_vq); 1957 1958 if (vq->event_triggered) 1959 vq->event_triggered = false; 1960 1961 return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) : 1962 virtqueue_enable_cb_prepare_split(_vq); 1963 } 1964 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); 1965 1966 /** 1967 * virtqueue_poll - query pending used buffers 1968 * @_vq: the struct virtqueue we're talking about. 1969 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare). 1970 * 1971 * Returns "true" if there are pending used buffers in the queue. 

/**
 * virtqueue_poll - query pending used buffers
 * @_vq: the struct virtqueue we're talking about.
 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
 *
 * Returns "true" if there are pending used buffers in the queue.
 *
 * This does not need to be serialized.
 */
bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (unlikely(vq->broken))
		return false;

	virtio_mb(vq->weak_barriers);
	return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
				 virtqueue_poll_split(_vq, last_used_idx);
}
EXPORT_SYMBOL_GPL(virtqueue_poll);

/**
 * virtqueue_enable_cb - restart callbacks after disable_cb.
 * @_vq: the struct virtqueue we're talking about.
 *
 * This re-enables callbacks; it returns "false" if there are pending
 * buffers in the queue, to detect a possible race between the driver
 * checking for more work, and enabling callbacks.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 */
bool virtqueue_enable_cb(struct virtqueue *_vq)
{
	unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq);

	return !virtqueue_poll(_vq, last_used_idx);
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb);

/**
 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
 * @_vq: the struct virtqueue we're talking about.
 *
 * This re-enables callbacks but hints to the other side to delay
 * interrupts until most of the available buffers have been processed;
 * it returns "false" if there are many pending buffers in the queue,
 * to detect a possible race between the driver checking for more work,
 * and enabling callbacks.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 */
bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (vq->event_triggered)
		vq->event_triggered = false;

	return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
				 virtqueue_enable_cb_delayed_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);

/**
 * virtqueue_detach_unused_buf - detach first unused buffer
 * @_vq: the struct virtqueue we're talking about.
 *
 * Returns NULL or the "data" token handed to virtqueue_add_*().
 * This is not valid on an active queue; it is useful only for device
 * shutdown.
 */
void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
				 virtqueue_detach_unused_buf_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
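
/*
 * A minimal sketch of a typical shutdown path using
 * virtqueue_detach_unused_buf(): once the transport has reset the device
 * (so the queue is no longer active), buffers that were added but never
 * used are detached and freed.  The freeing helper is an illustrative
 * assumption:
 *
 *	static void example_free_unused(struct virtqueue *vq,
 *					void (*free_buf)(void *buf))
 *	{
 *		void *buf;
 *
 *		// Only legal once the queue is no longer active.
 *		while ((buf = virtqueue_detach_unused_buf(vq)))
 *			free_buf(buf);
 *	}
 */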

static inline bool more_used(const struct vring_virtqueue *vq)
{
	return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
}

irqreturn_t vring_interrupt(int irq, void *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (!more_used(vq)) {
		pr_debug("virtqueue interrupt with no work for %p\n", vq);
		return IRQ_NONE;
	}

	if (unlikely(vq->broken))
		return IRQ_HANDLED;

	/* Just a hint for performance: so it's ok that this can be racy! */
	if (vq->event)
		vq->event_triggered = true;

	pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
	if (vq->vq.callback)
		vq->vq.callback(&vq->vq);

	return IRQ_HANDLED;
}
EXPORT_SYMBOL_GPL(vring_interrupt);
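
/*
 * A minimal sketch of the callback that vring_interrupt() ends up
 * invoking, i.e. the one a driver passed to vring_create_virtqueue() or
 * __vring_new_virtqueue().  A common shape is to disable further
 * callbacks and defer the real work; the device structure and work item
 * here are illustrative assumptions:
 *
 *	struct example_dev {
 *		struct work_struct rx_work;
 *	};
 *
 *	static void example_vq_callback(struct virtqueue *vq)
 *	{
 *		struct example_dev *d = vq->vdev->priv;
 *
 *		// Runs from the transport's interrupt path: keep it short.
 *		virtqueue_disable_cb(vq);
 *		schedule_work(&d->rx_work);
 *	}
 */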

/* Only available for split ring */
struct virtqueue *__vring_new_virtqueue(unsigned int index,
					struct vring vring,
					struct virtio_device *vdev,
					bool weak_barriers,
					bool context,
					bool (*notify)(struct virtqueue *),
					void (*callback)(struct virtqueue *),
					const char *name)
{
	unsigned int i;
	struct vring_virtqueue *vq;

	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
		return NULL;

	vq = kmalloc(sizeof(*vq), GFP_KERNEL);
	if (!vq)
		return NULL;

	vq->packed_ring = false;
	vq->vq.callback = callback;
	vq->vq.vdev = vdev;
	vq->vq.name = name;
	vq->vq.num_free = vring.num;
	vq->vq.index = index;
	vq->we_own_ring = false;
	vq->notify = notify;
	vq->weak_barriers = weak_barriers;
	vq->broken = false;
	vq->last_used_idx = 0;
	vq->event_triggered = false;
	vq->num_added = 0;
	vq->use_dma_api = vring_use_dma_api(vdev);
#ifdef DEBUG
	vq->in_use = false;
	vq->last_add_time_valid = false;
#endif

	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
		!context;
	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);

	if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
		vq->weak_barriers = false;

	vq->split.queue_dma_addr = 0;
	vq->split.queue_size_in_bytes = 0;

	vq->split.vring = vring;
	vq->split.avail_flags_shadow = 0;
	vq->split.avail_idx_shadow = 0;

	/* No callback? Tell other side not to bother us. */
	if (!callback) {
		vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
		if (!vq->event)
			vq->split.vring.avail->flags = cpu_to_virtio16(vdev,
					vq->split.avail_flags_shadow);
	}

	vq->split.desc_state = kmalloc_array(vring.num,
			sizeof(struct vring_desc_state_split), GFP_KERNEL);
	if (!vq->split.desc_state) {
		kfree(vq);
		return NULL;
	}

	/* Put everything in free lists. */
	vq->free_head = 0;
	for (i = 0; i < vring.num - 1; i++)
		vq->split.vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
	memset(vq->split.desc_state, 0, vring.num *
			sizeof(struct vring_desc_state_split));

	list_add_tail(&vq->vq.list, &vdev->vqs);
	return &vq->vq;
}
EXPORT_SYMBOL_GPL(__vring_new_virtqueue);

struct virtqueue *vring_create_virtqueue(
	unsigned int index,
	unsigned int num,
	unsigned int vring_align,
	struct virtio_device *vdev,
	bool weak_barriers,
	bool may_reduce_num,
	bool context,
	bool (*notify)(struct virtqueue *),
	void (*callback)(struct virtqueue *),
	const char *name)
{
	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
		return vring_create_virtqueue_packed(index, num, vring_align,
				vdev, weak_barriers, may_reduce_num,
				context, notify, callback, name);

	return vring_create_virtqueue_split(index, num, vring_align,
			vdev, weak_barriers, may_reduce_num,
			context, notify, callback, name);
}
EXPORT_SYMBOL_GPL(vring_create_virtqueue);

/* Only available for split ring */
struct virtqueue *vring_new_virtqueue(unsigned int index,
				      unsigned int num,
				      unsigned int vring_align,
				      struct virtio_device *vdev,
				      bool weak_barriers,
				      bool context,
				      void *pages,
				      bool (*notify)(struct virtqueue *vq),
				      void (*callback)(struct virtqueue *vq),
				      const char *name)
{
	struct vring vring;

	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
		return NULL;

	vring_init(&vring, num, pages, vring_align);
	return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
				     notify, callback, name);
}
EXPORT_SYMBOL_GPL(vring_new_virtqueue);

void vring_del_virtqueue(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (vq->we_own_ring) {
		if (vq->packed_ring) {
			vring_free_queue(vq->vq.vdev,
					 vq->packed.ring_size_in_bytes,
					 vq->packed.vring.desc,
					 vq->packed.ring_dma_addr);

			vring_free_queue(vq->vq.vdev,
					 vq->packed.event_size_in_bytes,
					 vq->packed.vring.driver,
					 vq->packed.driver_event_dma_addr);

			vring_free_queue(vq->vq.vdev,
					 vq->packed.event_size_in_bytes,
					 vq->packed.vring.device,
					 vq->packed.device_event_dma_addr);

			kfree(vq->packed.desc_state);
			kfree(vq->packed.desc_extra);
		} else {
			vring_free_queue(vq->vq.vdev,
					 vq->split.queue_size_in_bytes,
					 vq->split.vring.desc,
					 vq->split.queue_dma_addr);
		}
	}
	if (!vq->packed_ring)
		kfree(vq->split.desc_state);
	list_del(&_vq->list);
	kfree(vq);
}
EXPORT_SYMBOL_GPL(vring_del_virtqueue);
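
/*
 * A minimal sketch of how a transport might create and tear down a ring
 * with vring_create_virtqueue() and vring_del_virtqueue().  The notify
 * hook, queue size and alignment below are illustrative assumptions; a
 * real transport derives them from its device registers, and teardown is
 * a single vring_del_virtqueue() call after the device has been reset:
 *
 *	static bool example_notify(struct virtqueue *vq)
 *	{
 *		// Poke the device, e.g. by writing vq->index to a doorbell.
 *		return true;
 *	}
 *
 *	static struct virtqueue *example_setup_vq(struct virtio_device *vdev,
 *						  unsigned int index,
 *						  void (*callback)(struct virtqueue *))
 *	{
 *		// 128 entries, 4096-byte alignment, weak barriers allowed,
 *		// may shrink the ring on allocation failure, no per-buffer ctx.
 *		return vring_create_virtqueue(index, 128, 4096, vdev,
 *					      true, true, false,
 *					      example_notify, callback,
 *					      "example-vq");
 *	}
 */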

/* Manipulates transport-specific feature bits. */
void vring_transport_features(struct virtio_device *vdev)
{
	unsigned int i;

	for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
		switch (i) {
		case VIRTIO_RING_F_INDIRECT_DESC:
			break;
		case VIRTIO_RING_F_EVENT_IDX:
			break;
		case VIRTIO_F_VERSION_1:
			break;
		case VIRTIO_F_ACCESS_PLATFORM:
			break;
		case VIRTIO_F_RING_PACKED:
			break;
		case VIRTIO_F_ORDER_PLATFORM:
			break;
		default:
			/* We don't understand this bit. */
			__virtio_clear_bit(vdev, i);
		}
	}
}
EXPORT_SYMBOL_GPL(vring_transport_features);

/**
 * virtqueue_get_vring_size - return the size of the virtqueue's vring
 * @_vq: the struct virtqueue containing the vring of interest.
 *
 * Returns the size of the vring. This is mainly used for boasting to
 * userspace. Unlike other operations, this need not be serialized.
 */
unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
}
EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);

bool virtqueue_is_broken(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return vq->broken;
}
EXPORT_SYMBOL_GPL(virtqueue_is_broken);

/*
 * This should prevent the device from being used, allowing drivers to
 * recover. You may need to grab appropriate locks to flush.
 */
void virtio_break_device(struct virtio_device *dev)
{
	struct virtqueue *_vq;

	list_for_each_entry(_vq, &dev->vqs, list) {
		struct vring_virtqueue *vq = to_vvq(_vq);

		vq->broken = true;
	}
}
EXPORT_SYMBOL_GPL(virtio_break_device);

dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	BUG_ON(!vq->we_own_ring);

	if (vq->packed_ring)
		return vq->packed.ring_dma_addr;

	return vq->split.queue_dma_addr;
}
EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);

dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	BUG_ON(!vq->we_own_ring);

	if (vq->packed_ring)
		return vq->packed.driver_event_dma_addr;

	return vq->split.queue_dma_addr +
		((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
}
EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);

dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	BUG_ON(!vq->we_own_ring);

	if (vq->packed_ring)
		return vq->packed.device_event_dma_addr;

	return vq->split.queue_dma_addr +
		((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
}
EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);

/* Only available for split ring */
const struct vring *virtqueue_get_vring(struct virtqueue *vq)
{
	return &to_vvq(vq)->split.vring;
}
EXPORT_SYMBOL_GPL(virtqueue_get_vring);

MODULE_LICENSE("GPL");
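
/*
 * A minimal sketch of how a transport that let this file allocate the
 * ring (we_own_ring) might collect the ring addresses to program into the
 * device, using the getters above.  The register-block structure and
 * helper name are illustrative assumptions only:
 *
 *	struct example_queue_regs {
 *		u64 desc;
 *		u64 avail;
 *		u64 used;
 *		u32 size;
 *	};
 *
 *	static void example_fill_queue_regs(struct virtqueue *vq,
 *					    struct example_queue_regs *r)
 *	{
 *		r->size  = virtqueue_get_vring_size(vq);
 *		r->desc  = virtqueue_get_desc_addr(vq);
 *		r->avail = virtqueue_get_avail_addr(vq);
 *		r->used  = virtqueue_get_used_addr(vq);
 *	}
 */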