1 /* Virtio ring implementation. 2 * 3 * Copyright 2007 Rusty Russell IBM Corporation 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation; either version 2 of the License, or 8 * (at your option) any later version. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, write to the Free Software 17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 18 */ 19 #include <linux/virtio.h> 20 #include <linux/virtio_ring.h> 21 #include <linux/virtio_config.h> 22 #include <linux/device.h> 23 #include <linux/slab.h> 24 #include <linux/module.h> 25 #include <linux/hrtimer.h> 26 #include <linux/dma-mapping.h> 27 #include <xen/xen.h> 28 29 #ifdef DEBUG 30 /* For development, we want to crash whenever the ring is screwed. */ 31 #define BAD_RING(_vq, fmt, args...) \ 32 do { \ 33 dev_err(&(_vq)->vq.vdev->dev, \ 34 "%s:"fmt, (_vq)->vq.name, ##args); \ 35 BUG(); \ 36 } while (0) 37 /* Caller is supposed to guarantee no reentry. */ 38 #define START_USE(_vq) \ 39 do { \ 40 if ((_vq)->in_use) \ 41 panic("%s:in_use = %i\n", \ 42 (_vq)->vq.name, (_vq)->in_use); \ 43 (_vq)->in_use = __LINE__; \ 44 } while (0) 45 #define END_USE(_vq) \ 46 do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0) 47 #define LAST_ADD_TIME_UPDATE(_vq) \ 48 do { \ 49 ktime_t now = ktime_get(); \ 50 \ 51 /* No kick or get, with .1 second between? Warn. */ \ 52 if ((_vq)->last_add_time_valid) \ 53 WARN_ON(ktime_to_ms(ktime_sub(now, \ 54 (_vq)->last_add_time)) > 100); \ 55 (_vq)->last_add_time = now; \ 56 (_vq)->last_add_time_valid = true; \ 57 } while (0) 58 #define LAST_ADD_TIME_CHECK(_vq) \ 59 do { \ 60 if ((_vq)->last_add_time_valid) { \ 61 WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \ 62 (_vq)->last_add_time)) > 100); \ 63 } \ 64 } while (0) 65 #define LAST_ADD_TIME_INVALID(_vq) \ 66 ((_vq)->last_add_time_valid = false) 67 #else 68 #define BAD_RING(_vq, fmt, args...) \ 69 do { \ 70 dev_err(&_vq->vq.vdev->dev, \ 71 "%s:"fmt, (_vq)->vq.name, ##args); \ 72 (_vq)->broken = true; \ 73 } while (0) 74 #define START_USE(vq) 75 #define END_USE(vq) 76 #define LAST_ADD_TIME_UPDATE(vq) 77 #define LAST_ADD_TIME_CHECK(vq) 78 #define LAST_ADD_TIME_INVALID(vq) 79 #endif 80 81 struct vring_desc_state_split { 82 void *data; /* Data for callback. */ 83 struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ 84 }; 85 86 struct vring_desc_state_packed { 87 void *data; /* Data for callback. */ 88 struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */ 89 u16 num; /* Descriptor list length. */ 90 u16 next; /* The next desc state in a list. */ 91 u16 last; /* The last desc state in a list. */ 92 }; 93 94 struct vring_desc_extra_packed { 95 dma_addr_t addr; /* Buffer DMA addr. */ 96 u32 len; /* Buffer length. */ 97 u16 flags; /* Descriptor flags. */ 98 }; 99 100 struct vring_virtqueue { 101 struct virtqueue vq; 102 103 /* Is this a packed ring? */ 104 bool packed_ring; 105 106 /* Is DMA API used? */ 107 bool use_dma_api; 108 109 /* Can we use weak barriers? */ 110 bool weak_barriers; 111 112 /* Other side has made a mess, don't try any more. */ 113 bool broken; 114 115 /* Host supports indirect buffers */ 116 bool indirect; 117 118 /* Host publishes avail event idx */ 119 bool event; 120 121 /* Head of free buffer list. */ 122 unsigned int free_head; 123 /* Number we've added since last sync. */ 124 unsigned int num_added; 125 126 /* Last used index we've seen. */ 127 u16 last_used_idx; 128 129 union { 130 /* Available for split ring */ 131 struct { 132 /* Actual memory layout for this queue. */ 133 struct vring vring; 134 135 /* Last written value to avail->flags */ 136 u16 avail_flags_shadow; 137 138 /* 139 * Last written value to avail->idx in 140 * guest byte order. 141 */ 142 u16 avail_idx_shadow; 143 144 /* Per-descriptor state. */ 145 struct vring_desc_state_split *desc_state; 146 147 /* DMA address and size information */ 148 dma_addr_t queue_dma_addr; 149 size_t queue_size_in_bytes; 150 } split; 151 152 /* Available for packed ring */ 153 struct { 154 /* Actual memory layout for this queue. */ 155 struct vring_packed vring; 156 157 /* Driver ring wrap counter. */ 158 bool avail_wrap_counter; 159 160 /* Device ring wrap counter. */ 161 bool used_wrap_counter; 162 163 /* Avail used flags. */ 164 u16 avail_used_flags; 165 166 /* Index of the next avail descriptor. */ 167 u16 next_avail_idx; 168 169 /* 170 * Last written value to driver->flags in 171 * guest byte order. 172 */ 173 u16 event_flags_shadow; 174 175 /* Per-descriptor state. */ 176 struct vring_desc_state_packed *desc_state; 177 struct vring_desc_extra_packed *desc_extra; 178 179 /* DMA address and size information */ 180 dma_addr_t ring_dma_addr; 181 dma_addr_t driver_event_dma_addr; 182 dma_addr_t device_event_dma_addr; 183 size_t ring_size_in_bytes; 184 size_t event_size_in_bytes; 185 } packed; 186 }; 187 188 /* How to notify other side. FIXME: commonalize hcalls! */ 189 bool (*notify)(struct virtqueue *vq); 190 191 /* DMA, allocation, and size information */ 192 bool we_own_ring; 193 194 #ifdef DEBUG 195 /* They're supposed to lock for us. */ 196 unsigned int in_use; 197 198 /* Figure out if their kicks are too delayed. */ 199 bool last_add_time_valid; 200 ktime_t last_add_time; 201 #endif 202 }; 203 204 205 /* 206 * Helpers. 207 */ 208 209 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) 210 211 static inline bool virtqueue_use_indirect(struct virtqueue *_vq, 212 unsigned int total_sg) 213 { 214 struct vring_virtqueue *vq = to_vvq(_vq); 215 216 /* 217 * If the host supports indirect descriptor tables, and we have multiple 218 * buffers, then go indirect. FIXME: tune this threshold 219 */ 220 return (vq->indirect && total_sg > 1 && vq->vq.num_free); 221 } 222 223 /* 224 * Modern virtio devices have feature bits to specify whether they need a 225 * quirk and bypass the IOMMU. If not there, just use the DMA API. 226 * 227 * If there, the interaction between virtio and DMA API is messy. 228 * 229 * On most systems with virtio, physical addresses match bus addresses, 230 * and it doesn't particularly matter whether we use the DMA API. 231 * 232 * On some systems, including Xen and any system with a physical device 233 * that speaks virtio behind a physical IOMMU, we must use the DMA API 234 * for virtio DMA to work at all. 235 * 236 * On other systems, including SPARC and PPC64, virtio-pci devices are 237 * enumerated as though they are behind an IOMMU, but the virtio host 238 * ignores the IOMMU, so we must either pretend that the IOMMU isn't 239 * there or somehow map everything as the identity. 240 * 241 * For the time being, we preserve historic behavior and bypass the DMA 242 * API. 243 * 244 * TODO: install a per-device DMA ops structure that does the right thing 245 * taking into account all the above quirks, and use the DMA API 246 * unconditionally on data path. 247 */ 248 249 static bool vring_use_dma_api(struct virtio_device *vdev) 250 { 251 if (!virtio_has_iommu_quirk(vdev)) 252 return true; 253 254 /* Otherwise, we are left to guess. */ 255 /* 256 * In theory, it's possible to have a buggy QEMU-supposed 257 * emulated Q35 IOMMU and Xen enabled at the same time. On 258 * such a configuration, virtio has never worked and will 259 * not work without an even larger kludge. Instead, enable 260 * the DMA API if we're a Xen guest, which at least allows 261 * all of the sensible Xen configurations to work correctly. 262 */ 263 if (xen_domain()) 264 return true; 265 266 return false; 267 } 268 269 static void *vring_alloc_queue(struct virtio_device *vdev, size_t size, 270 dma_addr_t *dma_handle, gfp_t flag) 271 { 272 if (vring_use_dma_api(vdev)) { 273 return dma_alloc_coherent(vdev->dev.parent, size, 274 dma_handle, flag); 275 } else { 276 void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag); 277 278 if (queue) { 279 phys_addr_t phys_addr = virt_to_phys(queue); 280 *dma_handle = (dma_addr_t)phys_addr; 281 282 /* 283 * Sanity check: make sure we dind't truncate 284 * the address. The only arches I can find that 285 * have 64-bit phys_addr_t but 32-bit dma_addr_t 286 * are certain non-highmem MIPS and x86 287 * configurations, but these configurations 288 * should never allocate physical pages above 32 289 * bits, so this is fine. Just in case, throw a 290 * warning and abort if we end up with an 291 * unrepresentable address. 292 */ 293 if (WARN_ON_ONCE(*dma_handle != phys_addr)) { 294 free_pages_exact(queue, PAGE_ALIGN(size)); 295 return NULL; 296 } 297 } 298 return queue; 299 } 300 } 301 302 static void vring_free_queue(struct virtio_device *vdev, size_t size, 303 void *queue, dma_addr_t dma_handle) 304 { 305 if (vring_use_dma_api(vdev)) 306 dma_free_coherent(vdev->dev.parent, size, queue, dma_handle); 307 else 308 free_pages_exact(queue, PAGE_ALIGN(size)); 309 } 310 311 /* 312 * The DMA ops on various arches are rather gnarly right now, and 313 * making all of the arch DMA ops work on the vring device itself 314 * is a mess. For now, we use the parent device for DMA ops. 315 */ 316 static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq) 317 { 318 return vq->vq.vdev->dev.parent; 319 } 320 321 /* Map one sg entry. */ 322 static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq, 323 struct scatterlist *sg, 324 enum dma_data_direction direction) 325 { 326 if (!vq->use_dma_api) 327 return (dma_addr_t)sg_phys(sg); 328 329 /* 330 * We can't use dma_map_sg, because we don't use scatterlists in 331 * the way it expects (we don't guarantee that the scatterlist 332 * will exist for the lifetime of the mapping). 333 */ 334 return dma_map_page(vring_dma_dev(vq), 335 sg_page(sg), sg->offset, sg->length, 336 direction); 337 } 338 339 static dma_addr_t vring_map_single(const struct vring_virtqueue *vq, 340 void *cpu_addr, size_t size, 341 enum dma_data_direction direction) 342 { 343 if (!vq->use_dma_api) 344 return (dma_addr_t)virt_to_phys(cpu_addr); 345 346 return dma_map_single(vring_dma_dev(vq), 347 cpu_addr, size, direction); 348 } 349 350 static int vring_mapping_error(const struct vring_virtqueue *vq, 351 dma_addr_t addr) 352 { 353 if (!vq->use_dma_api) 354 return 0; 355 356 return dma_mapping_error(vring_dma_dev(vq), addr); 357 } 358 359 360 /* 361 * Split ring specific functions - *_split(). 362 */ 363 364 static void vring_unmap_one_split(const struct vring_virtqueue *vq, 365 struct vring_desc *desc) 366 { 367 u16 flags; 368 369 if (!vq->use_dma_api) 370 return; 371 372 flags = virtio16_to_cpu(vq->vq.vdev, desc->flags); 373 374 if (flags & VRING_DESC_F_INDIRECT) { 375 dma_unmap_single(vring_dma_dev(vq), 376 virtio64_to_cpu(vq->vq.vdev, desc->addr), 377 virtio32_to_cpu(vq->vq.vdev, desc->len), 378 (flags & VRING_DESC_F_WRITE) ? 379 DMA_FROM_DEVICE : DMA_TO_DEVICE); 380 } else { 381 dma_unmap_page(vring_dma_dev(vq), 382 virtio64_to_cpu(vq->vq.vdev, desc->addr), 383 virtio32_to_cpu(vq->vq.vdev, desc->len), 384 (flags & VRING_DESC_F_WRITE) ? 385 DMA_FROM_DEVICE : DMA_TO_DEVICE); 386 } 387 } 388 389 static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq, 390 unsigned int total_sg, 391 gfp_t gfp) 392 { 393 struct vring_desc *desc; 394 unsigned int i; 395 396 /* 397 * We require lowmem mappings for the descriptors because 398 * otherwise virt_to_phys will give us bogus addresses in the 399 * virtqueue. 400 */ 401 gfp &= ~__GFP_HIGHMEM; 402 403 desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp); 404 if (!desc) 405 return NULL; 406 407 for (i = 0; i < total_sg; i++) 408 desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1); 409 return desc; 410 } 411 412 static inline int virtqueue_add_split(struct virtqueue *_vq, 413 struct scatterlist *sgs[], 414 unsigned int total_sg, 415 unsigned int out_sgs, 416 unsigned int in_sgs, 417 void *data, 418 void *ctx, 419 gfp_t gfp) 420 { 421 struct vring_virtqueue *vq = to_vvq(_vq); 422 struct scatterlist *sg; 423 struct vring_desc *desc; 424 unsigned int i, n, avail, descs_used, uninitialized_var(prev), err_idx; 425 int head; 426 bool indirect; 427 428 START_USE(vq); 429 430 BUG_ON(data == NULL); 431 BUG_ON(ctx && vq->indirect); 432 433 if (unlikely(vq->broken)) { 434 END_USE(vq); 435 return -EIO; 436 } 437 438 LAST_ADD_TIME_UPDATE(vq); 439 440 BUG_ON(total_sg == 0); 441 442 head = vq->free_head; 443 444 if (virtqueue_use_indirect(_vq, total_sg)) 445 desc = alloc_indirect_split(_vq, total_sg, gfp); 446 else { 447 desc = NULL; 448 WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect); 449 } 450 451 if (desc) { 452 /* Use a single buffer which doesn't continue */ 453 indirect = true; 454 /* Set up rest to use this indirect table. */ 455 i = 0; 456 descs_used = 1; 457 } else { 458 indirect = false; 459 desc = vq->split.vring.desc; 460 i = head; 461 descs_used = total_sg; 462 } 463 464 if (vq->vq.num_free < descs_used) { 465 pr_debug("Can't add buf len %i - avail = %i\n", 466 descs_used, vq->vq.num_free); 467 /* FIXME: for historical reasons, we force a notify here if 468 * there are outgoing parts to the buffer. Presumably the 469 * host should service the ring ASAP. */ 470 if (out_sgs) 471 vq->notify(&vq->vq); 472 if (indirect) 473 kfree(desc); 474 END_USE(vq); 475 return -ENOSPC; 476 } 477 478 for (n = 0; n < out_sgs; n++) { 479 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 480 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE); 481 if (vring_mapping_error(vq, addr)) 482 goto unmap_release; 483 484 desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT); 485 desc[i].addr = cpu_to_virtio64(_vq->vdev, addr); 486 desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length); 487 prev = i; 488 i = virtio16_to_cpu(_vq->vdev, desc[i].next); 489 } 490 } 491 for (; n < (out_sgs + in_sgs); n++) { 492 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 493 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE); 494 if (vring_mapping_error(vq, addr)) 495 goto unmap_release; 496 497 desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE); 498 desc[i].addr = cpu_to_virtio64(_vq->vdev, addr); 499 desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length); 500 prev = i; 501 i = virtio16_to_cpu(_vq->vdev, desc[i].next); 502 } 503 } 504 /* Last one doesn't continue. */ 505 desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); 506 507 if (indirect) { 508 /* Now that the indirect table is filled in, map it. */ 509 dma_addr_t addr = vring_map_single( 510 vq, desc, total_sg * sizeof(struct vring_desc), 511 DMA_TO_DEVICE); 512 if (vring_mapping_error(vq, addr)) 513 goto unmap_release; 514 515 vq->split.vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, 516 VRING_DESC_F_INDIRECT); 517 vq->split.vring.desc[head].addr = cpu_to_virtio64(_vq->vdev, 518 addr); 519 520 vq->split.vring.desc[head].len = cpu_to_virtio32(_vq->vdev, 521 total_sg * sizeof(struct vring_desc)); 522 } 523 524 /* We're using some buffers from the free list. */ 525 vq->vq.num_free -= descs_used; 526 527 /* Update free pointer */ 528 if (indirect) 529 vq->free_head = virtio16_to_cpu(_vq->vdev, 530 vq->split.vring.desc[head].next); 531 else 532 vq->free_head = i; 533 534 /* Store token and indirect buffer state. */ 535 vq->split.desc_state[head].data = data; 536 if (indirect) 537 vq->split.desc_state[head].indir_desc = desc; 538 else 539 vq->split.desc_state[head].indir_desc = ctx; 540 541 /* Put entry in available array (but don't update avail->idx until they 542 * do sync). */ 543 avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1); 544 vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head); 545 546 /* Descriptors and available array need to be set before we expose the 547 * new available array entries. */ 548 virtio_wmb(vq->weak_barriers); 549 vq->split.avail_idx_shadow++; 550 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, 551 vq->split.avail_idx_shadow); 552 vq->num_added++; 553 554 pr_debug("Added buffer head %i to %p\n", head, vq); 555 END_USE(vq); 556 557 /* This is very unlikely, but theoretically possible. Kick 558 * just in case. */ 559 if (unlikely(vq->num_added == (1 << 16) - 1)) 560 virtqueue_kick(_vq); 561 562 return 0; 563 564 unmap_release: 565 err_idx = i; 566 i = head; 567 568 for (n = 0; n < total_sg; n++) { 569 if (i == err_idx) 570 break; 571 vring_unmap_one_split(vq, &desc[i]); 572 i = virtio16_to_cpu(_vq->vdev, vq->split.vring.desc[i].next); 573 } 574 575 if (indirect) 576 kfree(desc); 577 578 END_USE(vq); 579 return -EIO; 580 } 581 582 static bool virtqueue_kick_prepare_split(struct virtqueue *_vq) 583 { 584 struct vring_virtqueue *vq = to_vvq(_vq); 585 u16 new, old; 586 bool needs_kick; 587 588 START_USE(vq); 589 /* We need to expose available array entries before checking avail 590 * event. */ 591 virtio_mb(vq->weak_barriers); 592 593 old = vq->split.avail_idx_shadow - vq->num_added; 594 new = vq->split.avail_idx_shadow; 595 vq->num_added = 0; 596 597 LAST_ADD_TIME_CHECK(vq); 598 LAST_ADD_TIME_INVALID(vq); 599 600 if (vq->event) { 601 needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev, 602 vring_avail_event(&vq->split.vring)), 603 new, old); 604 } else { 605 needs_kick = !(vq->split.vring.used->flags & 606 cpu_to_virtio16(_vq->vdev, 607 VRING_USED_F_NO_NOTIFY)); 608 } 609 END_USE(vq); 610 return needs_kick; 611 } 612 613 static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, 614 void **ctx) 615 { 616 unsigned int i, j; 617 __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT); 618 619 /* Clear data ptr. */ 620 vq->split.desc_state[head].data = NULL; 621 622 /* Put back on free list: unmap first-level descriptors and find end */ 623 i = head; 624 625 while (vq->split.vring.desc[i].flags & nextflag) { 626 vring_unmap_one_split(vq, &vq->split.vring.desc[i]); 627 i = virtio16_to_cpu(vq->vq.vdev, vq->split.vring.desc[i].next); 628 vq->vq.num_free++; 629 } 630 631 vring_unmap_one_split(vq, &vq->split.vring.desc[i]); 632 vq->split.vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, 633 vq->free_head); 634 vq->free_head = head; 635 636 /* Plus final descriptor */ 637 vq->vq.num_free++; 638 639 if (vq->indirect) { 640 struct vring_desc *indir_desc = 641 vq->split.desc_state[head].indir_desc; 642 u32 len; 643 644 /* Free the indirect table, if any, now that it's unmapped. */ 645 if (!indir_desc) 646 return; 647 648 len = virtio32_to_cpu(vq->vq.vdev, 649 vq->split.vring.desc[head].len); 650 651 BUG_ON(!(vq->split.vring.desc[head].flags & 652 cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT))); 653 BUG_ON(len == 0 || len % sizeof(struct vring_desc)); 654 655 for (j = 0; j < len / sizeof(struct vring_desc); j++) 656 vring_unmap_one_split(vq, &indir_desc[j]); 657 658 kfree(indir_desc); 659 vq->split.desc_state[head].indir_desc = NULL; 660 } else if (ctx) { 661 *ctx = vq->split.desc_state[head].indir_desc; 662 } 663 } 664 665 static inline bool more_used_split(const struct vring_virtqueue *vq) 666 { 667 return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, 668 vq->split.vring.used->idx); 669 } 670 671 static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq, 672 unsigned int *len, 673 void **ctx) 674 { 675 struct vring_virtqueue *vq = to_vvq(_vq); 676 void *ret; 677 unsigned int i; 678 u16 last_used; 679 680 START_USE(vq); 681 682 if (unlikely(vq->broken)) { 683 END_USE(vq); 684 return NULL; 685 } 686 687 if (!more_used_split(vq)) { 688 pr_debug("No more buffers in queue\n"); 689 END_USE(vq); 690 return NULL; 691 } 692 693 /* Only get used array entries after they have been exposed by host. */ 694 virtio_rmb(vq->weak_barriers); 695 696 last_used = (vq->last_used_idx & (vq->split.vring.num - 1)); 697 i = virtio32_to_cpu(_vq->vdev, 698 vq->split.vring.used->ring[last_used].id); 699 *len = virtio32_to_cpu(_vq->vdev, 700 vq->split.vring.used->ring[last_used].len); 701 702 if (unlikely(i >= vq->split.vring.num)) { 703 BAD_RING(vq, "id %u out of range\n", i); 704 return NULL; 705 } 706 if (unlikely(!vq->split.desc_state[i].data)) { 707 BAD_RING(vq, "id %u is not a head!\n", i); 708 return NULL; 709 } 710 711 /* detach_buf_split clears data, so grab it now. */ 712 ret = vq->split.desc_state[i].data; 713 detach_buf_split(vq, i, ctx); 714 vq->last_used_idx++; 715 /* If we expect an interrupt for the next entry, tell host 716 * by writing event index and flush out the write before 717 * the read in the next get_buf call. */ 718 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) 719 virtio_store_mb(vq->weak_barriers, 720 &vring_used_event(&vq->split.vring), 721 cpu_to_virtio16(_vq->vdev, vq->last_used_idx)); 722 723 LAST_ADD_TIME_INVALID(vq); 724 725 END_USE(vq); 726 return ret; 727 } 728 729 static void virtqueue_disable_cb_split(struct virtqueue *_vq) 730 { 731 struct vring_virtqueue *vq = to_vvq(_vq); 732 733 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) { 734 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; 735 if (!vq->event) 736 vq->split.vring.avail->flags = 737 cpu_to_virtio16(_vq->vdev, 738 vq->split.avail_flags_shadow); 739 } 740 } 741 742 static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq) 743 { 744 struct vring_virtqueue *vq = to_vvq(_vq); 745 u16 last_used_idx; 746 747 START_USE(vq); 748 749 /* We optimistically turn back on interrupts, then check if there was 750 * more to do. */ 751 /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to 752 * either clear the flags bit or point the event index at the next 753 * entry. Always do both to keep code simple. */ 754 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 755 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 756 if (!vq->event) 757 vq->split.vring.avail->flags = 758 cpu_to_virtio16(_vq->vdev, 759 vq->split.avail_flags_shadow); 760 } 761 vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev, 762 last_used_idx = vq->last_used_idx); 763 END_USE(vq); 764 return last_used_idx; 765 } 766 767 static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx) 768 { 769 struct vring_virtqueue *vq = to_vvq(_vq); 770 771 return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev, 772 vq->split.vring.used->idx); 773 } 774 775 static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq) 776 { 777 struct vring_virtqueue *vq = to_vvq(_vq); 778 u16 bufs; 779 780 START_USE(vq); 781 782 /* We optimistically turn back on interrupts, then check if there was 783 * more to do. */ 784 /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to 785 * either clear the flags bit or point the event index at the next 786 * entry. Always update the event index to keep code simple. */ 787 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 788 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 789 if (!vq->event) 790 vq->split.vring.avail->flags = 791 cpu_to_virtio16(_vq->vdev, 792 vq->split.avail_flags_shadow); 793 } 794 /* TODO: tune this threshold */ 795 bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4; 796 797 virtio_store_mb(vq->weak_barriers, 798 &vring_used_event(&vq->split.vring), 799 cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs)); 800 801 if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx) 802 - vq->last_used_idx) > bufs)) { 803 END_USE(vq); 804 return false; 805 } 806 807 END_USE(vq); 808 return true; 809 } 810 811 static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq) 812 { 813 struct vring_virtqueue *vq = to_vvq(_vq); 814 unsigned int i; 815 void *buf; 816 817 START_USE(vq); 818 819 for (i = 0; i < vq->split.vring.num; i++) { 820 if (!vq->split.desc_state[i].data) 821 continue; 822 /* detach_buf_split clears data, so grab it now. */ 823 buf = vq->split.desc_state[i].data; 824 detach_buf_split(vq, i, NULL); 825 vq->split.avail_idx_shadow--; 826 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, 827 vq->split.avail_idx_shadow); 828 END_USE(vq); 829 return buf; 830 } 831 /* That should have freed everything. */ 832 BUG_ON(vq->vq.num_free != vq->split.vring.num); 833 834 END_USE(vq); 835 return NULL; 836 } 837 838 static struct virtqueue *vring_create_virtqueue_split( 839 unsigned int index, 840 unsigned int num, 841 unsigned int vring_align, 842 struct virtio_device *vdev, 843 bool weak_barriers, 844 bool may_reduce_num, 845 bool context, 846 bool (*notify)(struct virtqueue *), 847 void (*callback)(struct virtqueue *), 848 const char *name) 849 { 850 struct virtqueue *vq; 851 void *queue = NULL; 852 dma_addr_t dma_addr; 853 size_t queue_size_in_bytes; 854 struct vring vring; 855 856 /* We assume num is a power of 2. */ 857 if (num & (num - 1)) { 858 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num); 859 return NULL; 860 } 861 862 /* TODO: allocate each queue chunk individually */ 863 for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) { 864 queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 865 &dma_addr, 866 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 867 if (queue) 868 break; 869 } 870 871 if (!num) 872 return NULL; 873 874 if (!queue) { 875 /* Try to get a single page. You are my only hope! */ 876 queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 877 &dma_addr, GFP_KERNEL|__GFP_ZERO); 878 } 879 if (!queue) 880 return NULL; 881 882 queue_size_in_bytes = vring_size(num, vring_align); 883 vring_init(&vring, num, queue, vring_align); 884 885 vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context, 886 notify, callback, name); 887 if (!vq) { 888 vring_free_queue(vdev, queue_size_in_bytes, queue, 889 dma_addr); 890 return NULL; 891 } 892 893 to_vvq(vq)->split.queue_dma_addr = dma_addr; 894 to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes; 895 to_vvq(vq)->we_own_ring = true; 896 897 return vq; 898 } 899 900 901 /* 902 * Packed ring specific functions - *_packed(). 903 */ 904 905 static void vring_unmap_state_packed(const struct vring_virtqueue *vq, 906 struct vring_desc_extra_packed *state) 907 { 908 u16 flags; 909 910 if (!vq->use_dma_api) 911 return; 912 913 flags = state->flags; 914 915 if (flags & VRING_DESC_F_INDIRECT) { 916 dma_unmap_single(vring_dma_dev(vq), 917 state->addr, state->len, 918 (flags & VRING_DESC_F_WRITE) ? 919 DMA_FROM_DEVICE : DMA_TO_DEVICE); 920 } else { 921 dma_unmap_page(vring_dma_dev(vq), 922 state->addr, state->len, 923 (flags & VRING_DESC_F_WRITE) ? 924 DMA_FROM_DEVICE : DMA_TO_DEVICE); 925 } 926 } 927 928 static void vring_unmap_desc_packed(const struct vring_virtqueue *vq, 929 struct vring_packed_desc *desc) 930 { 931 u16 flags; 932 933 if (!vq->use_dma_api) 934 return; 935 936 flags = le16_to_cpu(desc->flags); 937 938 if (flags & VRING_DESC_F_INDIRECT) { 939 dma_unmap_single(vring_dma_dev(vq), 940 le64_to_cpu(desc->addr), 941 le32_to_cpu(desc->len), 942 (flags & VRING_DESC_F_WRITE) ? 943 DMA_FROM_DEVICE : DMA_TO_DEVICE); 944 } else { 945 dma_unmap_page(vring_dma_dev(vq), 946 le64_to_cpu(desc->addr), 947 le32_to_cpu(desc->len), 948 (flags & VRING_DESC_F_WRITE) ? 949 DMA_FROM_DEVICE : DMA_TO_DEVICE); 950 } 951 } 952 953 static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg, 954 gfp_t gfp) 955 { 956 struct vring_packed_desc *desc; 957 958 /* 959 * We require lowmem mappings for the descriptors because 960 * otherwise virt_to_phys will give us bogus addresses in the 961 * virtqueue. 962 */ 963 gfp &= ~__GFP_HIGHMEM; 964 965 desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp); 966 967 return desc; 968 } 969 970 static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, 971 struct scatterlist *sgs[], 972 unsigned int total_sg, 973 unsigned int out_sgs, 974 unsigned int in_sgs, 975 void *data, 976 gfp_t gfp) 977 { 978 struct vring_packed_desc *desc; 979 struct scatterlist *sg; 980 unsigned int i, n, err_idx; 981 u16 head, id; 982 dma_addr_t addr; 983 984 head = vq->packed.next_avail_idx; 985 desc = alloc_indirect_packed(total_sg, gfp); 986 987 if (unlikely(vq->vq.num_free < 1)) { 988 pr_debug("Can't add buf len 1 - avail = 0\n"); 989 END_USE(vq); 990 return -ENOSPC; 991 } 992 993 i = 0; 994 id = vq->free_head; 995 BUG_ON(id == vq->packed.vring.num); 996 997 for (n = 0; n < out_sgs + in_sgs; n++) { 998 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 999 addr = vring_map_one_sg(vq, sg, n < out_sgs ? 1000 DMA_TO_DEVICE : DMA_FROM_DEVICE); 1001 if (vring_mapping_error(vq, addr)) 1002 goto unmap_release; 1003 1004 desc[i].flags = cpu_to_le16(n < out_sgs ? 1005 0 : VRING_DESC_F_WRITE); 1006 desc[i].addr = cpu_to_le64(addr); 1007 desc[i].len = cpu_to_le32(sg->length); 1008 i++; 1009 } 1010 } 1011 1012 /* Now that the indirect table is filled in, map it. */ 1013 addr = vring_map_single(vq, desc, 1014 total_sg * sizeof(struct vring_packed_desc), 1015 DMA_TO_DEVICE); 1016 if (vring_mapping_error(vq, addr)) 1017 goto unmap_release; 1018 1019 vq->packed.vring.desc[head].addr = cpu_to_le64(addr); 1020 vq->packed.vring.desc[head].len = cpu_to_le32(total_sg * 1021 sizeof(struct vring_packed_desc)); 1022 vq->packed.vring.desc[head].id = cpu_to_le16(id); 1023 1024 if (vq->use_dma_api) { 1025 vq->packed.desc_extra[id].addr = addr; 1026 vq->packed.desc_extra[id].len = total_sg * 1027 sizeof(struct vring_packed_desc); 1028 vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT | 1029 vq->packed.avail_used_flags; 1030 } 1031 1032 /* 1033 * A driver MUST NOT make the first descriptor in the list 1034 * available before all subsequent descriptors comprising 1035 * the list are made available. 1036 */ 1037 virtio_wmb(vq->weak_barriers); 1038 vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT | 1039 vq->packed.avail_used_flags); 1040 1041 /* We're using some buffers from the free list. */ 1042 vq->vq.num_free -= 1; 1043 1044 /* Update free pointer */ 1045 n = head + 1; 1046 if (n >= vq->packed.vring.num) { 1047 n = 0; 1048 vq->packed.avail_wrap_counter ^= 1; 1049 vq->packed.avail_used_flags ^= 1050 1 << VRING_PACKED_DESC_F_AVAIL | 1051 1 << VRING_PACKED_DESC_F_USED; 1052 } 1053 vq->packed.next_avail_idx = n; 1054 vq->free_head = vq->packed.desc_state[id].next; 1055 1056 /* Store token and indirect buffer state. */ 1057 vq->packed.desc_state[id].num = 1; 1058 vq->packed.desc_state[id].data = data; 1059 vq->packed.desc_state[id].indir_desc = desc; 1060 vq->packed.desc_state[id].last = id; 1061 1062 vq->num_added += 1; 1063 1064 pr_debug("Added buffer head %i to %p\n", head, vq); 1065 END_USE(vq); 1066 1067 return 0; 1068 1069 unmap_release: 1070 err_idx = i; 1071 1072 for (i = 0; i < err_idx; i++) 1073 vring_unmap_desc_packed(vq, &desc[i]); 1074 1075 kfree(desc); 1076 1077 END_USE(vq); 1078 return -EIO; 1079 } 1080 1081 static inline int virtqueue_add_packed(struct virtqueue *_vq, 1082 struct scatterlist *sgs[], 1083 unsigned int total_sg, 1084 unsigned int out_sgs, 1085 unsigned int in_sgs, 1086 void *data, 1087 void *ctx, 1088 gfp_t gfp) 1089 { 1090 struct vring_virtqueue *vq = to_vvq(_vq); 1091 struct vring_packed_desc *desc; 1092 struct scatterlist *sg; 1093 unsigned int i, n, c, descs_used, err_idx; 1094 __le16 uninitialized_var(head_flags), flags; 1095 u16 head, id, uninitialized_var(prev), curr, avail_used_flags; 1096 1097 START_USE(vq); 1098 1099 BUG_ON(data == NULL); 1100 BUG_ON(ctx && vq->indirect); 1101 1102 if (unlikely(vq->broken)) { 1103 END_USE(vq); 1104 return -EIO; 1105 } 1106 1107 LAST_ADD_TIME_UPDATE(vq); 1108 1109 BUG_ON(total_sg == 0); 1110 1111 if (virtqueue_use_indirect(_vq, total_sg)) 1112 return virtqueue_add_indirect_packed(vq, sgs, total_sg, 1113 out_sgs, in_sgs, data, gfp); 1114 1115 head = vq->packed.next_avail_idx; 1116 avail_used_flags = vq->packed.avail_used_flags; 1117 1118 WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect); 1119 1120 desc = vq->packed.vring.desc; 1121 i = head; 1122 descs_used = total_sg; 1123 1124 if (unlikely(vq->vq.num_free < descs_used)) { 1125 pr_debug("Can't add buf len %i - avail = %i\n", 1126 descs_used, vq->vq.num_free); 1127 END_USE(vq); 1128 return -ENOSPC; 1129 } 1130 1131 id = vq->free_head; 1132 BUG_ON(id == vq->packed.vring.num); 1133 1134 curr = id; 1135 c = 0; 1136 for (n = 0; n < out_sgs + in_sgs; n++) { 1137 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 1138 dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ? 1139 DMA_TO_DEVICE : DMA_FROM_DEVICE); 1140 if (vring_mapping_error(vq, addr)) 1141 goto unmap_release; 1142 1143 flags = cpu_to_le16(vq->packed.avail_used_flags | 1144 (++c == total_sg ? 0 : VRING_DESC_F_NEXT) | 1145 (n < out_sgs ? 0 : VRING_DESC_F_WRITE)); 1146 if (i == head) 1147 head_flags = flags; 1148 else 1149 desc[i].flags = flags; 1150 1151 desc[i].addr = cpu_to_le64(addr); 1152 desc[i].len = cpu_to_le32(sg->length); 1153 desc[i].id = cpu_to_le16(id); 1154 1155 if (unlikely(vq->use_dma_api)) { 1156 vq->packed.desc_extra[curr].addr = addr; 1157 vq->packed.desc_extra[curr].len = sg->length; 1158 vq->packed.desc_extra[curr].flags = 1159 le16_to_cpu(flags); 1160 } 1161 prev = curr; 1162 curr = vq->packed.desc_state[curr].next; 1163 1164 if ((unlikely(++i >= vq->packed.vring.num))) { 1165 i = 0; 1166 vq->packed.avail_used_flags ^= 1167 1 << VRING_PACKED_DESC_F_AVAIL | 1168 1 << VRING_PACKED_DESC_F_USED; 1169 } 1170 } 1171 } 1172 1173 if (i < head) 1174 vq->packed.avail_wrap_counter ^= 1; 1175 1176 /* We're using some buffers from the free list. */ 1177 vq->vq.num_free -= descs_used; 1178 1179 /* Update free pointer */ 1180 vq->packed.next_avail_idx = i; 1181 vq->free_head = curr; 1182 1183 /* Store token. */ 1184 vq->packed.desc_state[id].num = descs_used; 1185 vq->packed.desc_state[id].data = data; 1186 vq->packed.desc_state[id].indir_desc = ctx; 1187 vq->packed.desc_state[id].last = prev; 1188 1189 /* 1190 * A driver MUST NOT make the first descriptor in the list 1191 * available before all subsequent descriptors comprising 1192 * the list are made available. 1193 */ 1194 virtio_wmb(vq->weak_barriers); 1195 vq->packed.vring.desc[head].flags = head_flags; 1196 vq->num_added += descs_used; 1197 1198 pr_debug("Added buffer head %i to %p\n", head, vq); 1199 END_USE(vq); 1200 1201 return 0; 1202 1203 unmap_release: 1204 err_idx = i; 1205 i = head; 1206 1207 vq->packed.avail_used_flags = avail_used_flags; 1208 1209 for (n = 0; n < total_sg; n++) { 1210 if (i == err_idx) 1211 break; 1212 vring_unmap_desc_packed(vq, &desc[i]); 1213 i++; 1214 if (i >= vq->packed.vring.num) 1215 i = 0; 1216 } 1217 1218 END_USE(vq); 1219 return -EIO; 1220 } 1221 1222 static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq) 1223 { 1224 struct vring_virtqueue *vq = to_vvq(_vq); 1225 u16 new, old, off_wrap, flags, wrap_counter, event_idx; 1226 bool needs_kick; 1227 union { 1228 struct { 1229 __le16 off_wrap; 1230 __le16 flags; 1231 }; 1232 u32 u32; 1233 } snapshot; 1234 1235 START_USE(vq); 1236 1237 /* 1238 * We need to expose the new flags value before checking notification 1239 * suppressions. 1240 */ 1241 virtio_mb(vq->weak_barriers); 1242 1243 old = vq->packed.next_avail_idx - vq->num_added; 1244 new = vq->packed.next_avail_idx; 1245 vq->num_added = 0; 1246 1247 snapshot.u32 = *(u32 *)vq->packed.vring.device; 1248 flags = le16_to_cpu(snapshot.flags); 1249 1250 LAST_ADD_TIME_CHECK(vq); 1251 LAST_ADD_TIME_INVALID(vq); 1252 1253 if (flags != VRING_PACKED_EVENT_FLAG_DESC) { 1254 needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE); 1255 goto out; 1256 } 1257 1258 off_wrap = le16_to_cpu(snapshot.off_wrap); 1259 1260 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; 1261 event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); 1262 if (wrap_counter != vq->packed.avail_wrap_counter) 1263 event_idx -= vq->packed.vring.num; 1264 1265 needs_kick = vring_need_event(event_idx, new, old); 1266 out: 1267 END_USE(vq); 1268 return needs_kick; 1269 } 1270 1271 static void detach_buf_packed(struct vring_virtqueue *vq, 1272 unsigned int id, void **ctx) 1273 { 1274 struct vring_desc_state_packed *state = NULL; 1275 struct vring_packed_desc *desc; 1276 unsigned int i, curr; 1277 1278 state = &vq->packed.desc_state[id]; 1279 1280 /* Clear data ptr. */ 1281 state->data = NULL; 1282 1283 vq->packed.desc_state[state->last].next = vq->free_head; 1284 vq->free_head = id; 1285 vq->vq.num_free += state->num; 1286 1287 if (unlikely(vq->use_dma_api)) { 1288 curr = id; 1289 for (i = 0; i < state->num; i++) { 1290 vring_unmap_state_packed(vq, 1291 &vq->packed.desc_extra[curr]); 1292 curr = vq->packed.desc_state[curr].next; 1293 } 1294 } 1295 1296 if (vq->indirect) { 1297 u32 len; 1298 1299 /* Free the indirect table, if any, now that it's unmapped. */ 1300 desc = state->indir_desc; 1301 if (!desc) 1302 return; 1303 1304 if (vq->use_dma_api) { 1305 len = vq->packed.desc_extra[id].len; 1306 for (i = 0; i < len / sizeof(struct vring_packed_desc); 1307 i++) 1308 vring_unmap_desc_packed(vq, &desc[i]); 1309 } 1310 kfree(desc); 1311 state->indir_desc = NULL; 1312 } else if (ctx) { 1313 *ctx = state->indir_desc; 1314 } 1315 } 1316 1317 static inline bool is_used_desc_packed(const struct vring_virtqueue *vq, 1318 u16 idx, bool used_wrap_counter) 1319 { 1320 bool avail, used; 1321 u16 flags; 1322 1323 flags = le16_to_cpu(vq->packed.vring.desc[idx].flags); 1324 avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL)); 1325 used = !!(flags & (1 << VRING_PACKED_DESC_F_USED)); 1326 1327 return avail == used && used == used_wrap_counter; 1328 } 1329 1330 static inline bool more_used_packed(const struct vring_virtqueue *vq) 1331 { 1332 return is_used_desc_packed(vq, vq->last_used_idx, 1333 vq->packed.used_wrap_counter); 1334 } 1335 1336 static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, 1337 unsigned int *len, 1338 void **ctx) 1339 { 1340 struct vring_virtqueue *vq = to_vvq(_vq); 1341 u16 last_used, id; 1342 void *ret; 1343 1344 START_USE(vq); 1345 1346 if (unlikely(vq->broken)) { 1347 END_USE(vq); 1348 return NULL; 1349 } 1350 1351 if (!more_used_packed(vq)) { 1352 pr_debug("No more buffers in queue\n"); 1353 END_USE(vq); 1354 return NULL; 1355 } 1356 1357 /* Only get used elements after they have been exposed by host. */ 1358 virtio_rmb(vq->weak_barriers); 1359 1360 last_used = vq->last_used_idx; 1361 id = le16_to_cpu(vq->packed.vring.desc[last_used].id); 1362 *len = le32_to_cpu(vq->packed.vring.desc[last_used].len); 1363 1364 if (unlikely(id >= vq->packed.vring.num)) { 1365 BAD_RING(vq, "id %u out of range\n", id); 1366 return NULL; 1367 } 1368 if (unlikely(!vq->packed.desc_state[id].data)) { 1369 BAD_RING(vq, "id %u is not a head!\n", id); 1370 return NULL; 1371 } 1372 1373 /* detach_buf_packed clears data, so grab it now. */ 1374 ret = vq->packed.desc_state[id].data; 1375 detach_buf_packed(vq, id, ctx); 1376 1377 vq->last_used_idx += vq->packed.desc_state[id].num; 1378 if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) { 1379 vq->last_used_idx -= vq->packed.vring.num; 1380 vq->packed.used_wrap_counter ^= 1; 1381 } 1382 1383 /* 1384 * If we expect an interrupt for the next entry, tell host 1385 * by writing event index and flush out the write before 1386 * the read in the next get_buf call. 1387 */ 1388 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC) 1389 virtio_store_mb(vq->weak_barriers, 1390 &vq->packed.vring.driver->off_wrap, 1391 cpu_to_le16(vq->last_used_idx | 1392 (vq->packed.used_wrap_counter << 1393 VRING_PACKED_EVENT_F_WRAP_CTR))); 1394 1395 LAST_ADD_TIME_INVALID(vq); 1396 1397 END_USE(vq); 1398 return ret; 1399 } 1400 1401 static void virtqueue_disable_cb_packed(struct virtqueue *_vq) 1402 { 1403 struct vring_virtqueue *vq = to_vvq(_vq); 1404 1405 if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) { 1406 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; 1407 vq->packed.vring.driver->flags = 1408 cpu_to_le16(vq->packed.event_flags_shadow); 1409 } 1410 } 1411 1412 static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq) 1413 { 1414 struct vring_virtqueue *vq = to_vvq(_vq); 1415 1416 START_USE(vq); 1417 1418 /* 1419 * We optimistically turn back on interrupts, then check if there was 1420 * more to do. 1421 */ 1422 1423 if (vq->event) { 1424 vq->packed.vring.driver->off_wrap = 1425 cpu_to_le16(vq->last_used_idx | 1426 (vq->packed.used_wrap_counter << 1427 VRING_PACKED_EVENT_F_WRAP_CTR)); 1428 /* 1429 * We need to update event offset and event wrap 1430 * counter first before updating event flags. 1431 */ 1432 virtio_wmb(vq->weak_barriers); 1433 } 1434 1435 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 1436 vq->packed.event_flags_shadow = vq->event ? 1437 VRING_PACKED_EVENT_FLAG_DESC : 1438 VRING_PACKED_EVENT_FLAG_ENABLE; 1439 vq->packed.vring.driver->flags = 1440 cpu_to_le16(vq->packed.event_flags_shadow); 1441 } 1442 1443 END_USE(vq); 1444 return vq->last_used_idx | ((u16)vq->packed.used_wrap_counter << 1445 VRING_PACKED_EVENT_F_WRAP_CTR); 1446 } 1447 1448 static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap) 1449 { 1450 struct vring_virtqueue *vq = to_vvq(_vq); 1451 bool wrap_counter; 1452 u16 used_idx; 1453 1454 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; 1455 used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); 1456 1457 return is_used_desc_packed(vq, used_idx, wrap_counter); 1458 } 1459 1460 static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq) 1461 { 1462 struct vring_virtqueue *vq = to_vvq(_vq); 1463 u16 used_idx, wrap_counter; 1464 u16 bufs; 1465 1466 START_USE(vq); 1467 1468 /* 1469 * We optimistically turn back on interrupts, then check if there was 1470 * more to do. 1471 */ 1472 1473 if (vq->event) { 1474 /* TODO: tune this threshold */ 1475 bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4; 1476 wrap_counter = vq->packed.used_wrap_counter; 1477 1478 used_idx = vq->last_used_idx + bufs; 1479 if (used_idx >= vq->packed.vring.num) { 1480 used_idx -= vq->packed.vring.num; 1481 wrap_counter ^= 1; 1482 } 1483 1484 vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx | 1485 (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR)); 1486 1487 /* 1488 * We need to update event offset and event wrap 1489 * counter first before updating event flags. 1490 */ 1491 virtio_wmb(vq->weak_barriers); 1492 } else { 1493 used_idx = vq->last_used_idx; 1494 wrap_counter = vq->packed.used_wrap_counter; 1495 } 1496 1497 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 1498 vq->packed.event_flags_shadow = vq->event ? 1499 VRING_PACKED_EVENT_FLAG_DESC : 1500 VRING_PACKED_EVENT_FLAG_ENABLE; 1501 vq->packed.vring.driver->flags = 1502 cpu_to_le16(vq->packed.event_flags_shadow); 1503 } 1504 1505 /* 1506 * We need to update event suppression structure first 1507 * before re-checking for more used buffers. 1508 */ 1509 virtio_mb(vq->weak_barriers); 1510 1511 if (is_used_desc_packed(vq, used_idx, wrap_counter)) { 1512 END_USE(vq); 1513 return false; 1514 } 1515 1516 END_USE(vq); 1517 return true; 1518 } 1519 1520 static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq) 1521 { 1522 struct vring_virtqueue *vq = to_vvq(_vq); 1523 unsigned int i; 1524 void *buf; 1525 1526 START_USE(vq); 1527 1528 for (i = 0; i < vq->packed.vring.num; i++) { 1529 if (!vq->packed.desc_state[i].data) 1530 continue; 1531 /* detach_buf clears data, so grab it now. */ 1532 buf = vq->packed.desc_state[i].data; 1533 detach_buf_packed(vq, i, NULL); 1534 END_USE(vq); 1535 return buf; 1536 } 1537 /* That should have freed everything. */ 1538 BUG_ON(vq->vq.num_free != vq->packed.vring.num); 1539 1540 END_USE(vq); 1541 return NULL; 1542 } 1543 1544 static struct virtqueue *vring_create_virtqueue_packed( 1545 unsigned int index, 1546 unsigned int num, 1547 unsigned int vring_align, 1548 struct virtio_device *vdev, 1549 bool weak_barriers, 1550 bool may_reduce_num, 1551 bool context, 1552 bool (*notify)(struct virtqueue *), 1553 void (*callback)(struct virtqueue *), 1554 const char *name) 1555 { 1556 struct vring_virtqueue *vq; 1557 struct vring_packed_desc *ring; 1558 struct vring_packed_desc_event *driver, *device; 1559 dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr; 1560 size_t ring_size_in_bytes, event_size_in_bytes; 1561 unsigned int i; 1562 1563 ring_size_in_bytes = num * sizeof(struct vring_packed_desc); 1564 1565 ring = vring_alloc_queue(vdev, ring_size_in_bytes, 1566 &ring_dma_addr, 1567 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1568 if (!ring) 1569 goto err_ring; 1570 1571 event_size_in_bytes = sizeof(struct vring_packed_desc_event); 1572 1573 driver = vring_alloc_queue(vdev, event_size_in_bytes, 1574 &driver_event_dma_addr, 1575 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1576 if (!driver) 1577 goto err_driver; 1578 1579 device = vring_alloc_queue(vdev, event_size_in_bytes, 1580 &device_event_dma_addr, 1581 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1582 if (!device) 1583 goto err_device; 1584 1585 vq = kmalloc(sizeof(*vq), GFP_KERNEL); 1586 if (!vq) 1587 goto err_vq; 1588 1589 vq->vq.callback = callback; 1590 vq->vq.vdev = vdev; 1591 vq->vq.name = name; 1592 vq->vq.num_free = num; 1593 vq->vq.index = index; 1594 vq->we_own_ring = true; 1595 vq->notify = notify; 1596 vq->weak_barriers = weak_barriers; 1597 vq->broken = false; 1598 vq->last_used_idx = 0; 1599 vq->num_added = 0; 1600 vq->packed_ring = true; 1601 vq->use_dma_api = vring_use_dma_api(vdev); 1602 list_add_tail(&vq->vq.list, &vdev->vqs); 1603 #ifdef DEBUG 1604 vq->in_use = false; 1605 vq->last_add_time_valid = false; 1606 #endif 1607 1608 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && 1609 !context; 1610 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 1611 1612 vq->packed.ring_dma_addr = ring_dma_addr; 1613 vq->packed.driver_event_dma_addr = driver_event_dma_addr; 1614 vq->packed.device_event_dma_addr = device_event_dma_addr; 1615 1616 vq->packed.ring_size_in_bytes = ring_size_in_bytes; 1617 vq->packed.event_size_in_bytes = event_size_in_bytes; 1618 1619 vq->packed.vring.num = num; 1620 vq->packed.vring.desc = ring; 1621 vq->packed.vring.driver = driver; 1622 vq->packed.vring.device = device; 1623 1624 vq->packed.next_avail_idx = 0; 1625 vq->packed.avail_wrap_counter = 1; 1626 vq->packed.used_wrap_counter = 1; 1627 vq->packed.event_flags_shadow = 0; 1628 vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL; 1629 1630 vq->packed.desc_state = kmalloc_array(num, 1631 sizeof(struct vring_desc_state_packed), 1632 GFP_KERNEL); 1633 if (!vq->packed.desc_state) 1634 goto err_desc_state; 1635 1636 memset(vq->packed.desc_state, 0, 1637 num * sizeof(struct vring_desc_state_packed)); 1638 1639 /* Put everything in free lists. */ 1640 vq->free_head = 0; 1641 for (i = 0; i < num-1; i++) 1642 vq->packed.desc_state[i].next = i + 1; 1643 1644 vq->packed.desc_extra = kmalloc_array(num, 1645 sizeof(struct vring_desc_extra_packed), 1646 GFP_KERNEL); 1647 if (!vq->packed.desc_extra) 1648 goto err_desc_extra; 1649 1650 memset(vq->packed.desc_extra, 0, 1651 num * sizeof(struct vring_desc_extra_packed)); 1652 1653 /* No callback? Tell other side not to bother us. */ 1654 if (!callback) { 1655 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; 1656 vq->packed.vring.driver->flags = 1657 cpu_to_le16(vq->packed.event_flags_shadow); 1658 } 1659 1660 return &vq->vq; 1661 1662 err_desc_extra: 1663 kfree(vq->packed.desc_state); 1664 err_desc_state: 1665 kfree(vq); 1666 err_vq: 1667 vring_free_queue(vdev, event_size_in_bytes, device, ring_dma_addr); 1668 err_device: 1669 vring_free_queue(vdev, event_size_in_bytes, driver, ring_dma_addr); 1670 err_driver: 1671 vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr); 1672 err_ring: 1673 return NULL; 1674 } 1675 1676 1677 /* 1678 * Generic functions and exported symbols. 1679 */ 1680 1681 static inline int virtqueue_add(struct virtqueue *_vq, 1682 struct scatterlist *sgs[], 1683 unsigned int total_sg, 1684 unsigned int out_sgs, 1685 unsigned int in_sgs, 1686 void *data, 1687 void *ctx, 1688 gfp_t gfp) 1689 { 1690 struct vring_virtqueue *vq = to_vvq(_vq); 1691 1692 return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg, 1693 out_sgs, in_sgs, data, ctx, gfp) : 1694 virtqueue_add_split(_vq, sgs, total_sg, 1695 out_sgs, in_sgs, data, ctx, gfp); 1696 } 1697 1698 /** 1699 * virtqueue_add_sgs - expose buffers to other end 1700 * @vq: the struct virtqueue we're talking about. 1701 * @sgs: array of terminated scatterlists. 1702 * @out_num: the number of scatterlists readable by other side 1703 * @in_num: the number of scatterlists which are writable (after readable ones) 1704 * @data: the token identifying the buffer. 1705 * @gfp: how to do memory allocations (if necessary). 1706 * 1707 * Caller must ensure we don't call this with other virtqueue operations 1708 * at the same time (except where noted). 1709 * 1710 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1711 */ 1712 int virtqueue_add_sgs(struct virtqueue *_vq, 1713 struct scatterlist *sgs[], 1714 unsigned int out_sgs, 1715 unsigned int in_sgs, 1716 void *data, 1717 gfp_t gfp) 1718 { 1719 unsigned int i, total_sg = 0; 1720 1721 /* Count them first. */ 1722 for (i = 0; i < out_sgs + in_sgs; i++) { 1723 struct scatterlist *sg; 1724 1725 for (sg = sgs[i]; sg; sg = sg_next(sg)) 1726 total_sg++; 1727 } 1728 return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, 1729 data, NULL, gfp); 1730 } 1731 EXPORT_SYMBOL_GPL(virtqueue_add_sgs); 1732 1733 /** 1734 * virtqueue_add_outbuf - expose output buffers to other end 1735 * @vq: the struct virtqueue we're talking about. 1736 * @sg: scatterlist (must be well-formed and terminated!) 1737 * @num: the number of entries in @sg readable by other side 1738 * @data: the token identifying the buffer. 1739 * @gfp: how to do memory allocations (if necessary). 1740 * 1741 * Caller must ensure we don't call this with other virtqueue operations 1742 * at the same time (except where noted). 1743 * 1744 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1745 */ 1746 int virtqueue_add_outbuf(struct virtqueue *vq, 1747 struct scatterlist *sg, unsigned int num, 1748 void *data, 1749 gfp_t gfp) 1750 { 1751 return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp); 1752 } 1753 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); 1754 1755 /** 1756 * virtqueue_add_inbuf - expose input buffers to other end 1757 * @vq: the struct virtqueue we're talking about. 1758 * @sg: scatterlist (must be well-formed and terminated!) 1759 * @num: the number of entries in @sg writable by other side 1760 * @data: the token identifying the buffer. 1761 * @gfp: how to do memory allocations (if necessary). 1762 * 1763 * Caller must ensure we don't call this with other virtqueue operations 1764 * at the same time (except where noted). 1765 * 1766 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1767 */ 1768 int virtqueue_add_inbuf(struct virtqueue *vq, 1769 struct scatterlist *sg, unsigned int num, 1770 void *data, 1771 gfp_t gfp) 1772 { 1773 return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp); 1774 } 1775 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); 1776 1777 /** 1778 * virtqueue_add_inbuf_ctx - expose input buffers to other end 1779 * @vq: the struct virtqueue we're talking about. 1780 * @sg: scatterlist (must be well-formed and terminated!) 1781 * @num: the number of entries in @sg writable by other side 1782 * @data: the token identifying the buffer. 1783 * @ctx: extra context for the token 1784 * @gfp: how to do memory allocations (if necessary). 1785 * 1786 * Caller must ensure we don't call this with other virtqueue operations 1787 * at the same time (except where noted). 1788 * 1789 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1790 */ 1791 int virtqueue_add_inbuf_ctx(struct virtqueue *vq, 1792 struct scatterlist *sg, unsigned int num, 1793 void *data, 1794 void *ctx, 1795 gfp_t gfp) 1796 { 1797 return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp); 1798 } 1799 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx); 1800 1801 /** 1802 * virtqueue_kick_prepare - first half of split virtqueue_kick call. 1803 * @vq: the struct virtqueue 1804 * 1805 * Instead of virtqueue_kick(), you can do: 1806 * if (virtqueue_kick_prepare(vq)) 1807 * virtqueue_notify(vq); 1808 * 1809 * This is sometimes useful because the virtqueue_kick_prepare() needs 1810 * to be serialized, but the actual virtqueue_notify() call does not. 1811 */ 1812 bool virtqueue_kick_prepare(struct virtqueue *_vq) 1813 { 1814 struct vring_virtqueue *vq = to_vvq(_vq); 1815 1816 return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) : 1817 virtqueue_kick_prepare_split(_vq); 1818 } 1819 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare); 1820 1821 /** 1822 * virtqueue_notify - second half of split virtqueue_kick call. 1823 * @vq: the struct virtqueue 1824 * 1825 * This does not need to be serialized. 1826 * 1827 * Returns false if host notify failed or queue is broken, otherwise true. 1828 */ 1829 bool virtqueue_notify(struct virtqueue *_vq) 1830 { 1831 struct vring_virtqueue *vq = to_vvq(_vq); 1832 1833 if (unlikely(vq->broken)) 1834 return false; 1835 1836 /* Prod other side to tell it about changes. */ 1837 if (!vq->notify(_vq)) { 1838 vq->broken = true; 1839 return false; 1840 } 1841 return true; 1842 } 1843 EXPORT_SYMBOL_GPL(virtqueue_notify); 1844 1845 /** 1846 * virtqueue_kick - update after add_buf 1847 * @vq: the struct virtqueue 1848 * 1849 * After one or more virtqueue_add_* calls, invoke this to kick 1850 * the other side. 1851 * 1852 * Caller must ensure we don't call this with other virtqueue 1853 * operations at the same time (except where noted). 1854 * 1855 * Returns false if kick failed, otherwise true. 1856 */ 1857 bool virtqueue_kick(struct virtqueue *vq) 1858 { 1859 if (virtqueue_kick_prepare(vq)) 1860 return virtqueue_notify(vq); 1861 return true; 1862 } 1863 EXPORT_SYMBOL_GPL(virtqueue_kick); 1864 1865 /** 1866 * virtqueue_get_buf - get the next used buffer 1867 * @vq: the struct virtqueue we're talking about. 1868 * @len: the length written into the buffer 1869 * 1870 * If the device wrote data into the buffer, @len will be set to the 1871 * amount written. This means you don't need to clear the buffer 1872 * beforehand to ensure there's no data leakage in the case of short 1873 * writes. 1874 * 1875 * Caller must ensure we don't call this with other virtqueue 1876 * operations at the same time (except where noted). 1877 * 1878 * Returns NULL if there are no used buffers, or the "data" token 1879 * handed to virtqueue_add_*(). 1880 */ 1881 void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, 1882 void **ctx) 1883 { 1884 struct vring_virtqueue *vq = to_vvq(_vq); 1885 1886 return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) : 1887 virtqueue_get_buf_ctx_split(_vq, len, ctx); 1888 } 1889 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx); 1890 1891 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) 1892 { 1893 return virtqueue_get_buf_ctx(_vq, len, NULL); 1894 } 1895 EXPORT_SYMBOL_GPL(virtqueue_get_buf); 1896 /** 1897 * virtqueue_disable_cb - disable callbacks 1898 * @vq: the struct virtqueue we're talking about. 1899 * 1900 * Note that this is not necessarily synchronous, hence unreliable and only 1901 * useful as an optimization. 1902 * 1903 * Unlike other operations, this need not be serialized. 1904 */ 1905 void virtqueue_disable_cb(struct virtqueue *_vq) 1906 { 1907 struct vring_virtqueue *vq = to_vvq(_vq); 1908 1909 if (vq->packed_ring) 1910 virtqueue_disable_cb_packed(_vq); 1911 else 1912 virtqueue_disable_cb_split(_vq); 1913 } 1914 EXPORT_SYMBOL_GPL(virtqueue_disable_cb); 1915 1916 /** 1917 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb 1918 * @vq: the struct virtqueue we're talking about. 1919 * 1920 * This re-enables callbacks; it returns current queue state 1921 * in an opaque unsigned value. This value should be later tested by 1922 * virtqueue_poll, to detect a possible race between the driver checking for 1923 * more work, and enabling callbacks. 1924 * 1925 * Caller must ensure we don't call this with other virtqueue 1926 * operations at the same time (except where noted). 1927 */ 1928 unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq) 1929 { 1930 struct vring_virtqueue *vq = to_vvq(_vq); 1931 1932 return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) : 1933 virtqueue_enable_cb_prepare_split(_vq); 1934 } 1935 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); 1936 1937 /** 1938 * virtqueue_poll - query pending used buffers 1939 * @vq: the struct virtqueue we're talking about. 1940 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare). 1941 * 1942 * Returns "true" if there are pending used buffers in the queue. 1943 * 1944 * This does not need to be serialized. 1945 */ 1946 bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx) 1947 { 1948 struct vring_virtqueue *vq = to_vvq(_vq); 1949 1950 virtio_mb(vq->weak_barriers); 1951 return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) : 1952 virtqueue_poll_split(_vq, last_used_idx); 1953 } 1954 EXPORT_SYMBOL_GPL(virtqueue_poll); 1955 1956 /** 1957 * virtqueue_enable_cb - restart callbacks after disable_cb. 1958 * @vq: the struct virtqueue we're talking about. 1959 * 1960 * This re-enables callbacks; it returns "false" if there are pending 1961 * buffers in the queue, to detect a possible race between the driver 1962 * checking for more work, and enabling callbacks. 1963 * 1964 * Caller must ensure we don't call this with other virtqueue 1965 * operations at the same time (except where noted). 1966 */ 1967 bool virtqueue_enable_cb(struct virtqueue *_vq) 1968 { 1969 unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq); 1970 1971 return !virtqueue_poll(_vq, last_used_idx); 1972 } 1973 EXPORT_SYMBOL_GPL(virtqueue_enable_cb); 1974 1975 /** 1976 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb. 1977 * @vq: the struct virtqueue we're talking about. 1978 * 1979 * This re-enables callbacks but hints to the other side to delay 1980 * interrupts until most of the available buffers have been processed; 1981 * it returns "false" if there are many pending buffers in the queue, 1982 * to detect a possible race between the driver checking for more work, 1983 * and enabling callbacks. 1984 * 1985 * Caller must ensure we don't call this with other virtqueue 1986 * operations at the same time (except where noted). 1987 */ 1988 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) 1989 { 1990 struct vring_virtqueue *vq = to_vvq(_vq); 1991 1992 return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) : 1993 virtqueue_enable_cb_delayed_split(_vq); 1994 } 1995 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); 1996 1997 /** 1998 * virtqueue_detach_unused_buf - detach first unused buffer 1999 * @vq: the struct virtqueue we're talking about. 2000 * 2001 * Returns NULL or the "data" token handed to virtqueue_add_*(). 2002 * This is not valid on an active queue; it is useful only for device 2003 * shutdown. 2004 */ 2005 void *virtqueue_detach_unused_buf(struct virtqueue *_vq) 2006 { 2007 struct vring_virtqueue *vq = to_vvq(_vq); 2008 2009 return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) : 2010 virtqueue_detach_unused_buf_split(_vq); 2011 } 2012 EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf); 2013 2014 static inline bool more_used(const struct vring_virtqueue *vq) 2015 { 2016 return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq); 2017 } 2018 2019 irqreturn_t vring_interrupt(int irq, void *_vq) 2020 { 2021 struct vring_virtqueue *vq = to_vvq(_vq); 2022 2023 if (!more_used(vq)) { 2024 pr_debug("virtqueue interrupt with no work for %p\n", vq); 2025 return IRQ_NONE; 2026 } 2027 2028 if (unlikely(vq->broken)) 2029 return IRQ_HANDLED; 2030 2031 pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback); 2032 if (vq->vq.callback) 2033 vq->vq.callback(&vq->vq); 2034 2035 return IRQ_HANDLED; 2036 } 2037 EXPORT_SYMBOL_GPL(vring_interrupt); 2038 2039 /* Only available for split ring */ 2040 struct virtqueue *__vring_new_virtqueue(unsigned int index, 2041 struct vring vring, 2042 struct virtio_device *vdev, 2043 bool weak_barriers, 2044 bool context, 2045 bool (*notify)(struct virtqueue *), 2046 void (*callback)(struct virtqueue *), 2047 const char *name) 2048 { 2049 unsigned int i; 2050 struct vring_virtqueue *vq; 2051 2052 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 2053 return NULL; 2054 2055 vq = kmalloc(sizeof(*vq), GFP_KERNEL); 2056 if (!vq) 2057 return NULL; 2058 2059 vq->packed_ring = false; 2060 vq->vq.callback = callback; 2061 vq->vq.vdev = vdev; 2062 vq->vq.name = name; 2063 vq->vq.num_free = vring.num; 2064 vq->vq.index = index; 2065 vq->we_own_ring = false; 2066 vq->notify = notify; 2067 vq->weak_barriers = weak_barriers; 2068 vq->broken = false; 2069 vq->last_used_idx = 0; 2070 vq->num_added = 0; 2071 vq->use_dma_api = vring_use_dma_api(vdev); 2072 list_add_tail(&vq->vq.list, &vdev->vqs); 2073 #ifdef DEBUG 2074 vq->in_use = false; 2075 vq->last_add_time_valid = false; 2076 #endif 2077 2078 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && 2079 !context; 2080 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 2081 2082 vq->split.queue_dma_addr = 0; 2083 vq->split.queue_size_in_bytes = 0; 2084 2085 vq->split.vring = vring; 2086 vq->split.avail_flags_shadow = 0; 2087 vq->split.avail_idx_shadow = 0; 2088 2089 /* No callback? Tell other side not to bother us. */ 2090 if (!callback) { 2091 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; 2092 if (!vq->event) 2093 vq->split.vring.avail->flags = cpu_to_virtio16(vdev, 2094 vq->split.avail_flags_shadow); 2095 } 2096 2097 vq->split.desc_state = kmalloc_array(vring.num, 2098 sizeof(struct vring_desc_state_split), GFP_KERNEL); 2099 if (!vq->split.desc_state) { 2100 kfree(vq); 2101 return NULL; 2102 } 2103 2104 /* Put everything in free lists. */ 2105 vq->free_head = 0; 2106 for (i = 0; i < vring.num-1; i++) 2107 vq->split.vring.desc[i].next = cpu_to_virtio16(vdev, i + 1); 2108 memset(vq->split.desc_state, 0, vring.num * 2109 sizeof(struct vring_desc_state_split)); 2110 2111 return &vq->vq; 2112 } 2113 EXPORT_SYMBOL_GPL(__vring_new_virtqueue); 2114 2115 struct virtqueue *vring_create_virtqueue( 2116 unsigned int index, 2117 unsigned int num, 2118 unsigned int vring_align, 2119 struct virtio_device *vdev, 2120 bool weak_barriers, 2121 bool may_reduce_num, 2122 bool context, 2123 bool (*notify)(struct virtqueue *), 2124 void (*callback)(struct virtqueue *), 2125 const char *name) 2126 { 2127 2128 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 2129 return vring_create_virtqueue_packed(index, num, vring_align, 2130 vdev, weak_barriers, may_reduce_num, 2131 context, notify, callback, name); 2132 2133 return vring_create_virtqueue_split(index, num, vring_align, 2134 vdev, weak_barriers, may_reduce_num, 2135 context, notify, callback, name); 2136 } 2137 EXPORT_SYMBOL_GPL(vring_create_virtqueue); 2138 2139 /* Only available for split ring */ 2140 struct virtqueue *vring_new_virtqueue(unsigned int index, 2141 unsigned int num, 2142 unsigned int vring_align, 2143 struct virtio_device *vdev, 2144 bool weak_barriers, 2145 bool context, 2146 void *pages, 2147 bool (*notify)(struct virtqueue *vq), 2148 void (*callback)(struct virtqueue *vq), 2149 const char *name) 2150 { 2151 struct vring vring; 2152 2153 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 2154 return NULL; 2155 2156 vring_init(&vring, num, pages, vring_align); 2157 return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context, 2158 notify, callback, name); 2159 } 2160 EXPORT_SYMBOL_GPL(vring_new_virtqueue); 2161 2162 void vring_del_virtqueue(struct virtqueue *_vq) 2163 { 2164 struct vring_virtqueue *vq = to_vvq(_vq); 2165 2166 if (vq->we_own_ring) { 2167 if (vq->packed_ring) { 2168 vring_free_queue(vq->vq.vdev, 2169 vq->packed.ring_size_in_bytes, 2170 vq->packed.vring.desc, 2171 vq->packed.ring_dma_addr); 2172 2173 vring_free_queue(vq->vq.vdev, 2174 vq->packed.event_size_in_bytes, 2175 vq->packed.vring.driver, 2176 vq->packed.driver_event_dma_addr); 2177 2178 vring_free_queue(vq->vq.vdev, 2179 vq->packed.event_size_in_bytes, 2180 vq->packed.vring.device, 2181 vq->packed.device_event_dma_addr); 2182 2183 kfree(vq->packed.desc_state); 2184 kfree(vq->packed.desc_extra); 2185 } else { 2186 vring_free_queue(vq->vq.vdev, 2187 vq->split.queue_size_in_bytes, 2188 vq->split.vring.desc, 2189 vq->split.queue_dma_addr); 2190 2191 kfree(vq->split.desc_state); 2192 } 2193 } 2194 list_del(&_vq->list); 2195 kfree(vq); 2196 } 2197 EXPORT_SYMBOL_GPL(vring_del_virtqueue); 2198 2199 /* Manipulates transport-specific feature bits. */ 2200 void vring_transport_features(struct virtio_device *vdev) 2201 { 2202 unsigned int i; 2203 2204 for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) { 2205 switch (i) { 2206 case VIRTIO_RING_F_INDIRECT_DESC: 2207 break; 2208 case VIRTIO_RING_F_EVENT_IDX: 2209 break; 2210 case VIRTIO_F_VERSION_1: 2211 break; 2212 case VIRTIO_F_IOMMU_PLATFORM: 2213 break; 2214 case VIRTIO_F_RING_PACKED: 2215 break; 2216 default: 2217 /* We don't understand this bit. */ 2218 __virtio_clear_bit(vdev, i); 2219 } 2220 } 2221 } 2222 EXPORT_SYMBOL_GPL(vring_transport_features); 2223 2224 /** 2225 * virtqueue_get_vring_size - return the size of the virtqueue's vring 2226 * @vq: the struct virtqueue containing the vring of interest. 2227 * 2228 * Returns the size of the vring. This is mainly used for boasting to 2229 * userspace. Unlike other operations, this need not be serialized. 2230 */ 2231 unsigned int virtqueue_get_vring_size(struct virtqueue *_vq) 2232 { 2233 2234 struct vring_virtqueue *vq = to_vvq(_vq); 2235 2236 return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num; 2237 } 2238 EXPORT_SYMBOL_GPL(virtqueue_get_vring_size); 2239 2240 bool virtqueue_is_broken(struct virtqueue *_vq) 2241 { 2242 struct vring_virtqueue *vq = to_vvq(_vq); 2243 2244 return vq->broken; 2245 } 2246 EXPORT_SYMBOL_GPL(virtqueue_is_broken); 2247 2248 /* 2249 * This should prevent the device from being used, allowing drivers to 2250 * recover. You may need to grab appropriate locks to flush. 2251 */ 2252 void virtio_break_device(struct virtio_device *dev) 2253 { 2254 struct virtqueue *_vq; 2255 2256 list_for_each_entry(_vq, &dev->vqs, list) { 2257 struct vring_virtqueue *vq = to_vvq(_vq); 2258 vq->broken = true; 2259 } 2260 } 2261 EXPORT_SYMBOL_GPL(virtio_break_device); 2262 2263 dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq) 2264 { 2265 struct vring_virtqueue *vq = to_vvq(_vq); 2266 2267 BUG_ON(!vq->we_own_ring); 2268 2269 if (vq->packed_ring) 2270 return vq->packed.ring_dma_addr; 2271 2272 return vq->split.queue_dma_addr; 2273 } 2274 EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr); 2275 2276 dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq) 2277 { 2278 struct vring_virtqueue *vq = to_vvq(_vq); 2279 2280 BUG_ON(!vq->we_own_ring); 2281 2282 if (vq->packed_ring) 2283 return vq->packed.driver_event_dma_addr; 2284 2285 return vq->split.queue_dma_addr + 2286 ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc); 2287 } 2288 EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr); 2289 2290 dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq) 2291 { 2292 struct vring_virtqueue *vq = to_vvq(_vq); 2293 2294 BUG_ON(!vq->we_own_ring); 2295 2296 if (vq->packed_ring) 2297 return vq->packed.device_event_dma_addr; 2298 2299 return vq->split.queue_dma_addr + 2300 ((char *)vq->split.vring.used - (char *)vq->split.vring.desc); 2301 } 2302 EXPORT_SYMBOL_GPL(virtqueue_get_used_addr); 2303 2304 /* Only available for split ring */ 2305 const struct vring *virtqueue_get_vring(struct virtqueue *vq) 2306 { 2307 return &to_vvq(vq)->split.vring; 2308 } 2309 EXPORT_SYMBOL_GPL(virtqueue_get_vring); 2310 2311 MODULE_LICENSE("GPL"); 2312