1 /* Virtio ring implementation. 2 * 3 * Copyright 2007 Rusty Russell IBM Corporation 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation; either version 2 of the License, or 8 * (at your option) any later version. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, write to the Free Software 17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 18 */ 19 #include <linux/virtio.h> 20 #include <linux/virtio_ring.h> 21 #include <linux/virtio_config.h> 22 #include <linux/device.h> 23 #include <linux/slab.h> 24 #include <linux/module.h> 25 #include <linux/hrtimer.h> 26 #include <linux/dma-mapping.h> 27 #include <xen/xen.h> 28 29 #ifdef DEBUG 30 /* For development, we want to crash whenever the ring is screwed. */ 31 #define BAD_RING(_vq, fmt, args...) \ 32 do { \ 33 dev_err(&(_vq)->vq.vdev->dev, \ 34 "%s:"fmt, (_vq)->vq.name, ##args); \ 35 BUG(); \ 36 } while (0) 37 /* Caller is supposed to guarantee no reentry. */ 38 #define START_USE(_vq) \ 39 do { \ 40 if ((_vq)->in_use) \ 41 panic("%s:in_use = %i\n", \ 42 (_vq)->vq.name, (_vq)->in_use); \ 43 (_vq)->in_use = __LINE__; \ 44 } while (0) 45 #define END_USE(_vq) \ 46 do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0) 47 #define LAST_ADD_TIME_UPDATE(_vq) \ 48 do { \ 49 ktime_t now = ktime_get(); \ 50 \ 51 /* No kick or get, with .1 second between? Warn. */ \ 52 if ((_vq)->last_add_time_valid) \ 53 WARN_ON(ktime_to_ms(ktime_sub(now, \ 54 (_vq)->last_add_time)) > 100); \ 55 (_vq)->last_add_time = now; \ 56 (_vq)->last_add_time_valid = true; \ 57 } while (0) 58 #define LAST_ADD_TIME_CHECK(_vq) \ 59 do { \ 60 if ((_vq)->last_add_time_valid) { \ 61 WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \ 62 (_vq)->last_add_time)) > 100); \ 63 } \ 64 } while (0) 65 #define LAST_ADD_TIME_INVALID(_vq) \ 66 ((_vq)->last_add_time_valid = false) 67 #else 68 #define BAD_RING(_vq, fmt, args...) \ 69 do { \ 70 dev_err(&_vq->vq.vdev->dev, \ 71 "%s:"fmt, (_vq)->vq.name, ##args); \ 72 (_vq)->broken = true; \ 73 } while (0) 74 #define START_USE(vq) 75 #define END_USE(vq) 76 #define LAST_ADD_TIME_UPDATE(vq) 77 #define LAST_ADD_TIME_CHECK(vq) 78 #define LAST_ADD_TIME_INVALID(vq) 79 #endif 80 81 struct vring_desc_state_split { 82 void *data; /* Data for callback. */ 83 struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ 84 }; 85 86 struct vring_desc_state_packed { 87 void *data; /* Data for callback. */ 88 struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */ 89 u16 num; /* Descriptor list length. */ 90 u16 next; /* The next desc state in a list. */ 91 u16 last; /* The last desc state in a list. */ 92 }; 93 94 struct vring_desc_extra_packed { 95 dma_addr_t addr; /* Buffer DMA addr. */ 96 u32 len; /* Buffer length. */ 97 u16 flags; /* Descriptor flags. */ 98 }; 99 100 struct vring_virtqueue { 101 struct virtqueue vq; 102 103 /* Is this a packed ring? */ 104 bool packed_ring; 105 106 /* Is DMA API used? */ 107 bool use_dma_api; 108 109 /* Can we use weak barriers? */ 110 bool weak_barriers; 111 112 /* Other side has made a mess, don't try any more. 
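	 * Once set (by BAD_RING() here, or by a failed ->notify()), the
	 * flag is sticky: virtqueue_add_*() fails with -EIO and
	 * virtqueue_get_buf() returns NULL.  An illustrative driver-side
	 * reaction (my_recover_work is a hypothetical work item):
	 *
	 *	err = virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC);
	 *	if (err == -EIO)
	 *		schedule_work(&my_recover_work);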
*/ 113 bool broken; 114 115 /* Host supports indirect buffers */ 116 bool indirect; 117 118 /* Host publishes avail event idx */ 119 bool event; 120 121 /* Head of free buffer list. */ 122 unsigned int free_head; 123 /* Number we've added since last sync. */ 124 unsigned int num_added; 125 126 /* Last used index we've seen. */ 127 u16 last_used_idx; 128 129 union { 130 /* Available for split ring */ 131 struct { 132 /* Actual memory layout for this queue. */ 133 struct vring vring; 134 135 /* Last written value to avail->flags */ 136 u16 avail_flags_shadow; 137 138 /* 139 * Last written value to avail->idx in 140 * guest byte order. 141 */ 142 u16 avail_idx_shadow; 143 144 /* Per-descriptor state. */ 145 struct vring_desc_state_split *desc_state; 146 147 /* DMA address and size information */ 148 dma_addr_t queue_dma_addr; 149 size_t queue_size_in_bytes; 150 } split; 151 152 /* Available for packed ring */ 153 struct { 154 /* Actual memory layout for this queue. */ 155 struct { 156 unsigned int num; 157 struct vring_packed_desc *desc; 158 struct vring_packed_desc_event *driver; 159 struct vring_packed_desc_event *device; 160 } vring; 161 162 /* Driver ring wrap counter. */ 163 bool avail_wrap_counter; 164 165 /* Device ring wrap counter. */ 166 bool used_wrap_counter; 167 168 /* Avail used flags. */ 169 u16 avail_used_flags; 170 171 /* Index of the next avail descriptor. */ 172 u16 next_avail_idx; 173 174 /* 175 * Last written value to driver->flags in 176 * guest byte order. 177 */ 178 u16 event_flags_shadow; 179 180 /* Per-descriptor state. */ 181 struct vring_desc_state_packed *desc_state; 182 struct vring_desc_extra_packed *desc_extra; 183 184 /* DMA address and size information */ 185 dma_addr_t ring_dma_addr; 186 dma_addr_t driver_event_dma_addr; 187 dma_addr_t device_event_dma_addr; 188 size_t ring_size_in_bytes; 189 size_t event_size_in_bytes; 190 } packed; 191 }; 192 193 /* How to notify other side. FIXME: commonalize hcalls! */ 194 bool (*notify)(struct virtqueue *vq); 195 196 /* DMA, allocation, and size information */ 197 bool we_own_ring; 198 199 #ifdef DEBUG 200 /* They're supposed to lock for us. */ 201 unsigned int in_use; 202 203 /* Figure out if their kicks are too delayed. */ 204 bool last_add_time_valid; 205 ktime_t last_add_time; 206 #endif 207 }; 208 209 210 /* 211 * Helpers. 212 */ 213 214 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) 215 216 static inline bool virtqueue_use_indirect(struct virtqueue *_vq, 217 unsigned int total_sg) 218 { 219 struct vring_virtqueue *vq = to_vvq(_vq); 220 221 /* 222 * If the host supports indirect descriptor tables, and we have multiple 223 * buffers, then go indirect. FIXME: tune this threshold 224 */ 225 return (vq->indirect && total_sg > 1 && vq->vq.num_free); 226 } 227 228 /* 229 * Modern virtio devices have feature bits to specify whether they need a 230 * quirk and bypass the IOMMU. If not there, just use the DMA API. 231 * 232 * If there, the interaction between virtio and DMA API is messy. 233 * 234 * On most systems with virtio, physical addresses match bus addresses, 235 * and it doesn't particularly matter whether we use the DMA API. 236 * 237 * On some systems, including Xen and any system with a physical device 238 * that speaks virtio behind a physical IOMMU, we must use the DMA API 239 * for virtio DMA to work at all. 
 *
 * On other systems, including SPARC and PPC64, virtio-pci devices are
 * enumerated as though they are behind an IOMMU, but the virtio host
 * ignores the IOMMU, so we must either pretend that the IOMMU isn't
 * there or somehow map everything as the identity.
 *
 * For the time being, we preserve historic behavior and bypass the DMA
 * API.
 *
 * TODO: install a per-device DMA ops structure that does the right thing
 * taking into account all the above quirks, and use the DMA API
 * unconditionally on data path.
 */

static bool vring_use_dma_api(struct virtio_device *vdev)
{
	if (!virtio_has_iommu_quirk(vdev))
		return true;

	/* Otherwise, we are left to guess. */
	/*
	 * In theory, it's possible to have a buggy QEMU-supplied
	 * emulated Q35 IOMMU and Xen enabled at the same time.  On
	 * such a configuration, virtio has never worked and will
	 * not work without an even larger kludge.  Instead, enable
	 * the DMA API if we're a Xen guest, which at least allows
	 * all of the sensible Xen configurations to work correctly.
	 */
	if (xen_domain())
		return true;

	return false;
}

static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
			       dma_addr_t *dma_handle, gfp_t flag)
{
	if (vring_use_dma_api(vdev)) {
		return dma_alloc_coherent(vdev->dev.parent, size,
					  dma_handle, flag);
	} else {
		void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);

		if (queue) {
			phys_addr_t phys_addr = virt_to_phys(queue);
			*dma_handle = (dma_addr_t)phys_addr;

			/*
			 * Sanity check: make sure we didn't truncate
			 * the address.  The only arches I can find that
			 * have 64-bit phys_addr_t but 32-bit dma_addr_t
			 * are certain non-highmem MIPS and x86
			 * configurations, but these configurations
			 * should never allocate physical pages above 32
			 * bits, so this is fine.  Just in case, throw a
			 * warning and abort if we end up with an
			 * unrepresentable address.
			 */
			if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
				free_pages_exact(queue, PAGE_ALIGN(size));
				return NULL;
			}
		}
		return queue;
	}
}

static void vring_free_queue(struct virtio_device *vdev, size_t size,
			     void *queue, dma_addr_t dma_handle)
{
	if (vring_use_dma_api(vdev))
		dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
	else
		free_pages_exact(queue, PAGE_ALIGN(size));
}

/*
 * The DMA ops on various arches are rather gnarly right now, and
 * making all of the arch DMA ops work on the vring device itself
 * is a mess.  For now, we use the parent device for DMA ops.
 */
static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq)
{
	return vq->vq.vdev->dev.parent;
}

/* Map one sg entry. */
static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
				   struct scatterlist *sg,
				   enum dma_data_direction direction)
{
	if (!vq->use_dma_api)
		return (dma_addr_t)sg_phys(sg);

	/*
	 * We can't use dma_map_sg, because we don't use scatterlists in
	 * the way it expects (we don't guarantee that the scatterlist
	 * will exist for the lifetime of the mapping).
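	 * For contrast, an illustrative sketch of what the dma_map_sg()
	 * contract would ask of us: the same scatterlist has to stay
	 * valid and be handed back at unmap time, which virtio callers
	 * don't promise:
	 *
	 *	nents = dma_map_sg(dev, sgl, n, dir);
	 *	...
	 *	dma_unmap_sg(dev, sgl, n, dir);
	 *
	 * So each entry is mapped on its own below, and the dma_addr_t
	 * and length are later recovered straight from the ring
	 * descriptor when unmapping.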
338 */ 339 return dma_map_page(vring_dma_dev(vq), 340 sg_page(sg), sg->offset, sg->length, 341 direction); 342 } 343 344 static dma_addr_t vring_map_single(const struct vring_virtqueue *vq, 345 void *cpu_addr, size_t size, 346 enum dma_data_direction direction) 347 { 348 if (!vq->use_dma_api) 349 return (dma_addr_t)virt_to_phys(cpu_addr); 350 351 return dma_map_single(vring_dma_dev(vq), 352 cpu_addr, size, direction); 353 } 354 355 static int vring_mapping_error(const struct vring_virtqueue *vq, 356 dma_addr_t addr) 357 { 358 if (!vq->use_dma_api) 359 return 0; 360 361 return dma_mapping_error(vring_dma_dev(vq), addr); 362 } 363 364 365 /* 366 * Split ring specific functions - *_split(). 367 */ 368 369 static void vring_unmap_one_split(const struct vring_virtqueue *vq, 370 struct vring_desc *desc) 371 { 372 u16 flags; 373 374 if (!vq->use_dma_api) 375 return; 376 377 flags = virtio16_to_cpu(vq->vq.vdev, desc->flags); 378 379 if (flags & VRING_DESC_F_INDIRECT) { 380 dma_unmap_single(vring_dma_dev(vq), 381 virtio64_to_cpu(vq->vq.vdev, desc->addr), 382 virtio32_to_cpu(vq->vq.vdev, desc->len), 383 (flags & VRING_DESC_F_WRITE) ? 384 DMA_FROM_DEVICE : DMA_TO_DEVICE); 385 } else { 386 dma_unmap_page(vring_dma_dev(vq), 387 virtio64_to_cpu(vq->vq.vdev, desc->addr), 388 virtio32_to_cpu(vq->vq.vdev, desc->len), 389 (flags & VRING_DESC_F_WRITE) ? 390 DMA_FROM_DEVICE : DMA_TO_DEVICE); 391 } 392 } 393 394 static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq, 395 unsigned int total_sg, 396 gfp_t gfp) 397 { 398 struct vring_desc *desc; 399 unsigned int i; 400 401 /* 402 * We require lowmem mappings for the descriptors because 403 * otherwise virt_to_phys will give us bogus addresses in the 404 * virtqueue. 405 */ 406 gfp &= ~__GFP_HIGHMEM; 407 408 desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp); 409 if (!desc) 410 return NULL; 411 412 for (i = 0; i < total_sg; i++) 413 desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1); 414 return desc; 415 } 416 417 static inline int virtqueue_add_split(struct virtqueue *_vq, 418 struct scatterlist *sgs[], 419 unsigned int total_sg, 420 unsigned int out_sgs, 421 unsigned int in_sgs, 422 void *data, 423 void *ctx, 424 gfp_t gfp) 425 { 426 struct vring_virtqueue *vq = to_vvq(_vq); 427 struct scatterlist *sg; 428 struct vring_desc *desc; 429 unsigned int i, n, avail, descs_used, uninitialized_var(prev), err_idx; 430 int head; 431 bool indirect; 432 433 START_USE(vq); 434 435 BUG_ON(data == NULL); 436 BUG_ON(ctx && vq->indirect); 437 438 if (unlikely(vq->broken)) { 439 END_USE(vq); 440 return -EIO; 441 } 442 443 LAST_ADD_TIME_UPDATE(vq); 444 445 BUG_ON(total_sg == 0); 446 447 head = vq->free_head; 448 449 if (virtqueue_use_indirect(_vq, total_sg)) 450 desc = alloc_indirect_split(_vq, total_sg, gfp); 451 else { 452 desc = NULL; 453 WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect); 454 } 455 456 if (desc) { 457 /* Use a single buffer which doesn't continue */ 458 indirect = true; 459 /* Set up rest to use this indirect table. */ 460 i = 0; 461 descs_used = 1; 462 } else { 463 indirect = false; 464 desc = vq->split.vring.desc; 465 i = head; 466 descs_used = total_sg; 467 } 468 469 if (vq->vq.num_free < descs_used) { 470 pr_debug("Can't add buf len %i - avail = %i\n", 471 descs_used, vq->vq.num_free); 472 /* FIXME: for historical reasons, we force a notify here if 473 * there are outgoing parts to the buffer. Presumably the 474 * host should service the ring ASAP. 
*/ 475 if (out_sgs) 476 vq->notify(&vq->vq); 477 if (indirect) 478 kfree(desc); 479 END_USE(vq); 480 return -ENOSPC; 481 } 482 483 for (n = 0; n < out_sgs; n++) { 484 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 485 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE); 486 if (vring_mapping_error(vq, addr)) 487 goto unmap_release; 488 489 desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT); 490 desc[i].addr = cpu_to_virtio64(_vq->vdev, addr); 491 desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length); 492 prev = i; 493 i = virtio16_to_cpu(_vq->vdev, desc[i].next); 494 } 495 } 496 for (; n < (out_sgs + in_sgs); n++) { 497 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 498 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE); 499 if (vring_mapping_error(vq, addr)) 500 goto unmap_release; 501 502 desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE); 503 desc[i].addr = cpu_to_virtio64(_vq->vdev, addr); 504 desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length); 505 prev = i; 506 i = virtio16_to_cpu(_vq->vdev, desc[i].next); 507 } 508 } 509 /* Last one doesn't continue. */ 510 desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); 511 512 if (indirect) { 513 /* Now that the indirect table is filled in, map it. */ 514 dma_addr_t addr = vring_map_single( 515 vq, desc, total_sg * sizeof(struct vring_desc), 516 DMA_TO_DEVICE); 517 if (vring_mapping_error(vq, addr)) 518 goto unmap_release; 519 520 vq->split.vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, 521 VRING_DESC_F_INDIRECT); 522 vq->split.vring.desc[head].addr = cpu_to_virtio64(_vq->vdev, 523 addr); 524 525 vq->split.vring.desc[head].len = cpu_to_virtio32(_vq->vdev, 526 total_sg * sizeof(struct vring_desc)); 527 } 528 529 /* We're using some buffers from the free list. */ 530 vq->vq.num_free -= descs_used; 531 532 /* Update free pointer */ 533 if (indirect) 534 vq->free_head = virtio16_to_cpu(_vq->vdev, 535 vq->split.vring.desc[head].next); 536 else 537 vq->free_head = i; 538 539 /* Store token and indirect buffer state. */ 540 vq->split.desc_state[head].data = data; 541 if (indirect) 542 vq->split.desc_state[head].indir_desc = desc; 543 else 544 vq->split.desc_state[head].indir_desc = ctx; 545 546 /* Put entry in available array (but don't update avail->idx until they 547 * do sync). */ 548 avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1); 549 vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head); 550 551 /* Descriptors and available array need to be set before we expose the 552 * new available array entries. */ 553 virtio_wmb(vq->weak_barriers); 554 vq->split.avail_idx_shadow++; 555 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, 556 vq->split.avail_idx_shadow); 557 vq->num_added++; 558 559 pr_debug("Added buffer head %i to %p\n", head, vq); 560 END_USE(vq); 561 562 /* This is very unlikely, but theoretically possible. Kick 563 * just in case. 
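	 * The limit comes from the 16-bit ring indices:
	 * virtqueue_kick_prepare_split() recomputes
	 *
	 *	old = vq->split.avail_idx_shadow - vq->num_added;
	 *	new = vq->split.avail_idx_shadow;
	 *
	 * and vring_need_event()'s u16 window test
	 *
	 *	(u16)(new - event_idx - 1) < (u16)(new - old)
	 *
	 * is only meaningful while new - old stays below 0x10000, so kick
	 * here and let num_added drop back to zero before it can alias.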
*/ 564 if (unlikely(vq->num_added == (1 << 16) - 1)) 565 virtqueue_kick(_vq); 566 567 return 0; 568 569 unmap_release: 570 err_idx = i; 571 i = head; 572 573 for (n = 0; n < total_sg; n++) { 574 if (i == err_idx) 575 break; 576 vring_unmap_one_split(vq, &desc[i]); 577 i = virtio16_to_cpu(_vq->vdev, vq->split.vring.desc[i].next); 578 } 579 580 if (indirect) 581 kfree(desc); 582 583 END_USE(vq); 584 return -EIO; 585 } 586 587 static bool virtqueue_kick_prepare_split(struct virtqueue *_vq) 588 { 589 struct vring_virtqueue *vq = to_vvq(_vq); 590 u16 new, old; 591 bool needs_kick; 592 593 START_USE(vq); 594 /* We need to expose available array entries before checking avail 595 * event. */ 596 virtio_mb(vq->weak_barriers); 597 598 old = vq->split.avail_idx_shadow - vq->num_added; 599 new = vq->split.avail_idx_shadow; 600 vq->num_added = 0; 601 602 LAST_ADD_TIME_CHECK(vq); 603 LAST_ADD_TIME_INVALID(vq); 604 605 if (vq->event) { 606 needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev, 607 vring_avail_event(&vq->split.vring)), 608 new, old); 609 } else { 610 needs_kick = !(vq->split.vring.used->flags & 611 cpu_to_virtio16(_vq->vdev, 612 VRING_USED_F_NO_NOTIFY)); 613 } 614 END_USE(vq); 615 return needs_kick; 616 } 617 618 static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, 619 void **ctx) 620 { 621 unsigned int i, j; 622 __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT); 623 624 /* Clear data ptr. */ 625 vq->split.desc_state[head].data = NULL; 626 627 /* Put back on free list: unmap first-level descriptors and find end */ 628 i = head; 629 630 while (vq->split.vring.desc[i].flags & nextflag) { 631 vring_unmap_one_split(vq, &vq->split.vring.desc[i]); 632 i = virtio16_to_cpu(vq->vq.vdev, vq->split.vring.desc[i].next); 633 vq->vq.num_free++; 634 } 635 636 vring_unmap_one_split(vq, &vq->split.vring.desc[i]); 637 vq->split.vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, 638 vq->free_head); 639 vq->free_head = head; 640 641 /* Plus final descriptor */ 642 vq->vq.num_free++; 643 644 if (vq->indirect) { 645 struct vring_desc *indir_desc = 646 vq->split.desc_state[head].indir_desc; 647 u32 len; 648 649 /* Free the indirect table, if any, now that it's unmapped. */ 650 if (!indir_desc) 651 return; 652 653 len = virtio32_to_cpu(vq->vq.vdev, 654 vq->split.vring.desc[head].len); 655 656 BUG_ON(!(vq->split.vring.desc[head].flags & 657 cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT))); 658 BUG_ON(len == 0 || len % sizeof(struct vring_desc)); 659 660 for (j = 0; j < len / sizeof(struct vring_desc); j++) 661 vring_unmap_one_split(vq, &indir_desc[j]); 662 663 kfree(indir_desc); 664 vq->split.desc_state[head].indir_desc = NULL; 665 } else if (ctx) { 666 *ctx = vq->split.desc_state[head].indir_desc; 667 } 668 } 669 670 static inline bool more_used_split(const struct vring_virtqueue *vq) 671 { 672 return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, 673 vq->split.vring.used->idx); 674 } 675 676 static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq, 677 unsigned int *len, 678 void **ctx) 679 { 680 struct vring_virtqueue *vq = to_vvq(_vq); 681 void *ret; 682 unsigned int i; 683 u16 last_used; 684 685 START_USE(vq); 686 687 if (unlikely(vq->broken)) { 688 END_USE(vq); 689 return NULL; 690 } 691 692 if (!more_used_split(vq)) { 693 pr_debug("No more buffers in queue\n"); 694 END_USE(vq); 695 return NULL; 696 } 697 698 /* Only get used array entries after they have been exposed by host. 
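	 * This pairs with the device side, which must write the used
	 * element before publishing the new index, i.e. (illustrative
	 * ordering, not driver code):
	 *
	 *	device:  used->ring[n] = { id, len };  wmb;  used->idx++;
	 *	driver:  idx = used->idx;  virtio_rmb();  read used->ring[n];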
*/ 699 virtio_rmb(vq->weak_barriers); 700 701 last_used = (vq->last_used_idx & (vq->split.vring.num - 1)); 702 i = virtio32_to_cpu(_vq->vdev, 703 vq->split.vring.used->ring[last_used].id); 704 *len = virtio32_to_cpu(_vq->vdev, 705 vq->split.vring.used->ring[last_used].len); 706 707 if (unlikely(i >= vq->split.vring.num)) { 708 BAD_RING(vq, "id %u out of range\n", i); 709 return NULL; 710 } 711 if (unlikely(!vq->split.desc_state[i].data)) { 712 BAD_RING(vq, "id %u is not a head!\n", i); 713 return NULL; 714 } 715 716 /* detach_buf_split clears data, so grab it now. */ 717 ret = vq->split.desc_state[i].data; 718 detach_buf_split(vq, i, ctx); 719 vq->last_used_idx++; 720 /* If we expect an interrupt for the next entry, tell host 721 * by writing event index and flush out the write before 722 * the read in the next get_buf call. */ 723 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) 724 virtio_store_mb(vq->weak_barriers, 725 &vring_used_event(&vq->split.vring), 726 cpu_to_virtio16(_vq->vdev, vq->last_used_idx)); 727 728 LAST_ADD_TIME_INVALID(vq); 729 730 END_USE(vq); 731 return ret; 732 } 733 734 static void virtqueue_disable_cb_split(struct virtqueue *_vq) 735 { 736 struct vring_virtqueue *vq = to_vvq(_vq); 737 738 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) { 739 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; 740 if (!vq->event) 741 vq->split.vring.avail->flags = 742 cpu_to_virtio16(_vq->vdev, 743 vq->split.avail_flags_shadow); 744 } 745 } 746 747 static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq) 748 { 749 struct vring_virtqueue *vq = to_vvq(_vq); 750 u16 last_used_idx; 751 752 START_USE(vq); 753 754 /* We optimistically turn back on interrupts, then check if there was 755 * more to do. */ 756 /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to 757 * either clear the flags bit or point the event index at the next 758 * entry. Always do both to keep code simple. */ 759 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 760 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 761 if (!vq->event) 762 vq->split.vring.avail->flags = 763 cpu_to_virtio16(_vq->vdev, 764 vq->split.avail_flags_shadow); 765 } 766 vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev, 767 last_used_idx = vq->last_used_idx); 768 END_USE(vq); 769 return last_used_idx; 770 } 771 772 static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx) 773 { 774 struct vring_virtqueue *vq = to_vvq(_vq); 775 776 return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev, 777 vq->split.vring.used->idx); 778 } 779 780 static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq) 781 { 782 struct vring_virtqueue *vq = to_vvq(_vq); 783 u16 bufs; 784 785 START_USE(vq); 786 787 /* We optimistically turn back on interrupts, then check if there was 788 * more to do. */ 789 /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to 790 * either clear the flags bit or point the event index at the next 791 * entry. Always update the event index to keep code simple. 
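	 * For example (illustrative numbers): with 64 buffers still
	 * outstanding, bufs below becomes 48, so used_event is set to
	 * last_used_idx + 48 and the device only interrupts once roughly
	 * three quarters of those buffers have been used, rather than
	 * after the very next one.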
*/ 792 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 793 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 794 if (!vq->event) 795 vq->split.vring.avail->flags = 796 cpu_to_virtio16(_vq->vdev, 797 vq->split.avail_flags_shadow); 798 } 799 /* TODO: tune this threshold */ 800 bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4; 801 802 virtio_store_mb(vq->weak_barriers, 803 &vring_used_event(&vq->split.vring), 804 cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs)); 805 806 if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx) 807 - vq->last_used_idx) > bufs)) { 808 END_USE(vq); 809 return false; 810 } 811 812 END_USE(vq); 813 return true; 814 } 815 816 static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq) 817 { 818 struct vring_virtqueue *vq = to_vvq(_vq); 819 unsigned int i; 820 void *buf; 821 822 START_USE(vq); 823 824 for (i = 0; i < vq->split.vring.num; i++) { 825 if (!vq->split.desc_state[i].data) 826 continue; 827 /* detach_buf_split clears data, so grab it now. */ 828 buf = vq->split.desc_state[i].data; 829 detach_buf_split(vq, i, NULL); 830 vq->split.avail_idx_shadow--; 831 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, 832 vq->split.avail_idx_shadow); 833 END_USE(vq); 834 return buf; 835 } 836 /* That should have freed everything. */ 837 BUG_ON(vq->vq.num_free != vq->split.vring.num); 838 839 END_USE(vq); 840 return NULL; 841 } 842 843 static struct virtqueue *vring_create_virtqueue_split( 844 unsigned int index, 845 unsigned int num, 846 unsigned int vring_align, 847 struct virtio_device *vdev, 848 bool weak_barriers, 849 bool may_reduce_num, 850 bool context, 851 bool (*notify)(struct virtqueue *), 852 void (*callback)(struct virtqueue *), 853 const char *name) 854 { 855 struct virtqueue *vq; 856 void *queue = NULL; 857 dma_addr_t dma_addr; 858 size_t queue_size_in_bytes; 859 struct vring vring; 860 861 /* We assume num is a power of 2. */ 862 if (num & (num - 1)) { 863 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num); 864 return NULL; 865 } 866 867 /* TODO: allocate each queue chunk individually */ 868 for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) { 869 queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 870 &dma_addr, 871 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 872 if (queue) 873 break; 874 } 875 876 if (!num) 877 return NULL; 878 879 if (!queue) { 880 /* Try to get a single page. You are my only hope! */ 881 queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 882 &dma_addr, GFP_KERNEL|__GFP_ZERO); 883 } 884 if (!queue) 885 return NULL; 886 887 queue_size_in_bytes = vring_size(num, vring_align); 888 vring_init(&vring, num, queue, vring_align); 889 890 vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context, 891 notify, callback, name); 892 if (!vq) { 893 vring_free_queue(vdev, queue_size_in_bytes, queue, 894 dma_addr); 895 return NULL; 896 } 897 898 to_vvq(vq)->split.queue_dma_addr = dma_addr; 899 to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes; 900 to_vvq(vq)->we_own_ring = true; 901 902 return vq; 903 } 904 905 906 /* 907 * Packed ring specific functions - *_packed(). 
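 *
 * A quick orientation for what follows (a summary of the layout used
 * below, not additional API): the packed ring is one array of
 *
 *	struct vring_packed_desc {
 *		__le64 addr;
 *		__le32 len;
 *		__le16 id;
 *		__le16 flags;
 *	};
 *
 * and ownership is signalled purely through the two flag bits
 * 1 << VRING_PACKED_DESC_F_AVAIL and 1 << VRING_PACKED_DESC_F_USED.
 * The driver makes a descriptor available by writing AVAIL equal to its
 * avail_wrap_counter and USED to the opposite value (the pair cached in
 * vq->packed.avail_used_flags below); the device marks it used by
 * making the two bits equal again, which is what is_used_desc_packed()
 * tests.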
908 */ 909 910 static void vring_unmap_state_packed(const struct vring_virtqueue *vq, 911 struct vring_desc_extra_packed *state) 912 { 913 u16 flags; 914 915 if (!vq->use_dma_api) 916 return; 917 918 flags = state->flags; 919 920 if (flags & VRING_DESC_F_INDIRECT) { 921 dma_unmap_single(vring_dma_dev(vq), 922 state->addr, state->len, 923 (flags & VRING_DESC_F_WRITE) ? 924 DMA_FROM_DEVICE : DMA_TO_DEVICE); 925 } else { 926 dma_unmap_page(vring_dma_dev(vq), 927 state->addr, state->len, 928 (flags & VRING_DESC_F_WRITE) ? 929 DMA_FROM_DEVICE : DMA_TO_DEVICE); 930 } 931 } 932 933 static void vring_unmap_desc_packed(const struct vring_virtqueue *vq, 934 struct vring_packed_desc *desc) 935 { 936 u16 flags; 937 938 if (!vq->use_dma_api) 939 return; 940 941 flags = le16_to_cpu(desc->flags); 942 943 if (flags & VRING_DESC_F_INDIRECT) { 944 dma_unmap_single(vring_dma_dev(vq), 945 le64_to_cpu(desc->addr), 946 le32_to_cpu(desc->len), 947 (flags & VRING_DESC_F_WRITE) ? 948 DMA_FROM_DEVICE : DMA_TO_DEVICE); 949 } else { 950 dma_unmap_page(vring_dma_dev(vq), 951 le64_to_cpu(desc->addr), 952 le32_to_cpu(desc->len), 953 (flags & VRING_DESC_F_WRITE) ? 954 DMA_FROM_DEVICE : DMA_TO_DEVICE); 955 } 956 } 957 958 static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg, 959 gfp_t gfp) 960 { 961 struct vring_packed_desc *desc; 962 963 /* 964 * We require lowmem mappings for the descriptors because 965 * otherwise virt_to_phys will give us bogus addresses in the 966 * virtqueue. 967 */ 968 gfp &= ~__GFP_HIGHMEM; 969 970 desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp); 971 972 return desc; 973 } 974 975 static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, 976 struct scatterlist *sgs[], 977 unsigned int total_sg, 978 unsigned int out_sgs, 979 unsigned int in_sgs, 980 void *data, 981 gfp_t gfp) 982 { 983 struct vring_packed_desc *desc; 984 struct scatterlist *sg; 985 unsigned int i, n, err_idx; 986 u16 head, id; 987 dma_addr_t addr; 988 989 head = vq->packed.next_avail_idx; 990 desc = alloc_indirect_packed(total_sg, gfp); 991 992 if (unlikely(vq->vq.num_free < 1)) { 993 pr_debug("Can't add buf len 1 - avail = 0\n"); 994 END_USE(vq); 995 return -ENOSPC; 996 } 997 998 i = 0; 999 id = vq->free_head; 1000 BUG_ON(id == vq->packed.vring.num); 1001 1002 for (n = 0; n < out_sgs + in_sgs; n++) { 1003 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 1004 addr = vring_map_one_sg(vq, sg, n < out_sgs ? 1005 DMA_TO_DEVICE : DMA_FROM_DEVICE); 1006 if (vring_mapping_error(vq, addr)) 1007 goto unmap_release; 1008 1009 desc[i].flags = cpu_to_le16(n < out_sgs ? 1010 0 : VRING_DESC_F_WRITE); 1011 desc[i].addr = cpu_to_le64(addr); 1012 desc[i].len = cpu_to_le32(sg->length); 1013 i++; 1014 } 1015 } 1016 1017 /* Now that the indirect table is filled in, map it. 
*/ 1018 addr = vring_map_single(vq, desc, 1019 total_sg * sizeof(struct vring_packed_desc), 1020 DMA_TO_DEVICE); 1021 if (vring_mapping_error(vq, addr)) 1022 goto unmap_release; 1023 1024 vq->packed.vring.desc[head].addr = cpu_to_le64(addr); 1025 vq->packed.vring.desc[head].len = cpu_to_le32(total_sg * 1026 sizeof(struct vring_packed_desc)); 1027 vq->packed.vring.desc[head].id = cpu_to_le16(id); 1028 1029 if (vq->use_dma_api) { 1030 vq->packed.desc_extra[id].addr = addr; 1031 vq->packed.desc_extra[id].len = total_sg * 1032 sizeof(struct vring_packed_desc); 1033 vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT | 1034 vq->packed.avail_used_flags; 1035 } 1036 1037 /* 1038 * A driver MUST NOT make the first descriptor in the list 1039 * available before all subsequent descriptors comprising 1040 * the list are made available. 1041 */ 1042 virtio_wmb(vq->weak_barriers); 1043 vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT | 1044 vq->packed.avail_used_flags); 1045 1046 /* We're using some buffers from the free list. */ 1047 vq->vq.num_free -= 1; 1048 1049 /* Update free pointer */ 1050 n = head + 1; 1051 if (n >= vq->packed.vring.num) { 1052 n = 0; 1053 vq->packed.avail_wrap_counter ^= 1; 1054 vq->packed.avail_used_flags ^= 1055 1 << VRING_PACKED_DESC_F_AVAIL | 1056 1 << VRING_PACKED_DESC_F_USED; 1057 } 1058 vq->packed.next_avail_idx = n; 1059 vq->free_head = vq->packed.desc_state[id].next; 1060 1061 /* Store token and indirect buffer state. */ 1062 vq->packed.desc_state[id].num = 1; 1063 vq->packed.desc_state[id].data = data; 1064 vq->packed.desc_state[id].indir_desc = desc; 1065 vq->packed.desc_state[id].last = id; 1066 1067 vq->num_added += 1; 1068 1069 pr_debug("Added buffer head %i to %p\n", head, vq); 1070 END_USE(vq); 1071 1072 return 0; 1073 1074 unmap_release: 1075 err_idx = i; 1076 1077 for (i = 0; i < err_idx; i++) 1078 vring_unmap_desc_packed(vq, &desc[i]); 1079 1080 kfree(desc); 1081 1082 END_USE(vq); 1083 return -EIO; 1084 } 1085 1086 static inline int virtqueue_add_packed(struct virtqueue *_vq, 1087 struct scatterlist *sgs[], 1088 unsigned int total_sg, 1089 unsigned int out_sgs, 1090 unsigned int in_sgs, 1091 void *data, 1092 void *ctx, 1093 gfp_t gfp) 1094 { 1095 struct vring_virtqueue *vq = to_vvq(_vq); 1096 struct vring_packed_desc *desc; 1097 struct scatterlist *sg; 1098 unsigned int i, n, c, descs_used, err_idx; 1099 __le16 uninitialized_var(head_flags), flags; 1100 u16 head, id, uninitialized_var(prev), curr, avail_used_flags; 1101 1102 START_USE(vq); 1103 1104 BUG_ON(data == NULL); 1105 BUG_ON(ctx && vq->indirect); 1106 1107 if (unlikely(vq->broken)) { 1108 END_USE(vq); 1109 return -EIO; 1110 } 1111 1112 LAST_ADD_TIME_UPDATE(vq); 1113 1114 BUG_ON(total_sg == 0); 1115 1116 if (virtqueue_use_indirect(_vq, total_sg)) 1117 return virtqueue_add_indirect_packed(vq, sgs, total_sg, 1118 out_sgs, in_sgs, data, gfp); 1119 1120 head = vq->packed.next_avail_idx; 1121 avail_used_flags = vq->packed.avail_used_flags; 1122 1123 WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect); 1124 1125 desc = vq->packed.vring.desc; 1126 i = head; 1127 descs_used = total_sg; 1128 1129 if (unlikely(vq->vq.num_free < descs_used)) { 1130 pr_debug("Can't add buf len %i - avail = %i\n", 1131 descs_used, vq->vq.num_free); 1132 END_USE(vq); 1133 return -ENOSPC; 1134 } 1135 1136 id = vq->free_head; 1137 BUG_ON(id == vq->packed.vring.num); 1138 1139 curr = id; 1140 c = 0; 1141 for (n = 0; n < out_sgs + in_sgs; n++) { 1142 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 1143 
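			/*
			 * Note on ordering (see the virtio_wmb() after this
			 * loop): every descriptor except the one at 'head'
			 * gets its flags written here; the head's flags are
			 * only kept in head_flags and stored last, after the
			 * barrier.  The avail/used bits in flags are what
			 * make a packed descriptor visible to the device, so
			 * this keeps it from seeing a half-written chain.
			 */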
dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ? 1144 DMA_TO_DEVICE : DMA_FROM_DEVICE); 1145 if (vring_mapping_error(vq, addr)) 1146 goto unmap_release; 1147 1148 flags = cpu_to_le16(vq->packed.avail_used_flags | 1149 (++c == total_sg ? 0 : VRING_DESC_F_NEXT) | 1150 (n < out_sgs ? 0 : VRING_DESC_F_WRITE)); 1151 if (i == head) 1152 head_flags = flags; 1153 else 1154 desc[i].flags = flags; 1155 1156 desc[i].addr = cpu_to_le64(addr); 1157 desc[i].len = cpu_to_le32(sg->length); 1158 desc[i].id = cpu_to_le16(id); 1159 1160 if (unlikely(vq->use_dma_api)) { 1161 vq->packed.desc_extra[curr].addr = addr; 1162 vq->packed.desc_extra[curr].len = sg->length; 1163 vq->packed.desc_extra[curr].flags = 1164 le16_to_cpu(flags); 1165 } 1166 prev = curr; 1167 curr = vq->packed.desc_state[curr].next; 1168 1169 if ((unlikely(++i >= vq->packed.vring.num))) { 1170 i = 0; 1171 vq->packed.avail_used_flags ^= 1172 1 << VRING_PACKED_DESC_F_AVAIL | 1173 1 << VRING_PACKED_DESC_F_USED; 1174 } 1175 } 1176 } 1177 1178 if (i < head) 1179 vq->packed.avail_wrap_counter ^= 1; 1180 1181 /* We're using some buffers from the free list. */ 1182 vq->vq.num_free -= descs_used; 1183 1184 /* Update free pointer */ 1185 vq->packed.next_avail_idx = i; 1186 vq->free_head = curr; 1187 1188 /* Store token. */ 1189 vq->packed.desc_state[id].num = descs_used; 1190 vq->packed.desc_state[id].data = data; 1191 vq->packed.desc_state[id].indir_desc = ctx; 1192 vq->packed.desc_state[id].last = prev; 1193 1194 /* 1195 * A driver MUST NOT make the first descriptor in the list 1196 * available before all subsequent descriptors comprising 1197 * the list are made available. 1198 */ 1199 virtio_wmb(vq->weak_barriers); 1200 vq->packed.vring.desc[head].flags = head_flags; 1201 vq->num_added += descs_used; 1202 1203 pr_debug("Added buffer head %i to %p\n", head, vq); 1204 END_USE(vq); 1205 1206 return 0; 1207 1208 unmap_release: 1209 err_idx = i; 1210 i = head; 1211 1212 vq->packed.avail_used_flags = avail_used_flags; 1213 1214 for (n = 0; n < total_sg; n++) { 1215 if (i == err_idx) 1216 break; 1217 vring_unmap_desc_packed(vq, &desc[i]); 1218 i++; 1219 if (i >= vq->packed.vring.num) 1220 i = 0; 1221 } 1222 1223 END_USE(vq); 1224 return -EIO; 1225 } 1226 1227 static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq) 1228 { 1229 struct vring_virtqueue *vq = to_vvq(_vq); 1230 u16 new, old, off_wrap, flags, wrap_counter, event_idx; 1231 bool needs_kick; 1232 union { 1233 struct { 1234 __le16 off_wrap; 1235 __le16 flags; 1236 }; 1237 u32 u32; 1238 } snapshot; 1239 1240 START_USE(vq); 1241 1242 /* 1243 * We need to expose the new flags value before checking notification 1244 * suppressions. 
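	 * Concretely, the reordering this forbids: if the read of
	 * vring.device->flags below could drift ahead of the descriptor
	 * and flags stores above, the device might scan a still-empty
	 * ring, go idle and ask for notifications, while we act on its
	 * stale "no notification needed" value and skip the kick, so the
	 * new buffers would sit unseen.  The virtio_mb() pairs with the
	 * device's own ordering between publishing its event suppression
	 * flags and re-checking the ring.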
1245 */ 1246 virtio_mb(vq->weak_barriers); 1247 1248 old = vq->packed.next_avail_idx - vq->num_added; 1249 new = vq->packed.next_avail_idx; 1250 vq->num_added = 0; 1251 1252 snapshot.u32 = *(u32 *)vq->packed.vring.device; 1253 flags = le16_to_cpu(snapshot.flags); 1254 1255 LAST_ADD_TIME_CHECK(vq); 1256 LAST_ADD_TIME_INVALID(vq); 1257 1258 if (flags != VRING_PACKED_EVENT_FLAG_DESC) { 1259 needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE); 1260 goto out; 1261 } 1262 1263 off_wrap = le16_to_cpu(snapshot.off_wrap); 1264 1265 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; 1266 event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); 1267 if (wrap_counter != vq->packed.avail_wrap_counter) 1268 event_idx -= vq->packed.vring.num; 1269 1270 needs_kick = vring_need_event(event_idx, new, old); 1271 out: 1272 END_USE(vq); 1273 return needs_kick; 1274 } 1275 1276 static void detach_buf_packed(struct vring_virtqueue *vq, 1277 unsigned int id, void **ctx) 1278 { 1279 struct vring_desc_state_packed *state = NULL; 1280 struct vring_packed_desc *desc; 1281 unsigned int i, curr; 1282 1283 state = &vq->packed.desc_state[id]; 1284 1285 /* Clear data ptr. */ 1286 state->data = NULL; 1287 1288 vq->packed.desc_state[state->last].next = vq->free_head; 1289 vq->free_head = id; 1290 vq->vq.num_free += state->num; 1291 1292 if (unlikely(vq->use_dma_api)) { 1293 curr = id; 1294 for (i = 0; i < state->num; i++) { 1295 vring_unmap_state_packed(vq, 1296 &vq->packed.desc_extra[curr]); 1297 curr = vq->packed.desc_state[curr].next; 1298 } 1299 } 1300 1301 if (vq->indirect) { 1302 u32 len; 1303 1304 /* Free the indirect table, if any, now that it's unmapped. */ 1305 desc = state->indir_desc; 1306 if (!desc) 1307 return; 1308 1309 if (vq->use_dma_api) { 1310 len = vq->packed.desc_extra[id].len; 1311 for (i = 0; i < len / sizeof(struct vring_packed_desc); 1312 i++) 1313 vring_unmap_desc_packed(vq, &desc[i]); 1314 } 1315 kfree(desc); 1316 state->indir_desc = NULL; 1317 } else if (ctx) { 1318 *ctx = state->indir_desc; 1319 } 1320 } 1321 1322 static inline bool is_used_desc_packed(const struct vring_virtqueue *vq, 1323 u16 idx, bool used_wrap_counter) 1324 { 1325 bool avail, used; 1326 u16 flags; 1327 1328 flags = le16_to_cpu(vq->packed.vring.desc[idx].flags); 1329 avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL)); 1330 used = !!(flags & (1 << VRING_PACKED_DESC_F_USED)); 1331 1332 return avail == used && used == used_wrap_counter; 1333 } 1334 1335 static inline bool more_used_packed(const struct vring_virtqueue *vq) 1336 { 1337 return is_used_desc_packed(vq, vq->last_used_idx, 1338 vq->packed.used_wrap_counter); 1339 } 1340 1341 static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, 1342 unsigned int *len, 1343 void **ctx) 1344 { 1345 struct vring_virtqueue *vq = to_vvq(_vq); 1346 u16 last_used, id; 1347 void *ret; 1348 1349 START_USE(vq); 1350 1351 if (unlikely(vq->broken)) { 1352 END_USE(vq); 1353 return NULL; 1354 } 1355 1356 if (!more_used_packed(vq)) { 1357 pr_debug("No more buffers in queue\n"); 1358 END_USE(vq); 1359 return NULL; 1360 } 1361 1362 /* Only get used elements after they have been exposed by host. 
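	 * For reference, how ownership reads on a packed ring (this is
	 * what is_used_desc_packed() above encodes), with A/U the
	 * AVAIL/USED flag bits and W = vq->packed.used_wrap_counter:
	 *
	 *	A != U		made available, not yet used by the device
	 *	A == U == W	used by the device in the current wrap
	 *	A == U != W	used in an earlier wrap, already consumed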
*/ 1363 virtio_rmb(vq->weak_barriers); 1364 1365 last_used = vq->last_used_idx; 1366 id = le16_to_cpu(vq->packed.vring.desc[last_used].id); 1367 *len = le32_to_cpu(vq->packed.vring.desc[last_used].len); 1368 1369 if (unlikely(id >= vq->packed.vring.num)) { 1370 BAD_RING(vq, "id %u out of range\n", id); 1371 return NULL; 1372 } 1373 if (unlikely(!vq->packed.desc_state[id].data)) { 1374 BAD_RING(vq, "id %u is not a head!\n", id); 1375 return NULL; 1376 } 1377 1378 /* detach_buf_packed clears data, so grab it now. */ 1379 ret = vq->packed.desc_state[id].data; 1380 detach_buf_packed(vq, id, ctx); 1381 1382 vq->last_used_idx += vq->packed.desc_state[id].num; 1383 if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) { 1384 vq->last_used_idx -= vq->packed.vring.num; 1385 vq->packed.used_wrap_counter ^= 1; 1386 } 1387 1388 /* 1389 * If we expect an interrupt for the next entry, tell host 1390 * by writing event index and flush out the write before 1391 * the read in the next get_buf call. 1392 */ 1393 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC) 1394 virtio_store_mb(vq->weak_barriers, 1395 &vq->packed.vring.driver->off_wrap, 1396 cpu_to_le16(vq->last_used_idx | 1397 (vq->packed.used_wrap_counter << 1398 VRING_PACKED_EVENT_F_WRAP_CTR))); 1399 1400 LAST_ADD_TIME_INVALID(vq); 1401 1402 END_USE(vq); 1403 return ret; 1404 } 1405 1406 static void virtqueue_disable_cb_packed(struct virtqueue *_vq) 1407 { 1408 struct vring_virtqueue *vq = to_vvq(_vq); 1409 1410 if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) { 1411 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; 1412 vq->packed.vring.driver->flags = 1413 cpu_to_le16(vq->packed.event_flags_shadow); 1414 } 1415 } 1416 1417 static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq) 1418 { 1419 struct vring_virtqueue *vq = to_vvq(_vq); 1420 1421 START_USE(vq); 1422 1423 /* 1424 * We optimistically turn back on interrupts, then check if there was 1425 * more to do. 1426 */ 1427 1428 if (vq->event) { 1429 vq->packed.vring.driver->off_wrap = 1430 cpu_to_le16(vq->last_used_idx | 1431 (vq->packed.used_wrap_counter << 1432 VRING_PACKED_EVENT_F_WRAP_CTR)); 1433 /* 1434 * We need to update event offset and event wrap 1435 * counter first before updating event flags. 1436 */ 1437 virtio_wmb(vq->weak_barriers); 1438 } 1439 1440 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 1441 vq->packed.event_flags_shadow = vq->event ? 1442 VRING_PACKED_EVENT_FLAG_DESC : 1443 VRING_PACKED_EVENT_FLAG_ENABLE; 1444 vq->packed.vring.driver->flags = 1445 cpu_to_le16(vq->packed.event_flags_shadow); 1446 } 1447 1448 END_USE(vq); 1449 return vq->last_used_idx | ((u16)vq->packed.used_wrap_counter << 1450 VRING_PACKED_EVENT_F_WRAP_CTR); 1451 } 1452 1453 static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap) 1454 { 1455 struct vring_virtqueue *vq = to_vvq(_vq); 1456 bool wrap_counter; 1457 u16 used_idx; 1458 1459 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; 1460 used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); 1461 1462 return is_used_desc_packed(vq, used_idx, wrap_counter); 1463 } 1464 1465 static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq) 1466 { 1467 struct vring_virtqueue *vq = to_vvq(_vq); 1468 u16 used_idx, wrap_counter; 1469 u16 bufs; 1470 1471 START_USE(vq); 1472 1473 /* 1474 * We optimistically turn back on interrupts, then check if there was 1475 * more to do. 
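	 * The event value written below packs a ring index and a wrap
	 * counter into one 16-bit word, with the counter in bit
	 * VRING_PACKED_EVENT_F_WRAP_CTR (15):
	 *
	 *	off_wrap = idx | (wrap << VRING_PACKED_EVENT_F_WRAP_CTR);
	 *	idx  = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
	 *	wrap = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
	 *
	 * e.g. index 5 with the counter set encodes as 0x8005.  The same
	 * encoding is returned by virtqueue_enable_cb_prepare_packed()
	 * and decoded by virtqueue_poll_packed() above.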
1476 */ 1477 1478 if (vq->event) { 1479 /* TODO: tune this threshold */ 1480 bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4; 1481 wrap_counter = vq->packed.used_wrap_counter; 1482 1483 used_idx = vq->last_used_idx + bufs; 1484 if (used_idx >= vq->packed.vring.num) { 1485 used_idx -= vq->packed.vring.num; 1486 wrap_counter ^= 1; 1487 } 1488 1489 vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx | 1490 (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR)); 1491 1492 /* 1493 * We need to update event offset and event wrap 1494 * counter first before updating event flags. 1495 */ 1496 virtio_wmb(vq->weak_barriers); 1497 } else { 1498 used_idx = vq->last_used_idx; 1499 wrap_counter = vq->packed.used_wrap_counter; 1500 } 1501 1502 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 1503 vq->packed.event_flags_shadow = vq->event ? 1504 VRING_PACKED_EVENT_FLAG_DESC : 1505 VRING_PACKED_EVENT_FLAG_ENABLE; 1506 vq->packed.vring.driver->flags = 1507 cpu_to_le16(vq->packed.event_flags_shadow); 1508 } 1509 1510 /* 1511 * We need to update event suppression structure first 1512 * before re-checking for more used buffers. 1513 */ 1514 virtio_mb(vq->weak_barriers); 1515 1516 if (is_used_desc_packed(vq, used_idx, wrap_counter)) { 1517 END_USE(vq); 1518 return false; 1519 } 1520 1521 END_USE(vq); 1522 return true; 1523 } 1524 1525 static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq) 1526 { 1527 struct vring_virtqueue *vq = to_vvq(_vq); 1528 unsigned int i; 1529 void *buf; 1530 1531 START_USE(vq); 1532 1533 for (i = 0; i < vq->packed.vring.num; i++) { 1534 if (!vq->packed.desc_state[i].data) 1535 continue; 1536 /* detach_buf clears data, so grab it now. */ 1537 buf = vq->packed.desc_state[i].data; 1538 detach_buf_packed(vq, i, NULL); 1539 END_USE(vq); 1540 return buf; 1541 } 1542 /* That should have freed everything. 
*/ 1543 BUG_ON(vq->vq.num_free != vq->packed.vring.num); 1544 1545 END_USE(vq); 1546 return NULL; 1547 } 1548 1549 static struct virtqueue *vring_create_virtqueue_packed( 1550 unsigned int index, 1551 unsigned int num, 1552 unsigned int vring_align, 1553 struct virtio_device *vdev, 1554 bool weak_barriers, 1555 bool may_reduce_num, 1556 bool context, 1557 bool (*notify)(struct virtqueue *), 1558 void (*callback)(struct virtqueue *), 1559 const char *name) 1560 { 1561 struct vring_virtqueue *vq; 1562 struct vring_packed_desc *ring; 1563 struct vring_packed_desc_event *driver, *device; 1564 dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr; 1565 size_t ring_size_in_bytes, event_size_in_bytes; 1566 unsigned int i; 1567 1568 ring_size_in_bytes = num * sizeof(struct vring_packed_desc); 1569 1570 ring = vring_alloc_queue(vdev, ring_size_in_bytes, 1571 &ring_dma_addr, 1572 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1573 if (!ring) 1574 goto err_ring; 1575 1576 event_size_in_bytes = sizeof(struct vring_packed_desc_event); 1577 1578 driver = vring_alloc_queue(vdev, event_size_in_bytes, 1579 &driver_event_dma_addr, 1580 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1581 if (!driver) 1582 goto err_driver; 1583 1584 device = vring_alloc_queue(vdev, event_size_in_bytes, 1585 &device_event_dma_addr, 1586 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1587 if (!device) 1588 goto err_device; 1589 1590 vq = kmalloc(sizeof(*vq), GFP_KERNEL); 1591 if (!vq) 1592 goto err_vq; 1593 1594 vq->vq.callback = callback; 1595 vq->vq.vdev = vdev; 1596 vq->vq.name = name; 1597 vq->vq.num_free = num; 1598 vq->vq.index = index; 1599 vq->we_own_ring = true; 1600 vq->notify = notify; 1601 vq->weak_barriers = weak_barriers; 1602 vq->broken = false; 1603 vq->last_used_idx = 0; 1604 vq->num_added = 0; 1605 vq->packed_ring = true; 1606 vq->use_dma_api = vring_use_dma_api(vdev); 1607 list_add_tail(&vq->vq.list, &vdev->vqs); 1608 #ifdef DEBUG 1609 vq->in_use = false; 1610 vq->last_add_time_valid = false; 1611 #endif 1612 1613 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && 1614 !context; 1615 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 1616 1617 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) 1618 vq->weak_barriers = false; 1619 1620 vq->packed.ring_dma_addr = ring_dma_addr; 1621 vq->packed.driver_event_dma_addr = driver_event_dma_addr; 1622 vq->packed.device_event_dma_addr = device_event_dma_addr; 1623 1624 vq->packed.ring_size_in_bytes = ring_size_in_bytes; 1625 vq->packed.event_size_in_bytes = event_size_in_bytes; 1626 1627 vq->packed.vring.num = num; 1628 vq->packed.vring.desc = ring; 1629 vq->packed.vring.driver = driver; 1630 vq->packed.vring.device = device; 1631 1632 vq->packed.next_avail_idx = 0; 1633 vq->packed.avail_wrap_counter = 1; 1634 vq->packed.used_wrap_counter = 1; 1635 vq->packed.event_flags_shadow = 0; 1636 vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL; 1637 1638 vq->packed.desc_state = kmalloc_array(num, 1639 sizeof(struct vring_desc_state_packed), 1640 GFP_KERNEL); 1641 if (!vq->packed.desc_state) 1642 goto err_desc_state; 1643 1644 memset(vq->packed.desc_state, 0, 1645 num * sizeof(struct vring_desc_state_packed)); 1646 1647 /* Put everything in free lists. 
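	 * Free slots are threaded through desc_state[].next rather than
	 * through the ring itself.  After the loop below, with num == 4
	 * for illustration:
	 *
	 *	free_head = 0, desc_state[].next = { 1, 2, 3, 0 }
	 *
	 * virtqueue_add_packed() pops ids off free_head and
	 * detach_buf_packed() links finished chains back onto it.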
*/ 1648 vq->free_head = 0; 1649 for (i = 0; i < num-1; i++) 1650 vq->packed.desc_state[i].next = i + 1; 1651 1652 vq->packed.desc_extra = kmalloc_array(num, 1653 sizeof(struct vring_desc_extra_packed), 1654 GFP_KERNEL); 1655 if (!vq->packed.desc_extra) 1656 goto err_desc_extra; 1657 1658 memset(vq->packed.desc_extra, 0, 1659 num * sizeof(struct vring_desc_extra_packed)); 1660 1661 /* No callback? Tell other side not to bother us. */ 1662 if (!callback) { 1663 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; 1664 vq->packed.vring.driver->flags = 1665 cpu_to_le16(vq->packed.event_flags_shadow); 1666 } 1667 1668 return &vq->vq; 1669 1670 err_desc_extra: 1671 kfree(vq->packed.desc_state); 1672 err_desc_state: 1673 kfree(vq); 1674 err_vq: 1675 vring_free_queue(vdev, event_size_in_bytes, device, ring_dma_addr); 1676 err_device: 1677 vring_free_queue(vdev, event_size_in_bytes, driver, ring_dma_addr); 1678 err_driver: 1679 vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr); 1680 err_ring: 1681 return NULL; 1682 } 1683 1684 1685 /* 1686 * Generic functions and exported symbols. 1687 */ 1688 1689 static inline int virtqueue_add(struct virtqueue *_vq, 1690 struct scatterlist *sgs[], 1691 unsigned int total_sg, 1692 unsigned int out_sgs, 1693 unsigned int in_sgs, 1694 void *data, 1695 void *ctx, 1696 gfp_t gfp) 1697 { 1698 struct vring_virtqueue *vq = to_vvq(_vq); 1699 1700 return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg, 1701 out_sgs, in_sgs, data, ctx, gfp) : 1702 virtqueue_add_split(_vq, sgs, total_sg, 1703 out_sgs, in_sgs, data, ctx, gfp); 1704 } 1705 1706 /** 1707 * virtqueue_add_sgs - expose buffers to other end 1708 * @vq: the struct virtqueue we're talking about. 1709 * @sgs: array of terminated scatterlists. 1710 * @out_num: the number of scatterlists readable by other side 1711 * @in_num: the number of scatterlists which are writable (after readable ones) 1712 * @data: the token identifying the buffer. 1713 * @gfp: how to do memory allocations (if necessary). 1714 * 1715 * Caller must ensure we don't call this with other virtqueue operations 1716 * at the same time (except where noted). 1717 * 1718 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1719 */ 1720 int virtqueue_add_sgs(struct virtqueue *_vq, 1721 struct scatterlist *sgs[], 1722 unsigned int out_sgs, 1723 unsigned int in_sgs, 1724 void *data, 1725 gfp_t gfp) 1726 { 1727 unsigned int i, total_sg = 0; 1728 1729 /* Count them first. */ 1730 for (i = 0; i < out_sgs + in_sgs; i++) { 1731 struct scatterlist *sg; 1732 1733 for (sg = sgs[i]; sg; sg = sg_next(sg)) 1734 total_sg++; 1735 } 1736 return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, 1737 data, NULL, gfp); 1738 } 1739 EXPORT_SYMBOL_GPL(virtqueue_add_sgs); 1740 1741 /** 1742 * virtqueue_add_outbuf - expose output buffers to other end 1743 * @vq: the struct virtqueue we're talking about. 1744 * @sg: scatterlist (must be well-formed and terminated!) 1745 * @num: the number of entries in @sg readable by other side 1746 * @data: the token identifying the buffer. 1747 * @gfp: how to do memory allocations (if necessary). 1748 * 1749 * Caller must ensure we don't call this with other virtqueue operations 1750 * at the same time (except where noted). 1751 * 1752 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 
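 *
 * Typical use, as an illustrative sketch (buf must be addressable by
 * the device, in practice a kmalloc()ed buffer rather than on-stack
 * memory, and any locking is the caller's):
 *
 *	struct scatterlist sg;
 *
 *	sg_init_one(&sg, buf, len);
 *	err = virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC);
 *	if (!err)
 *		virtqueue_kick(vq);
 *
 * The same 'buf' cookie is handed back by virtqueue_get_buf() once the
 * device has consumed the buffer.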
1753 */ 1754 int virtqueue_add_outbuf(struct virtqueue *vq, 1755 struct scatterlist *sg, unsigned int num, 1756 void *data, 1757 gfp_t gfp) 1758 { 1759 return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp); 1760 } 1761 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); 1762 1763 /** 1764 * virtqueue_add_inbuf - expose input buffers to other end 1765 * @vq: the struct virtqueue we're talking about. 1766 * @sg: scatterlist (must be well-formed and terminated!) 1767 * @num: the number of entries in @sg writable by other side 1768 * @data: the token identifying the buffer. 1769 * @gfp: how to do memory allocations (if necessary). 1770 * 1771 * Caller must ensure we don't call this with other virtqueue operations 1772 * at the same time (except where noted). 1773 * 1774 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1775 */ 1776 int virtqueue_add_inbuf(struct virtqueue *vq, 1777 struct scatterlist *sg, unsigned int num, 1778 void *data, 1779 gfp_t gfp) 1780 { 1781 return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp); 1782 } 1783 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); 1784 1785 /** 1786 * virtqueue_add_inbuf_ctx - expose input buffers to other end 1787 * @vq: the struct virtqueue we're talking about. 1788 * @sg: scatterlist (must be well-formed and terminated!) 1789 * @num: the number of entries in @sg writable by other side 1790 * @data: the token identifying the buffer. 1791 * @ctx: extra context for the token 1792 * @gfp: how to do memory allocations (if necessary). 1793 * 1794 * Caller must ensure we don't call this with other virtqueue operations 1795 * at the same time (except where noted). 1796 * 1797 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1798 */ 1799 int virtqueue_add_inbuf_ctx(struct virtqueue *vq, 1800 struct scatterlist *sg, unsigned int num, 1801 void *data, 1802 void *ctx, 1803 gfp_t gfp) 1804 { 1805 return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp); 1806 } 1807 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx); 1808 1809 /** 1810 * virtqueue_kick_prepare - first half of split virtqueue_kick call. 1811 * @vq: the struct virtqueue 1812 * 1813 * Instead of virtqueue_kick(), you can do: 1814 * if (virtqueue_kick_prepare(vq)) 1815 * virtqueue_notify(vq); 1816 * 1817 * This is sometimes useful because the virtqueue_kick_prepare() needs 1818 * to be serialized, but the actual virtqueue_notify() call does not. 1819 */ 1820 bool virtqueue_kick_prepare(struct virtqueue *_vq) 1821 { 1822 struct vring_virtqueue *vq = to_vvq(_vq); 1823 1824 return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) : 1825 virtqueue_kick_prepare_split(_vq); 1826 } 1827 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare); 1828 1829 /** 1830 * virtqueue_notify - second half of split virtqueue_kick call. 1831 * @vq: the struct virtqueue 1832 * 1833 * This does not need to be serialized. 1834 * 1835 * Returns false if host notify failed or queue is broken, otherwise true. 1836 */ 1837 bool virtqueue_notify(struct virtqueue *_vq) 1838 { 1839 struct vring_virtqueue *vq = to_vvq(_vq); 1840 1841 if (unlikely(vq->broken)) 1842 return false; 1843 1844 /* Prod other side to tell it about changes. */ 1845 if (!vq->notify(_vq)) { 1846 vq->broken = true; 1847 return false; 1848 } 1849 return true; 1850 } 1851 EXPORT_SYMBOL_GPL(virtqueue_notify); 1852 1853 /** 1854 * virtqueue_kick - update after add_buf 1855 * @vq: the struct virtqueue 1856 * 1857 * After one or more virtqueue_add_* calls, invoke this to kick 1858 * the other side. 
1859 * 1860 * Caller must ensure we don't call this with other virtqueue 1861 * operations at the same time (except where noted). 1862 * 1863 * Returns false if kick failed, otherwise true. 1864 */ 1865 bool virtqueue_kick(struct virtqueue *vq) 1866 { 1867 if (virtqueue_kick_prepare(vq)) 1868 return virtqueue_notify(vq); 1869 return true; 1870 } 1871 EXPORT_SYMBOL_GPL(virtqueue_kick); 1872 1873 /** 1874 * virtqueue_get_buf - get the next used buffer 1875 * @vq: the struct virtqueue we're talking about. 1876 * @len: the length written into the buffer 1877 * 1878 * If the device wrote data into the buffer, @len will be set to the 1879 * amount written. This means you don't need to clear the buffer 1880 * beforehand to ensure there's no data leakage in the case of short 1881 * writes. 1882 * 1883 * Caller must ensure we don't call this with other virtqueue 1884 * operations at the same time (except where noted). 1885 * 1886 * Returns NULL if there are no used buffers, or the "data" token 1887 * handed to virtqueue_add_*(). 1888 */ 1889 void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, 1890 void **ctx) 1891 { 1892 struct vring_virtqueue *vq = to_vvq(_vq); 1893 1894 return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) : 1895 virtqueue_get_buf_ctx_split(_vq, len, ctx); 1896 } 1897 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx); 1898 1899 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) 1900 { 1901 return virtqueue_get_buf_ctx(_vq, len, NULL); 1902 } 1903 EXPORT_SYMBOL_GPL(virtqueue_get_buf); 1904 /** 1905 * virtqueue_disable_cb - disable callbacks 1906 * @vq: the struct virtqueue we're talking about. 1907 * 1908 * Note that this is not necessarily synchronous, hence unreliable and only 1909 * useful as an optimization. 1910 * 1911 * Unlike other operations, this need not be serialized. 1912 */ 1913 void virtqueue_disable_cb(struct virtqueue *_vq) 1914 { 1915 struct vring_virtqueue *vq = to_vvq(_vq); 1916 1917 if (vq->packed_ring) 1918 virtqueue_disable_cb_packed(_vq); 1919 else 1920 virtqueue_disable_cb_split(_vq); 1921 } 1922 EXPORT_SYMBOL_GPL(virtqueue_disable_cb); 1923 1924 /** 1925 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb 1926 * @vq: the struct virtqueue we're talking about. 1927 * 1928 * This re-enables callbacks; it returns current queue state 1929 * in an opaque unsigned value. This value should be later tested by 1930 * virtqueue_poll, to detect a possible race between the driver checking for 1931 * more work, and enabling callbacks. 1932 * 1933 * Caller must ensure we don't call this with other virtqueue 1934 * operations at the same time (except where noted). 1935 */ 1936 unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq) 1937 { 1938 struct vring_virtqueue *vq = to_vvq(_vq); 1939 1940 return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) : 1941 virtqueue_enable_cb_prepare_split(_vq); 1942 } 1943 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); 1944 1945 /** 1946 * virtqueue_poll - query pending used buffers 1947 * @vq: the struct virtqueue we're talking about. 1948 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare). 1949 * 1950 * Returns "true" if there are pending used buffers in the queue. 1951 * 1952 * This does not need to be serialized. 1953 */ 1954 bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx) 1955 { 1956 struct vring_virtqueue *vq = to_vvq(_vq); 1957 1958 virtio_mb(vq->weak_barriers); 1959 return vq->packed_ring ? 
/**
 * virtqueue_poll - query pending used buffers
 * @vq: the struct virtqueue we're talking about.
 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
 *
 * Returns "true" if there are pending used buffers in the queue.
 *
 * This does not need to be serialized.
 */
bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	virtio_mb(vq->weak_barriers);
	return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
				 virtqueue_poll_split(_vq, last_used_idx);
}
EXPORT_SYMBOL_GPL(virtqueue_poll);

/**
 * virtqueue_enable_cb - restart callbacks after disable_cb.
 * @vq: the struct virtqueue we're talking about.
 *
 * This re-enables callbacks; it returns "false" if there are pending
 * buffers in the queue, to detect a possible race between the driver
 * checking for more work, and enabling callbacks.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 */
bool virtqueue_enable_cb(struct virtqueue *_vq)
{
	unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq);

	return !virtqueue_poll(_vq, last_used_idx);
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb);

/**
 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
 * @vq: the struct virtqueue we're talking about.
 *
 * This re-enables callbacks but hints to the other side to delay
 * interrupts until most of the available buffers have been processed;
 * it returns "false" if there are many pending buffers in the queue,
 * to detect a possible race between the driver checking for more work,
 * and enabling callbacks.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 */
bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
				 virtqueue_enable_cb_delayed_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);

/**
 * virtqueue_detach_unused_buf - detach first unused buffer
 * @vq: the struct virtqueue we're talking about.
 *
 * Returns NULL or the "data" token handed to virtqueue_add_*().
 * This is not valid on an active queue; it is useful only for device
 * shutdown.
 */
void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
				 virtqueue_detach_unused_buf_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
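/*
 * Illustrative sketch (not part of this file): the usual way to stop
 * processing without losing a wakeup is to re-enable callbacks and then
 * re-check for work, either with virtqueue_enable_cb() in a loop or with
 * the virtqueue_enable_cb_prepare()/virtqueue_poll() pair shown here.
 * my_poll_until_idle() and my_process_used() are hypothetical names.
 */
static inline void my_poll_until_idle(struct virtqueue *vq,
				      void (*my_process_used)(struct virtqueue *vq))
{
	unsigned int opaque;

	for (;;) {
		virtqueue_disable_cb(vq);
		my_process_used(vq);	/* drain with virtqueue_get_buf() */

		opaque = virtqueue_enable_cb_prepare(vq);
		if (!virtqueue_poll(vq, opaque))
			break;		/* no new buffers; callbacks are on */
		/* A buffer arrived in the window; go around again. */
	}
}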
static inline bool more_used(const struct vring_virtqueue *vq)
{
	return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
}

irqreturn_t vring_interrupt(int irq, void *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (!more_used(vq)) {
		pr_debug("virtqueue interrupt with no work for %p\n", vq);
		return IRQ_NONE;
	}

	if (unlikely(vq->broken))
		return IRQ_HANDLED;

	pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
	if (vq->vq.callback)
		vq->vq.callback(&vq->vq);

	return IRQ_HANDLED;
}
EXPORT_SYMBOL_GPL(vring_interrupt);

/* Only available for split ring */
struct virtqueue *__vring_new_virtqueue(unsigned int index,
					struct vring vring,
					struct virtio_device *vdev,
					bool weak_barriers,
					bool context,
					bool (*notify)(struct virtqueue *),
					void (*callback)(struct virtqueue *),
					const char *name)
{
	unsigned int i;
	struct vring_virtqueue *vq;

	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
		return NULL;

	vq = kmalloc(sizeof(*vq), GFP_KERNEL);
	if (!vq)
		return NULL;

	vq->packed_ring = false;
	vq->vq.callback = callback;
	vq->vq.vdev = vdev;
	vq->vq.name = name;
	vq->vq.num_free = vring.num;
	vq->vq.index = index;
	vq->we_own_ring = false;
	vq->notify = notify;
	vq->weak_barriers = weak_barriers;
	vq->broken = false;
	vq->last_used_idx = 0;
	vq->num_added = 0;
	vq->use_dma_api = vring_use_dma_api(vdev);
	list_add_tail(&vq->vq.list, &vdev->vqs);
#ifdef DEBUG
	vq->in_use = false;
	vq->last_add_time_valid = false;
#endif

	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
		!context;
	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);

	if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
		vq->weak_barriers = false;

	vq->split.queue_dma_addr = 0;
	vq->split.queue_size_in_bytes = 0;

	vq->split.vring = vring;
	vq->split.avail_flags_shadow = 0;
	vq->split.avail_idx_shadow = 0;

	/* No callback?  Tell other side not to bother us. */
	if (!callback) {
		vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
		if (!vq->event)
			vq->split.vring.avail->flags = cpu_to_virtio16(vdev,
					vq->split.avail_flags_shadow);
	}

	vq->split.desc_state = kmalloc_array(vring.num,
			sizeof(struct vring_desc_state_split), GFP_KERNEL);
	if (!vq->split.desc_state) {
		kfree(vq);
		return NULL;
	}
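	/*
	 * The split ring's unused descriptors are kept on a free list that is
	 * threaded through the descriptor table's ->next fields: the loop
	 * below chains entry i to entry i + 1, so the whole table starts out
	 * as a single free list headed at free_head.  The split add/detach
	 * paths take descriptors from and return them to this list.
	 */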
	/* Put everything in free lists. */
	vq->free_head = 0;
	for (i = 0; i < vring.num - 1; i++)
		vq->split.vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
	memset(vq->split.desc_state, 0, vring.num *
			sizeof(struct vring_desc_state_split));

	return &vq->vq;
}
EXPORT_SYMBOL_GPL(__vring_new_virtqueue);

struct virtqueue *vring_create_virtqueue(
	unsigned int index,
	unsigned int num,
	unsigned int vring_align,
	struct virtio_device *vdev,
	bool weak_barriers,
	bool may_reduce_num,
	bool context,
	bool (*notify)(struct virtqueue *),
	void (*callback)(struct virtqueue *),
	const char *name)
{
	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
		return vring_create_virtqueue_packed(index, num, vring_align,
				vdev, weak_barriers, may_reduce_num,
				context, notify, callback, name);

	return vring_create_virtqueue_split(index, num, vring_align,
			vdev, weak_barriers, may_reduce_num,
			context, notify, callback, name);
}
EXPORT_SYMBOL_GPL(vring_create_virtqueue);

/* Only available for split ring */
struct virtqueue *vring_new_virtqueue(unsigned int index,
				      unsigned int num,
				      unsigned int vring_align,
				      struct virtio_device *vdev,
				      bool weak_barriers,
				      bool context,
				      void *pages,
				      bool (*notify)(struct virtqueue *vq),
				      void (*callback)(struct virtqueue *vq),
				      const char *name)
{
	struct vring vring;

	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
		return NULL;

	vring_init(&vring, num, pages, vring_align);
	return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
				     notify, callback, name);
}
EXPORT_SYMBOL_GPL(vring_new_virtqueue);

void vring_del_virtqueue(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (vq->we_own_ring) {
		if (vq->packed_ring) {
			vring_free_queue(vq->vq.vdev,
					 vq->packed.ring_size_in_bytes,
					 vq->packed.vring.desc,
					 vq->packed.ring_dma_addr);

			vring_free_queue(vq->vq.vdev,
					 vq->packed.event_size_in_bytes,
					 vq->packed.vring.driver,
					 vq->packed.driver_event_dma_addr);

			vring_free_queue(vq->vq.vdev,
					 vq->packed.event_size_in_bytes,
					 vq->packed.vring.device,
					 vq->packed.device_event_dma_addr);

			kfree(vq->packed.desc_state);
			kfree(vq->packed.desc_extra);
		} else {
			vring_free_queue(vq->vq.vdev,
					 vq->split.queue_size_in_bytes,
					 vq->split.vring.desc,
					 vq->split.queue_dma_addr);

			kfree(vq->split.desc_state);
		}
	}
	list_del(&_vq->list);
	kfree(vq);
}
EXPORT_SYMBOL_GPL(vring_del_virtqueue);
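/*
 * Illustrative sketch (not part of this file): a transport typically creates
 * its queues with vring_create_virtqueue() above, passing a notify callback
 * that rings the device's doorbell.  my_notify(), my_create_vq() and the
 * chosen ring size of 128 entries are hypothetical.
 */
static inline bool my_notify(struct virtqueue *vq)
{
	/* e.g. write vq->index to a doorbell register; false means failure. */
	return true;
}

static inline struct virtqueue *my_create_vq(struct virtio_device *vdev,
					     unsigned int index,
					     void (*callback)(struct virtqueue *),
					     const char *name)
{
	return vring_create_virtqueue(index, 128, PAGE_SIZE, vdev,
				      true,  /* weak_barriers */
				      true,  /* may_reduce_num */
				      false, /* context */
				      my_notify, callback, name);
}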
/* Manipulates transport-specific feature bits. */
void vring_transport_features(struct virtio_device *vdev)
{
	unsigned int i;

	for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
		switch (i) {
		case VIRTIO_RING_F_INDIRECT_DESC:
			break;
		case VIRTIO_RING_F_EVENT_IDX:
			break;
		case VIRTIO_F_VERSION_1:
			break;
		case VIRTIO_F_IOMMU_PLATFORM:
			break;
		case VIRTIO_F_RING_PACKED:
			break;
		case VIRTIO_F_ORDER_PLATFORM:
			break;
		default:
			/* We don't understand this bit. */
			__virtio_clear_bit(vdev, i);
		}
	}
}
EXPORT_SYMBOL_GPL(vring_transport_features);

/**
 * virtqueue_get_vring_size - return the size of the virtqueue's vring
 * @vq: the struct virtqueue containing the vring of interest.
 *
 * Returns the size of the vring.  This is mainly used for boasting to
 * userspace.  Unlike other operations, this need not be serialized.
 */
unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
}
EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);

bool virtqueue_is_broken(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return vq->broken;
}
EXPORT_SYMBOL_GPL(virtqueue_is_broken);

/*
 * This should prevent the device from being used, allowing drivers to
 * recover.  You may need to grab appropriate locks to flush.
 */
void virtio_break_device(struct virtio_device *dev)
{
	struct virtqueue *_vq;

	list_for_each_entry(_vq, &dev->vqs, list) {
		struct vring_virtqueue *vq = to_vvq(_vq);

		vq->broken = true;
	}
}
EXPORT_SYMBOL_GPL(virtio_break_device);

dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	BUG_ON(!vq->we_own_ring);

	if (vq->packed_ring)
		return vq->packed.ring_dma_addr;

	return vq->split.queue_dma_addr;
}
EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);

dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	BUG_ON(!vq->we_own_ring);

	if (vq->packed_ring)
		return vq->packed.driver_event_dma_addr;

	return vq->split.queue_dma_addr +
		((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
}
EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);

dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	BUG_ON(!vq->we_own_ring);

	if (vq->packed_ring)
		return vq->packed.device_event_dma_addr;

	return vq->split.queue_dma_addr +
		((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
}
EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);

/* Only available for split ring */
const struct vring *virtqueue_get_vring(struct virtqueue *vq)
{
	return &to_vvq(vq)->split.vring;
}
EXPORT_SYMBOL_GPL(virtqueue_get_vring);

MODULE_LICENSE("GPL");