/* Virtio ring implementation.
 *
 *  Copyright 2007 Rusty Russell IBM Corporation
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
 */
#include <linux/virtio.h>
#include <linux/virtio_ring.h>
#include <linux/virtio_config.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/hrtimer.h>
#include <linux/dma-mapping.h>
#include <xen/xen.h>

#ifdef DEBUG
/* For development, we want to crash whenever the ring is screwed. */
#define BAD_RING(_vq, fmt, args...)				\
	do {							\
		dev_err(&(_vq)->vq.vdev->dev,			\
			"%s:"fmt, (_vq)->vq.name, ##args);	\
		BUG();						\
	} while (0)
/* Caller is supposed to guarantee no reentry. */
#define START_USE(_vq)						\
	do {							\
		if ((_vq)->in_use)				\
			panic("%s:in_use = %i\n",		\
			      (_vq)->vq.name, (_vq)->in_use);	\
		(_vq)->in_use = __LINE__;			\
	} while (0)
#define END_USE(_vq) \
	do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0)
#define LAST_ADD_TIME_UPDATE(_vq)				\
	do {							\
		ktime_t now = ktime_get();			\
								\
		/* No kick or get, with .1 second between?  Warn. */ \
		if ((_vq)->last_add_time_valid)			\
			WARN_ON(ktime_to_ms(ktime_sub(now,	\
				(_vq)->last_add_time)) > 100);	\
		(_vq)->last_add_time = now;			\
		(_vq)->last_add_time_valid = true;		\
	} while (0)
#define LAST_ADD_TIME_CHECK(_vq)				\
	do {							\
		if ((_vq)->last_add_time_valid) {		\
			WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
				(_vq)->last_add_time)) > 100);	\
		}						\
	} while (0)
#define LAST_ADD_TIME_INVALID(_vq)				\
	((_vq)->last_add_time_valid = false)
#else
#define BAD_RING(_vq, fmt, args...)				\
	do {							\
		dev_err(&_vq->vq.vdev->dev,			\
			"%s:"fmt, (_vq)->vq.name, ##args);	\
		(_vq)->broken = true;				\
	} while (0)
#define START_USE(vq)
#define END_USE(vq)
#define LAST_ADD_TIME_UPDATE(vq)
#define LAST_ADD_TIME_CHECK(vq)
#define LAST_ADD_TIME_INVALID(vq)
#endif

struct vring_desc_state_split {
	void *data;			/* Data for callback. */
	struct vring_desc *indir_desc;	/* Indirect descriptor, if any. */
};

struct vring_desc_state_packed {
	void *data;			/* Data for callback. */
	struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
	u16 num;			/* Descriptor list length. */
	u16 next;			/* The next desc state in a list. */
	u16 last;			/* The last desc state in a list. */
};

struct vring_desc_extra_packed {
	dma_addr_t addr;		/* Buffer DMA addr. */
	u32 len;			/* Buffer length. */
	u16 flags;			/* Descriptor flags. */
};

struct vring_virtqueue {
	struct virtqueue vq;

	/* Is this a packed ring? */
	bool packed_ring;

	/* Is DMA API used? */
	bool use_dma_api;

	/* Can we use weak barriers? */
	bool weak_barriers;

	/* Other side has made a mess, don't try any more. */
	bool broken;

	/* Host supports indirect buffers */
	bool indirect;

	/* Host publishes avail event idx */
	bool event;

	/* Head of free buffer list. */
	unsigned int free_head;
	/* Number we've added since last sync. */
	unsigned int num_added;

	/* Last used index we've seen. */
	u16 last_used_idx;

	union {
		/* Available for split ring */
		struct {
			/* Actual memory layout for this queue. */
			struct vring vring;

			/* Last written value to avail->flags */
			u16 avail_flags_shadow;

			/*
			 * Last written value to avail->idx in
			 * guest byte order.
			 */
			u16 avail_idx_shadow;

			/* Per-descriptor state. */
			struct vring_desc_state_split *desc_state;

			/* DMA address and size information */
			dma_addr_t queue_dma_addr;
			size_t queue_size_in_bytes;
		} split;

		/* Available for packed ring */
		struct {
			/* Actual memory layout for this queue. */
			struct {
				unsigned int num;
				struct vring_packed_desc *desc;
				struct vring_packed_desc_event *driver;
				struct vring_packed_desc_event *device;
			} vring;

			/* Driver ring wrap counter. */
			bool avail_wrap_counter;

			/* Device ring wrap counter. */
			bool used_wrap_counter;

			/* Avail used flags. */
			u16 avail_used_flags;

			/* Index of the next avail descriptor. */
			u16 next_avail_idx;

			/*
			 * Last written value to driver->flags in
			 * guest byte order.
			 */
			u16 event_flags_shadow;

			/* Per-descriptor state. */
			struct vring_desc_state_packed *desc_state;
			struct vring_desc_extra_packed *desc_extra;

			/* DMA address and size information */
			dma_addr_t ring_dma_addr;
			dma_addr_t driver_event_dma_addr;
			dma_addr_t device_event_dma_addr;
			size_t ring_size_in_bytes;
			size_t event_size_in_bytes;
		} packed;
	};

	/* How to notify other side. FIXME: commonalize hcalls! */
	bool (*notify)(struct virtqueue *vq);

	/* DMA, allocation, and size information */
	bool we_own_ring;

#ifdef DEBUG
	/* They're supposed to lock for us. */
	unsigned int in_use;

	/* Figure out if their kicks are too delayed. */
	bool last_add_time_valid;
	ktime_t last_add_time;
#endif
};


/*
 * Helpers.
 */

#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)

static inline bool virtqueue_use_indirect(struct virtqueue *_vq,
					  unsigned int total_sg)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	/*
	 * If the host supports indirect descriptor tables, and we have multiple
	 * buffers, then go indirect. FIXME: tune this threshold
	 */
	return (vq->indirect && total_sg > 1 && vq->vq.num_free);
}
/*
 * Modern virtio devices have feature bits to specify whether they need a
 * quirk and bypass the IOMMU. If not there, just use the DMA API.
 *
 * If there, the interaction between virtio and DMA API is messy.
 *
 * On most systems with virtio, physical addresses match bus addresses,
 * and it doesn't particularly matter whether we use the DMA API.
 *
 * On some systems, including Xen and any system with a physical device
 * that speaks virtio behind a physical IOMMU, we must use the DMA API
 * for virtio DMA to work at all.
 *
 * On other systems, including SPARC and PPC64, virtio-pci devices are
 * enumerated as though they are behind an IOMMU, but the virtio host
 * ignores the IOMMU, so we must either pretend that the IOMMU isn't
 * there or somehow map everything as the identity.
 *
 * For the time being, we preserve historic behavior and bypass the DMA
 * API.
 *
 * TODO: install a per-device DMA ops structure that does the right thing
 * taking into account all the above quirks, and use the DMA API
 * unconditionally on the data path.
 */

static bool vring_use_dma_api(struct virtio_device *vdev)
{
	if (!virtio_has_iommu_quirk(vdev))
		return true;

	/* Otherwise, we are left to guess. */
	/*
	 * In theory, it's possible to have a buggy QEMU-supplied
	 * emulated Q35 IOMMU and Xen enabled at the same time.  On
	 * such a configuration, virtio has never worked and will
	 * not work without an even larger kludge.  Instead, enable
	 * the DMA API if we're a Xen guest, which at least allows
	 * all of the sensible Xen configurations to work correctly.
	 */
	if (xen_domain())
		return true;

	return false;
}

size_t virtio_max_dma_size(struct virtio_device *vdev)
{
	size_t max_segment_size = SIZE_MAX;

	if (vring_use_dma_api(vdev))
		max_segment_size = dma_max_mapping_size(&vdev->dev);

	return max_segment_size;
}
EXPORT_SYMBOL_GPL(virtio_max_dma_size);

static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
			       dma_addr_t *dma_handle, gfp_t flag)
{
	if (vring_use_dma_api(vdev)) {
		return dma_alloc_coherent(vdev->dev.parent, size,
					  dma_handle, flag);
	} else {
		void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);

		if (queue) {
			phys_addr_t phys_addr = virt_to_phys(queue);
			*dma_handle = (dma_addr_t)phys_addr;

			/*
			 * Sanity check: make sure we didn't truncate
			 * the address.  The only arches I can find that
			 * have 64-bit phys_addr_t but 32-bit dma_addr_t
			 * are certain non-highmem MIPS and x86
			 * configurations, but these configurations
			 * should never allocate physical pages above 32
			 * bits, so this is fine.  Just in case, throw a
			 * warning and abort if we end up with an
			 * unrepresentable address.
			 */
			if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
				free_pages_exact(queue, PAGE_ALIGN(size));
				return NULL;
			}
		}
		return queue;
	}
}

static void vring_free_queue(struct virtio_device *vdev, size_t size,
			     void *queue, dma_addr_t dma_handle)
{
	if (vring_use_dma_api(vdev))
		dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
	else
		free_pages_exact(queue, PAGE_ALIGN(size));
}

/*
 * The DMA ops on various arches are rather gnarly right now, and
 * making all of the arch DMA ops work on the vring device itself
 * is a mess.  For now, we use the parent device for DMA ops.
 */
static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq)
{
	return vq->vq.vdev->dev.parent;
}
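
/*
 * Illustration (hypothetical driver code, not part of this file): a
 * driver can use virtio_max_dma_size() above to cap how large a single
 * buffer it maps, so that no one mapping exceeds what the DMA layer
 * can handle when the DMA API is in use:
 *
 *	static int foo_probe(struct virtio_device *vdev)
 *	{
 *		size_t max_seg = virtio_max_dma_size(vdev);
 *
 *		// Never hand the ring a segment larger than max_seg.
 *		foo_limit_segment_size(vdev, max_seg);
 *		return 0;
 *	}
 *
 * foo_probe()/foo_limit_segment_size() are made-up names; the point is
 * only that the limit comes from virtio_max_dma_size().
 */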
/* Map one sg entry. */
static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
				   struct scatterlist *sg,
				   enum dma_data_direction direction)
{
	if (!vq->use_dma_api)
		return (dma_addr_t)sg_phys(sg);

	/*
	 * We can't use dma_map_sg, because we don't use scatterlists in
	 * the way it expects (we don't guarantee that the scatterlist
	 * will exist for the lifetime of the mapping).
	 */
	return dma_map_page(vring_dma_dev(vq),
			    sg_page(sg), sg->offset, sg->length,
			    direction);
}

static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
				   void *cpu_addr, size_t size,
				   enum dma_data_direction direction)
{
	if (!vq->use_dma_api)
		return (dma_addr_t)virt_to_phys(cpu_addr);

	return dma_map_single(vring_dma_dev(vq),
			      cpu_addr, size, direction);
}

static int vring_mapping_error(const struct vring_virtqueue *vq,
			       dma_addr_t addr)
{
	if (!vq->use_dma_api)
		return 0;

	return dma_mapping_error(vring_dma_dev(vq), addr);
}


/*
 * Split ring specific functions - *_split().
 */

static void vring_unmap_one_split(const struct vring_virtqueue *vq,
				  struct vring_desc *desc)
{
	u16 flags;

	if (!vq->use_dma_api)
		return;

	flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);

	if (flags & VRING_DESC_F_INDIRECT) {
		dma_unmap_single(vring_dma_dev(vq),
				 virtio64_to_cpu(vq->vq.vdev, desc->addr),
				 virtio32_to_cpu(vq->vq.vdev, desc->len),
				 (flags & VRING_DESC_F_WRITE) ?
				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
	} else {
		dma_unmap_page(vring_dma_dev(vq),
			       virtio64_to_cpu(vq->vq.vdev, desc->addr),
			       virtio32_to_cpu(vq->vq.vdev, desc->len),
			       (flags & VRING_DESC_F_WRITE) ?
			       DMA_FROM_DEVICE : DMA_TO_DEVICE);
	}
}

static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
					       unsigned int total_sg,
					       gfp_t gfp)
{
	struct vring_desc *desc;
	unsigned int i;

	/*
	 * We require lowmem mappings for the descriptors because
	 * otherwise virt_to_phys will give us bogus addresses in the
	 * virtqueue.
	 */
	gfp &= ~__GFP_HIGHMEM;

	desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp);
	if (!desc)
		return NULL;

	for (i = 0; i < total_sg; i++)
		desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
	return desc;
}

static inline int virtqueue_add_split(struct virtqueue *_vq,
				      struct scatterlist *sgs[],
				      unsigned int total_sg,
				      unsigned int out_sgs,
				      unsigned int in_sgs,
				      void *data,
				      void *ctx,
				      gfp_t gfp)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	struct scatterlist *sg;
	struct vring_desc *desc;
	unsigned int i, n, avail, descs_used, uninitialized_var(prev), err_idx;
	int head;
	bool indirect;

	START_USE(vq);

	BUG_ON(data == NULL);
	BUG_ON(ctx && vq->indirect);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return -EIO;
	}

	LAST_ADD_TIME_UPDATE(vq);

	BUG_ON(total_sg == 0);

	head = vq->free_head;

	if (virtqueue_use_indirect(_vq, total_sg))
		desc = alloc_indirect_split(_vq, total_sg, gfp);
	else {
		desc = NULL;
		WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
	}

	if (desc) {
		/* Use a single buffer which doesn't continue */
		indirect = true;
		/* Set up rest to use this indirect table. */
		i = 0;
		descs_used = 1;
	} else {
		indirect = false;
		desc = vq->split.vring.desc;
		i = head;
		descs_used = total_sg;
	}

	if (vq->vq.num_free < descs_used) {
		pr_debug("Can't add buf len %i - avail = %i\n",
			 descs_used, vq->vq.num_free);
		/* FIXME: for historical reasons, we force a notify here if
		 * there are outgoing parts to the buffer.  Presumably the
		 * host should service the ring ASAP. */
		if (out_sgs)
			vq->notify(&vq->vq);
		if (indirect)
			kfree(desc);
		END_USE(vq);
		return -ENOSPC;
	}

	for (n = 0; n < out_sgs; n++) {
		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
			if (vring_mapping_error(vq, addr))
				goto unmap_release;

			desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT);
			desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
			desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
			prev = i;
			i = virtio16_to_cpu(_vq->vdev, desc[i].next);
		}
	}
	for (; n < (out_sgs + in_sgs); n++) {
		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
			if (vring_mapping_error(vq, addr))
				goto unmap_release;

			desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT |
							VRING_DESC_F_WRITE);
			desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
			desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
			prev = i;
			i = virtio16_to_cpu(_vq->vdev, desc[i].next);
		}
	}
	/* Last one doesn't continue. */
	desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);

	if (indirect) {
		/* Now that the indirect table is filled in, map it. */
		dma_addr_t addr = vring_map_single(
			vq, desc, total_sg * sizeof(struct vring_desc),
			DMA_TO_DEVICE);
		if (vring_mapping_error(vq, addr))
			goto unmap_release;

		vq->split.vring.desc[head].flags = cpu_to_virtio16(_vq->vdev,
				VRING_DESC_F_INDIRECT);
		vq->split.vring.desc[head].addr = cpu_to_virtio64(_vq->vdev,
				addr);

		vq->split.vring.desc[head].len = cpu_to_virtio32(_vq->vdev,
				total_sg * sizeof(struct vring_desc));
	}

	/* We're using some buffers from the free list. */
	vq->vq.num_free -= descs_used;

	/* Update free pointer */
	if (indirect)
		vq->free_head = virtio16_to_cpu(_vq->vdev,
					vq->split.vring.desc[head].next);
	else
		vq->free_head = i;

	/* Store token and indirect buffer state. */
	vq->split.desc_state[head].data = data;
	if (indirect)
		vq->split.desc_state[head].indir_desc = desc;
	else
		vq->split.desc_state[head].indir_desc = ctx;

	/* Put entry in available array (but don't update avail->idx until they
	 * do sync). */
	avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
	vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);

	/* Descriptors and available array need to be set before we expose the
	 * new available array entries. */
	virtio_wmb(vq->weak_barriers);
	vq->split.avail_idx_shadow++;
	vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
						vq->split.avail_idx_shadow);
	vq->num_added++;

	pr_debug("Added buffer head %i to %p\n", head, vq);
	END_USE(vq);

	/* This is very unlikely, but theoretically possible.  Kick
	 * just in case. */
	if (unlikely(vq->num_added == (1 << 16) - 1))
		virtqueue_kick(_vq);

	return 0;

unmap_release:
	err_idx = i;
	i = head;

	for (n = 0; n < total_sg; n++) {
		if (i == err_idx)
			break;
		vring_unmap_one_split(vq, &desc[i]);
		i = virtio16_to_cpu(_vq->vdev, vq->split.vring.desc[i].next);
	}

	if (indirect)
		kfree(desc);

	END_USE(vq);
	return -EIO;
}

static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u16 new, old;
	bool needs_kick;

	START_USE(vq);
	/* We need to expose available array entries before checking avail
	 * event. */
	virtio_mb(vq->weak_barriers);

	old = vq->split.avail_idx_shadow - vq->num_added;
	new = vq->split.avail_idx_shadow;
	vq->num_added = 0;

	LAST_ADD_TIME_CHECK(vq);
	LAST_ADD_TIME_INVALID(vq);

	if (vq->event) {
		needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
					vring_avail_event(&vq->split.vring)),
					      new, old);
	} else {
		needs_kick = !(vq->split.vring.used->flags &
					cpu_to_virtio16(_vq->vdev,
						VRING_USED_F_NO_NOTIFY));
	}
	END_USE(vq);
	return needs_kick;
}
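
/*
 * Worked example of the event-index check above (assuming the standard
 * vring_need_event() from <uapi/linux/virtio_ring.h>, which returns
 * (u16)(new - event_idx - 1) < (u16)(new - old)): if the device last
 * published used_event = 3 and the driver moved avail->idx from
 * old = 2 to new = 5, then (5 - 3 - 1) = 1 < (5 - 2) = 3, so a kick is
 * needed; had used_event been 7, (5 - 7 - 1) wraps to 65533, which is
 * not < 3, and the kick is suppressed.  Doing the comparison in u16
 * keeps it correct across index wrap-around.
 */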
static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
			     void **ctx)
{
	unsigned int i, j;
	__virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);

	/* Clear data ptr. */
	vq->split.desc_state[head].data = NULL;

	/* Put back on free list: unmap first-level descriptors and find end */
	i = head;

	while (vq->split.vring.desc[i].flags & nextflag) {
		vring_unmap_one_split(vq, &vq->split.vring.desc[i]);
		i = virtio16_to_cpu(vq->vq.vdev, vq->split.vring.desc[i].next);
		vq->vq.num_free++;
	}

	vring_unmap_one_split(vq, &vq->split.vring.desc[i]);
	vq->split.vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev,
						vq->free_head);
	vq->free_head = head;

	/* Plus final descriptor */
	vq->vq.num_free++;

	if (vq->indirect) {
		struct vring_desc *indir_desc =
				vq->split.desc_state[head].indir_desc;
		u32 len;

		/* Free the indirect table, if any, now that it's unmapped. */
		if (!indir_desc)
			return;

		len = virtio32_to_cpu(vq->vq.vdev,
				vq->split.vring.desc[head].len);

		BUG_ON(!(vq->split.vring.desc[head].flags &
			 cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));
		BUG_ON(len == 0 || len % sizeof(struct vring_desc));

		for (j = 0; j < len / sizeof(struct vring_desc); j++)
			vring_unmap_one_split(vq, &indir_desc[j]);

		kfree(indir_desc);
		vq->split.desc_state[head].indir_desc = NULL;
	} else if (ctx) {
		*ctx = vq->split.desc_state[head].indir_desc;
	}
}

static inline bool more_used_split(const struct vring_virtqueue *vq)
{
	return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
			vq->split.vring.used->idx);
}

static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
					 unsigned int *len,
					 void **ctx)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	void *ret;
	unsigned int i;
	u16 last_used;

	START_USE(vq);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return NULL;
	}

	if (!more_used_split(vq)) {
		pr_debug("No more buffers in queue\n");
		END_USE(vq);
		return NULL;
	}

	/* Only get used array entries after they have been exposed by host. */
	virtio_rmb(vq->weak_barriers);

	last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
	i = virtio32_to_cpu(_vq->vdev,
			vq->split.vring.used->ring[last_used].id);
	*len = virtio32_to_cpu(_vq->vdev,
			vq->split.vring.used->ring[last_used].len);

	if (unlikely(i >= vq->split.vring.num)) {
		BAD_RING(vq, "id %u out of range\n", i);
		return NULL;
	}
	if (unlikely(!vq->split.desc_state[i].data)) {
		BAD_RING(vq, "id %u is not a head!\n", i);
		return NULL;
	}

	/* detach_buf_split clears data, so grab it now. */
	ret = vq->split.desc_state[i].data;
	detach_buf_split(vq, i, ctx);
	vq->last_used_idx++;
	/* If we expect an interrupt for the next entry, tell host
	 * by writing event index and flush out the write before
	 * the read in the next get_buf call. */
	if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
		virtio_store_mb(vq->weak_barriers,
				&vring_used_event(&vq->split.vring),
				cpu_to_virtio16(_vq->vdev, vq->last_used_idx));

	LAST_ADD_TIME_INVALID(vq);

	END_USE(vq);
	return ret;
}

static void virtqueue_disable_cb_split(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
		vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
		if (!vq->event)
			vq->split.vring.avail->flags =
				cpu_to_virtio16(_vq->vdev,
						vq->split.avail_flags_shadow);
	}
}

static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u16 last_used_idx;

	START_USE(vq);

	/* We optimistically turn back on interrupts, then check if there was
	 * more to do. */
	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
	 * either clear the flags bit or point the event index at the next
	 * entry. Always do both to keep code simple. */
	if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
		vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
		if (!vq->event)
			vq->split.vring.avail->flags =
				cpu_to_virtio16(_vq->vdev,
						vq->split.avail_flags_shadow);
	}
	vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev,
			last_used_idx = vq->last_used_idx);
	END_USE(vq);
	return last_used_idx;
}

static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
			vq->split.vring.used->idx);
}
static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u16 bufs;

	START_USE(vq);

	/* We optimistically turn back on interrupts, then check if there was
	 * more to do. */
	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
	 * either clear the flags bit or point the event index at the next
	 * entry. Always update the event index to keep code simple. */
	if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
		vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
		if (!vq->event)
			vq->split.vring.avail->flags =
				cpu_to_virtio16(_vq->vdev,
						vq->split.avail_flags_shadow);
	}
	/* TODO: tune this threshold */
	bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;

	virtio_store_mb(vq->weak_barriers,
			&vring_used_event(&vq->split.vring),
			cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));

	if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx)
					- vq->last_used_idx) > bufs)) {
		END_USE(vq);
		return false;
	}

	END_USE(vq);
	return true;
}

static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	unsigned int i;
	void *buf;

	START_USE(vq);

	for (i = 0; i < vq->split.vring.num; i++) {
		if (!vq->split.desc_state[i].data)
			continue;
		/* detach_buf_split clears data, so grab it now. */
		buf = vq->split.desc_state[i].data;
		detach_buf_split(vq, i, NULL);
		vq->split.avail_idx_shadow--;
		vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
				vq->split.avail_idx_shadow);
		END_USE(vq);
		return buf;
	}
	/* That should have freed everything. */
	BUG_ON(vq->vq.num_free != vq->split.vring.num);

	END_USE(vq);
	return NULL;
}

static struct virtqueue *vring_create_virtqueue_split(
	unsigned int index,
	unsigned int num,
	unsigned int vring_align,
	struct virtio_device *vdev,
	bool weak_barriers,
	bool may_reduce_num,
	bool context,
	bool (*notify)(struct virtqueue *),
	void (*callback)(struct virtqueue *),
	const char *name)
{
	struct virtqueue *vq;
	void *queue = NULL;
	dma_addr_t dma_addr;
	size_t queue_size_in_bytes;
	struct vring vring;

	/* We assume num is a power of 2. */
	if (num & (num - 1)) {
		dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
		return NULL;
	}

	/* TODO: allocate each queue chunk individually */
	for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
		queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
					  &dma_addr,
					  GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
		if (queue)
			break;
		if (!may_reduce_num)
			return NULL;
	}

	if (!num)
		return NULL;

	if (!queue) {
		/* Try to get a single page. You are my only hope! */
		queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
					  &dma_addr, GFP_KERNEL|__GFP_ZERO);
	}
	if (!queue)
		return NULL;

	queue_size_in_bytes = vring_size(num, vring_align);
	vring_init(&vring, num, queue, vring_align);

	vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
				   notify, callback, name);
	if (!vq) {
		vring_free_queue(vdev, queue_size_in_bytes, queue,
				 dma_addr);
		return NULL;
	}

	to_vvq(vq)->split.queue_dma_addr = dma_addr;
	to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes;
	to_vvq(vq)->we_own_ring = true;

	return vq;
}
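
/*
 * Sizing note for the allocation loop above (assuming the standard
 * vring_size() helper from <uapi/linux/virtio_ring.h>): a split ring
 * needs
 *
 *	ALIGN(16 * num + 2 * (3 + num), vring_align) + 2 * 3 + 8 * num
 *
 * bytes, i.e. the descriptor table plus the available ring, aligned,
 * followed by the used ring.  For example, num = 256 with
 * vring_align = 4096 gives ALIGN(4096 + 518, 4096) + 6 + 2048 =
 * 8192 + 2054 = 10246 bytes, which is why the loop may have to halve
 * num repeatedly before a contiguous allocation succeeds.
 */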

/*
 * Packed ring specific functions - *_packed().
 */

static void vring_unmap_state_packed(const struct vring_virtqueue *vq,
				     struct vring_desc_extra_packed *state)
{
	u16 flags;

	if (!vq->use_dma_api)
		return;

	flags = state->flags;

	if (flags & VRING_DESC_F_INDIRECT) {
		dma_unmap_single(vring_dma_dev(vq),
				 state->addr, state->len,
				 (flags & VRING_DESC_F_WRITE) ?
				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
	} else {
		dma_unmap_page(vring_dma_dev(vq),
			       state->addr, state->len,
			       (flags & VRING_DESC_F_WRITE) ?
			       DMA_FROM_DEVICE : DMA_TO_DEVICE);
	}
}

static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
				    struct vring_packed_desc *desc)
{
	u16 flags;

	if (!vq->use_dma_api)
		return;

	flags = le16_to_cpu(desc->flags);

	if (flags & VRING_DESC_F_INDIRECT) {
		dma_unmap_single(vring_dma_dev(vq),
				 le64_to_cpu(desc->addr),
				 le32_to_cpu(desc->len),
				 (flags & VRING_DESC_F_WRITE) ?
				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
	} else {
		dma_unmap_page(vring_dma_dev(vq),
			       le64_to_cpu(desc->addr),
			       le32_to_cpu(desc->len),
			       (flags & VRING_DESC_F_WRITE) ?
			       DMA_FROM_DEVICE : DMA_TO_DEVICE);
	}
}

static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
						       gfp_t gfp)
{
	struct vring_packed_desc *desc;

	/*
	 * We require lowmem mappings for the descriptors because
	 * otherwise virt_to_phys will give us bogus addresses in the
	 * virtqueue.
	 */
	gfp &= ~__GFP_HIGHMEM;

	desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp);

	return desc;
}

static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
					 struct scatterlist *sgs[],
					 unsigned int total_sg,
					 unsigned int out_sgs,
					 unsigned int in_sgs,
					 void *data,
					 gfp_t gfp)
{
	struct vring_packed_desc *desc;
	struct scatterlist *sg;
	unsigned int i, n, err_idx;
	u16 head, id;
	dma_addr_t addr;

	head = vq->packed.next_avail_idx;
	desc = alloc_indirect_packed(total_sg, gfp);

	if (unlikely(vq->vq.num_free < 1)) {
		pr_debug("Can't add buf len 1 - avail = 0\n");
		kfree(desc);
		END_USE(vq);
		return -ENOSPC;
	}

	i = 0;
	id = vq->free_head;
	BUG_ON(id == vq->packed.vring.num);

	for (n = 0; n < out_sgs + in_sgs; n++) {
		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
			addr = vring_map_one_sg(vq, sg, n < out_sgs ?
					DMA_TO_DEVICE : DMA_FROM_DEVICE);
			if (vring_mapping_error(vq, addr))
				goto unmap_release;

			desc[i].flags = cpu_to_le16(n < out_sgs ?
						0 : VRING_DESC_F_WRITE);
			desc[i].addr = cpu_to_le64(addr);
			desc[i].len = cpu_to_le32(sg->length);
			i++;
		}
	}

	/* Now that the indirect table is filled in, map it. */
	addr = vring_map_single(vq, desc,
			total_sg * sizeof(struct vring_packed_desc),
			DMA_TO_DEVICE);
	if (vring_mapping_error(vq, addr))
		goto unmap_release;

	vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
	vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
				sizeof(struct vring_packed_desc));
	vq->packed.vring.desc[head].id = cpu_to_le16(id);

	if (vq->use_dma_api) {
		vq->packed.desc_extra[id].addr = addr;
		vq->packed.desc_extra[id].len = total_sg *
				sizeof(struct vring_packed_desc);
		vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
						  vq->packed.avail_used_flags;
	}

	/*
	 * A driver MUST NOT make the first descriptor in the list
	 * available before all subsequent descriptors comprising
	 * the list are made available.
	 */
	virtio_wmb(vq->weak_barriers);
	vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
						vq->packed.avail_used_flags);

	/* We're using some buffers from the free list. */
	vq->vq.num_free -= 1;

	/* Update free pointer */
	n = head + 1;
	if (n >= vq->packed.vring.num) {
		n = 0;
		vq->packed.avail_wrap_counter ^= 1;
		vq->packed.avail_used_flags ^=
				1 << VRING_PACKED_DESC_F_AVAIL |
				1 << VRING_PACKED_DESC_F_USED;
	}
	vq->packed.next_avail_idx = n;
	vq->free_head = vq->packed.desc_state[id].next;

	/* Store token and indirect buffer state. */
	vq->packed.desc_state[id].num = 1;
	vq->packed.desc_state[id].data = data;
	vq->packed.desc_state[id].indir_desc = desc;
	vq->packed.desc_state[id].last = id;

	vq->num_added += 1;

	pr_debug("Added buffer head %i to %p\n", head, vq);
	END_USE(vq);

	return 0;

unmap_release:
	err_idx = i;

	for (i = 0; i < err_idx; i++)
		vring_unmap_desc_packed(vq, &desc[i]);

	kfree(desc);

	END_USE(vq);
	return -EIO;
}

static inline int virtqueue_add_packed(struct virtqueue *_vq,
				       struct scatterlist *sgs[],
				       unsigned int total_sg,
				       unsigned int out_sgs,
				       unsigned int in_sgs,
				       void *data,
				       void *ctx,
				       gfp_t gfp)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	struct vring_packed_desc *desc;
	struct scatterlist *sg;
	unsigned int i, n, c, descs_used, err_idx;
	__le16 uninitialized_var(head_flags), flags;
	u16 head, id, uninitialized_var(prev), curr, avail_used_flags;

	START_USE(vq);

	BUG_ON(data == NULL);
	BUG_ON(ctx && vq->indirect);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return -EIO;
	}

	LAST_ADD_TIME_UPDATE(vq);

	BUG_ON(total_sg == 0);

	if (virtqueue_use_indirect(_vq, total_sg))
		return virtqueue_add_indirect_packed(vq, sgs, total_sg,
				out_sgs, in_sgs, data, gfp);

	head = vq->packed.next_avail_idx;
	avail_used_flags = vq->packed.avail_used_flags;

	WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);

	desc = vq->packed.vring.desc;
	i = head;
	descs_used = total_sg;

	if (unlikely(vq->vq.num_free < descs_used)) {
		pr_debug("Can't add buf len %i - avail = %i\n",
			 descs_used, vq->vq.num_free);
		END_USE(vq);
		return -ENOSPC;
	}

	id = vq->free_head;
	BUG_ON(id == vq->packed.vring.num);

	curr = id;
	c = 0;
	for (n = 0; n < out_sgs + in_sgs; n++) {
		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
			dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
					DMA_TO_DEVICE : DMA_FROM_DEVICE);
			if (vring_mapping_error(vq, addr))
				goto unmap_release;

			flags = cpu_to_le16(vq->packed.avail_used_flags |
				    (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
				    (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
			if (i == head)
				head_flags = flags;
			else
				desc[i].flags = flags;

			desc[i].addr = cpu_to_le64(addr);
			desc[i].len = cpu_to_le32(sg->length);
			desc[i].id = cpu_to_le16(id);

			if (unlikely(vq->use_dma_api)) {
				vq->packed.desc_extra[curr].addr = addr;
				vq->packed.desc_extra[curr].len = sg->length;
				vq->packed.desc_extra[curr].flags =
					le16_to_cpu(flags);
			}
			prev = curr;
			curr = vq->packed.desc_state[curr].next;

			if ((unlikely(++i >= vq->packed.vring.num))) {
				i = 0;
				vq->packed.avail_used_flags ^=
					1 << VRING_PACKED_DESC_F_AVAIL |
					1 << VRING_PACKED_DESC_F_USED;
			}
		}
	}

	if (i < head)
		vq->packed.avail_wrap_counter ^= 1;

	/* We're using some buffers from the free list. */
	vq->vq.num_free -= descs_used;

	/* Update free pointer */
	vq->packed.next_avail_idx = i;
	vq->free_head = curr;

	/* Store token. */
	vq->packed.desc_state[id].num = descs_used;
	vq->packed.desc_state[id].data = data;
	vq->packed.desc_state[id].indir_desc = ctx;
	vq->packed.desc_state[id].last = prev;

	/*
	 * A driver MUST NOT make the first descriptor in the list
	 * available before all subsequent descriptors comprising
	 * the list are made available.
	 */
	virtio_wmb(vq->weak_barriers);
	vq->packed.vring.desc[head].flags = head_flags;
	vq->num_added += descs_used;

	pr_debug("Added buffer head %i to %p\n", head, vq);
	END_USE(vq);

	return 0;

unmap_release:
	err_idx = i;
	i = head;

	vq->packed.avail_used_flags = avail_used_flags;

	for (n = 0; n < total_sg; n++) {
		if (i == err_idx)
			break;
		vring_unmap_desc_packed(vq, &desc[i]);
		i++;
		if (i >= vq->packed.vring.num)
			i = 0;
	}

	END_USE(vq);
	return -EIO;
}
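
/*
 * How the avail/used flag bits above encode ownership (per the packed
 * ring layout this file implements): the driver writes each descriptor
 * with the AVAIL bit equal to its avail_wrap_counter and the USED bit
 * equal to the inverse, so AVAIL != USED means "available to the
 * device".  When the device returns a descriptor it sets both bits to
 * its own wrap counter, so AVAIL == USED means "used".  That is why
 * avail_used_flags flips both bits whenever next_avail_idx wraps, and
 * why is_used_desc_packed() below checks avail == used == the driver's
 * used_wrap_counter.
 */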
static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u16 new, old, off_wrap, flags, wrap_counter, event_idx;
	bool needs_kick;
	union {
		struct {
			__le16 off_wrap;
			__le16 flags;
		};
		u32 u32;
	} snapshot;

	START_USE(vq);

	/*
	 * We need to expose the new flags value before checking notification
	 * suppressions.
	 */
	virtio_mb(vq->weak_barriers);

	old = vq->packed.next_avail_idx - vq->num_added;
	new = vq->packed.next_avail_idx;
	vq->num_added = 0;

	snapshot.u32 = *(u32 *)vq->packed.vring.device;
	flags = le16_to_cpu(snapshot.flags);

	LAST_ADD_TIME_CHECK(vq);
	LAST_ADD_TIME_INVALID(vq);

	if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
		needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
		goto out;
	}

	off_wrap = le16_to_cpu(snapshot.off_wrap);

	wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
	event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
	if (wrap_counter != vq->packed.avail_wrap_counter)
		event_idx -= vq->packed.vring.num;

	needs_kick = vring_need_event(event_idx, new, old);
out:
	END_USE(vq);
	return needs_kick;
}

static void detach_buf_packed(struct vring_virtqueue *vq,
			      unsigned int id, void **ctx)
{
	struct vring_desc_state_packed *state = NULL;
	struct vring_packed_desc *desc;
	unsigned int i, curr;

	state = &vq->packed.desc_state[id];

	/* Clear data ptr. */
	state->data = NULL;

	vq->packed.desc_state[state->last].next = vq->free_head;
	vq->free_head = id;
	vq->vq.num_free += state->num;

	if (unlikely(vq->use_dma_api)) {
		curr = id;
		for (i = 0; i < state->num; i++) {
			vring_unmap_state_packed(vq,
				&vq->packed.desc_extra[curr]);
			curr = vq->packed.desc_state[curr].next;
		}
	}

	if (vq->indirect) {
		u32 len;

		/* Free the indirect table, if any, now that it's unmapped. */
		desc = state->indir_desc;
		if (!desc)
			return;

		if (vq->use_dma_api) {
			len = vq->packed.desc_extra[id].len;
			for (i = 0; i < len / sizeof(struct vring_packed_desc);
					i++)
				vring_unmap_desc_packed(vq, &desc[i]);
		}
		kfree(desc);
		state->indir_desc = NULL;
	} else if (ctx) {
		*ctx = state->indir_desc;
	}
}

static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
				       u16 idx, bool used_wrap_counter)
{
	bool avail, used;
	u16 flags;

	flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
	avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
	used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));

	return avail == used && used == used_wrap_counter;
}

static inline bool more_used_packed(const struct vring_virtqueue *vq)
{
	return is_used_desc_packed(vq, vq->last_used_idx,
			vq->packed.used_wrap_counter);
}
static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
					  unsigned int *len,
					  void **ctx)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u16 last_used, id;
	void *ret;

	START_USE(vq);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return NULL;
	}

	if (!more_used_packed(vq)) {
		pr_debug("No more buffers in queue\n");
		END_USE(vq);
		return NULL;
	}

	/* Only get used elements after they have been exposed by host. */
	virtio_rmb(vq->weak_barriers);

	last_used = vq->last_used_idx;
	id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
	*len = le32_to_cpu(vq->packed.vring.desc[last_used].len);

	if (unlikely(id >= vq->packed.vring.num)) {
		BAD_RING(vq, "id %u out of range\n", id);
		return NULL;
	}
	if (unlikely(!vq->packed.desc_state[id].data)) {
		BAD_RING(vq, "id %u is not a head!\n", id);
		return NULL;
	}

	/* detach_buf_packed clears data, so grab it now. */
	ret = vq->packed.desc_state[id].data;
	detach_buf_packed(vq, id, ctx);

	vq->last_used_idx += vq->packed.desc_state[id].num;
	if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) {
		vq->last_used_idx -= vq->packed.vring.num;
		vq->packed.used_wrap_counter ^= 1;
	}

	/*
	 * If we expect an interrupt for the next entry, tell host
	 * by writing event index and flush out the write before
	 * the read in the next get_buf call.
	 */
	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
		virtio_store_mb(vq->weak_barriers,
				&vq->packed.vring.driver->off_wrap,
				cpu_to_le16(vq->last_used_idx |
					(vq->packed.used_wrap_counter <<
					 VRING_PACKED_EVENT_F_WRAP_CTR)));

	LAST_ADD_TIME_INVALID(vq);

	END_USE(vq);
	return ret;
}

static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
		vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
		vq->packed.vring.driver->flags =
			cpu_to_le16(vq->packed.event_flags_shadow);
	}
}

static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	START_USE(vq);

	/*
	 * We optimistically turn back on interrupts, then check if there was
	 * more to do.
	 */

	if (vq->event) {
		vq->packed.vring.driver->off_wrap =
			cpu_to_le16(vq->last_used_idx |
				(vq->packed.used_wrap_counter <<
				 VRING_PACKED_EVENT_F_WRAP_CTR));
		/*
		 * We need to update event offset and event wrap
		 * counter first before updating event flags.
		 */
		virtio_wmb(vq->weak_barriers);
	}

	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
		vq->packed.event_flags_shadow = vq->event ?
				VRING_PACKED_EVENT_FLAG_DESC :
				VRING_PACKED_EVENT_FLAG_ENABLE;
		vq->packed.vring.driver->flags =
				cpu_to_le16(vq->packed.event_flags_shadow);
	}

	END_USE(vq);
	return vq->last_used_idx | ((u16)vq->packed.used_wrap_counter <<
			VRING_PACKED_EVENT_F_WRAP_CTR);
}

static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	bool wrap_counter;
	u16 used_idx;

	wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
	used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);

	return is_used_desc_packed(vq, used_idx, wrap_counter);
}
static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u16 used_idx, wrap_counter;
	u16 bufs;

	START_USE(vq);

	/*
	 * We optimistically turn back on interrupts, then check if there was
	 * more to do.
	 */

	if (vq->event) {
		/* TODO: tune this threshold */
		bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4;
		wrap_counter = vq->packed.used_wrap_counter;

		used_idx = vq->last_used_idx + bufs;
		if (used_idx >= vq->packed.vring.num) {
			used_idx -= vq->packed.vring.num;
			wrap_counter ^= 1;
		}

		vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx |
			(wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));

		/*
		 * We need to update event offset and event wrap
		 * counter first before updating event flags.
		 */
		virtio_wmb(vq->weak_barriers);
	} else {
		used_idx = vq->last_used_idx;
		wrap_counter = vq->packed.used_wrap_counter;
	}

	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
		vq->packed.event_flags_shadow = vq->event ?
				VRING_PACKED_EVENT_FLAG_DESC :
				VRING_PACKED_EVENT_FLAG_ENABLE;
		vq->packed.vring.driver->flags =
				cpu_to_le16(vq->packed.event_flags_shadow);
	}

	/*
	 * We need to update event suppression structure first
	 * before re-checking for more used buffers.
	 */
	virtio_mb(vq->weak_barriers);

	if (is_used_desc_packed(vq, used_idx, wrap_counter)) {
		END_USE(vq);
		return false;
	}

	END_USE(vq);
	return true;
}
static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	unsigned int i;
	void *buf;

	START_USE(vq);

	for (i = 0; i < vq->packed.vring.num; i++) {
		if (!vq->packed.desc_state[i].data)
			continue;
		/* detach_buf clears data, so grab it now. */
		buf = vq->packed.desc_state[i].data;
		detach_buf_packed(vq, i, NULL);
		END_USE(vq);
		return buf;
	}
	/* That should have freed everything. */
	BUG_ON(vq->vq.num_free != vq->packed.vring.num);

	END_USE(vq);
	return NULL;
}

static struct virtqueue *vring_create_virtqueue_packed(
	unsigned int index,
	unsigned int num,
	unsigned int vring_align,
	struct virtio_device *vdev,
	bool weak_barriers,
	bool may_reduce_num,
	bool context,
	bool (*notify)(struct virtqueue *),
	void (*callback)(struct virtqueue *),
	const char *name)
{
	struct vring_virtqueue *vq;
	struct vring_packed_desc *ring;
	struct vring_packed_desc_event *driver, *device;
	dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
	size_t ring_size_in_bytes, event_size_in_bytes;
	unsigned int i;

	ring_size_in_bytes = num * sizeof(struct vring_packed_desc);

	ring = vring_alloc_queue(vdev, ring_size_in_bytes,
				 &ring_dma_addr,
				 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
	if (!ring)
		goto err_ring;

	event_size_in_bytes = sizeof(struct vring_packed_desc_event);

	driver = vring_alloc_queue(vdev, event_size_in_bytes,
				   &driver_event_dma_addr,
				   GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
	if (!driver)
		goto err_driver;

	device = vring_alloc_queue(vdev, event_size_in_bytes,
				   &device_event_dma_addr,
				   GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
	if (!device)
		goto err_device;

	vq = kmalloc(sizeof(*vq), GFP_KERNEL);
	if (!vq)
		goto err_vq;

	vq->vq.callback = callback;
	vq->vq.vdev = vdev;
	vq->vq.name = name;
	vq->vq.num_free = num;
	vq->vq.index = index;
	vq->we_own_ring = true;
	vq->notify = notify;
	vq->weak_barriers = weak_barriers;
	vq->broken = false;
	vq->last_used_idx = 0;
	vq->num_added = 0;
	vq->packed_ring = true;
	vq->use_dma_api = vring_use_dma_api(vdev);
	list_add_tail(&vq->vq.list, &vdev->vqs);
#ifdef DEBUG
	vq->in_use = false;
	vq->last_add_time_valid = false;
#endif

	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
		!context;
	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);

	if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
		vq->weak_barriers = false;

	vq->packed.ring_dma_addr = ring_dma_addr;
	vq->packed.driver_event_dma_addr = driver_event_dma_addr;
	vq->packed.device_event_dma_addr = device_event_dma_addr;

	vq->packed.ring_size_in_bytes = ring_size_in_bytes;
	vq->packed.event_size_in_bytes = event_size_in_bytes;

	vq->packed.vring.num = num;
	vq->packed.vring.desc = ring;
	vq->packed.vring.driver = driver;
	vq->packed.vring.device = device;

	vq->packed.next_avail_idx = 0;
	vq->packed.avail_wrap_counter = 1;
	vq->packed.used_wrap_counter = 1;
	vq->packed.event_flags_shadow = 0;
	vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;

	vq->packed.desc_state = kmalloc_array(num,
			sizeof(struct vring_desc_state_packed),
			GFP_KERNEL);
	if (!vq->packed.desc_state)
		goto err_desc_state;

	memset(vq->packed.desc_state, 0,
		num * sizeof(struct vring_desc_state_packed));

	/* Put everything in free lists. */
	vq->free_head = 0;
	for (i = 0; i < num-1; i++)
		vq->packed.desc_state[i].next = i + 1;

	vq->packed.desc_extra = kmalloc_array(num,
			sizeof(struct vring_desc_extra_packed),
			GFP_KERNEL);
	if (!vq->packed.desc_extra)
		goto err_desc_extra;

	memset(vq->packed.desc_extra, 0,
		num * sizeof(struct vring_desc_extra_packed));

	/* No callback?  Tell other side not to bother us. */
	if (!callback) {
		vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
		vq->packed.vring.driver->flags =
			cpu_to_le16(vq->packed.event_flags_shadow);
	}

	return &vq->vq;

err_desc_extra:
	kfree(vq->packed.desc_state);
err_desc_state:
	kfree(vq);
err_vq:
	vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr);
err_device:
	vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr);
err_driver:
	vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr);
err_ring:
	return NULL;
}


/*
 * Generic functions and exported symbols.
 */

static inline int virtqueue_add(struct virtqueue *_vq,
				struct scatterlist *sgs[],
				unsigned int total_sg,
				unsigned int out_sgs,
				unsigned int in_sgs,
				void *data,
				void *ctx,
				gfp_t gfp)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
					out_sgs, in_sgs, data, ctx, gfp) :
				 virtqueue_add_split(_vq, sgs, total_sg,
					out_sgs, in_sgs, data, ctx, gfp);
}

/**
 * virtqueue_add_sgs - expose buffers to other end
 * @_vq: the struct virtqueue we're talking about.
 * @sgs: array of terminated scatterlists.
 * @out_sgs: the number of scatterlists readable by other side
 * @in_sgs: the number of scatterlists which are writable (after readable ones)
 * @data: the token identifying the buffer.
 * @gfp: how to do memory allocations (if necessary).
 *
 * Caller must ensure we don't call this with other virtqueue operations
 * at the same time (except where noted).
 *
 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
 */
int virtqueue_add_sgs(struct virtqueue *_vq,
		      struct scatterlist *sgs[],
		      unsigned int out_sgs,
		      unsigned int in_sgs,
		      void *data,
		      gfp_t gfp)
{
	unsigned int i, total_sg = 0;

	/* Count them first. */
	for (i = 0; i < out_sgs + in_sgs; i++) {
		struct scatterlist *sg;

		for (sg = sgs[i]; sg; sg = sg_next(sg))
			total_sg++;
	}
	return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
			     data, NULL, gfp);
}
EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
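
/*
 * Illustration (hypothetical driver code, not part of this file): a
 * typical caller builds one scatterlist per direction, collects them in
 * an sgs[] array with all readable entries first, and passes the request
 * itself as the token:
 *
 *	struct scatterlist hdr, status, *sgs[2];
 *
 *	sg_init_one(&hdr, req->hdr, sizeof(*req->hdr));
 *	sg_init_one(&status, &req->status, sizeof(req->status));
 *	sgs[0] = &hdr;
 *	sgs[1] = &status;
 *	err = virtqueue_add_sgs(vq, sgs, 1, 1, req, GFP_ATOMIC);
 *	if (!err)
 *		virtqueue_kick(vq);
 *
 * "req" and its fields are made up for the example; the shape mirrors
 * how a driver queues a readable header followed by a writable status
 * byte.
 */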
/**
 * virtqueue_add_outbuf - expose output buffers to other end
 * @vq: the struct virtqueue we're talking about.
 * @sg: scatterlist (must be well-formed and terminated!)
 * @num: the number of entries in @sg readable by other side
 * @data: the token identifying the buffer.
 * @gfp: how to do memory allocations (if necessary).
 *
 * Caller must ensure we don't call this with other virtqueue operations
 * at the same time (except where noted).
 *
 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
 */
int virtqueue_add_outbuf(struct virtqueue *vq,
			 struct scatterlist *sg, unsigned int num,
			 void *data,
			 gfp_t gfp)
{
	return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp);
}
EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);

/**
 * virtqueue_add_inbuf - expose input buffers to other end
 * @vq: the struct virtqueue we're talking about.
 * @sg: scatterlist (must be well-formed and terminated!)
 * @num: the number of entries in @sg writable by other side
 * @data: the token identifying the buffer.
 * @gfp: how to do memory allocations (if necessary).
 *
 * Caller must ensure we don't call this with other virtqueue operations
 * at the same time (except where noted).
 *
 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
 */
int virtqueue_add_inbuf(struct virtqueue *vq,
			struct scatterlist *sg, unsigned int num,
			void *data,
			gfp_t gfp)
{
	return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp);
}
EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);

/**
 * virtqueue_add_inbuf_ctx - expose input buffers to other end
 * @vq: the struct virtqueue we're talking about.
 * @sg: scatterlist (must be well-formed and terminated!)
 * @num: the number of entries in @sg writable by other side
 * @data: the token identifying the buffer.
 * @ctx: extra context for the token
 * @gfp: how to do memory allocations (if necessary).
 *
 * Caller must ensure we don't call this with other virtqueue operations
 * at the same time (except where noted).
 *
 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
 */
int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
			    struct scatterlist *sg, unsigned int num,
			    void *data,
			    void *ctx,
			    gfp_t gfp)
{
	return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp);
}
EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);

/**
 * virtqueue_kick_prepare - first half of split virtqueue_kick call.
 * @_vq: the struct virtqueue
 *
 * Instead of virtqueue_kick(), you can do:
 *	if (virtqueue_kick_prepare(vq))
 *		virtqueue_notify(vq);
 *
 * This is sometimes useful because the virtqueue_kick_prepare() needs
 * to be serialized, but the actual virtqueue_notify() call does not.
 */
bool virtqueue_kick_prepare(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) :
				 virtqueue_kick_prepare_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);

/**
 * virtqueue_notify - second half of split virtqueue_kick call.
 * @_vq: the struct virtqueue
 *
 * This does not need to be serialized.
 *
 * Returns false if host notify failed or queue is broken, otherwise true.
 */
bool virtqueue_notify(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (unlikely(vq->broken))
		return false;

	/* Prod other side to tell it about changes. */
	if (!vq->notify(_vq)) {
		vq->broken = true;
		return false;
	}
	return true;
}
EXPORT_SYMBOL_GPL(virtqueue_notify);
/**
 * virtqueue_kick - update after add_buf
 * @vq: the struct virtqueue
 *
 * After one or more virtqueue_add_* calls, invoke this to kick
 * the other side.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 *
 * Returns false if kick failed, otherwise true.
 */
bool virtqueue_kick(struct virtqueue *vq)
{
	if (virtqueue_kick_prepare(vq))
		return virtqueue_notify(vq);
	return true;
}
EXPORT_SYMBOL_GPL(virtqueue_kick);

/**
 * virtqueue_get_buf_ctx - get the next used buffer
 * @_vq: the struct virtqueue we're talking about.
 * @len: the length written into the buffer
 * @ctx: extra context for the token
 *
 * If the device wrote data into the buffer, @len will be set to the
 * amount written.  This means you don't need to clear the buffer
 * beforehand to ensure there's no data leakage in the case of short
 * writes.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 *
 * Returns NULL if there are no used buffers, or the "data" token
 * handed to virtqueue_add_*().
 */
void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
			    void **ctx)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
				 virtqueue_get_buf_ctx_split(_vq, len, ctx);
}
EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);

void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
{
	return virtqueue_get_buf_ctx(_vq, len, NULL);
}
EXPORT_SYMBOL_GPL(virtqueue_get_buf);

/**
 * virtqueue_disable_cb - disable callbacks
 * @_vq: the struct virtqueue we're talking about.
 *
 * Note that this is not necessarily synchronous, hence unreliable and only
 * useful as an optimization.
 *
 * Unlike other operations, this need not be serialized.
 */
void virtqueue_disable_cb(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (vq->packed_ring)
		virtqueue_disable_cb_packed(_vq);
	else
		virtqueue_disable_cb_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_disable_cb);

/**
 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
 * @_vq: the struct virtqueue we're talking about.
 *
 * This re-enables callbacks; it returns current queue state
 * in an opaque unsigned value. This value should be later tested by
 * virtqueue_poll, to detect a possible race between the driver checking for
 * more work, and enabling callbacks.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 */
unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) :
				 virtqueue_enable_cb_prepare_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);

/**
 * virtqueue_poll - query pending used buffers
 * @_vq: the struct virtqueue we're talking about.
 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
 *
 * Returns "true" if there are pending used buffers in the queue.
 *
 * This does not need to be serialized.
 */

/**
 * virtqueue_poll - query pending used buffers
 * @_vq: the struct virtqueue we're talking about.
 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
 *
 * Returns "true" if there are pending used buffers in the queue.
 *
 * This does not need to be serialized.
 */
bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	virtio_mb(vq->weak_barriers);
	return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
				 virtqueue_poll_split(_vq, last_used_idx);
}
EXPORT_SYMBOL_GPL(virtqueue_poll);

/**
 * virtqueue_enable_cb - restart callbacks after disable_cb.
 * @_vq: the struct virtqueue we're talking about.
 *
 * This re-enables callbacks; it returns "false" if there are pending
 * buffers in the queue, to detect a possible race between the driver
 * checking for more work, and enabling callbacks.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 */
bool virtqueue_enable_cb(struct virtqueue *_vq)
{
	unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq);

	return !virtqueue_poll(_vq, last_used_idx);
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb);

/**
 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
 * @_vq: the struct virtqueue we're talking about.
 *
 * This re-enables callbacks but hints to the other side to delay
 * interrupts until most of the available buffers have been processed;
 * it returns "false" if there are many pending buffers in the queue,
 * to detect a possible race between the driver checking for more work,
 * and enabling callbacks.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 */
bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
				 virtqueue_enable_cb_delayed_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);

/**
 * virtqueue_detach_unused_buf - detach first unused buffer
 * @_vq: the struct virtqueue we're talking about.
 *
 * Returns NULL or the "data" token handed to virtqueue_add_*().
 * This is not valid on an active queue; it is useful only for device
 * shutdown.
 */
void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
				 virtqueue_detach_unused_buf_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
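
/*
 * A sketch of the race-free re-enable pattern that the comments above
 * describe: process completions with callbacks disabled, then re-arm and
 * re-check so that a buffer used between the final get_buf and the
 * re-enable is not missed.  The function and the process() callback are
 * hypothetical driver-side code, shown only to illustrate the ordering.
 */
static void __maybe_unused example_process_until_idle(struct virtqueue *vq,
						      void (*process)(void *buf,
								      unsigned int len))
{
	unsigned int len;
	void *buf;

	virtqueue_disable_cb(vq);
	for (;;) {
		while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
			process(buf, len);

		/* Equivalent to enable_cb_prepare() followed by poll(). */
		if (virtqueue_enable_cb(vq))
			break;	/* No race: callbacks are armed again. */

		/* A buffer raced in; disable again and keep draining. */
		virtqueue_disable_cb(vq);
	}
}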

static inline bool more_used(const struct vring_virtqueue *vq)
{
	return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
}

irqreturn_t vring_interrupt(int irq, void *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (!more_used(vq)) {
		pr_debug("virtqueue interrupt with no work for %p\n", vq);
		return IRQ_NONE;
	}

	if (unlikely(vq->broken))
		return IRQ_HANDLED;

	pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
	if (vq->vq.callback)
		vq->vq.callback(&vq->vq);

	return IRQ_HANDLED;
}
EXPORT_SYMBOL_GPL(vring_interrupt);
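
/*
 * A sketch of how a transport with a single shared interrupt might dispatch
 * to the handler above, letting vring_interrupt() decide per queue whether
 * there is work.  The function is hypothetical and not wired to anything
 * here; real transports register their own handlers around this call.
 */
static irqreturn_t __maybe_unused example_transport_isr(int irq, void *opaque)
{
	struct virtio_device *vdev = opaque;
	struct virtqueue *vq;
	irqreturn_t ret = IRQ_NONE;

	list_for_each_entry(vq, &vdev->vqs, list) {
		if (vring_interrupt(irq, vq) == IRQ_HANDLED)
			ret = IRQ_HANDLED;
	}

	return ret;
}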

/* Only available for split ring */
struct virtqueue *__vring_new_virtqueue(unsigned int index,
					struct vring vring,
					struct virtio_device *vdev,
					bool weak_barriers,
					bool context,
					bool (*notify)(struct virtqueue *),
					void (*callback)(struct virtqueue *),
					const char *name)
{
	unsigned int i;
	struct vring_virtqueue *vq;

	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
		return NULL;

	vq = kmalloc(sizeof(*vq), GFP_KERNEL);
	if (!vq)
		return NULL;

	vq->packed_ring = false;
	vq->vq.callback = callback;
	vq->vq.vdev = vdev;
	vq->vq.name = name;
	vq->vq.num_free = vring.num;
	vq->vq.index = index;
	vq->we_own_ring = false;
	vq->notify = notify;
	vq->weak_barriers = weak_barriers;
	vq->broken = false;
	vq->last_used_idx = 0;
	vq->num_added = 0;
	vq->use_dma_api = vring_use_dma_api(vdev);
	list_add_tail(&vq->vq.list, &vdev->vqs);
#ifdef DEBUG
	vq->in_use = false;
	vq->last_add_time_valid = false;
#endif

	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
		!context;
	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);

	if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
		vq->weak_barriers = false;

	vq->split.queue_dma_addr = 0;
	vq->split.queue_size_in_bytes = 0;

	vq->split.vring = vring;
	vq->split.avail_flags_shadow = 0;
	vq->split.avail_idx_shadow = 0;

	/* No callback? Tell other side not to bother us. */
	if (!callback) {
		vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
		if (!vq->event)
			vq->split.vring.avail->flags = cpu_to_virtio16(vdev,
					vq->split.avail_flags_shadow);
	}

	vq->split.desc_state = kmalloc_array(vring.num,
			sizeof(struct vring_desc_state_split), GFP_KERNEL);
	if (!vq->split.desc_state) {
		kfree(vq);
		return NULL;
	}

	/* Put everything in free lists. */
	vq->free_head = 0;
	for (i = 0; i < vring.num - 1; i++)
		vq->split.vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
	memset(vq->split.desc_state, 0, vring.num *
			sizeof(struct vring_desc_state_split));

	return &vq->vq;
}
EXPORT_SYMBOL_GPL(__vring_new_virtqueue);

struct virtqueue *vring_create_virtqueue(
	unsigned int index,
	unsigned int num,
	unsigned int vring_align,
	struct virtio_device *vdev,
	bool weak_barriers,
	bool may_reduce_num,
	bool context,
	bool (*notify)(struct virtqueue *),
	void (*callback)(struct virtqueue *),
	const char *name)
{
	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
		return vring_create_virtqueue_packed(index, num, vring_align,
				vdev, weak_barriers, may_reduce_num,
				context, notify, callback, name);

	return vring_create_virtqueue_split(index, num, vring_align,
			vdev, weak_barriers, may_reduce_num,
			context, notify, callback, name);
}
EXPORT_SYMBOL_GPL(vring_create_virtqueue);

/* Only available for split ring */
struct virtqueue *vring_new_virtqueue(unsigned int index,
				      unsigned int num,
				      unsigned int vring_align,
				      struct virtio_device *vdev,
				      bool weak_barriers,
				      bool context,
				      void *pages,
				      bool (*notify)(struct virtqueue *vq),
				      void (*callback)(struct virtqueue *vq),
				      const char *name)
{
	struct vring vring;

	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
		return NULL;

	vring_init(&vring, num, pages, vring_align);
	return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
				     notify, callback, name);
}
EXPORT_SYMBOL_GPL(vring_new_virtqueue);

void vring_del_virtqueue(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (vq->we_own_ring) {
		if (vq->packed_ring) {
			vring_free_queue(vq->vq.vdev,
					 vq->packed.ring_size_in_bytes,
					 vq->packed.vring.desc,
					 vq->packed.ring_dma_addr);

			vring_free_queue(vq->vq.vdev,
					 vq->packed.event_size_in_bytes,
					 vq->packed.vring.driver,
					 vq->packed.driver_event_dma_addr);

			vring_free_queue(vq->vq.vdev,
					 vq->packed.event_size_in_bytes,
					 vq->packed.vring.device,
					 vq->packed.device_event_dma_addr);

			kfree(vq->packed.desc_state);
			kfree(vq->packed.desc_extra);
		} else {
			vring_free_queue(vq->vq.vdev,
					 vq->split.queue_size_in_bytes,
					 vq->split.vring.desc,
					 vq->split.queue_dma_addr);

			kfree(vq->split.desc_state);
		}
	}
	list_del(&_vq->list);
	kfree(vq);
}
EXPORT_SYMBOL_GPL(vring_del_virtqueue);
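
/*
 * A sketch of a transport's queue setup built on the helpers above.  The
 * wrapper, the 64-entry size and the PAGE_SIZE alignment are arbitrary
 * example values; the notify and callback hooks stand in for real transport
 * and core code.  The matching teardown is a single vring_del_virtqueue()
 * call on the returned queue.
 */
static struct virtqueue * __maybe_unused
example_setup_vq(struct virtio_device *vdev, unsigned int index,
		 bool (*notify)(struct virtqueue *),
		 void (*callback)(struct virtqueue *),
		 const char *name)
{
	/* May allocate fewer than 64 entries if memory is tight. */
	return vring_create_virtqueue(index, 64, PAGE_SIZE, vdev,
				      true /* weak_barriers */,
				      true /* may_reduce_num */,
				      false /* no per-buffer context */,
				      notify, callback, name);
}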

/* Manipulates transport-specific feature bits. */
void vring_transport_features(struct virtio_device *vdev)
{
	unsigned int i;

	for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
		switch (i) {
		case VIRTIO_RING_F_INDIRECT_DESC:
			break;
		case VIRTIO_RING_F_EVENT_IDX:
			break;
		case VIRTIO_F_VERSION_1:
			break;
		case VIRTIO_F_IOMMU_PLATFORM:
			break;
		case VIRTIO_F_RING_PACKED:
			break;
		case VIRTIO_F_ORDER_PLATFORM:
			break;
		default:
			/* We don't understand this bit. */
			__virtio_clear_bit(vdev, i);
		}
	}
}
EXPORT_SYMBOL_GPL(vring_transport_features);

/**
 * virtqueue_get_vring_size - return the size of the virtqueue's vring
 * @_vq: the struct virtqueue containing the vring of interest.
 *
 * Returns the size of the vring.  This is mainly used for boasting to
 * userspace.  Unlike other operations, this need not be serialized.
 */
unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
}
EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);

bool virtqueue_is_broken(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return vq->broken;
}
EXPORT_SYMBOL_GPL(virtqueue_is_broken);

/*
 * This should prevent the device from being used, allowing drivers to
 * recover.  You may need to grab appropriate locks to flush.
 */
void virtio_break_device(struct virtio_device *dev)
{
	struct virtqueue *_vq;

	list_for_each_entry(_vq, &dev->vqs, list) {
		struct vring_virtqueue *vq = to_vvq(_vq);

		vq->broken = true;
	}
}
EXPORT_SYMBOL_GPL(virtio_break_device);

dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	BUG_ON(!vq->we_own_ring);

	if (vq->packed_ring)
		return vq->packed.ring_dma_addr;

	return vq->split.queue_dma_addr;
}
EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);

dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	BUG_ON(!vq->we_own_ring);

	if (vq->packed_ring)
		return vq->packed.driver_event_dma_addr;

	return vq->split.queue_dma_addr +
		((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
}
EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);

dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	BUG_ON(!vq->we_own_ring);

	if (vq->packed_ring)
		return vq->packed.device_event_dma_addr;

	return vq->split.queue_dma_addr +
		((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
}
EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);

/* Only available for split ring */
const struct vring *virtqueue_get_vring(struct virtqueue *vq)
{
	return &to_vvq(vq)->split.vring;
}
EXPORT_SYMBOL_GPL(virtqueue_get_vring);

MODULE_LICENSE("GPL");
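
/*
 * A sketch of how a transport that let this file allocate the ring might
 * hand the three DMA addresses above to its device.  write_queue_reg() and
 * the register indices are hypothetical; PCI, MMIO and ccw each have their
 * own registers or channel commands for these addresses.
 */
static void __maybe_unused
example_program_queue_addrs(struct virtqueue *vq,
			    void (*write_queue_reg)(unsigned int reg, u64 addr))
{
	/* Valid only for rings created by vring_create_virtqueue(). */
	write_queue_reg(0, virtqueue_get_desc_addr(vq));
	write_queue_reg(1, virtqueue_get_avail_addr(vq));
	write_queue_reg(2, virtqueue_get_used_addr(vq));
}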