1 /* Virtio ring implementation. 2 * 3 * Copyright 2007 Rusty Russell IBM Corporation 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation; either version 2 of the License, or 8 * (at your option) any later version. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, write to the Free Software 17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 18 */ 19 #include <linux/virtio.h> 20 #include <linux/virtio_ring.h> 21 #include <linux/virtio_config.h> 22 #include <linux/device.h> 23 #include <linux/slab.h> 24 #include <linux/module.h> 25 #include <linux/hrtimer.h> 26 #include <linux/dma-mapping.h> 27 #include <xen/xen.h> 28 29 #ifdef DEBUG 30 /* For development, we want to crash whenever the ring is screwed. */ 31 #define BAD_RING(_vq, fmt, args...) \ 32 do { \ 33 dev_err(&(_vq)->vq.vdev->dev, \ 34 "%s:"fmt, (_vq)->vq.name, ##args); \ 35 BUG(); \ 36 } while (0) 37 /* Caller is supposed to guarantee no reentry. */ 38 #define START_USE(_vq) \ 39 do { \ 40 if ((_vq)->in_use) \ 41 panic("%s:in_use = %i\n", \ 42 (_vq)->vq.name, (_vq)->in_use); \ 43 (_vq)->in_use = __LINE__; \ 44 } while (0) 45 #define END_USE(_vq) \ 46 do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0) 47 #define LAST_ADD_TIME_UPDATE(_vq) \ 48 do { \ 49 ktime_t now = ktime_get(); \ 50 \ 51 /* No kick or get, with .1 second between? Warn. */ \ 52 if ((_vq)->last_add_time_valid) \ 53 WARN_ON(ktime_to_ms(ktime_sub(now, \ 54 (_vq)->last_add_time)) > 100); \ 55 (_vq)->last_add_time = now; \ 56 (_vq)->last_add_time_valid = true; \ 57 } while (0) 58 #define LAST_ADD_TIME_CHECK(_vq) \ 59 do { \ 60 if ((_vq)->last_add_time_valid) { \ 61 WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \ 62 (_vq)->last_add_time)) > 100); \ 63 } \ 64 } while (0) 65 #define LAST_ADD_TIME_INVALID(_vq) \ 66 ((_vq)->last_add_time_valid = false) 67 #else 68 #define BAD_RING(_vq, fmt, args...) \ 69 do { \ 70 dev_err(&_vq->vq.vdev->dev, \ 71 "%s:"fmt, (_vq)->vq.name, ##args); \ 72 (_vq)->broken = true; \ 73 } while (0) 74 #define START_USE(vq) 75 #define END_USE(vq) 76 #define LAST_ADD_TIME_UPDATE(vq) 77 #define LAST_ADD_TIME_CHECK(vq) 78 #define LAST_ADD_TIME_INVALID(vq) 79 #endif 80 81 struct vring_desc_state_split { 82 void *data; /* Data for callback. */ 83 struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ 84 }; 85 86 struct vring_desc_state_packed { 87 void *data; /* Data for callback. */ 88 struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */ 89 u16 num; /* Descriptor list length. */ 90 u16 next; /* The next desc state in a list. */ 91 u16 last; /* The last desc state in a list. */ 92 }; 93 94 struct vring_desc_extra_packed { 95 dma_addr_t addr; /* Buffer DMA addr. */ 96 u32 len; /* Buffer length. */ 97 u16 flags; /* Descriptor flags. */ 98 }; 99 100 struct vring_virtqueue { 101 struct virtqueue vq; 102 103 /* Is this a packed ring? */ 104 bool packed_ring; 105 106 /* Is DMA API used? */ 107 bool use_dma_api; 108 109 /* Can we use weak barriers? */ 110 bool weak_barriers; 111 112 /* Other side has made a mess, don't try any more. 
*/ 113 bool broken; 114 115 /* Host supports indirect buffers */ 116 bool indirect; 117 118 /* Host publishes avail event idx */ 119 bool event; 120 121 /* Head of free buffer list. */ 122 unsigned int free_head; 123 /* Number we've added since last sync. */ 124 unsigned int num_added; 125 126 /* Last used index we've seen. */ 127 u16 last_used_idx; 128 129 union { 130 /* Available for split ring */ 131 struct { 132 /* Actual memory layout for this queue. */ 133 struct vring vring; 134 135 /* Last written value to avail->flags */ 136 u16 avail_flags_shadow; 137 138 /* 139 * Last written value to avail->idx in 140 * guest byte order. 141 */ 142 u16 avail_idx_shadow; 143 144 /* Per-descriptor state. */ 145 struct vring_desc_state_split *desc_state; 146 147 /* DMA address and size information */ 148 dma_addr_t queue_dma_addr; 149 size_t queue_size_in_bytes; 150 } split; 151 152 /* Available for packed ring */ 153 struct { 154 /* Actual memory layout for this queue. */ 155 struct { 156 unsigned int num; 157 struct vring_packed_desc *desc; 158 struct vring_packed_desc_event *driver; 159 struct vring_packed_desc_event *device; 160 } vring; 161 162 /* Driver ring wrap counter. */ 163 bool avail_wrap_counter; 164 165 /* Device ring wrap counter. */ 166 bool used_wrap_counter; 167 168 /* Avail used flags. */ 169 u16 avail_used_flags; 170 171 /* Index of the next avail descriptor. */ 172 u16 next_avail_idx; 173 174 /* 175 * Last written value to driver->flags in 176 * guest byte order. 177 */ 178 u16 event_flags_shadow; 179 180 /* Per-descriptor state. */ 181 struct vring_desc_state_packed *desc_state; 182 struct vring_desc_extra_packed *desc_extra; 183 184 /* DMA address and size information */ 185 dma_addr_t ring_dma_addr; 186 dma_addr_t driver_event_dma_addr; 187 dma_addr_t device_event_dma_addr; 188 size_t ring_size_in_bytes; 189 size_t event_size_in_bytes; 190 } packed; 191 }; 192 193 /* How to notify other side. FIXME: commonalize hcalls! */ 194 bool (*notify)(struct virtqueue *vq); 195 196 /* DMA, allocation, and size information */ 197 bool we_own_ring; 198 199 #ifdef DEBUG 200 /* They're supposed to lock for us. */ 201 unsigned int in_use; 202 203 /* Figure out if their kicks are too delayed. */ 204 bool last_add_time_valid; 205 ktime_t last_add_time; 206 #endif 207 }; 208 209 210 /* 211 * Helpers. 212 */ 213 214 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) 215 216 static inline bool virtqueue_use_indirect(struct virtqueue *_vq, 217 unsigned int total_sg) 218 { 219 struct vring_virtqueue *vq = to_vvq(_vq); 220 221 /* 222 * If the host supports indirect descriptor tables, and we have multiple 223 * buffers, then go indirect. FIXME: tune this threshold 224 */ 225 return (vq->indirect && total_sg > 1 && vq->vq.num_free); 226 } 227 228 /* 229 * Modern virtio devices have feature bits to specify whether they need a 230 * quirk and bypass the IOMMU. If not there, just use the DMA API. 231 * 232 * If there, the interaction between virtio and DMA API is messy. 233 * 234 * On most systems with virtio, physical addresses match bus addresses, 235 * and it doesn't particularly matter whether we use the DMA API. 236 * 237 * On some systems, including Xen and any system with a physical device 238 * that speaks virtio behind a physical IOMMU, we must use the DMA API 239 * for virtio DMA to work at all. 
240 * 241 * On other systems, including SPARC and PPC64, virtio-pci devices are 242 * enumerated as though they are behind an IOMMU, but the virtio host 243 * ignores the IOMMU, so we must either pretend that the IOMMU isn't 244 * there or somehow map everything as the identity. 245 * 246 * For the time being, we preserve historic behavior and bypass the DMA 247 * API. 248 * 249 * TODO: install a per-device DMA ops structure that does the right thing 250 * taking into account all the above quirks, and use the DMA API 251 * unconditionally on data path. 252 */ 253 254 static bool vring_use_dma_api(struct virtio_device *vdev) 255 { 256 if (!virtio_has_iommu_quirk(vdev)) 257 return true; 258 259 /* Otherwise, we are left to guess. */ 260 /* 261 * In theory, it's possible to have a buggy QEMU-supposed 262 * emulated Q35 IOMMU and Xen enabled at the same time. On 263 * such a configuration, virtio has never worked and will 264 * not work without an even larger kludge. Instead, enable 265 * the DMA API if we're a Xen guest, which at least allows 266 * all of the sensible Xen configurations to work correctly. 267 */ 268 if (xen_domain()) 269 return true; 270 271 return false; 272 } 273 274 size_t virtio_max_dma_size(struct virtio_device *vdev) 275 { 276 size_t max_segment_size = SIZE_MAX; 277 278 if (vring_use_dma_api(vdev)) 279 max_segment_size = dma_max_mapping_size(&vdev->dev); 280 281 return max_segment_size; 282 } 283 EXPORT_SYMBOL_GPL(virtio_max_dma_size); 284 285 static void *vring_alloc_queue(struct virtio_device *vdev, size_t size, 286 dma_addr_t *dma_handle, gfp_t flag) 287 { 288 if (vring_use_dma_api(vdev)) { 289 return dma_alloc_coherent(vdev->dev.parent, size, 290 dma_handle, flag); 291 } else { 292 void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag); 293 294 if (queue) { 295 phys_addr_t phys_addr = virt_to_phys(queue); 296 *dma_handle = (dma_addr_t)phys_addr; 297 298 /* 299 * Sanity check: make sure we dind't truncate 300 * the address. The only arches I can find that 301 * have 64-bit phys_addr_t but 32-bit dma_addr_t 302 * are certain non-highmem MIPS and x86 303 * configurations, but these configurations 304 * should never allocate physical pages above 32 305 * bits, so this is fine. Just in case, throw a 306 * warning and abort if we end up with an 307 * unrepresentable address. 308 */ 309 if (WARN_ON_ONCE(*dma_handle != phys_addr)) { 310 free_pages_exact(queue, PAGE_ALIGN(size)); 311 return NULL; 312 } 313 } 314 return queue; 315 } 316 } 317 318 static void vring_free_queue(struct virtio_device *vdev, size_t size, 319 void *queue, dma_addr_t dma_handle) 320 { 321 if (vring_use_dma_api(vdev)) 322 dma_free_coherent(vdev->dev.parent, size, queue, dma_handle); 323 else 324 free_pages_exact(queue, PAGE_ALIGN(size)); 325 } 326 327 /* 328 * The DMA ops on various arches are rather gnarly right now, and 329 * making all of the arch DMA ops work on the vring device itself 330 * is a mess. For now, we use the parent device for DMA ops. 331 */ 332 static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq) 333 { 334 return vq->vq.vdev->dev.parent; 335 } 336 337 /* Map one sg entry. 
*/ 338 static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq, 339 struct scatterlist *sg, 340 enum dma_data_direction direction) 341 { 342 if (!vq->use_dma_api) 343 return (dma_addr_t)sg_phys(sg); 344 345 /* 346 * We can't use dma_map_sg, because we don't use scatterlists in 347 * the way it expects (we don't guarantee that the scatterlist 348 * will exist for the lifetime of the mapping). 349 */ 350 return dma_map_page(vring_dma_dev(vq), 351 sg_page(sg), sg->offset, sg->length, 352 direction); 353 } 354 355 static dma_addr_t vring_map_single(const struct vring_virtqueue *vq, 356 void *cpu_addr, size_t size, 357 enum dma_data_direction direction) 358 { 359 if (!vq->use_dma_api) 360 return (dma_addr_t)virt_to_phys(cpu_addr); 361 362 return dma_map_single(vring_dma_dev(vq), 363 cpu_addr, size, direction); 364 } 365 366 static int vring_mapping_error(const struct vring_virtqueue *vq, 367 dma_addr_t addr) 368 { 369 if (!vq->use_dma_api) 370 return 0; 371 372 return dma_mapping_error(vring_dma_dev(vq), addr); 373 } 374 375 376 /* 377 * Split ring specific functions - *_split(). 378 */ 379 380 static void vring_unmap_one_split(const struct vring_virtqueue *vq, 381 struct vring_desc *desc) 382 { 383 u16 flags; 384 385 if (!vq->use_dma_api) 386 return; 387 388 flags = virtio16_to_cpu(vq->vq.vdev, desc->flags); 389 390 if (flags & VRING_DESC_F_INDIRECT) { 391 dma_unmap_single(vring_dma_dev(vq), 392 virtio64_to_cpu(vq->vq.vdev, desc->addr), 393 virtio32_to_cpu(vq->vq.vdev, desc->len), 394 (flags & VRING_DESC_F_WRITE) ? 395 DMA_FROM_DEVICE : DMA_TO_DEVICE); 396 } else { 397 dma_unmap_page(vring_dma_dev(vq), 398 virtio64_to_cpu(vq->vq.vdev, desc->addr), 399 virtio32_to_cpu(vq->vq.vdev, desc->len), 400 (flags & VRING_DESC_F_WRITE) ? 401 DMA_FROM_DEVICE : DMA_TO_DEVICE); 402 } 403 } 404 405 static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq, 406 unsigned int total_sg, 407 gfp_t gfp) 408 { 409 struct vring_desc *desc; 410 unsigned int i; 411 412 /* 413 * We require lowmem mappings for the descriptors because 414 * otherwise virt_to_phys will give us bogus addresses in the 415 * virtqueue. 416 */ 417 gfp &= ~__GFP_HIGHMEM; 418 419 desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp); 420 if (!desc) 421 return NULL; 422 423 for (i = 0; i < total_sg; i++) 424 desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1); 425 return desc; 426 } 427 428 static inline int virtqueue_add_split(struct virtqueue *_vq, 429 struct scatterlist *sgs[], 430 unsigned int total_sg, 431 unsigned int out_sgs, 432 unsigned int in_sgs, 433 void *data, 434 void *ctx, 435 gfp_t gfp) 436 { 437 struct vring_virtqueue *vq = to_vvq(_vq); 438 struct scatterlist *sg; 439 struct vring_desc *desc; 440 unsigned int i, n, avail, descs_used, uninitialized_var(prev), err_idx; 441 int head; 442 bool indirect; 443 444 START_USE(vq); 445 446 BUG_ON(data == NULL); 447 BUG_ON(ctx && vq->indirect); 448 449 if (unlikely(vq->broken)) { 450 END_USE(vq); 451 return -EIO; 452 } 453 454 LAST_ADD_TIME_UPDATE(vq); 455 456 BUG_ON(total_sg == 0); 457 458 head = vq->free_head; 459 460 if (virtqueue_use_indirect(_vq, total_sg)) 461 desc = alloc_indirect_split(_vq, total_sg, gfp); 462 else { 463 desc = NULL; 464 WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect); 465 } 466 467 if (desc) { 468 /* Use a single buffer which doesn't continue */ 469 indirect = true; 470 /* Set up rest to use this indirect table. 
*/ 471 i = 0; 472 descs_used = 1; 473 } else { 474 indirect = false; 475 desc = vq->split.vring.desc; 476 i = head; 477 descs_used = total_sg; 478 } 479 480 if (vq->vq.num_free < descs_used) { 481 pr_debug("Can't add buf len %i - avail = %i\n", 482 descs_used, vq->vq.num_free); 483 /* FIXME: for historical reasons, we force a notify here if 484 * there are outgoing parts to the buffer. Presumably the 485 * host should service the ring ASAP. */ 486 if (out_sgs) 487 vq->notify(&vq->vq); 488 if (indirect) 489 kfree(desc); 490 END_USE(vq); 491 return -ENOSPC; 492 } 493 494 for (n = 0; n < out_sgs; n++) { 495 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 496 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE); 497 if (vring_mapping_error(vq, addr)) 498 goto unmap_release; 499 500 desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT); 501 desc[i].addr = cpu_to_virtio64(_vq->vdev, addr); 502 desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length); 503 prev = i; 504 i = virtio16_to_cpu(_vq->vdev, desc[i].next); 505 } 506 } 507 for (; n < (out_sgs + in_sgs); n++) { 508 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 509 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE); 510 if (vring_mapping_error(vq, addr)) 511 goto unmap_release; 512 513 desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE); 514 desc[i].addr = cpu_to_virtio64(_vq->vdev, addr); 515 desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length); 516 prev = i; 517 i = virtio16_to_cpu(_vq->vdev, desc[i].next); 518 } 519 } 520 /* Last one doesn't continue. */ 521 desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); 522 523 if (indirect) { 524 /* Now that the indirect table is filled in, map it. */ 525 dma_addr_t addr = vring_map_single( 526 vq, desc, total_sg * sizeof(struct vring_desc), 527 DMA_TO_DEVICE); 528 if (vring_mapping_error(vq, addr)) 529 goto unmap_release; 530 531 vq->split.vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, 532 VRING_DESC_F_INDIRECT); 533 vq->split.vring.desc[head].addr = cpu_to_virtio64(_vq->vdev, 534 addr); 535 536 vq->split.vring.desc[head].len = cpu_to_virtio32(_vq->vdev, 537 total_sg * sizeof(struct vring_desc)); 538 } 539 540 /* We're using some buffers from the free list. */ 541 vq->vq.num_free -= descs_used; 542 543 /* Update free pointer */ 544 if (indirect) 545 vq->free_head = virtio16_to_cpu(_vq->vdev, 546 vq->split.vring.desc[head].next); 547 else 548 vq->free_head = i; 549 550 /* Store token and indirect buffer state. */ 551 vq->split.desc_state[head].data = data; 552 if (indirect) 553 vq->split.desc_state[head].indir_desc = desc; 554 else 555 vq->split.desc_state[head].indir_desc = ctx; 556 557 /* Put entry in available array (but don't update avail->idx until they 558 * do sync). */ 559 avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1); 560 vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head); 561 562 /* Descriptors and available array need to be set before we expose the 563 * new available array entries. */ 564 virtio_wmb(vq->weak_barriers); 565 vq->split.avail_idx_shadow++; 566 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, 567 vq->split.avail_idx_shadow); 568 vq->num_added++; 569 570 pr_debug("Added buffer head %i to %p\n", head, vq); 571 END_USE(vq); 572 573 /* This is very unlikely, but theoretically possible. Kick 574 * just in case. 
*/ 575 if (unlikely(vq->num_added == (1 << 16) - 1)) 576 virtqueue_kick(_vq); 577 578 return 0; 579 580 unmap_release: 581 err_idx = i; 582 i = head; 583 584 for (n = 0; n < total_sg; n++) { 585 if (i == err_idx) 586 break; 587 vring_unmap_one_split(vq, &desc[i]); 588 i = virtio16_to_cpu(_vq->vdev, vq->split.vring.desc[i].next); 589 } 590 591 if (indirect) 592 kfree(desc); 593 594 END_USE(vq); 595 return -EIO; 596 } 597 598 static bool virtqueue_kick_prepare_split(struct virtqueue *_vq) 599 { 600 struct vring_virtqueue *vq = to_vvq(_vq); 601 u16 new, old; 602 bool needs_kick; 603 604 START_USE(vq); 605 /* We need to expose available array entries before checking avail 606 * event. */ 607 virtio_mb(vq->weak_barriers); 608 609 old = vq->split.avail_idx_shadow - vq->num_added; 610 new = vq->split.avail_idx_shadow; 611 vq->num_added = 0; 612 613 LAST_ADD_TIME_CHECK(vq); 614 LAST_ADD_TIME_INVALID(vq); 615 616 if (vq->event) { 617 needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev, 618 vring_avail_event(&vq->split.vring)), 619 new, old); 620 } else { 621 needs_kick = !(vq->split.vring.used->flags & 622 cpu_to_virtio16(_vq->vdev, 623 VRING_USED_F_NO_NOTIFY)); 624 } 625 END_USE(vq); 626 return needs_kick; 627 } 628 629 static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, 630 void **ctx) 631 { 632 unsigned int i, j; 633 __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT); 634 635 /* Clear data ptr. */ 636 vq->split.desc_state[head].data = NULL; 637 638 /* Put back on free list: unmap first-level descriptors and find end */ 639 i = head; 640 641 while (vq->split.vring.desc[i].flags & nextflag) { 642 vring_unmap_one_split(vq, &vq->split.vring.desc[i]); 643 i = virtio16_to_cpu(vq->vq.vdev, vq->split.vring.desc[i].next); 644 vq->vq.num_free++; 645 } 646 647 vring_unmap_one_split(vq, &vq->split.vring.desc[i]); 648 vq->split.vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, 649 vq->free_head); 650 vq->free_head = head; 651 652 /* Plus final descriptor */ 653 vq->vq.num_free++; 654 655 if (vq->indirect) { 656 struct vring_desc *indir_desc = 657 vq->split.desc_state[head].indir_desc; 658 u32 len; 659 660 /* Free the indirect table, if any, now that it's unmapped. */ 661 if (!indir_desc) 662 return; 663 664 len = virtio32_to_cpu(vq->vq.vdev, 665 vq->split.vring.desc[head].len); 666 667 BUG_ON(!(vq->split.vring.desc[head].flags & 668 cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT))); 669 BUG_ON(len == 0 || len % sizeof(struct vring_desc)); 670 671 for (j = 0; j < len / sizeof(struct vring_desc); j++) 672 vring_unmap_one_split(vq, &indir_desc[j]); 673 674 kfree(indir_desc); 675 vq->split.desc_state[head].indir_desc = NULL; 676 } else if (ctx) { 677 *ctx = vq->split.desc_state[head].indir_desc; 678 } 679 } 680 681 static inline bool more_used_split(const struct vring_virtqueue *vq) 682 { 683 return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, 684 vq->split.vring.used->idx); 685 } 686 687 static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq, 688 unsigned int *len, 689 void **ctx) 690 { 691 struct vring_virtqueue *vq = to_vvq(_vq); 692 void *ret; 693 unsigned int i; 694 u16 last_used; 695 696 START_USE(vq); 697 698 if (unlikely(vq->broken)) { 699 END_USE(vq); 700 return NULL; 701 } 702 703 if (!more_used_split(vq)) { 704 pr_debug("No more buffers in queue\n"); 705 END_USE(vq); 706 return NULL; 707 } 708 709 /* Only get used array entries after they have been exposed by host. 
*/ 710 virtio_rmb(vq->weak_barriers); 711 712 last_used = (vq->last_used_idx & (vq->split.vring.num - 1)); 713 i = virtio32_to_cpu(_vq->vdev, 714 vq->split.vring.used->ring[last_used].id); 715 *len = virtio32_to_cpu(_vq->vdev, 716 vq->split.vring.used->ring[last_used].len); 717 718 if (unlikely(i >= vq->split.vring.num)) { 719 BAD_RING(vq, "id %u out of range\n", i); 720 return NULL; 721 } 722 if (unlikely(!vq->split.desc_state[i].data)) { 723 BAD_RING(vq, "id %u is not a head!\n", i); 724 return NULL; 725 } 726 727 /* detach_buf_split clears data, so grab it now. */ 728 ret = vq->split.desc_state[i].data; 729 detach_buf_split(vq, i, ctx); 730 vq->last_used_idx++; 731 /* If we expect an interrupt for the next entry, tell host 732 * by writing event index and flush out the write before 733 * the read in the next get_buf call. */ 734 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) 735 virtio_store_mb(vq->weak_barriers, 736 &vring_used_event(&vq->split.vring), 737 cpu_to_virtio16(_vq->vdev, vq->last_used_idx)); 738 739 LAST_ADD_TIME_INVALID(vq); 740 741 END_USE(vq); 742 return ret; 743 } 744 745 static void virtqueue_disable_cb_split(struct virtqueue *_vq) 746 { 747 struct vring_virtqueue *vq = to_vvq(_vq); 748 749 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) { 750 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; 751 if (!vq->event) 752 vq->split.vring.avail->flags = 753 cpu_to_virtio16(_vq->vdev, 754 vq->split.avail_flags_shadow); 755 } 756 } 757 758 static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq) 759 { 760 struct vring_virtqueue *vq = to_vvq(_vq); 761 u16 last_used_idx; 762 763 START_USE(vq); 764 765 /* We optimistically turn back on interrupts, then check if there was 766 * more to do. */ 767 /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to 768 * either clear the flags bit or point the event index at the next 769 * entry. Always do both to keep code simple. */ 770 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 771 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 772 if (!vq->event) 773 vq->split.vring.avail->flags = 774 cpu_to_virtio16(_vq->vdev, 775 vq->split.avail_flags_shadow); 776 } 777 vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev, 778 last_used_idx = vq->last_used_idx); 779 END_USE(vq); 780 return last_used_idx; 781 } 782 783 static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx) 784 { 785 struct vring_virtqueue *vq = to_vvq(_vq); 786 787 return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev, 788 vq->split.vring.used->idx); 789 } 790 791 static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq) 792 { 793 struct vring_virtqueue *vq = to_vvq(_vq); 794 u16 bufs; 795 796 START_USE(vq); 797 798 /* We optimistically turn back on interrupts, then check if there was 799 * more to do. */ 800 /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to 801 * either clear the flags bit or point the event index at the next 802 * entry. Always update the event index to keep code simple. 
*/ 803 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 804 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 805 if (!vq->event) 806 vq->split.vring.avail->flags = 807 cpu_to_virtio16(_vq->vdev, 808 vq->split.avail_flags_shadow); 809 } 810 /* TODO: tune this threshold */ 811 bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4; 812 813 virtio_store_mb(vq->weak_barriers, 814 &vring_used_event(&vq->split.vring), 815 cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs)); 816 817 if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx) 818 - vq->last_used_idx) > bufs)) { 819 END_USE(vq); 820 return false; 821 } 822 823 END_USE(vq); 824 return true; 825 } 826 827 static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq) 828 { 829 struct vring_virtqueue *vq = to_vvq(_vq); 830 unsigned int i; 831 void *buf; 832 833 START_USE(vq); 834 835 for (i = 0; i < vq->split.vring.num; i++) { 836 if (!vq->split.desc_state[i].data) 837 continue; 838 /* detach_buf_split clears data, so grab it now. */ 839 buf = vq->split.desc_state[i].data; 840 detach_buf_split(vq, i, NULL); 841 vq->split.avail_idx_shadow--; 842 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, 843 vq->split.avail_idx_shadow); 844 END_USE(vq); 845 return buf; 846 } 847 /* That should have freed everything. */ 848 BUG_ON(vq->vq.num_free != vq->split.vring.num); 849 850 END_USE(vq); 851 return NULL; 852 } 853 854 static struct virtqueue *vring_create_virtqueue_split( 855 unsigned int index, 856 unsigned int num, 857 unsigned int vring_align, 858 struct virtio_device *vdev, 859 bool weak_barriers, 860 bool may_reduce_num, 861 bool context, 862 bool (*notify)(struct virtqueue *), 863 void (*callback)(struct virtqueue *), 864 const char *name) 865 { 866 struct virtqueue *vq; 867 void *queue = NULL; 868 dma_addr_t dma_addr; 869 size_t queue_size_in_bytes; 870 struct vring vring; 871 872 /* We assume num is a power of 2. */ 873 if (num & (num - 1)) { 874 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num); 875 return NULL; 876 } 877 878 /* TODO: allocate each queue chunk individually */ 879 for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) { 880 queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 881 &dma_addr, 882 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 883 if (queue) 884 break; 885 if (!may_reduce_num) 886 return NULL; 887 } 888 889 if (!num) 890 return NULL; 891 892 if (!queue) { 893 /* Try to get a single page. You are my only hope! */ 894 queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 895 &dma_addr, GFP_KERNEL|__GFP_ZERO); 896 } 897 if (!queue) 898 return NULL; 899 900 queue_size_in_bytes = vring_size(num, vring_align); 901 vring_init(&vring, num, queue, vring_align); 902 903 vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context, 904 notify, callback, name); 905 if (!vq) { 906 vring_free_queue(vdev, queue_size_in_bytes, queue, 907 dma_addr); 908 return NULL; 909 } 910 911 to_vvq(vq)->split.queue_dma_addr = dma_addr; 912 to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes; 913 to_vvq(vq)->we_own_ring = true; 914 915 return vq; 916 } 917 918 919 /* 920 * Packed ring specific functions - *_packed(). 
921 */ 922 923 static void vring_unmap_state_packed(const struct vring_virtqueue *vq, 924 struct vring_desc_extra_packed *state) 925 { 926 u16 flags; 927 928 if (!vq->use_dma_api) 929 return; 930 931 flags = state->flags; 932 933 if (flags & VRING_DESC_F_INDIRECT) { 934 dma_unmap_single(vring_dma_dev(vq), 935 state->addr, state->len, 936 (flags & VRING_DESC_F_WRITE) ? 937 DMA_FROM_DEVICE : DMA_TO_DEVICE); 938 } else { 939 dma_unmap_page(vring_dma_dev(vq), 940 state->addr, state->len, 941 (flags & VRING_DESC_F_WRITE) ? 942 DMA_FROM_DEVICE : DMA_TO_DEVICE); 943 } 944 } 945 946 static void vring_unmap_desc_packed(const struct vring_virtqueue *vq, 947 struct vring_packed_desc *desc) 948 { 949 u16 flags; 950 951 if (!vq->use_dma_api) 952 return; 953 954 flags = le16_to_cpu(desc->flags); 955 956 if (flags & VRING_DESC_F_INDIRECT) { 957 dma_unmap_single(vring_dma_dev(vq), 958 le64_to_cpu(desc->addr), 959 le32_to_cpu(desc->len), 960 (flags & VRING_DESC_F_WRITE) ? 961 DMA_FROM_DEVICE : DMA_TO_DEVICE); 962 } else { 963 dma_unmap_page(vring_dma_dev(vq), 964 le64_to_cpu(desc->addr), 965 le32_to_cpu(desc->len), 966 (flags & VRING_DESC_F_WRITE) ? 967 DMA_FROM_DEVICE : DMA_TO_DEVICE); 968 } 969 } 970 971 static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg, 972 gfp_t gfp) 973 { 974 struct vring_packed_desc *desc; 975 976 /* 977 * We require lowmem mappings for the descriptors because 978 * otherwise virt_to_phys will give us bogus addresses in the 979 * virtqueue. 980 */ 981 gfp &= ~__GFP_HIGHMEM; 982 983 desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp); 984 985 return desc; 986 } 987 988 static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, 989 struct scatterlist *sgs[], 990 unsigned int total_sg, 991 unsigned int out_sgs, 992 unsigned int in_sgs, 993 void *data, 994 gfp_t gfp) 995 { 996 struct vring_packed_desc *desc; 997 struct scatterlist *sg; 998 unsigned int i, n, err_idx; 999 u16 head, id; 1000 dma_addr_t addr; 1001 1002 head = vq->packed.next_avail_idx; 1003 desc = alloc_indirect_packed(total_sg, gfp); 1004 1005 if (unlikely(vq->vq.num_free < 1)) { 1006 pr_debug("Can't add buf len 1 - avail = 0\n"); 1007 END_USE(vq); 1008 return -ENOSPC; 1009 } 1010 1011 i = 0; 1012 id = vq->free_head; 1013 BUG_ON(id == vq->packed.vring.num); 1014 1015 for (n = 0; n < out_sgs + in_sgs; n++) { 1016 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 1017 addr = vring_map_one_sg(vq, sg, n < out_sgs ? 1018 DMA_TO_DEVICE : DMA_FROM_DEVICE); 1019 if (vring_mapping_error(vq, addr)) 1020 goto unmap_release; 1021 1022 desc[i].flags = cpu_to_le16(n < out_sgs ? 1023 0 : VRING_DESC_F_WRITE); 1024 desc[i].addr = cpu_to_le64(addr); 1025 desc[i].len = cpu_to_le32(sg->length); 1026 i++; 1027 } 1028 } 1029 1030 /* Now that the indirect table is filled in, map it. 
*/ 1031 addr = vring_map_single(vq, desc, 1032 total_sg * sizeof(struct vring_packed_desc), 1033 DMA_TO_DEVICE); 1034 if (vring_mapping_error(vq, addr)) 1035 goto unmap_release; 1036 1037 vq->packed.vring.desc[head].addr = cpu_to_le64(addr); 1038 vq->packed.vring.desc[head].len = cpu_to_le32(total_sg * 1039 sizeof(struct vring_packed_desc)); 1040 vq->packed.vring.desc[head].id = cpu_to_le16(id); 1041 1042 if (vq->use_dma_api) { 1043 vq->packed.desc_extra[id].addr = addr; 1044 vq->packed.desc_extra[id].len = total_sg * 1045 sizeof(struct vring_packed_desc); 1046 vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT | 1047 vq->packed.avail_used_flags; 1048 } 1049 1050 /* 1051 * A driver MUST NOT make the first descriptor in the list 1052 * available before all subsequent descriptors comprising 1053 * the list are made available. 1054 */ 1055 virtio_wmb(vq->weak_barriers); 1056 vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT | 1057 vq->packed.avail_used_flags); 1058 1059 /* We're using some buffers from the free list. */ 1060 vq->vq.num_free -= 1; 1061 1062 /* Update free pointer */ 1063 n = head + 1; 1064 if (n >= vq->packed.vring.num) { 1065 n = 0; 1066 vq->packed.avail_wrap_counter ^= 1; 1067 vq->packed.avail_used_flags ^= 1068 1 << VRING_PACKED_DESC_F_AVAIL | 1069 1 << VRING_PACKED_DESC_F_USED; 1070 } 1071 vq->packed.next_avail_idx = n; 1072 vq->free_head = vq->packed.desc_state[id].next; 1073 1074 /* Store token and indirect buffer state. */ 1075 vq->packed.desc_state[id].num = 1; 1076 vq->packed.desc_state[id].data = data; 1077 vq->packed.desc_state[id].indir_desc = desc; 1078 vq->packed.desc_state[id].last = id; 1079 1080 vq->num_added += 1; 1081 1082 pr_debug("Added buffer head %i to %p\n", head, vq); 1083 END_USE(vq); 1084 1085 return 0; 1086 1087 unmap_release: 1088 err_idx = i; 1089 1090 for (i = 0; i < err_idx; i++) 1091 vring_unmap_desc_packed(vq, &desc[i]); 1092 1093 kfree(desc); 1094 1095 END_USE(vq); 1096 return -EIO; 1097 } 1098 1099 static inline int virtqueue_add_packed(struct virtqueue *_vq, 1100 struct scatterlist *sgs[], 1101 unsigned int total_sg, 1102 unsigned int out_sgs, 1103 unsigned int in_sgs, 1104 void *data, 1105 void *ctx, 1106 gfp_t gfp) 1107 { 1108 struct vring_virtqueue *vq = to_vvq(_vq); 1109 struct vring_packed_desc *desc; 1110 struct scatterlist *sg; 1111 unsigned int i, n, c, descs_used, err_idx; 1112 __le16 uninitialized_var(head_flags), flags; 1113 u16 head, id, uninitialized_var(prev), curr, avail_used_flags; 1114 1115 START_USE(vq); 1116 1117 BUG_ON(data == NULL); 1118 BUG_ON(ctx && vq->indirect); 1119 1120 if (unlikely(vq->broken)) { 1121 END_USE(vq); 1122 return -EIO; 1123 } 1124 1125 LAST_ADD_TIME_UPDATE(vq); 1126 1127 BUG_ON(total_sg == 0); 1128 1129 if (virtqueue_use_indirect(_vq, total_sg)) 1130 return virtqueue_add_indirect_packed(vq, sgs, total_sg, 1131 out_sgs, in_sgs, data, gfp); 1132 1133 head = vq->packed.next_avail_idx; 1134 avail_used_flags = vq->packed.avail_used_flags; 1135 1136 WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect); 1137 1138 desc = vq->packed.vring.desc; 1139 i = head; 1140 descs_used = total_sg; 1141 1142 if (unlikely(vq->vq.num_free < descs_used)) { 1143 pr_debug("Can't add buf len %i - avail = %i\n", 1144 descs_used, vq->vq.num_free); 1145 END_USE(vq); 1146 return -ENOSPC; 1147 } 1148 1149 id = vq->free_head; 1150 BUG_ON(id == vq->packed.vring.num); 1151 1152 curr = id; 1153 c = 0; 1154 for (n = 0; n < out_sgs + in_sgs; n++) { 1155 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 1156 
dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ? 1157 DMA_TO_DEVICE : DMA_FROM_DEVICE); 1158 if (vring_mapping_error(vq, addr)) 1159 goto unmap_release; 1160 1161 flags = cpu_to_le16(vq->packed.avail_used_flags | 1162 (++c == total_sg ? 0 : VRING_DESC_F_NEXT) | 1163 (n < out_sgs ? 0 : VRING_DESC_F_WRITE)); 1164 if (i == head) 1165 head_flags = flags; 1166 else 1167 desc[i].flags = flags; 1168 1169 desc[i].addr = cpu_to_le64(addr); 1170 desc[i].len = cpu_to_le32(sg->length); 1171 desc[i].id = cpu_to_le16(id); 1172 1173 if (unlikely(vq->use_dma_api)) { 1174 vq->packed.desc_extra[curr].addr = addr; 1175 vq->packed.desc_extra[curr].len = sg->length; 1176 vq->packed.desc_extra[curr].flags = 1177 le16_to_cpu(flags); 1178 } 1179 prev = curr; 1180 curr = vq->packed.desc_state[curr].next; 1181 1182 if ((unlikely(++i >= vq->packed.vring.num))) { 1183 i = 0; 1184 vq->packed.avail_used_flags ^= 1185 1 << VRING_PACKED_DESC_F_AVAIL | 1186 1 << VRING_PACKED_DESC_F_USED; 1187 } 1188 } 1189 } 1190 1191 if (i < head) 1192 vq->packed.avail_wrap_counter ^= 1; 1193 1194 /* We're using some buffers from the free list. */ 1195 vq->vq.num_free -= descs_used; 1196 1197 /* Update free pointer */ 1198 vq->packed.next_avail_idx = i; 1199 vq->free_head = curr; 1200 1201 /* Store token. */ 1202 vq->packed.desc_state[id].num = descs_used; 1203 vq->packed.desc_state[id].data = data; 1204 vq->packed.desc_state[id].indir_desc = ctx; 1205 vq->packed.desc_state[id].last = prev; 1206 1207 /* 1208 * A driver MUST NOT make the first descriptor in the list 1209 * available before all subsequent descriptors comprising 1210 * the list are made available. 1211 */ 1212 virtio_wmb(vq->weak_barriers); 1213 vq->packed.vring.desc[head].flags = head_flags; 1214 vq->num_added += descs_used; 1215 1216 pr_debug("Added buffer head %i to %p\n", head, vq); 1217 END_USE(vq); 1218 1219 return 0; 1220 1221 unmap_release: 1222 err_idx = i; 1223 i = head; 1224 1225 vq->packed.avail_used_flags = avail_used_flags; 1226 1227 for (n = 0; n < total_sg; n++) { 1228 if (i == err_idx) 1229 break; 1230 vring_unmap_desc_packed(vq, &desc[i]); 1231 i++; 1232 if (i >= vq->packed.vring.num) 1233 i = 0; 1234 } 1235 1236 END_USE(vq); 1237 return -EIO; 1238 } 1239 1240 static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq) 1241 { 1242 struct vring_virtqueue *vq = to_vvq(_vq); 1243 u16 new, old, off_wrap, flags, wrap_counter, event_idx; 1244 bool needs_kick; 1245 union { 1246 struct { 1247 __le16 off_wrap; 1248 __le16 flags; 1249 }; 1250 u32 u32; 1251 } snapshot; 1252 1253 START_USE(vq); 1254 1255 /* 1256 * We need to expose the new flags value before checking notification 1257 * suppressions. 
1258 */ 1259 virtio_mb(vq->weak_barriers); 1260 1261 old = vq->packed.next_avail_idx - vq->num_added; 1262 new = vq->packed.next_avail_idx; 1263 vq->num_added = 0; 1264 1265 snapshot.u32 = *(u32 *)vq->packed.vring.device; 1266 flags = le16_to_cpu(snapshot.flags); 1267 1268 LAST_ADD_TIME_CHECK(vq); 1269 LAST_ADD_TIME_INVALID(vq); 1270 1271 if (flags != VRING_PACKED_EVENT_FLAG_DESC) { 1272 needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE); 1273 goto out; 1274 } 1275 1276 off_wrap = le16_to_cpu(snapshot.off_wrap); 1277 1278 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; 1279 event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); 1280 if (wrap_counter != vq->packed.avail_wrap_counter) 1281 event_idx -= vq->packed.vring.num; 1282 1283 needs_kick = vring_need_event(event_idx, new, old); 1284 out: 1285 END_USE(vq); 1286 return needs_kick; 1287 } 1288 1289 static void detach_buf_packed(struct vring_virtqueue *vq, 1290 unsigned int id, void **ctx) 1291 { 1292 struct vring_desc_state_packed *state = NULL; 1293 struct vring_packed_desc *desc; 1294 unsigned int i, curr; 1295 1296 state = &vq->packed.desc_state[id]; 1297 1298 /* Clear data ptr. */ 1299 state->data = NULL; 1300 1301 vq->packed.desc_state[state->last].next = vq->free_head; 1302 vq->free_head = id; 1303 vq->vq.num_free += state->num; 1304 1305 if (unlikely(vq->use_dma_api)) { 1306 curr = id; 1307 for (i = 0; i < state->num; i++) { 1308 vring_unmap_state_packed(vq, 1309 &vq->packed.desc_extra[curr]); 1310 curr = vq->packed.desc_state[curr].next; 1311 } 1312 } 1313 1314 if (vq->indirect) { 1315 u32 len; 1316 1317 /* Free the indirect table, if any, now that it's unmapped. */ 1318 desc = state->indir_desc; 1319 if (!desc) 1320 return; 1321 1322 if (vq->use_dma_api) { 1323 len = vq->packed.desc_extra[id].len; 1324 for (i = 0; i < len / sizeof(struct vring_packed_desc); 1325 i++) 1326 vring_unmap_desc_packed(vq, &desc[i]); 1327 } 1328 kfree(desc); 1329 state->indir_desc = NULL; 1330 } else if (ctx) { 1331 *ctx = state->indir_desc; 1332 } 1333 } 1334 1335 static inline bool is_used_desc_packed(const struct vring_virtqueue *vq, 1336 u16 idx, bool used_wrap_counter) 1337 { 1338 bool avail, used; 1339 u16 flags; 1340 1341 flags = le16_to_cpu(vq->packed.vring.desc[idx].flags); 1342 avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL)); 1343 used = !!(flags & (1 << VRING_PACKED_DESC_F_USED)); 1344 1345 return avail == used && used == used_wrap_counter; 1346 } 1347 1348 static inline bool more_used_packed(const struct vring_virtqueue *vq) 1349 { 1350 return is_used_desc_packed(vq, vq->last_used_idx, 1351 vq->packed.used_wrap_counter); 1352 } 1353 1354 static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, 1355 unsigned int *len, 1356 void **ctx) 1357 { 1358 struct vring_virtqueue *vq = to_vvq(_vq); 1359 u16 last_used, id; 1360 void *ret; 1361 1362 START_USE(vq); 1363 1364 if (unlikely(vq->broken)) { 1365 END_USE(vq); 1366 return NULL; 1367 } 1368 1369 if (!more_used_packed(vq)) { 1370 pr_debug("No more buffers in queue\n"); 1371 END_USE(vq); 1372 return NULL; 1373 } 1374 1375 /* Only get used elements after they have been exposed by host. 
*/ 1376 virtio_rmb(vq->weak_barriers); 1377 1378 last_used = vq->last_used_idx; 1379 id = le16_to_cpu(vq->packed.vring.desc[last_used].id); 1380 *len = le32_to_cpu(vq->packed.vring.desc[last_used].len); 1381 1382 if (unlikely(id >= vq->packed.vring.num)) { 1383 BAD_RING(vq, "id %u out of range\n", id); 1384 return NULL; 1385 } 1386 if (unlikely(!vq->packed.desc_state[id].data)) { 1387 BAD_RING(vq, "id %u is not a head!\n", id); 1388 return NULL; 1389 } 1390 1391 /* detach_buf_packed clears data, so grab it now. */ 1392 ret = vq->packed.desc_state[id].data; 1393 detach_buf_packed(vq, id, ctx); 1394 1395 vq->last_used_idx += vq->packed.desc_state[id].num; 1396 if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) { 1397 vq->last_used_idx -= vq->packed.vring.num; 1398 vq->packed.used_wrap_counter ^= 1; 1399 } 1400 1401 /* 1402 * If we expect an interrupt for the next entry, tell host 1403 * by writing event index and flush out the write before 1404 * the read in the next get_buf call. 1405 */ 1406 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC) 1407 virtio_store_mb(vq->weak_barriers, 1408 &vq->packed.vring.driver->off_wrap, 1409 cpu_to_le16(vq->last_used_idx | 1410 (vq->packed.used_wrap_counter << 1411 VRING_PACKED_EVENT_F_WRAP_CTR))); 1412 1413 LAST_ADD_TIME_INVALID(vq); 1414 1415 END_USE(vq); 1416 return ret; 1417 } 1418 1419 static void virtqueue_disable_cb_packed(struct virtqueue *_vq) 1420 { 1421 struct vring_virtqueue *vq = to_vvq(_vq); 1422 1423 if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) { 1424 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; 1425 vq->packed.vring.driver->flags = 1426 cpu_to_le16(vq->packed.event_flags_shadow); 1427 } 1428 } 1429 1430 static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq) 1431 { 1432 struct vring_virtqueue *vq = to_vvq(_vq); 1433 1434 START_USE(vq); 1435 1436 /* 1437 * We optimistically turn back on interrupts, then check if there was 1438 * more to do. 1439 */ 1440 1441 if (vq->event) { 1442 vq->packed.vring.driver->off_wrap = 1443 cpu_to_le16(vq->last_used_idx | 1444 (vq->packed.used_wrap_counter << 1445 VRING_PACKED_EVENT_F_WRAP_CTR)); 1446 /* 1447 * We need to update event offset and event wrap 1448 * counter first before updating event flags. 1449 */ 1450 virtio_wmb(vq->weak_barriers); 1451 } 1452 1453 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 1454 vq->packed.event_flags_shadow = vq->event ? 1455 VRING_PACKED_EVENT_FLAG_DESC : 1456 VRING_PACKED_EVENT_FLAG_ENABLE; 1457 vq->packed.vring.driver->flags = 1458 cpu_to_le16(vq->packed.event_flags_shadow); 1459 } 1460 1461 END_USE(vq); 1462 return vq->last_used_idx | ((u16)vq->packed.used_wrap_counter << 1463 VRING_PACKED_EVENT_F_WRAP_CTR); 1464 } 1465 1466 static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap) 1467 { 1468 struct vring_virtqueue *vq = to_vvq(_vq); 1469 bool wrap_counter; 1470 u16 used_idx; 1471 1472 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; 1473 used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); 1474 1475 return is_used_desc_packed(vq, used_idx, wrap_counter); 1476 } 1477 1478 static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq) 1479 { 1480 struct vring_virtqueue *vq = to_vvq(_vq); 1481 u16 used_idx, wrap_counter; 1482 u16 bufs; 1483 1484 START_USE(vq); 1485 1486 /* 1487 * We optimistically turn back on interrupts, then check if there was 1488 * more to do. 
1489 */ 1490 1491 if (vq->event) { 1492 /* TODO: tune this threshold */ 1493 bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4; 1494 wrap_counter = vq->packed.used_wrap_counter; 1495 1496 used_idx = vq->last_used_idx + bufs; 1497 if (used_idx >= vq->packed.vring.num) { 1498 used_idx -= vq->packed.vring.num; 1499 wrap_counter ^= 1; 1500 } 1501 1502 vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx | 1503 (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR)); 1504 1505 /* 1506 * We need to update event offset and event wrap 1507 * counter first before updating event flags. 1508 */ 1509 virtio_wmb(vq->weak_barriers); 1510 } else { 1511 used_idx = vq->last_used_idx; 1512 wrap_counter = vq->packed.used_wrap_counter; 1513 } 1514 1515 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 1516 vq->packed.event_flags_shadow = vq->event ? 1517 VRING_PACKED_EVENT_FLAG_DESC : 1518 VRING_PACKED_EVENT_FLAG_ENABLE; 1519 vq->packed.vring.driver->flags = 1520 cpu_to_le16(vq->packed.event_flags_shadow); 1521 } 1522 1523 /* 1524 * We need to update event suppression structure first 1525 * before re-checking for more used buffers. 1526 */ 1527 virtio_mb(vq->weak_barriers); 1528 1529 if (is_used_desc_packed(vq, used_idx, wrap_counter)) { 1530 END_USE(vq); 1531 return false; 1532 } 1533 1534 END_USE(vq); 1535 return true; 1536 } 1537 1538 static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq) 1539 { 1540 struct vring_virtqueue *vq = to_vvq(_vq); 1541 unsigned int i; 1542 void *buf; 1543 1544 START_USE(vq); 1545 1546 for (i = 0; i < vq->packed.vring.num; i++) { 1547 if (!vq->packed.desc_state[i].data) 1548 continue; 1549 /* detach_buf clears data, so grab it now. */ 1550 buf = vq->packed.desc_state[i].data; 1551 detach_buf_packed(vq, i, NULL); 1552 END_USE(vq); 1553 return buf; 1554 } 1555 /* That should have freed everything. 
*/ 1556 BUG_ON(vq->vq.num_free != vq->packed.vring.num); 1557 1558 END_USE(vq); 1559 return NULL; 1560 } 1561 1562 static struct virtqueue *vring_create_virtqueue_packed( 1563 unsigned int index, 1564 unsigned int num, 1565 unsigned int vring_align, 1566 struct virtio_device *vdev, 1567 bool weak_barriers, 1568 bool may_reduce_num, 1569 bool context, 1570 bool (*notify)(struct virtqueue *), 1571 void (*callback)(struct virtqueue *), 1572 const char *name) 1573 { 1574 struct vring_virtqueue *vq; 1575 struct vring_packed_desc *ring; 1576 struct vring_packed_desc_event *driver, *device; 1577 dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr; 1578 size_t ring_size_in_bytes, event_size_in_bytes; 1579 unsigned int i; 1580 1581 ring_size_in_bytes = num * sizeof(struct vring_packed_desc); 1582 1583 ring = vring_alloc_queue(vdev, ring_size_in_bytes, 1584 &ring_dma_addr, 1585 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1586 if (!ring) 1587 goto err_ring; 1588 1589 event_size_in_bytes = sizeof(struct vring_packed_desc_event); 1590 1591 driver = vring_alloc_queue(vdev, event_size_in_bytes, 1592 &driver_event_dma_addr, 1593 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1594 if (!driver) 1595 goto err_driver; 1596 1597 device = vring_alloc_queue(vdev, event_size_in_bytes, 1598 &device_event_dma_addr, 1599 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1600 if (!device) 1601 goto err_device; 1602 1603 vq = kmalloc(sizeof(*vq), GFP_KERNEL); 1604 if (!vq) 1605 goto err_vq; 1606 1607 vq->vq.callback = callback; 1608 vq->vq.vdev = vdev; 1609 vq->vq.name = name; 1610 vq->vq.num_free = num; 1611 vq->vq.index = index; 1612 vq->we_own_ring = true; 1613 vq->notify = notify; 1614 vq->weak_barriers = weak_barriers; 1615 vq->broken = false; 1616 vq->last_used_idx = 0; 1617 vq->num_added = 0; 1618 vq->packed_ring = true; 1619 vq->use_dma_api = vring_use_dma_api(vdev); 1620 list_add_tail(&vq->vq.list, &vdev->vqs); 1621 #ifdef DEBUG 1622 vq->in_use = false; 1623 vq->last_add_time_valid = false; 1624 #endif 1625 1626 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && 1627 !context; 1628 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 1629 1630 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) 1631 vq->weak_barriers = false; 1632 1633 vq->packed.ring_dma_addr = ring_dma_addr; 1634 vq->packed.driver_event_dma_addr = driver_event_dma_addr; 1635 vq->packed.device_event_dma_addr = device_event_dma_addr; 1636 1637 vq->packed.ring_size_in_bytes = ring_size_in_bytes; 1638 vq->packed.event_size_in_bytes = event_size_in_bytes; 1639 1640 vq->packed.vring.num = num; 1641 vq->packed.vring.desc = ring; 1642 vq->packed.vring.driver = driver; 1643 vq->packed.vring.device = device; 1644 1645 vq->packed.next_avail_idx = 0; 1646 vq->packed.avail_wrap_counter = 1; 1647 vq->packed.used_wrap_counter = 1; 1648 vq->packed.event_flags_shadow = 0; 1649 vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL; 1650 1651 vq->packed.desc_state = kmalloc_array(num, 1652 sizeof(struct vring_desc_state_packed), 1653 GFP_KERNEL); 1654 if (!vq->packed.desc_state) 1655 goto err_desc_state; 1656 1657 memset(vq->packed.desc_state, 0, 1658 num * sizeof(struct vring_desc_state_packed)); 1659 1660 /* Put everything in free lists. 
*/ 1661 vq->free_head = 0; 1662 for (i = 0; i < num-1; i++) 1663 vq->packed.desc_state[i].next = i + 1; 1664 1665 vq->packed.desc_extra = kmalloc_array(num, 1666 sizeof(struct vring_desc_extra_packed), 1667 GFP_KERNEL); 1668 if (!vq->packed.desc_extra) 1669 goto err_desc_extra; 1670 1671 memset(vq->packed.desc_extra, 0, 1672 num * sizeof(struct vring_desc_extra_packed)); 1673 1674 /* No callback? Tell other side not to bother us. */ 1675 if (!callback) { 1676 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; 1677 vq->packed.vring.driver->flags = 1678 cpu_to_le16(vq->packed.event_flags_shadow); 1679 } 1680 1681 return &vq->vq; 1682 1683 err_desc_extra: 1684 kfree(vq->packed.desc_state); 1685 err_desc_state: 1686 kfree(vq); 1687 err_vq: 1688 vring_free_queue(vdev, event_size_in_bytes, device, ring_dma_addr); 1689 err_device: 1690 vring_free_queue(vdev, event_size_in_bytes, driver, ring_dma_addr); 1691 err_driver: 1692 vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr); 1693 err_ring: 1694 return NULL; 1695 } 1696 1697 1698 /* 1699 * Generic functions and exported symbols. 1700 */ 1701 1702 static inline int virtqueue_add(struct virtqueue *_vq, 1703 struct scatterlist *sgs[], 1704 unsigned int total_sg, 1705 unsigned int out_sgs, 1706 unsigned int in_sgs, 1707 void *data, 1708 void *ctx, 1709 gfp_t gfp) 1710 { 1711 struct vring_virtqueue *vq = to_vvq(_vq); 1712 1713 return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg, 1714 out_sgs, in_sgs, data, ctx, gfp) : 1715 virtqueue_add_split(_vq, sgs, total_sg, 1716 out_sgs, in_sgs, data, ctx, gfp); 1717 } 1718 1719 /** 1720 * virtqueue_add_sgs - expose buffers to other end 1721 * @vq: the struct virtqueue we're talking about. 1722 * @sgs: array of terminated scatterlists. 1723 * @out_num: the number of scatterlists readable by other side 1724 * @in_num: the number of scatterlists which are writable (after readable ones) 1725 * @data: the token identifying the buffer. 1726 * @gfp: how to do memory allocations (if necessary). 1727 * 1728 * Caller must ensure we don't call this with other virtqueue operations 1729 * at the same time (except where noted). 1730 * 1731 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1732 */ 1733 int virtqueue_add_sgs(struct virtqueue *_vq, 1734 struct scatterlist *sgs[], 1735 unsigned int out_sgs, 1736 unsigned int in_sgs, 1737 void *data, 1738 gfp_t gfp) 1739 { 1740 unsigned int i, total_sg = 0; 1741 1742 /* Count them first. */ 1743 for (i = 0; i < out_sgs + in_sgs; i++) { 1744 struct scatterlist *sg; 1745 1746 for (sg = sgs[i]; sg; sg = sg_next(sg)) 1747 total_sg++; 1748 } 1749 return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, 1750 data, NULL, gfp); 1751 } 1752 EXPORT_SYMBOL_GPL(virtqueue_add_sgs); 1753 1754 /** 1755 * virtqueue_add_outbuf - expose output buffers to other end 1756 * @vq: the struct virtqueue we're talking about. 1757 * @sg: scatterlist (must be well-formed and terminated!) 1758 * @num: the number of entries in @sg readable by other side 1759 * @data: the token identifying the buffer. 1760 * @gfp: how to do memory allocations (if necessary). 1761 * 1762 * Caller must ensure we don't call this with other virtqueue operations 1763 * at the same time (except where noted). 1764 * 1765 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 
1766 */ 1767 int virtqueue_add_outbuf(struct virtqueue *vq, 1768 struct scatterlist *sg, unsigned int num, 1769 void *data, 1770 gfp_t gfp) 1771 { 1772 return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp); 1773 } 1774 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); 1775 1776 /** 1777 * virtqueue_add_inbuf - expose input buffers to other end 1778 * @vq: the struct virtqueue we're talking about. 1779 * @sg: scatterlist (must be well-formed and terminated!) 1780 * @num: the number of entries in @sg writable by other side 1781 * @data: the token identifying the buffer. 1782 * @gfp: how to do memory allocations (if necessary). 1783 * 1784 * Caller must ensure we don't call this with other virtqueue operations 1785 * at the same time (except where noted). 1786 * 1787 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1788 */ 1789 int virtqueue_add_inbuf(struct virtqueue *vq, 1790 struct scatterlist *sg, unsigned int num, 1791 void *data, 1792 gfp_t gfp) 1793 { 1794 return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp); 1795 } 1796 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); 1797 1798 /** 1799 * virtqueue_add_inbuf_ctx - expose input buffers to other end 1800 * @vq: the struct virtqueue we're talking about. 1801 * @sg: scatterlist (must be well-formed and terminated!) 1802 * @num: the number of entries in @sg writable by other side 1803 * @data: the token identifying the buffer. 1804 * @ctx: extra context for the token 1805 * @gfp: how to do memory allocations (if necessary). 1806 * 1807 * Caller must ensure we don't call this with other virtqueue operations 1808 * at the same time (except where noted). 1809 * 1810 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1811 */ 1812 int virtqueue_add_inbuf_ctx(struct virtqueue *vq, 1813 struct scatterlist *sg, unsigned int num, 1814 void *data, 1815 void *ctx, 1816 gfp_t gfp) 1817 { 1818 return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp); 1819 } 1820 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx); 1821 1822 /** 1823 * virtqueue_kick_prepare - first half of split virtqueue_kick call. 1824 * @vq: the struct virtqueue 1825 * 1826 * Instead of virtqueue_kick(), you can do: 1827 * if (virtqueue_kick_prepare(vq)) 1828 * virtqueue_notify(vq); 1829 * 1830 * This is sometimes useful because the virtqueue_kick_prepare() needs 1831 * to be serialized, but the actual virtqueue_notify() call does not. 1832 */ 1833 bool virtqueue_kick_prepare(struct virtqueue *_vq) 1834 { 1835 struct vring_virtqueue *vq = to_vvq(_vq); 1836 1837 return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) : 1838 virtqueue_kick_prepare_split(_vq); 1839 } 1840 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare); 1841 1842 /** 1843 * virtqueue_notify - second half of split virtqueue_kick call. 1844 * @vq: the struct virtqueue 1845 * 1846 * This does not need to be serialized. 1847 * 1848 * Returns false if host notify failed or queue is broken, otherwise true. 1849 */ 1850 bool virtqueue_notify(struct virtqueue *_vq) 1851 { 1852 struct vring_virtqueue *vq = to_vvq(_vq); 1853 1854 if (unlikely(vq->broken)) 1855 return false; 1856 1857 /* Prod other side to tell it about changes. */ 1858 if (!vq->notify(_vq)) { 1859 vq->broken = true; 1860 return false; 1861 } 1862 return true; 1863 } 1864 EXPORT_SYMBOL_GPL(virtqueue_notify); 1865 1866 /** 1867 * virtqueue_kick - update after add_buf 1868 * @vq: the struct virtqueue 1869 * 1870 * After one or more virtqueue_add_* calls, invoke this to kick 1871 * the other side. 
1872 * 1873 * Caller must ensure we don't call this with other virtqueue 1874 * operations at the same time (except where noted). 1875 * 1876 * Returns false if kick failed, otherwise true. 1877 */ 1878 bool virtqueue_kick(struct virtqueue *vq) 1879 { 1880 if (virtqueue_kick_prepare(vq)) 1881 return virtqueue_notify(vq); 1882 return true; 1883 } 1884 EXPORT_SYMBOL_GPL(virtqueue_kick); 1885 1886 /** 1887 * virtqueue_get_buf - get the next used buffer 1888 * @vq: the struct virtqueue we're talking about. 1889 * @len: the length written into the buffer 1890 * 1891 * If the device wrote data into the buffer, @len will be set to the 1892 * amount written. This means you don't need to clear the buffer 1893 * beforehand to ensure there's no data leakage in the case of short 1894 * writes. 1895 * 1896 * Caller must ensure we don't call this with other virtqueue 1897 * operations at the same time (except where noted). 1898 * 1899 * Returns NULL if there are no used buffers, or the "data" token 1900 * handed to virtqueue_add_*(). 1901 */ 1902 void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, 1903 void **ctx) 1904 { 1905 struct vring_virtqueue *vq = to_vvq(_vq); 1906 1907 return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) : 1908 virtqueue_get_buf_ctx_split(_vq, len, ctx); 1909 } 1910 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx); 1911 1912 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) 1913 { 1914 return virtqueue_get_buf_ctx(_vq, len, NULL); 1915 } 1916 EXPORT_SYMBOL_GPL(virtqueue_get_buf); 1917 /** 1918 * virtqueue_disable_cb - disable callbacks 1919 * @vq: the struct virtqueue we're talking about. 1920 * 1921 * Note that this is not necessarily synchronous, hence unreliable and only 1922 * useful as an optimization. 1923 * 1924 * Unlike other operations, this need not be serialized. 1925 */ 1926 void virtqueue_disable_cb(struct virtqueue *_vq) 1927 { 1928 struct vring_virtqueue *vq = to_vvq(_vq); 1929 1930 if (vq->packed_ring) 1931 virtqueue_disable_cb_packed(_vq); 1932 else 1933 virtqueue_disable_cb_split(_vq); 1934 } 1935 EXPORT_SYMBOL_GPL(virtqueue_disable_cb); 1936 1937 /** 1938 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb 1939 * @vq: the struct virtqueue we're talking about. 1940 * 1941 * This re-enables callbacks; it returns current queue state 1942 * in an opaque unsigned value. This value should be later tested by 1943 * virtqueue_poll, to detect a possible race between the driver checking for 1944 * more work, and enabling callbacks. 1945 * 1946 * Caller must ensure we don't call this with other virtqueue 1947 * operations at the same time (except where noted). 1948 */ 1949 unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq) 1950 { 1951 struct vring_virtqueue *vq = to_vvq(_vq); 1952 1953 return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) : 1954 virtqueue_enable_cb_prepare_split(_vq); 1955 } 1956 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); 1957 1958 /** 1959 * virtqueue_poll - query pending used buffers 1960 * @vq: the struct virtqueue we're talking about. 1961 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare). 1962 * 1963 * Returns "true" if there are pending used buffers in the queue. 1964 * 1965 * This does not need to be serialized. 1966 */ 1967 bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx) 1968 { 1969 struct vring_virtqueue *vq = to_vvq(_vq); 1970 1971 virtio_mb(vq->weak_barriers); 1972 return vq->packed_ring ? 

/**
 * virtqueue_poll - query pending used buffers
 * @vq: the struct virtqueue we're talking about.
 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
 *
 * Returns "true" if there are pending used buffers in the queue.
 *
 * This does not need to be serialized.
 */
bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        virtio_mb(vq->weak_barriers);
        return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
                                 virtqueue_poll_split(_vq, last_used_idx);
}
EXPORT_SYMBOL_GPL(virtqueue_poll);

/**
 * virtqueue_enable_cb - restart callbacks after disable_cb.
 * @vq: the struct virtqueue we're talking about.
 *
 * This re-enables callbacks; it returns "false" if there are pending
 * buffers in the queue, to detect a possible race between the driver
 * checking for more work, and enabling callbacks.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 */
bool virtqueue_enable_cb(struct virtqueue *_vq)
{
        unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq);

        return !virtqueue_poll(_vq, last_used_idx);
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb);

/**
 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
 * @vq: the struct virtqueue we're talking about.
 *
 * This re-enables callbacks but hints to the other side to delay
 * interrupts until most of the available buffers have been processed;
 * it returns "false" if there are many pending buffers in the queue,
 * to detect a possible race between the driver checking for more work,
 * and enabling callbacks.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 */
bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
                                 virtqueue_enable_cb_delayed_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);

/**
 * virtqueue_detach_unused_buf - detach first unused buffer
 * @vq: the struct virtqueue we're talking about.
 *
 * Returns NULL or the "data" token handed to virtqueue_add_*().
 * This is not valid on an active queue; it is useful only for device
 * shutdown.
 */
void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
                                 virtqueue_detach_unused_buf_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
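
/*
 * Example (illustrative sketch, not part of this file's API): on device
 * removal a driver resets the device first, then reclaims any buffers it
 * queued but the device never used, and only then deletes the virtqueues.
 * struct my_priv, priv->vq and my_free_buf() are hypothetical:
 *
 *	static void my_remove(struct virtio_device *vdev)
 *	{
 *		struct my_priv *priv = vdev->priv;
 *		void *buf;
 *
 *		vdev->config->reset(vdev);	// stop all virtqueue activity
 *		while ((buf = virtqueue_detach_unused_buf(priv->vq)))
 *			my_free_buf(buf);
 *		vdev->config->del_vqs(vdev);
 *	}
 */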

static inline bool more_used(const struct vring_virtqueue *vq)
{
        return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
}

irqreturn_t vring_interrupt(int irq, void *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        if (!more_used(vq)) {
                pr_debug("virtqueue interrupt with no work for %p\n", vq);
                return IRQ_NONE;
        }

        if (unlikely(vq->broken))
                return IRQ_HANDLED;

        pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
        if (vq->vq.callback)
                vq->vq.callback(&vq->vq);

        return IRQ_HANDLED;
}
EXPORT_SYMBOL_GPL(vring_interrupt);
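
/*
 * Example (illustrative sketch, not part of this file's API): a transport
 * with a dedicated interrupt per virtqueue can use vring_interrupt()
 * directly as the handler, passing the virtqueue as the cookie; virtio-pci
 * does this for its per-vq MSI-X vectors.  The irq number and name below
 * are hypothetical:
 *
 *	int err;
 *
 *	err = request_irq(irq, vring_interrupt, 0, "my-virtqueue", vq);
 *	if (err)
 *		goto fail;
 */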

/* Only available for split ring */
struct virtqueue *__vring_new_virtqueue(unsigned int index,
                                        struct vring vring,
                                        struct virtio_device *vdev,
                                        bool weak_barriers,
                                        bool context,
                                        bool (*notify)(struct virtqueue *),
                                        void (*callback)(struct virtqueue *),
                                        const char *name)
{
        unsigned int i;
        struct vring_virtqueue *vq;

        if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
                return NULL;

        vq = kmalloc(sizeof(*vq), GFP_KERNEL);
        if (!vq)
                return NULL;

        vq->packed_ring = false;
        vq->vq.callback = callback;
        vq->vq.vdev = vdev;
        vq->vq.name = name;
        vq->vq.num_free = vring.num;
        vq->vq.index = index;
        vq->we_own_ring = false;
        vq->notify = notify;
        vq->weak_barriers = weak_barriers;
        vq->broken = false;
        vq->last_used_idx = 0;
        vq->num_added = 0;
        vq->use_dma_api = vring_use_dma_api(vdev);
        list_add_tail(&vq->vq.list, &vdev->vqs);
#ifdef DEBUG
        vq->in_use = false;
        vq->last_add_time_valid = false;
#endif

        vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
                !context;
        vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);

        if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
                vq->weak_barriers = false;

        vq->split.queue_dma_addr = 0;
        vq->split.queue_size_in_bytes = 0;

        vq->split.vring = vring;
        vq->split.avail_flags_shadow = 0;
        vq->split.avail_idx_shadow = 0;

        /* No callback? Tell other side not to bother us. */
        if (!callback) {
                vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
                if (!vq->event)
                        vq->split.vring.avail->flags = cpu_to_virtio16(vdev,
                                        vq->split.avail_flags_shadow);
        }

        vq->split.desc_state = kmalloc_array(vring.num,
                        sizeof(struct vring_desc_state_split), GFP_KERNEL);
        if (!vq->split.desc_state) {
                kfree(vq);
                return NULL;
        }

        /* Chain all descriptors into the free list. */
        vq->free_head = 0;
        for (i = 0; i < vring.num - 1; i++)
                vq->split.vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
        memset(vq->split.desc_state, 0, vring.num *
                        sizeof(struct vring_desc_state_split));

        return &vq->vq;
}
EXPORT_SYMBOL_GPL(__vring_new_virtqueue);

struct virtqueue *vring_create_virtqueue(
        unsigned int index,
        unsigned int num,
        unsigned int vring_align,
        struct virtio_device *vdev,
        bool weak_barriers,
        bool may_reduce_num,
        bool context,
        bool (*notify)(struct virtqueue *),
        void (*callback)(struct virtqueue *),
        const char *name)
{
        if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
                return vring_create_virtqueue_packed(index, num, vring_align,
                                vdev, weak_barriers, may_reduce_num,
                                context, notify, callback, name);

        return vring_create_virtqueue_split(index, num, vring_align,
                        vdev, weak_barriers, may_reduce_num,
                        context, notify, callback, name);
}
EXPORT_SYMBOL_GPL(vring_create_virtqueue);

/* Only available for split ring */
struct virtqueue *vring_new_virtqueue(unsigned int index,
                                      unsigned int num,
                                      unsigned int vring_align,
                                      struct virtio_device *vdev,
                                      bool weak_barriers,
                                      bool context,
                                      void *pages,
                                      bool (*notify)(struct virtqueue *vq),
                                      void (*callback)(struct virtqueue *vq),
                                      const char *name)
{
        struct vring vring;

        if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
                return NULL;

        vring_init(&vring, num, pages, vring_align);
        return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
                                     notify, callback, name);
}
EXPORT_SYMBOL_GPL(vring_new_virtqueue);

void vring_del_virtqueue(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        if (vq->we_own_ring) {
                if (vq->packed_ring) {
                        vring_free_queue(vq->vq.vdev,
                                         vq->packed.ring_size_in_bytes,
                                         vq->packed.vring.desc,
                                         vq->packed.ring_dma_addr);

                        vring_free_queue(vq->vq.vdev,
                                         vq->packed.event_size_in_bytes,
                                         vq->packed.vring.driver,
                                         vq->packed.driver_event_dma_addr);

                        vring_free_queue(vq->vq.vdev,
                                         vq->packed.event_size_in_bytes,
                                         vq->packed.vring.device,
                                         vq->packed.device_event_dma_addr);

                        kfree(vq->packed.desc_state);
                        kfree(vq->packed.desc_extra);
                } else {
                        vring_free_queue(vq->vq.vdev,
                                         vq->split.queue_size_in_bytes,
                                         vq->split.vring.desc,
                                         vq->split.queue_dma_addr);

                        kfree(vq->split.desc_state);
                }
        }
        list_del(&_vq->list);
        kfree(vq);
}
EXPORT_SYMBOL_GPL(vring_del_virtqueue);
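
/*
 * Example (illustrative sketch, not part of this file's API): a transport's
 * find_vqs() implementation typically creates each queue with
 * vring_create_virtqueue(), letting this file allocate the ring, and later
 * tears it down with vring_del_virtqueue().  my_notify() and the parameter
 * values are hypothetical; a real transport derives them from the device:
 *
 *	struct virtqueue *vq;
 *
 *	vq = vring_create_virtqueue(index, num, SMP_CACHE_BYTES, vdev,
 *				    true,	// weak_barriers
 *				    true,	// may_reduce_num
 *				    false,	// no per-buffer context
 *				    my_notify, callback, name);
 *	if (!vq)
 *		return -ENOMEM;
 *	...
 *	vring_del_virtqueue(vq);
 */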

/* Manipulates transport-specific feature bits. */
void vring_transport_features(struct virtio_device *vdev)
{
        unsigned int i;

        for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
                switch (i) {
                case VIRTIO_RING_F_INDIRECT_DESC:
                        break;
                case VIRTIO_RING_F_EVENT_IDX:
                        break;
                case VIRTIO_F_VERSION_1:
                        break;
                case VIRTIO_F_IOMMU_PLATFORM:
                        break;
                case VIRTIO_F_RING_PACKED:
                        break;
                case VIRTIO_F_ORDER_PLATFORM:
                        break;
                default:
                        /* We don't understand this bit. */
                        __virtio_clear_bit(vdev, i);
                }
        }
}
EXPORT_SYMBOL_GPL(vring_transport_features);

/**
 * virtqueue_get_vring_size - return the size of the virtqueue's vring
 * @vq: the struct virtqueue containing the vring of interest.
 *
 * Returns the size of the vring. This is mainly used for boasting to
 * userspace. Unlike other operations, this need not be serialized.
 */
unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
}
EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);

bool virtqueue_is_broken(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        return vq->broken;
}
EXPORT_SYMBOL_GPL(virtqueue_is_broken);

/*
 * This should prevent the device from being used, allowing drivers to
 * recover. You may need to grab appropriate locks to flush.
 */
void virtio_break_device(struct virtio_device *dev)
{
        struct virtqueue *_vq;

        list_for_each_entry(_vq, &dev->vqs, list) {
                struct vring_virtqueue *vq = to_vvq(_vq);

                vq->broken = true;
        }
}
EXPORT_SYMBOL_GPL(virtio_break_device);

dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        BUG_ON(!vq->we_own_ring);

        if (vq->packed_ring)
                return vq->packed.ring_dma_addr;

        return vq->split.queue_dma_addr;
}
EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);

dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        BUG_ON(!vq->we_own_ring);

        if (vq->packed_ring)
                return vq->packed.driver_event_dma_addr;

        return vq->split.queue_dma_addr +
                ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
}
EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);

dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        BUG_ON(!vq->we_own_ring);

        if (vq->packed_ring)
                return vq->packed.device_event_dma_addr;

        return vq->split.queue_dma_addr +
                ((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
}
EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);

/* Only available for split ring */
const struct vring *virtqueue_get_vring(struct virtqueue *vq)
{
        return &to_vvq(vq)->split.vring;
}
EXPORT_SYMBOL_GPL(virtqueue_get_vring);

MODULE_LICENSE("GPL");
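
/*
 * Example (illustrative sketch, not part of this file's API): a transport
 * that lets this file allocate the ring tells the device where it lives by
 * reading the DMA addresses back and writing them into its own registers.
 * my_write_queue_regs() and its register layout are hypothetical:
 *
 *	my_write_queue_regs(dev, index,
 *			    virtqueue_get_desc_addr(vq),
 *			    virtqueue_get_avail_addr(vq),
 *			    virtqueue_get_used_addr(vq),
 *			    virtqueue_get_vring_size(vq));
 */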