1 /* Virtio ring implementation. 2 * 3 * Copyright 2007 Rusty Russell IBM Corporation 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation; either version 2 of the License, or 8 * (at your option) any later version. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, write to the Free Software 17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 18 */ 19 #include <linux/virtio.h> 20 #include <linux/virtio_ring.h> 21 #include <linux/virtio_config.h> 22 #include <linux/device.h> 23 #include <linux/slab.h> 24 #include <linux/module.h> 25 #include <linux/hrtimer.h> 26 #include <linux/kmemleak.h> 27 #include <linux/dma-mapping.h> 28 #include <xen/xen.h> 29 30 #ifdef DEBUG 31 /* For development, we want to crash whenever the ring is screwed. */ 32 #define BAD_RING(_vq, fmt, args...) \ 33 do { \ 34 dev_err(&(_vq)->vq.vdev->dev, \ 35 "%s:"fmt, (_vq)->vq.name, ##args); \ 36 BUG(); \ 37 } while (0) 38 /* Caller is supposed to guarantee no reentry. */ 39 #define START_USE(_vq) \ 40 do { \ 41 if ((_vq)->in_use) \ 42 panic("%s:in_use = %i\n", \ 43 (_vq)->vq.name, (_vq)->in_use); \ 44 (_vq)->in_use = __LINE__; \ 45 } while (0) 46 #define END_USE(_vq) \ 47 do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0) 48 #else 49 #define BAD_RING(_vq, fmt, args...) \ 50 do { \ 51 dev_err(&_vq->vq.vdev->dev, \ 52 "%s:"fmt, (_vq)->vq.name, ##args); \ 53 (_vq)->broken = true; \ 54 } while (0) 55 #define START_USE(vq) 56 #define END_USE(vq) 57 #endif 58 59 struct vring_desc_state { 60 void *data; /* Data for callback. */ 61 struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ 62 }; 63 64 struct vring_virtqueue { 65 struct virtqueue vq; 66 67 /* Actual memory layout for this queue */ 68 struct vring vring; 69 70 /* Can we use weak barriers? */ 71 bool weak_barriers; 72 73 /* Other side has made a mess, don't try any more. */ 74 bool broken; 75 76 /* Host supports indirect buffers */ 77 bool indirect; 78 79 /* Host publishes avail event idx */ 80 bool event; 81 82 /* Head of free buffer list. */ 83 unsigned int free_head; 84 /* Number we've added since last sync. */ 85 unsigned int num_added; 86 87 /* Last used index we've seen. */ 88 u16 last_used_idx; 89 90 /* Last written value to avail->flags */ 91 u16 avail_flags_shadow; 92 93 /* Last written value to avail->idx in guest byte order */ 94 u16 avail_idx_shadow; 95 96 /* How to notify other side. FIXME: commonalize hcalls! */ 97 bool (*notify)(struct virtqueue *vq); 98 99 /* DMA, allocation, and size information */ 100 bool we_own_ring; 101 size_t queue_size_in_bytes; 102 dma_addr_t queue_dma_addr; 103 104 #ifdef DEBUG 105 /* They're supposed to lock for us. */ 106 unsigned int in_use; 107 108 /* Figure out if their kicks are too delayed. */ 109 bool last_add_time_valid; 110 ktime_t last_add_time; 111 #endif 112 113 /* Per-descriptor state. */ 114 struct vring_desc_state desc_state[]; 115 }; 116 117 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) 118 119 /* 120 * The interaction between virtio and a possible IOMMU is a mess. 121 * 122 * On most systems with virtio, physical addresses match bus addresses, 123 * and it doesn't particularly matter whether we use the DMA API. 124 * 125 * On some systems, including Xen and any system with a physical device 126 * that speaks virtio behind a physical IOMMU, we must use the DMA API 127 * for virtio DMA to work at all. 128 * 129 * On other systems, including SPARC and PPC64, virtio-pci devices are 130 * enumerated as though they are behind an IOMMU, but the virtio host 131 * ignores the IOMMU, so we must either pretend that the IOMMU isn't 132 * there or somehow map everything as the identity. 133 * 134 * For the time being, we preserve historic behavior and bypass the DMA 135 * API. 136 */ 137 138 static bool vring_use_dma_api(struct virtio_device *vdev) 139 { 140 /* 141 * In theory, it's possible to have a buggy QEMU-supposed 142 * emulated Q35 IOMMU and Xen enabled at the same time. On 143 * such a configuration, virtio has never worked and will 144 * not work without an even larger kludge. Instead, enable 145 * the DMA API if we're a Xen guest, which at least allows 146 * all of the sensible Xen configurations to work correctly. 147 */ 148 if (xen_domain()) 149 return true; 150 151 return false; 152 } 153 154 /* 155 * The DMA ops on various arches are rather gnarly right now, and 156 * making all of the arch DMA ops work on the vring device itself 157 * is a mess. For now, we use the parent device for DMA ops. 158 */ 159 struct device *vring_dma_dev(const struct vring_virtqueue *vq) 160 { 161 return vq->vq.vdev->dev.parent; 162 } 163 164 /* Map one sg entry. */ 165 static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq, 166 struct scatterlist *sg, 167 enum dma_data_direction direction) 168 { 169 if (!vring_use_dma_api(vq->vq.vdev)) 170 return (dma_addr_t)sg_phys(sg); 171 172 /* 173 * We can't use dma_map_sg, because we don't use scatterlists in 174 * the way it expects (we don't guarantee that the scatterlist 175 * will exist for the lifetime of the mapping). 176 */ 177 return dma_map_page(vring_dma_dev(vq), 178 sg_page(sg), sg->offset, sg->length, 179 direction); 180 } 181 182 static dma_addr_t vring_map_single(const struct vring_virtqueue *vq, 183 void *cpu_addr, size_t size, 184 enum dma_data_direction direction) 185 { 186 if (!vring_use_dma_api(vq->vq.vdev)) 187 return (dma_addr_t)virt_to_phys(cpu_addr); 188 189 return dma_map_single(vring_dma_dev(vq), 190 cpu_addr, size, direction); 191 } 192 193 static void vring_unmap_one(const struct vring_virtqueue *vq, 194 struct vring_desc *desc) 195 { 196 u16 flags; 197 198 if (!vring_use_dma_api(vq->vq.vdev)) 199 return; 200 201 flags = virtio16_to_cpu(vq->vq.vdev, desc->flags); 202 203 if (flags & VRING_DESC_F_INDIRECT) { 204 dma_unmap_single(vring_dma_dev(vq), 205 virtio64_to_cpu(vq->vq.vdev, desc->addr), 206 virtio32_to_cpu(vq->vq.vdev, desc->len), 207 (flags & VRING_DESC_F_WRITE) ? 208 DMA_FROM_DEVICE : DMA_TO_DEVICE); 209 } else { 210 dma_unmap_page(vring_dma_dev(vq), 211 virtio64_to_cpu(vq->vq.vdev, desc->addr), 212 virtio32_to_cpu(vq->vq.vdev, desc->len), 213 (flags & VRING_DESC_F_WRITE) ? 214 DMA_FROM_DEVICE : DMA_TO_DEVICE); 215 } 216 } 217 218 static int vring_mapping_error(const struct vring_virtqueue *vq, 219 dma_addr_t addr) 220 { 221 if (!vring_use_dma_api(vq->vq.vdev)) 222 return 0; 223 224 return dma_mapping_error(vring_dma_dev(vq), addr); 225 } 226 227 static struct vring_desc *alloc_indirect(struct virtqueue *_vq, 228 unsigned int total_sg, gfp_t gfp) 229 { 230 struct vring_desc *desc; 231 unsigned int i; 232 233 /* 234 * We require lowmem mappings for the descriptors because 235 * otherwise virt_to_phys will give us bogus addresses in the 236 * virtqueue. 237 */ 238 gfp &= ~__GFP_HIGHMEM; 239 240 desc = kmalloc(total_sg * sizeof(struct vring_desc), gfp); 241 if (!desc) 242 return NULL; 243 244 for (i = 0; i < total_sg; i++) 245 desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1); 246 return desc; 247 } 248 249 static inline int virtqueue_add(struct virtqueue *_vq, 250 struct scatterlist *sgs[], 251 unsigned int total_sg, 252 unsigned int out_sgs, 253 unsigned int in_sgs, 254 void *data, 255 gfp_t gfp) 256 { 257 struct vring_virtqueue *vq = to_vvq(_vq); 258 struct scatterlist *sg; 259 struct vring_desc *desc; 260 unsigned int i, n, avail, descs_used, uninitialized_var(prev), err_idx; 261 int head; 262 bool indirect; 263 264 START_USE(vq); 265 266 BUG_ON(data == NULL); 267 268 if (unlikely(vq->broken)) { 269 END_USE(vq); 270 return -EIO; 271 } 272 273 #ifdef DEBUG 274 { 275 ktime_t now = ktime_get(); 276 277 /* No kick or get, with .1 second between? Warn. */ 278 if (vq->last_add_time_valid) 279 WARN_ON(ktime_to_ms(ktime_sub(now, vq->last_add_time)) 280 > 100); 281 vq->last_add_time = now; 282 vq->last_add_time_valid = true; 283 } 284 #endif 285 286 BUG_ON(total_sg > vq->vring.num); 287 BUG_ON(total_sg == 0); 288 289 head = vq->free_head; 290 291 /* If the host supports indirect descriptor tables, and we have multiple 292 * buffers, then go indirect. FIXME: tune this threshold */ 293 if (vq->indirect && total_sg > 1 && vq->vq.num_free) 294 desc = alloc_indirect(_vq, total_sg, gfp); 295 else 296 desc = NULL; 297 298 if (desc) { 299 /* Use a single buffer which doesn't continue */ 300 indirect = true; 301 /* Set up rest to use this indirect table. */ 302 i = 0; 303 descs_used = 1; 304 } else { 305 indirect = false; 306 desc = vq->vring.desc; 307 i = head; 308 descs_used = total_sg; 309 } 310 311 if (vq->vq.num_free < descs_used) { 312 pr_debug("Can't add buf len %i - avail = %i\n", 313 descs_used, vq->vq.num_free); 314 /* FIXME: for historical reasons, we force a notify here if 315 * there are outgoing parts to the buffer. Presumably the 316 * host should service the ring ASAP. */ 317 if (out_sgs) 318 vq->notify(&vq->vq); 319 END_USE(vq); 320 return -ENOSPC; 321 } 322 323 for (n = 0; n < out_sgs; n++) { 324 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 325 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE); 326 if (vring_mapping_error(vq, addr)) 327 goto unmap_release; 328 329 desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT); 330 desc[i].addr = cpu_to_virtio64(_vq->vdev, addr); 331 desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length); 332 prev = i; 333 i = virtio16_to_cpu(_vq->vdev, desc[i].next); 334 } 335 } 336 for (; n < (out_sgs + in_sgs); n++) { 337 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 338 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE); 339 if (vring_mapping_error(vq, addr)) 340 goto unmap_release; 341 342 desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE); 343 desc[i].addr = cpu_to_virtio64(_vq->vdev, addr); 344 desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length); 345 prev = i; 346 i = virtio16_to_cpu(_vq->vdev, desc[i].next); 347 } 348 } 349 /* Last one doesn't continue. */ 350 desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); 351 352 if (indirect) { 353 /* Now that the indirect table is filled in, map it. */ 354 dma_addr_t addr = vring_map_single( 355 vq, desc, total_sg * sizeof(struct vring_desc), 356 DMA_TO_DEVICE); 357 if (vring_mapping_error(vq, addr)) 358 goto unmap_release; 359 360 vq->vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_INDIRECT); 361 vq->vring.desc[head].addr = cpu_to_virtio64(_vq->vdev, addr); 362 363 vq->vring.desc[head].len = cpu_to_virtio32(_vq->vdev, total_sg * sizeof(struct vring_desc)); 364 } 365 366 /* We're using some buffers from the free list. */ 367 vq->vq.num_free -= descs_used; 368 369 /* Update free pointer */ 370 if (indirect) 371 vq->free_head = virtio16_to_cpu(_vq->vdev, vq->vring.desc[head].next); 372 else 373 vq->free_head = i; 374 375 /* Store token and indirect buffer state. */ 376 vq->desc_state[head].data = data; 377 if (indirect) 378 vq->desc_state[head].indir_desc = desc; 379 380 /* Put entry in available array (but don't update avail->idx until they 381 * do sync). */ 382 avail = vq->avail_idx_shadow & (vq->vring.num - 1); 383 vq->vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head); 384 385 /* Descriptors and available array need to be set before we expose the 386 * new available array entries. */ 387 virtio_wmb(vq->weak_barriers); 388 vq->avail_idx_shadow++; 389 vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow); 390 vq->num_added++; 391 392 pr_debug("Added buffer head %i to %p\n", head, vq); 393 END_USE(vq); 394 395 /* This is very unlikely, but theoretically possible. Kick 396 * just in case. */ 397 if (unlikely(vq->num_added == (1 << 16) - 1)) 398 virtqueue_kick(_vq); 399 400 return 0; 401 402 unmap_release: 403 err_idx = i; 404 i = head; 405 406 for (n = 0; n < total_sg; n++) { 407 if (i == err_idx) 408 break; 409 vring_unmap_one(vq, &desc[i]); 410 i = vq->vring.desc[i].next; 411 } 412 413 vq->vq.num_free += total_sg; 414 415 if (indirect) 416 kfree(desc); 417 418 return -EIO; 419 } 420 421 /** 422 * virtqueue_add_sgs - expose buffers to other end 423 * @vq: the struct virtqueue we're talking about. 424 * @sgs: array of terminated scatterlists. 425 * @out_num: the number of scatterlists readable by other side 426 * @in_num: the number of scatterlists which are writable (after readable ones) 427 * @data: the token identifying the buffer. 428 * @gfp: how to do memory allocations (if necessary). 429 * 430 * Caller must ensure we don't call this with other virtqueue operations 431 * at the same time (except where noted). 432 * 433 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 434 */ 435 int virtqueue_add_sgs(struct virtqueue *_vq, 436 struct scatterlist *sgs[], 437 unsigned int out_sgs, 438 unsigned int in_sgs, 439 void *data, 440 gfp_t gfp) 441 { 442 unsigned int i, total_sg = 0; 443 444 /* Count them first. */ 445 for (i = 0; i < out_sgs + in_sgs; i++) { 446 struct scatterlist *sg; 447 for (sg = sgs[i]; sg; sg = sg_next(sg)) 448 total_sg++; 449 } 450 return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, data, gfp); 451 } 452 EXPORT_SYMBOL_GPL(virtqueue_add_sgs); 453 454 /** 455 * virtqueue_add_outbuf - expose output buffers to other end 456 * @vq: the struct virtqueue we're talking about. 457 * @sg: scatterlist (must be well-formed and terminated!) 458 * @num: the number of entries in @sg readable by other side 459 * @data: the token identifying the buffer. 460 * @gfp: how to do memory allocations (if necessary). 461 * 462 * Caller must ensure we don't call this with other virtqueue operations 463 * at the same time (except where noted). 464 * 465 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 466 */ 467 int virtqueue_add_outbuf(struct virtqueue *vq, 468 struct scatterlist *sg, unsigned int num, 469 void *data, 470 gfp_t gfp) 471 { 472 return virtqueue_add(vq, &sg, num, 1, 0, data, gfp); 473 } 474 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); 475 476 /** 477 * virtqueue_add_inbuf - expose input buffers to other end 478 * @vq: the struct virtqueue we're talking about. 479 * @sg: scatterlist (must be well-formed and terminated!) 480 * @num: the number of entries in @sg writable by other side 481 * @data: the token identifying the buffer. 482 * @gfp: how to do memory allocations (if necessary). 483 * 484 * Caller must ensure we don't call this with other virtqueue operations 485 * at the same time (except where noted). 486 * 487 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 488 */ 489 int virtqueue_add_inbuf(struct virtqueue *vq, 490 struct scatterlist *sg, unsigned int num, 491 void *data, 492 gfp_t gfp) 493 { 494 return virtqueue_add(vq, &sg, num, 0, 1, data, gfp); 495 } 496 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); 497 498 /** 499 * virtqueue_kick_prepare - first half of split virtqueue_kick call. 500 * @vq: the struct virtqueue 501 * 502 * Instead of virtqueue_kick(), you can do: 503 * if (virtqueue_kick_prepare(vq)) 504 * virtqueue_notify(vq); 505 * 506 * This is sometimes useful because the virtqueue_kick_prepare() needs 507 * to be serialized, but the actual virtqueue_notify() call does not. 508 */ 509 bool virtqueue_kick_prepare(struct virtqueue *_vq) 510 { 511 struct vring_virtqueue *vq = to_vvq(_vq); 512 u16 new, old; 513 bool needs_kick; 514 515 START_USE(vq); 516 /* We need to expose available array entries before checking avail 517 * event. */ 518 virtio_mb(vq->weak_barriers); 519 520 old = vq->avail_idx_shadow - vq->num_added; 521 new = vq->avail_idx_shadow; 522 vq->num_added = 0; 523 524 #ifdef DEBUG 525 if (vq->last_add_time_valid) { 526 WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), 527 vq->last_add_time)) > 100); 528 } 529 vq->last_add_time_valid = false; 530 #endif 531 532 if (vq->event) { 533 needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev, vring_avail_event(&vq->vring)), 534 new, old); 535 } else { 536 needs_kick = !(vq->vring.used->flags & cpu_to_virtio16(_vq->vdev, VRING_USED_F_NO_NOTIFY)); 537 } 538 END_USE(vq); 539 return needs_kick; 540 } 541 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare); 542 543 /** 544 * virtqueue_notify - second half of split virtqueue_kick call. 545 * @vq: the struct virtqueue 546 * 547 * This does not need to be serialized. 548 * 549 * Returns false if host notify failed or queue is broken, otherwise true. 550 */ 551 bool virtqueue_notify(struct virtqueue *_vq) 552 { 553 struct vring_virtqueue *vq = to_vvq(_vq); 554 555 if (unlikely(vq->broken)) 556 return false; 557 558 /* Prod other side to tell it about changes. */ 559 if (!vq->notify(_vq)) { 560 vq->broken = true; 561 return false; 562 } 563 return true; 564 } 565 EXPORT_SYMBOL_GPL(virtqueue_notify); 566 567 /** 568 * virtqueue_kick - update after add_buf 569 * @vq: the struct virtqueue 570 * 571 * After one or more virtqueue_add_* calls, invoke this to kick 572 * the other side. 573 * 574 * Caller must ensure we don't call this with other virtqueue 575 * operations at the same time (except where noted). 576 * 577 * Returns false if kick failed, otherwise true. 578 */ 579 bool virtqueue_kick(struct virtqueue *vq) 580 { 581 if (virtqueue_kick_prepare(vq)) 582 return virtqueue_notify(vq); 583 return true; 584 } 585 EXPORT_SYMBOL_GPL(virtqueue_kick); 586 587 static void detach_buf(struct vring_virtqueue *vq, unsigned int head) 588 { 589 unsigned int i, j; 590 u16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT); 591 592 /* Clear data ptr. */ 593 vq->desc_state[head].data = NULL; 594 595 /* Put back on free list: unmap first-level descriptors and find end */ 596 i = head; 597 598 while (vq->vring.desc[i].flags & nextflag) { 599 vring_unmap_one(vq, &vq->vring.desc[i]); 600 i = virtio16_to_cpu(vq->vq.vdev, vq->vring.desc[i].next); 601 vq->vq.num_free++; 602 } 603 604 vring_unmap_one(vq, &vq->vring.desc[i]); 605 vq->vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, vq->free_head); 606 vq->free_head = head; 607 608 /* Plus final descriptor */ 609 vq->vq.num_free++; 610 611 /* Free the indirect table, if any, now that it's unmapped. */ 612 if (vq->desc_state[head].indir_desc) { 613 struct vring_desc *indir_desc = vq->desc_state[head].indir_desc; 614 u32 len = virtio32_to_cpu(vq->vq.vdev, vq->vring.desc[head].len); 615 616 BUG_ON(!(vq->vring.desc[head].flags & 617 cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT))); 618 BUG_ON(len == 0 || len % sizeof(struct vring_desc)); 619 620 for (j = 0; j < len / sizeof(struct vring_desc); j++) 621 vring_unmap_one(vq, &indir_desc[j]); 622 623 kfree(vq->desc_state[head].indir_desc); 624 vq->desc_state[head].indir_desc = NULL; 625 } 626 } 627 628 static inline bool more_used(const struct vring_virtqueue *vq) 629 { 630 return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, vq->vring.used->idx); 631 } 632 633 /** 634 * virtqueue_get_buf - get the next used buffer 635 * @vq: the struct virtqueue we're talking about. 636 * @len: the length written into the buffer 637 * 638 * If the driver wrote data into the buffer, @len will be set to the 639 * amount written. This means you don't need to clear the buffer 640 * beforehand to ensure there's no data leakage in the case of short 641 * writes. 642 * 643 * Caller must ensure we don't call this with other virtqueue 644 * operations at the same time (except where noted). 645 * 646 * Returns NULL if there are no used buffers, or the "data" token 647 * handed to virtqueue_add_*(). 648 */ 649 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) 650 { 651 struct vring_virtqueue *vq = to_vvq(_vq); 652 void *ret; 653 unsigned int i; 654 u16 last_used; 655 656 START_USE(vq); 657 658 if (unlikely(vq->broken)) { 659 END_USE(vq); 660 return NULL; 661 } 662 663 if (!more_used(vq)) { 664 pr_debug("No more buffers in queue\n"); 665 END_USE(vq); 666 return NULL; 667 } 668 669 /* Only get used array entries after they have been exposed by host. */ 670 virtio_rmb(vq->weak_barriers); 671 672 last_used = (vq->last_used_idx & (vq->vring.num - 1)); 673 i = virtio32_to_cpu(_vq->vdev, vq->vring.used->ring[last_used].id); 674 *len = virtio32_to_cpu(_vq->vdev, vq->vring.used->ring[last_used].len); 675 676 if (unlikely(i >= vq->vring.num)) { 677 BAD_RING(vq, "id %u out of range\n", i); 678 return NULL; 679 } 680 if (unlikely(!vq->desc_state[i].data)) { 681 BAD_RING(vq, "id %u is not a head!\n", i); 682 return NULL; 683 } 684 685 /* detach_buf clears data, so grab it now. */ 686 ret = vq->desc_state[i].data; 687 detach_buf(vq, i); 688 vq->last_used_idx++; 689 /* If we expect an interrupt for the next entry, tell host 690 * by writing event index and flush out the write before 691 * the read in the next get_buf call. */ 692 if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) 693 virtio_store_mb(vq->weak_barriers, 694 &vring_used_event(&vq->vring), 695 cpu_to_virtio16(_vq->vdev, vq->last_used_idx)); 696 697 #ifdef DEBUG 698 vq->last_add_time_valid = false; 699 #endif 700 701 END_USE(vq); 702 return ret; 703 } 704 EXPORT_SYMBOL_GPL(virtqueue_get_buf); 705 706 /** 707 * virtqueue_disable_cb - disable callbacks 708 * @vq: the struct virtqueue we're talking about. 709 * 710 * Note that this is not necessarily synchronous, hence unreliable and only 711 * useful as an optimization. 712 * 713 * Unlike other operations, this need not be serialized. 714 */ 715 void virtqueue_disable_cb(struct virtqueue *_vq) 716 { 717 struct vring_virtqueue *vq = to_vvq(_vq); 718 719 if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) { 720 vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; 721 vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow); 722 } 723 724 } 725 EXPORT_SYMBOL_GPL(virtqueue_disable_cb); 726 727 /** 728 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb 729 * @vq: the struct virtqueue we're talking about. 730 * 731 * This re-enables callbacks; it returns current queue state 732 * in an opaque unsigned value. This value should be later tested by 733 * virtqueue_poll, to detect a possible race between the driver checking for 734 * more work, and enabling callbacks. 735 * 736 * Caller must ensure we don't call this with other virtqueue 737 * operations at the same time (except where noted). 738 */ 739 unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq) 740 { 741 struct vring_virtqueue *vq = to_vvq(_vq); 742 u16 last_used_idx; 743 744 START_USE(vq); 745 746 /* We optimistically turn back on interrupts, then check if there was 747 * more to do. */ 748 /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to 749 * either clear the flags bit or point the event index at the next 750 * entry. Always do both to keep code simple. */ 751 if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 752 vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 753 vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow); 754 } 755 vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev, last_used_idx = vq->last_used_idx); 756 END_USE(vq); 757 return last_used_idx; 758 } 759 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); 760 761 /** 762 * virtqueue_poll - query pending used buffers 763 * @vq: the struct virtqueue we're talking about. 764 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare). 765 * 766 * Returns "true" if there are pending used buffers in the queue. 767 * 768 * This does not need to be serialized. 769 */ 770 bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx) 771 { 772 struct vring_virtqueue *vq = to_vvq(_vq); 773 774 virtio_mb(vq->weak_barriers); 775 return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev, vq->vring.used->idx); 776 } 777 EXPORT_SYMBOL_GPL(virtqueue_poll); 778 779 /** 780 * virtqueue_enable_cb - restart callbacks after disable_cb. 781 * @vq: the struct virtqueue we're talking about. 782 * 783 * This re-enables callbacks; it returns "false" if there are pending 784 * buffers in the queue, to detect a possible race between the driver 785 * checking for more work, and enabling callbacks. 786 * 787 * Caller must ensure we don't call this with other virtqueue 788 * operations at the same time (except where noted). 789 */ 790 bool virtqueue_enable_cb(struct virtqueue *_vq) 791 { 792 unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq); 793 return !virtqueue_poll(_vq, last_used_idx); 794 } 795 EXPORT_SYMBOL_GPL(virtqueue_enable_cb); 796 797 /** 798 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb. 799 * @vq: the struct virtqueue we're talking about. 800 * 801 * This re-enables callbacks but hints to the other side to delay 802 * interrupts until most of the available buffers have been processed; 803 * it returns "false" if there are many pending buffers in the queue, 804 * to detect a possible race between the driver checking for more work, 805 * and enabling callbacks. 806 * 807 * Caller must ensure we don't call this with other virtqueue 808 * operations at the same time (except where noted). 809 */ 810 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) 811 { 812 struct vring_virtqueue *vq = to_vvq(_vq); 813 u16 bufs; 814 815 START_USE(vq); 816 817 /* We optimistically turn back on interrupts, then check if there was 818 * more to do. */ 819 /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to 820 * either clear the flags bit or point the event index at the next 821 * entry. Always do both to keep code simple. */ 822 if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 823 vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 824 vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow); 825 } 826 /* TODO: tune this threshold */ 827 bufs = (u16)(vq->avail_idx_shadow - vq->last_used_idx) * 3 / 4; 828 829 virtio_store_mb(vq->weak_barriers, 830 &vring_used_event(&vq->vring), 831 cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs)); 832 833 if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->vring.used->idx) - vq->last_used_idx) > bufs)) { 834 END_USE(vq); 835 return false; 836 } 837 838 END_USE(vq); 839 return true; 840 } 841 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); 842 843 /** 844 * virtqueue_detach_unused_buf - detach first unused buffer 845 * @vq: the struct virtqueue we're talking about. 846 * 847 * Returns NULL or the "data" token handed to virtqueue_add_*(). 848 * This is not valid on an active queue; it is useful only for device 849 * shutdown. 850 */ 851 void *virtqueue_detach_unused_buf(struct virtqueue *_vq) 852 { 853 struct vring_virtqueue *vq = to_vvq(_vq); 854 unsigned int i; 855 void *buf; 856 857 START_USE(vq); 858 859 for (i = 0; i < vq->vring.num; i++) { 860 if (!vq->desc_state[i].data) 861 continue; 862 /* detach_buf clears data, so grab it now. */ 863 buf = vq->desc_state[i].data; 864 detach_buf(vq, i); 865 vq->avail_idx_shadow--; 866 vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow); 867 END_USE(vq); 868 return buf; 869 } 870 /* That should have freed everything. */ 871 BUG_ON(vq->vq.num_free != vq->vring.num); 872 873 END_USE(vq); 874 return NULL; 875 } 876 EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf); 877 878 irqreturn_t vring_interrupt(int irq, void *_vq) 879 { 880 struct vring_virtqueue *vq = to_vvq(_vq); 881 882 if (!more_used(vq)) { 883 pr_debug("virtqueue interrupt with no work for %p\n", vq); 884 return IRQ_NONE; 885 } 886 887 if (unlikely(vq->broken)) 888 return IRQ_HANDLED; 889 890 pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback); 891 if (vq->vq.callback) 892 vq->vq.callback(&vq->vq); 893 894 return IRQ_HANDLED; 895 } 896 EXPORT_SYMBOL_GPL(vring_interrupt); 897 898 struct virtqueue *__vring_new_virtqueue(unsigned int index, 899 struct vring vring, 900 struct virtio_device *vdev, 901 bool weak_barriers, 902 bool (*notify)(struct virtqueue *), 903 void (*callback)(struct virtqueue *), 904 const char *name) 905 { 906 unsigned int i; 907 struct vring_virtqueue *vq; 908 909 vq = kmalloc(sizeof(*vq) + vring.num * sizeof(struct vring_desc_state), 910 GFP_KERNEL); 911 if (!vq) 912 return NULL; 913 914 vq->vring = vring; 915 vq->vq.callback = callback; 916 vq->vq.vdev = vdev; 917 vq->vq.name = name; 918 vq->vq.num_free = vring.num; 919 vq->vq.index = index; 920 vq->we_own_ring = false; 921 vq->queue_dma_addr = 0; 922 vq->queue_size_in_bytes = 0; 923 vq->notify = notify; 924 vq->weak_barriers = weak_barriers; 925 vq->broken = false; 926 vq->last_used_idx = 0; 927 vq->avail_flags_shadow = 0; 928 vq->avail_idx_shadow = 0; 929 vq->num_added = 0; 930 list_add_tail(&vq->vq.list, &vdev->vqs); 931 #ifdef DEBUG 932 vq->in_use = false; 933 vq->last_add_time_valid = false; 934 #endif 935 936 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC); 937 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 938 939 /* No callback? Tell other side not to bother us. */ 940 if (!callback) { 941 vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; 942 vq->vring.avail->flags = cpu_to_virtio16(vdev, vq->avail_flags_shadow); 943 } 944 945 /* Put everything in free lists. */ 946 vq->free_head = 0; 947 for (i = 0; i < vring.num-1; i++) 948 vq->vring.desc[i].next = cpu_to_virtio16(vdev, i + 1); 949 memset(vq->desc_state, 0, vring.num * sizeof(struct vring_desc_state)); 950 951 return &vq->vq; 952 } 953 EXPORT_SYMBOL_GPL(__vring_new_virtqueue); 954 955 static void *vring_alloc_queue(struct virtio_device *vdev, size_t size, 956 dma_addr_t *dma_handle, gfp_t flag) 957 { 958 if (vring_use_dma_api(vdev)) { 959 return dma_alloc_coherent(vdev->dev.parent, size, 960 dma_handle, flag); 961 } else { 962 void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag); 963 if (queue) { 964 phys_addr_t phys_addr = virt_to_phys(queue); 965 *dma_handle = (dma_addr_t)phys_addr; 966 967 /* 968 * Sanity check: make sure we dind't truncate 969 * the address. The only arches I can find that 970 * have 64-bit phys_addr_t but 32-bit dma_addr_t 971 * are certain non-highmem MIPS and x86 972 * configurations, but these configurations 973 * should never allocate physical pages above 32 974 * bits, so this is fine. Just in case, throw a 975 * warning and abort if we end up with an 976 * unrepresentable address. 977 */ 978 if (WARN_ON_ONCE(*dma_handle != phys_addr)) { 979 free_pages_exact(queue, PAGE_ALIGN(size)); 980 return NULL; 981 } 982 } 983 return queue; 984 } 985 } 986 987 static void vring_free_queue(struct virtio_device *vdev, size_t size, 988 void *queue, dma_addr_t dma_handle) 989 { 990 if (vring_use_dma_api(vdev)) { 991 dma_free_coherent(vdev->dev.parent, size, queue, dma_handle); 992 } else { 993 free_pages_exact(queue, PAGE_ALIGN(size)); 994 } 995 } 996 997 struct virtqueue *vring_create_virtqueue( 998 unsigned int index, 999 unsigned int num, 1000 unsigned int vring_align, 1001 struct virtio_device *vdev, 1002 bool weak_barriers, 1003 bool may_reduce_num, 1004 bool (*notify)(struct virtqueue *), 1005 void (*callback)(struct virtqueue *), 1006 const char *name) 1007 { 1008 struct virtqueue *vq; 1009 void *queue = NULL; 1010 dma_addr_t dma_addr; 1011 size_t queue_size_in_bytes; 1012 struct vring vring; 1013 1014 /* We assume num is a power of 2. */ 1015 if (num & (num - 1)) { 1016 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num); 1017 return NULL; 1018 } 1019 1020 /* TODO: allocate each queue chunk individually */ 1021 for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) { 1022 queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 1023 &dma_addr, 1024 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1025 if (queue) 1026 break; 1027 } 1028 1029 if (!num) 1030 return NULL; 1031 1032 if (!queue) { 1033 /* Try to get a single page. You are my only hope! */ 1034 queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 1035 &dma_addr, GFP_KERNEL|__GFP_ZERO); 1036 } 1037 if (!queue) 1038 return NULL; 1039 1040 queue_size_in_bytes = vring_size(num, vring_align); 1041 vring_init(&vring, num, queue, vring_align); 1042 1043 vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, 1044 notify, callback, name); 1045 if (!vq) { 1046 vring_free_queue(vdev, queue_size_in_bytes, queue, 1047 dma_addr); 1048 return NULL; 1049 } 1050 1051 to_vvq(vq)->queue_dma_addr = dma_addr; 1052 to_vvq(vq)->queue_size_in_bytes = queue_size_in_bytes; 1053 to_vvq(vq)->we_own_ring = true; 1054 1055 return vq; 1056 } 1057 EXPORT_SYMBOL_GPL(vring_create_virtqueue); 1058 1059 struct virtqueue *vring_new_virtqueue(unsigned int index, 1060 unsigned int num, 1061 unsigned int vring_align, 1062 struct virtio_device *vdev, 1063 bool weak_barriers, 1064 void *pages, 1065 bool (*notify)(struct virtqueue *vq), 1066 void (*callback)(struct virtqueue *vq), 1067 const char *name) 1068 { 1069 struct vring vring; 1070 vring_init(&vring, num, pages, vring_align); 1071 return __vring_new_virtqueue(index, vring, vdev, weak_barriers, 1072 notify, callback, name); 1073 } 1074 EXPORT_SYMBOL_GPL(vring_new_virtqueue); 1075 1076 void vring_del_virtqueue(struct virtqueue *_vq) 1077 { 1078 struct vring_virtqueue *vq = to_vvq(_vq); 1079 1080 if (vq->we_own_ring) { 1081 vring_free_queue(vq->vq.vdev, vq->queue_size_in_bytes, 1082 vq->vring.desc, vq->queue_dma_addr); 1083 } 1084 list_del(&_vq->list); 1085 kfree(vq); 1086 } 1087 EXPORT_SYMBOL_GPL(vring_del_virtqueue); 1088 1089 /* Manipulates transport-specific feature bits. */ 1090 void vring_transport_features(struct virtio_device *vdev) 1091 { 1092 unsigned int i; 1093 1094 for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) { 1095 switch (i) { 1096 case VIRTIO_RING_F_INDIRECT_DESC: 1097 break; 1098 case VIRTIO_RING_F_EVENT_IDX: 1099 break; 1100 case VIRTIO_F_VERSION_1: 1101 break; 1102 default: 1103 /* We don't understand this bit. */ 1104 __virtio_clear_bit(vdev, i); 1105 } 1106 } 1107 } 1108 EXPORT_SYMBOL_GPL(vring_transport_features); 1109 1110 /** 1111 * virtqueue_get_vring_size - return the size of the virtqueue's vring 1112 * @vq: the struct virtqueue containing the vring of interest. 1113 * 1114 * Returns the size of the vring. This is mainly used for boasting to 1115 * userspace. Unlike other operations, this need not be serialized. 1116 */ 1117 unsigned int virtqueue_get_vring_size(struct virtqueue *_vq) 1118 { 1119 1120 struct vring_virtqueue *vq = to_vvq(_vq); 1121 1122 return vq->vring.num; 1123 } 1124 EXPORT_SYMBOL_GPL(virtqueue_get_vring_size); 1125 1126 bool virtqueue_is_broken(struct virtqueue *_vq) 1127 { 1128 struct vring_virtqueue *vq = to_vvq(_vq); 1129 1130 return vq->broken; 1131 } 1132 EXPORT_SYMBOL_GPL(virtqueue_is_broken); 1133 1134 /* 1135 * This should prevent the device from being used, allowing drivers to 1136 * recover. You may need to grab appropriate locks to flush. 1137 */ 1138 void virtio_break_device(struct virtio_device *dev) 1139 { 1140 struct virtqueue *_vq; 1141 1142 list_for_each_entry(_vq, &dev->vqs, list) { 1143 struct vring_virtqueue *vq = to_vvq(_vq); 1144 vq->broken = true; 1145 } 1146 } 1147 EXPORT_SYMBOL_GPL(virtio_break_device); 1148 1149 dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq) 1150 { 1151 struct vring_virtqueue *vq = to_vvq(_vq); 1152 1153 BUG_ON(!vq->we_own_ring); 1154 1155 return vq->queue_dma_addr; 1156 } 1157 EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr); 1158 1159 dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq) 1160 { 1161 struct vring_virtqueue *vq = to_vvq(_vq); 1162 1163 BUG_ON(!vq->we_own_ring); 1164 1165 return vq->queue_dma_addr + 1166 ((char *)vq->vring.avail - (char *)vq->vring.desc); 1167 } 1168 EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr); 1169 1170 dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq) 1171 { 1172 struct vring_virtqueue *vq = to_vvq(_vq); 1173 1174 BUG_ON(!vq->we_own_ring); 1175 1176 return vq->queue_dma_addr + 1177 ((char *)vq->vring.used - (char *)vq->vring.desc); 1178 } 1179 EXPORT_SYMBOL_GPL(virtqueue_get_used_addr); 1180 1181 const struct vring *virtqueue_get_vring(struct virtqueue *vq) 1182 { 1183 return &to_vvq(vq)->vring; 1184 } 1185 EXPORT_SYMBOL_GPL(virtqueue_get_vring); 1186 1187 MODULE_LICENSE("GPL"); 1188