/* Copyright (C) 2009 Red Hat, Inc.
 * Copyright (C) 2006 Rusty Russell IBM Corporation
 *
 * Author: Michael S. Tsirkin <mst@redhat.com>
 *
 * Inspiration, some code, and most witty comments come from
 * Documentation/lguest/lguest.c, by Rusty Russell
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 * Generic code for virtio server in host kernel.
 */

#include <linux/eventfd.h>
#include <linux/vhost.h>
#include <linux/virtio_net.h>
#include <linux/mm.h>
#include <linux/mmu_context.h>
#include <linux/miscdevice.h>
#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/kthread.h>
#include <linux/cgroup.h>

#include <linux/net.h>
#include <linux/if_packet.h>
#include <linux/if_arp.h>

#include "vhost.h"

enum {
	VHOST_MEMORY_MAX_NREGIONS = 64,
	VHOST_MEMORY_F_LOG = 0x1,
};

static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
			    poll_table *pt)
{
	struct vhost_poll *poll;

	poll = container_of(pt, struct vhost_poll, table);
	poll->wqh = wqh;
	add_wait_queue(wqh, &poll->wait);
}

static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync,
			     void *key)
{
	struct vhost_poll *poll = container_of(wait, struct vhost_poll, wait);

	if (!((unsigned long)key & poll->mask))
		return 0;

	vhost_poll_queue(poll);
	return 0;
}

static void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn)
{
	INIT_LIST_HEAD(&work->node);
	work->fn = fn;
	init_waitqueue_head(&work->done);
	work->flushing = 0;
	work->queue_seq = work->done_seq = 0;
}

/* Init poll structure */
void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
		     unsigned long mask, struct vhost_dev *dev)
{
	init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup);
	init_poll_funcptr(&poll->table, vhost_poll_func);
	poll->mask = mask;
	poll->dev = dev;

	vhost_work_init(&poll->work, fn);
}

/* Start polling a file. We add ourselves to file's wait queue. The caller must
 * keep a reference to a file until after vhost_poll_stop is called. */
void vhost_poll_start(struct vhost_poll *poll, struct file *file)
{
	unsigned long mask;

	mask = file->f_op->poll(file, &poll->table);
	if (mask)
		vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask);
}

/* Stop polling a file. After this function returns, it becomes safe to drop the
 * file reference. You must also flush afterwards. */
void vhost_poll_stop(struct vhost_poll *poll)
{
	remove_wait_queue(poll->wqh, &poll->wait);
}

static bool vhost_work_seq_done(struct vhost_dev *dev, struct vhost_work *work,
				unsigned seq)
{
	int left;

	spin_lock_irq(&dev->work_lock);
	left = seq - work->done_seq;
	spin_unlock_irq(&dev->work_lock);
	return left <= 0;
}

static void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work)
{
	unsigned seq;
	int flushing;

	spin_lock_irq(&dev->work_lock);
	seq = work->queue_seq;
	work->flushing++;
	spin_unlock_irq(&dev->work_lock);
	wait_event(work->done, vhost_work_seq_done(dev, work, seq));
	spin_lock_irq(&dev->work_lock);
	flushing = --work->flushing;
	spin_unlock_irq(&dev->work_lock);
	BUG_ON(flushing < 0);
}

/* Flush any work that has been scheduled. When calling this, don't hold any
 * locks that are also used by the callback. */
void vhost_poll_flush(struct vhost_poll *poll)
{
	vhost_work_flush(poll->dev, &poll->work);
}

static inline void vhost_work_queue(struct vhost_dev *dev,
				    struct vhost_work *work)
{
	unsigned long flags;

	spin_lock_irqsave(&dev->work_lock, flags);
	if (list_empty(&work->node)) {
		list_add_tail(&work->node, &dev->work_list);
		work->queue_seq++;
		wake_up_process(dev->worker);
	}
	spin_unlock_irqrestore(&dev->work_lock, flags);
}

void vhost_poll_queue(struct vhost_poll *poll)
{
	vhost_work_queue(poll->dev, &poll->work);
}

static void vhost_vq_reset(struct vhost_dev *dev,
			   struct vhost_virtqueue *vq)
{
	vq->num = 1;
	vq->desc = NULL;
	vq->avail = NULL;
	vq->used = NULL;
	vq->last_avail_idx = 0;
	vq->avail_idx = 0;
	vq->last_used_idx = 0;
	vq->used_flags = 0;
	vq->log_used = false;
	vq->log_addr = -1ull;
	vq->vhost_hlen = 0;
	vq->sock_hlen = 0;
	vq->private_data = NULL;
	vq->log_base = NULL;
	vq->error_ctx = NULL;
	vq->error = NULL;
	vq->kick = NULL;
	vq->call_ctx = NULL;
	vq->call = NULL;
	vq->log_ctx = NULL;
}

static int vhost_worker(void *data)
{
	struct vhost_dev *dev = data;
	struct vhost_work *work = NULL;
	unsigned uninitialized_var(seq);

	use_mm(dev->mm);

	for (;;) {
		/* mb paired w/ kthread_stop */
		set_current_state(TASK_INTERRUPTIBLE);

		spin_lock_irq(&dev->work_lock);
		if (work) {
			work->done_seq = seq;
			if (work->flushing)
				wake_up_all(&work->done);
		}

		if (kthread_should_stop()) {
			spin_unlock_irq(&dev->work_lock);
			__set_current_state(TASK_RUNNING);
			break;
		}
		if (!list_empty(&dev->work_list)) {
			work = list_first_entry(&dev->work_list,
						struct vhost_work, node);
			list_del_init(&work->node);
			seq = work->queue_seq;
		} else
			work = NULL;
		spin_unlock_irq(&dev->work_lock);

		if (work) {
			__set_current_state(TASK_RUNNING);
			work->fn(work);
		} else
			schedule();

	}
	unuse_mm(dev->mm);
	return 0;
}

/* Helper to allocate iovec buffers for all vqs. */
static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
{
	int i;

	for (i = 0; i < dev->nvqs; ++i) {
		dev->vqs[i].indirect = kmalloc(sizeof *dev->vqs[i].indirect *
					       UIO_MAXIOV, GFP_KERNEL);
		dev->vqs[i].log = kmalloc(sizeof *dev->vqs[i].log * UIO_MAXIOV,
					  GFP_KERNEL);
		dev->vqs[i].heads = kmalloc(sizeof *dev->vqs[i].heads *
					    UIO_MAXIOV, GFP_KERNEL);

		if (!dev->vqs[i].indirect || !dev->vqs[i].log ||
		    !dev->vqs[i].heads)
			goto err_nomem;
	}
	return 0;

err_nomem:
	for (; i >= 0; --i) {
		kfree(dev->vqs[i].indirect);
		kfree(dev->vqs[i].log);
		kfree(dev->vqs[i].heads);
	}
	return -ENOMEM;
}

static void vhost_dev_free_iovecs(struct vhost_dev *dev)
{
	int i;

	for (i = 0; i < dev->nvqs; ++i) {
		kfree(dev->vqs[i].indirect);
		dev->vqs[i].indirect = NULL;
		kfree(dev->vqs[i].log);
		dev->vqs[i].log = NULL;
		kfree(dev->vqs[i].heads);
		dev->vqs[i].heads = NULL;
	}
}

long vhost_dev_init(struct vhost_dev *dev,
		    struct vhost_virtqueue *vqs, int nvqs)
{
	int i;

	dev->vqs = vqs;
	dev->nvqs = nvqs;
	mutex_init(&dev->mutex);
	dev->log_ctx = NULL;
	dev->log_file = NULL;
	dev->memory = NULL;
	dev->mm = NULL;
	spin_lock_init(&dev->work_lock);
	INIT_LIST_HEAD(&dev->work_list);
	dev->worker = NULL;

	for (i = 0; i < dev->nvqs; ++i) {
		dev->vqs[i].log = NULL;
		dev->vqs[i].indirect = NULL;
		dev->vqs[i].heads = NULL;
		dev->vqs[i].dev = dev;
		mutex_init(&dev->vqs[i].mutex);
		vhost_vq_reset(dev, dev->vqs + i);
		if (dev->vqs[i].handle_kick)
			vhost_poll_init(&dev->vqs[i].poll,
					dev->vqs[i].handle_kick, POLLIN, dev);
	}

	return 0;
}

/* Caller should have device mutex */
long vhost_dev_check_owner(struct vhost_dev *dev)
{
	/* Are you the owner? If not, I don't think you mean to do that */
	return dev->mm == current->mm ? 0 : -EPERM;
}

struct vhost_attach_cgroups_struct {
	struct vhost_work work;
	struct task_struct *owner;
	int ret;
};

static void vhost_attach_cgroups_work(struct vhost_work *work)
{
	struct vhost_attach_cgroups_struct *s;

	s = container_of(work, struct vhost_attach_cgroups_struct, work);
	s->ret = cgroup_attach_task_all(s->owner, current);
}

static int vhost_attach_cgroups(struct vhost_dev *dev)
{
	struct vhost_attach_cgroups_struct attach;

	attach.owner = current;
	vhost_work_init(&attach.work, vhost_attach_cgroups_work);
	vhost_work_queue(dev, &attach.work);
	vhost_work_flush(dev, &attach.work);
	return attach.ret;
}

/* Caller should have device mutex */
static long vhost_dev_set_owner(struct vhost_dev *dev)
{
	struct task_struct *worker;
	int err;

	/* Is there an owner already? */
	if (dev->mm) {
		err = -EBUSY;
		goto err_mm;
	}

	/* No owner, become one */
	dev->mm = get_task_mm(current);
	worker = kthread_create(vhost_worker, dev, "vhost-%d", current->pid);
	if (IS_ERR(worker)) {
		err = PTR_ERR(worker);
		goto err_worker;
	}

	dev->worker = worker;
	wake_up_process(worker);	/* avoid contributing to loadavg */

	err = vhost_attach_cgroups(dev);
	if (err)
		goto err_cgroup;

	err = vhost_dev_alloc_iovecs(dev);
	if (err)
		goto err_cgroup;

	return 0;
err_cgroup:
	kthread_stop(worker);
	dev->worker = NULL;
err_worker:
	if (dev->mm)
		mmput(dev->mm);
	dev->mm = NULL;
err_mm:
	return err;
}

/* Caller should have device mutex */
long vhost_dev_reset_owner(struct vhost_dev *dev)
{
	struct vhost_memory *memory;

	/* Restore memory to default empty mapping. */
	memory = kmalloc(offsetof(struct vhost_memory, regions), GFP_KERNEL);
	if (!memory)
		return -ENOMEM;

	vhost_dev_cleanup(dev);

	memory->nregions = 0;
	RCU_INIT_POINTER(dev->memory, memory);
	return 0;
}

/* Caller should have device mutex */
void vhost_dev_cleanup(struct vhost_dev *dev)
{
	int i;

	for (i = 0; i < dev->nvqs; ++i) {
		if (dev->vqs[i].kick && dev->vqs[i].handle_kick) {
			vhost_poll_stop(&dev->vqs[i].poll);
			vhost_poll_flush(&dev->vqs[i].poll);
		}
		if (dev->vqs[i].error_ctx)
			eventfd_ctx_put(dev->vqs[i].error_ctx);
		if (dev->vqs[i].error)
			fput(dev->vqs[i].error);
		if (dev->vqs[i].kick)
			fput(dev->vqs[i].kick);
		if (dev->vqs[i].call_ctx)
			eventfd_ctx_put(dev->vqs[i].call_ctx);
		if (dev->vqs[i].call)
			fput(dev->vqs[i].call);
		vhost_vq_reset(dev, dev->vqs + i);
	}
	vhost_dev_free_iovecs(dev);
	if (dev->log_ctx)
		eventfd_ctx_put(dev->log_ctx);
	dev->log_ctx = NULL;
	if (dev->log_file)
		fput(dev->log_file);
	dev->log_file = NULL;
	/* No one will access memory at this point */
	kfree(rcu_dereference_protected(dev->memory,
					lockdep_is_held(&dev->mutex)));
	RCU_INIT_POINTER(dev->memory, NULL);
	WARN_ON(!list_empty(&dev->work_list));
	if (dev->worker) {
		kthread_stop(dev->worker);
		dev->worker = NULL;
	}
	if (dev->mm)
		mmput(dev->mm);
	dev->mm = NULL;
}

static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz)
{
	u64 a = addr / VHOST_PAGE_SIZE / 8;

	/* Make sure 64 bit math will not overflow. */
	if (a > ULONG_MAX - (unsigned long)log_base ||
	    a + (unsigned long)log_base > ULONG_MAX)
		return 0;

	return access_ok(VERIFY_WRITE, log_base + a,
			 (sz + VHOST_PAGE_SIZE * 8 - 1) / VHOST_PAGE_SIZE / 8);
}

/* Caller should have vq mutex and device mutex. */
static int vq_memory_access_ok(void __user *log_base, struct vhost_memory *mem,
			       int log_all)
{
	int i;

	if (!mem)
		return 0;

	for (i = 0; i < mem->nregions; ++i) {
		struct vhost_memory_region *m = mem->regions + i;
		unsigned long a = m->userspace_addr;
		if (m->memory_size > ULONG_MAX)
			return 0;
		else if (!access_ok(VERIFY_WRITE, (void __user *)a,
				    m->memory_size))
			return 0;
		else if (log_all && !log_access_ok(log_base,
						   m->guest_phys_addr,
						   m->memory_size))
			return 0;
	}
	return 1;
}

/* Can we switch to this memory table? */
/* Caller should have device mutex but not vq mutex */
static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem,
			    int log_all)
{
	int i;

	for (i = 0; i < d->nvqs; ++i) {
		int ok;
		mutex_lock(&d->vqs[i].mutex);
		/* If ring is inactive, will check when it's enabled. */
		if (d->vqs[i].private_data)
			ok = vq_memory_access_ok(d->vqs[i].log_base, mem,
						 log_all);
		else
			ok = 1;
		mutex_unlock(&d->vqs[i].mutex);
		if (!ok)
			return 0;
	}
	return 1;
}

static int vq_access_ok(unsigned int num,
			struct vring_desc __user *desc,
			struct vring_avail __user *avail,
			struct vring_used __user *used)
{
	return access_ok(VERIFY_READ, desc, num * sizeof *desc) &&
	       access_ok(VERIFY_READ, avail,
			 sizeof *avail + num * sizeof *avail->ring) &&
	       access_ok(VERIFY_WRITE, used,
			 sizeof *used + num * sizeof *used->ring);
}

/* Can we log writes? */
/* Caller should have device mutex but not vq mutex */
int vhost_log_access_ok(struct vhost_dev *dev)
{
	struct vhost_memory *mp;

	mp = rcu_dereference_protected(dev->memory,
				       lockdep_is_held(&dev->mutex));
	return memory_access_ok(dev, mp, 1);
}

/* Verify access for write logging. */
/* Caller should have vq mutex and device mutex */
static int vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base)
{
	struct vhost_memory *mp;

	mp = rcu_dereference_protected(vq->dev->memory,
				       lockdep_is_held(&vq->mutex));
	return vq_memory_access_ok(log_base, mp,
			vhost_has_feature(vq->dev, VHOST_F_LOG_ALL)) &&
		(!vq->log_used || log_access_ok(log_base, vq->log_addr,
					sizeof *vq->used +
					vq->num * sizeof *vq->used->ring));
}

/* Can we start vq? */
/* Caller should have vq mutex and device mutex */
int vhost_vq_access_ok(struct vhost_virtqueue *vq)
{
	return vq_access_ok(vq->num, vq->desc, vq->avail, vq->used) &&
		vq_log_access_ok(vq, vq->log_base);
}

static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
{
	struct vhost_memory mem, *newmem, *oldmem;
	unsigned long size = offsetof(struct vhost_memory, regions);

	if (copy_from_user(&mem, m, size))
		return -EFAULT;
	if (mem.padding)
		return -EOPNOTSUPP;
	if (mem.nregions > VHOST_MEMORY_MAX_NREGIONS)
		return -E2BIG;
	newmem = kmalloc(size + mem.nregions * sizeof *m->regions, GFP_KERNEL);
	if (!newmem)
		return -ENOMEM;

	memcpy(newmem, &mem, size);
	if (copy_from_user(newmem->regions, m->regions,
			   mem.nregions * sizeof *m->regions)) {
		kfree(newmem);
		return -EFAULT;
	}

	if (!memory_access_ok(d, newmem,
			      vhost_has_feature(d, VHOST_F_LOG_ALL))) {
		kfree(newmem);
		return -EFAULT;
	}
	oldmem = rcu_dereference_protected(d->memory,
					   lockdep_is_held(&d->mutex));
	rcu_assign_pointer(d->memory, newmem);
	synchronize_rcu();
	kfree(oldmem);
	return 0;
}

static int init_used(struct vhost_virtqueue *vq,
		     struct vring_used __user *used)
{
	int r = put_user(vq->used_flags, &used->flags);

	if (r)
		return r;
	return get_user(vq->last_used_idx, &used->idx);
}

static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp)
{
	struct file *eventfp, *filep = NULL,
		    *pollstart = NULL, *pollstop = NULL;
	struct eventfd_ctx *ctx = NULL;
	u32 __user *idxp = argp;
	struct vhost_virtqueue *vq;
	struct vhost_vring_state s;
	struct vhost_vring_file f;
	struct vhost_vring_addr a;
	u32 idx;
	long r;

	r = get_user(idx, idxp);
	if (r < 0)
		return r;
	if (idx >= d->nvqs)
		return -ENOBUFS;

	vq = d->vqs + idx;

	mutex_lock(&vq->mutex);

	switch (ioctl) {
	case VHOST_SET_VRING_NUM:
		/* Resizing ring with an active backend?
		 * You don't want to do that. */
		if (vq->private_data) {
			r = -EBUSY;
			break;
		}
		if (copy_from_user(&s, argp, sizeof s)) {
			r = -EFAULT;
			break;
		}
		if (!s.num || s.num > 0xffff || (s.num & (s.num - 1))) {
			r = -EINVAL;
			break;
		}
		vq->num = s.num;
		break;
	case VHOST_SET_VRING_BASE:
		/* Moving base with an active backend?
		 * You don't want to do that. */
		if (vq->private_data) {
			r = -EBUSY;
			break;
		}
		if (copy_from_user(&s, argp, sizeof s)) {
			r = -EFAULT;
			break;
		}
		if (s.num > 0xffff) {
			r = -EINVAL;
			break;
		}
		vq->last_avail_idx = s.num;
		/* Forget the cached index value. */
		vq->avail_idx = vq->last_avail_idx;
		break;
	case VHOST_GET_VRING_BASE:
		s.index = idx;
		s.num = vq->last_avail_idx;
		if (copy_to_user(argp, &s, sizeof s))
			r = -EFAULT;
		break;
	case VHOST_SET_VRING_ADDR:
		if (copy_from_user(&a, argp, sizeof a)) {
			r = -EFAULT;
			break;
		}
		if (a.flags & ~(0x1 << VHOST_VRING_F_LOG)) {
			r = -EOPNOTSUPP;
			break;
		}
		/* For 32bit, verify that the top 32bits of the user
		   data are set to zero. */
		if ((u64)(unsigned long)a.desc_user_addr != a.desc_user_addr ||
		    (u64)(unsigned long)a.used_user_addr != a.used_user_addr ||
		    (u64)(unsigned long)a.avail_user_addr != a.avail_user_addr) {
			r = -EFAULT;
			break;
		}
		if ((a.avail_user_addr & (sizeof *vq->avail->ring - 1)) ||
		    (a.used_user_addr & (sizeof *vq->used->ring - 1)) ||
		    (a.log_guest_addr & (sizeof *vq->used->ring - 1))) {
			r = -EINVAL;
			break;
		}

		/* We only verify access here if backend is configured.
		 * If it is not, we don't as size might not have been setup.
		 * We will verify when backend is configured. */
		if (vq->private_data) {
			if (!vq_access_ok(vq->num,
				(void __user *)(unsigned long)a.desc_user_addr,
				(void __user *)(unsigned long)a.avail_user_addr,
				(void __user *)(unsigned long)a.used_user_addr)) {
				r = -EINVAL;
				break;
			}

			/* Also validate log access for used ring if enabled. */
			if ((a.flags & (0x1 << VHOST_VRING_F_LOG)) &&
			    !log_access_ok(vq->log_base, a.log_guest_addr,
					   sizeof *vq->used +
					   vq->num * sizeof *vq->used->ring)) {
				r = -EINVAL;
				break;
			}
		}

		r = init_used(vq, (struct vring_used __user *)(unsigned long)
			      a.used_user_addr);
		if (r)
			break;
		vq->log_used = !!(a.flags & (0x1 << VHOST_VRING_F_LOG));
		vq->desc = (void __user *)(unsigned long)a.desc_user_addr;
		vq->avail = (void __user *)(unsigned long)a.avail_user_addr;
		vq->log_addr = a.log_guest_addr;
		vq->used = (void __user *)(unsigned long)a.used_user_addr;
		break;
	case VHOST_SET_VRING_KICK:
		if (copy_from_user(&f, argp, sizeof f)) {
			r = -EFAULT;
			break;
		}
		eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd);
		if (IS_ERR(eventfp)) {
			r = PTR_ERR(eventfp);
			break;
		}
		if (eventfp != vq->kick) {
			pollstop = filep = vq->kick;
			pollstart = vq->kick = eventfp;
		} else
			filep = eventfp;
		break;
	case VHOST_SET_VRING_CALL:
		if (copy_from_user(&f, argp, sizeof f)) {
			r = -EFAULT;
			break;
		}
		eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd);
		if (IS_ERR(eventfp)) {
			r = PTR_ERR(eventfp);
			break;
		}
		if (eventfp != vq->call) {
			filep = vq->call;
			ctx = vq->call_ctx;
			vq->call = eventfp;
			vq->call_ctx = eventfp ?
				eventfd_ctx_fileget(eventfp) : NULL;
		} else
			filep = eventfp;
		break;
	case VHOST_SET_VRING_ERR:
		if (copy_from_user(&f, argp, sizeof f)) {
			r = -EFAULT;
			break;
		}
		eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd);
		if (IS_ERR(eventfp)) {
			r = PTR_ERR(eventfp);
			break;
		}
		if (eventfp != vq->error) {
			filep = vq->error;
			vq->error = eventfp;
			ctx = vq->error_ctx;
			vq->error_ctx = eventfp ?
				eventfd_ctx_fileget(eventfp) : NULL;
		} else
			filep = eventfp;
		break;
	default:
		r = -ENOIOCTLCMD;
	}

	if (pollstop && vq->handle_kick)
		vhost_poll_stop(&vq->poll);

	if (ctx)
		eventfd_ctx_put(ctx);
	if (filep)
		fput(filep);

	if (pollstart && vq->handle_kick)
		vhost_poll_start(&vq->poll, vq->kick);

	mutex_unlock(&vq->mutex);

	if (pollstop && vq->handle_kick)
		vhost_poll_flush(&vq->poll);
	return r;
}

/* Caller must have device mutex */
long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	struct file *eventfp, *filep = NULL;
	struct eventfd_ctx *ctx = NULL;
	u64 p;
	long r;
	int i, fd;

	/* If you are not the owner, you can become one */
	if (ioctl == VHOST_SET_OWNER) {
		r = vhost_dev_set_owner(d);
		goto done;
	}

	/* You must be the owner to do anything else */
	r = vhost_dev_check_owner(d);
	if (r)
		goto done;

	switch (ioctl) {
	case VHOST_SET_MEM_TABLE:
		r = vhost_set_memory(d, argp);
		break;
	case VHOST_SET_LOG_BASE:
		if (copy_from_user(&p, argp, sizeof p)) {
			r = -EFAULT;
			break;
		}
		if ((u64)(unsigned long)p != p) {
			r = -EFAULT;
			break;
		}
		for (i = 0; i < d->nvqs; ++i) {
			struct vhost_virtqueue *vq;
			void __user *base = (void __user *)(unsigned long)p;
			vq = d->vqs + i;
			mutex_lock(&vq->mutex);
			/* If ring is inactive, will check when it's enabled. */
			if (vq->private_data && !vq_log_access_ok(vq, base))
				r = -EFAULT;
			else
				vq->log_base = base;
			mutex_unlock(&vq->mutex);
		}
		break;
	case VHOST_SET_LOG_FD:
		r = get_user(fd, (int __user *)argp);
		if (r < 0)
			break;
		eventfp = fd == -1 ? NULL : eventfd_fget(fd);
		if (IS_ERR(eventfp)) {
			r = PTR_ERR(eventfp);
			break;
		}
		if (eventfp != d->log_file) {
			filep = d->log_file;
			ctx = d->log_ctx;
			d->log_ctx = eventfp ?
				eventfd_ctx_fileget(eventfp) : NULL;
		} else
			filep = eventfp;
		for (i = 0; i < d->nvqs; ++i) {
			mutex_lock(&d->vqs[i].mutex);
			d->vqs[i].log_ctx = d->log_ctx;
			mutex_unlock(&d->vqs[i].mutex);
		}
		if (ctx)
			eventfd_ctx_put(ctx);
		if (filep)
			fput(filep);
		break;
	default:
		r = vhost_set_vring(d, ioctl, argp);
		break;
	}
done:
	return r;
}

static const struct vhost_memory_region *find_region(struct vhost_memory *mem,
						     __u64 addr, __u32 len)
{
	struct vhost_memory_region *reg;
	int i;

	/* linear search is not brilliant, but we really have on the order of 6
	 * regions in practice */
	for (i = 0; i < mem->nregions; ++i) {
		reg = mem->regions + i;
		if (reg->guest_phys_addr <= addr &&
		    reg->guest_phys_addr + reg->memory_size - 1 >= addr)
			return reg;
	}
	return NULL;
}

/* TODO: This is really inefficient. We need something like get_user()
 * (instruction directly accesses the data, with an exception table entry
 * returning -EFAULT). See Documentation/x86/exception-tables.txt.
 */
static int set_bit_to_user(int nr, void __user *addr)
{
	unsigned long log = (unsigned long)addr;
	struct page *page;
	void *base;
	int bit = nr + (log % PAGE_SIZE) * 8;
	int r;

	r = get_user_pages_fast(log, 1, 1, &page);
	if (r < 0)
		return r;
	BUG_ON(r != 1);
	base = kmap_atomic(page, KM_USER0);
	set_bit(bit, base);
	kunmap_atomic(base, KM_USER0);
	set_page_dirty_lock(page);
	put_page(page);
	return 0;
}

static int log_write(void __user *log_base,
		     u64 write_address, u64 write_length)
{
	u64 write_page = write_address / VHOST_PAGE_SIZE;
	int r;

	if (!write_length)
		return 0;
	write_length += write_address % VHOST_PAGE_SIZE;
	for (;;) {
		u64 base = (u64)(unsigned long)log_base;
		u64 log = base + write_page / 8;
		int bit = write_page % 8;
		if ((u64)(unsigned long)log != log)
			return -EFAULT;
		r = set_bit_to_user(bit, (void __user *)(unsigned long)log);
		if (r < 0)
			return r;
		if (write_length <= VHOST_PAGE_SIZE)
			break;
		write_length -= VHOST_PAGE_SIZE;
		write_page += 1;
	}
	return r;
}

int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
		    unsigned int log_num, u64 len)
{
	int i, r;

	/* Make sure data written is seen before log. */
	smp_wmb();
	for (i = 0; i < log_num; ++i) {
		u64 l = min(log[i].len, len);
		r = log_write(vq->log_base, log[i].addr, l);
		if (r < 0)
			return r;
		len -= l;
		if (!len) {
			if (vq->log_ctx)
				eventfd_signal(vq->log_ctx, 1);
			return 0;
		}
	}
	/* Length written exceeds what we have stored. This is a bug. */
	BUG();
	return 0;
}

static int translate_desc(struct vhost_dev *dev, u64 addr, u32 len,
			  struct iovec iov[], int iov_size)
{
	const struct vhost_memory_region *reg;
	struct vhost_memory *mem;
	struct iovec *_iov;
	u64 s = 0;
	int ret = 0;

	rcu_read_lock();

	mem = rcu_dereference(dev->memory);
	while ((u64)len > s) {
		u64 size;
		if (unlikely(ret >= iov_size)) {
			ret = -ENOBUFS;
			break;
		}
		reg = find_region(mem, addr, len);
		if (unlikely(!reg)) {
			ret = -EFAULT;
			break;
		}
		_iov = iov + ret;
		size = reg->memory_size - addr + reg->guest_phys_addr;
		_iov->iov_len = min((u64)len, size);
		_iov->iov_base = (void __user *)(unsigned long)
			(reg->userspace_addr + addr - reg->guest_phys_addr);
		s += size;
		addr += size;
		++ret;
	}

	rcu_read_unlock();
	return ret;
}

/* Each buffer in the virtqueues is actually a chain of descriptors. This
 * function returns the next descriptor in the chain,
 * or -1U if we're at the end. */
static unsigned next_desc(struct vring_desc *desc)
{
	unsigned int next;

	/* If this descriptor says it doesn't chain, we're done. */
	if (!(desc->flags & VRING_DESC_F_NEXT))
		return -1U;

	/* Check they're not leading us off end of descriptors. */
	next = desc->next;
	/* Make sure compiler knows to grab that: we don't want it changing! */
	/* We will use the result as an index in an array, so most
	 * architectures only need a compiler barrier here. */
	read_barrier_depends();

	return next;
}

static int get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq,
			struct iovec iov[], unsigned int iov_size,
			unsigned int *out_num, unsigned int *in_num,
			struct vhost_log *log, unsigned int *log_num,
			struct vring_desc *indirect)
{
	struct vring_desc desc;
	unsigned int i = 0, count, found = 0;
	int ret;

	/* Sanity check */
	if (unlikely(indirect->len % sizeof desc)) {
		vq_err(vq, "Invalid length in indirect descriptor: "
		       "len 0x%llx not multiple of 0x%zx\n",
		       (unsigned long long)indirect->len,
		       sizeof desc);
		return -EINVAL;
	}

	ret = translate_desc(dev, indirect->addr, indirect->len, vq->indirect,
			     UIO_MAXIOV);
	if (unlikely(ret < 0)) {
		vq_err(vq, "Translation failure %d in indirect.\n", ret);
		return ret;
	}

	/* We will use the result as an address to read from, so most
	 * architectures only need a compiler barrier here. */
	read_barrier_depends();

	count = indirect->len / sizeof desc;
	/* Buffers are chained via a 16 bit next field, so
	 * we can have at most 2^16 of these. */
	if (unlikely(count > USHRT_MAX + 1)) {
		vq_err(vq, "Indirect buffer length too big: %d\n",
		       indirect->len);
		return -E2BIG;
	}

	do {
		unsigned iov_count = *in_num + *out_num;
		if (unlikely(++found > count)) {
			vq_err(vq, "Loop detected: last one at %u "
			       "indirect size %u\n",
			       i, count);
			return -EINVAL;
		}
		if (unlikely(memcpy_fromiovec((unsigned char *)&desc,
					      vq->indirect, sizeof desc))) {
			vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n",
			       i, (size_t)indirect->addr + i * sizeof desc);
			return -EINVAL;
		}
		if (unlikely(desc.flags & VRING_DESC_F_INDIRECT)) {
			vq_err(vq, "Nested indirect descriptor: idx %d, %zx\n",
			       i, (size_t)indirect->addr + i * sizeof desc);
			return -EINVAL;
		}

		ret = translate_desc(dev, desc.addr, desc.len, iov + iov_count,
				     iov_size - iov_count);
		if (unlikely(ret < 0)) {
			vq_err(vq, "Translation failure %d indirect idx %d\n",
			       ret, i);
			return ret;
		}
		/* If this is an input descriptor, increment that count. */
		if (desc.flags & VRING_DESC_F_WRITE) {
			*in_num += ret;
			if (unlikely(log)) {
				log[*log_num].addr = desc.addr;
				log[*log_num].len = desc.len;
				++*log_num;
			}
		} else {
			/* If it's an output descriptor, they're all supposed
			 * to come before any input descriptors. */
			if (unlikely(*in_num)) {
				vq_err(vq, "Indirect descriptor "
				       "has out after in: idx %d\n", i);
				return -EINVAL;
			}
			*out_num += ret;
		}
	} while ((i = next_desc(&desc)) != -1);
	return 0;
}

/* This looks in the virtqueue for the first available buffer, and converts
 * it to an iovec for convenient access. Since descriptors consist of some
 * number of output then some number of input descriptors, it's actually two
 * iovecs, but we pack them into one and note how many of each there were.
 *
 * This function returns the descriptor number found, or vq->num (which is
 * never a valid descriptor number) if none was found. A negative code is
 * returned on error. */
int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
		      struct iovec iov[], unsigned int iov_size,
		      unsigned int *out_num, unsigned int *in_num,
		      struct vhost_log *log, unsigned int *log_num)
{
	struct vring_desc desc;
	unsigned int i, head, found = 0;
	u16 last_avail_idx;
	int ret;

	/* Check it isn't doing very strange things with descriptor numbers. */
	last_avail_idx = vq->last_avail_idx;
	if (unlikely(__get_user(vq->avail_idx, &vq->avail->idx))) {
		vq_err(vq, "Failed to access avail idx at %p\n",
		       &vq->avail->idx);
		return -EFAULT;
	}

	if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) {
		vq_err(vq, "Guest moved used index from %u to %u",
		       last_avail_idx, vq->avail_idx);
		return -EFAULT;
	}

	/* If there's nothing new since last we looked, return invalid. */
	if (vq->avail_idx == last_avail_idx)
		return vq->num;

	/* Only get avail ring entries after they have been exposed by guest. */
	smp_rmb();

	/* Grab the next descriptor number they're advertising, and increment
	 * the index we've seen. */
	if (unlikely(__get_user(head,
				&vq->avail->ring[last_avail_idx % vq->num]))) {
		vq_err(vq, "Failed to read head: idx %d address %p\n",
		       last_avail_idx,
		       &vq->avail->ring[last_avail_idx % vq->num]);
		return -EFAULT;
	}

	/* If their number is silly, that's an error. */
	if (unlikely(head >= vq->num)) {
		vq_err(vq, "Guest says index %u > %u is available",
		       head, vq->num);
		return -EINVAL;
	}

	/* When we start there are neither input nor output descriptors. */
	*out_num = *in_num = 0;
	if (unlikely(log))
		*log_num = 0;

	i = head;
	do {
		unsigned iov_count = *in_num + *out_num;
		if (unlikely(i >= vq->num)) {
			vq_err(vq, "Desc index is %u > %u, head = %u",
			       i, vq->num, head);
			return -EINVAL;
		}
		if (unlikely(++found > vq->num)) {
			vq_err(vq, "Loop detected: last one at %u "
			       "vq size %u head %u\n",
			       i, vq->num, head);
			return -EINVAL;
		}
		ret = __copy_from_user(&desc, vq->desc + i, sizeof desc);
		if (unlikely(ret)) {
			vq_err(vq, "Failed to get descriptor: idx %d addr %p\n",
			       i, vq->desc + i);
			return -EFAULT;
		}
		if (desc.flags & VRING_DESC_F_INDIRECT) {
			ret = get_indirect(dev, vq, iov, iov_size,
					   out_num, in_num,
					   log, log_num, &desc);
			if (unlikely(ret < 0)) {
				vq_err(vq, "Failure detected "
				       "in indirect descriptor at idx %d\n", i);
				return ret;
			}
			continue;
		}

		ret = translate_desc(dev, desc.addr, desc.len, iov + iov_count,
				     iov_size - iov_count);
		if (unlikely(ret < 0)) {
			vq_err(vq, "Translation failure %d descriptor idx %d\n",
			       ret, i);
			return ret;
		}
		if (desc.flags & VRING_DESC_F_WRITE) {
			/* If this is an input descriptor,
			 * increment that count. */
			*in_num += ret;
			if (unlikely(log)) {
				log[*log_num].addr = desc.addr;
				log[*log_num].len = desc.len;
				++*log_num;
			}
		} else {
			/* If it's an output descriptor, they're all supposed
			 * to come before any input descriptors. */
			if (unlikely(*in_num)) {
				vq_err(vq, "Descriptor has out after in: "
				       "idx %d\n", i);
				return -EINVAL;
			}
			*out_num += ret;
		}
	} while ((i = next_desc(&desc)) != -1);

	/* On success, increment avail index. */
	vq->last_avail_idx++;
	return head;
}

/* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */
void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n)
{
	vq->last_avail_idx -= n;
}

/* After we've used one of their buffers, we tell them about it. We'll then
 * want to notify the guest, using eventfd. */
int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
{
	struct vring_used_elem __user *used;

	/* The virtqueue contains a ring of used buffers. Get a pointer to the
	 * next entry in that used ring. */
	used = &vq->used->ring[vq->last_used_idx % vq->num];
	if (__put_user(head, &used->id)) {
		vq_err(vq, "Failed to write used id");
		return -EFAULT;
	}
	if (__put_user(len, &used->len)) {
		vq_err(vq, "Failed to write used len");
		return -EFAULT;
	}
	/* Make sure buffer is written before we update index. */
	smp_wmb();
	if (__put_user(vq->last_used_idx + 1, &vq->used->idx)) {
		vq_err(vq, "Failed to increment used idx");
		return -EFAULT;
	}
	if (unlikely(vq->log_used)) {
		/* Make sure data is seen before log. */
		smp_wmb();
		/* Log used ring entry write. */
		log_write(vq->log_base,
			  vq->log_addr +
			   ((void __user *)used - (void __user *)vq->used),
			  sizeof *used);
		/* Log used index update. */
		log_write(vq->log_base,
			  vq->log_addr + offsetof(struct vring_used, idx),
			  sizeof vq->used->idx);
		if (vq->log_ctx)
			eventfd_signal(vq->log_ctx, 1);
	}
	vq->last_used_idx++;
	return 0;
}

static int __vhost_add_used_n(struct vhost_virtqueue *vq,
			      struct vring_used_elem *heads,
			      unsigned count)
{
	struct vring_used_elem __user *used;
	int start;

	start = vq->last_used_idx % vq->num;
	used = vq->used->ring + start;
	if (__copy_to_user(used, heads, count * sizeof *used)) {
		vq_err(vq, "Failed to write used");
		return -EFAULT;
	}
	if (unlikely(vq->log_used)) {
		/* Make sure data is seen before log. */
		smp_wmb();
		/* Log used ring entry write. */
		log_write(vq->log_base,
			  vq->log_addr +
			   ((void __user *)used - (void __user *)vq->used),
			  count * sizeof *used);
	}
	vq->last_used_idx += count;
	return 0;
}

/* After we've used one of their buffers, we tell them about it. We'll then
 * want to notify the guest, using eventfd. */
int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
		     unsigned count)
{
	int start, n, r;

	start = vq->last_used_idx % vq->num;
	n = vq->num - start;
	if (n < count) {
		r = __vhost_add_used_n(vq, heads, n);
		if (r < 0)
			return r;
		heads += n;
		count -= n;
	}
	r = __vhost_add_used_n(vq, heads, count);

	/* Make sure buffer is written before we update index. */
	smp_wmb();
	if (put_user(vq->last_used_idx, &vq->used->idx)) {
		vq_err(vq, "Failed to increment used idx");
		return -EFAULT;
	}
	if (unlikely(vq->log_used)) {
		/* Log used index update. */
		log_write(vq->log_base,
			  vq->log_addr + offsetof(struct vring_used, idx),
			  sizeof vq->used->idx);
		if (vq->log_ctx)
			eventfd_signal(vq->log_ctx, 1);
	}
	return r;
}

/* This actually signals the guest, using eventfd. */
void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq)
{
	__u16 flags;

	/* Flush out used index updates. This is paired
	 * with the barrier that the Guest executes when enabling
	 * interrupts. */
	smp_mb();

	if (__get_user(flags, &vq->avail->flags)) {
		vq_err(vq, "Failed to get flags");
		return;
	}

	/* If they don't want an interrupt, don't signal, unless empty. */
	if ((flags & VRING_AVAIL_F_NO_INTERRUPT) &&
	    (vq->avail_idx != vq->last_avail_idx ||
	     !vhost_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY)))
		return;

	/* Signal the Guest, tell them we used something up. */
	if (vq->call_ctx)
		eventfd_signal(vq->call_ctx, 1);
}

/* And here's the combo meal deal. Supersize me! */
void vhost_add_used_and_signal(struct vhost_dev *dev,
			       struct vhost_virtqueue *vq,
			       unsigned int head, int len)
{
	vhost_add_used(vq, head, len);
	vhost_signal(dev, vq);
}

/* multi-buffer version of vhost_add_used_and_signal */
void vhost_add_used_and_signal_n(struct vhost_dev *dev,
				 struct vhost_virtqueue *vq,
				 struct vring_used_elem *heads, unsigned count)
{
	vhost_add_used_n(vq, heads, count);
	vhost_signal(dev, vq);
}

/* OK, now we need to know about added descriptors. */
bool vhost_enable_notify(struct vhost_virtqueue *vq)
{
	u16 avail_idx;
	int r;

	if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY))
		return false;
	vq->used_flags &= ~VRING_USED_F_NO_NOTIFY;
	r = put_user(vq->used_flags, &vq->used->flags);
	if (r) {
		vq_err(vq, "Failed to enable notification at %p: %d\n",
		       &vq->used->flags, r);
		return false;
	}
	/* They could have slipped one in as we were doing that: make
	 * sure it's written, then check again. */
	smp_mb();
	r = __get_user(avail_idx, &vq->avail->idx);
	if (r) {
		vq_err(vq, "Failed to check avail idx at %p: %d\n",
		       &vq->avail->idx, r);
		return false;
	}

	return avail_idx != vq->avail_idx;
}

/* We don't need to be notified again. */
void vhost_disable_notify(struct vhost_virtqueue *vq)
{
	int r;

	if (vq->used_flags & VRING_USED_F_NO_NOTIFY)
		return;
	vq->used_flags |= VRING_USED_F_NO_NOTIFY;
	r = put_user(vq->used_flags, &vq->used->flags);
	if (r)
		vq_err(vq, "Failed to disable notification at %p: %d\n",
		       &vq->used->flags, r);
}
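
/*
 * Illustrative sketch, not part of this file: a backend built on this API
 * (vhost_net being the in-tree example) typically drives a virtqueue from
 * its handle_kick callback roughly as below.  The function name
 * handle_kick_example and the "consume the iovec" step are assumptions for
 * illustration only; see drivers/vhost/net.c for a real backend.
 *
 *	static void handle_kick_example(struct vhost_work *work)
 *	{
 *		struct vhost_virtqueue *vq =
 *			container_of(work, struct vhost_virtqueue, poll.work);
 *		unsigned int out, in;
 *		int head;
 *
 *		mutex_lock(&vq->mutex);
 *		vhost_disable_notify(vq);
 *		for (;;) {
 *			head = vhost_get_vq_desc(vq->dev, vq, vq->iov,
 *						 ARRAY_SIZE(vq->iov),
 *						 &out, &in, NULL, NULL);
 *			if (head < 0)
 *				break;		// guest memory access error
 *			if (head == vq->num) {
 *				// Ring empty: re-enable kicks, then recheck
 *				// in case the guest raced with us.
 *				if (unlikely(vhost_enable_notify(vq))) {
 *					vhost_disable_notify(vq);
 *					continue;
 *				}
 *				break;
 *			}
 *			// ... consume vq->iov[0..out+in) here ...
 *			vhost_add_used_and_signal(vq->dev, vq, head, 0);
 *		}
 *		mutex_unlock(&vq->mutex);
 *	}
 */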