/******************************************************************************
 * evtchn.c
 *
 * Driver for receiving and demuxing event-channel signals.
 *
 * Copyright (c) 2004-2005, K A Fraser
 * Multi-process extensions Copyright (c) 2004, Steven Smith
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/miscdevice.h>
#include <linux/major.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <linux/poll.h>
#include <linux/irq.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/cpu.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>

#include <xen/xen.h>
#include <xen/events.h>
#include <xen/evtchn.h>
#include <xen/xen-ops.h>
#include <asm/xen/hypervisor.h>

struct per_user_data {
	struct mutex bind_mutex; /* serialize bind/unbind operations */
	struct rb_root evtchns;
	unsigned int nr_evtchns;

	/* Notification ring, accessed via /dev/xen/evtchn. */
	unsigned int ring_size;
	evtchn_port_t *ring;
	unsigned int ring_cons, ring_prod, ring_overflow;
	struct mutex ring_cons_mutex; /* protect against concurrent readers */
	spinlock_t ring_prod_lock; /* protect against concurrent interrupts */

	/* Processes wait on this queue when ring is empty. */
	wait_queue_head_t evtchn_wait;
	struct fasync_struct *evtchn_async_queue;
	const char *name;

	domid_t restrict_domid;
};

#define UNRESTRICTED_DOMID ((domid_t)-1)

struct user_evtchn {
	struct rb_node node;
	struct per_user_data *user;
	unsigned port;
	bool enabled;
};

static void evtchn_free_ring(evtchn_port_t *ring)
{
	kvfree(ring);
}

static unsigned int evtchn_ring_offset(struct per_user_data *u,
				       unsigned int idx)
{
	return idx & (u->ring_size - 1);
}

static evtchn_port_t *evtchn_ring_entry(struct per_user_data *u,
					unsigned int idx)
{
	return u->ring + evtchn_ring_offset(u, idx);
}

static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
{
	struct rb_node **new = &(u->evtchns.rb_node), *parent = NULL;

	u->nr_evtchns++;

	while (*new) {
		struct user_evtchn *this;

		this = rb_entry(*new, struct user_evtchn, node);

		parent = *new;
		if (this->port < evtchn->port)
			new = &((*new)->rb_left);
		else if (this->port > evtchn->port)
			new = &((*new)->rb_right);
		else
			return -EEXIST;
	}

	/* Add new node and rebalance tree. */
	rb_link_node(&evtchn->node, parent, new);
	rb_insert_color(&evtchn->node, &u->evtchns);

	return 0;
}

static void del_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
{
	u->nr_evtchns--;
	rb_erase(&evtchn->node, &u->evtchns);
	kfree(evtchn);
}

static struct user_evtchn *find_evtchn(struct per_user_data *u, unsigned port)
{
	struct rb_node *node = u->evtchns.rb_node;

	while (node) {
		struct user_evtchn *evtchn;

		evtchn = rb_entry(node, struct user_evtchn, node);

		if (evtchn->port < port)
			node = node->rb_left;
		else if (evtchn->port > port)
			node = node->rb_right;
		else
			return evtchn;
	}
	return NULL;
}

static irqreturn_t evtchn_interrupt(int irq, void *data)
{
	struct user_evtchn *evtchn = data;
	struct per_user_data *u = evtchn->user;

	WARN(!evtchn->enabled,
	     "Interrupt for port %d, but apparently not enabled; per-user %p\n",
	     evtchn->port, u);

	disable_irq_nosync(irq);
	evtchn->enabled = false;

	spin_lock(&u->ring_prod_lock);

	if ((u->ring_prod - u->ring_cons) < u->ring_size) {
		*evtchn_ring_entry(u, u->ring_prod) = evtchn->port;
		wmb(); /* Ensure ring contents visible */
		if (u->ring_cons == u->ring_prod++) {
			wake_up_interruptible(&u->evtchn_wait);
			kill_fasync(&u->evtchn_async_queue, SIGIO, POLL_IN);
		}
	} else
		u->ring_overflow = 1;

	spin_unlock(&u->ring_prod_lock);

	return IRQ_HANDLED;
}
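
/*
 * Worked example of the ring arithmetic above (illustrative only):
 * ring_size is always a power of two (it starts at 64 and only ever
 * doubles), so ring_prod and ring_cons may be free-running counters and a
 * slot is selected by masking with (ring_size - 1).  With ring_size == 64,
 * ring_cons == 68 and ring_prod == 70, two ports are pending, in slots
 * 68 & 63 == 4 and 69 & 63 == 5, and unsigned wrap-around keeps
 * (ring_prod - ring_cons) correct even after the counters pass UINT_MAX.
 * Note that evtchn_interrupt() wakes readers only on the empty to
 * non-empty transition (ring_cons == ring_prod++); a reader that already
 * saw a non-empty ring drains it without needing a further wakeup.
 */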

static ssize_t evtchn_read(struct file *file, char __user *buf,
			   size_t count, loff_t *ppos)
{
	int rc;
	unsigned int c, p, bytes1 = 0, bytes2 = 0;
	struct per_user_data *u = file->private_data;

	/* Whole number of ports. */
	count &= ~(sizeof(evtchn_port_t)-1);

	if (count == 0)
		return 0;

	if (count > PAGE_SIZE)
		count = PAGE_SIZE;

	for (;;) {
		mutex_lock(&u->ring_cons_mutex);

		rc = -EFBIG;
		if (u->ring_overflow)
			goto unlock_out;

		c = u->ring_cons;
		p = u->ring_prod;
		if (c != p)
			break;

		mutex_unlock(&u->ring_cons_mutex);

		if (file->f_flags & O_NONBLOCK)
			return -EAGAIN;

		rc = wait_event_interruptible(u->evtchn_wait,
					      u->ring_cons != u->ring_prod);
		if (rc)
			return rc;
	}

	/* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
	if (((c ^ p) & u->ring_size) != 0) {
		bytes1 = (u->ring_size - evtchn_ring_offset(u, c)) *
			sizeof(evtchn_port_t);
		bytes2 = evtchn_ring_offset(u, p) * sizeof(evtchn_port_t);
	} else {
		bytes1 = (p - c) * sizeof(evtchn_port_t);
		bytes2 = 0;
	}

	/* Truncate chunks according to caller's maximum byte count. */
	if (bytes1 > count) {
		bytes1 = count;
		bytes2 = 0;
	} else if ((bytes1 + bytes2) > count) {
		bytes2 = count - bytes1;
	}

	rc = -EFAULT;
	rmb(); /* Ensure that we see the port before we copy it. */
	if (copy_to_user(buf, evtchn_ring_entry(u, c), bytes1) ||
	    ((bytes2 != 0) &&
	     copy_to_user(&buf[bytes1], &u->ring[0], bytes2)))
		goto unlock_out;

	u->ring_cons += (bytes1 + bytes2) / sizeof(evtchn_port_t);
	rc = bytes1 + bytes2;

unlock_out:
	mutex_unlock(&u->ring_cons_mutex);
	return rc;
}

static ssize_t evtchn_write(struct file *file, const char __user *buf,
			    size_t count, loff_t *ppos)
{
	int rc, i;
	evtchn_port_t *kbuf = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
	struct per_user_data *u = file->private_data;

	if (kbuf == NULL)
		return -ENOMEM;

	/* Whole number of ports. */
	count &= ~(sizeof(evtchn_port_t)-1);

	rc = 0;
	if (count == 0)
		goto out;

	if (count > PAGE_SIZE)
		count = PAGE_SIZE;

	rc = -EFAULT;
	if (copy_from_user(kbuf, buf, count) != 0)
		goto out;

	mutex_lock(&u->bind_mutex);

	for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) {
		unsigned port = kbuf[i];
		struct user_evtchn *evtchn;

		evtchn = find_evtchn(u, port);
		if (evtchn && !evtchn->enabled) {
			evtchn->enabled = true;
			enable_irq(irq_from_evtchn(port));
		}
	}

	mutex_unlock(&u->bind_mutex);

	rc = count;

out:
	free_page((unsigned long)kbuf);
	return rc;
}
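
/*
 * The read()/write() pair above implements the device's drain/unmask
 * protocol: read() returns pending ports as an array of evtchn_port_t,
 * and each returned port stays masked (its IRQ disabled) until userspace
 * writes it back to the device.  A minimal userspace sketch of one cycle,
 * assuming "fd" came from opening /dev/xen/evtchn; error handling elided,
 * and handle_port() stands in for application-defined work:
 *
 *	evtchn_port_t ports[64];
 *	ssize_t len = read(fd, ports, sizeof(ports));
 *	size_t i;
 *
 *	for (i = 0; i < len / sizeof(ports[0]); i++)
 *		handle_port(ports[i]);
 *
 *	write(fd, ports, len);		(re-enables the consumed ports)
 */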

static int evtchn_resize_ring(struct per_user_data *u)
{
	unsigned int new_size;
	evtchn_port_t *new_ring, *old_ring;

	/*
	 * Ensure the ring is large enough to capture all possible
	 * events. i.e., one free slot for each bound event.
	 */
	if (u->nr_evtchns <= u->ring_size)
		return 0;

	if (u->ring_size == 0)
		new_size = 64;
	else
		new_size = 2 * u->ring_size;

	new_ring = kvmalloc_array(new_size, sizeof(*new_ring), GFP_KERNEL);
	if (!new_ring)
		return -ENOMEM;

	old_ring = u->ring;

	/*
	 * Access to the ring contents is serialized by either the
	 * prod /or/ cons lock so take both when resizing.
	 */
	mutex_lock(&u->ring_cons_mutex);
	spin_lock_irq(&u->ring_prod_lock);

	/*
	 * Copy the old ring contents to the new ring.
	 *
	 * To take care of wrapping, a full ring, and the new index
	 * pointing into the second half, simply copy the old contents
	 * twice.
	 *
	 * +---------+    +------------------+
	 * |34567  12| -> |34567  1234567  12|
	 * +-----p-c-+    +-------c------p---+
	 */
	memcpy(new_ring, old_ring, u->ring_size * sizeof(*u->ring));
	memcpy(new_ring + u->ring_size, old_ring,
	       u->ring_size * sizeof(*u->ring));

	u->ring = new_ring;
	u->ring_size = new_size;

	spin_unlock_irq(&u->ring_prod_lock);
	mutex_unlock(&u->ring_cons_mutex);

	evtchn_free_ring(old_ring);

	return 0;
}
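
/*
 * Concrete instance of the double-copy trick above: growing from 64 to
 * 128 slots with free-running indices ring_cons == 60 and ring_prod == 68.
 * In the old ring the eight pending entries occupy slots 60..63 and 0..3;
 * after the two memcpy()s, new_ring[i & 127] == old_ring[i & 63] for every
 * index i, so the consumer finds the same entries at slots 60..67 of the
 * new ring and neither counter needs to change.
 */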

static int evtchn_bind_to_user(struct per_user_data *u, int port)
{
	struct user_evtchn *evtchn;
	struct evtchn_close close;
	int rc = 0;

	/*
	 * Ports are never reused, so every caller should pass in a
	 * unique port.
	 *
	 * (Locking not necessary because we haven't registered the
	 * interrupt handler yet, and our caller has already
	 * serialized bind operations.)
	 */

	evtchn = kzalloc(sizeof(*evtchn), GFP_KERNEL);
	if (!evtchn)
		return -ENOMEM;

	evtchn->user = u;
	evtchn->port = port;
	evtchn->enabled = true; /* start enabled */

	rc = add_evtchn(u, evtchn);
	if (rc < 0)
		goto err;

	rc = evtchn_resize_ring(u);
	if (rc < 0)
		goto err;

	rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, 0,
				       u->name, evtchn);
	if (rc < 0)
		goto err;

	rc = evtchn_make_refcounted(port);
	return rc;

err:
	/* bind failed, should close the port now */
	close.port = port;
	if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
		BUG();
	del_evtchn(u, evtchn);
	return rc;
}

static void evtchn_unbind_from_user(struct per_user_data *u,
				    struct user_evtchn *evtchn)
{
	int irq = irq_from_evtchn(evtchn->port);

	BUG_ON(irq < 0);

	unbind_from_irqhandler(irq, evtchn);

	del_evtchn(u, evtchn);
}

static DEFINE_PER_CPU(int, bind_last_selected_cpu);

static void evtchn_bind_interdom_next_vcpu(int evtchn)
{
	unsigned int selected_cpu, irq;
	struct irq_desc *desc;
	unsigned long flags;

	irq = irq_from_evtchn(evtchn);
	desc = irq_to_desc(irq);

	if (!desc)
		return;

	raw_spin_lock_irqsave(&desc->lock, flags);
	selected_cpu = this_cpu_read(bind_last_selected_cpu);
	selected_cpu = cpumask_next_and(selected_cpu,
			desc->irq_common_data.affinity, cpu_online_mask);

	if (unlikely(selected_cpu >= nr_cpu_ids))
		selected_cpu = cpumask_first_and(desc->irq_common_data.affinity,
				cpu_online_mask);

	this_cpu_write(bind_last_selected_cpu, selected_cpu);

	/* unmask expects irqs to be disabled */
	xen_rebind_evtchn_to_cpu(evtchn, selected_cpu);
	raw_spin_unlock_irqrestore(&desc->lock, flags);
}

static long evtchn_ioctl(struct file *file,
			 unsigned int cmd, unsigned long arg)
{
	int rc;
	struct per_user_data *u = file->private_data;
	void __user *uarg = (void __user *) arg;

	/* Prevent bind from racing with unbind */
	mutex_lock(&u->bind_mutex);

	switch (cmd) {
	case IOCTL_EVTCHN_BIND_VIRQ: {
		struct ioctl_evtchn_bind_virq bind;
		struct evtchn_bind_virq bind_virq;

		rc = -EACCES;
		if (u->restrict_domid != UNRESTRICTED_DOMID)
			break;

		rc = -EFAULT;
		if (copy_from_user(&bind, uarg, sizeof(bind)))
			break;

		bind_virq.virq = bind.virq;
		bind_virq.vcpu = xen_vcpu_nr(0);
		rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
						 &bind_virq);
		if (rc != 0)
			break;

		rc = evtchn_bind_to_user(u, bind_virq.port);
		if (rc == 0)
			rc = bind_virq.port;
		break;
	}

	case IOCTL_EVTCHN_BIND_INTERDOMAIN: {
		struct ioctl_evtchn_bind_interdomain bind;
		struct evtchn_bind_interdomain bind_interdomain;

		rc = -EFAULT;
		if (copy_from_user(&bind, uarg, sizeof(bind)))
			break;

		rc = -EACCES;
		if (u->restrict_domid != UNRESTRICTED_DOMID &&
		    u->restrict_domid != bind.remote_domain)
			break;

		bind_interdomain.remote_dom = bind.remote_domain;
		bind_interdomain.remote_port = bind.remote_port;
		rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
						 &bind_interdomain);
		if (rc != 0)
			break;

		rc = evtchn_bind_to_user(u, bind_interdomain.local_port);
		if (rc == 0) {
			rc = bind_interdomain.local_port;
			evtchn_bind_interdom_next_vcpu(rc);
		}
		break;
	}

	case IOCTL_EVTCHN_BIND_UNBOUND_PORT: {
		struct ioctl_evtchn_bind_unbound_port bind;
		struct evtchn_alloc_unbound alloc_unbound;

		rc = -EACCES;
		if (u->restrict_domid != UNRESTRICTED_DOMID)
			break;

		rc = -EFAULT;
		if (copy_from_user(&bind, uarg, sizeof(bind)))
			break;

		alloc_unbound.dom = DOMID_SELF;
		alloc_unbound.remote_dom = bind.remote_domain;
		rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
						 &alloc_unbound);
		if (rc != 0)
			break;

		rc = evtchn_bind_to_user(u, alloc_unbound.port);
		if (rc == 0)
			rc = alloc_unbound.port;
		break;
	}

	case IOCTL_EVTCHN_UNBIND: {
		struct ioctl_evtchn_unbind unbind;
		struct user_evtchn *evtchn;

		rc = -EFAULT;
		if (copy_from_user(&unbind, uarg, sizeof(unbind)))
			break;

		rc = -EINVAL;
		if (unbind.port >= xen_evtchn_nr_channels())
			break;

		rc = -ENOTCONN;
		evtchn = find_evtchn(u, unbind.port);
		if (!evtchn)
			break;

		disable_irq(irq_from_evtchn(unbind.port));
		evtchn_unbind_from_user(u, evtchn);
		rc = 0;
		break;
	}

	case IOCTL_EVTCHN_NOTIFY: {
		struct ioctl_evtchn_notify notify;
		struct user_evtchn *evtchn;

		rc = -EFAULT;
		if (copy_from_user(&notify, uarg, sizeof(notify)))
			break;

		rc = -ENOTCONN;
		evtchn = find_evtchn(u, notify.port);
		if (evtchn) {
			notify_remote_via_evtchn(notify.port);
			rc = 0;
		}
		break;
	}

	case IOCTL_EVTCHN_RESET: {
		/* Initialise the ring to empty. Clear errors. */
		mutex_lock(&u->ring_cons_mutex);
		spin_lock_irq(&u->ring_prod_lock);
		u->ring_cons = u->ring_prod = u->ring_overflow = 0;
		spin_unlock_irq(&u->ring_prod_lock);
		mutex_unlock(&u->ring_cons_mutex);
		rc = 0;
		break;
	}

	case IOCTL_EVTCHN_RESTRICT_DOMID: {
		struct ioctl_evtchn_restrict_domid ierd;

		rc = -EACCES;
		if (u->restrict_domid != UNRESTRICTED_DOMID)
			break;

		rc = -EFAULT;
		if (copy_from_user(&ierd, uarg, sizeof(ierd)))
			break;

		rc = -EINVAL;
		if (ierd.domid == 0 || ierd.domid >= DOMID_FIRST_RESERVED)
			break;

		u->restrict_domid = ierd.domid;
		rc = 0;

		break;
	}

	default:
		rc = -ENOSYS;
		break;
	}
	mutex_unlock(&u->bind_mutex);

	return rc;
}
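
/*
 * Userspace sketch of the ioctl interface above, assuming the UAPI
 * definitions from xen/evtchn.h (installed header path varies) and an
 * already-open "fd"; error handling elided, and remote_domid is a
 * placeholder for the peer domain id.  Each bind ioctl returns the local
 * port on success, which is the value later reported by read() and
 * accepted by IOCTL_EVTCHN_UNBIND:
 *
 *	struct ioctl_evtchn_bind_unbound_port bind = {
 *		.remote_domain = remote_domid,
 *	};
 *	int port = ioctl(fd, IOCTL_EVTCHN_BIND_UNBOUND_PORT, &bind);
 *
 *	struct ioctl_evtchn_notify notify = { .port = port };
 *	ioctl(fd, IOCTL_EVTCHN_NOTIFY, &notify);
 *
 *	struct ioctl_evtchn_unbind unbind = { .port = port };
 *	ioctl(fd, IOCTL_EVTCHN_UNBIND, &unbind);
 */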

static __poll_t evtchn_poll(struct file *file, poll_table *wait)
{
	__poll_t mask = EPOLLOUT | EPOLLWRNORM;
	struct per_user_data *u = file->private_data;

	poll_wait(file, &u->evtchn_wait, wait);
	if (u->ring_cons != u->ring_prod)
		mask |= EPOLLIN | EPOLLRDNORM;
	if (u->ring_overflow)
		mask = EPOLLERR;
	return mask;
}

static int evtchn_fasync(int fd, struct file *filp, int on)
{
	struct per_user_data *u = filp->private_data;
	return fasync_helper(fd, filp, on, &u->evtchn_async_queue);
}

static int evtchn_open(struct inode *inode, struct file *filp)
{
	struct per_user_data *u;

	u = kzalloc(sizeof(*u), GFP_KERNEL);
	if (u == NULL)
		return -ENOMEM;

	u->name = kasprintf(GFP_KERNEL, "evtchn:%s", current->comm);
	if (u->name == NULL) {
		kfree(u);
		return -ENOMEM;
	}

	init_waitqueue_head(&u->evtchn_wait);

	mutex_init(&u->bind_mutex);
	mutex_init(&u->ring_cons_mutex);
	spin_lock_init(&u->ring_prod_lock);

	u->restrict_domid = UNRESTRICTED_DOMID;

	filp->private_data = u;

	return nonseekable_open(inode, filp);
}

static int evtchn_release(struct inode *inode, struct file *filp)
{
	struct per_user_data *u = filp->private_data;
	struct rb_node *node;

	while ((node = u->evtchns.rb_node)) {
		struct user_evtchn *evtchn;

		evtchn = rb_entry(node, struct user_evtchn, node);
		disable_irq(irq_from_evtchn(evtchn->port));
		evtchn_unbind_from_user(u, evtchn);
	}

	evtchn_free_ring(u->ring);
	kfree(u->name);
	kfree(u);

	return 0;
}

static const struct file_operations evtchn_fops = {
	.owner = THIS_MODULE,
	.read = evtchn_read,
	.write = evtchn_write,
	.unlocked_ioctl = evtchn_ioctl,
	.poll = evtchn_poll,
	.fasync = evtchn_fasync,
	.open = evtchn_open,
	.release = evtchn_release,
	.llseek = no_llseek,
};

static struct miscdevice evtchn_miscdev = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = "xen/evtchn",
	.fops = &evtchn_fops,
};

static int __init evtchn_init(void)
{
	int err;

	if (!xen_domain())
		return -ENODEV;

	/* Create '/dev/xen/evtchn'. */
	err = misc_register(&evtchn_miscdev);
	if (err != 0) {
		pr_err("Could not register /dev/xen/evtchn\n");
		return err;
	}

	pr_info("Event-channel device installed\n");

	return 0;
}

static void __exit evtchn_cleanup(void)
{
	misc_deregister(&evtchn_miscdev);
}

module_init(evtchn_init);
module_exit(evtchn_cleanup);

MODULE_LICENSE("GPL");
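
/*
 * End-to-end consumption sketch tying the file_operations together,
 * assuming an fd opened on /dev/xen/evtchn with at least one port bound;
 * error handling elided, and drain_and_unmask() stands in for the read()
 * plus write() cycle shown earlier.  POLLERR reflects the ring_overflow
 * condition, which persists until cleared by IOCTL_EVTCHN_RESET:
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *
 *	for (;;) {
 *		poll(&pfd, 1, -1);
 *		if (pfd.revents & POLLERR)
 *			ioctl(fd, IOCTL_EVTCHN_RESET);
 *		else if (pfd.revents & POLLIN)
 *			drain_and_unmask(fd);
 *	}
 */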