/******************************************************************************
 * evtchn.c
 *
 * Driver for receiving and demuxing event-channel signals.
 *
 * Copyright (c) 2004-2005, K A Fraser
 * Multi-process extensions Copyright (c) 2004, Steven Smith
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/miscdevice.h>
#include <linux/major.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <linux/poll.h>
#include <linux/irq.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/cpu.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>

#include <xen/xen.h>
#include <xen/events.h>
#include <xen/evtchn.h>
#include <xen/xen-ops.h>
#include <asm/xen/hypervisor.h>

struct per_user_data {
	struct mutex bind_mutex; /* serialize bind/unbind operations */
	struct rb_root evtchns;
	unsigned int nr_evtchns;

	/* Notification ring, accessed via /dev/xen/evtchn. */
	unsigned int ring_size;
	evtchn_port_t *ring;
	unsigned int ring_cons, ring_prod, ring_overflow;
	struct mutex ring_cons_mutex; /* protect against concurrent readers */
	spinlock_t ring_prod_lock; /* protect against concurrent interrupts */

	/* Processes wait on this queue when ring is empty. */
	wait_queue_head_t evtchn_wait;
	struct fasync_struct *evtchn_async_queue;
	const char *name;

	domid_t restrict_domid;
};

#define UNRESTRICTED_DOMID ((domid_t)-1)

struct user_evtchn {
	struct rb_node node;
	struct per_user_data *user;
	evtchn_port_t port;
	bool enabled;
};

static void evtchn_free_ring(evtchn_port_t *ring)
{
	kvfree(ring);
}

static unsigned int evtchn_ring_offset(struct per_user_data *u,
				       unsigned int idx)
{
	return idx & (u->ring_size - 1);
}

static evtchn_port_t *evtchn_ring_entry(struct per_user_data *u,
					unsigned int idx)
{
	return u->ring + evtchn_ring_offset(u, idx);
}
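
/*
 * Note: the ring size is always a power of two (evtchn_resize_ring()
 * below starts at 64 entries and doubles from there), so the mask in
 * evtchn_ring_offset() is equivalent to "idx % u->ring_size" without
 * a division; e.g. with ring_size == 64, indices 1, 65 and 129 all
 * map to slot 1.
 */
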
static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
{
	struct rb_node **new = &(u->evtchns.rb_node), *parent = NULL;

	u->nr_evtchns++;

	while (*new) {
		struct user_evtchn *this;

		this = rb_entry(*new, struct user_evtchn, node);

		parent = *new;
		if (this->port < evtchn->port)
			new = &((*new)->rb_left);
		else if (this->port > evtchn->port)
			new = &((*new)->rb_right);
		else
			return -EEXIST;
	}

	/* Add new node and rebalance tree. */
	rb_link_node(&evtchn->node, parent, new);
	rb_insert_color(&evtchn->node, &u->evtchns);

	return 0;
}

static void del_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
{
	u->nr_evtchns--;
	rb_erase(&evtchn->node, &u->evtchns);
	kfree(evtchn);
}

static struct user_evtchn *find_evtchn(struct per_user_data *u,
				       evtchn_port_t port)
{
	struct rb_node *node = u->evtchns.rb_node;

	while (node) {
		struct user_evtchn *evtchn;

		evtchn = rb_entry(node, struct user_evtchn, node);

		if (evtchn->port < port)
			node = node->rb_left;
		else if (evtchn->port > port)
			node = node->rb_right;
		else
			return evtchn;
	}
	return NULL;
}

static irqreturn_t evtchn_interrupt(int irq, void *data)
{
	struct user_evtchn *evtchn = data;
	struct per_user_data *u = evtchn->user;

	WARN(!evtchn->enabled,
	     "Interrupt for port %u, but apparently not enabled; per-user %p\n",
	     evtchn->port, u);

	disable_irq_nosync(irq);
	evtchn->enabled = false;

	spin_lock(&u->ring_prod_lock);

	if ((u->ring_prod - u->ring_cons) < u->ring_size) {
		*evtchn_ring_entry(u, u->ring_prod) = evtchn->port;
		wmb(); /* Ensure ring contents visible */
		if (u->ring_cons == u->ring_prod++) {
			wake_up_interruptible(&u->evtchn_wait);
			kill_fasync(&u->evtchn_async_queue,
				    SIGIO, POLL_IN);
		}
	} else
		u->ring_overflow = 1;

	spin_unlock(&u->ring_prod_lock);

	return IRQ_HANDLED;
}
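
/*
 * ring_prod and ring_cons are free-running unsigned counters; they are
 * only reduced modulo the ring size when a slot is actually addressed
 * via evtchn_ring_offset().  Unsigned wrap-around is therefore benign:
 * e.g. ring_cons == 0xfffffffe and ring_prod == 0x1 still give a queue
 * depth of ring_prod - ring_cons == 3.
 */
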
static ssize_t evtchn_read(struct file *file, char __user *buf,
			   size_t count, loff_t *ppos)
{
	int rc;
	unsigned int c, p, bytes1 = 0, bytes2 = 0;
	struct per_user_data *u = file->private_data;

	/* Whole number of ports. */
	count &= ~(sizeof(evtchn_port_t)-1);

	if (count == 0)
		return 0;

	if (count > PAGE_SIZE)
		count = PAGE_SIZE;

	for (;;) {
		mutex_lock(&u->ring_cons_mutex);

		rc = -EFBIG;
		if (u->ring_overflow)
			goto unlock_out;

		c = u->ring_cons;
		p = u->ring_prod;
		if (c != p)
			break;

		mutex_unlock(&u->ring_cons_mutex);

		if (file->f_flags & O_NONBLOCK)
			return -EAGAIN;

		rc = wait_event_interruptible(u->evtchn_wait,
					      u->ring_cons != u->ring_prod);
		if (rc)
			return rc;
	}

	/* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
	if (((c ^ p) & u->ring_size) != 0) {
		bytes1 = (u->ring_size - evtchn_ring_offset(u, c)) *
			sizeof(evtchn_port_t);
		bytes2 = evtchn_ring_offset(u, p) * sizeof(evtchn_port_t);
	} else {
		bytes1 = (p - c) * sizeof(evtchn_port_t);
		bytes2 = 0;
	}

	/* Truncate chunks according to caller's maximum byte count. */
	if (bytes1 > count) {
		bytes1 = count;
		bytes2 = 0;
	} else if ((bytes1 + bytes2) > count) {
		bytes2 = count - bytes1;
	}

	rc = -EFAULT;
	rmb(); /* Ensure that we see the port before we copy it. */
	if (copy_to_user(buf, evtchn_ring_entry(u, c), bytes1) ||
	    ((bytes2 != 0) &&
	     copy_to_user(&buf[bytes1], &u->ring[0], bytes2)))
		goto unlock_out;

	u->ring_cons += (bytes1 + bytes2) / sizeof(evtchn_port_t);
	rc = bytes1 + bytes2;

 unlock_out:
	mutex_unlock(&u->ring_cons_mutex);
	return rc;
}

static ssize_t evtchn_write(struct file *file, const char __user *buf,
			    size_t count, loff_t *ppos)
{
	int rc, i;
	evtchn_port_t *kbuf = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
	struct per_user_data *u = file->private_data;

	if (kbuf == NULL)
		return -ENOMEM;

	/* Whole number of ports. */
	count &= ~(sizeof(evtchn_port_t)-1);

	rc = 0;
	if (count == 0)
		goto out;

	if (count > PAGE_SIZE)
		count = PAGE_SIZE;

	rc = -EFAULT;
	if (copy_from_user(kbuf, buf, count) != 0)
		goto out;

	mutex_lock(&u->bind_mutex);

	for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) {
		evtchn_port_t port = kbuf[i];
		struct user_evtchn *evtchn;

		evtchn = find_evtchn(u, port);
		if (evtchn && !evtchn->enabled) {
			evtchn->enabled = true;
			enable_irq(irq_from_evtchn(port));
		}
	}

	mutex_unlock(&u->bind_mutex);

	rc = count;

 out:
	free_page((unsigned long)kbuf);
	return rc;
}
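
/*
 * read() and write() together implement the demux handshake: when a
 * port fires, evtchn_interrupt() masks it (disable_irq_nosync()) and
 * queues it in the ring; read() hands the port number to userspace;
 * writing the port number back re-enables delivery via enable_irq().
 * A port therefore stays masked until userspace acknowledges it.
 */
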
static int evtchn_resize_ring(struct per_user_data *u)
{
	unsigned int new_size;
	evtchn_port_t *new_ring, *old_ring;

	/*
	 * Ensure the ring is large enough to capture all possible
	 * events. i.e., one free slot for each bound event.
	 */
	if (u->nr_evtchns <= u->ring_size)
		return 0;

	if (u->ring_size == 0)
		new_size = 64;
	else
		new_size = 2 * u->ring_size;

	new_ring = kvmalloc_array(new_size, sizeof(*new_ring), GFP_KERNEL);
	if (!new_ring)
		return -ENOMEM;

	old_ring = u->ring;

	/*
	 * Access to the ring contents is serialized by either the
	 * prod /or/ cons lock so take both when resizing.
	 */
	mutex_lock(&u->ring_cons_mutex);
	spin_lock_irq(&u->ring_prod_lock);

	/*
	 * Copy the old ring contents to the new ring.
	 *
	 * To take care of wrapping, a full ring, and the new index
	 * pointing into the second half, simply copy the old contents
	 * twice.
	 *
	 * +---------+    +------------------+
	 * |34567  12| -> |34567  1234567  12|
	 * +-----p-c-+    +-------c------p---+
	 */
	memcpy(new_ring, old_ring, u->ring_size * sizeof(*u->ring));
	memcpy(new_ring + u->ring_size, old_ring,
	       u->ring_size * sizeof(*u->ring));

	u->ring = new_ring;
	u->ring_size = new_size;

	spin_unlock_irq(&u->ring_prod_lock);
	mutex_unlock(&u->ring_cons_mutex);

	evtchn_free_ring(old_ring);

	return 0;
}

static int evtchn_bind_to_user(struct per_user_data *u, evtchn_port_t port)
{
	struct user_evtchn *evtchn;
	struct evtchn_close close;
	int rc = 0;

	/*
	 * Ports are never reused, so every caller should pass in a
	 * unique port.
	 *
	 * (Locking not necessary because we haven't registered the
	 * interrupt handler yet, and our caller has already
	 * serialized bind operations.)
	 */

	evtchn = kzalloc(sizeof(*evtchn), GFP_KERNEL);
	if (!evtchn)
		return -ENOMEM;

	evtchn->user = u;
	evtchn->port = port;
	evtchn->enabled = true; /* start enabled */

	rc = add_evtchn(u, evtchn);
	if (rc < 0)
		goto err;

	rc = evtchn_resize_ring(u);
	if (rc < 0)
		goto err;

	rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, 0,
				       u->name, evtchn);
	if (rc < 0)
		goto err;

	rc = evtchn_make_refcounted(port);
	return rc;

err:
	/* bind failed, should close the port now */
	close.port = port;
	if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
		BUG();
	del_evtchn(u, evtchn);
	return rc;
}

static void evtchn_unbind_from_user(struct per_user_data *u,
				    struct user_evtchn *evtchn)
{
	int irq = irq_from_evtchn(evtchn->port);

	BUG_ON(irq < 0);

	unbind_from_irqhandler(irq, evtchn);

	del_evtchn(u, evtchn);
}

static DEFINE_PER_CPU(int, bind_last_selected_cpu);

static void evtchn_bind_interdom_next_vcpu(evtchn_port_t evtchn)
{
	unsigned int selected_cpu, irq;
	struct irq_desc *desc;
	unsigned long flags;

	irq = irq_from_evtchn(evtchn);
	desc = irq_to_desc(irq);

	if (!desc)
		return;

	raw_spin_lock_irqsave(&desc->lock, flags);
	selected_cpu = this_cpu_read(bind_last_selected_cpu);
	selected_cpu = cpumask_next_and(selected_cpu,
			desc->irq_common_data.affinity, cpu_online_mask);

	if (unlikely(selected_cpu >= nr_cpu_ids))
		selected_cpu = cpumask_first_and(desc->irq_common_data.affinity,
				cpu_online_mask);

	this_cpu_write(bind_last_selected_cpu, selected_cpu);

	/* unmask expects irqs to be disabled */
	xen_set_affinity_evtchn(desc, selected_cpu);
	raw_spin_unlock_irqrestore(&desc->lock, flags);
}
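
/*
 * A minimal sketch of how a userspace client might drive the ioctl
 * interface below (hypothetical example, not part of this driver;
 * assumes the IOCTL_EVTCHN_* definitions from the kernel's uapi
 * evtchn header are available, and omits all error handling):
 *
 *	struct ioctl_evtchn_bind_unbound_port bind = {
 *		.remote_domain = remote_domid,	// hypothetical variable
 *	};
 *	struct ioctl_evtchn_notify notify;
 *	evtchn_port_t port;
 *	int fd = open("/dev/xen/evtchn", O_RDWR);
 *
 *	port = ioctl(fd, IOCTL_EVTCHN_BIND_UNBOUND_PORT, &bind);
 *	for (;;) {
 *		read(fd, &port, sizeof(port));	// blocks until an event fires
 *		// ... handle the event ...
 *		write(fd, &port, sizeof(port));	// re-enable (unmask) the port
 *		notify.port = port;
 *		ioctl(fd, IOCTL_EVTCHN_NOTIFY, &notify);  // kick the remote end
 *	}
 */
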
static long evtchn_ioctl(struct file *file,
			 unsigned int cmd, unsigned long arg)
{
	int rc;
	struct per_user_data *u = file->private_data;
	void __user *uarg = (void __user *) arg;

	/* Prevent bind from racing with unbind */
	mutex_lock(&u->bind_mutex);

	switch (cmd) {
	case IOCTL_EVTCHN_BIND_VIRQ: {
		struct ioctl_evtchn_bind_virq bind;
		struct evtchn_bind_virq bind_virq;

		rc = -EACCES;
		if (u->restrict_domid != UNRESTRICTED_DOMID)
			break;

		rc = -EFAULT;
		if (copy_from_user(&bind, uarg, sizeof(bind)))
			break;

		bind_virq.virq = bind.virq;
		bind_virq.vcpu = xen_vcpu_nr(0);
		rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
						 &bind_virq);
		if (rc != 0)
			break;

		rc = evtchn_bind_to_user(u, bind_virq.port);
		if (rc == 0)
			rc = bind_virq.port;
		break;
	}

	case IOCTL_EVTCHN_BIND_INTERDOMAIN: {
		struct ioctl_evtchn_bind_interdomain bind;
		struct evtchn_bind_interdomain bind_interdomain;

		rc = -EFAULT;
		if (copy_from_user(&bind, uarg, sizeof(bind)))
			break;

		rc = -EACCES;
		if (u->restrict_domid != UNRESTRICTED_DOMID &&
		    u->restrict_domid != bind.remote_domain)
			break;

		bind_interdomain.remote_dom = bind.remote_domain;
		bind_interdomain.remote_port = bind.remote_port;
		rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
						 &bind_interdomain);
		if (rc != 0)
			break;

		rc = evtchn_bind_to_user(u, bind_interdomain.local_port);
		if (rc == 0) {
			rc = bind_interdomain.local_port;
			evtchn_bind_interdom_next_vcpu(rc);
		}
		break;
	}

	case IOCTL_EVTCHN_BIND_UNBOUND_PORT: {
		struct ioctl_evtchn_bind_unbound_port bind;
		struct evtchn_alloc_unbound alloc_unbound;

		rc = -EACCES;
		if (u->restrict_domid != UNRESTRICTED_DOMID)
			break;

		rc = -EFAULT;
		if (copy_from_user(&bind, uarg, sizeof(bind)))
			break;

		alloc_unbound.dom = DOMID_SELF;
		alloc_unbound.remote_dom = bind.remote_domain;
		rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
						 &alloc_unbound);
		if (rc != 0)
			break;

		rc = evtchn_bind_to_user(u, alloc_unbound.port);
		if (rc == 0)
			rc = alloc_unbound.port;
		break;
	}

	case IOCTL_EVTCHN_UNBIND: {
		struct ioctl_evtchn_unbind unbind;
		struct user_evtchn *evtchn;

		rc = -EFAULT;
		if (copy_from_user(&unbind, uarg, sizeof(unbind)))
			break;

		rc = -EINVAL;
		if (unbind.port >= xen_evtchn_nr_channels())
			break;

		rc = -ENOTCONN;
		evtchn = find_evtchn(u, unbind.port);
		if (!evtchn)
			break;

		disable_irq(irq_from_evtchn(unbind.port));
		evtchn_unbind_from_user(u, evtchn);
		rc = 0;
		break;
	}

	case IOCTL_EVTCHN_NOTIFY: {
		struct ioctl_evtchn_notify notify;
		struct user_evtchn *evtchn;

		rc = -EFAULT;
		if (copy_from_user(&notify, uarg, sizeof(notify)))
			break;

		rc = -ENOTCONN;
		evtchn = find_evtchn(u, notify.port);
		if (evtchn) {
			notify_remote_via_evtchn(notify.port);
			rc = 0;
		}
		break;
	}

	case IOCTL_EVTCHN_RESET: {
		/* Initialise the ring to empty. Clear errors. */
		mutex_lock(&u->ring_cons_mutex);
		spin_lock_irq(&u->ring_prod_lock);
		u->ring_cons = u->ring_prod = u->ring_overflow = 0;
		spin_unlock_irq(&u->ring_prod_lock);
		mutex_unlock(&u->ring_cons_mutex);
		rc = 0;
		break;
	}

	case IOCTL_EVTCHN_RESTRICT_DOMID: {
		struct ioctl_evtchn_restrict_domid ierd;

		rc = -EACCES;
		if (u->restrict_domid != UNRESTRICTED_DOMID)
			break;

		rc = -EFAULT;
		if (copy_from_user(&ierd, uarg, sizeof(ierd)))
			break;

		rc = -EINVAL;
		if (ierd.domid == 0 || ierd.domid >= DOMID_FIRST_RESERVED)
			break;

		u->restrict_domid = ierd.domid;
		rc = 0;

		break;
	}

	default:
		rc = -ENOSYS;
		break;
	}
	mutex_unlock(&u->bind_mutex);

	return rc;
}
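
/*
 * Once ring_overflow is set, read() fails with -EFBIG and poll() below
 * reports EPOLLERR; ports that fire while the ring is full are never
 * queued, so userspace must recover with IOCTL_EVTCHN_RESET above.
 */
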
static __poll_t evtchn_poll(struct file *file, poll_table *wait)
{
	__poll_t mask = EPOLLOUT | EPOLLWRNORM;
	struct per_user_data *u = file->private_data;

	poll_wait(file, &u->evtchn_wait, wait);
	if (u->ring_cons != u->ring_prod)
		mask |= EPOLLIN | EPOLLRDNORM;
	if (u->ring_overflow)
		mask = EPOLLERR;
	return mask;
}

static int evtchn_fasync(int fd, struct file *filp, int on)
{
	struct per_user_data *u = filp->private_data;

	return fasync_helper(fd, filp, on, &u->evtchn_async_queue);
}

static int evtchn_open(struct inode *inode, struct file *filp)
{
	struct per_user_data *u;

	u = kzalloc(sizeof(*u), GFP_KERNEL);
	if (u == NULL)
		return -ENOMEM;

	u->name = kasprintf(GFP_KERNEL, "evtchn:%s", current->comm);
	if (u->name == NULL) {
		kfree(u);
		return -ENOMEM;
	}

	init_waitqueue_head(&u->evtchn_wait);

	mutex_init(&u->bind_mutex);
	mutex_init(&u->ring_cons_mutex);
	spin_lock_init(&u->ring_prod_lock);

	u->restrict_domid = UNRESTRICTED_DOMID;

	filp->private_data = u;

	return stream_open(inode, filp);
}

static int evtchn_release(struct inode *inode, struct file *filp)
{
	struct per_user_data *u = filp->private_data;
	struct rb_node *node;

	while ((node = u->evtchns.rb_node)) {
		struct user_evtchn *evtchn;

		evtchn = rb_entry(node, struct user_evtchn, node);
		disable_irq(irq_from_evtchn(evtchn->port));
		evtchn_unbind_from_user(u, evtchn);
	}

	evtchn_free_ring(u->ring);
	kfree(u->name);
	kfree(u);

	return 0;
}

static const struct file_operations evtchn_fops = {
	.owner   = THIS_MODULE,
	.read    = evtchn_read,
	.write   = evtchn_write,
	.unlocked_ioctl = evtchn_ioctl,
	.poll    = evtchn_poll,
	.fasync  = evtchn_fasync,
	.open    = evtchn_open,
	.release = evtchn_release,
	.llseek  = no_llseek,
};

static struct miscdevice evtchn_miscdev = {
	.minor = MISC_DYNAMIC_MINOR,
	.name  = "xen/evtchn",
	.fops  = &evtchn_fops,
};

static int __init evtchn_init(void)
{
	int err;

	if (!xen_domain())
		return -ENODEV;

	/* Create '/dev/xen/evtchn'. */
	err = misc_register(&evtchn_miscdev);
	if (err != 0) {
		pr_err("Could not register /dev/xen/evtchn\n");
		return err;
	}

	pr_info("Event-channel device installed\n");

	return 0;
}

static void __exit evtchn_cleanup(void)
{
	misc_deregister(&evtchn_miscdev);
}

module_init(evtchn_init);
module_exit(evtchn_cleanup);

MODULE_LICENSE("GPL");