1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Xen event channels 4 * 5 * Xen models interrupts with abstract event channels. Because each 6 * domain gets 1024 event channels, but NR_IRQ is not that large, we 7 * must dynamically map irqs<->event channels. The event channels 8 * interface with the rest of the kernel by defining a xen interrupt 9 * chip. When an event is received, it is mapped to an irq and sent 10 * through the normal interrupt processing path. 11 * 12 * There are four kinds of events which can be mapped to an event 13 * channel: 14 * 15 * 1. Inter-domain notifications. This includes all the virtual 16 * device events, since they're driven by front-ends in another domain 17 * (typically dom0). 18 * 2. VIRQs, typically used for timers. These are per-cpu events. 19 * 3. IPIs. 20 * 4. PIRQs - Hardware interrupts. 21 * 22 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007 23 */ 24 25 #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt 26 27 #include <linux/linkage.h> 28 #include <linux/interrupt.h> 29 #include <linux/irq.h> 30 #include <linux/moduleparam.h> 31 #include <linux/string.h> 32 #include <linux/memblock.h> 33 #include <linux/slab.h> 34 #include <linux/irqnr.h> 35 #include <linux/pci.h> 36 #include <linux/spinlock.h> 37 #include <linux/cpuhotplug.h> 38 #include <linux/atomic.h> 39 #include <linux/ktime.h> 40 41 #ifdef CONFIG_X86 42 #include <asm/desc.h> 43 #include <asm/ptrace.h> 44 #include <asm/idtentry.h> 45 #include <asm/irq.h> 46 #include <asm/io_apic.h> 47 #include <asm/i8259.h> 48 #include <asm/xen/cpuid.h> 49 #include <asm/xen/pci.h> 50 #endif 51 #include <asm/sync_bitops.h> 52 #include <asm/xen/hypercall.h> 53 #include <asm/xen/hypervisor.h> 54 #include <xen/page.h> 55 56 #include <xen/xen.h> 57 #include <xen/hvm.h> 58 #include <xen/xen-ops.h> 59 #include <xen/events.h> 60 #include <xen/interface/xen.h> 61 #include <xen/interface/event_channel.h> 62 #include <xen/interface/hvm/hvm_op.h> 63 #include <xen/interface/hvm/params.h> 64 #include <xen/interface/physdev.h> 65 #include <xen/interface/sched.h> 66 #include <xen/interface/vcpu.h> 67 #include <xen/xenbus.h> 68 #include <asm/hw_irq.h> 69 70 #include "events_internal.h" 71 72 #undef MODULE_PARAM_PREFIX 73 #define MODULE_PARAM_PREFIX "xen." 74 75 /* Interrupt types. */ 76 enum xen_irq_type { 77 IRQT_UNBOUND = 0, 78 IRQT_PIRQ, 79 IRQT_VIRQ, 80 IRQT_IPI, 81 IRQT_EVTCHN 82 }; 83 84 /* 85 * Packed IRQ information: 86 * type - enum xen_irq_type 87 * event channel - irq->event channel mapping 88 * cpu - cpu this event channel is bound to 89 * index - type-specific information: 90 * PIRQ - vector, with MSB being "needs EIO", or physical IRQ of the HVM 91 * guest, or GSI (real passthrough IRQ) of the device. 92 * VIRQ - virq number 93 * IPI - IPI vector 94 * EVTCHN - 95 */ 96 struct irq_info { 97 struct list_head list; 98 struct list_head eoi_list; 99 short refcnt; 100 u8 spurious_cnt; 101 u8 is_accounted; 102 short type; /* type: IRQT_* */ 103 u8 mask_reason; /* Why is event channel masked */ 104 #define EVT_MASK_REASON_EXPLICIT 0x01 105 #define EVT_MASK_REASON_TEMPORARY 0x02 106 #define EVT_MASK_REASON_EOI_PENDING 0x04 107 u8 is_active; /* Is event just being handled? */ 108 unsigned irq; 109 evtchn_port_t evtchn; /* event channel */ 110 unsigned short cpu; /* cpu bound */ 111 unsigned short eoi_cpu; /* EOI must happen on this cpu-1 */ 112 unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */ 113 u64 eoi_time; /* Time in jiffies when to EOI. */ 114 raw_spinlock_t lock; 115 116 union { 117 unsigned short virq; 118 enum ipi_vector ipi; 119 struct { 120 unsigned short pirq; 121 unsigned short gsi; 122 unsigned char vector; 123 unsigned char flags; 124 uint16_t domid; 125 } pirq; 126 struct xenbus_device *interdomain; 127 } u; 128 }; 129 130 #define PIRQ_NEEDS_EOI (1 << 0) 131 #define PIRQ_SHAREABLE (1 << 1) 132 #define PIRQ_MSI_GROUP (1 << 2) 133 134 static uint __read_mostly event_loop_timeout = 2; 135 module_param(event_loop_timeout, uint, 0644); 136 137 static uint __read_mostly event_eoi_delay = 10; 138 module_param(event_eoi_delay, uint, 0644); 139 140 const struct evtchn_ops *evtchn_ops; 141 142 /* 143 * This lock protects updates to the following mapping and reference-count 144 * arrays. The lock does not need to be acquired to read the mapping tables. 145 */ 146 static DEFINE_MUTEX(irq_mapping_update_lock); 147 148 /* 149 * Lock protecting event handling loop against removing event channels. 150 * Adding of event channels is no issue as the associated IRQ becomes active 151 * only after everything is setup (before request_[threaded_]irq() the handler 152 * can't be entered for an event, as the event channel will be unmasked only 153 * then). 154 */ 155 static DEFINE_RWLOCK(evtchn_rwlock); 156 157 /* 158 * Lock hierarchy: 159 * 160 * irq_mapping_update_lock 161 * evtchn_rwlock 162 * IRQ-desc lock 163 * percpu eoi_list_lock 164 * irq_info->lock 165 */ 166 167 static LIST_HEAD(xen_irq_list_head); 168 169 /* IRQ <-> VIRQ mapping. */ 170 static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1}; 171 172 /* IRQ <-> IPI mapping */ 173 static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1}; 174 175 /* Event channel distribution data */ 176 static atomic_t channels_on_cpu[NR_CPUS]; 177 178 static int **evtchn_to_irq; 179 #ifdef CONFIG_X86 180 static unsigned long *pirq_eoi_map; 181 #endif 182 static bool (*pirq_needs_eoi)(unsigned irq); 183 184 #define EVTCHN_ROW(e) (e / (PAGE_SIZE/sizeof(**evtchn_to_irq))) 185 #define EVTCHN_COL(e) (e % (PAGE_SIZE/sizeof(**evtchn_to_irq))) 186 #define EVTCHN_PER_ROW (PAGE_SIZE / sizeof(**evtchn_to_irq)) 187 188 /* Xen will never allocate port zero for any purpose. */ 189 #define VALID_EVTCHN(chn) ((chn) != 0) 190 191 static struct irq_info *legacy_info_ptrs[NR_IRQS_LEGACY]; 192 193 static struct irq_chip xen_dynamic_chip; 194 static struct irq_chip xen_lateeoi_chip; 195 static struct irq_chip xen_percpu_chip; 196 static struct irq_chip xen_pirq_chip; 197 static void enable_dynirq(struct irq_data *data); 198 static void disable_dynirq(struct irq_data *data); 199 200 static DEFINE_PER_CPU(unsigned int, irq_epoch); 201 202 static void clear_evtchn_to_irq_row(int *evtchn_row) 203 { 204 unsigned col; 205 206 for (col = 0; col < EVTCHN_PER_ROW; col++) 207 WRITE_ONCE(evtchn_row[col], -1); 208 } 209 210 static void clear_evtchn_to_irq_all(void) 211 { 212 unsigned row; 213 214 for (row = 0; row < EVTCHN_ROW(xen_evtchn_max_channels()); row++) { 215 if (evtchn_to_irq[row] == NULL) 216 continue; 217 clear_evtchn_to_irq_row(evtchn_to_irq[row]); 218 } 219 } 220 221 static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq) 222 { 223 unsigned row; 224 unsigned col; 225 int *evtchn_row; 226 227 if (evtchn >= xen_evtchn_max_channels()) 228 return -EINVAL; 229 230 row = EVTCHN_ROW(evtchn); 231 col = EVTCHN_COL(evtchn); 232 233 if (evtchn_to_irq[row] == NULL) { 234 /* Unallocated irq entries return -1 anyway */ 235 if (irq == -1) 236 return 0; 237 238 evtchn_row = (int *) __get_free_pages(GFP_KERNEL, 0); 239 if (evtchn_row == NULL) 240 return -ENOMEM; 241 242 clear_evtchn_to_irq_row(evtchn_row); 243 244 /* 245 * We've prepared an empty row for the mapping. If a different 246 * thread was faster inserting it, we can drop ours. 247 */ 248 if (cmpxchg(&evtchn_to_irq[row], NULL, evtchn_row) != NULL) 249 free_page((unsigned long) evtchn_row); 250 } 251 252 WRITE_ONCE(evtchn_to_irq[row][col], irq); 253 return 0; 254 } 255 256 int get_evtchn_to_irq(evtchn_port_t evtchn) 257 { 258 if (evtchn >= xen_evtchn_max_channels()) 259 return -1; 260 if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL) 261 return -1; 262 return READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]); 263 } 264 265 /* Get info for IRQ */ 266 static struct irq_info *info_for_irq(unsigned irq) 267 { 268 if (irq < nr_legacy_irqs()) 269 return legacy_info_ptrs[irq]; 270 else 271 return irq_get_chip_data(irq); 272 } 273 274 static void set_info_for_irq(unsigned int irq, struct irq_info *info) 275 { 276 if (irq < nr_legacy_irqs()) 277 legacy_info_ptrs[irq] = info; 278 else 279 irq_set_chip_data(irq, info); 280 } 281 282 /* Per CPU channel accounting */ 283 static void channels_on_cpu_dec(struct irq_info *info) 284 { 285 if (!info->is_accounted) 286 return; 287 288 info->is_accounted = 0; 289 290 if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids)) 291 return; 292 293 WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], -1 , 0)); 294 } 295 296 static void channels_on_cpu_inc(struct irq_info *info) 297 { 298 if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids)) 299 return; 300 301 if (WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], 1, 302 INT_MAX))) 303 return; 304 305 info->is_accounted = 1; 306 } 307 308 /* Constructors for packed IRQ information. */ 309 static int xen_irq_info_common_setup(struct irq_info *info, 310 unsigned irq, 311 enum xen_irq_type type, 312 evtchn_port_t evtchn, 313 unsigned short cpu) 314 { 315 int ret; 316 317 BUG_ON(info->type != IRQT_UNBOUND && info->type != type); 318 319 info->type = type; 320 info->irq = irq; 321 info->evtchn = evtchn; 322 info->cpu = cpu; 323 info->mask_reason = EVT_MASK_REASON_EXPLICIT; 324 raw_spin_lock_init(&info->lock); 325 326 ret = set_evtchn_to_irq(evtchn, irq); 327 if (ret < 0) 328 return ret; 329 330 irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN); 331 332 return xen_evtchn_port_setup(evtchn); 333 } 334 335 static int xen_irq_info_evtchn_setup(unsigned irq, 336 evtchn_port_t evtchn, 337 struct xenbus_device *dev) 338 { 339 struct irq_info *info = info_for_irq(irq); 340 int ret; 341 342 ret = xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0); 343 info->u.interdomain = dev; 344 if (dev) 345 atomic_inc(&dev->event_channels); 346 347 return ret; 348 } 349 350 static int xen_irq_info_ipi_setup(unsigned cpu, 351 unsigned irq, 352 evtchn_port_t evtchn, 353 enum ipi_vector ipi) 354 { 355 struct irq_info *info = info_for_irq(irq); 356 357 info->u.ipi = ipi; 358 359 per_cpu(ipi_to_irq, cpu)[ipi] = irq; 360 361 return xen_irq_info_common_setup(info, irq, IRQT_IPI, evtchn, 0); 362 } 363 364 static int xen_irq_info_virq_setup(unsigned cpu, 365 unsigned irq, 366 evtchn_port_t evtchn, 367 unsigned virq) 368 { 369 struct irq_info *info = info_for_irq(irq); 370 371 info->u.virq = virq; 372 373 per_cpu(virq_to_irq, cpu)[virq] = irq; 374 375 return xen_irq_info_common_setup(info, irq, IRQT_VIRQ, evtchn, 0); 376 } 377 378 static int xen_irq_info_pirq_setup(unsigned irq, 379 evtchn_port_t evtchn, 380 unsigned pirq, 381 unsigned gsi, 382 uint16_t domid, 383 unsigned char flags) 384 { 385 struct irq_info *info = info_for_irq(irq); 386 387 info->u.pirq.pirq = pirq; 388 info->u.pirq.gsi = gsi; 389 info->u.pirq.domid = domid; 390 info->u.pirq.flags = flags; 391 392 return xen_irq_info_common_setup(info, irq, IRQT_PIRQ, evtchn, 0); 393 } 394 395 static void xen_irq_info_cleanup(struct irq_info *info) 396 { 397 set_evtchn_to_irq(info->evtchn, -1); 398 xen_evtchn_port_remove(info->evtchn, info->cpu); 399 info->evtchn = 0; 400 channels_on_cpu_dec(info); 401 } 402 403 /* 404 * Accessors for packed IRQ information. 405 */ 406 evtchn_port_t evtchn_from_irq(unsigned irq) 407 { 408 const struct irq_info *info = NULL; 409 410 if (likely(irq < nr_irqs)) 411 info = info_for_irq(irq); 412 if (!info) 413 return 0; 414 415 return info->evtchn; 416 } 417 418 unsigned int irq_from_evtchn(evtchn_port_t evtchn) 419 { 420 return get_evtchn_to_irq(evtchn); 421 } 422 EXPORT_SYMBOL_GPL(irq_from_evtchn); 423 424 int irq_from_virq(unsigned int cpu, unsigned int virq) 425 { 426 return per_cpu(virq_to_irq, cpu)[virq]; 427 } 428 429 static enum ipi_vector ipi_from_irq(unsigned irq) 430 { 431 struct irq_info *info = info_for_irq(irq); 432 433 BUG_ON(info == NULL); 434 BUG_ON(info->type != IRQT_IPI); 435 436 return info->u.ipi; 437 } 438 439 static unsigned virq_from_irq(unsigned irq) 440 { 441 struct irq_info *info = info_for_irq(irq); 442 443 BUG_ON(info == NULL); 444 BUG_ON(info->type != IRQT_VIRQ); 445 446 return info->u.virq; 447 } 448 449 static unsigned pirq_from_irq(unsigned irq) 450 { 451 struct irq_info *info = info_for_irq(irq); 452 453 BUG_ON(info == NULL); 454 BUG_ON(info->type != IRQT_PIRQ); 455 456 return info->u.pirq.pirq; 457 } 458 459 static enum xen_irq_type type_from_irq(unsigned irq) 460 { 461 return info_for_irq(irq)->type; 462 } 463 464 static unsigned cpu_from_irq(unsigned irq) 465 { 466 return info_for_irq(irq)->cpu; 467 } 468 469 unsigned int cpu_from_evtchn(evtchn_port_t evtchn) 470 { 471 int irq = get_evtchn_to_irq(evtchn); 472 unsigned ret = 0; 473 474 if (irq != -1) 475 ret = cpu_from_irq(irq); 476 477 return ret; 478 } 479 480 static void do_mask(struct irq_info *info, u8 reason) 481 { 482 unsigned long flags; 483 484 raw_spin_lock_irqsave(&info->lock, flags); 485 486 if (!info->mask_reason) 487 mask_evtchn(info->evtchn); 488 489 info->mask_reason |= reason; 490 491 raw_spin_unlock_irqrestore(&info->lock, flags); 492 } 493 494 static void do_unmask(struct irq_info *info, u8 reason) 495 { 496 unsigned long flags; 497 498 raw_spin_lock_irqsave(&info->lock, flags); 499 500 info->mask_reason &= ~reason; 501 502 if (!info->mask_reason) 503 unmask_evtchn(info->evtchn); 504 505 raw_spin_unlock_irqrestore(&info->lock, flags); 506 } 507 508 #ifdef CONFIG_X86 509 static bool pirq_check_eoi_map(unsigned irq) 510 { 511 return test_bit(pirq_from_irq(irq), pirq_eoi_map); 512 } 513 #endif 514 515 static bool pirq_needs_eoi_flag(unsigned irq) 516 { 517 struct irq_info *info = info_for_irq(irq); 518 BUG_ON(info->type != IRQT_PIRQ); 519 520 return info->u.pirq.flags & PIRQ_NEEDS_EOI; 521 } 522 523 static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu, 524 bool force_affinity) 525 { 526 int irq = get_evtchn_to_irq(evtchn); 527 struct irq_info *info = info_for_irq(irq); 528 529 BUG_ON(irq == -1); 530 531 if (IS_ENABLED(CONFIG_SMP) && force_affinity) { 532 struct irq_data *data = irq_get_irq_data(irq); 533 534 irq_data_update_affinity(data, cpumask_of(cpu)); 535 irq_data_update_effective_affinity(data, cpumask_of(cpu)); 536 } 537 538 xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu); 539 540 channels_on_cpu_dec(info); 541 info->cpu = cpu; 542 channels_on_cpu_inc(info); 543 } 544 545 /** 546 * notify_remote_via_irq - send event to remote end of event channel via irq 547 * @irq: irq of event channel to send event to 548 * 549 * Unlike notify_remote_via_evtchn(), this is safe to use across 550 * save/restore. Notifications on a broken connection are silently 551 * dropped. 552 */ 553 void notify_remote_via_irq(int irq) 554 { 555 evtchn_port_t evtchn = evtchn_from_irq(irq); 556 557 if (VALID_EVTCHN(evtchn)) 558 notify_remote_via_evtchn(evtchn); 559 } 560 EXPORT_SYMBOL_GPL(notify_remote_via_irq); 561 562 struct lateeoi_work { 563 struct delayed_work delayed; 564 spinlock_t eoi_list_lock; 565 struct list_head eoi_list; 566 }; 567 568 static DEFINE_PER_CPU(struct lateeoi_work, lateeoi); 569 570 static void lateeoi_list_del(struct irq_info *info) 571 { 572 struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu); 573 unsigned long flags; 574 575 spin_lock_irqsave(&eoi->eoi_list_lock, flags); 576 list_del_init(&info->eoi_list); 577 spin_unlock_irqrestore(&eoi->eoi_list_lock, flags); 578 } 579 580 static void lateeoi_list_add(struct irq_info *info) 581 { 582 struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu); 583 struct irq_info *elem; 584 u64 now = get_jiffies_64(); 585 unsigned long delay; 586 unsigned long flags; 587 588 if (now < info->eoi_time) 589 delay = info->eoi_time - now; 590 else 591 delay = 1; 592 593 spin_lock_irqsave(&eoi->eoi_list_lock, flags); 594 595 if (list_empty(&eoi->eoi_list)) { 596 list_add(&info->eoi_list, &eoi->eoi_list); 597 mod_delayed_work_on(info->eoi_cpu, system_wq, 598 &eoi->delayed, delay); 599 } else { 600 list_for_each_entry_reverse(elem, &eoi->eoi_list, eoi_list) { 601 if (elem->eoi_time <= info->eoi_time) 602 break; 603 } 604 list_add(&info->eoi_list, &elem->eoi_list); 605 } 606 607 spin_unlock_irqrestore(&eoi->eoi_list_lock, flags); 608 } 609 610 static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious) 611 { 612 evtchn_port_t evtchn; 613 unsigned int cpu; 614 unsigned int delay = 0; 615 616 evtchn = info->evtchn; 617 if (!VALID_EVTCHN(evtchn) || !list_empty(&info->eoi_list)) 618 return; 619 620 if (spurious) { 621 struct xenbus_device *dev = info->u.interdomain; 622 unsigned int threshold = 1; 623 624 if (dev && dev->spurious_threshold) 625 threshold = dev->spurious_threshold; 626 627 if ((1 << info->spurious_cnt) < (HZ << 2)) { 628 if (info->spurious_cnt != 0xFF) 629 info->spurious_cnt++; 630 } 631 if (info->spurious_cnt > threshold) { 632 delay = 1 << (info->spurious_cnt - 1 - threshold); 633 if (delay > HZ) 634 delay = HZ; 635 if (!info->eoi_time) 636 info->eoi_cpu = smp_processor_id(); 637 info->eoi_time = get_jiffies_64() + delay; 638 if (dev) 639 atomic_add(delay, &dev->jiffies_eoi_delayed); 640 } 641 if (dev) 642 atomic_inc(&dev->spurious_events); 643 } else { 644 info->spurious_cnt = 0; 645 } 646 647 cpu = info->eoi_cpu; 648 if (info->eoi_time && 649 (info->irq_epoch == per_cpu(irq_epoch, cpu) || delay)) { 650 lateeoi_list_add(info); 651 return; 652 } 653 654 info->eoi_time = 0; 655 656 /* is_active hasn't been reset yet, do it now. */ 657 smp_store_release(&info->is_active, 0); 658 do_unmask(info, EVT_MASK_REASON_EOI_PENDING); 659 } 660 661 static void xen_irq_lateeoi_worker(struct work_struct *work) 662 { 663 struct lateeoi_work *eoi; 664 struct irq_info *info; 665 u64 now = get_jiffies_64(); 666 unsigned long flags; 667 668 eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed); 669 670 read_lock_irqsave(&evtchn_rwlock, flags); 671 672 while (true) { 673 spin_lock(&eoi->eoi_list_lock); 674 675 info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info, 676 eoi_list); 677 678 if (info == NULL || now < info->eoi_time) { 679 spin_unlock(&eoi->eoi_list_lock); 680 break; 681 } 682 683 list_del_init(&info->eoi_list); 684 685 spin_unlock(&eoi->eoi_list_lock); 686 687 info->eoi_time = 0; 688 689 xen_irq_lateeoi_locked(info, false); 690 } 691 692 if (info) 693 mod_delayed_work_on(info->eoi_cpu, system_wq, 694 &eoi->delayed, info->eoi_time - now); 695 696 read_unlock_irqrestore(&evtchn_rwlock, flags); 697 } 698 699 static void xen_cpu_init_eoi(unsigned int cpu) 700 { 701 struct lateeoi_work *eoi = &per_cpu(lateeoi, cpu); 702 703 INIT_DELAYED_WORK(&eoi->delayed, xen_irq_lateeoi_worker); 704 spin_lock_init(&eoi->eoi_list_lock); 705 INIT_LIST_HEAD(&eoi->eoi_list); 706 } 707 708 void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags) 709 { 710 struct irq_info *info; 711 unsigned long flags; 712 713 read_lock_irqsave(&evtchn_rwlock, flags); 714 715 info = info_for_irq(irq); 716 717 if (info) 718 xen_irq_lateeoi_locked(info, eoi_flags & XEN_EOI_FLAG_SPURIOUS); 719 720 read_unlock_irqrestore(&evtchn_rwlock, flags); 721 } 722 EXPORT_SYMBOL_GPL(xen_irq_lateeoi); 723 724 static void xen_irq_init(unsigned irq) 725 { 726 struct irq_info *info; 727 728 info = kzalloc(sizeof(*info), GFP_KERNEL); 729 if (info == NULL) 730 panic("Unable to allocate metadata for IRQ%d\n", irq); 731 732 info->type = IRQT_UNBOUND; 733 info->refcnt = -1; 734 735 set_info_for_irq(irq, info); 736 /* 737 * Interrupt affinity setting can be immediate. No point 738 * in delaying it until an interrupt is handled. 739 */ 740 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); 741 742 INIT_LIST_HEAD(&info->eoi_list); 743 list_add_tail(&info->list, &xen_irq_list_head); 744 } 745 746 static int __must_check xen_allocate_irqs_dynamic(int nvec) 747 { 748 int i, irq = irq_alloc_descs(-1, 0, nvec, -1); 749 750 if (irq >= 0) { 751 for (i = 0; i < nvec; i++) 752 xen_irq_init(irq + i); 753 } 754 755 return irq; 756 } 757 758 static inline int __must_check xen_allocate_irq_dynamic(void) 759 { 760 761 return xen_allocate_irqs_dynamic(1); 762 } 763 764 static int __must_check xen_allocate_irq_gsi(unsigned gsi) 765 { 766 int irq; 767 768 /* 769 * A PV guest has no concept of a GSI (since it has no ACPI 770 * nor access to/knowledge of the physical APICs). Therefore 771 * all IRQs are dynamically allocated from the entire IRQ 772 * space. 773 */ 774 if (xen_pv_domain() && !xen_initial_domain()) 775 return xen_allocate_irq_dynamic(); 776 777 /* Legacy IRQ descriptors are already allocated by the arch. */ 778 if (gsi < nr_legacy_irqs()) 779 irq = gsi; 780 else 781 irq = irq_alloc_desc_at(gsi, -1); 782 783 xen_irq_init(irq); 784 785 return irq; 786 } 787 788 static void xen_free_irq(unsigned irq) 789 { 790 struct irq_info *info = info_for_irq(irq); 791 unsigned long flags; 792 793 if (WARN_ON(!info)) 794 return; 795 796 write_lock_irqsave(&evtchn_rwlock, flags); 797 798 if (!list_empty(&info->eoi_list)) 799 lateeoi_list_del(info); 800 801 list_del(&info->list); 802 803 set_info_for_irq(irq, NULL); 804 805 WARN_ON(info->refcnt > 0); 806 807 write_unlock_irqrestore(&evtchn_rwlock, flags); 808 809 kfree(info); 810 811 /* Legacy IRQ descriptors are managed by the arch. */ 812 if (irq < nr_legacy_irqs()) 813 return; 814 815 irq_free_desc(irq); 816 } 817 818 static void xen_evtchn_close(evtchn_port_t port) 819 { 820 struct evtchn_close close; 821 822 close.port = port; 823 if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) 824 BUG(); 825 } 826 827 /* Not called for lateeoi events. */ 828 static void event_handler_exit(struct irq_info *info) 829 { 830 smp_store_release(&info->is_active, 0); 831 clear_evtchn(info->evtchn); 832 } 833 834 static void pirq_query_unmask(int irq) 835 { 836 struct physdev_irq_status_query irq_status; 837 struct irq_info *info = info_for_irq(irq); 838 839 BUG_ON(info->type != IRQT_PIRQ); 840 841 irq_status.irq = pirq_from_irq(irq); 842 if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status)) 843 irq_status.flags = 0; 844 845 info->u.pirq.flags &= ~PIRQ_NEEDS_EOI; 846 if (irq_status.flags & XENIRQSTAT_needs_eoi) 847 info->u.pirq.flags |= PIRQ_NEEDS_EOI; 848 } 849 850 static void eoi_pirq(struct irq_data *data) 851 { 852 struct irq_info *info = info_for_irq(data->irq); 853 evtchn_port_t evtchn = info ? info->evtchn : 0; 854 struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) }; 855 int rc = 0; 856 857 if (!VALID_EVTCHN(evtchn)) 858 return; 859 860 event_handler_exit(info); 861 862 if (pirq_needs_eoi(data->irq)) { 863 rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi); 864 WARN_ON(rc); 865 } 866 } 867 868 static void mask_ack_pirq(struct irq_data *data) 869 { 870 disable_dynirq(data); 871 eoi_pirq(data); 872 } 873 874 static unsigned int __startup_pirq(unsigned int irq) 875 { 876 struct evtchn_bind_pirq bind_pirq; 877 struct irq_info *info = info_for_irq(irq); 878 evtchn_port_t evtchn = evtchn_from_irq(irq); 879 int rc; 880 881 BUG_ON(info->type != IRQT_PIRQ); 882 883 if (VALID_EVTCHN(evtchn)) 884 goto out; 885 886 bind_pirq.pirq = pirq_from_irq(irq); 887 /* NB. We are happy to share unless we are probing. */ 888 bind_pirq.flags = info->u.pirq.flags & PIRQ_SHAREABLE ? 889 BIND_PIRQ__WILL_SHARE : 0; 890 rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq); 891 if (rc != 0) { 892 pr_warn("Failed to obtain physical IRQ %d\n", irq); 893 return 0; 894 } 895 evtchn = bind_pirq.port; 896 897 pirq_query_unmask(irq); 898 899 rc = set_evtchn_to_irq(evtchn, irq); 900 if (rc) 901 goto err; 902 903 info->evtchn = evtchn; 904 bind_evtchn_to_cpu(evtchn, 0, false); 905 906 rc = xen_evtchn_port_setup(evtchn); 907 if (rc) 908 goto err; 909 910 out: 911 do_unmask(info, EVT_MASK_REASON_EXPLICIT); 912 913 eoi_pirq(irq_get_irq_data(irq)); 914 915 return 0; 916 917 err: 918 pr_err("irq%d: Failed to set port to irq mapping (%d)\n", irq, rc); 919 xen_evtchn_close(evtchn); 920 return 0; 921 } 922 923 static unsigned int startup_pirq(struct irq_data *data) 924 { 925 return __startup_pirq(data->irq); 926 } 927 928 static void shutdown_pirq(struct irq_data *data) 929 { 930 unsigned int irq = data->irq; 931 struct irq_info *info = info_for_irq(irq); 932 evtchn_port_t evtchn = evtchn_from_irq(irq); 933 934 BUG_ON(info->type != IRQT_PIRQ); 935 936 if (!VALID_EVTCHN(evtchn)) 937 return; 938 939 do_mask(info, EVT_MASK_REASON_EXPLICIT); 940 xen_evtchn_close(evtchn); 941 xen_irq_info_cleanup(info); 942 } 943 944 static void enable_pirq(struct irq_data *data) 945 { 946 enable_dynirq(data); 947 } 948 949 static void disable_pirq(struct irq_data *data) 950 { 951 disable_dynirq(data); 952 } 953 954 int xen_irq_from_gsi(unsigned gsi) 955 { 956 struct irq_info *info; 957 958 list_for_each_entry(info, &xen_irq_list_head, list) { 959 if (info->type != IRQT_PIRQ) 960 continue; 961 962 if (info->u.pirq.gsi == gsi) 963 return info->irq; 964 } 965 966 return -1; 967 } 968 EXPORT_SYMBOL_GPL(xen_irq_from_gsi); 969 970 static void __unbind_from_irq(unsigned int irq) 971 { 972 evtchn_port_t evtchn = evtchn_from_irq(irq); 973 struct irq_info *info = info_for_irq(irq); 974 975 if (info->refcnt > 0) { 976 info->refcnt--; 977 if (info->refcnt != 0) 978 return; 979 } 980 981 if (VALID_EVTCHN(evtchn)) { 982 unsigned int cpu = cpu_from_irq(irq); 983 struct xenbus_device *dev; 984 985 xen_evtchn_close(evtchn); 986 987 switch (type_from_irq(irq)) { 988 case IRQT_VIRQ: 989 per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1; 990 break; 991 case IRQT_IPI: 992 per_cpu(ipi_to_irq, cpu)[ipi_from_irq(irq)] = -1; 993 break; 994 case IRQT_EVTCHN: 995 dev = info->u.interdomain; 996 if (dev) 997 atomic_dec(&dev->event_channels); 998 break; 999 default: 1000 break; 1001 } 1002 1003 xen_irq_info_cleanup(info); 1004 } 1005 1006 xen_free_irq(irq); 1007 } 1008 1009 /* 1010 * Do not make any assumptions regarding the relationship between the 1011 * IRQ number returned here and the Xen pirq argument. 1012 * 1013 * Note: We don't assign an event channel until the irq actually started 1014 * up. Return an existing irq if we've already got one for the gsi. 1015 * 1016 * Shareable implies level triggered, not shareable implies edge 1017 * triggered here. 1018 */ 1019 int xen_bind_pirq_gsi_to_irq(unsigned gsi, 1020 unsigned pirq, int shareable, char *name) 1021 { 1022 int irq; 1023 struct physdev_irq irq_op; 1024 int ret; 1025 1026 mutex_lock(&irq_mapping_update_lock); 1027 1028 irq = xen_irq_from_gsi(gsi); 1029 if (irq != -1) { 1030 pr_info("%s: returning irq %d for gsi %u\n", 1031 __func__, irq, gsi); 1032 goto out; 1033 } 1034 1035 irq = xen_allocate_irq_gsi(gsi); 1036 if (irq < 0) 1037 goto out; 1038 1039 irq_op.irq = irq; 1040 irq_op.vector = 0; 1041 1042 /* Only the privileged domain can do this. For non-priv, the pcifront 1043 * driver provides a PCI bus that does the call to do exactly 1044 * this in the priv domain. */ 1045 if (xen_initial_domain() && 1046 HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) { 1047 xen_free_irq(irq); 1048 irq = -ENOSPC; 1049 goto out; 1050 } 1051 1052 ret = xen_irq_info_pirq_setup(irq, 0, pirq, gsi, DOMID_SELF, 1053 shareable ? PIRQ_SHAREABLE : 0); 1054 if (ret < 0) { 1055 __unbind_from_irq(irq); 1056 irq = ret; 1057 goto out; 1058 } 1059 1060 pirq_query_unmask(irq); 1061 /* We try to use the handler with the appropriate semantic for the 1062 * type of interrupt: if the interrupt is an edge triggered 1063 * interrupt we use handle_edge_irq. 1064 * 1065 * On the other hand if the interrupt is level triggered we use 1066 * handle_fasteoi_irq like the native code does for this kind of 1067 * interrupts. 1068 * 1069 * Depending on the Xen version, pirq_needs_eoi might return true 1070 * not only for level triggered interrupts but for edge triggered 1071 * interrupts too. In any case Xen always honors the eoi mechanism, 1072 * not injecting any more pirqs of the same kind if the first one 1073 * hasn't received an eoi yet. Therefore using the fasteoi handler 1074 * is the right choice either way. 1075 */ 1076 if (shareable) 1077 irq_set_chip_and_handler_name(irq, &xen_pirq_chip, 1078 handle_fasteoi_irq, name); 1079 else 1080 irq_set_chip_and_handler_name(irq, &xen_pirq_chip, 1081 handle_edge_irq, name); 1082 1083 out: 1084 mutex_unlock(&irq_mapping_update_lock); 1085 1086 return irq; 1087 } 1088 1089 #ifdef CONFIG_PCI_MSI 1090 int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc) 1091 { 1092 int rc; 1093 struct physdev_get_free_pirq op_get_free_pirq; 1094 1095 op_get_free_pirq.type = MAP_PIRQ_TYPE_MSI; 1096 rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq); 1097 1098 WARN_ONCE(rc == -ENOSYS, 1099 "hypervisor does not support the PHYSDEVOP_get_free_pirq interface\n"); 1100 1101 return rc ? -1 : op_get_free_pirq.pirq; 1102 } 1103 1104 int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, 1105 int pirq, int nvec, const char *name, domid_t domid) 1106 { 1107 int i, irq, ret; 1108 1109 mutex_lock(&irq_mapping_update_lock); 1110 1111 irq = xen_allocate_irqs_dynamic(nvec); 1112 if (irq < 0) 1113 goto out; 1114 1115 for (i = 0; i < nvec; i++) { 1116 irq_set_chip_and_handler_name(irq + i, &xen_pirq_chip, handle_edge_irq, name); 1117 1118 ret = xen_irq_info_pirq_setup(irq + i, 0, pirq + i, 0, domid, 1119 i == 0 ? 0 : PIRQ_MSI_GROUP); 1120 if (ret < 0) 1121 goto error_irq; 1122 } 1123 1124 ret = irq_set_msi_desc(irq, msidesc); 1125 if (ret < 0) 1126 goto error_irq; 1127 out: 1128 mutex_unlock(&irq_mapping_update_lock); 1129 return irq; 1130 error_irq: 1131 while (nvec--) 1132 __unbind_from_irq(irq + nvec); 1133 mutex_unlock(&irq_mapping_update_lock); 1134 return ret; 1135 } 1136 #endif 1137 1138 int xen_destroy_irq(int irq) 1139 { 1140 struct physdev_unmap_pirq unmap_irq; 1141 struct irq_info *info = info_for_irq(irq); 1142 int rc = -ENOENT; 1143 1144 mutex_lock(&irq_mapping_update_lock); 1145 1146 /* 1147 * If trying to remove a vector in a MSI group different 1148 * than the first one skip the PIRQ unmap unless this vector 1149 * is the first one in the group. 1150 */ 1151 if (xen_initial_domain() && !(info->u.pirq.flags & PIRQ_MSI_GROUP)) { 1152 unmap_irq.pirq = info->u.pirq.pirq; 1153 unmap_irq.domid = info->u.pirq.domid; 1154 rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq); 1155 /* If another domain quits without making the pci_disable_msix 1156 * call, the Xen hypervisor takes care of freeing the PIRQs 1157 * (free_domain_pirqs). 1158 */ 1159 if ((rc == -ESRCH && info->u.pirq.domid != DOMID_SELF)) 1160 pr_info("domain %d does not have %d anymore\n", 1161 info->u.pirq.domid, info->u.pirq.pirq); 1162 else if (rc) { 1163 pr_warn("unmap irq failed %d\n", rc); 1164 goto out; 1165 } 1166 } 1167 1168 xen_free_irq(irq); 1169 1170 out: 1171 mutex_unlock(&irq_mapping_update_lock); 1172 return rc; 1173 } 1174 1175 int xen_irq_from_pirq(unsigned pirq) 1176 { 1177 int irq; 1178 1179 struct irq_info *info; 1180 1181 mutex_lock(&irq_mapping_update_lock); 1182 1183 list_for_each_entry(info, &xen_irq_list_head, list) { 1184 if (info->type != IRQT_PIRQ) 1185 continue; 1186 irq = info->irq; 1187 if (info->u.pirq.pirq == pirq) 1188 goto out; 1189 } 1190 irq = -1; 1191 out: 1192 mutex_unlock(&irq_mapping_update_lock); 1193 1194 return irq; 1195 } 1196 1197 1198 int xen_pirq_from_irq(unsigned irq) 1199 { 1200 return pirq_from_irq(irq); 1201 } 1202 EXPORT_SYMBOL_GPL(xen_pirq_from_irq); 1203 1204 static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip, 1205 struct xenbus_device *dev) 1206 { 1207 int irq; 1208 int ret; 1209 1210 if (evtchn >= xen_evtchn_max_channels()) 1211 return -ENOMEM; 1212 1213 mutex_lock(&irq_mapping_update_lock); 1214 1215 irq = get_evtchn_to_irq(evtchn); 1216 1217 if (irq == -1) { 1218 irq = xen_allocate_irq_dynamic(); 1219 if (irq < 0) 1220 goto out; 1221 1222 irq_set_chip_and_handler_name(irq, chip, 1223 handle_edge_irq, "event"); 1224 1225 ret = xen_irq_info_evtchn_setup(irq, evtchn, dev); 1226 if (ret < 0) { 1227 __unbind_from_irq(irq); 1228 irq = ret; 1229 goto out; 1230 } 1231 /* 1232 * New interdomain events are initially bound to vCPU0 This 1233 * is required to setup the event channel in the first 1234 * place and also important for UP guests because the 1235 * affinity setting is not invoked on them so nothing would 1236 * bind the channel. 1237 */ 1238 bind_evtchn_to_cpu(evtchn, 0, false); 1239 } else { 1240 struct irq_info *info = info_for_irq(irq); 1241 WARN_ON(info == NULL || info->type != IRQT_EVTCHN); 1242 } 1243 1244 out: 1245 mutex_unlock(&irq_mapping_update_lock); 1246 1247 return irq; 1248 } 1249 1250 int bind_evtchn_to_irq(evtchn_port_t evtchn) 1251 { 1252 return bind_evtchn_to_irq_chip(evtchn, &xen_dynamic_chip, NULL); 1253 } 1254 EXPORT_SYMBOL_GPL(bind_evtchn_to_irq); 1255 1256 int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn) 1257 { 1258 return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip, NULL); 1259 } 1260 EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi); 1261 1262 static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) 1263 { 1264 struct evtchn_bind_ipi bind_ipi; 1265 evtchn_port_t evtchn; 1266 int ret, irq; 1267 1268 mutex_lock(&irq_mapping_update_lock); 1269 1270 irq = per_cpu(ipi_to_irq, cpu)[ipi]; 1271 1272 if (irq == -1) { 1273 irq = xen_allocate_irq_dynamic(); 1274 if (irq < 0) 1275 goto out; 1276 1277 irq_set_chip_and_handler_name(irq, &xen_percpu_chip, 1278 handle_percpu_irq, "ipi"); 1279 1280 bind_ipi.vcpu = xen_vcpu_nr(cpu); 1281 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, 1282 &bind_ipi) != 0) 1283 BUG(); 1284 evtchn = bind_ipi.port; 1285 1286 ret = xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi); 1287 if (ret < 0) { 1288 __unbind_from_irq(irq); 1289 irq = ret; 1290 goto out; 1291 } 1292 /* 1293 * Force the affinity mask to the target CPU so proc shows 1294 * the correct target. 1295 */ 1296 bind_evtchn_to_cpu(evtchn, cpu, true); 1297 } else { 1298 struct irq_info *info = info_for_irq(irq); 1299 WARN_ON(info == NULL || info->type != IRQT_IPI); 1300 } 1301 1302 out: 1303 mutex_unlock(&irq_mapping_update_lock); 1304 return irq; 1305 } 1306 1307 static int bind_interdomain_evtchn_to_irq_chip(struct xenbus_device *dev, 1308 evtchn_port_t remote_port, 1309 struct irq_chip *chip) 1310 { 1311 struct evtchn_bind_interdomain bind_interdomain; 1312 int err; 1313 1314 bind_interdomain.remote_dom = dev->otherend_id; 1315 bind_interdomain.remote_port = remote_port; 1316 1317 err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, 1318 &bind_interdomain); 1319 1320 return err ? : bind_evtchn_to_irq_chip(bind_interdomain.local_port, 1321 chip, dev); 1322 } 1323 1324 int bind_interdomain_evtchn_to_irq_lateeoi(struct xenbus_device *dev, 1325 evtchn_port_t remote_port) 1326 { 1327 return bind_interdomain_evtchn_to_irq_chip(dev, remote_port, 1328 &xen_lateeoi_chip); 1329 } 1330 EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi); 1331 1332 static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn) 1333 { 1334 struct evtchn_status status; 1335 evtchn_port_t port; 1336 int rc = -ENOENT; 1337 1338 memset(&status, 0, sizeof(status)); 1339 for (port = 0; port < xen_evtchn_max_channels(); port++) { 1340 status.dom = DOMID_SELF; 1341 status.port = port; 1342 rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status); 1343 if (rc < 0) 1344 continue; 1345 if (status.status != EVTCHNSTAT_virq) 1346 continue; 1347 if (status.u.virq == virq && status.vcpu == xen_vcpu_nr(cpu)) { 1348 *evtchn = port; 1349 break; 1350 } 1351 } 1352 return rc; 1353 } 1354 1355 /** 1356 * xen_evtchn_nr_channels - number of usable event channel ports 1357 * 1358 * This may be less than the maximum supported by the current 1359 * hypervisor ABI. Use xen_evtchn_max_channels() for the maximum 1360 * supported. 1361 */ 1362 unsigned xen_evtchn_nr_channels(void) 1363 { 1364 return evtchn_ops->nr_channels(); 1365 } 1366 EXPORT_SYMBOL_GPL(xen_evtchn_nr_channels); 1367 1368 int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu) 1369 { 1370 struct evtchn_bind_virq bind_virq; 1371 evtchn_port_t evtchn = 0; 1372 int irq, ret; 1373 1374 mutex_lock(&irq_mapping_update_lock); 1375 1376 irq = per_cpu(virq_to_irq, cpu)[virq]; 1377 1378 if (irq == -1) { 1379 irq = xen_allocate_irq_dynamic(); 1380 if (irq < 0) 1381 goto out; 1382 1383 if (percpu) 1384 irq_set_chip_and_handler_name(irq, &xen_percpu_chip, 1385 handle_percpu_irq, "virq"); 1386 else 1387 irq_set_chip_and_handler_name(irq, &xen_dynamic_chip, 1388 handle_edge_irq, "virq"); 1389 1390 bind_virq.virq = virq; 1391 bind_virq.vcpu = xen_vcpu_nr(cpu); 1392 ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, 1393 &bind_virq); 1394 if (ret == 0) 1395 evtchn = bind_virq.port; 1396 else { 1397 if (ret == -EEXIST) 1398 ret = find_virq(virq, cpu, &evtchn); 1399 BUG_ON(ret < 0); 1400 } 1401 1402 ret = xen_irq_info_virq_setup(cpu, irq, evtchn, virq); 1403 if (ret < 0) { 1404 __unbind_from_irq(irq); 1405 irq = ret; 1406 goto out; 1407 } 1408 1409 /* 1410 * Force the affinity mask for percpu interrupts so proc 1411 * shows the correct target. 1412 */ 1413 bind_evtchn_to_cpu(evtchn, cpu, percpu); 1414 } else { 1415 struct irq_info *info = info_for_irq(irq); 1416 WARN_ON(info == NULL || info->type != IRQT_VIRQ); 1417 } 1418 1419 out: 1420 mutex_unlock(&irq_mapping_update_lock); 1421 1422 return irq; 1423 } 1424 1425 static void unbind_from_irq(unsigned int irq) 1426 { 1427 mutex_lock(&irq_mapping_update_lock); 1428 __unbind_from_irq(irq); 1429 mutex_unlock(&irq_mapping_update_lock); 1430 } 1431 1432 static int bind_evtchn_to_irqhandler_chip(evtchn_port_t evtchn, 1433 irq_handler_t handler, 1434 unsigned long irqflags, 1435 const char *devname, void *dev_id, 1436 struct irq_chip *chip) 1437 { 1438 int irq, retval; 1439 1440 irq = bind_evtchn_to_irq_chip(evtchn, chip, NULL); 1441 if (irq < 0) 1442 return irq; 1443 retval = request_irq(irq, handler, irqflags, devname, dev_id); 1444 if (retval != 0) { 1445 unbind_from_irq(irq); 1446 return retval; 1447 } 1448 1449 return irq; 1450 } 1451 1452 int bind_evtchn_to_irqhandler(evtchn_port_t evtchn, 1453 irq_handler_t handler, 1454 unsigned long irqflags, 1455 const char *devname, void *dev_id) 1456 { 1457 return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags, 1458 devname, dev_id, 1459 &xen_dynamic_chip); 1460 } 1461 EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler); 1462 1463 int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn, 1464 irq_handler_t handler, 1465 unsigned long irqflags, 1466 const char *devname, void *dev_id) 1467 { 1468 return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags, 1469 devname, dev_id, 1470 &xen_lateeoi_chip); 1471 } 1472 EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler_lateeoi); 1473 1474 static int bind_interdomain_evtchn_to_irqhandler_chip( 1475 struct xenbus_device *dev, evtchn_port_t remote_port, 1476 irq_handler_t handler, unsigned long irqflags, 1477 const char *devname, void *dev_id, struct irq_chip *chip) 1478 { 1479 int irq, retval; 1480 1481 irq = bind_interdomain_evtchn_to_irq_chip(dev, remote_port, chip); 1482 if (irq < 0) 1483 return irq; 1484 1485 retval = request_irq(irq, handler, irqflags, devname, dev_id); 1486 if (retval != 0) { 1487 unbind_from_irq(irq); 1488 return retval; 1489 } 1490 1491 return irq; 1492 } 1493 1494 int bind_interdomain_evtchn_to_irqhandler_lateeoi(struct xenbus_device *dev, 1495 evtchn_port_t remote_port, 1496 irq_handler_t handler, 1497 unsigned long irqflags, 1498 const char *devname, 1499 void *dev_id) 1500 { 1501 return bind_interdomain_evtchn_to_irqhandler_chip(dev, 1502 remote_port, handler, irqflags, devname, 1503 dev_id, &xen_lateeoi_chip); 1504 } 1505 EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler_lateeoi); 1506 1507 int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, 1508 irq_handler_t handler, 1509 unsigned long irqflags, const char *devname, void *dev_id) 1510 { 1511 int irq, retval; 1512 1513 irq = bind_virq_to_irq(virq, cpu, irqflags & IRQF_PERCPU); 1514 if (irq < 0) 1515 return irq; 1516 retval = request_irq(irq, handler, irqflags, devname, dev_id); 1517 if (retval != 0) { 1518 unbind_from_irq(irq); 1519 return retval; 1520 } 1521 1522 return irq; 1523 } 1524 EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler); 1525 1526 int bind_ipi_to_irqhandler(enum ipi_vector ipi, 1527 unsigned int cpu, 1528 irq_handler_t handler, 1529 unsigned long irqflags, 1530 const char *devname, 1531 void *dev_id) 1532 { 1533 int irq, retval; 1534 1535 irq = bind_ipi_to_irq(ipi, cpu); 1536 if (irq < 0) 1537 return irq; 1538 1539 irqflags |= IRQF_NO_SUSPEND | IRQF_FORCE_RESUME | IRQF_EARLY_RESUME; 1540 retval = request_irq(irq, handler, irqflags, devname, dev_id); 1541 if (retval != 0) { 1542 unbind_from_irq(irq); 1543 return retval; 1544 } 1545 1546 return irq; 1547 } 1548 1549 void unbind_from_irqhandler(unsigned int irq, void *dev_id) 1550 { 1551 struct irq_info *info = info_for_irq(irq); 1552 1553 if (WARN_ON(!info)) 1554 return; 1555 free_irq(irq, dev_id); 1556 unbind_from_irq(irq); 1557 } 1558 EXPORT_SYMBOL_GPL(unbind_from_irqhandler); 1559 1560 /** 1561 * xen_set_irq_priority() - set an event channel priority. 1562 * @irq:irq bound to an event channel. 1563 * @priority: priority between XEN_IRQ_PRIORITY_MAX and XEN_IRQ_PRIORITY_MIN. 1564 */ 1565 int xen_set_irq_priority(unsigned irq, unsigned priority) 1566 { 1567 struct evtchn_set_priority set_priority; 1568 1569 set_priority.port = evtchn_from_irq(irq); 1570 set_priority.priority = priority; 1571 1572 return HYPERVISOR_event_channel_op(EVTCHNOP_set_priority, 1573 &set_priority); 1574 } 1575 EXPORT_SYMBOL_GPL(xen_set_irq_priority); 1576 1577 int evtchn_make_refcounted(evtchn_port_t evtchn) 1578 { 1579 int irq = get_evtchn_to_irq(evtchn); 1580 struct irq_info *info; 1581 1582 if (irq == -1) 1583 return -ENOENT; 1584 1585 info = info_for_irq(irq); 1586 1587 if (!info) 1588 return -ENOENT; 1589 1590 WARN_ON(info->refcnt != -1); 1591 1592 info->refcnt = 1; 1593 1594 return 0; 1595 } 1596 EXPORT_SYMBOL_GPL(evtchn_make_refcounted); 1597 1598 int evtchn_get(evtchn_port_t evtchn) 1599 { 1600 int irq; 1601 struct irq_info *info; 1602 int err = -ENOENT; 1603 1604 if (evtchn >= xen_evtchn_max_channels()) 1605 return -EINVAL; 1606 1607 mutex_lock(&irq_mapping_update_lock); 1608 1609 irq = get_evtchn_to_irq(evtchn); 1610 if (irq == -1) 1611 goto done; 1612 1613 info = info_for_irq(irq); 1614 1615 if (!info) 1616 goto done; 1617 1618 err = -EINVAL; 1619 if (info->refcnt <= 0 || info->refcnt == SHRT_MAX) 1620 goto done; 1621 1622 info->refcnt++; 1623 err = 0; 1624 done: 1625 mutex_unlock(&irq_mapping_update_lock); 1626 1627 return err; 1628 } 1629 EXPORT_SYMBOL_GPL(evtchn_get); 1630 1631 void evtchn_put(evtchn_port_t evtchn) 1632 { 1633 int irq = get_evtchn_to_irq(evtchn); 1634 if (WARN_ON(irq == -1)) 1635 return; 1636 unbind_from_irq(irq); 1637 } 1638 EXPORT_SYMBOL_GPL(evtchn_put); 1639 1640 void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) 1641 { 1642 int irq; 1643 1644 #ifdef CONFIG_X86 1645 if (unlikely(vector == XEN_NMI_VECTOR)) { 1646 int rc = HYPERVISOR_vcpu_op(VCPUOP_send_nmi, xen_vcpu_nr(cpu), 1647 NULL); 1648 if (rc < 0) 1649 printk(KERN_WARNING "Sending nmi to CPU%d failed (rc:%d)\n", cpu, rc); 1650 return; 1651 } 1652 #endif 1653 irq = per_cpu(ipi_to_irq, cpu)[vector]; 1654 BUG_ON(irq < 0); 1655 notify_remote_via_irq(irq); 1656 } 1657 1658 struct evtchn_loop_ctrl { 1659 ktime_t timeout; 1660 unsigned count; 1661 bool defer_eoi; 1662 }; 1663 1664 void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl) 1665 { 1666 int irq; 1667 struct irq_info *info; 1668 struct xenbus_device *dev; 1669 1670 irq = get_evtchn_to_irq(port); 1671 if (irq == -1) 1672 return; 1673 1674 /* 1675 * Check for timeout every 256 events. 1676 * We are setting the timeout value only after the first 256 1677 * events in order to not hurt the common case of few loop 1678 * iterations. The 256 is basically an arbitrary value. 1679 * 1680 * In case we are hitting the timeout we need to defer all further 1681 * EOIs in order to ensure to leave the event handling loop rather 1682 * sooner than later. 1683 */ 1684 if (!ctrl->defer_eoi && !(++ctrl->count & 0xff)) { 1685 ktime_t kt = ktime_get(); 1686 1687 if (!ctrl->timeout) { 1688 kt = ktime_add_ms(kt, 1689 jiffies_to_msecs(event_loop_timeout)); 1690 ctrl->timeout = kt; 1691 } else if (kt > ctrl->timeout) { 1692 ctrl->defer_eoi = true; 1693 } 1694 } 1695 1696 info = info_for_irq(irq); 1697 if (xchg_acquire(&info->is_active, 1)) 1698 return; 1699 1700 dev = (info->type == IRQT_EVTCHN) ? info->u.interdomain : NULL; 1701 if (dev) 1702 atomic_inc(&dev->events); 1703 1704 if (ctrl->defer_eoi) { 1705 info->eoi_cpu = smp_processor_id(); 1706 info->irq_epoch = __this_cpu_read(irq_epoch); 1707 info->eoi_time = get_jiffies_64() + event_eoi_delay; 1708 } 1709 1710 generic_handle_irq(irq); 1711 } 1712 1713 static void __xen_evtchn_do_upcall(void) 1714 { 1715 struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); 1716 int cpu = smp_processor_id(); 1717 struct evtchn_loop_ctrl ctrl = { 0 }; 1718 1719 read_lock(&evtchn_rwlock); 1720 1721 do { 1722 vcpu_info->evtchn_upcall_pending = 0; 1723 1724 xen_evtchn_handle_events(cpu, &ctrl); 1725 1726 BUG_ON(!irqs_disabled()); 1727 1728 virt_rmb(); /* Hypervisor can set upcall pending. */ 1729 1730 } while (vcpu_info->evtchn_upcall_pending); 1731 1732 read_unlock(&evtchn_rwlock); 1733 1734 /* 1735 * Increment irq_epoch only now to defer EOIs only for 1736 * xen_irq_lateeoi() invocations occurring from inside the loop 1737 * above. 1738 */ 1739 __this_cpu_inc(irq_epoch); 1740 } 1741 1742 void xen_evtchn_do_upcall(struct pt_regs *regs) 1743 { 1744 struct pt_regs *old_regs = set_irq_regs(regs); 1745 1746 irq_enter(); 1747 1748 __xen_evtchn_do_upcall(); 1749 1750 irq_exit(); 1751 set_irq_regs(old_regs); 1752 } 1753 1754 void xen_hvm_evtchn_do_upcall(void) 1755 { 1756 __xen_evtchn_do_upcall(); 1757 } 1758 EXPORT_SYMBOL_GPL(xen_hvm_evtchn_do_upcall); 1759 1760 /* Rebind a new event channel to an existing irq. */ 1761 void rebind_evtchn_irq(evtchn_port_t evtchn, int irq) 1762 { 1763 struct irq_info *info = info_for_irq(irq); 1764 1765 if (WARN_ON(!info)) 1766 return; 1767 1768 /* Make sure the irq is masked, since the new event channel 1769 will also be masked. */ 1770 disable_irq(irq); 1771 1772 mutex_lock(&irq_mapping_update_lock); 1773 1774 /* After resume the irq<->evtchn mappings are all cleared out */ 1775 BUG_ON(get_evtchn_to_irq(evtchn) != -1); 1776 /* Expect irq to have been bound before, 1777 so there should be a proper type */ 1778 BUG_ON(info->type == IRQT_UNBOUND); 1779 1780 (void)xen_irq_info_evtchn_setup(irq, evtchn, NULL); 1781 1782 mutex_unlock(&irq_mapping_update_lock); 1783 1784 bind_evtchn_to_cpu(evtchn, info->cpu, false); 1785 1786 /* Unmask the event channel. */ 1787 enable_irq(irq); 1788 } 1789 1790 /* Rebind an evtchn so that it gets delivered to a specific cpu */ 1791 static int xen_rebind_evtchn_to_cpu(struct irq_info *info, unsigned int tcpu) 1792 { 1793 struct evtchn_bind_vcpu bind_vcpu; 1794 evtchn_port_t evtchn = info ? info->evtchn : 0; 1795 1796 if (!VALID_EVTCHN(evtchn)) 1797 return -1; 1798 1799 if (!xen_support_evtchn_rebind()) 1800 return -1; 1801 1802 /* Send future instances of this interrupt to other vcpu. */ 1803 bind_vcpu.port = evtchn; 1804 bind_vcpu.vcpu = xen_vcpu_nr(tcpu); 1805 1806 /* 1807 * Mask the event while changing the VCPU binding to prevent 1808 * it being delivered on an unexpected VCPU. 1809 */ 1810 do_mask(info, EVT_MASK_REASON_TEMPORARY); 1811 1812 /* 1813 * If this fails, it usually just indicates that we're dealing with a 1814 * virq or IPI channel, which don't actually need to be rebound. Ignore 1815 * it, but don't do the xenlinux-level rebind in that case. 1816 */ 1817 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0) 1818 bind_evtchn_to_cpu(evtchn, tcpu, false); 1819 1820 do_unmask(info, EVT_MASK_REASON_TEMPORARY); 1821 1822 return 0; 1823 } 1824 1825 /* 1826 * Find the CPU within @dest mask which has the least number of channels 1827 * assigned. This is not precise as the per cpu counts can be modified 1828 * concurrently. 1829 */ 1830 static unsigned int select_target_cpu(const struct cpumask *dest) 1831 { 1832 unsigned int cpu, best_cpu = UINT_MAX, minch = UINT_MAX; 1833 1834 for_each_cpu_and(cpu, dest, cpu_online_mask) { 1835 unsigned int curch = atomic_read(&channels_on_cpu[cpu]); 1836 1837 if (curch < minch) { 1838 minch = curch; 1839 best_cpu = cpu; 1840 } 1841 } 1842 1843 /* 1844 * Catch the unlikely case that dest contains no online CPUs. Can't 1845 * recurse. 1846 */ 1847 if (best_cpu == UINT_MAX) 1848 return select_target_cpu(cpu_online_mask); 1849 1850 return best_cpu; 1851 } 1852 1853 static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest, 1854 bool force) 1855 { 1856 unsigned int tcpu = select_target_cpu(dest); 1857 int ret; 1858 1859 ret = xen_rebind_evtchn_to_cpu(info_for_irq(data->irq), tcpu); 1860 if (!ret) 1861 irq_data_update_effective_affinity(data, cpumask_of(tcpu)); 1862 1863 return ret; 1864 } 1865 1866 static void enable_dynirq(struct irq_data *data) 1867 { 1868 struct irq_info *info = info_for_irq(data->irq); 1869 evtchn_port_t evtchn = info ? info->evtchn : 0; 1870 1871 if (VALID_EVTCHN(evtchn)) 1872 do_unmask(info, EVT_MASK_REASON_EXPLICIT); 1873 } 1874 1875 static void disable_dynirq(struct irq_data *data) 1876 { 1877 struct irq_info *info = info_for_irq(data->irq); 1878 evtchn_port_t evtchn = info ? info->evtchn : 0; 1879 1880 if (VALID_EVTCHN(evtchn)) 1881 do_mask(info, EVT_MASK_REASON_EXPLICIT); 1882 } 1883 1884 static void ack_dynirq(struct irq_data *data) 1885 { 1886 struct irq_info *info = info_for_irq(data->irq); 1887 evtchn_port_t evtchn = info ? info->evtchn : 0; 1888 1889 if (VALID_EVTCHN(evtchn)) 1890 event_handler_exit(info); 1891 } 1892 1893 static void mask_ack_dynirq(struct irq_data *data) 1894 { 1895 disable_dynirq(data); 1896 ack_dynirq(data); 1897 } 1898 1899 static void lateeoi_ack_dynirq(struct irq_data *data) 1900 { 1901 struct irq_info *info = info_for_irq(data->irq); 1902 evtchn_port_t evtchn = info ? info->evtchn : 0; 1903 1904 if (VALID_EVTCHN(evtchn)) { 1905 do_mask(info, EVT_MASK_REASON_EOI_PENDING); 1906 /* 1907 * Don't call event_handler_exit(). 1908 * Need to keep is_active non-zero in order to ignore re-raised 1909 * events after cpu affinity changes while a lateeoi is pending. 1910 */ 1911 clear_evtchn(evtchn); 1912 } 1913 } 1914 1915 static void lateeoi_mask_ack_dynirq(struct irq_data *data) 1916 { 1917 struct irq_info *info = info_for_irq(data->irq); 1918 evtchn_port_t evtchn = info ? info->evtchn : 0; 1919 1920 if (VALID_EVTCHN(evtchn)) { 1921 do_mask(info, EVT_MASK_REASON_EXPLICIT); 1922 event_handler_exit(info); 1923 } 1924 } 1925 1926 static int retrigger_dynirq(struct irq_data *data) 1927 { 1928 struct irq_info *info = info_for_irq(data->irq); 1929 evtchn_port_t evtchn = info ? info->evtchn : 0; 1930 1931 if (!VALID_EVTCHN(evtchn)) 1932 return 0; 1933 1934 do_mask(info, EVT_MASK_REASON_TEMPORARY); 1935 set_evtchn(evtchn); 1936 do_unmask(info, EVT_MASK_REASON_TEMPORARY); 1937 1938 return 1; 1939 } 1940 1941 static void restore_pirqs(void) 1942 { 1943 int pirq, rc, irq, gsi; 1944 struct physdev_map_pirq map_irq; 1945 struct irq_info *info; 1946 1947 list_for_each_entry(info, &xen_irq_list_head, list) { 1948 if (info->type != IRQT_PIRQ) 1949 continue; 1950 1951 pirq = info->u.pirq.pirq; 1952 gsi = info->u.pirq.gsi; 1953 irq = info->irq; 1954 1955 /* save/restore of PT devices doesn't work, so at this point the 1956 * only devices present are GSI based emulated devices */ 1957 if (!gsi) 1958 continue; 1959 1960 map_irq.domid = DOMID_SELF; 1961 map_irq.type = MAP_PIRQ_TYPE_GSI; 1962 map_irq.index = gsi; 1963 map_irq.pirq = pirq; 1964 1965 rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); 1966 if (rc) { 1967 pr_warn("xen map irq failed gsi=%d irq=%d pirq=%d rc=%d\n", 1968 gsi, irq, pirq, rc); 1969 xen_free_irq(irq); 1970 continue; 1971 } 1972 1973 printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq); 1974 1975 __startup_pirq(irq); 1976 } 1977 } 1978 1979 static void restore_cpu_virqs(unsigned int cpu) 1980 { 1981 struct evtchn_bind_virq bind_virq; 1982 evtchn_port_t evtchn; 1983 int virq, irq; 1984 1985 for (virq = 0; virq < NR_VIRQS; virq++) { 1986 if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) 1987 continue; 1988 1989 BUG_ON(virq_from_irq(irq) != virq); 1990 1991 /* Get a new binding from Xen. */ 1992 bind_virq.virq = virq; 1993 bind_virq.vcpu = xen_vcpu_nr(cpu); 1994 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, 1995 &bind_virq) != 0) 1996 BUG(); 1997 evtchn = bind_virq.port; 1998 1999 /* Record the new mapping. */ 2000 (void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq); 2001 /* The affinity mask is still valid */ 2002 bind_evtchn_to_cpu(evtchn, cpu, false); 2003 } 2004 } 2005 2006 static void restore_cpu_ipis(unsigned int cpu) 2007 { 2008 struct evtchn_bind_ipi bind_ipi; 2009 evtchn_port_t evtchn; 2010 int ipi, irq; 2011 2012 for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) { 2013 if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) 2014 continue; 2015 2016 BUG_ON(ipi_from_irq(irq) != ipi); 2017 2018 /* Get a new binding from Xen. */ 2019 bind_ipi.vcpu = xen_vcpu_nr(cpu); 2020 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, 2021 &bind_ipi) != 0) 2022 BUG(); 2023 evtchn = bind_ipi.port; 2024 2025 /* Record the new mapping. */ 2026 (void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi); 2027 /* The affinity mask is still valid */ 2028 bind_evtchn_to_cpu(evtchn, cpu, false); 2029 } 2030 } 2031 2032 /* Clear an irq's pending state, in preparation for polling on it */ 2033 void xen_clear_irq_pending(int irq) 2034 { 2035 struct irq_info *info = info_for_irq(irq); 2036 evtchn_port_t evtchn = info ? info->evtchn : 0; 2037 2038 if (VALID_EVTCHN(evtchn)) 2039 event_handler_exit(info); 2040 } 2041 EXPORT_SYMBOL(xen_clear_irq_pending); 2042 void xen_set_irq_pending(int irq) 2043 { 2044 evtchn_port_t evtchn = evtchn_from_irq(irq); 2045 2046 if (VALID_EVTCHN(evtchn)) 2047 set_evtchn(evtchn); 2048 } 2049 2050 bool xen_test_irq_pending(int irq) 2051 { 2052 evtchn_port_t evtchn = evtchn_from_irq(irq); 2053 bool ret = false; 2054 2055 if (VALID_EVTCHN(evtchn)) 2056 ret = test_evtchn(evtchn); 2057 2058 return ret; 2059 } 2060 2061 /* Poll waiting for an irq to become pending with timeout. In the usual case, 2062 * the irq will be disabled so it won't deliver an interrupt. */ 2063 void xen_poll_irq_timeout(int irq, u64 timeout) 2064 { 2065 evtchn_port_t evtchn = evtchn_from_irq(irq); 2066 2067 if (VALID_EVTCHN(evtchn)) { 2068 struct sched_poll poll; 2069 2070 poll.nr_ports = 1; 2071 poll.timeout = timeout; 2072 set_xen_guest_handle(poll.ports, &evtchn); 2073 2074 if (HYPERVISOR_sched_op(SCHEDOP_poll, &poll) != 0) 2075 BUG(); 2076 } 2077 } 2078 EXPORT_SYMBOL(xen_poll_irq_timeout); 2079 /* Poll waiting for an irq to become pending. In the usual case, the 2080 * irq will be disabled so it won't deliver an interrupt. */ 2081 void xen_poll_irq(int irq) 2082 { 2083 xen_poll_irq_timeout(irq, 0 /* no timeout */); 2084 } 2085 2086 /* Check whether the IRQ line is shared with other guests. */ 2087 int xen_test_irq_shared(int irq) 2088 { 2089 struct irq_info *info = info_for_irq(irq); 2090 struct physdev_irq_status_query irq_status; 2091 2092 if (WARN_ON(!info)) 2093 return -ENOENT; 2094 2095 irq_status.irq = info->u.pirq.pirq; 2096 2097 if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status)) 2098 return 0; 2099 return !(irq_status.flags & XENIRQSTAT_shared); 2100 } 2101 EXPORT_SYMBOL_GPL(xen_test_irq_shared); 2102 2103 void xen_irq_resume(void) 2104 { 2105 unsigned int cpu; 2106 struct irq_info *info; 2107 2108 /* New event-channel space is not 'live' yet. */ 2109 xen_evtchn_resume(); 2110 2111 /* No IRQ <-> event-channel mappings. */ 2112 list_for_each_entry(info, &xen_irq_list_head, list) { 2113 /* Zap event-channel binding */ 2114 info->evtchn = 0; 2115 /* Adjust accounting */ 2116 channels_on_cpu_dec(info); 2117 } 2118 2119 clear_evtchn_to_irq_all(); 2120 2121 for_each_possible_cpu(cpu) { 2122 restore_cpu_virqs(cpu); 2123 restore_cpu_ipis(cpu); 2124 } 2125 2126 restore_pirqs(); 2127 } 2128 2129 static struct irq_chip xen_dynamic_chip __read_mostly = { 2130 .name = "xen-dyn", 2131 2132 .irq_disable = disable_dynirq, 2133 .irq_mask = disable_dynirq, 2134 .irq_unmask = enable_dynirq, 2135 2136 .irq_ack = ack_dynirq, 2137 .irq_mask_ack = mask_ack_dynirq, 2138 2139 .irq_set_affinity = set_affinity_irq, 2140 .irq_retrigger = retrigger_dynirq, 2141 }; 2142 2143 static struct irq_chip xen_lateeoi_chip __read_mostly = { 2144 /* The chip name needs to contain "xen-dyn" for irqbalance to work. */ 2145 .name = "xen-dyn-lateeoi", 2146 2147 .irq_disable = disable_dynirq, 2148 .irq_mask = disable_dynirq, 2149 .irq_unmask = enable_dynirq, 2150 2151 .irq_ack = lateeoi_ack_dynirq, 2152 .irq_mask_ack = lateeoi_mask_ack_dynirq, 2153 2154 .irq_set_affinity = set_affinity_irq, 2155 .irq_retrigger = retrigger_dynirq, 2156 }; 2157 2158 static struct irq_chip xen_pirq_chip __read_mostly = { 2159 .name = "xen-pirq", 2160 2161 .irq_startup = startup_pirq, 2162 .irq_shutdown = shutdown_pirq, 2163 .irq_enable = enable_pirq, 2164 .irq_disable = disable_pirq, 2165 2166 .irq_mask = disable_dynirq, 2167 .irq_unmask = enable_dynirq, 2168 2169 .irq_ack = eoi_pirq, 2170 .irq_eoi = eoi_pirq, 2171 .irq_mask_ack = mask_ack_pirq, 2172 2173 .irq_set_affinity = set_affinity_irq, 2174 2175 .irq_retrigger = retrigger_dynirq, 2176 }; 2177 2178 static struct irq_chip xen_percpu_chip __read_mostly = { 2179 .name = "xen-percpu", 2180 2181 .irq_disable = disable_dynirq, 2182 .irq_mask = disable_dynirq, 2183 .irq_unmask = enable_dynirq, 2184 2185 .irq_ack = ack_dynirq, 2186 }; 2187 2188 #ifdef CONFIG_X86 2189 #ifdef CONFIG_XEN_PVHVM 2190 /* Vector callbacks are better than PCI interrupts to receive event 2191 * channel notifications because we can receive vector callbacks on any 2192 * vcpu and we don't need PCI support or APIC interactions. */ 2193 void xen_setup_callback_vector(void) 2194 { 2195 uint64_t callback_via; 2196 2197 if (xen_have_vector_callback) { 2198 callback_via = HVM_CALLBACK_VECTOR(HYPERVISOR_CALLBACK_VECTOR); 2199 if (xen_set_callback_via(callback_via)) { 2200 pr_err("Request for Xen HVM callback vector failed\n"); 2201 xen_have_vector_callback = false; 2202 } 2203 } 2204 } 2205 2206 /* 2207 * Setup per-vCPU vector-type callbacks. If this setup is unavailable, 2208 * fallback to the global vector-type callback. 2209 */ 2210 static __init void xen_init_setup_upcall_vector(void) 2211 { 2212 if (!xen_have_vector_callback) 2213 return; 2214 2215 if ((cpuid_eax(xen_cpuid_base() + 4) & XEN_HVM_CPUID_UPCALL_VECTOR) && 2216 !xen_set_upcall_vector(0)) 2217 xen_percpu_upcall = true; 2218 else if (xen_feature(XENFEAT_hvm_callback_vector)) 2219 xen_setup_callback_vector(); 2220 else 2221 xen_have_vector_callback = false; 2222 } 2223 2224 int xen_set_upcall_vector(unsigned int cpu) 2225 { 2226 int rc; 2227 xen_hvm_evtchn_upcall_vector_t op = { 2228 .vector = HYPERVISOR_CALLBACK_VECTOR, 2229 .vcpu = per_cpu(xen_vcpu_id, cpu), 2230 }; 2231 2232 rc = HYPERVISOR_hvm_op(HVMOP_set_evtchn_upcall_vector, &op); 2233 if (rc) 2234 return rc; 2235 2236 /* Trick toolstack to think we are enlightened. */ 2237 if (!cpu) 2238 rc = xen_set_callback_via(1); 2239 2240 return rc; 2241 } 2242 2243 static __init void xen_alloc_callback_vector(void) 2244 { 2245 if (!xen_have_vector_callback) 2246 return; 2247 2248 pr_info("Xen HVM callback vector for event delivery is enabled\n"); 2249 alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_xen_hvm_callback); 2250 } 2251 #else 2252 void xen_setup_callback_vector(void) {} 2253 static inline void xen_init_setup_upcall_vector(void) {} 2254 int xen_set_upcall_vector(unsigned int cpu) {} 2255 static inline void xen_alloc_callback_vector(void) {} 2256 #endif /* CONFIG_XEN_PVHVM */ 2257 #endif /* CONFIG_X86 */ 2258 2259 bool xen_fifo_events = true; 2260 module_param_named(fifo_events, xen_fifo_events, bool, 0); 2261 2262 static int xen_evtchn_cpu_prepare(unsigned int cpu) 2263 { 2264 int ret = 0; 2265 2266 xen_cpu_init_eoi(cpu); 2267 2268 if (evtchn_ops->percpu_init) 2269 ret = evtchn_ops->percpu_init(cpu); 2270 2271 return ret; 2272 } 2273 2274 static int xen_evtchn_cpu_dead(unsigned int cpu) 2275 { 2276 int ret = 0; 2277 2278 if (evtchn_ops->percpu_deinit) 2279 ret = evtchn_ops->percpu_deinit(cpu); 2280 2281 return ret; 2282 } 2283 2284 void __init xen_init_IRQ(void) 2285 { 2286 int ret = -EINVAL; 2287 evtchn_port_t evtchn; 2288 2289 if (xen_fifo_events) 2290 ret = xen_evtchn_fifo_init(); 2291 if (ret < 0) { 2292 xen_evtchn_2l_init(); 2293 xen_fifo_events = false; 2294 } 2295 2296 xen_cpu_init_eoi(smp_processor_id()); 2297 2298 cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE, 2299 "xen/evtchn:prepare", 2300 xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead); 2301 2302 evtchn_to_irq = kcalloc(EVTCHN_ROW(xen_evtchn_max_channels()), 2303 sizeof(*evtchn_to_irq), GFP_KERNEL); 2304 BUG_ON(!evtchn_to_irq); 2305 2306 /* No event channels are 'live' right now. */ 2307 for (evtchn = 0; evtchn < xen_evtchn_nr_channels(); evtchn++) 2308 mask_evtchn(evtchn); 2309 2310 pirq_needs_eoi = pirq_needs_eoi_flag; 2311 2312 #ifdef CONFIG_X86 2313 if (xen_pv_domain()) { 2314 if (xen_initial_domain()) 2315 pci_xen_initial_domain(); 2316 } 2317 xen_init_setup_upcall_vector(); 2318 xen_alloc_callback_vector(); 2319 2320 2321 if (xen_hvm_domain()) { 2322 native_init_IRQ(); 2323 /* pci_xen_hvm_init must be called after native_init_IRQ so that 2324 * __acpi_register_gsi can point at the right function */ 2325 pci_xen_hvm_init(); 2326 } else { 2327 int rc; 2328 struct physdev_pirq_eoi_gmfn eoi_gmfn; 2329 2330 pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO); 2331 eoi_gmfn.gmfn = virt_to_gfn(pirq_eoi_map); 2332 rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn); 2333 if (rc != 0) { 2334 free_page((unsigned long) pirq_eoi_map); 2335 pirq_eoi_map = NULL; 2336 } else 2337 pirq_needs_eoi = pirq_check_eoi_map; 2338 } 2339 #endif 2340 } 2341