// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2017 - Cambridge Greys Ltd
 * Copyright (C) 2011 - 2014 Cisco Systems Inc
 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 * Derived (i.e. mostly copied) from arch/i386/kernel/irq.c:
 *	Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
 */

#include <linux/cpumask.h>
#include <linux/hardirq.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <as-layout.h>
#include <kern_util.h>
#include <os.h>
#include <irq_user.h>
#include <irq_kern.h>
#include <linux/time-internal.h>


extern void free_irqs(void);

/*
 * When epoll triggers we do not know why it did so; we can also have
 * different IRQs for read and write. This is why we keep a small
 * irq_reg array for each fd - one entry per IRQ type.
 */
struct irq_reg {
	void *id;
	int irq;
	/* it's cheaper to store this than to query it */
	int events;
	bool active;
	bool pending;
	bool wakeup;
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
	bool pending_on_resume;
	void (*timetravel_handler)(int, int, void *,
				   struct time_travel_event *);
	struct time_travel_event event;
#endif
};

struct irq_entry {
	struct list_head list;
	int fd;
	struct irq_reg reg[NUM_IRQ_TYPES];
	bool suspended;
	bool sigio_workaround;
};

static DEFINE_SPINLOCK(irq_lock);
static LIST_HEAD(active_fds);
static DECLARE_BITMAP(irqs_allocated, UM_LAST_SIGNAL_IRQ);
static bool irqs_suspended;

static void irq_io_loop(struct irq_reg *irq, struct uml_pt_regs *regs)
{
	/*
	 * irq->active guards against reentry
	 * irq->pending accumulates pending requests
	 * if pending is raised the irq_handler is re-run
	 * until pending is cleared
	 */
	if (irq->active) {
		irq->active = false;

		do {
			irq->pending = false;
			do_IRQ(irq->irq, regs);
		} while (irq->pending);

		irq->active = true;
	} else {
		irq->pending = true;
	}
}
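
/*
 * Illustrative note (descriptive comment only, not upstream
 * documentation): if irq_io_loop() is re-entered for the same irq_reg
 * while do_IRQ() above is still running, the inner call sees
 * active == false, merely sets pending and returns; the outer call
 * then notices pending and re-runs do_IRQ() before restoring active,
 * so events are coalesced rather than lost and the handler never
 * nests.
 */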

#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
static void irq_event_handler(struct time_travel_event *ev)
{
	struct irq_reg *reg = container_of(ev, struct irq_reg, event);

	/* do nothing if suspended - just to cause a wakeup */
	if (irqs_suspended)
		return;

	generic_handle_irq(reg->irq);
}

static bool irq_do_timetravel_handler(struct irq_entry *entry,
				      enum um_irq_type t)
{
	struct irq_reg *reg = &entry->reg[t];

	if (!reg->timetravel_handler)
		return false;

	/*
	 * Handle all messages - we might get multiple even while
	 * interrupts are already suspended, due to suspend order
	 * etc. Note that time_travel_add_irq_event() will not add
	 * an event twice, if it's pending already "first wins".
	 */
	reg->timetravel_handler(reg->irq, entry->fd, reg->id, &reg->event);

	if (!reg->event.pending)
		return false;

	if (irqs_suspended)
		reg->pending_on_resume = true;
	return true;
}
#else
static bool irq_do_timetravel_handler(struct irq_entry *entry,
				      enum um_irq_type t)
{
	return false;
}
#endif

static void sigio_reg_handler(int idx, struct irq_entry *entry, enum um_irq_type t,
			      struct uml_pt_regs *regs,
			      bool timetravel_handlers_only)
{
	struct irq_reg *reg = &entry->reg[t];

	if (!reg->events)
		return;

	if (os_epoll_triggered(idx, reg->events) <= 0)
		return;

	if (irq_do_timetravel_handler(entry, t))
		return;

	/*
	 * If we're called to only run time-travel handlers then don't
	 * actually proceed but mark sigio as pending (if applicable).
	 * For suspend/resume, timetravel_handlers_only may be true
	 * despite time-travel not being configured and used.
	 */
	if (timetravel_handlers_only) {
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
		mark_sigio_pending();
#endif
		return;
	}

	irq_io_loop(reg, regs);
}

static void _sigio_handler(struct uml_pt_regs *regs,
			   bool timetravel_handlers_only)
{
	struct irq_entry *irq_entry;
	int n, i;

	if (timetravel_handlers_only && !um_irq_timetravel_handler_used())
		return;

	while (1) {
		/*
		 * This is now lockless - epoll keeps back-references to the
		 * irqs which have triggered it, so there is no need to walk
		 * the irq list and lock it every time. We avoid locking by
		 * turning off IO for a specific fd by executing
		 * os_del_epoll_fd(fd) before we do any changes to the actual
		 * data structures.
		 */
		n = os_waiting_for_events_epoll();

		if (n <= 0) {
			if (n == -EINTR)
				continue;
			else
				break;
		}

		for (i = 0; i < n; i++) {
			enum um_irq_type t;

			irq_entry = os_epoll_get_data_pointer(i);

			for (t = 0; t < NUM_IRQ_TYPES; t++)
				sigio_reg_handler(i, irq_entry, t, regs,
						  timetravel_handlers_only);
		}
	}

	if (!timetravel_handlers_only)
		free_irqs();
}

void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
{
	_sigio_handler(regs, irqs_suspended);
}

static struct irq_entry *get_irq_entry_by_fd(int fd)
{
	struct irq_entry *walk;

	lockdep_assert_held(&irq_lock);

	list_for_each_entry(walk, &active_fds, list) {
		if (walk->fd == fd)
			return walk;
	}

	return NULL;
}

static void free_irq_entry(struct irq_entry *to_free, bool remove)
{
	if (!to_free)
		return;

	if (remove)
		os_del_epoll_fd(to_free->fd);
	list_del(&to_free->list);
	kfree(to_free);
}

static bool update_irq_entry(struct irq_entry *entry)
{
	enum um_irq_type i;
	int events = 0;

	for (i = 0; i < NUM_IRQ_TYPES; i++)
		events |= entry->reg[i].events;

	if (events) {
		/* will modify (instead of add) if needed */
		os_add_epoll_fd(events, entry->fd, entry);
		return true;
	}

	os_del_epoll_fd(entry->fd);
	return false;
}

static void update_or_free_irq_entry(struct irq_entry *entry)
{
	if (!update_irq_entry(entry))
		free_irq_entry(entry, false);
}
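
/*
 * Registration overview (descriptive comment only, added for clarity):
 * activate_fd() below puts the host fd into async mode, looks up or
 * allocates the irq_entry for that fd under irq_lock, fills in the
 * per-type irq_reg and finally re-arms epoll through
 * update_irq_entry() with the union of all registered event masks.
 * The temporary os_del_epoll_fd() keeps the lockless SIGIO loop from
 * observing a half-updated entry.
 */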

static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id,
		       void (*timetravel_handler)(int, int, void *,
						  struct time_travel_event *))
{
	struct irq_entry *irq_entry;
	int err, events = os_event_mask(type);
	unsigned long flags;

	err = os_set_fd_async(fd);
	if (err < 0)
		goto out;

	spin_lock_irqsave(&irq_lock, flags);
	irq_entry = get_irq_entry_by_fd(fd);
	if (irq_entry) {
		/* cannot register the same FD twice with the same type */
		if (WARN_ON(irq_entry->reg[type].events)) {
			err = -EALREADY;
			goto out_unlock;
		}

		/* temporarily disable to avoid IRQ-side locking */
		os_del_epoll_fd(fd);
	} else {
		irq_entry = kzalloc(sizeof(*irq_entry), GFP_ATOMIC);
		if (!irq_entry) {
			err = -ENOMEM;
			goto out_unlock;
		}
		irq_entry->fd = fd;
		list_add_tail(&irq_entry->list, &active_fds);
		maybe_sigio_broken(fd);
	}

	irq_entry->reg[type].id = dev_id;
	irq_entry->reg[type].irq = irq;
	irq_entry->reg[type].active = true;
	irq_entry->reg[type].events = events;

#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
	if (um_irq_timetravel_handler_used()) {
		irq_entry->reg[type].timetravel_handler = timetravel_handler;
		irq_entry->reg[type].event.fn = irq_event_handler;
	}
#endif

	WARN_ON(!update_irq_entry(irq_entry));
	spin_unlock_irqrestore(&irq_lock, flags);

	return 0;
out_unlock:
	spin_unlock_irqrestore(&irq_lock, flags);
out:
	return err;
}

/*
 * Remove the entry or entries for a specific FD; if you don't want to
 * remove all the possible entries then use um_free_irq() or
 * deactivate_fd() instead.
 */
void free_irq_by_fd(int fd)
{
	struct irq_entry *to_free;
	unsigned long flags;

	spin_lock_irqsave(&irq_lock, flags);
	to_free = get_irq_entry_by_fd(fd);
	free_irq_entry(to_free, true);
	spin_unlock_irqrestore(&irq_lock, flags);
}
EXPORT_SYMBOL(free_irq_by_fd);

static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
{
	struct irq_entry *entry;
	unsigned long flags;

	spin_lock_irqsave(&irq_lock, flags);
	list_for_each_entry(entry, &active_fds, list) {
		enum um_irq_type i;

		for (i = 0; i < NUM_IRQ_TYPES; i++) {
			struct irq_reg *reg = &entry->reg[i];

			if (!reg->events)
				continue;
			if (reg->irq != irq)
				continue;
			if (reg->id != dev)
				continue;

			os_del_epoll_fd(entry->fd);
			reg->events = 0;
			update_or_free_irq_entry(entry);
			goto out;
		}
	}
out:
	spin_unlock_irqrestore(&irq_lock, flags);
}

void deactivate_fd(int fd, int irqnum)
{
	struct irq_entry *entry;
	unsigned long flags;
	enum um_irq_type i;

	os_del_epoll_fd(fd);

	spin_lock_irqsave(&irq_lock, flags);
	entry = get_irq_entry_by_fd(fd);
	if (!entry)
		goto out;

	for (i = 0; i < NUM_IRQ_TYPES; i++) {
		if (!entry->reg[i].events)
			continue;
		if (entry->reg[i].irq == irqnum)
			entry->reg[i].events = 0;
	}

	update_or_free_irq_entry(entry);
out:
	spin_unlock_irqrestore(&irq_lock, flags);

	ignore_sigio_fd(fd);
}
EXPORT_SYMBOL(deactivate_fd);
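
/*
 * Teardown ordering note (descriptive comment only): every removal
 * path above calls os_del_epoll_fd() before touching the bookkeeping,
 * matching the lockless design of _sigio_handler() - once the fd is
 * out of the epoll set it can no longer be reported, so the entry can
 * be cleared or freed without racing the IRQ loop.
 */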

/*
 * Called just before shutdown in order to provide a clean exec
 * environment in case the system is rebooting.  No locking because
 * that would cause a pointless shutdown hang if something hadn't
 * released the lock.
 */
int deactivate_all_fds(void)
{
	struct irq_entry *entry;

	/*
	 * Stop IO.  The IRQ loop has no lock so this is our
	 * only way of making sure we are safe to dispose
	 * of all IRQ handlers.
	 */
	os_set_ioignore();

	/* we can no longer call kfree() here so just deactivate */
	list_for_each_entry(entry, &active_fds, list)
		os_del_epoll_fd(entry->fd);
	os_close_epoll_fd();
	return 0;
}

/*
 * do_IRQ handles all normal device IRQs (the special
 * SMP cross-CPU interrupts have their own specific
 * handlers).
 */
unsigned int do_IRQ(int irq, struct uml_pt_regs *regs)
{
	struct pt_regs *old_regs = set_irq_regs((struct pt_regs *)regs);
	irq_enter();
	generic_handle_irq(irq);
	irq_exit();
	set_irq_regs(old_regs);
	return 1;
}

void um_free_irq(int irq, void *dev)
{
	if (WARN(irq < 0 || irq > UM_LAST_SIGNAL_IRQ,
		 "freeing invalid irq %d", irq))
		return;

	free_irq_by_irq_and_dev(irq, dev);
	free_irq(irq, dev);
	clear_bit(irq, irqs_allocated);
}
EXPORT_SYMBOL(um_free_irq);

static int
_um_request_irq(int irq, int fd, enum um_irq_type type,
		irq_handler_t handler, unsigned long irqflags,
		const char *devname, void *dev_id,
		void (*timetravel_handler)(int, int, void *,
					   struct time_travel_event *))
{
	int err;

	if (irq == UM_IRQ_ALLOC) {
		int i;

		for (i = UM_FIRST_DYN_IRQ; i < NR_IRQS; i++) {
			if (!test_and_set_bit(i, irqs_allocated)) {
				irq = i;
				break;
			}
		}
	}

	if (irq < 0)
		return -ENOSPC;

	if (fd != -1) {
		err = activate_fd(irq, fd, type, dev_id, timetravel_handler);
		if (err)
			goto error;
	}

	err = request_irq(irq, handler, irqflags, devname, dev_id);
	if (err < 0)
		goto error;

	return irq;
error:
	clear_bit(irq, irqs_allocated);
	return err;
}

int um_request_irq(int irq, int fd, enum um_irq_type type,
		   irq_handler_t handler, unsigned long irqflags,
		   const char *devname, void *dev_id)
{
	return _um_request_irq(irq, fd, type, handler, irqflags,
			       devname, dev_id, NULL);
}
EXPORT_SYMBOL(um_request_irq);

#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
int um_request_irq_tt(int irq, int fd, enum um_irq_type type,
		      irq_handler_t handler, unsigned long irqflags,
		      const char *devname, void *dev_id,
		      void (*timetravel_handler)(int, int, void *,
						 struct time_travel_event *))
{
	return _um_request_irq(irq, fd, type, handler, irqflags,
			       devname, dev_id, timetravel_handler);
}
EXPORT_SYMBOL(um_request_irq_tt);

void sigio_run_timetravel_handlers(void)
{
	_sigio_handler(NULL, true);
}
#endif
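
/*
 * Illustrative usage sketch (not part of this file; names such as
 * my_handler and my_dev are hypothetical, and IRQ_READ is assumed to
 * be one of the enum um_irq_type values):
 *
 *	irq = um_request_irq(UM_IRQ_ALLOC, fd, IRQ_READ, my_handler,
 *			     0, "my-device", my_dev);
 *	if (irq < 0)
 *		return irq;
 *	...
 *	um_free_irq(irq, my_dev);
 *
 * With UM_IRQ_ALLOC a free dynamic IRQ number is picked and returned
 * on success; passing fd == -1 registers the handler without binding
 * it to a host file descriptor.
 */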

#ifdef CONFIG_PM_SLEEP
void um_irqs_suspend(void)
{
	struct irq_entry *entry;
	unsigned long flags;

	irqs_suspended = true;

	spin_lock_irqsave(&irq_lock, flags);
	list_for_each_entry(entry, &active_fds, list) {
		enum um_irq_type t;
		bool clear = true;

		for (t = 0; t < NUM_IRQ_TYPES; t++) {
			if (!entry->reg[t].events)
				continue;

			/*
			 * For the SIGIO_WRITE_IRQ, which is used to handle the
			 * SIGIO workaround thread, we need special handling:
			 * enable wake for it itself, but below we tell it about
			 * any FDs that should be suspended.
			 */
			if (entry->reg[t].wakeup ||
			    entry->reg[t].irq == SIGIO_WRITE_IRQ
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
			    || entry->reg[t].timetravel_handler
#endif
			    ) {
				clear = false;
				break;
			}
		}

		if (clear) {
			entry->suspended = true;
			os_clear_fd_async(entry->fd);
			entry->sigio_workaround =
				!__ignore_sigio_fd(entry->fd);
		}
	}
	spin_unlock_irqrestore(&irq_lock, flags);
}

void um_irqs_resume(void)
{
	struct irq_entry *entry;
	unsigned long flags;

	local_irq_save(flags);
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
	/*
	 * We don't need to lock anything here since we're in resume
	 * and nothing else is running, but have disabled IRQs so we
	 * don't try anything else with the interrupt list from there.
	 */
	list_for_each_entry(entry, &active_fds, list) {
		enum um_irq_type t;

		for (t = 0; t < NUM_IRQ_TYPES; t++) {
			struct irq_reg *reg = &entry->reg[t];

			if (reg->pending_on_resume) {
				irq_enter();
				generic_handle_irq(reg->irq);
				irq_exit();
				reg->pending_on_resume = false;
			}
		}
	}
#endif

	spin_lock(&irq_lock);
	list_for_each_entry(entry, &active_fds, list) {
		if (entry->suspended) {
			int err = os_set_fd_async(entry->fd);

			WARN(err < 0, "os_set_fd_async returned %d\n", err);
			entry->suspended = false;

			if (entry->sigio_workaround) {
				err = __add_sigio_fd(entry->fd);
				WARN(err < 0, "__add_sigio_fd returned %d\n", err);
			}
		}
	}
	spin_unlock_irqrestore(&irq_lock, flags);

	irqs_suspended = false;
	send_sigio_to_self();
}

static int normal_irq_set_wake(struct irq_data *d, unsigned int on)
{
	struct irq_entry *entry;
	unsigned long flags;

	spin_lock_irqsave(&irq_lock, flags);
	list_for_each_entry(entry, &active_fds, list) {
		enum um_irq_type t;

		for (t = 0; t < NUM_IRQ_TYPES; t++) {
			if (!entry->reg[t].events)
				continue;

			if (entry->reg[t].irq != d->irq)
				continue;
			entry->reg[t].wakeup = on;
			goto unlock;
		}
	}
unlock:
	spin_unlock_irqrestore(&irq_lock, flags);
	return 0;
}
#else
#define normal_irq_set_wake NULL
#endif

/*
 * irq_chip must define at least enable/disable and ack when
 * the edge handler is used.
 */
static void dummy(struct irq_data *d)
{
}

/* This is used for everything other than the timer. */
static struct irq_chip normal_irq_type = {
	.name = "SIGIO",
	.irq_disable = dummy,
	.irq_enable = dummy,
	.irq_ack = dummy,
	.irq_mask = dummy,
	.irq_unmask = dummy,
	.irq_set_wake = normal_irq_set_wake,
};

static struct irq_chip alarm_irq_type = {
	.name = "SIGALRM",
	.irq_disable = dummy,
	.irq_enable = dummy,
	.irq_ack = dummy,
	.irq_mask = dummy,
	.irq_unmask = dummy,
};

void __init init_IRQ(void)
{
	int i;

	irq_set_chip_and_handler(TIMER_IRQ, &alarm_irq_type, handle_edge_irq);

	for (i = 1; i < UM_LAST_SIGNAL_IRQ; i++)
		irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq);
	/* Initialize EPOLL Loop */
	os_setup_epoll();
}
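
/*
 * Note (descriptive comment only): apart from .irq_set_wake, the chip
 * callbacks above are deliberately no-ops - whether an fd can raise an
 * interrupt is really controlled through the activate_fd()/
 * deactivate_fd() paths and the epoll set, and the timer IRQ is driven
 * by SIGALRM rather than SIGIO, hence the separate alarm_irq_type.
 */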

/*
 * IRQ stack entry and exit:
 *
 * Unlike i386, UML doesn't receive IRQs on the normal kernel stack
 * and switch over to the IRQ stack after some preparation.  We use
 * sigaltstack to receive signals on a separate stack from the start.
 * These two functions make sure the rest of the kernel won't be too
 * upset by being on a different stack.  The IRQ stack has a
 * thread_info structure at the bottom so that current et al continue
 * to work.
 *
 * to_irq_stack copies the current task's thread_info to the IRQ stack
 * thread_info and sets the task's stack to point to the IRQ stack.
 *
 * from_irq_stack copies the thread_info struct back (flags may have
 * been modified) and resets the task's stack pointer.
 *
 * Tricky bits -
 *
 * What happens when two signals race each other?  UML doesn't block
 * signals with sigprocmask, SA_DEFER, or sa_mask, so a second signal
 * could arrive while a previous one is still setting up the
 * thread_info.
 *
 * There are three cases -
 *     The first interrupt on the stack - sets up the thread_info and
 *        handles the interrupt
 *     A nested interrupt interrupting the copying of the thread_info -
 *        can't handle the interrupt, as the stack is in an unknown state
 *     A nested interrupt not interrupting the copying of the
 *        thread_info - doesn't do any setup, just handles the interrupt
 *
 * The first job is to figure out whether we interrupted stack setup.
 * This is done by xchging the signal mask with pending_mask.  If the
 * value that comes back is zero, then there is no setup in progress,
 * and the interrupt can be handled.  If the value is non-zero, then
 * there is stack setup in progress.  In order to have the interrupt
 * handled, we leave our signal in the mask, and it will be handled by
 * the upper handler after it has set up the stack.
 *
 * Next is to figure out whether we are the outer handler or a nested
 * one.  As part of setting up the stack, thread_info->real_thread is
 * set to non-NULL (and is reset to NULL on exit).  This is the
 * nesting indicator.  If it is non-NULL, then the stack is already
 * set up and the handler can run.
 */

static unsigned long pending_mask;

unsigned long to_irq_stack(unsigned long *mask_out)
{
	struct thread_info *ti;
	unsigned long mask, old;
	int nested;

	mask = xchg(&pending_mask, *mask_out);
	if (mask != 0) {
		/*
		 * If any interrupts come in at this point, we want to
		 * make sure that their bits aren't lost by our
		 * putting our bit in.  So, this loop accumulates bits
		 * until xchg returns the same value that we put in.
		 * When that happens, there were no new interrupts,
		 * and pending_mask contains a bit for each interrupt
		 * that came in.
		 */
		old = *mask_out;
		do {
			old |= mask;
			mask = xchg(&pending_mask, old);
		} while (mask != old);
		return 1;
	}

	ti = current_thread_info();
	nested = (ti->real_thread != NULL);
	if (!nested) {
		struct task_struct *task;
		struct thread_info *tti;

		task = cpu_tasks[ti->cpu].task;
		tti = task_thread_info(task);

		*ti = *tti;
		ti->real_thread = tti;
		task->stack = ti;
	}

	mask = xchg(&pending_mask, 0);
	*mask_out |= mask | nested;
	return 0;
}

unsigned long from_irq_stack(int nested)
{
	struct thread_info *ti, *to;
	unsigned long mask;

	ti = current_thread_info();

	pending_mask = 1;

	to = ti->real_thread;
	current->stack = to;
	ti->real_thread = NULL;
	*to = *ti;

	mask = xchg(&pending_mask, 0);
	return mask & ~1;
}
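
/*
 * Illustrative walk-through of the pending_mask handshake above (a
 * descriptive sketch, not additional code): the outer signal xchg()s
 * its bit into pending_mask, reads back zero, copies the thread_info
 * and then drains pending_mask so that bits left by nested signals are
 * merged into *mask_out for the caller to handle.  A nested signal
 * that lands during the copy reads back a non-zero value, ORs its own
 * bit in until xchg() returns exactly what it wrote, and bails out
 * with 1 - leaving its bit behind for the outer handler.  On exit,
 * from_irq_stack() parks the value 1 in pending_mask while the
 * thread_info is copied back, then returns any accumulated bits with
 * that sentinel masked off.
 */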