// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2017 - Cambridge Greys Ltd
 * Copyright (C) 2011 - 2014 Cisco Systems Inc
 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 * Derived (i.e. mostly copied) from arch/i386/kernel/irq.c:
 *	Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
 */

#include <linux/cpumask.h>
#include <linux/hardirq.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <as-layout.h>
#include <kern_util.h>
#include <os.h>
#include <irq_user.h>
#include <irq_kern.h>


extern void free_irqs(void);

/* When epoll triggers we do not know why it did so;
 * we can also have different IRQs for read and write.
 * This is why we keep a small irq_reg array for each fd -
 * one entry per IRQ type.
 */
struct irq_reg {
	void *id;
	int irq;
	/* it's cheaper to store this than to query it */
	int events;
	bool active;
	bool pending;
	bool wakeup;
};

struct irq_entry {
	struct list_head list;
	int fd;
	struct irq_reg reg[NUM_IRQ_TYPES];
	bool suspended;
	bool sigio_workaround;
};

static DEFINE_SPINLOCK(irq_lock);
static LIST_HEAD(active_fds);
static DECLARE_BITMAP(irqs_allocated, NR_IRQS);

static void irq_io_loop(struct irq_reg *irq, struct uml_pt_regs *regs)
{
	/*
	 * irq->active guards against reentry
	 * irq->pending accumulates pending requests
	 * if pending is raised the irq_handler is re-run
	 * until pending is cleared
	 */
	if (irq->active) {
		irq->active = false;

		do {
			irq->pending = false;
			do_IRQ(irq->irq, regs);
		} while (irq->pending);

		irq->active = true;
	} else {
		irq->pending = true;
	}
}

void sigio_handler_suspend(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
{
	/* nothing */
}

void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
{
	struct irq_entry *irq_entry;
	int n, i;

	while (1) {
		/* This is now lockless - epoll keeps back-references to the
		 * irqs which have triggered it, so there is no need to walk
		 * the irq list and lock it every time.
		 * We avoid locking by turning off IO for a specific fd by
		 * executing os_del_epoll_fd(fd) before we do any changes to
		 * the actual data structures.
		 */
		n = os_waiting_for_events_epoll();

		if (n <= 0) {
			if (n == -EINTR)
				continue;
			else
				break;
		}

		for (i = 0; i < n; i++) {
			enum um_irq_type t;

			irq_entry = os_epoll_get_data_pointer(i);

			for (t = 0; t < NUM_IRQ_TYPES; t++) {
				int events = irq_entry->reg[t].events;

				if (!events)
					continue;

				if (os_epoll_triggered(i, events) > 0)
					irq_io_loop(&irq_entry->reg[t], regs);
			}
		}
	}

	free_irqs();
}

static struct irq_entry *get_irq_entry_by_fd(int fd)
{
	struct irq_entry *walk;

	lockdep_assert_held(&irq_lock);

	list_for_each_entry(walk, &active_fds, list) {
		if (walk->fd == fd)
			return walk;
	}

	return NULL;
}

static void free_irq_entry(struct irq_entry *to_free, bool remove)
{
	if (!to_free)
		return;

	if (remove)
		os_del_epoll_fd(to_free->fd);
	list_del(&to_free->list);
	kfree(to_free);
}

static bool update_irq_entry(struct irq_entry *entry)
{
	enum um_irq_type i;
	int events = 0;

	for (i = 0; i < NUM_IRQ_TYPES; i++)
		events |= entry->reg[i].events;

	if (events) {
		/* will modify (instead of add) if needed */
		os_add_epoll_fd(events, entry->fd, entry);
		return true;
	}

	os_del_epoll_fd(entry->fd);
	return false;
}

static void update_or_free_irq_entry(struct irq_entry *entry)
{
	if (!update_irq_entry(entry))
		free_irq_entry(entry, false);
}

static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id)
{
	struct irq_entry *irq_entry;
	int err, events = os_event_mask(type);
	unsigned long flags;

	err = os_set_fd_async(fd);
	if (err < 0)
		goto out;

	spin_lock_irqsave(&irq_lock, flags);
	irq_entry = get_irq_entry_by_fd(fd);
	if (irq_entry) {
		/* cannot register the same FD twice with the same type */
		if (WARN_ON(irq_entry->reg[type].events)) {
			err = -EALREADY;
			goto out_unlock;
		}

		/* temporarily disable to avoid IRQ-side locking */
		os_del_epoll_fd(fd);
	} else {
		irq_entry = kzalloc(sizeof(*irq_entry), GFP_ATOMIC);
		if (!irq_entry) {
			err = -ENOMEM;
			goto out_unlock;
		}
		irq_entry->fd = fd;
		list_add_tail(&irq_entry->list, &active_fds);
		maybe_sigio_broken(fd);
	}

	irq_entry->reg[type].id = dev_id;
	irq_entry->reg[type].irq = irq;
	irq_entry->reg[type].active = true;
	irq_entry->reg[type].events = events;

	WARN_ON(!update_irq_entry(irq_entry));
	spin_unlock_irqrestore(&irq_lock, flags);

	return 0;
out_unlock:
	spin_unlock_irqrestore(&irq_lock, flags);
out:
	return err;
}

/*
 * Remove the entry or entries for a specific FD.  If you don't want
 * to remove all the possible entries, use um_free_irq() or
 * deactivate_fd() instead.
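 *
 * This tears down everything that was registered for the FD: it is
 * removed from epoll and the whole irq_entry (i.e. the registrations
 * for every IRQ type on that FD) is freed.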
 */
void free_irq_by_fd(int fd)
{
	struct irq_entry *to_free;
	unsigned long flags;

	spin_lock_irqsave(&irq_lock, flags);
	to_free = get_irq_entry_by_fd(fd);
	free_irq_entry(to_free, true);
	spin_unlock_irqrestore(&irq_lock, flags);
}
EXPORT_SYMBOL(free_irq_by_fd);

static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
{
	struct irq_entry *entry;
	unsigned long flags;

	spin_lock_irqsave(&irq_lock, flags);
	list_for_each_entry(entry, &active_fds, list) {
		enum um_irq_type i;

		for (i = 0; i < NUM_IRQ_TYPES; i++) {
			struct irq_reg *reg = &entry->reg[i];

			if (!reg->events)
				continue;
			if (reg->irq != irq)
				continue;
			if (reg->id != dev)
				continue;

			os_del_epoll_fd(entry->fd);
			reg->events = 0;
			update_or_free_irq_entry(entry);
			goto out;
		}
	}
out:
	spin_unlock_irqrestore(&irq_lock, flags);
}

void deactivate_fd(int fd, int irqnum)
{
	struct irq_entry *entry;
	unsigned long flags;
	enum um_irq_type i;

	os_del_epoll_fd(fd);

	spin_lock_irqsave(&irq_lock, flags);
	entry = get_irq_entry_by_fd(fd);
	if (!entry)
		goto out;

	for (i = 0; i < NUM_IRQ_TYPES; i++) {
		if (!entry->reg[i].events)
			continue;
		if (entry->reg[i].irq == irqnum)
			entry->reg[i].events = 0;
	}

	update_or_free_irq_entry(entry);
out:
	spin_unlock_irqrestore(&irq_lock, flags);

	ignore_sigio_fd(fd);
}
EXPORT_SYMBOL(deactivate_fd);

/*
 * Called just before shutdown in order to provide a clean exec
 * environment in case the system is rebooting.  No locking because
 * that would cause a pointless shutdown hang if something hadn't
 * released the lock.
 */
int deactivate_all_fds(void)
{
	struct irq_entry *entry;

	/* Stop IO.  The IRQ loop has no lock so this is our
	 * only way of making sure we are safe to dispose
	 * of all IRQ handlers
	 */
	os_set_ioignore();

	/* we can no longer call kfree() here so just deactivate */
	list_for_each_entry(entry, &active_fds, list)
		os_del_epoll_fd(entry->fd);
	os_close_epoll_fd();
	return 0;
}

/*
 * do_IRQ handles all normal device IRQs (the special
 * SMP cross-CPU interrupts have their own specific
 * handlers).
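 *
 * On UML there is no low-level interrupt controller to talk to here;
 * this only does the irq_enter()/irq_exit() bookkeeping around
 * generic_handle_irq() and makes the saved register state available
 * via set_irq_regs().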
 */
unsigned int do_IRQ(int irq, struct uml_pt_regs *regs)
{
	struct pt_regs *old_regs = set_irq_regs((struct pt_regs *)regs);

	irq_enter();
	generic_handle_irq(irq);
	irq_exit();
	set_irq_regs(old_regs);
	return 1;
}

void um_free_irq(int irq, void *dev)
{
	if (WARN(irq < 0 || irq > NR_IRQS, "freeing invalid irq %d", irq))
		return;

	free_irq_by_irq_and_dev(irq, dev);
	free_irq(irq, dev);
	clear_bit(irq, irqs_allocated);
}
EXPORT_SYMBOL(um_free_irq);

int um_request_irq(int irq, int fd, enum um_irq_type type,
		   irq_handler_t handler, unsigned long irqflags,
		   const char *devname, void *dev_id)
{
	int err;

	if (irq == UM_IRQ_ALLOC) {
		int i;

		for (i = UM_FIRST_DYN_IRQ; i < NR_IRQS; i++) {
			if (!test_and_set_bit(i, irqs_allocated)) {
				irq = i;
				break;
			}
		}
	}

	if (irq < 0)
		return -ENOSPC;

	if (fd != -1) {
		err = activate_fd(irq, fd, type, dev_id);
		if (err)
			goto error;
	}

	err = request_irq(irq, handler, irqflags, devname, dev_id);
	if (err < 0)
		goto error;

	return irq;

error:
	clear_bit(irq, irqs_allocated);
	return err;
}
EXPORT_SYMBOL(um_request_irq);

#ifdef CONFIG_PM_SLEEP
void um_irqs_suspend(void)
{
	struct irq_entry *entry;
	unsigned long flags;

	sig_info[SIGIO] = sigio_handler_suspend;

	spin_lock_irqsave(&irq_lock, flags);
	list_for_each_entry(entry, &active_fds, list) {
		enum um_irq_type t;
		bool wake = false;

		for (t = 0; t < NUM_IRQ_TYPES; t++) {
			if (!entry->reg[t].events)
				continue;

			/*
			 * For the SIGIO_WRITE_IRQ, which is used to handle the
			 * SIGIO workaround thread, we need special handling:
			 * enable wake for it itself, but below we tell it about
			 * any FDs that should be suspended.
			 */
			if (entry->reg[t].wakeup ||
			    entry->reg[t].irq == SIGIO_WRITE_IRQ) {
				wake = true;
				break;
			}
		}

		if (!wake) {
			entry->suspended = true;
			os_clear_fd_async(entry->fd);
			entry->sigio_workaround =
				!__ignore_sigio_fd(entry->fd);
		}
	}
	spin_unlock_irqrestore(&irq_lock, flags);
}

void um_irqs_resume(void)
{
	struct irq_entry *entry;
	unsigned long flags;

	spin_lock_irqsave(&irq_lock, flags);
	list_for_each_entry(entry, &active_fds, list) {
		if (entry->suspended) {
			int err = os_set_fd_async(entry->fd);

			WARN(err < 0, "os_set_fd_async returned %d\n", err);
			entry->suspended = false;

			if (entry->sigio_workaround) {
				err = __add_sigio_fd(entry->fd);
				WARN(err < 0, "__add_sigio_fd returned %d\n", err);
			}
		}
	}
	spin_unlock_irqrestore(&irq_lock, flags);

	sig_info[SIGIO] = sigio_handler;
	send_sigio_to_self();
}

static int normal_irq_set_wake(struct irq_data *d, unsigned int on)
{
	struct irq_entry *entry;
	unsigned long flags;

	spin_lock_irqsave(&irq_lock, flags);
	list_for_each_entry(entry, &active_fds, list) {
		enum um_irq_type t;

		for (t = 0; t < NUM_IRQ_TYPES; t++) {
			if (!entry->reg[t].events)
				continue;

			if (entry->reg[t].irq != d->irq)
				continue;
			entry->reg[t].wakeup = on;
			goto unlock;
		}
	}
unlock:
	spin_unlock_irqrestore(&irq_lock, flags);
	return 0;
}
#else
#define normal_irq_set_wake NULL
#endif

/*
 * irq_chip must define at least enable/disable and ack when
 * the edge handler is used.
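 * There is no interrupt controller hardware to program in UML, so
 * all of these callbacks are satisfied by the no-op dummy() below;
 * only .irq_set_wake (normal_irq_set_wake above) does any real work.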
 */
static void dummy(struct irq_data *d)
{
}

/* This is used for everything other than the timer. */
static struct irq_chip normal_irq_type = {
	.name = "SIGIO",
	.irq_disable = dummy,
	.irq_enable = dummy,
	.irq_ack = dummy,
	.irq_mask = dummy,
	.irq_unmask = dummy,
	.irq_set_wake = normal_irq_set_wake,
};

static struct irq_chip alarm_irq_type = {
	.name = "SIGALRM",
	.irq_disable = dummy,
	.irq_enable = dummy,
	.irq_ack = dummy,
	.irq_mask = dummy,
	.irq_unmask = dummy,
};

void __init init_IRQ(void)
{
	int i;

	irq_set_chip_and_handler(TIMER_IRQ, &alarm_irq_type, handle_edge_irq);

	for (i = 1; i < NR_IRQS; i++)
		irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq);
	/* Initialize EPOLL Loop */
	os_setup_epoll();
}

/*
 * IRQ stack entry and exit:
 *
 * Unlike i386, UML doesn't receive IRQs on the normal kernel stack
 * and then switch over to an IRQ stack after some preparation.
 * Instead, sigaltstack is used to receive signals on a separate
 * stack from the start.  These two functions make sure the rest of
 * the kernel won't be too upset by being on a different stack.  The
 * IRQ stack has a thread_info structure at the bottom so that
 * current et al continue to work.
 *
 * to_irq_stack copies the current task's thread_info to the IRQ stack
 * thread_info and sets the task's stack to point to the IRQ stack.
 *
 * from_irq_stack copies the thread_info struct back (flags may have
 * been modified) and resets the task's stack pointer.
 *
 * Tricky bits -
 *
 * What happens when two signals race each other?  UML doesn't block
 * signals with sigprocmask or sa_mask, so a second signal could
 * arrive while a previous one is still setting up the thread_info.
 *
 * There are three cases -
 *	The first interrupt on the stack - sets up the thread_info and
 *	handles the interrupt
 *	A nested interrupt interrupting the copying of the thread_info -
 *	can't handle the interrupt, as the stack is in an unknown state
 *	A nested interrupt not interrupting the copying of the
 *	thread_info - doesn't do any setup, just handles the interrupt
 *
 * The first job is to figure out whether we interrupted stack setup.
 * This is done by xchging the signal mask with the static
 * pending_mask.  If the value that comes back is zero, then there is
 * no setup in progress, and the interrupt can be handled.  If the
 * value is non-zero, then there is stack setup in progress.  In order
 * to have the interrupt handled, we leave our signal in the mask, and
 * it will be handled by the upper handler after it has set up the
 * stack.
 *
 * Next is to figure out whether we are the outer handler or a nested
 * one.  As part of setting up the stack, thread_info->real_thread is
 * set to non-NULL (and is reset to NULL on exit).  This is the
 * nesting indicator.  If it is non-NULL, then the stack is already
 * set up and the handler can run.
 */

static unsigned long pending_mask;

unsigned long to_irq_stack(unsigned long *mask_out)
{
	struct thread_info *ti;
	unsigned long mask, old;
	int nested;

	mask = xchg(&pending_mask, *mask_out);
	if (mask != 0) {
		/*
		 * If any interrupts come in at this point, we want to
		 * make sure that their bits aren't lost by our
		 * putting our bit in.
		 * So, this loop accumulates bits until xchg returns the
		 * same value that we put in.  When that happens, there
		 * were no new interrupts, and pending_mask contains a
		 * bit for each interrupt that came in.
		 */
		old = *mask_out;
		do {
			old |= mask;
			mask = xchg(&pending_mask, old);
		} while (mask != old);
		return 1;
	}

	ti = current_thread_info();
	nested = (ti->real_thread != NULL);
	if (!nested) {
		struct task_struct *task;
		struct thread_info *tti;

		task = cpu_tasks[ti->cpu].task;
		tti = task_thread_info(task);

		*ti = *tti;
		ti->real_thread = tti;
		task->stack = ti;
	}

	mask = xchg(&pending_mask, 0);
	*mask_out |= mask | nested;
	return 0;
}

unsigned long from_irq_stack(int nested)
{
	struct thread_info *ti, *to;
	unsigned long mask;

	ti = current_thread_info();

	pending_mask = 1;

	to = ti->real_thread;
	current->stack = to;
	ti->real_thread = NULL;
	*to = *ti;

	mask = xchg(&pending_mask, 0);
	return mask & ~1;
}
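
/*
 * Illustrative use of the exported API above (not part of this file):
 * a UML driver that owns a host file descriptor would typically do
 * something along these lines, where "my_fd", "my_handler" and
 * "my_dev" are hypothetical names belonging to the driver:
 *
 *	int irq = um_request_irq(UM_IRQ_ALLOC, my_fd, IRQ_READ,
 *				 my_handler, 0, "my-driver", my_dev);
 *	if (irq < 0)
 *		return irq;
 *	...
 *	um_free_irq(irq, my_dev);
 */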