// SPDX-License-Identifier: GPL-2.0

#include <linux/context_tracking.h>
#include <linux/entry-common.h>
#include <linux/livepatch.h>
#include <linux/audit.h>

#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>

/**
 * enter_from_user_mode - Establish state when coming from user mode
 *
 * Syscall/interrupt entry disables interrupts, but user mode is traced as
 * interrupts enabled. Also with NO_HZ_FULL RCU might be idle.
 *
 * 1) Tell lockdep that interrupts are disabled
 * 2) Invoke context tracking if enabled to reactivate RCU
 * 3) Trace interrupts off state
 */
static __always_inline void enter_from_user_mode(struct pt_regs *regs)
{
	arch_check_user_regs(regs);
	lockdep_hardirqs_off(CALLER_ADDR0);

	CT_WARN_ON(ct_state() != CONTEXT_USER);
	user_exit_irqoff();

	/* Tracing is only legal between instrumentation markers */
	instrumentation_begin();
	trace_hardirqs_off_finish();
	instrumentation_end();
}

/* Feed syscall number and arguments to the audit subsystem, if active. */
static inline void syscall_enter_audit(struct pt_regs *regs, long syscall)
{
	if (unlikely(audit_context())) {
		unsigned long args[6];

		syscall_get_arguments(current, regs, args);
		/* audit_syscall_entry() only consumes the first four arguments */
		audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]);
	}
}

/*
 * Slow path of syscall entry: ptrace, seccomp, tracepoints and audit.
 * Returns the (possibly rewritten) syscall number, or -1 to skip the
 * syscall when a tracer or seccomp aborted it.
 */
static long syscall_trace_enter(struct pt_regs *regs, long syscall,
				unsigned long ti_work, unsigned long work)
{
	long ret = 0;

	/* Handle ptrace */
	if (ti_work & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU)) {
		ret = arch_syscall_enter_tracehook(regs);
		if (ret || (ti_work & _TIF_SYSCALL_EMU))
			return -1L;
	}

	/* Do seccomp after ptrace, to catch any tracer changes. */
	if (work & SYSCALL_WORK_SECCOMP) {
		ret = __secure_computing(NULL);
		if (ret == -1L)
			return ret;
	}

	/* Either of the above might have changed the syscall number */
	syscall = syscall_get_nr(current, regs);

	if (unlikely(ti_work & _TIF_SYSCALL_TRACEPOINT))
		trace_sys_enter(regs, syscall);

	syscall_enter_audit(regs, syscall);

	/* GNU ?: — propagate a non-zero abort value, else the syscall number */
	return ret ? : syscall;
}

static __always_inline long
__syscall_enter_from_user_work(struct pt_regs *regs, long syscall)
{
	/*
	 * Transitional: entry work is tracked in both syscall_work
	 * (SYSCALL_WORK_*) and the legacy thread flags (_TIF_*).
	 */
	unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
	unsigned long ti_work;

	ti_work = READ_ONCE(current_thread_info()->flags);
	if (work & SYSCALL_WORK_ENTER || ti_work & SYSCALL_ENTER_WORK)
		syscall = syscall_trace_enter(regs, syscall, ti_work, work);

	return syscall;
}

long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall)
{
	return __syscall_enter_from_user_work(regs, syscall);
}

noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
{
	long ret;

	enter_from_user_mode(regs);

	instrumentation_begin();
	local_irq_enable();
	ret = __syscall_enter_from_user_work(regs, syscall);
	instrumentation_end();

	return ret;
}

/* Establish entry state only; caller runs the entry work separately. */
noinstr void syscall_enter_from_user_mode_prepare(struct pt_regs *regs)
{
	enter_from_user_mode(regs);
	instrumentation_begin();
	local_irq_enable();
	instrumentation_end();
}
/**
 * exit_to_user_mode - Fixup state when exiting to user mode
 *
 * Syscall/interrupt exit enables interrupts, but the kernel state is
 * interrupts disabled when this is invoked. Also tell RCU about it.
 *
 * 1) Trace interrupts on state
 * 2) Invoke context tracking if enabled to adjust RCU state
 * 3) Invoke architecture specific last minute exit code, e.g. speculation
 *    mitigations, etc.
 * 4) Tell lockdep that interrupts are enabled
 */
static __always_inline void exit_to_user_mode(void)
{
	instrumentation_begin();
	trace_hardirqs_on_prepare();
	lockdep_hardirqs_on_prepare(CALLER_ADDR0);
	instrumentation_end();

	user_enter_irqoff();
	arch_exit_to_user_mode();
	lockdep_hardirqs_on(CALLER_ADDR0);
}

/* Workaround to allow gradual conversion of architecture code */
void __weak arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal) { }

/* Handle TIF_NOTIFY_SIGNAL and hand pending signals to the architecture. */
static void handle_signal_work(struct pt_regs *regs, unsigned long ti_work)
{
	if (ti_work & _TIF_NOTIFY_SIGNAL)
		tracehook_notify_signal();

	arch_do_signal_or_restart(regs, ti_work & _TIF_SIGPENDING);
}

static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
					    unsigned long ti_work)
{
	/*
	 * Before returning to user space ensure that all pending work
	 * items have been completed.
	 */
	while (ti_work & EXIT_TO_USER_MODE_WORK) {

		local_irq_enable_exit_to_user(ti_work);

		if (ti_work & _TIF_NEED_RESCHED)
			schedule();

		if (ti_work & _TIF_UPROBE)
			uprobe_notify_resume(regs);

		if (ti_work & _TIF_PATCH_PENDING)
			klp_update_patch_state(current);

		if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
			handle_signal_work(regs, ti_work);

		if (ti_work & _TIF_NOTIFY_RESUME) {
			tracehook_notify_resume(regs);
			rseq_handle_notify_resume(NULL, regs);
		}

		/* Architecture specific TIF work */
		arch_exit_to_user_mode_work(regs, ti_work);

		/*
		 * Disable interrupts and reevaluate the work flags as they
		 * might have changed while interrupts and preemption was
		 * enabled above.
		 */
		local_irq_disable_exit_to_user();
		ti_work = READ_ONCE(current_thread_info()->flags);
	}

	/* Return the latest work state for arch_exit_to_user_mode() */
	return ti_work;
}

/* Common exit work; runs with interrupts disabled on entry and exit. */
static void exit_to_user_mode_prepare(struct pt_regs *regs)
{
	unsigned long ti_work = READ_ONCE(current_thread_info()->flags);

	lockdep_assert_irqs_disabled();

	if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
		ti_work = exit_to_user_mode_loop(regs, ti_work);

	arch_exit_to_user_mode_prepare(regs, ti_work);

	/* Ensure that the address limit is intact and no locks are held */
	addr_limit_user_check();
	lockdep_assert_irqs_disabled();
	lockdep_sys_exit();
}

#ifndef _TIF_SINGLESTEP
static inline bool report_single_step(unsigned long ti_work)
{
	return false;
}
#else
/*
 * If TIF_SYSCALL_EMU is set, then the only reason to report is when
 * TIF_SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall
 * instruction has been already reported in syscall_enter_from_user_mode().
 */
#define SYSEMU_STEP	(_TIF_SINGLESTEP | _TIF_SYSCALL_EMU)

static inline bool report_single_step(unsigned long ti_work)
{
	return (ti_work & SYSEMU_STEP) == _TIF_SINGLESTEP;
}
#endif

/* Slow path of syscall exit: audit, tracepoints and ptrace reporting. */
static void syscall_exit_work(struct pt_regs *regs, unsigned long ti_work,
			      unsigned long work)
{
	bool step;

	audit_syscall_exit(regs);

	if (ti_work & _TIF_SYSCALL_TRACEPOINT)
		trace_sys_exit(regs, syscall_get_return_value(current, regs));

	step = report_single_step(ti_work);
	if (step || ti_work & _TIF_SYSCALL_TRACE)
		arch_syscall_exit_tracehook(regs, step);
}
/*
 * Syscall specific exit to user mode preparation. Runs with interrupts
 * enabled.
 */
static void syscall_exit_to_user_mode_prepare(struct pt_regs *regs)
{
	/*
	 * Transitional: exit work is tracked in both syscall_work
	 * (SYSCALL_WORK_*) and the legacy thread flags (_TIF_*).
	 */
	unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
	u32 cached_flags = READ_ONCE(current_thread_info()->flags);
	unsigned long nr = syscall_get_nr(current, regs);

	CT_WARN_ON(ct_state() != CONTEXT_KERNEL);

	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
		if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr))
			local_irq_enable();
	}

	rseq_syscall(regs);

	/*
	 * Do one-time syscall specific work. If these work items are
	 * enabled, we want to run them exactly once per syscall exit with
	 * interrupts enabled.
	 */
	if (unlikely(work & SYSCALL_WORK_EXIT || cached_flags & SYSCALL_EXIT_WORK))
		syscall_exit_work(regs, cached_flags, work);
}

__visible noinstr void syscall_exit_to_user_mode(struct pt_regs *regs)
{
	instrumentation_begin();
	syscall_exit_to_user_mode_prepare(regs);
	local_irq_disable_exit_to_user();
	exit_to_user_mode_prepare(regs);
	instrumentation_end();
	exit_to_user_mode();
}

noinstr void irqentry_enter_from_user_mode(struct pt_regs *regs)
{
	enter_from_user_mode(regs);
}

noinstr void irqentry_exit_to_user_mode(struct pt_regs *regs)
{
	instrumentation_begin();
	exit_to_user_mode_prepare(regs);
	instrumentation_end();
	exit_to_user_mode();
}

noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
{
	irqentry_state_t ret = {
		.exit_rcu = false,
	};

	if (user_mode(regs)) {
		irqentry_enter_from_user_mode(regs);
		return ret;
	}

	/*
	 * If this entry hit the idle task invoke rcu_irq_enter() whether
	 * RCU is watching or not.
	 *
	 * Interrupts can nest when the first interrupt invokes softirq
	 * processing on return which enables interrupts.
	 *
	 * Scheduler ticks in the idle task can mark quiescent state and
	 * terminate a grace period, if and only if the timer interrupt is
	 * not nested into another interrupt.
	 *
	 * Checking for rcu_is_watching() here would prevent the nesting
	 * interrupt to invoke rcu_irq_enter(). If that nested interrupt is
	 * the tick then rcu_flavor_sched_clock_irq() would wrongfully
	 * assume that it is the first interrupt and eventually claim
	 * quiescent state and end grace periods prematurely.
	 *
	 * Unconditionally invoke rcu_irq_enter() so RCU state stays
	 * consistent.
	 *
	 * TINY_RCU does not support EQS, so let the compiler eliminate
	 * this part when enabled.
	 */
	if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) {
		/*
		 * If RCU is not watching then the same careful
		 * sequence vs. lockdep and tracing is required
		 * as in irqentry_enter_from_user_mode().
		 */
		lockdep_hardirqs_off(CALLER_ADDR0);
		rcu_irq_enter();
		instrumentation_begin();
		trace_hardirqs_off_finish();
		instrumentation_end();

		ret.exit_rcu = true;
		return ret;
	}

	/*
	 * If RCU is watching then RCU only wants to check whether it needs
	 * to restart the tick in NOHZ mode. rcu_irq_enter_check_tick()
	 * already contains a warning when RCU is not watching, so no point
	 * in having another one here.
	 */
	lockdep_hardirqs_off(CALLER_ADDR0);
	instrumentation_begin();
	rcu_irq_enter_check_tick();
	trace_hardirqs_off_finish();
	instrumentation_end();

	return ret;
}

void irqentry_exit_cond_resched(void)
{
	if (!preempt_count()) {
		/* Sanity check RCU and thread stack */
		rcu_irq_exit_check_preempt();
		if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
			WARN_ON_ONCE(!on_thread_stack());
		if (need_resched())
			preempt_schedule_irq();
	}
}

noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
{
	lockdep_assert_irqs_disabled();

	/* Check whether this returns to user mode */
	if (user_mode(regs)) {
		irqentry_exit_to_user_mode(regs);
	} else if (!regs_irqs_disabled(regs)) {
		/*
		 * If RCU was not watching on entry this needs to be done
		 * carefully and needs the same ordering of lockdep/tracing
		 * and RCU as the return to user mode path.
		 */
		if (state.exit_rcu) {
			instrumentation_begin();
			/* Tell the tracer that IRET will enable interrupts */
			trace_hardirqs_on_prepare();
			lockdep_hardirqs_on_prepare(CALLER_ADDR0);
			instrumentation_end();
			rcu_irq_exit();
			lockdep_hardirqs_on(CALLER_ADDR0);
			return;
		}

		instrumentation_begin();
		if (IS_ENABLED(CONFIG_PREEMPTION))
			irqentry_exit_cond_resched();
		/* Covers both tracing and lockdep */
		trace_hardirqs_on();
		instrumentation_end();
	} else {
		/*
		 * IRQ flags state is correct already. Just tell RCU if it
		 * was not watching on entry.
		 */
		if (state.exit_rcu)
			rcu_irq_exit();
	}
}
irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs)
{
	irqentry_state_t irq_state;

	/* Snapshot lockdep's IRQ-enabled view so the exit path can restore it */
	irq_state.lockdep = lockdep_hardirqs_enabled();

	__nmi_enter();
	lockdep_hardirqs_off(CALLER_ADDR0);
	lockdep_hardirq_enter();
	rcu_nmi_enter();

	instrumentation_begin();
	trace_hardirqs_off_finish();
	ftrace_nmi_enter();
	instrumentation_end();

	return irq_state;
}

void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state)
{
	instrumentation_begin();
	ftrace_nmi_exit();
	/* Only restore the IRQs-on state if it was on when the NMI hit */
	if (irq_state.lockdep) {
		trace_hardirqs_on_prepare();
		lockdep_hardirqs_on_prepare(CALLER_ADDR0);
	}
	instrumentation_end();

	rcu_nmi_exit();
	lockdep_hardirq_exit();
	if (irq_state.lockdep)
		lockdep_hardirqs_on(CALLER_ADDR0);
	__nmi_exit();
}