// SPDX-License-Identifier: GPL-2.0

#include <linux/context_tracking.h>
#include <linux/entry-common.h>
#include <linux/resume_user_mode.h>
#include <linux/highmem.h>
#include <linux/jump_label.h>
#include <linux/kmsan.h>
#include <linux/livepatch.h>
#include <linux/audit.h>
#include <linux/tick.h>

#include "common.h"

#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>

/* See comment for enter_from_user_mode() in entry-common.h */
static __always_inline void __enter_from_user_mode(struct pt_regs *regs)
{
	arch_enter_from_user_mode(regs);
	lockdep_hardirqs_off(CALLER_ADDR0);

	CT_WARN_ON(ct_state() != CONTEXT_USER);
	user_exit_irqoff();

	instrumentation_begin();
	kmsan_unpoison_entry_regs(regs);
	trace_hardirqs_off_finish();
	instrumentation_end();
}

void noinstr enter_from_user_mode(struct pt_regs *regs)
{
	__enter_from_user_mode(regs);
}

/*
 * Emit the audit record for syscall entry. Only the first four syscall
 * arguments are handed to audit_syscall_entry().
 */
static inline void syscall_enter_audit(struct pt_regs *regs, long syscall)
{
	if (unlikely(audit_context())) {
		unsigned long args[6];

		syscall_get_arguments(current, regs, args);
		audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]);
	}
}

/*
 * Slow path of syscall entry work: user dispatch, ptrace, seccomp,
 * tracepoint and audit. Returns the (possibly rewritten) syscall number,
 * or -1L when the syscall must be skipped.
 */
static long syscall_trace_enter(struct pt_regs *regs, long syscall,
				unsigned long work)
{
	long ret = 0;

	/*
	 * Handle Syscall User Dispatch. This must come first, since
	 * the ABI here can be something that doesn't make sense for
	 * other syscall_work features.
	 */
	if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
		if (syscall_user_dispatch(regs))
			return -1L;
	}

	/* Handle ptrace */
	if (work & (SYSCALL_WORK_SYSCALL_TRACE | SYSCALL_WORK_SYSCALL_EMU)) {
		ret = ptrace_report_syscall_entry(regs);
		if (ret || (work & SYSCALL_WORK_SYSCALL_EMU))
			return -1L;
	}

	/* Do seccomp after ptrace, to catch any tracer changes. */
	if (work & SYSCALL_WORK_SECCOMP) {
		ret = __secure_computing(NULL);
		if (ret == -1L)
			return ret;
	}

	/* Either of the above might have changed the syscall number */
	syscall = syscall_get_nr(current, regs);

	if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT))
		trace_sys_enter(regs, syscall);

	syscall_enter_audit(regs, syscall);

	return ret ? : syscall;
}

static __always_inline long
__syscall_enter_from_user_work(struct pt_regs *regs, long syscall)
{
	unsigned long work = READ_ONCE(current_thread_info()->syscall_work);

	/* Take the slow path only when some SYSCALL_WORK_ENTER bit is set */
	if (work & SYSCALL_WORK_ENTER)
		syscall = syscall_trace_enter(regs, syscall, work);

	return syscall;
}

long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall)
{
	return __syscall_enter_from_user_work(regs, syscall);
}

noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
{
	long ret;

	__enter_from_user_mode(regs);

	instrumentation_begin();
	local_irq_enable();
	ret = __syscall_enter_from_user_work(regs, syscall);
	instrumentation_end();

	return ret;
}

noinstr void syscall_enter_from_user_mode_prepare(struct pt_regs *regs)
{
	__enter_from_user_mode(regs);
	instrumentation_begin();
	local_irq_enable();
	instrumentation_end();
}

/* See comment for exit_to_user_mode() in entry-common.h */
static __always_inline void __exit_to_user_mode(void)
{
	instrumentation_begin();
	trace_hardirqs_on_prepare();
	lockdep_hardirqs_on_prepare();
	instrumentation_end();

	user_enter_irqoff();
	arch_exit_to_user_mode();
	lockdep_hardirqs_on(CALLER_ADDR0);
}

void noinstr exit_to_user_mode(void)
{
	__exit_to_user_mode();
}

/* Workaround to allow gradual conversion of architecture code */
void __weak arch_do_signal_or_restart(struct pt_regs *regs) { }

/*
 * Process all pending TIF work before returning to user space. Runs with
 * interrupts disabled on entry of each iteration; the work handlers run
 * with interrupts enabled. Returns the final, rechecked work flags.
 */
static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
					    unsigned long ti_work)
{
	/*
	 * Before returning to user space ensure that all pending work
	 * items have been completed.
	 */
	while (ti_work & EXIT_TO_USER_MODE_WORK) {

		local_irq_enable_exit_to_user(ti_work);

		if (ti_work & _TIF_NEED_RESCHED)
			schedule();

		if (ti_work & _TIF_UPROBE)
			uprobe_notify_resume(regs);

		if (ti_work & _TIF_PATCH_PENDING)
			klp_update_patch_state(current);

		if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
			arch_do_signal_or_restart(regs);

		if (ti_work & _TIF_NOTIFY_RESUME)
			resume_user_mode_work(regs);

		/* Architecture specific TIF work */
		arch_exit_to_user_mode_work(regs, ti_work);

		/*
		 * Disable interrupts and reevaluate the work flags as they
		 * might have changed while interrupts and preemption was
		 * enabled above.
		 */
		local_irq_disable_exit_to_user();

		/* Check if any of the above work has queued a deferred wakeup */
		tick_nohz_user_enter_prepare();

		ti_work = read_thread_flags();
	}

	/* Return the latest work state for arch_exit_to_user_mode() */
	return ti_work;
}

/*
 * Common exit-to-user preparation: drain TIF work, give the architecture
 * its last-minute hook and run the lockdep/address-limit sanity checks.
 * Must be called with interrupts disabled.
 */
static void exit_to_user_mode_prepare(struct pt_regs *regs)
{
	unsigned long ti_work = read_thread_flags();

	lockdep_assert_irqs_disabled();

	/* Flush pending rcuog wakeup before the last need_resched() check */
	tick_nohz_user_enter_prepare();

	if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
		ti_work = exit_to_user_mode_loop(regs, ti_work);

	arch_exit_to_user_mode_prepare(regs, ti_work);

	/* Ensure that the address limit is intact and no locks are held */
	addr_limit_user_check();
	kmap_assert_nomap();
	lockdep_assert_irqs_disabled();
	lockdep_sys_exit();
}
/*
 * If SYSCALL_EMU is set, then the only reason to report is when
 * SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall
 * instruction has been already reported in syscall_enter_from_user_mode().
 */
static inline bool report_single_step(unsigned long work)
{
	if (work & SYSCALL_WORK_SYSCALL_EMU)
		return false;

	return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP;
}

/*
 * Slow path of syscall exit work: audit, tracepoint and ptrace
 * single-step/exit reporting.
 */
static void syscall_exit_work(struct pt_regs *regs, unsigned long work)
{
	bool step;

	/*
	 * If the syscall was rolled back due to syscall user dispatching,
	 * then the tracers below are not invoked for the same reason as
	 * the entry side was not invoked in syscall_trace_enter(): The ABI
	 * of these syscalls is unknown.
	 */
	if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
		if (unlikely(current->syscall_dispatch.on_dispatch)) {
			current->syscall_dispatch.on_dispatch = false;
			return;
		}
	}

	audit_syscall_exit(regs);

	if (work & SYSCALL_WORK_SYSCALL_TRACEPOINT)
		trace_sys_exit(regs, syscall_get_return_value(current, regs));

	step = report_single_step(work);
	if (step || work & SYSCALL_WORK_SYSCALL_TRACE)
		ptrace_report_syscall_exit(regs, step);
}
/*
 * Syscall specific exit to user mode preparation. Runs with interrupts
 * enabled.
 */
static void syscall_exit_to_user_mode_prepare(struct pt_regs *regs)
{
	unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
	unsigned long nr = syscall_get_nr(current, regs);

	CT_WARN_ON(ct_state() != CONTEXT_KERNEL);

	/* Catch syscalls which return with interrupts disabled */
	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
		if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr))
			local_irq_enable();
	}

	rseq_syscall(regs);

	/*
	 * Do one-time syscall specific work. If these work items are
	 * enabled, we want to run them exactly once per syscall exit with
	 * interrupts enabled.
	 */
	if (unlikely(work & SYSCALL_WORK_EXIT))
		syscall_exit_work(regs, work);
}

static __always_inline void __syscall_exit_to_user_mode_work(struct pt_regs *regs)
{
	syscall_exit_to_user_mode_prepare(regs);
	local_irq_disable_exit_to_user();
	exit_to_user_mode_prepare(regs);
}

void syscall_exit_to_user_mode_work(struct pt_regs *regs)
{
	__syscall_exit_to_user_mode_work(regs);
}

__visible noinstr void syscall_exit_to_user_mode(struct pt_regs *regs)
{
	instrumentation_begin();
	__syscall_exit_to_user_mode_work(regs);
	instrumentation_end();
	__exit_to_user_mode();
}

noinstr void irqentry_enter_from_user_mode(struct pt_regs *regs)
{
	__enter_from_user_mode(regs);
}

noinstr void irqentry_exit_to_user_mode(struct pt_regs *regs)
{
	instrumentation_begin();
	exit_to_user_mode_prepare(regs);
	instrumentation_end();
	__exit_to_user_mode();
}

noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
{
	irqentry_state_t ret = {
		.exit_rcu = false,
	};

	if (user_mode(regs)) {
		irqentry_enter_from_user_mode(regs);
		return ret;
	}

	/*
	 * If this entry hit the idle task invoke ct_irq_enter() whether
	 * RCU is watching or not.
	 *
	 * Interrupts can nest when the first interrupt invokes softirq
	 * processing on return which enables interrupts.
	 *
	 * Scheduler ticks in the idle task can mark quiescent state and
	 * terminate a grace period, if and only if the timer interrupt is
	 * not nested into another interrupt.
	 *
	 * Checking for rcu_is_watching() here would prevent the nesting
	 * interrupt to invoke ct_irq_enter(). If that nested interrupt is
	 * the tick then rcu_flavor_sched_clock_irq() would wrongfully
	 * assume that it is the first interrupt and eventually claim
	 * quiescent state and end grace periods prematurely.
	 *
	 * Unconditionally invoke ct_irq_enter() so RCU state stays
	 * consistent.
	 *
	 * TINY_RCU does not support EQS, so let the compiler eliminate
	 * this part when enabled.
	 */
	if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) {
		/*
		 * If RCU is not watching then the same careful
		 * sequence vs. lockdep and tracing is required
		 * as in irqentry_enter_from_user_mode().
		 */
		lockdep_hardirqs_off(CALLER_ADDR0);
		ct_irq_enter();
		instrumentation_begin();
		kmsan_unpoison_entry_regs(regs);
		trace_hardirqs_off_finish();
		instrumentation_end();

		ret.exit_rcu = true;
		return ret;
	}

	/*
	 * If RCU is watching then RCU only wants to check whether it needs
	 * to restart the tick in NOHZ mode. rcu_irq_enter_check_tick()
	 * already contains a warning when RCU is not watching, so no point
	 * in having another one here.
	 */
	lockdep_hardirqs_off(CALLER_ADDR0);
	instrumentation_begin();
	kmsan_unpoison_entry_regs(regs);
	rcu_irq_enter_check_tick();
	trace_hardirqs_off_finish();
	instrumentation_end();

	return ret;
}

/* Preempt on interrupt exit if allowed from the current context */
void raw_irqentry_exit_cond_resched(void)
{
	if (!preempt_count()) {
		/* Sanity check RCU and thread stack */
		rcu_irq_exit_check_preempt();
		if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
			WARN_ON_ONCE(!on_thread_stack());
		if (need_resched())
			preempt_schedule_irq();
	}
}
#ifdef CONFIG_PREEMPT_DYNAMIC
#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
DEFINE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched);
#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
void dynamic_irqentry_exit_cond_resched(void)
{
	if (!static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched))
		return;
	raw_irqentry_exit_cond_resched();
}
#endif
#endif

noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
{
	lockdep_assert_irqs_disabled();

	/* Check whether this returns to user mode */
	if (user_mode(regs)) {
		irqentry_exit_to_user_mode(regs);
	} else if (!regs_irqs_disabled(regs)) {
		/*
		 * If RCU was not watching on entry this needs to be done
		 * carefully and needs the same ordering of lockdep/tracing
		 * and RCU as the return to user mode path.
		 */
		if (state.exit_rcu) {
			instrumentation_begin();
			/* Tell the tracer that IRET will enable interrupts */
			trace_hardirqs_on_prepare();
			lockdep_hardirqs_on_prepare();
			instrumentation_end();
			ct_irq_exit();
			lockdep_hardirqs_on(CALLER_ADDR0);
			return;
		}

		instrumentation_begin();
		if (IS_ENABLED(CONFIG_PREEMPTION))
			irqentry_exit_cond_resched();

		/* Covers both tracing and lockdep */
		trace_hardirqs_on();
		instrumentation_end();
	} else {
		/*
		 * IRQ flags state is correct already. Just tell RCU if it
		 * was not watching on entry.
		 */
		if (state.exit_rcu)
			ct_irq_exit();
	}
}

irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs)
{
	irqentry_state_t irq_state;

	irq_state.lockdep = lockdep_hardirqs_enabled();

	__nmi_enter();
	lockdep_hardirqs_off(CALLER_ADDR0);
	lockdep_hardirq_enter();
	ct_nmi_enter();

	instrumentation_begin();
	kmsan_unpoison_entry_regs(regs);
	trace_hardirqs_off_finish();
	ftrace_nmi_enter();
	instrumentation_end();

	return irq_state;
}

void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state)
{
	instrumentation_begin();
	ftrace_nmi_exit();
	if (irq_state.lockdep) {
		trace_hardirqs_on_prepare();
		lockdep_hardirqs_on_prepare();
	}
	instrumentation_end();

	ct_nmi_exit();
	lockdep_hardirq_exit();
	if (irq_state.lockdep)
		lockdep_hardirqs_on(CALLER_ADDR0);
	__nmi_exit();
}