// SPDX-License-Identifier: GPL-2.0-only
/*
 * Context tracking: Probe on high level context boundaries, such as between
 * kernel and userspace. This includes syscall and exception entry/exit.
 *
 * This is used by RCU to remove its dependency on the timer tick while a CPU
 * runs in userspace.
 *
 * Started by Frederic Weisbecker:
 *
 * Copyright (C) 2012 Red Hat, Inc., Frederic Weisbecker <fweisbec@redhat.com>
 *
 * Many thanks to Gilad Ben-Yossef, Paul McKenney, Ingo Molnar, Andrew Morton,
 * Steven Rostedt, Peter Zijlstra for suggestions and improvements.
 *
 */

#include <linux/context_tracking.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/hardirq.h>
#include <linux/export.h>
#include <linux/kprobes.h>

#ifdef CONFIG_CONTEXT_TRACKING_USER

#define CREATE_TRACE_POINTS
#include <trace/events/context_tracking.h>

DEFINE_STATIC_KEY_FALSE(context_tracking_key);
EXPORT_SYMBOL_GPL(context_tracking_key);

DEFINE_PER_CPU(struct context_tracking, context_tracking);
EXPORT_SYMBOL_GPL(context_tracking);

static noinstr bool context_tracking_recursion_enter(void)
{
	int recursion;

	recursion = __this_cpu_inc_return(context_tracking.recursion);
	if (recursion == 1)
		return true;

	WARN_ONCE((recursion < 1), "Invalid context tracking recursion value %d\n", recursion);
	__this_cpu_dec(context_tracking.recursion);

	return false;
}

static __always_inline void context_tracking_recursion_exit(void)
{
	__this_cpu_dec(context_tracking.recursion);
}

/**
 * __ct_user_enter - Inform the context tracking that the CPU is going
 *                   to enter user or guest space mode.
 * @state: the context state we are entering, either CONTEXT_USER or CONTEXT_GUEST
 *
 * This function must be called right before we switch from the kernel
 * to user or guest space, when it's guaranteed the remaining kernel
 * instructions to execute won't use any RCU read-side critical section,
 * because this function sets RCU in an extended quiescent state.
 */
void noinstr __ct_user_enter(enum ctx_state state)
{
	/* Kernel threads aren't supposed to go to userspace */
	WARN_ON_ONCE(!current->mm);

	if (!context_tracking_recursion_enter())
		return;

	if (__this_cpu_read(context_tracking.state) != state) {
		if (__this_cpu_read(context_tracking.active)) {
			/*
			 * At this stage, only low level arch entry code remains and
			 * then we'll run in userspace. We can assume there won't be
			 * any RCU read-side critical section until the next call to
			 * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency
			 * on the tick.
			 */
			if (state == CONTEXT_USER) {
				instrumentation_begin();
				trace_user_enter(0);
				vtime_user_enter(current);
				instrumentation_end();
			}
			rcu_user_enter();
		}
		/*
		 * Even if context tracking is disabled on this CPU, because it's outside
		 * the full dynticks mask for example, we still have to keep track of the
		 * context transitions and states to prevent inconsistency on those of
		 * other CPUs.
		 * If a task triggers an exception in userspace, sleeps in the exception
		 * handler and then migrates to another CPU, that new CPU must know where
		 * the exception returns by the time we call exception_exit().
		 * This information can only be provided by the previous CPU when it called
		 * exception_enter().
		 * OTOH we can spare the calls to vtime and RCU when context_tracking.active
		 * is false because we know that CPU is not tickless.
		 */
		__this_cpu_write(context_tracking.state, state);
	}
	context_tracking_recursion_exit();
}
EXPORT_SYMBOL_GPL(__ct_user_enter);
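/*
 * Illustrative sketch, not part of the original file: the preferred way to
 * reach __ct_user_enter() is through user_enter_irqoff() (or
 * context_tracking_guest_enter() for guests), from arch entry code that has
 * already disabled IRQs right before returning to userspace. The helper name
 * below is hypothetical.
 */
static __always_inline void example_prepare_return_to_user(void)
{
	/*
	 * IRQs are off and no RCU read-side critical section may run past
	 * this point: tell context tracking we are about to enter userspace.
	 */
	user_enter_irqoff();
}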
/*
 * OBSOLETE:
 * This function should be noinstr but the below local_irq_restore() is
 * unsafe because it involves illegal RCU uses through tracing and lockdep.
 * This is unlikely to be fixed as this function is obsolete. The preferred
 * way is to call __ct_user_enter() through user_enter_irqoff()
 * or context_tracking_guest_enter(). It should be the arch entry code's
 * responsibility to call into context tracking with IRQs disabled.
 */
void ct_user_enter(enum ctx_state state)
{
	unsigned long flags;

	/*
	 * Some contexts may involve an exception occurring in an irq,
	 * leading to that nesting:
	 * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
	 * This would mess up the dyntick_nesting count though. And rcu_irq_*()
	 * helpers are enough to protect RCU uses inside the exception. So
	 * just return immediately if we detect we are in an IRQ.
	 */
	if (in_interrupt())
		return;

	local_irq_save(flags);
	__ct_user_enter(state);
	local_irq_restore(flags);
}
NOKPROBE_SYMBOL(ct_user_enter);
EXPORT_SYMBOL_GPL(ct_user_enter);

/**
 * user_enter_callable() - Unfortunate ASM callable version of user_enter() for
 *                         archs that didn't manage to check the context tracking
 *                         static key from low level code.
 *
 * This OBSOLETE function should be noinstr but it unsafely calls
 * local_irq_restore(), involving illegal RCU uses through tracing and lockdep.
 * This is unlikely to be fixed as this function is obsolete. The preferred
 * way is to call user_enter_irqoff(). It should be the arch entry code's
 * responsibility to call into context tracking with IRQs disabled.
 */
void user_enter_callable(void)
{
	user_enter();
}
NOKPROBE_SYMBOL(user_enter_callable);

/**
 * __ct_user_exit - Inform the context tracking that the CPU is
 *                  exiting user or guest mode and entering the kernel.
 * @state: the context state we are leaving, either CONTEXT_USER or CONTEXT_GUEST
 *
 * This function must be called after we entered the kernel from user or
 * guest space, before any use of RCU read-side critical sections. This
 * potentially includes any high level kernel code like syscalls, exceptions,
 * signal handling, etc...
 *
 * This call supports re-entrancy. This way it can be called from any exception
 * handler without needing to know if we came from userspace or not.
 */
void noinstr __ct_user_exit(enum ctx_state state)
{
	if (!context_tracking_recursion_enter())
		return;

	if (__this_cpu_read(context_tracking.state) == state) {
		if (__this_cpu_read(context_tracking.active)) {
			/*
			 * We are going to run code that may use RCU. Inform
			 * RCU core about that (ie: we may need the tick again).
			 */
			rcu_user_exit();
			if (state == CONTEXT_USER) {
				instrumentation_begin();
				vtime_user_exit(current);
				trace_user_exit(0);
				instrumentation_end();
			}
		}
		__this_cpu_write(context_tracking.state, CONTEXT_KERNEL);
	}
	context_tracking_recursion_exit();
}
EXPORT_SYMBOL_GPL(__ct_user_exit);
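/*
 * Illustrative sketch, not part of the original file: because __ct_user_exit()
 * checks the recorded per-CPU state, an exception handler can bracket its work
 * with exception_enter()/exception_exit() without knowing whether it
 * interrupted userspace or the kernel. The handler name below is hypothetical.
 */
static __always_inline void example_exception_handler(void)
{
	enum ctx_state prev_state;

	/* Force CONTEXT_KERNEL and remember which context we interrupted. */
	prev_state = exception_enter();

	/* ... handle the exception; the task may sleep and migrate here ... */

	/* Re-enter user (or guest) context only if that is where we came from. */
	exception_exit(prev_state);
}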
/*
 * OBSOLETE:
 * This function should be noinstr but the below local_irq_save() is
 * unsafe because it involves illegal RCU uses through tracing and lockdep.
 * This is unlikely to be fixed as this function is obsolete. The preferred
 * way is to call __ct_user_exit() through user_exit_irqoff()
 * or context_tracking_guest_exit(). It should be the arch entry code's
 * responsibility to call into context tracking with IRQs disabled.
 */
void ct_user_exit(enum ctx_state state)
{
	unsigned long flags;

	if (in_interrupt())
		return;

	local_irq_save(flags);
	__ct_user_exit(state);
	local_irq_restore(flags);
}
NOKPROBE_SYMBOL(ct_user_exit);
EXPORT_SYMBOL_GPL(ct_user_exit);

/**
 * user_exit_callable() - Unfortunate ASM callable version of user_exit() for
 *                        archs that didn't manage to check the context tracking
 *                        static key from low level code.
 *
 * This OBSOLETE function should be noinstr but it unsafely calls local_irq_save(),
 * involving illegal RCU uses through tracing and lockdep. This is unlikely
 * to be fixed as this function is obsolete. The preferred way is to call
 * user_exit_irqoff(). It should be the arch entry code's responsibility to
 * call into context tracking with IRQs disabled.
 */
void user_exit_callable(void)
{
	user_exit();
}
NOKPROBE_SYMBOL(user_exit_callable);

void __init ct_cpu_track_user(int cpu)
{
	static __initdata bool initialized = false;

	if (!per_cpu(context_tracking.active, cpu)) {
		per_cpu(context_tracking.active, cpu) = true;
		static_branch_inc(&context_tracking_key);
	}

	if (initialized)
		return;

#ifdef CONFIG_HAVE_TIF_NOHZ
	/*
	 * Set TIF_NOHZ to init/0 and let it propagate to all tasks through fork.
	 * This assumes that init is the only task at this early boot stage.
	 */
	set_tsk_thread_flag(&init_task, TIF_NOHZ);
#endif
	WARN_ON_ONCE(!tasklist_empty());

	initialized = true;
}

#ifdef CONFIG_CONTEXT_TRACKING_USER_FORCE
void __init context_tracking_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		ct_cpu_track_user(cpu);
}
#endif

#endif /* #ifdef CONFIG_CONTEXT_TRACKING_USER */
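/*
 * Illustrative sketch, not part of the original file: without
 * CONFIG_CONTEXT_TRACKING_USER_FORCE, ct_cpu_track_user() is expected to be
 * called from the boot code that sets up full dynticks (nohz_full) CPUs,
 * roughly along these lines (hypothetical helper name, tick_nohz_full_mask
 * assumed to be already allocated):
 *
 *	static void __init example_nohz_full_track_user(void)
 *	{
 *		int cpu;
 *
 *		for_each_cpu(cpu, tick_nohz_full_mask)
 *			ct_cpu_track_user(cpu);
 *	}
 */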