#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/prctl.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/pm.h>
#include <linux/clockchips.h>
#include <linux/random.h>
#include <linux/user-return-notifier.h>
#include <linux/dmi.h>
#include <linux/utsname.h>
#include <linux/stackprotector.h>
#include <linux/tick.h>
#include <linux/cpuidle.h>
#include <trace/events/power.h>
#include <linux/hw_breakpoint.h>
#include <asm/cpu.h>
#include <asm/apic.h>
#include <asm/syscalls.h>
#include <asm/idle.h>
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#include <asm/debugreg.h>
#include <asm/nmi.h>

/*
 * per-CPU TSS segments. Threads are completely 'soft' on Linux,
 * no more per-task TSS's. The TSS size is kept cacheline-aligned
 * so they are allowed to end up in the .data..cacheline_aligned
 * section. Since TSS's are completely CPU-local, we want them
 * on exact cacheline boundaries, to eliminate cacheline ping-pong.
 */
__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;

#ifdef CONFIG_X86_64
static DEFINE_PER_CPU(unsigned char, is_idle);
static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
	atomic_notifier_chain_register(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_register);

void idle_notifier_unregister(struct notifier_block *n)
{
	atomic_notifier_chain_unregister(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_unregister);
#endif

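/*
 * Illustrative sketch only (not part of the original file): a hypothetical
 * x86-64 module could subscribe to the IDLE_START/IDLE_END notifications
 * published via the idle_notifier chain above by calling
 * idle_notifier_register().  The callback and all names below are
 * assumptions made purely for illustration.
 */
#if 0	/* example only, never built */
static int example_idle_notify(struct notifier_block *nb,
			       unsigned long action, void *unused)
{
	switch (action) {
	case IDLE_START:
		/* this CPU is about to run its idle routine */
		break;
	case IDLE_END:
		/* this CPU left idle, typically because of an interrupt */
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block example_idle_nb = {
	.notifier_call = example_idle_notify,
};

static int __init example_idle_init(void)
{
	idle_notifier_register(&example_idle_nb);
	return 0;
}
#endif
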
struct kmem_cache *task_xstate_cachep;
EXPORT_SYMBOL_GPL(task_xstate_cachep);

/*
 * this gets called so that we can store lazy state into memory and copy the
 * current task into the new thread.
 */
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
	*dst = *src;

	dst->thread.fpu_counter = 0;
	dst->thread.fpu.has_fpu = 0;
	dst->thread.fpu.last_cpu = ~0;
	dst->thread.fpu.state = NULL;
	if (tsk_used_math(src)) {
		int err = fpu_alloc(&dst->thread.fpu);
		if (err)
			return err;
		fpu_copy(dst, src);
	}
	return 0;
}

void free_thread_xstate(struct task_struct *tsk)
{
	fpu_free(&tsk->thread.fpu);
}

void arch_release_task_struct(struct task_struct *tsk)
{
	free_thread_xstate(tsk);
}

void arch_task_cache_init(void)
{
	task_xstate_cachep =
		kmem_cache_create("task_xstate", xstate_size,
				  __alignof__(union thread_xstate),
				  SLAB_PANIC | SLAB_NOTRACK, NULL);
	setup_xstate_comp();
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
	struct task_struct *me = current;
	struct thread_struct *t = &me->thread;
	unsigned long *bp = t->io_bitmap_ptr;

	if (bp) {
		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

		t->io_bitmap_ptr = NULL;
		clear_thread_flag(TIF_IO_BITMAP);
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
		t->io_bitmap_max = 0;
		put_cpu();
		kfree(bp);
	}

	drop_fpu(me);
}

void flush_thread(void)
{
	struct task_struct *tsk = current;

	flush_ptrace_hw_breakpoint(tsk);
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
	drop_init_fpu(tsk);
	/*
	 * Free the FPU state for non xsave platforms. They get reallocated
	 * lazily at the first use.
	 */
	if (!use_eager_fpu())
		free_thread_xstate(tsk);
}

static void hard_disable_TSC(void)
{
	write_cr4(read_cr4() | X86_CR4_TSD);
}

void disable_TSC(void)
{
	preempt_disable();
	if (!test_and_set_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_disable_TSC();
	preempt_enable();
}

static void hard_enable_TSC(void)
{
	write_cr4(read_cr4() & ~X86_CR4_TSD);
}

static void enable_TSC(void)
{
	preempt_disable();
	if (test_and_clear_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_enable_TSC();
	preempt_enable();
}

int get_tsc_mode(unsigned long adr)
{
	unsigned int val;

	if (test_thread_flag(TIF_NOTSC))
		val = PR_TSC_SIGSEGV;
	else
		val = PR_TSC_ENABLE;

	return put_user(val, (unsigned int __user *)adr);
}

int set_tsc_mode(unsigned int val)
{
	if (val == PR_TSC_SIGSEGV)
		disable_TSC();
	else if (val == PR_TSC_ENABLE)
		enable_TSC();
	else
		return -EINVAL;

	return 0;
}

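/*
 * Illustrative sketch only (not part of the original file): get_tsc_mode()
 * and set_tsc_mode() above back the PR_GET_TSC/PR_SET_TSC prctl()
 * operations.  A user-space task could turn RDTSC into a SIGSEGV for
 * itself roughly like this (user-space code, shown only as a comment):
 *
 *	#include <sys/prctl.h>
 *
 *	if (prctl(PR_SET_TSC, PR_TSC_SIGSEGV, 0, 0, 0) == 0) {
 *		// any later RDTSC in this task now raises SIGSEGV
 *	}
 */
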
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
		      struct tss_struct *tss)
{
	struct thread_struct *prev, *next;

	prev = &prev_p->thread;
	next = &next_p->thread;

	if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^
	    test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) {
		unsigned long debugctl = get_debugctlmsr();

		debugctl &= ~DEBUGCTLMSR_BTF;
		if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP))
			debugctl |= DEBUGCTLMSR_BTF;

		update_debugctlmsr(debugctl);
	}

	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
		/* prev and next are different */
		if (test_tsk_thread_flag(next_p, TIF_NOTSC))
			hard_disable_TSC();
		else
			hard_enable_TSC();
	}

	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
		/*
		 * Copy the relevant range of the IO bitmap.
		 * Normally this is 128 bytes or less:
		 */
		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
		       max(prev->io_bitmap_max, next->io_bitmap_max));
	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
		/*
		 * Clear any possible leftover bits:
		 */
		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
	}
	propagate_user_return_notify(prev_p, next_p);
}

/*
 * Idle related variables and functions
 */
unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
EXPORT_SYMBOL(boot_option_idle_override);

static void (*x86_idle)(void);

#ifndef CONFIG_SMP
static inline void play_dead(void)
{
	BUG();
}
#endif

#ifdef CONFIG_X86_64
void enter_idle(void)
{
	this_cpu_write(is_idle, 1);
	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
	if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
		return;
	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
	/* idle loop has pid 0 */
	if (current->pid)
		return;
	__exit_idle();
}
#endif

void arch_cpu_idle_enter(void)
{
	local_touch_nmi();
	enter_idle();
}

void arch_cpu_idle_exit(void)
{
	__exit_idle();
}

void arch_cpu_idle_dead(void)
{
	play_dead();
}

/*
 * Called from the generic idle code.
 */
void arch_cpu_idle(void)
{
	x86_idle();
}

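/*
 * Illustrative sketch only (not part of the original file): the generic
 * idle loop in kernel/sched/idle.c is what drives the arch hooks above.
 * Heavily simplified, and the exact ordering shown is an assumption of
 * this sketch, it behaves roughly like:
 *
 *	while (!need_resched()) {
 *		local_irq_disable();
 *		arch_cpu_idle_enter();
 *		arch_cpu_idle();	// ends up in x86_idle(), e.g. default_idle()
 *		arch_cpu_idle_exit();
 *	}
 *	schedule_preempt_disabled();
 */
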
/*
 * We use this if we don't have any better idle routine..
 */
void default_idle(void)
{
	trace_cpu_idle_rcuidle(1, smp_processor_id());
	safe_halt();
	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
}
#ifdef CONFIG_APM_MODULE
EXPORT_SYMBOL(default_idle);
#endif

#ifdef CONFIG_XEN
bool xen_set_default_idle(void)
{
	bool ret = !!x86_idle;

	x86_idle = default_idle;

	return ret;
}
#endif

void stop_this_cpu(void *dummy)
{
	local_irq_disable();
	/*
	 * Remove this CPU:
	 */
	set_cpu_online(smp_processor_id(), false);
	disable_local_APIC();

	for (;;)
		halt();
}

bool amd_e400_c1e_detected;
EXPORT_SYMBOL(amd_e400_c1e_detected);

static cpumask_var_t amd_e400_c1e_mask;

void amd_e400_remove_cpu(int cpu)
{
	if (amd_e400_c1e_mask != NULL)
		cpumask_clear_cpu(cpu, amd_e400_c1e_mask);
}

/*
 * AMD Erratum 400 aware idle routine. We check for C1E active in the interrupt
 * pending message MSR. If we detect C1E, then we handle it the same
 * way as C3 power states (local apic timer and TSC stop)
 */
static void amd_e400_idle(void)
{
	if (!amd_e400_c1e_detected) {
		u32 lo, hi;

		rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);

		if (lo & K8_INTP_C1E_ACTIVE_MASK) {
			amd_e400_c1e_detected = true;
			if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
				mark_tsc_unstable("TSC halt in AMD C1E");
			pr_info("System has AMD C1E enabled\n");
		}
	}

	if (amd_e400_c1e_detected) {
		int cpu = smp_processor_id();

		if (!cpumask_test_cpu(cpu, amd_e400_c1e_mask)) {
			cpumask_set_cpu(cpu, amd_e400_c1e_mask);
			/*
			 * Force broadcast so ACPI can not interfere.
			 */
			clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
					   &cpu);
			pr_info("Switch to broadcast mode on CPU%d\n", cpu);
		}
		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);

		default_idle();

		/*
		 * The switch back from broadcast mode needs to be
		 * called with interrupts disabled.
		 */
		local_irq_disable();
		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
		local_irq_enable();
	} else
		default_idle();
}

void select_idle_routine(const struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
	if (boot_option_idle_override == IDLE_POLL && smp_num_siblings > 1)
		pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n");
#endif
	if (x86_idle || boot_option_idle_override == IDLE_POLL)
		return;

	if (cpu_has_bug(c, X86_BUG_AMD_APIC_C1E)) {
		/* E400: APIC timer interrupt does not wake up CPU from C1e */
		pr_info("using AMD E400 aware idle routine\n");
		x86_idle = amd_e400_idle;
	} else
		x86_idle = default_idle;
}

void __init init_amd_e400_c1e_mask(void)
{
	/* If we're using amd_e400_idle, we need to allocate amd_e400_c1e_mask. */
	if (x86_idle == amd_e400_idle)
		zalloc_cpumask_var(&amd_e400_c1e_mask, GFP_KERNEL);
}

static int __init idle_setup(char *str)
{
	if (!str)
		return -EINVAL;

	if (!strcmp(str, "poll")) {
		pr_info("using polling idle threads\n");
		boot_option_idle_override = IDLE_POLL;
		cpu_idle_poll_ctrl(true);
	} else if (!strcmp(str, "halt")) {
		/*
		 * When the boot option of idle=halt is added, halt is
		 * forced to be used for CPU idle. In such case CPU C2/C3
		 * won't be used again.
		 * To continue to load the CPU idle driver, don't touch
		 * the boot_option_idle_override.
		 */
		x86_idle = default_idle;
		boot_option_idle_override = IDLE_HALT;
	} else if (!strcmp(str, "nomwait")) {
		/*
		 * If the boot option of "idle=nomwait" is added,
		 * it means that mwait will be disabled for CPU C2/C3
		 * states. In such case it won't touch the variable
		 * of boot_option_idle_override.
		 */
		boot_option_idle_override = IDLE_NOMWAIT;
	} else
		return -1;

	return 0;
}
early_param("idle", idle_setup);

unsigned long arch_align_stack(unsigned long sp)
{
	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		sp -= get_random_int() % 8192;
	return sp & ~0xf;
}

unsigned long arch_randomize_brk(struct mm_struct *mm)
{
	unsigned long range_end = mm->brk + 0x02000000;
	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}

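/*
 * Illustrative note (not part of the original file): idle_setup() above
 * implements the "idle=" kernel command-line parameter, e.g.:
 *
 *	idle=poll	- poll in the idle loop instead of halting
 *	idle=halt	- force default_idle() (HLT) as the idle routine
 *	idle=nomwait	- do not use MWAIT for C2/C3 idle states
 */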