#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/prctl.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/pm.h>
#include <linux/clockchips.h>
#include <linux/random.h>
#include <linux/user-return-notifier.h>
#include <linux/dmi.h>
#include <linux/utsname.h>
#include <linux/stackprotector.h>
#include <linux/tick.h>
#include <linux/cpuidle.h>
#include <trace/events/power.h>
#include <linux/hw_breakpoint.h>
#include <asm/cpu.h>
#include <asm/apic.h>
#include <asm/syscalls.h>
#include <asm/idle.h>
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#include <asm/debugreg.h>
#include <asm/nmi.h>

/*
 * per-CPU TSS segments. Threads are completely 'soft' on Linux,
 * no more per-task TSS's. The TSS size is kept cacheline-aligned
 * so they are allowed to end up in the .data..cacheline_aligned
 * section. Since TSS's are completely CPU-local, we want them
 * on exact cacheline boundaries, to eliminate cacheline ping-pong.
 */
DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;

#ifdef CONFIG_X86_64
static DEFINE_PER_CPU(unsigned char, is_idle);
static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
        atomic_notifier_chain_register(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_register);

void idle_notifier_unregister(struct notifier_block *n)
{
        atomic_notifier_chain_unregister(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_unregister);
#endif

struct kmem_cache *task_xstate_cachep;
EXPORT_SYMBOL_GPL(task_xstate_cachep);

/*
 * This gets called so that we can store lazy state into memory and copy the
 * current task into the new thread.
 */
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
        int ret;

        *dst = *src;
        if (fpu_allocated(&src->thread.fpu)) {
                memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu));
                ret = fpu_alloc(&dst->thread.fpu);
                if (ret)
                        return ret;
                fpu_copy(dst, src);
        }
        return 0;
}

void free_thread_xstate(struct task_struct *tsk)
{
        fpu_free(&tsk->thread.fpu);
}

void arch_release_task_struct(struct task_struct *tsk)
{
        free_thread_xstate(tsk);
}

void arch_task_cache_init(void)
{
        task_xstate_cachep =
                kmem_cache_create("task_xstate", xstate_size,
                                  __alignof__(union thread_xstate),
                                  SLAB_PANIC | SLAB_NOTRACK, NULL);
}

/*
 * Free current thread data structures etc.
 */
void exit_thread(void)
{
        struct task_struct *me = current;
        struct thread_struct *t = &me->thread;
        unsigned long *bp = t->io_bitmap_ptr;

        if (bp) {
                struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

                t->io_bitmap_ptr = NULL;
                clear_thread_flag(TIF_IO_BITMAP);
                /*
                 * Careful, clear this in the TSS too:
                 */
                memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
                t->io_bitmap_max = 0;
                put_cpu();
                kfree(bp);
        }

        drop_fpu(me);
}

void flush_thread(void)
{
        struct task_struct *tsk = current;

        flush_ptrace_hw_breakpoint(tsk);
        memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
        drop_init_fpu(tsk);
        /*
         * Free the FPU state for non-xsave platforms.
         * They get reallocated lazily at the first use.
         */
        if (!use_eager_fpu())
                free_thread_xstate(tsk);
}

static void hard_disable_TSC(void)
{
        write_cr4(read_cr4() | X86_CR4_TSD);
}

void disable_TSC(void)
{
        preempt_disable();
        if (!test_and_set_thread_flag(TIF_NOTSC))
                /*
                 * Must flip the CPU state synchronously with
                 * TIF_NOTSC in the current running context.
                 */
                hard_disable_TSC();
        preempt_enable();
}

static void hard_enable_TSC(void)
{
        write_cr4(read_cr4() & ~X86_CR4_TSD);
}

static void enable_TSC(void)
{
        preempt_disable();
        if (test_and_clear_thread_flag(TIF_NOTSC))
                /*
                 * Must flip the CPU state synchronously with
                 * TIF_NOTSC in the current running context.
                 */
                hard_enable_TSC();
        preempt_enable();
}

int get_tsc_mode(unsigned long adr)
{
        unsigned int val;

        if (test_thread_flag(TIF_NOTSC))
                val = PR_TSC_SIGSEGV;
        else
                val = PR_TSC_ENABLE;

        return put_user(val, (unsigned int __user *)adr);
}

int set_tsc_mode(unsigned int val)
{
        if (val == PR_TSC_SIGSEGV)
                disable_TSC();
        else if (val == PR_TSC_ENABLE)
                enable_TSC();
        else
                return -EINVAL;

        return 0;
}

void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
                      struct tss_struct *tss)
{
        struct thread_struct *prev, *next;

        prev = &prev_p->thread;
        next = &next_p->thread;

        if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^
            test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) {
                unsigned long debugctl = get_debugctlmsr();

                debugctl &= ~DEBUGCTLMSR_BTF;
                if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP))
                        debugctl |= DEBUGCTLMSR_BTF;

                update_debugctlmsr(debugctl);
        }

        if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
            test_tsk_thread_flag(next_p, TIF_NOTSC)) {
                /* prev and next are different */
                if (test_tsk_thread_flag(next_p, TIF_NOTSC))
                        hard_disable_TSC();
                else
                        hard_enable_TSC();
        }

        if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
                /*
                 * Copy the relevant range of the IO bitmap.
                 * Normally this is 128 bytes or less:
                 */
                memcpy(tss->io_bitmap, next->io_bitmap_ptr,
                       max(prev->io_bitmap_max, next->io_bitmap_max));
        } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
                /*
                 * Clear any possible leftover bits:
                 */
                memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
        }
        propagate_user_return_notify(prev_p, next_p);
}

/*
 * Idle related variables and functions
 */
unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
EXPORT_SYMBOL(boot_option_idle_override);

static void (*x86_idle)(void);

#ifndef CONFIG_SMP
static inline void play_dead(void)
{
        BUG();
}
#endif

#ifdef CONFIG_X86_64
void enter_idle(void)
{
        this_cpu_write(is_idle, 1);
        atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
        if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
                return;
        atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
        /* idle loop has pid 0 */
        if (current->pid)
                return;
        __exit_idle();
}
#endif

void arch_cpu_idle_prepare(void)
{
        /*
         * If we're the non-boot CPU, nothing set the stack canary up
         * for us. CPU0 already has it initialized but no harm in
         * doing it again. This is a good place for updating it, as
         * we won't ever return from this function (so the invalid
         * canaries already on the stack won't ever trigger).
         */
        boot_init_stack_canary();
}

void arch_cpu_idle_enter(void)
{
        local_touch_nmi();
        enter_idle();
}

void arch_cpu_idle_exit(void)
{
        __exit_idle();
}

void arch_cpu_idle_dead(void)
{
        play_dead();
}

/*
 * Called from the generic idle code.
 */
void arch_cpu_idle(void)
{
        if (cpuidle_idle_call())
                x86_idle();
        else
                local_irq_enable();
}

/*
 * We use this if we don't have any better idle routine.
 */
void default_idle(void)
{
        trace_cpu_idle_rcuidle(1, smp_processor_id());
        safe_halt();
        trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
}
#ifdef CONFIG_APM_MODULE
EXPORT_SYMBOL(default_idle);
#endif

#ifdef CONFIG_XEN
bool xen_set_default_idle(void)
{
        bool ret = !!x86_idle;

        x86_idle = default_idle;

        return ret;
}
#endif
void stop_this_cpu(void *dummy)
{
        local_irq_disable();
        /*
         * Remove this CPU:
         */
        set_cpu_online(smp_processor_id(), false);
        disable_local_APIC();

        for (;;)
                halt();
}

bool amd_e400_c1e_detected;
EXPORT_SYMBOL(amd_e400_c1e_detected);

static cpumask_var_t amd_e400_c1e_mask;

void amd_e400_remove_cpu(int cpu)
{
        if (amd_e400_c1e_mask != NULL)
                cpumask_clear_cpu(cpu, amd_e400_c1e_mask);
}

/*
 * AMD Erratum 400 aware idle routine. We check for C1E active in the
 * interrupt pending message MSR.
 * If we detect C1E, we handle it the same way as C3 power states
 * (local APIC timer and TSC stop).
 */
static void amd_e400_idle(void)
{
        if (!amd_e400_c1e_detected) {
                u32 lo, hi;

                rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);

                if (lo & K8_INTP_C1E_ACTIVE_MASK) {
                        amd_e400_c1e_detected = true;
                        if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
                                mark_tsc_unstable("TSC halt in AMD C1E");
                        pr_info("System has AMD C1E enabled\n");
                }
        }

        if (amd_e400_c1e_detected) {
                int cpu = smp_processor_id();

                if (!cpumask_test_cpu(cpu, amd_e400_c1e_mask)) {
                        cpumask_set_cpu(cpu, amd_e400_c1e_mask);
                        /*
                         * Force broadcast so ACPI cannot interfere.
                         */
                        clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
                                           &cpu);
                        pr_info("Switch to broadcast mode on CPU%d\n", cpu);
                }
                clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);

                default_idle();

                /*
                 * The switch back from broadcast mode needs to be
                 * called with interrupts disabled.
                 */
                local_irq_disable();
                clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
                local_irq_enable();
        } else
                default_idle();
}

void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
        if (boot_option_idle_override == IDLE_POLL && smp_num_siblings > 1)
                pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n");
#endif
        if (x86_idle || boot_option_idle_override == IDLE_POLL)
                return;

        if (cpu_has_bug(c, X86_BUG_AMD_APIC_C1E)) {
                /* E400: APIC timer interrupt does not wake up the CPU from C1E */
                pr_info("using AMD E400 aware idle routine\n");
                x86_idle = amd_e400_idle;
        } else
                x86_idle = default_idle;
}

void __init init_amd_e400_c1e_mask(void)
{
        /* If we're using amd_e400_idle, we need to allocate amd_e400_c1e_mask. */
        if (x86_idle == amd_e400_idle)
                zalloc_cpumask_var(&amd_e400_c1e_mask, GFP_KERNEL);
}

static int __init idle_setup(char *str)
{
        if (!str)
                return -EINVAL;

        if (!strcmp(str, "poll")) {
                pr_info("using polling idle threads\n");
                boot_option_idle_override = IDLE_POLL;
                cpu_idle_poll_ctrl(true);
        } else if (!strcmp(str, "halt")) {
                /*
                 * When the "idle=halt" boot option is given, halt is
                 * forced to be used for CPU idle. In such a case the
                 * CPU C2/C3 states won't be used again.
                 * To continue to load the CPU idle driver, don't touch
                 * boot_option_idle_override.
                 */
                x86_idle = default_idle;
                boot_option_idle_override = IDLE_HALT;
        } else if (!strcmp(str, "nomwait")) {
                /*
                 * If the "idle=nomwait" boot option is given, mwait
                 * will be disabled for the CPU C2/C3 states. In such
                 * a case it won't touch the boot_option_idle_override
                 * variable.
                 */
                boot_option_idle_override = IDLE_NOMWAIT;
        } else
                return -1;

        return 0;
}
early_param("idle", idle_setup);

unsigned long arch_align_stack(unsigned long sp)
{
        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                sp -= get_random_int() % 8192;
        return sp & ~0xf;
}

unsigned long arch_randomize_brk(struct mm_struct *mm)
{
        unsigned long range_end = mm->brk + 0x02000000;
        return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}