/*
 * processor_idle - idle state submodule to the ACPI processor driver
 *
 * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
 * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
 * Copyright (C) 2004, 2005 Dominik Brodowski <linux@brodo.de>
 * Copyright (C) 2004 Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 *			- Added processor hotplug support
 * Copyright (C) 2005 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
 *			- Added support for C3 on SMP
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or (at
 *  your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/cpufreq.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/acpi.h>
#include <linux/dmi.h>
#include <linux/moduleparam.h>
#include <linux/sched.h>	/* need_resched() */
#include <linux/latency.h>
#include <linux/clockchips.h>
#include <linux/cpuidle.h>

/*
 * Include the apic definitions for x86 to have the APIC timer related defines
 * available also for UP (on SMP it gets magically included via linux/smp.h).
 * asm/acpi.h is not an option, as it would require more include magic. Also
 * creating an empty asm-ia64/apic.h would just trade pest vs. cholera.
 */
#ifdef CONFIG_X86
#include <asm/apic.h>
#endif

#include <asm/io.h>
#include <asm/uaccess.h>

#include <acpi/acpi_bus.h>
#include <acpi/processor.h>

#define ACPI_PROCESSOR_COMPONENT	0x01000000
#define ACPI_PROCESSOR_CLASS		"processor"
#define _COMPONENT			ACPI_PROCESSOR_COMPONENT
ACPI_MODULE_NAME("processor_idle");
#define ACPI_PROCESSOR_FILE_POWER	"power"
#define US_TO_PM_TIMER_TICKS(t)		((t * (PM_TIMER_FREQUENCY/1000)) / 1000)
#define PM_TIMER_TICK_NS		(1000000000ULL/PM_TIMER_FREQUENCY)
#ifndef CONFIG_CPU_IDLE
#define C2_OVERHEAD			4	/* 1us (3.579 ticks per us) */
#define C3_OVERHEAD			4	/* 1us (3.579 ticks per us) */
static void (*pm_idle_save) (void) __read_mostly;
#else
#define C2_OVERHEAD			1	/* 1us */
#define C3_OVERHEAD			1	/* 1us */
#endif
#define PM_TIMER_TICKS_TO_US(p)		(((p) * 1000)/(PM_TIMER_FREQUENCY/1000))

static unsigned int max_cstate __read_mostly = ACPI_PROCESSOR_MAX_POWER;
module_param(max_cstate, uint, 0000);
static unsigned int nocst __read_mostly;
module_param(nocst, uint, 0000);

#ifndef CONFIG_CPU_IDLE
/*
 * bm_history -- bit-mask with a bit per jiffy of bus-master activity
 * 1000 HZ: 0xFFFFFFFF: 32 jiffies = 32ms
 * 800 HZ: 0xFFFFFFFF: 32 jiffies = 40ms
 * 100 HZ: 0x0000000F: 4 jiffies = 40ms
 * reduce history for more aggressive entry into C3
 */
static unsigned int bm_history __read_mostly =
    (HZ >= 800 ? 0xFFFFFFFF : ((1U << (HZ / 25)) - 1));
module_param(bm_history, uint, 0644);

static int acpi_processor_set_power_policy(struct acpi_processor *pr);

#endif

/*
 * IBM ThinkPad R40e crashes mysteriously when going into C2 or C3.
 * For now disable this. Probably a bug somewhere else.
 *
 * To skip this limit, boot/load with a large max_cstate limit.
 */
static int set_max_cstate(const struct dmi_system_id *id)
{
	if (max_cstate > ACPI_PROCESSOR_MAX_POWER)
		return 0;

	printk(KERN_NOTICE PREFIX "%s detected - limiting to C%ld max_cstate."
111 " Override with \"processor.max_cstate=%d\"\n", id->ident, 112 (long)id->driver_data, ACPI_PROCESSOR_MAX_POWER + 1); 113 114 max_cstate = (long)id->driver_data; 115 116 return 0; 117 } 118 119 /* Actually this shouldn't be __cpuinitdata, would be better to fix the 120 callers to only run once -AK */ 121 static struct dmi_system_id __cpuinitdata processor_power_dmi_table[] = { 122 { set_max_cstate, "IBM ThinkPad R40e", { 123 DMI_MATCH(DMI_BIOS_VENDOR,"IBM"), 124 DMI_MATCH(DMI_BIOS_VERSION,"1SET70WW")}, (void *)1}, 125 { set_max_cstate, "IBM ThinkPad R40e", { 126 DMI_MATCH(DMI_BIOS_VENDOR,"IBM"), 127 DMI_MATCH(DMI_BIOS_VERSION,"1SET60WW")}, (void *)1}, 128 { set_max_cstate, "IBM ThinkPad R40e", { 129 DMI_MATCH(DMI_BIOS_VENDOR,"IBM"), 130 DMI_MATCH(DMI_BIOS_VERSION,"1SET43WW") }, (void*)1}, 131 { set_max_cstate, "IBM ThinkPad R40e", { 132 DMI_MATCH(DMI_BIOS_VENDOR,"IBM"), 133 DMI_MATCH(DMI_BIOS_VERSION,"1SET45WW") }, (void*)1}, 134 { set_max_cstate, "IBM ThinkPad R40e", { 135 DMI_MATCH(DMI_BIOS_VENDOR,"IBM"), 136 DMI_MATCH(DMI_BIOS_VERSION,"1SET47WW") }, (void*)1}, 137 { set_max_cstate, "IBM ThinkPad R40e", { 138 DMI_MATCH(DMI_BIOS_VENDOR,"IBM"), 139 DMI_MATCH(DMI_BIOS_VERSION,"1SET50WW") }, (void*)1}, 140 { set_max_cstate, "IBM ThinkPad R40e", { 141 DMI_MATCH(DMI_BIOS_VENDOR,"IBM"), 142 DMI_MATCH(DMI_BIOS_VERSION,"1SET52WW") }, (void*)1}, 143 { set_max_cstate, "IBM ThinkPad R40e", { 144 DMI_MATCH(DMI_BIOS_VENDOR,"IBM"), 145 DMI_MATCH(DMI_BIOS_VERSION,"1SET55WW") }, (void*)1}, 146 { set_max_cstate, "IBM ThinkPad R40e", { 147 DMI_MATCH(DMI_BIOS_VENDOR,"IBM"), 148 DMI_MATCH(DMI_BIOS_VERSION,"1SET56WW") }, (void*)1}, 149 { set_max_cstate, "IBM ThinkPad R40e", { 150 DMI_MATCH(DMI_BIOS_VENDOR,"IBM"), 151 DMI_MATCH(DMI_BIOS_VERSION,"1SET59WW") }, (void*)1}, 152 { set_max_cstate, "IBM ThinkPad R40e", { 153 DMI_MATCH(DMI_BIOS_VENDOR,"IBM"), 154 DMI_MATCH(DMI_BIOS_VERSION,"1SET60WW") }, (void*)1}, 155 { set_max_cstate, "IBM ThinkPad R40e", { 156 DMI_MATCH(DMI_BIOS_VENDOR,"IBM"), 157 DMI_MATCH(DMI_BIOS_VERSION,"1SET61WW") }, (void*)1}, 158 { set_max_cstate, "IBM ThinkPad R40e", { 159 DMI_MATCH(DMI_BIOS_VENDOR,"IBM"), 160 DMI_MATCH(DMI_BIOS_VERSION,"1SET62WW") }, (void*)1}, 161 { set_max_cstate, "IBM ThinkPad R40e", { 162 DMI_MATCH(DMI_BIOS_VENDOR,"IBM"), 163 DMI_MATCH(DMI_BIOS_VERSION,"1SET64WW") }, (void*)1}, 164 { set_max_cstate, "IBM ThinkPad R40e", { 165 DMI_MATCH(DMI_BIOS_VENDOR,"IBM"), 166 DMI_MATCH(DMI_BIOS_VERSION,"1SET65WW") }, (void*)1}, 167 { set_max_cstate, "IBM ThinkPad R40e", { 168 DMI_MATCH(DMI_BIOS_VENDOR,"IBM"), 169 DMI_MATCH(DMI_BIOS_VERSION,"1SET68WW") }, (void*)1}, 170 { set_max_cstate, "Medion 41700", { 171 DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"), 172 DMI_MATCH(DMI_BIOS_VERSION,"R01-A1J")}, (void *)1}, 173 { set_max_cstate, "Clevo 5600D", { 174 DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"), 175 DMI_MATCH(DMI_BIOS_VERSION,"SHE845M0.86C.0013.D.0302131307")}, 176 (void *)2}, 177 {}, 178 }; 179 180 static inline u32 ticks_elapsed(u32 t1, u32 t2) 181 { 182 if (t2 >= t1) 183 return (t2 - t1); 184 else if (!(acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER)) 185 return (((0x00FFFFFF - t1) + t2) & 0x00FFFFFF); 186 else 187 return ((0xFFFFFFFF - t1) + t2); 188 } 189 190 static inline u32 ticks_elapsed_in_us(u32 t1, u32 t2) 191 { 192 if (t2 >= t1) 193 return PM_TIMER_TICKS_TO_US(t2 - t1); 194 else if (!(acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER)) 195 return PM_TIMER_TICKS_TO_US(((0x00FFFFFF - t1) + t2) & 0x00FFFFFF); 196 else 197 return PM_TIMER_TICKS_TO_US((0xFFFFFFFF 
static inline u32 ticks_elapsed(u32 t1, u32 t2)
{
	if (t2 >= t1)
		return (t2 - t1);
	else if (!(acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER))
		return (((0x00FFFFFF - t1) + t2) & 0x00FFFFFF);
	else
		return ((0xFFFFFFFF - t1) + t2);
}

static inline u32 ticks_elapsed_in_us(u32 t1, u32 t2)
{
	if (t2 >= t1)
		return PM_TIMER_TICKS_TO_US(t2 - t1);
	else if (!(acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER))
		return PM_TIMER_TICKS_TO_US(((0x00FFFFFF - t1) + t2) & 0x00FFFFFF);
	else
		return PM_TIMER_TICKS_TO_US((0xFFFFFFFF - t1) + t2);
}

#ifndef CONFIG_CPU_IDLE

static void
acpi_processor_power_activate(struct acpi_processor *pr,
			      struct acpi_processor_cx *new)
{
	struct acpi_processor_cx *old;

	if (!pr || !new)
		return;

	old = pr->power.state;

	if (old)
		old->promotion.count = 0;
	new->demotion.count = 0;

	/* Cleanup from old state. */
	if (old) {
		switch (old->type) {
		case ACPI_STATE_C3:
			/* Disable bus master reload */
			if (new->type != ACPI_STATE_C3 && pr->flags.bm_check)
				acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
			break;
		}
	}

	/* Prepare to use new state. */
	switch (new->type) {
	case ACPI_STATE_C3:
		/* Enable bus master reload */
		if (old->type != ACPI_STATE_C3 && pr->flags.bm_check)
			acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
		break;
	}

	pr->power.state = new;

	return;
}

static void acpi_safe_halt(void)
{
	current_thread_info()->status &= ~TS_POLLING;
	/*
	 * TS_POLLING-cleared state must be visible before we
	 * test NEED_RESCHED:
	 */
	smp_mb();
	if (!need_resched())
		safe_halt();
	current_thread_info()->status |= TS_POLLING;
}

static atomic_t c3_cpu_count;

/* Common C-state entry for C2, C3, .. */
static void acpi_cstate_enter(struct acpi_processor_cx *cstate)
{
	if (cstate->space_id == ACPI_CSTATE_FFH) {
		/* Call into architectural FFH based C-state */
		acpi_processor_ffh_cstate_enter(cstate);
	} else {
		int unused;
		/* IO port based C-state */
		inb(cstate->address);
		/* Dummy wait op - must do something useless after P_LVL2 read
		   because chipsets cannot guarantee that STPCLK# signal
		   gets asserted in time to freeze execution properly. */
		unused = inl(acpi_gbl_FADT.xpm_timer_block.address);
	}
}
#endif /* !CONFIG_CPU_IDLE */

#ifdef ARCH_APICTIMER_STOPS_ON_C3

/*
 * Some BIOS implementations switch to C3 in the published C2 state.
 * This seems to be a common problem on AMD boxen, but other vendors
 * are affected too. We pick the most conservative approach: we assume
 * that the local APIC stops in both C2 and C3.
 */
static void acpi_timer_check_state(int state, struct acpi_processor *pr,
				   struct acpi_processor_cx *cx)
{
	struct acpi_processor_power *pwr = &pr->power;
	u8 type = local_apic_timer_c2_ok ? ACPI_STATE_C3 : ACPI_STATE_C2;

	/*
	 * Check, if one of the previous states already marked the lapic
	 * unstable
	 */
	if (pwr->timer_broadcast_on_state < state)
		return;

	if (cx->type >= type)
		pr->power.timer_broadcast_on_state = state;
}

static void acpi_propagate_timer_broadcast(struct acpi_processor *pr)
{
	unsigned long reason;

	reason = pr->power.timer_broadcast_on_state < INT_MAX ?
		CLOCK_EVT_NOTIFY_BROADCAST_ON : CLOCK_EVT_NOTIFY_BROADCAST_OFF;

	clockevents_notify(reason, &pr->id);
}

/* Power(C) State timer broadcast control */
static void acpi_state_timer_broadcast(struct acpi_processor *pr,
				       struct acpi_processor_cx *cx,
				       int broadcast)
{
	int state = cx - pr->power.states;

	if (state >= pr->power.timer_broadcast_on_state) {
		unsigned long reason;

		reason = broadcast ?
			CLOCK_EVT_NOTIFY_BROADCAST_ENTER :
			CLOCK_EVT_NOTIFY_BROADCAST_EXIT;
		clockevents_notify(reason, &pr->id);
	}
}

#else

static void acpi_timer_check_state(int state, struct acpi_processor *pr,
				   struct acpi_processor_cx *cstate) { }
static void acpi_propagate_timer_broadcast(struct acpi_processor *pr) { }
static void acpi_state_timer_broadcast(struct acpi_processor *pr,
				       struct acpi_processor_cx *cx,
				       int broadcast)
{
}

#endif

/*
 * Suspend / resume control
 */
static int acpi_idle_suspend;

int acpi_processor_suspend(struct acpi_device * device, pm_message_t state)
{
	acpi_idle_suspend = 1;
	return 0;
}

int acpi_processor_resume(struct acpi_device * device)
{
	acpi_idle_suspend = 0;
	return 0;
}

#ifndef CONFIG_CPU_IDLE
static void acpi_processor_idle(void)
{
	struct acpi_processor *pr = NULL;
	struct acpi_processor_cx *cx = NULL;
	struct acpi_processor_cx *next_state = NULL;
	int sleep_ticks = 0;
	u32 t1, t2 = 0;

	/*
	 * Interrupts must be disabled during bus mastering calculations and
	 * for C2/C3 transitions.
	 */
	local_irq_disable();

	pr = processors[smp_processor_id()];
	if (!pr) {
		local_irq_enable();
		return;
	}

	/*
	 * Check whether we truly need to go idle, or should
	 * reschedule:
	 */
	if (unlikely(need_resched())) {
		local_irq_enable();
		return;
	}

	cx = pr->power.state;
	if (!cx || acpi_idle_suspend) {
		if (pm_idle_save)
			pm_idle_save();
		else
			acpi_safe_halt();
		return;
	}

	/*
	 * Check BM Activity
	 * -----------------
	 * Check for bus mastering activity (if required), record, and check
	 * for demotion.
	 */
	if (pr->flags.bm_check) {
		u32 bm_status = 0;
		unsigned long diff = jiffies - pr->power.bm_check_timestamp;

		if (diff > 31)
			diff = 31;

		pr->power.bm_activity <<= diff;

		acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status);
		if (bm_status) {
			pr->power.bm_activity |= 0x1;
			acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);
		}
		/*
		 * PIIX4 Erratum #18: Note that BM_STS doesn't always reflect
		 * the true state of bus mastering activity; forcing us to
		 * manually check the BMIDEA bit of each IDE channel.
		 */
		else if (errata.piix4.bmisx) {
			if ((inb_p(errata.piix4.bmisx + 0x02) & 0x01)
			    || (inb_p(errata.piix4.bmisx + 0x0A) & 0x01))
				pr->power.bm_activity |= 0x1;
		}

		pr->power.bm_check_timestamp = jiffies;

		/*
		 * If bus mastering is or was active this jiffy, demote
		 * to avoid a faulty transition.  Note that the processor
		 * won't enter a low-power state during this call (to this
		 * function) but should upon the next.
		 *
		 * TBD: A better policy might be to fallback to the demotion
		 *      state (use it for this quantum only) instead of
		 *      demoting -- and rely on duration as our sole demotion
		 *      qualification.  This may, however, introduce DMA
		 *      issues (e.g. floppy DMA transfer overrun/underrun).
		 */
		if ((pr->power.bm_activity & 0x1) &&
		    cx->demotion.threshold.bm) {
			local_irq_enable();
			next_state = cx->demotion.state;
			goto end;
		}
	}

#ifdef CONFIG_HOTPLUG_CPU
	/*
	 * Check for P_LVL2_UP flag before entering C2 and above on
	 * an SMP system. We do it here instead of doing it at _CST/P_LVL
	 * detection phase, to work cleanly with logical CPU hotplug.
	 */
	if ((cx->type != ACPI_STATE_C1) && (num_online_cpus() > 1) &&
	    !pr->flags.has_cst && !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
		cx = &pr->power.states[ACPI_STATE_C1];
#endif

	/*
	 * Sleep:
	 * ------
	 * Invoke the current Cx state to put the processor to sleep.
	 */
	if (cx->type == ACPI_STATE_C2 || cx->type == ACPI_STATE_C3) {
		current_thread_info()->status &= ~TS_POLLING;
		/*
		 * TS_POLLING-cleared state must be visible before we
		 * test NEED_RESCHED:
		 */
		smp_mb();
		if (need_resched()) {
			current_thread_info()->status |= TS_POLLING;
			local_irq_enable();
			return;
		}
	}

	switch (cx->type) {

	case ACPI_STATE_C1:
		/*
		 * Invoke C1.
		 * Use the appropriate idle routine, the one that would
		 * be used without acpi C-states.
		 */
		if (pm_idle_save)
			pm_idle_save();
		else
			acpi_safe_halt();

		/*
		 * TBD: Can't get time duration while in C1, as resumes
		 *      go to an ISR rather than here.  Need to instrument
		 *      base interrupt handler.
		 *
		 * Note: the TSC better not stop in C1, sched_clock() will
		 *       skew otherwise.
		 */
		sleep_ticks = 0xFFFFFFFF;
		break;

	case ACPI_STATE_C2:
		/* Get start time (ticks) */
		t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
		/* Tell the scheduler that we are going deep-idle: */
		sched_clock_idle_sleep_event();
		/* Invoke C2 */
		acpi_state_timer_broadcast(pr, cx, 1);
		acpi_cstate_enter(cx);
		/* Get end time (ticks) */
		t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);

#if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86_TSC)
		/* TSC halts in C2, so notify users */
		mark_tsc_unstable("possible TSC halt in C2");
#endif
		/* Compute time (ticks) that we were actually asleep */
		sleep_ticks = ticks_elapsed(t1, t2);

		/* Tell the scheduler how much we idled: */
		sched_clock_idle_wakeup_event(sleep_ticks*PM_TIMER_TICK_NS);

		/* Re-enable interrupts */
		local_irq_enable();
		/* Do not account our idle-switching overhead: */
		sleep_ticks -= cx->latency_ticks + C2_OVERHEAD;

		current_thread_info()->status |= TS_POLLING;
		acpi_state_timer_broadcast(pr, cx, 0);
		break;

	case ACPI_STATE_C3:
		/*
		 * disable bus master
		 * bm_check implies we need ARB_DIS
		 * !bm_check implies we need cache flush
		 * bm_control implies whether we can do ARB_DIS
		 *
		 * That leaves a case where bm_check is set and bm_control is
		 * not set. In that case we cannot do much, we enter C3
		 * without doing anything.
		 */
		if (pr->flags.bm_check && pr->flags.bm_control) {
			if (atomic_inc_return(&c3_cpu_count) ==
			    num_online_cpus()) {
				/*
				 * All CPUs are trying to go to C3
				 * Disable bus master arbitration
				 */
				acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
			}
		} else if (!pr->flags.bm_check) {
			/* SMP with no shared cache... Invalidate cache */
			ACPI_FLUSH_CPU_CACHE();
		}

		/* Get start time (ticks) */
		t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
		/* Invoke C3 */
		acpi_state_timer_broadcast(pr, cx, 1);
		/* Tell the scheduler that we are going deep-idle: */
		sched_clock_idle_sleep_event();
		acpi_cstate_enter(cx);
		/* Get end time (ticks) */
		t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
		if (pr->flags.bm_check && pr->flags.bm_control) {
			/* Enable bus master arbitration */
			atomic_dec(&c3_cpu_count);
			acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
		}

#if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86_TSC)
		/* TSC halts in C3, so notify users */
		mark_tsc_unstable("TSC halts in C3");
#endif
		/* Compute time (ticks) that we were actually asleep */
		sleep_ticks = ticks_elapsed(t1, t2);
		/* Tell the scheduler how much we idled: */
		sched_clock_idle_wakeup_event(sleep_ticks*PM_TIMER_TICK_NS);

		/* Re-enable interrupts */
		local_irq_enable();
		/* Do not account our idle-switching overhead: */
		sleep_ticks -= cx->latency_ticks + C3_OVERHEAD;

		current_thread_info()->status |= TS_POLLING;
		acpi_state_timer_broadcast(pr, cx, 0);
		break;

	default:
		local_irq_enable();
		return;
	}
	cx->usage++;
	if ((cx->type != ACPI_STATE_C1) && (sleep_ticks > 0))
		cx->time += sleep_ticks;

	next_state = pr->power.state;

#ifdef CONFIG_HOTPLUG_CPU
	/* Don't do promotion/demotion */
	if ((cx->type == ACPI_STATE_C1) && (num_online_cpus() > 1) &&
	    !pr->flags.has_cst && !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED)) {
		next_state = cx;
		goto end;
	}
#endif

	/*
	 * Promotion?
	 * ----------
	 * Track the number of longs (time asleep is greater than threshold)
	 * and promote when the count threshold is reached.  Note that bus
	 * mastering activity may prevent promotions.
	 * Do not promote above max_cstate.
	 */
	if (cx->promotion.state &&
	    ((cx->promotion.state - pr->power.states) <= max_cstate)) {
		if (sleep_ticks > cx->promotion.threshold.ticks &&
		    cx->promotion.state->latency <= system_latency_constraint()) {
			cx->promotion.count++;
			cx->demotion.count = 0;
			if (cx->promotion.count >=
			    cx->promotion.threshold.count) {
				if (pr->flags.bm_check) {
					if (!
					    (pr->power.bm_activity & cx->
					     promotion.threshold.bm)) {
						next_state =
						    cx->promotion.state;
						goto end;
					}
				} else {
					next_state = cx->promotion.state;
					goto end;
				}
			}
		}
	}

	/*
	 * Demotion?
	 * ---------
	 * Track the number of shorts (time asleep is less than time threshold)
	 * and demote when the usage threshold is reached.
	 */
	if (cx->demotion.state) {
		if (sleep_ticks < cx->demotion.threshold.ticks) {
			cx->demotion.count++;
			cx->promotion.count = 0;
			if (cx->demotion.count >= cx->demotion.threshold.count) {
				next_state = cx->demotion.state;
				goto end;
			}
		}
	}

end:
	/*
	 * Demote if current state exceeds max_cstate
	 * or if the latency of the current state is unacceptable
	 */
	if ((pr->power.state - pr->power.states) > max_cstate ||
	    pr->power.state->latency > system_latency_constraint()) {
		if (cx->demotion.state)
			next_state = cx->demotion.state;
	}

	/*
	 * New Cx State?
	 * -------------
	 * If we're going to start using a new Cx state we must clean up
	 * from the previous and prepare to use the new.
	 */
	if (next_state != pr->power.state)
		acpi_processor_power_activate(pr, next_state);
}

static int acpi_processor_set_power_policy(struct acpi_processor *pr)
{
	unsigned int i;
	unsigned int state_is_set = 0;
	struct acpi_processor_cx *lower = NULL;
	struct acpi_processor_cx *higher = NULL;
	struct acpi_processor_cx *cx;


	if (!pr)
		return -EINVAL;

	/*
	 * This function sets the default Cx state policy (OS idle handler).
	 * Our scheme is to promote quickly to C2 but more conservatively
	 * to C3.  We're favoring C2 for its characteristics of low latency
	 * (quick response), good power savings, and ability to allow bus
	 * mastering activity.  Note that the Cx state policy is completely
	 * customizable and can be altered dynamically.
	 */

	/* startup state */
	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
		cx = &pr->power.states[i];
		if (!cx->valid)
			continue;

		if (!state_is_set)
			pr->power.state = cx;
		state_is_set++;
		break;
	}

	if (!state_is_set)
		return -ENODEV;

	/* demotion */
	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
		cx = &pr->power.states[i];
		if (!cx->valid)
			continue;

		if (lower) {
			cx->demotion.state = lower;
			cx->demotion.threshold.ticks = cx->latency_ticks;
			cx->demotion.threshold.count = 1;
			if (cx->type == ACPI_STATE_C3)
				cx->demotion.threshold.bm = bm_history;
		}

		lower = cx;
	}

	/* promotion */
	for (i = (ACPI_PROCESSOR_MAX_POWER - 1); i > 0; i--) {
		cx = &pr->power.states[i];
		if (!cx->valid)
			continue;

		if (higher) {
			cx->promotion.state = higher;
			cx->promotion.threshold.ticks = cx->latency_ticks;
			if (cx->type >= ACPI_STATE_C2)
				cx->promotion.threshold.count = 4;
			else
				cx->promotion.threshold.count = 10;
			if (higher->type == ACPI_STATE_C3)
				cx->promotion.threshold.bm = bm_history;
		}

		higher = cx;
	}

	return 0;
}
#endif /* !CONFIG_CPU_IDLE */

static int acpi_processor_get_power_info_fadt(struct acpi_processor *pr)
{

	if (!pr)
		return -EINVAL;

	if (!pr->pblk)
		return -ENODEV;

	/* if info is obtained from pblk/fadt, type equals state */
	pr->power.states[ACPI_STATE_C2].type = ACPI_STATE_C2;
	pr->power.states[ACPI_STATE_C3].type = ACPI_STATE_C3;

#ifndef CONFIG_HOTPLUG_CPU
	/*
	 * Check for P_LVL2_UP flag before entering C2 and above on
	 * an SMP system.
	 */
	if ((num_online_cpus() > 1) &&
	    !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
		return -ENODEV;
#endif

	/* determine C2 and C3 address from pblk */
	pr->power.states[ACPI_STATE_C2].address = pr->pblk + 4;
	pr->power.states[ACPI_STATE_C3].address = pr->pblk + 5;

	/* determine latencies from FADT */
	pr->power.states[ACPI_STATE_C2].latency = acpi_gbl_FADT.C2latency;
	pr->power.states[ACPI_STATE_C3].latency = acpi_gbl_FADT.C3latency;

	ACPI_DEBUG_PRINT((ACPI_DB_INFO,
			  "lvl2[0x%08x] lvl3[0x%08x]\n",
			  pr->power.states[ACPI_STATE_C2].address,
			  pr->power.states[ACPI_STATE_C3].address));

	return 0;
}

static int acpi_processor_get_power_info_default(struct acpi_processor *pr)
{
	if (!pr->power.states[ACPI_STATE_C1].valid) {
		/* set the first C-State to C1 */
		/* all processors need to support C1 */
		pr->power.states[ACPI_STATE_C1].type = ACPI_STATE_C1;
		pr->power.states[ACPI_STATE_C1].valid = 1;
	}
	/* the C0 state only exists as a filler in our array */
	pr->power.states[ACPI_STATE_C0].valid = 1;
	return 0;
}

static int acpi_processor_get_power_info_cst(struct acpi_processor *pr)
{
	acpi_status status = 0;
	acpi_integer count;
	int current_count;
	int i;
	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
	union acpi_object *cst;


	if (nocst)
		return -ENODEV;

	current_count = 0;

	status = acpi_evaluate_object(pr->handle, "_CST", NULL, &buffer);
	if (ACPI_FAILURE(status)) {
		ACPI_DEBUG_PRINT((ACPI_DB_INFO, "No _CST, giving up\n"));
		return -ENODEV;
	}

	cst = buffer.pointer;

	/* There must be at least 2 elements */
	if (!cst || (cst->type != ACPI_TYPE_PACKAGE) || cst->package.count < 2) {
		printk(KERN_ERR PREFIX "not enough elements in _CST\n");
		status = -EFAULT;
		goto end;
	}

	count = cst->package.elements[0].integer.value;

	/* Validate number of power states. */
	if (count < 1 || count != cst->package.count - 1) {
		printk(KERN_ERR PREFIX "count given by _CST is not valid\n");
		status = -EFAULT;
		goto end;
	}

	/* Tell driver that at least _CST is supported. */
	pr->flags.has_cst = 1;

	for (i = 1; i <= count; i++) {
		union acpi_object *element;
		union acpi_object *obj;
		struct acpi_power_register *reg;
		struct acpi_processor_cx cx;

		memset(&cx, 0, sizeof(cx));

		element = &(cst->package.elements[i]);
		if (element->type != ACPI_TYPE_PACKAGE)
			continue;

		if (element->package.count != 4)
			continue;

		obj = &(element->package.elements[0]);

		if (obj->type != ACPI_TYPE_BUFFER)
			continue;

		reg = (struct acpi_power_register *)obj->buffer.pointer;

		if (reg->space_id != ACPI_ADR_SPACE_SYSTEM_IO &&
		    (reg->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE))
			continue;

		/* There should be an easy way to extract an integer... */
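		/*
		 * The remaining _CST entry elements are plain integers:
		 * [1] C-state type, [2] worst-case latency in microseconds,
		 * [3] average power consumption in milliwatts.
		 */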
		obj = &(element->package.elements[1]);
		if (obj->type != ACPI_TYPE_INTEGER)
			continue;

		cx.type = obj->integer.value;
		/*
		 * Some buggy BIOSes won't list C1 in _CST -
		 * Let acpi_processor_get_power_info_default() handle them later
		 */
		if (i == 1 && cx.type != ACPI_STATE_C1)
			current_count++;

		cx.address = reg->address;
		cx.index = current_count + 1;

		cx.space_id = ACPI_CSTATE_SYSTEMIO;
		if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) {
			if (acpi_processor_ffh_cstate_probe
			    (pr->id, &cx, reg) == 0) {
				cx.space_id = ACPI_CSTATE_FFH;
			} else if (cx.type != ACPI_STATE_C1) {
				/*
				 * C1 is a special case where FIXED_HARDWARE
				 * can be handled in non-MWAIT way as well.
				 * In that case, save this _CST entry info.
				 * That is, we retain space_id of SYSTEM_IO for
				 * halt based C1.
				 * Otherwise, ignore this info and continue.
				 */
				continue;
			}
		}

		obj = &(element->package.elements[2]);
		if (obj->type != ACPI_TYPE_INTEGER)
			continue;

		cx.latency = obj->integer.value;

		obj = &(element->package.elements[3]);
		if (obj->type != ACPI_TYPE_INTEGER)
			continue;

		cx.power = obj->integer.value;

		current_count++;
		memcpy(&(pr->power.states[current_count]), &cx, sizeof(cx));

		/*
		 * We support total ACPI_PROCESSOR_MAX_POWER - 1
		 * (From 1 through ACPI_PROCESSOR_MAX_POWER - 1)
		 */
		if (current_count >= (ACPI_PROCESSOR_MAX_POWER - 1)) {
			printk(KERN_WARNING
			       "Limiting number of power states to max (%d)\n",
			       ACPI_PROCESSOR_MAX_POWER);
			printk(KERN_WARNING
			       "Please increase ACPI_PROCESSOR_MAX_POWER if needed.\n");
			break;
		}
	}

	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found %d power states\n",
			  current_count));

	/* Validate number of power states discovered */
	if (current_count < 2)
		status = -EFAULT;

end:
	kfree(buffer.pointer);

	return status;
}

static void acpi_processor_power_verify_c2(struct acpi_processor_cx *cx)
{

	if (!cx->address)
		return;

	/*
	 * C2 latency must be less than or equal to 100
	 * microseconds.
	 */
	else if (cx->latency > ACPI_PROCESSOR_MAX_C2_LATENCY) {
		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
				  "latency too large [%d]\n", cx->latency));
		return;
	}

	/*
	 * Otherwise we've met all of our C2 requirements.
	 * Normalize the C2 latency to expedite policy
	 */
	cx->valid = 1;

#ifndef CONFIG_CPU_IDLE
	cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
#else
	cx->latency_ticks = cx->latency;
#endif

	return;
}

static void acpi_processor_power_verify_c3(struct acpi_processor *pr,
					   struct acpi_processor_cx *cx)
{
	static int bm_check_flag;


	if (!cx->address)
		return;

	/*
	 * C3 latency must be less than or equal to 1000
	 * microseconds.
	 */
	else if (cx->latency > ACPI_PROCESSOR_MAX_C3_LATENCY) {
		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
				  "latency too large [%d]\n", cx->latency));
		return;
	}

	/*
	 * PIIX4 Erratum #18: We don't support C3 when Type-F (fast)
	 * DMA transfers are used by any ISA device to avoid livelock.
	 * Note that we could disable Type-F DMA (as recommended by
	 * the erratum), but this is known to disrupt certain ISA
	 * devices thus we take the conservative approach.
	 */
	else if (errata.piix4.fdma) {
		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
				  "C3 not supported on PIIX4 with Type-F DMA\n"));
		return;
	}

	/* All the logic here assumes flags.bm_check is same across all CPUs */
	if (!bm_check_flag) {
		/* Determine whether bm_check is needed based on CPU */
		acpi_processor_power_init_bm_check(&(pr->flags), pr->id);
		bm_check_flag = pr->flags.bm_check;
	} else {
		pr->flags.bm_check = bm_check_flag;
	}

	if (pr->flags.bm_check) {
		if (!pr->flags.bm_control) {
			if (pr->flags.has_cst != 1) {
				/* bus mastering control is necessary */
				ACPI_DEBUG_PRINT((ACPI_DB_INFO,
					"C3 support requires BM control\n"));
				return;
			} else {
				/* Here we enter C3 without bus mastering */
				ACPI_DEBUG_PRINT((ACPI_DB_INFO,
					"C3 support without BM control\n"));
			}
		}
	} else {
		/*
		 * WBINVD should be set in fadt, for C3 state to be
		 * supported when bm_check is not required.
		 */
		if (!(acpi_gbl_FADT.flags & ACPI_FADT_WBINVD)) {
			ACPI_DEBUG_PRINT((ACPI_DB_INFO,
					  "Cache invalidation should work properly"
					  " for C3 to be enabled on SMP systems\n"));
			return;
		}
		acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
	}

	/*
	 * Otherwise we've met all of our C3 requirements.
	 * Normalize the C3 latency to expedite policy.  Enable
	 * checking of bus mastering status (bm_check) so we can
	 * use this in our C3 policy
	 */
	cx->valid = 1;

#ifndef CONFIG_CPU_IDLE
	cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
#else
	cx->latency_ticks = cx->latency;
#endif

	return;
}

static int acpi_processor_power_verify(struct acpi_processor *pr)
{
	unsigned int i;
	unsigned int working = 0;

	pr->power.timer_broadcast_on_state = INT_MAX;

	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
		struct acpi_processor_cx *cx = &pr->power.states[i];

		switch (cx->type) {
		case ACPI_STATE_C1:
			cx->valid = 1;
			break;

		case ACPI_STATE_C2:
			acpi_processor_power_verify_c2(cx);
			if (cx->valid)
				acpi_timer_check_state(i, pr, cx);
			break;

		case ACPI_STATE_C3:
			acpi_processor_power_verify_c3(pr, cx);
			if (cx->valid)
				acpi_timer_check_state(i, pr, cx);
			break;
		}

		if (cx->valid)
			working++;
	}

	acpi_propagate_timer_broadcast(pr);

	return (working);
}

static int acpi_processor_get_power_info(struct acpi_processor *pr)
{
	unsigned int i;
	int result;


	/* NOTE: the idle thread may not be running while calling
	 * this function */

	/* Zero initialize all the C-states info. */
	memset(pr->power.states, 0, sizeof(pr->power.states));

	result = acpi_processor_get_power_info_cst(pr);
	if (result == -ENODEV)
		result = acpi_processor_get_power_info_fadt(pr);

	if (result)
		return result;

	acpi_processor_get_power_info_default(pr);

	pr->power.count = acpi_processor_power_verify(pr);

#ifndef CONFIG_CPU_IDLE
	/*
	 * Set Default Policy
	 * ------------------
	 * Now that we know which states are supported, set the default
	 * policy.  Note that this policy can be changed dynamically
	 * (e.g. encourage deeper sleeps to conserve battery life when
	 * not on AC).
	 */
	result = acpi_processor_set_power_policy(pr);
	if (result)
		return result;
#endif

	/*
	 * if one state of type C2 or C3 is available, mark this
	 * CPU as being "idle manageable"
	 */
	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
		if (pr->power.states[i].valid) {
			pr->power.count = i;
			if (pr->power.states[i].type >= ACPI_STATE_C2)
				pr->flags.power = 1;
		}
	}

	return 0;
}

static int acpi_processor_power_seq_show(struct seq_file *seq, void *offset)
{
	struct acpi_processor *pr = seq->private;
	unsigned int i;


	if (!pr)
		goto end;

	seq_printf(seq, "active state: C%zd\n"
		   "max_cstate: C%d\n"
		   "bus master activity: %08x\n"
		   "maximum allowed latency: %d usec\n",
		   pr->power.state ? pr->power.state - pr->power.states : 0,
		   max_cstate, (unsigned)pr->power.bm_activity,
		   system_latency_constraint());

	seq_puts(seq, "states:\n");

	for (i = 1; i <= pr->power.count; i++) {
		seq_printf(seq, " %cC%d: ",
			   (&pr->power.states[i] ==
			    pr->power.state ? '*' : ' '), i);

		if (!pr->power.states[i].valid) {
			seq_puts(seq, "<not supported>\n");
			continue;
		}

		switch (pr->power.states[i].type) {
		case ACPI_STATE_C1:
			seq_printf(seq, "type[C1] ");
			break;
		case ACPI_STATE_C2:
			seq_printf(seq, "type[C2] ");
			break;
		case ACPI_STATE_C3:
			seq_printf(seq, "type[C3] ");
			break;
		default:
			seq_printf(seq, "type[--] ");
			break;
		}

		if (pr->power.states[i].promotion.state)
			seq_printf(seq, "promotion[C%zd] ",
				   (pr->power.states[i].promotion.state -
				    pr->power.states));
		else
			seq_puts(seq, "promotion[--] ");

		if (pr->power.states[i].demotion.state)
			seq_printf(seq, "demotion[C%zd] ",
				   (pr->power.states[i].demotion.state -
				    pr->power.states));
		else
			seq_puts(seq, "demotion[--] ");

		seq_printf(seq, "latency[%03d] usage[%08d] duration[%020llu]\n",
			   pr->power.states[i].latency,
			   pr->power.states[i].usage,
			   (unsigned long long)pr->power.states[i].time);
	}

end:
	return 0;
}

static int acpi_processor_power_open_fs(struct inode *inode, struct file *file)
{
	return single_open(file, acpi_processor_power_seq_show,
			   PDE(inode)->data);
}

static const struct file_operations acpi_processor_power_fops = {
	.open = acpi_processor_power_open_fs,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

#ifndef CONFIG_CPU_IDLE

int acpi_processor_cst_has_changed(struct acpi_processor *pr)
{
	int result = 0;


	if (!pr)
		return -EINVAL;

	if (nocst) {
		return -ENODEV;
	}

	if (!pr->flags.power_setup_done)
		return -ENODEV;

	/* Fall back to the default idle loop */
	pm_idle = pm_idle_save;
	synchronize_sched();	/* Relies on interrupts forcing exit from idle. */

	pr->flags.power = 0;
	result = acpi_processor_get_power_info(pr);
	if ((pr->flags.power == 1) && (pr->flags.power_setup_done))
		pm_idle = acpi_processor_idle;

	return result;
}

#ifdef CONFIG_SMP
static void smp_callback(void *v)
{
	/* we already woke the CPU up, nothing more to do */
}

/*
 * This function gets called when a part of the kernel has a new latency
 * requirement.
 * This means we need to get all processors out of their C-state,
 * and then recalculate a new suitable C-state. Just do a cross-cpu IPI; that
 * wakes them all right up.
 */
static int acpi_processor_latency_notify(struct notifier_block *b,
					 unsigned long l, void *v)
{
	smp_call_function(smp_callback, NULL, 0, 1);
	return NOTIFY_OK;
}

static struct notifier_block acpi_processor_latency_notifier = {
	.notifier_call = acpi_processor_latency_notify,
};

#endif

#else /* CONFIG_CPU_IDLE */

/**
 * acpi_idle_bm_check - checks if bus master activity was detected
 */
static int acpi_idle_bm_check(void)
{
	u32 bm_status = 0;

	acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status);
	if (bm_status)
		acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);
	/*
	 * PIIX4 Erratum #18: Note that BM_STS doesn't always reflect
	 * the true state of bus mastering activity; forcing us to
	 * manually check the BMIDEA bit of each IDE channel.
	 */
	else if (errata.piix4.bmisx) {
		if ((inb_p(errata.piix4.bmisx + 0x02) & 0x01)
		    || (inb_p(errata.piix4.bmisx + 0x0A) & 0x01))
			bm_status = 1;
	}
	return bm_status;
}

/**
 * acpi_idle_update_bm_rld - updates the BM_RLD bit depending on target state
 * @pr: the processor
 * @target: the new target state
 */
static inline void acpi_idle_update_bm_rld(struct acpi_processor *pr,
					   struct acpi_processor_cx *target)
{
	if (pr->flags.bm_rld_set && target->type != ACPI_STATE_C3) {
		acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
		pr->flags.bm_rld_set = 0;
	}

	if (!pr->flags.bm_rld_set && target->type == ACPI_STATE_C3) {
		acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
		pr->flags.bm_rld_set = 1;
	}
}

/**
 * acpi_idle_do_entry - a helper function that does C2 and C3 type entry
 * @cx: cstate data
 */
static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx)
{
	if (cx->space_id == ACPI_CSTATE_FFH) {
		/* Call into architectural FFH based C-state */
		acpi_processor_ffh_cstate_enter(cx);
	} else {
		int unused;
		/* IO port based C-state */
		inb(cx->address);
		/* Dummy wait op - must do something useless after P_LVL2 read
		   because chipsets cannot guarantee that STPCLK# signal
		   gets asserted in time to freeze execution properly. */
		unused = inl(acpi_gbl_FADT.xpm_timer_block.address);
	}
}

/**
 * acpi_idle_enter_c1 - enters an ACPI C1 state-type
 * @dev: the target CPU
 * @state: the state data
 *
 * This is equivalent to the HALT instruction.
 */
static int acpi_idle_enter_c1(struct cpuidle_device *dev,
			      struct cpuidle_state *state)
{
	struct acpi_processor *pr;
	struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
	pr = processors[smp_processor_id()];

	if (unlikely(!pr))
		return 0;

	if (pr->flags.bm_check)
		acpi_idle_update_bm_rld(pr, cx);

	current_thread_info()->status &= ~TS_POLLING;
	/*
	 * TS_POLLING-cleared state must be visible before we test
	 * NEED_RESCHED:
	 */
	smp_mb();
	if (!need_resched())
		safe_halt();
	current_thread_info()->status |= TS_POLLING;

	cx->usage++;

	return 0;
}

/**
 * acpi_idle_enter_simple - enters an ACPI state without BM handling
 * @dev: the target CPU
 * @state: the state data
 */
static int acpi_idle_enter_simple(struct cpuidle_device *dev,
				  struct cpuidle_state *state)
{
	struct acpi_processor *pr;
	struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
	u32 t1, t2;
	pr = processors[smp_processor_id()];

	if (unlikely(!pr))
		return 0;

	if (acpi_idle_suspend)
		return(acpi_idle_enter_c1(dev, state));

	if (pr->flags.bm_check)
		acpi_idle_update_bm_rld(pr, cx);

	local_irq_disable();
	current_thread_info()->status &= ~TS_POLLING;
	/*
	 * TS_POLLING-cleared state must be visible before we test
	 * NEED_RESCHED:
	 */
	smp_mb();

	if (unlikely(need_resched())) {
		current_thread_info()->status |= TS_POLLING;
		local_irq_enable();
		return 0;
	}

	if (cx->type == ACPI_STATE_C3)
		ACPI_FLUSH_CPU_CACHE();

	t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
	acpi_state_timer_broadcast(pr, cx, 1);
	acpi_idle_do_entry(cx);
	t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);

#if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86_TSC)
	/* TSC could halt in idle, so notify users */
	mark_tsc_unstable("TSC halts in idle");
#endif

	local_irq_enable();
	current_thread_info()->status |= TS_POLLING;

	cx->usage++;

	acpi_state_timer_broadcast(pr, cx, 0);
	cx->time += ticks_elapsed(t1, t2);
	return ticks_elapsed_in_us(t1, t2);
}

static int c3_cpu_count;
static DEFINE_SPINLOCK(c3_lock);

/**
 * acpi_idle_enter_bm - enters C3 with proper BM handling
 * @dev: the target CPU
 * @state: the state data
 *
 * If BM is detected, the deepest non-C3 idle state is entered instead.
 */
static int acpi_idle_enter_bm(struct cpuidle_device *dev,
			      struct cpuidle_state *state)
{
	struct acpi_processor *pr;
	struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
	u32 t1, t2;
	pr = processors[smp_processor_id()];

	if (unlikely(!pr))
		return 0;

	if (acpi_idle_suspend)
		return(acpi_idle_enter_c1(dev, state));

	local_irq_disable();
	current_thread_info()->status &= ~TS_POLLING;
	/*
	 * TS_POLLING-cleared state must be visible before we test
	 * NEED_RESCHED:
	 */
	smp_mb();

	if (unlikely(need_resched())) {
		current_thread_info()->status |= TS_POLLING;
		local_irq_enable();
		return 0;
	}

	/*
	 * Must be done before busmaster disable as we might need to
	 * access HPET !
	 */
	acpi_state_timer_broadcast(pr, cx, 1);

	if (acpi_idle_bm_check()) {
		cx = pr->power.bm_state;

		acpi_idle_update_bm_rld(pr, cx);

		t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
		acpi_idle_do_entry(cx);
		t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
	} else {
		acpi_idle_update_bm_rld(pr, cx);

		spin_lock(&c3_lock);
		c3_cpu_count++;
		/* Disable bus master arbitration when all CPUs are in C3 */
		if (c3_cpu_count == num_online_cpus())
			acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
		spin_unlock(&c3_lock);

		t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
		acpi_idle_do_entry(cx);
		t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);

		spin_lock(&c3_lock);
		/* Re-enable bus master arbitration */
		if (c3_cpu_count == num_online_cpus())
			acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
		c3_cpu_count--;
		spin_unlock(&c3_lock);
	}

#if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86_TSC)
	/* TSC could halt in idle, so notify users */
	mark_tsc_unstable("TSC halts in idle");
#endif

	local_irq_enable();
	current_thread_info()->status |= TS_POLLING;

	cx->usage++;

	acpi_state_timer_broadcast(pr, cx, 0);
	cx->time += ticks_elapsed(t1, t2);
	return ticks_elapsed_in_us(t1, t2);
}

struct cpuidle_driver acpi_idle_driver = {
	.name = "acpi_idle",
	.owner = THIS_MODULE,
};

/**
 * acpi_processor_setup_cpuidle - prepares and configures CPUIDLE
 * @pr: the ACPI processor
 */
static int acpi_processor_setup_cpuidle(struct acpi_processor *pr)
{
	int i, count = 0;
	struct acpi_processor_cx *cx;
	struct cpuidle_state *state;
	struct cpuidle_device *dev = &pr->power.dev;

	if (!pr->flags.power_setup_done)
		return -EINVAL;

	if (pr->flags.power == 0) {
		return -EINVAL;
	}

	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++) {
		cx = &pr->power.states[i];
		state = &dev->states[count];

		if (!cx->valid)
			continue;

#ifdef CONFIG_HOTPLUG_CPU
		if ((cx->type != ACPI_STATE_C1) && (num_online_cpus() > 1) &&
		    !pr->flags.has_cst &&
		    !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
			continue;
#endif
		cpuidle_set_statedata(state, cx);

		snprintf(state->name, CPUIDLE_NAME_LEN, "C%d", i);
		state->exit_latency = cx->latency;
		state->target_residency = cx->latency * 6;
		state->power_usage = cx->power;

		state->flags = 0;
		switch (cx->type) {
		case ACPI_STATE_C1:
			state->flags |= CPUIDLE_FLAG_SHALLOW;
			state->enter = acpi_idle_enter_c1;
			break;

		case ACPI_STATE_C2:
			state->flags |= CPUIDLE_FLAG_BALANCED;
			state->flags |= CPUIDLE_FLAG_TIME_VALID;
			state->enter = acpi_idle_enter_simple;
			break;

		case ACPI_STATE_C3:
			state->flags |= CPUIDLE_FLAG_DEEP;
			state->flags |= CPUIDLE_FLAG_TIME_VALID;
			state->flags |= CPUIDLE_FLAG_CHECK_BM;
			state->enter = pr->flags.bm_check ?
				acpi_idle_enter_bm :
				acpi_idle_enter_simple;
			break;
		}

		count++;
	}

	dev->state_count = count;

	if (!count)
		return -EINVAL;

	/* find the deepest state that can handle active BM */
	if (pr->flags.bm_check) {
		for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++)
			if (pr->power.states[i].type == ACPI_STATE_C3)
				break;
		pr->power.bm_state = &pr->power.states[i-1];
	}

	return 0;
}

int acpi_processor_cst_has_changed(struct acpi_processor *pr)
{
	int ret;

	if (!pr)
		return -EINVAL;

	if (nocst) {
		return -ENODEV;
	}

	if (!pr->flags.power_setup_done)
		return -ENODEV;

	cpuidle_pause_and_lock();
	cpuidle_disable_device(&pr->power.dev);
	acpi_processor_get_power_info(pr);
	acpi_processor_setup_cpuidle(pr);
	ret = cpuidle_enable_device(&pr->power.dev);
	cpuidle_resume_and_unlock();

	return ret;
}

#endif /* CONFIG_CPU_IDLE */

int __cpuinit acpi_processor_power_init(struct acpi_processor *pr,
					struct acpi_device *device)
{
	acpi_status status = 0;
	static int first_run;
	struct proc_dir_entry *entry = NULL;
	unsigned int i;


	if (!first_run) {
		dmi_check_system(processor_power_dmi_table);
		if (max_cstate < ACPI_C_STATES_MAX)
			printk(KERN_NOTICE
			       "ACPI: processor limited to max C-state %d\n",
			       max_cstate);
		first_run++;
#if !defined (CONFIG_CPU_IDLE) && defined (CONFIG_SMP)
		register_latency_notifier(&acpi_processor_latency_notifier);
#endif
	}

	if (!pr)
		return -EINVAL;

	if (acpi_gbl_FADT.cst_control && !nocst) {
		status =
		    acpi_os_write_port(acpi_gbl_FADT.smi_command, acpi_gbl_FADT.cst_control, 8);
		if (ACPI_FAILURE(status)) {
			ACPI_EXCEPTION((AE_INFO, status,
					"Notifying BIOS of _CST ability failed"));
		}
	}

	acpi_processor_get_power_info(pr);
	pr->flags.power_setup_done = 1;

	/*
	 * Install the idle handler if processor power management is supported.
	 * Note that the previously set idle handler will be used on
	 * platforms that only support C1.
	 */
	if ((pr->flags.power) && (!boot_option_idle_override)) {
#ifdef CONFIG_CPU_IDLE
		acpi_processor_setup_cpuidle(pr);
		pr->power.dev.cpu = pr->id;
		if (cpuidle_register_device(&pr->power.dev))
			return -EIO;
#endif

		printk(KERN_INFO PREFIX "CPU%d (power states:", pr->id);
		for (i = 1; i <= pr->power.count; i++)
			if (pr->power.states[i].valid)
				printk(" C%d[C%d]", i,
				       pr->power.states[i].type);
		printk(")\n");

#ifndef CONFIG_CPU_IDLE
		if (pr->id == 0) {
			pm_idle_save = pm_idle;
			pm_idle = acpi_processor_idle;
		}
#endif
	}

	/* 'power' [R] */
	entry = create_proc_entry(ACPI_PROCESSOR_FILE_POWER,
				  S_IRUGO, acpi_device_dir(device));
	if (!entry)
		return -EIO;
	else {
		entry->proc_fops = &acpi_processor_power_fops;
		entry->data = acpi_driver_data(device);
		entry->owner = THIS_MODULE;
	}

	return 0;
}

int acpi_processor_power_exit(struct acpi_processor *pr,
			      struct acpi_device *device)
{
#ifdef CONFIG_CPU_IDLE
	if ((pr->flags.power) && (!boot_option_idle_override))
		cpuidle_unregister_device(&pr->power.dev);
#endif
	pr->flags.power_setup_done = 0;

	if (acpi_device_dir(device))
		remove_proc_entry(ACPI_PROCESSOR_FILE_POWER,
				  acpi_device_dir(device));

#ifndef CONFIG_CPU_IDLE

	/* Unregister the idle handler when processor #0 is removed. */
	if (pr->id == 0) {
		pm_idle = pm_idle_save;

		/*
		 * We are about to unload the current idle thread pm callback
		 * (pm_idle), Wait for all processors to update cached/local
		 * copies of pm_idle before proceeding.
		 */
		cpu_idle_wait();
#ifdef CONFIG_SMP
		unregister_latency_notifier(&acpi_processor_latency_notifier);
#endif
	}
#endif

	return 0;
}