/*
 * processor_idle - idle state submodule to the ACPI processor driver
 *
 * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
 * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
 * Copyright (C) 2004, 2005 Dominik Brodowski <linux@brodo.de>
 * Copyright (C) 2004 Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 *			- Added processor hotplug support
 * Copyright (C) 2005 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
 *			- Added support for C3 on SMP
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or (at
 *  your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/cpufreq.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/acpi.h>
#include <linux/dmi.h>
#include <linux/moduleparam.h>
#include <linux/sched.h>	/* need_resched() */
#include <linux/pm_qos_params.h>
#include <linux/clockchips.h>
#include <linux/cpuidle.h>
#include <linux/irqflags.h>

/*
 * Include the apic definitions for x86 to have the APIC timer related defines
 * available also for UP (on SMP it gets magically included via linux/smp.h).
 * asm/acpi.h is not an option, as it would require more include magic. Also
 * creating an empty asm-ia64/apic.h would just trade pest vs. cholera.
 */
#ifdef CONFIG_X86
#include <asm/apic.h>
#endif

#include <asm/io.h>
#include <asm/uaccess.h>

#include <acpi/acpi_bus.h>
#include <acpi/processor.h>
#include <asm/processor.h>

#define ACPI_PROCESSOR_CLASS		"processor"
#define _COMPONENT			ACPI_PROCESSOR_COMPONENT
ACPI_MODULE_NAME("processor_idle");
#define ACPI_PROCESSOR_FILE_POWER	"power"
#define PM_TIMER_TICK_NS		(1000000000ULL/PM_TIMER_FREQUENCY)
#define C2_OVERHEAD			1	/* 1us */
#define C3_OVERHEAD			1	/* 1us */
#define PM_TIMER_TICKS_TO_US(p)		(((p) * 1000)/(PM_TIMER_FREQUENCY/1000))

static unsigned int max_cstate __read_mostly = ACPI_PROCESSOR_MAX_POWER;
module_param(max_cstate, uint, 0000);
static unsigned int nocst __read_mostly;
module_param(nocst, uint, 0000);

static unsigned int latency_factor __read_mostly = 2;
module_param(latency_factor, uint, 0644);

static s64 us_to_pm_timer_ticks(s64 t)
{
	return div64_u64(t * PM_TIMER_FREQUENCY, 1000000);
}
/*
 * IBM ThinkPad R40e crashes mysteriously when going into C2 or C3.
 * For now disable this. Probably a bug somewhere else.
 *
 * To skip this limit, boot/load with a large max_cstate limit.
 */
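/* DMI callback: clamp max_cstate on machines in the blacklist below. */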
static int set_max_cstate(const struct dmi_system_id *id)
{
	if (max_cstate > ACPI_PROCESSOR_MAX_POWER)
		return 0;

	printk(KERN_NOTICE PREFIX "%s detected - limiting to C%ld max_cstate."
	       " Override with \"processor.max_cstate=%d\"\n", id->ident,
	       (long)id->driver_data, ACPI_PROCESSOR_MAX_POWER + 1);

	max_cstate = (long)id->driver_data;

	return 0;
}

/* Actually this shouldn't be __cpuinitdata, would be better to fix the
   callers to only run once -AK */
static struct dmi_system_id __cpuinitdata processor_power_dmi_table[] = {
	{ set_max_cstate, "Clevo 5600D", {
	  DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
	  DMI_MATCH(DMI_BIOS_VERSION, "SHE845M0.86C.0013.D.0302131307")},
	 (void *)2},
	{},
};


/*
 * Callers should disable interrupts before the call and enable
 * interrupts after return.
 */
static void acpi_safe_halt(void)
{
	current_thread_info()->status &= ~TS_POLLING;
	/*
	 * TS_POLLING-cleared state must be visible before we
	 * test NEED_RESCHED:
	 */
	smp_mb();
	if (!need_resched()) {
		safe_halt();
		local_irq_disable();
	}
	current_thread_info()->status |= TS_POLLING;
}

#ifdef ARCH_APICTIMER_STOPS_ON_C3

/*
 * Some BIOS implementations switch to C3 in the published C2 state.
 * This seems to be a common problem on AMD boxen, but other vendors
 * are affected too. We pick the most conservative approach: we assume
 * that the local APIC stops in both C2 and C3.
 */
static void acpi_timer_check_state(int state, struct acpi_processor *pr,
				   struct acpi_processor_cx *cx)
{
	struct acpi_processor_power *pwr = &pr->power;
	u8 type = local_apic_timer_c2_ok ? ACPI_STATE_C3 : ACPI_STATE_C2;

	if (cpu_has(&cpu_data(pr->id), X86_FEATURE_ARAT))
		return;

	if (boot_cpu_has(X86_FEATURE_AMDC1E))
		type = ACPI_STATE_C1;

	/*
	 * Check, if one of the previous states already marked the lapic
	 * unstable
	 */
	if (pwr->timer_broadcast_on_state < state)
		return;

	if (cx->type >= type)
		pr->power.timer_broadcast_on_state = state;
}

static void acpi_propagate_timer_broadcast(struct acpi_processor *pr)
{
	unsigned long reason;

	reason = pr->power.timer_broadcast_on_state < INT_MAX ?
		CLOCK_EVT_NOTIFY_BROADCAST_ON : CLOCK_EVT_NOTIFY_BROADCAST_OFF;

	clockevents_notify(reason, &pr->id);
}

/* Power(C) State timer broadcast control */
static void acpi_state_timer_broadcast(struct acpi_processor *pr,
				       struct acpi_processor_cx *cx,
				       int broadcast)
{
	int state = cx - pr->power.states;

	if (state >= pr->power.timer_broadcast_on_state) {
		unsigned long reason;

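		/*
		 * Tell the clockevents core that this CPU is entering or
		 * leaving a C-state in which its local timer may stop, so
		 * broadcast wakeups cover it while idle.
		 */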
		reason = broadcast ? CLOCK_EVT_NOTIFY_BROADCAST_ENTER :
			CLOCK_EVT_NOTIFY_BROADCAST_EXIT;
		clockevents_notify(reason, &pr->id);
	}
}

#else

static void acpi_timer_check_state(int state, struct acpi_processor *pr,
				   struct acpi_processor_cx *cstate) { }
static void acpi_propagate_timer_broadcast(struct acpi_processor *pr) { }
static void acpi_state_timer_broadcast(struct acpi_processor *pr,
				       struct acpi_processor_cx *cx,
				       int broadcast)
{
}

#endif

/*
 * Suspend / resume control
 */
static int acpi_idle_suspend;
static u32 saved_bm_rld;

static void acpi_idle_bm_rld_save(void)
{
	acpi_read_bit_register(ACPI_BITREG_BUS_MASTER_RLD, &saved_bm_rld);
}
static void acpi_idle_bm_rld_restore(void)
{
	u32 resumed_bm_rld;

	acpi_read_bit_register(ACPI_BITREG_BUS_MASTER_RLD, &resumed_bm_rld);

	if (resumed_bm_rld != saved_bm_rld)
		acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_RLD, saved_bm_rld);
}

int acpi_processor_suspend(struct acpi_device * device, pm_message_t state)
{
	if (acpi_idle_suspend == 1)
		return 0;

	acpi_idle_bm_rld_save();
	acpi_idle_suspend = 1;
	return 0;
}

int acpi_processor_resume(struct acpi_device * device)
{
	if (acpi_idle_suspend == 0)
		return 0;

	acpi_idle_bm_rld_restore();
	acpi_idle_suspend = 0;
	return 0;
}

#if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86)
static void tsc_check_state(int state)
{
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
	case X86_VENDOR_INTEL:
		/*
		 * AMD Fam10h TSC will tick in all
		 * C/P/S0/S1 states when this bit is set.
		 */
		if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
			return;

		/*FALL THROUGH*/
	default:
		/* TSC could halt in idle, so notify users */
		if (state > ACPI_STATE_C1)
			mark_tsc_unstable("TSC halts in idle");
	}
}
#else
static void tsc_check_state(int state) { return; }
#endif

static int acpi_processor_get_power_info_fadt(struct acpi_processor *pr)
{

	if (!pr)
		return -EINVAL;

	if (!pr->pblk)
		return -ENODEV;

	/* if info is obtained from pblk/fadt, type equals state */
	pr->power.states[ACPI_STATE_C2].type = ACPI_STATE_C2;
	pr->power.states[ACPI_STATE_C3].type = ACPI_STATE_C3;

#ifndef CONFIG_HOTPLUG_CPU
	/*
	 * Check for P_LVL2_UP flag before entering C2 and above on
	 * an SMP system.
	 */
	if ((num_online_cpus() > 1) &&
	    !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
		return -ENODEV;
#endif

	/* determine C2 and C3 address from pblk */
	pr->power.states[ACPI_STATE_C2].address = pr->pblk + 4;
	pr->power.states[ACPI_STATE_C3].address = pr->pblk + 5;

	/* determine latencies from FADT */
	pr->power.states[ACPI_STATE_C2].latency = acpi_gbl_FADT.C2latency;
	pr->power.states[ACPI_STATE_C3].latency = acpi_gbl_FADT.C3latency;

	ACPI_DEBUG_PRINT((ACPI_DB_INFO,
			  "lvl2[0x%08x] lvl3[0x%08x]\n",
			  pr->power.states[ACPI_STATE_C2].address,
			  pr->power.states[ACPI_STATE_C3].address));

	return 0;
}

static int acpi_processor_get_power_info_default(struct acpi_processor *pr)
{
	if (!pr->power.states[ACPI_STATE_C1].valid) {
		/* set the first C-State to C1 */
		/* all processors need to support C1 */
		pr->power.states[ACPI_STATE_C1].type = ACPI_STATE_C1;
		pr->power.states[ACPI_STATE_C1].valid = 1;
		pr->power.states[ACPI_STATE_C1].entry_method = ACPI_CSTATE_HALT;
	}
	/* the C0 state only exists as a filler in our array */
	pr->power.states[ACPI_STATE_C0].valid = 1;
	return 0;
}

static int acpi_processor_get_power_info_cst(struct acpi_processor *pr)
{
	acpi_status status = 0;
	acpi_integer count;
	int current_count;
	int i;
	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
	union acpi_object *cst;


	if (nocst)
		return -ENODEV;

	current_count = 0;

	status = acpi_evaluate_object(pr->handle, "_CST", NULL, &buffer);
	if (ACPI_FAILURE(status)) {
		ACPI_DEBUG_PRINT((ACPI_DB_INFO, "No _CST, giving up\n"));
		return -ENODEV;
	}

	cst = buffer.pointer;

	/* There must be at least 2 elements */
	if (!cst || (cst->type != ACPI_TYPE_PACKAGE) || cst->package.count < 2) {
		printk(KERN_ERR PREFIX "not enough elements in _CST\n");
		status = -EFAULT;
		goto end;
	}

	count = cst->package.elements[0].integer.value;

	/* Validate number of power states. */
	if (count < 1 || count != cst->package.count - 1) {
		printk(KERN_ERR PREFIX "count given by _CST is not valid\n");
		status = -EFAULT;
		goto end;
	}

	/* Tell driver that at least _CST is supported. */
	pr->flags.has_cst = 1;

	for (i = 1; i <= count; i++) {
		union acpi_object *element;
		union acpi_object *obj;
		struct acpi_power_register *reg;
		struct acpi_processor_cx cx;

		memset(&cx, 0, sizeof(cx));

		element = &(cst->package.elements[i]);
		if (element->type != ACPI_TYPE_PACKAGE)
			continue;

		if (element->package.count != 4)
			continue;

		obj = &(element->package.elements[0]);

		if (obj->type != ACPI_TYPE_BUFFER)
			continue;

		reg = (struct acpi_power_register *)obj->buffer.pointer;

		if (reg->space_id != ACPI_ADR_SPACE_SYSTEM_IO &&
		    (reg->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE))
			continue;

		/* There should be an easy way to extract an integer... */
		obj = &(element->package.elements[1]);
		if (obj->type != ACPI_TYPE_INTEGER)
			continue;

		cx.type = obj->integer.value;
		/*
		 * Some buggy BIOSes won't list C1 in _CST -
		 * Let acpi_processor_get_power_info_default() handle them later
		 */
		if (i == 1 && cx.type != ACPI_STATE_C1)
			current_count++;

		cx.address = reg->address;
		cx.index = current_count + 1;

		cx.entry_method = ACPI_CSTATE_SYSTEMIO;
		if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) {
			if (acpi_processor_ffh_cstate_probe
					(pr->id, &cx, reg) == 0) {
				cx.entry_method = ACPI_CSTATE_FFH;
			} else if (cx.type == ACPI_STATE_C1) {
				/*
				 * C1 is a special case where FIXED_HARDWARE
				 * can be handled in non-MWAIT way as well.
				 * In that case, save this _CST entry info.
				 * Otherwise, ignore this info and continue.
				 */
				cx.entry_method = ACPI_CSTATE_HALT;
				snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT");
			} else {
				continue;
			}
			if (cx.type == ACPI_STATE_C1 &&
			    (idle_halt || idle_nomwait)) {
				/*
				 * In most cases the C1 space_id obtained from
				 * _CST object is FIXED_HARDWARE access mode.
				 * But when the option of idle=halt is added,
				 * the entry_method type should be changed from
				 * CSTATE_FFH to CSTATE_HALT.
				 * When the option of idle=nomwait is added,
				 * the C1 entry_method type should be
				 * CSTATE_HALT.
				 */
				cx.entry_method = ACPI_CSTATE_HALT;
				snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT");
			}
		} else {
			snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI IOPORT 0x%x",
				 cx.address);
		}

		if (cx.type == ACPI_STATE_C1) {
			cx.valid = 1;
		}

		obj = &(element->package.elements[2]);
		if (obj->type != ACPI_TYPE_INTEGER)
			continue;

		cx.latency = obj->integer.value;

		obj = &(element->package.elements[3]);
		if (obj->type != ACPI_TYPE_INTEGER)
			continue;

		cx.power = obj->integer.value;

		current_count++;
		memcpy(&(pr->power.states[current_count]), &cx, sizeof(cx));

		/*
		 * We support total ACPI_PROCESSOR_MAX_POWER - 1
		 * (From 1 through ACPI_PROCESSOR_MAX_POWER - 1)
		 */
		if (current_count >= (ACPI_PROCESSOR_MAX_POWER - 1)) {
			printk(KERN_WARNING
			       "Limiting number of power states to max (%d)\n",
			       ACPI_PROCESSOR_MAX_POWER);
			printk(KERN_WARNING
			       "Please increase ACPI_PROCESSOR_MAX_POWER if needed.\n");
			break;
		}
	}

	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found %d power states\n",
			  current_count));

	/* Validate number of power states discovered */
	if (current_count < 2)
		status = -EFAULT;

      end:
	kfree(buffer.pointer);

	return status;
}

static void acpi_processor_power_verify_c2(struct acpi_processor_cx *cx)
{

	if (!cx->address)
		return;

	/*
	 * C2 latency must be less than or equal to 100
	 * microseconds.
	 */
	else if (cx->latency > ACPI_PROCESSOR_MAX_C2_LATENCY) {
		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
				  "latency too large [%d]\n", cx->latency));
		return;
	}

	/*
	 * Otherwise we've met all of our C2 requirements.
	 * Normalize the C2 latency to expedite policy
	 */
	cx->valid = 1;

	cx->latency_ticks = cx->latency;

	return;
}

static void acpi_processor_power_verify_c3(struct acpi_processor *pr,
					   struct acpi_processor_cx *cx)
{
	static int bm_check_flag;


	if (!cx->address)
		return;

	/*
	 * C3 latency must be less than or equal to 1000
	 * microseconds.
	 */
	else if (cx->latency > ACPI_PROCESSOR_MAX_C3_LATENCY) {
		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
				  "latency too large [%d]\n", cx->latency));
		return;
	}

	/*
	 * PIIX4 Erratum #18: We don't support C3 when Type-F (fast)
	 * DMA transfers are used by any ISA device to avoid livelock.
	 * Note that we could disable Type-F DMA (as recommended by
	 * the erratum), but this is known to disrupt certain ISA
	 * devices thus we take the conservative approach.
	 */
	else if (errata.piix4.fdma) {
		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
				  "C3 not supported on PIIX4 with Type-F DMA\n"));
		return;
	}

	/* All the logic here assumes flags.bm_check is same across all CPUs */
	if (!bm_check_flag) {
		/* Determine whether bm_check is needed based on CPU */
		acpi_processor_power_init_bm_check(&(pr->flags), pr->id);
		bm_check_flag = pr->flags.bm_check;
	} else {
		pr->flags.bm_check = bm_check_flag;
	}

	if (pr->flags.bm_check) {
		if (!pr->flags.bm_control) {
			if (pr->flags.has_cst != 1) {
				/* bus mastering control is necessary */
				ACPI_DEBUG_PRINT((ACPI_DB_INFO,
					"C3 support requires BM control\n"));
				return;
			} else {
				/* Here we enter C3 without bus mastering */
				ACPI_DEBUG_PRINT((ACPI_DB_INFO,
					"C3 support without BM control\n"));
			}
		}
	} else {
		/*
		 * WBINVD should be set in fadt, for the C3 state to be
		 * supported when bm_check is not required.
		 */
		if (!(acpi_gbl_FADT.flags & ACPI_FADT_WBINVD)) {
			ACPI_DEBUG_PRINT((ACPI_DB_INFO,
					  "Cache invalidation should work properly"
					  " for C3 to be enabled on SMP systems\n"));
			return;
		}
	}

	/*
	 * Otherwise we've met all of our C3 requirements.
	 * Normalize the C3 latency to expedite policy.  Enable
	 * checking of bus mastering status (bm_check) so we can
	 * use this in our C3 policy
	 */
	cx->valid = 1;

	cx->latency_ticks = cx->latency;
	/*
	 * On older chipsets, BM_RLD needs to be set
	 * in order for Bus Master activity to wake the
	 * system from C3.  Newer chipsets handle DMA
	 * during C3 automatically and BM_RLD is a NOP.
	 * In either case, the proper way to
	 * handle BM_RLD is to set it and leave it set.
	 */
	acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_RLD, 1);

	return;
}

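/*
 * Validate each discovered C-state, note which states need timer
 * broadcast or TSC workarounds, and return the number of usable states.
 */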
static int acpi_processor_power_verify(struct acpi_processor *pr)
{
	unsigned int i;
	unsigned int working = 0;

	pr->power.timer_broadcast_on_state = INT_MAX;

	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++) {
		struct acpi_processor_cx *cx = &pr->power.states[i];

		switch (cx->type) {
		case ACPI_STATE_C1:
			cx->valid = 1;
			acpi_timer_check_state(i, pr, cx);
			break;

		case ACPI_STATE_C2:
			acpi_processor_power_verify_c2(cx);
			if (cx->valid)
				acpi_timer_check_state(i, pr, cx);
			break;

		case ACPI_STATE_C3:
			acpi_processor_power_verify_c3(pr, cx);
			if (cx->valid)
				acpi_timer_check_state(i, pr, cx);
			break;
		}
		if (cx->valid)
			tsc_check_state(cx->type);

		if (cx->valid)
			working++;
	}

	acpi_propagate_timer_broadcast(pr);

	return (working);
}

static int acpi_processor_get_power_info(struct acpi_processor *pr)
{
	unsigned int i;
	int result;


	/* NOTE: the idle thread may not be running while calling
	 * this function */

	/* Zero initialize all the C-states info. */
	memset(pr->power.states, 0, sizeof(pr->power.states));

	result = acpi_processor_get_power_info_cst(pr);
	if (result == -ENODEV)
		result = acpi_processor_get_power_info_fadt(pr);

	if (result)
		return result;

	acpi_processor_get_power_info_default(pr);

	pr->power.count = acpi_processor_power_verify(pr);

	/*
	 * if one state of type C2 or C3 is available, mark this
	 * CPU as being "idle manageable"
	 */
	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
		if (pr->power.states[i].valid) {
			pr->power.count = i;
			if (pr->power.states[i].type >= ACPI_STATE_C2)
				pr->flags.power = 1;
		}
	}

	return 0;
}

static int acpi_processor_power_seq_show(struct seq_file *seq, void *offset)
{
	struct acpi_processor *pr = seq->private;
	unsigned int i;


	if (!pr)
		goto end;

	seq_printf(seq, "active state: C%zd\n"
		   "max_cstate: C%d\n"
		   "maximum allowed latency: %d usec\n",
		   pr->power.state ? pr->power.state - pr->power.states : 0,
		   max_cstate, pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY));

	seq_puts(seq, "states:\n");

	for (i = 1; i <= pr->power.count; i++) {
		seq_printf(seq, " %cC%d: ",
			   (&pr->power.states[i] ==
			    pr->power.state ? '*' : ' '), i);

		if (!pr->power.states[i].valid) {
			seq_puts(seq, "<not supported>\n");
			continue;
		}

		switch (pr->power.states[i].type) {
		case ACPI_STATE_C1:
			seq_printf(seq, "type[C1] ");
			break;
		case ACPI_STATE_C2:
			seq_printf(seq, "type[C2] ");
			break;
		case ACPI_STATE_C3:
			seq_printf(seq, "type[C3] ");
			break;
		default:
			seq_printf(seq, "type[--] ");
			break;
		}

		if (pr->power.states[i].promotion.state)
			seq_printf(seq, "promotion[C%zd] ",
				   (pr->power.states[i].promotion.state -
				    pr->power.states));
		else
			seq_puts(seq, "promotion[--] ");

		if (pr->power.states[i].demotion.state)
			seq_printf(seq, "demotion[C%zd] ",
				   (pr->power.states[i].demotion.state -
				    pr->power.states));
		else
			seq_puts(seq, "demotion[--] ");

		seq_printf(seq, "latency[%03d] usage[%08d] duration[%020llu]\n",
			   pr->power.states[i].latency,
			   pr->power.states[i].usage,
			   (unsigned long long)pr->power.states[i].time);
	}

      end:
	return 0;
}

static int acpi_processor_power_open_fs(struct inode *inode, struct file *file)
{
	return single_open(file, acpi_processor_power_seq_show,
			   PDE(inode)->data);
}

static const struct file_operations acpi_processor_power_fops = {
	.owner = THIS_MODULE,
	.open = acpi_processor_power_open_fs,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};


/**
 * acpi_idle_bm_check - checks if bus master activity was detected
 */
static int acpi_idle_bm_check(void)
{
	u32 bm_status = 0;

	acpi_read_bit_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status);
	if (bm_status)
		acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);
	/*
	 * PIIX4 Erratum #18: Note that BM_STS doesn't always reflect
	 * the true state of bus mastering activity; forcing us to
	 * manually check the BMIDEA bit of each IDE channel.
	 */
	else if (errata.piix4.bmisx) {
		if ((inb_p(errata.piix4.bmisx + 0x02) & 0x01)
		    || (inb_p(errata.piix4.bmisx + 0x0A) & 0x01))
			bm_status = 1;
	}
	return bm_status;
}

/**
 * acpi_idle_do_entry - a helper function that does C2 and C3 type entry
 * @cx: cstate data
 *
 * Caller disables interrupt before call and enables interrupt after return.
 */
static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx)
{
	/* Don't trace irqs off for idle */
	stop_critical_timings();
	if (cx->entry_method == ACPI_CSTATE_FFH) {
		/* Call into architectural FFH based C-state */
		acpi_processor_ffh_cstate_enter(cx);
	} else if (cx->entry_method == ACPI_CSTATE_HALT) {
		acpi_safe_halt();
	} else {
		int unused;
		/* IO port based C-state */
		inb(cx->address);
		/* Dummy wait op - must do something useless after P_LVL2 read
		   because chipsets cannot guarantee that STPCLK# signal
		   gets asserted in time to freeze execution properly. */
		unused = inl(acpi_gbl_FADT.xpm_timer_block.address);
	}
	start_critical_timings();
}

/**
 * acpi_idle_enter_c1 - enters an ACPI C1 state-type
 * @dev: the target CPU
 * @state: the state data
 *
 * This is equivalent to the HALT instruction.
 */
static int acpi_idle_enter_c1(struct cpuidle_device *dev,
			      struct cpuidle_state *state)
{
	ktime_t kt1, kt2;
	s64 idle_time;
	struct acpi_processor *pr;
	struct acpi_processor_cx *cx = cpuidle_get_statedata(state);

	pr = __get_cpu_var(processors);

	if (unlikely(!pr))
		return 0;

	local_irq_disable();

	/* Do not access any ACPI IO ports in suspend path */
	if (acpi_idle_suspend) {
		local_irq_enable();
		cpu_relax();
		return 0;
	}

	acpi_state_timer_broadcast(pr, cx, 1);
	kt1 = ktime_get_real();
	acpi_idle_do_entry(cx);
	kt2 = ktime_get_real();
	idle_time = ktime_to_us(ktime_sub(kt2, kt1));

	local_irq_enable();
	cx->usage++;
	acpi_state_timer_broadcast(pr, cx, 0);

	return idle_time;
}

/**
 * acpi_idle_enter_simple - enters an ACPI state without BM handling
 * @dev: the target CPU
 * @state: the state data
 */
static int acpi_idle_enter_simple(struct cpuidle_device *dev,
				  struct cpuidle_state *state)
{
	struct acpi_processor *pr;
	struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
	ktime_t kt1, kt2;
	s64 idle_time;
	s64 sleep_ticks = 0;

	pr = __get_cpu_var(processors);

	if (unlikely(!pr))
		return 0;

	if (acpi_idle_suspend)
		return(acpi_idle_enter_c1(dev, state));

	local_irq_disable();
	current_thread_info()->status &= ~TS_POLLING;
	/*
	 * TS_POLLING-cleared state must be visible before we test
	 * NEED_RESCHED:
	 */
	smp_mb();

	if (unlikely(need_resched())) {
		current_thread_info()->status |= TS_POLLING;
		local_irq_enable();
		return 0;
	}

	/*
	 * Must be done before busmaster disable as we might need to
	 * access HPET !
	 */
	acpi_state_timer_broadcast(pr, cx, 1);

	if (cx->type == ACPI_STATE_C3)
		ACPI_FLUSH_CPU_CACHE();

	kt1 = ktime_get_real();
	/* Tell the scheduler that we are going deep-idle: */
	sched_clock_idle_sleep_event();
	acpi_idle_do_entry(cx);
	kt2 = ktime_get_real();
	idle_time = ktime_to_us(ktime_sub(kt2, kt1));

	sleep_ticks = us_to_pm_timer_ticks(idle_time);

	/* Tell the scheduler how much we idled: */
	sched_clock_idle_wakeup_event(sleep_ticks*PM_TIMER_TICK_NS);

	local_irq_enable();
	current_thread_info()->status |= TS_POLLING;

	cx->usage++;

	acpi_state_timer_broadcast(pr, cx, 0);
	cx->time += sleep_ticks;
	return idle_time;
}

static int c3_cpu_count;
static DEFINE_SPINLOCK(c3_lock);

/**
 * acpi_idle_enter_bm - enters C3 with proper BM handling
 * @dev: the target CPU
 * @state: the state data
 *
 * If BM is detected, the deepest non-C3 idle state is entered instead.
 */
static int acpi_idle_enter_bm(struct cpuidle_device *dev,
			      struct cpuidle_state *state)
{
	struct acpi_processor *pr;
	struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
	ktime_t kt1, kt2;
	s64 idle_time;
	s64 sleep_ticks = 0;


	pr = __get_cpu_var(processors);

	if (unlikely(!pr))
		return 0;

	if (acpi_idle_suspend)
		return(acpi_idle_enter_c1(dev, state));

	if (acpi_idle_bm_check()) {
		if (dev->safe_state) {
			dev->last_state = dev->safe_state;
			return dev->safe_state->enter(dev, dev->safe_state);
		} else {
			local_irq_disable();
			acpi_safe_halt();
			local_irq_enable();
			return 0;
		}
	}

	local_irq_disable();
	current_thread_info()->status &= ~TS_POLLING;
	/*
	 * TS_POLLING-cleared state must be visible before we test
	 * NEED_RESCHED:
	 */
	smp_mb();

	if (unlikely(need_resched())) {
		current_thread_info()->status |= TS_POLLING;
		local_irq_enable();
		return 0;
	}

	acpi_unlazy_tlb(smp_processor_id());

	/* Tell the scheduler that we are going deep-idle: */
	sched_clock_idle_sleep_event();
	/*
	 * Must be done before busmaster disable as we might need to
	 * access HPET !
	 */
	acpi_state_timer_broadcast(pr, cx, 1);

	kt1 = ktime_get_real();
	/*
	 * disable bus master
	 * bm_check implies we need ARB_DIS
	 * !bm_check implies we need cache flush
	 * bm_control implies whether we can do ARB_DIS
	 *
	 * That leaves a case where bm_check is set and bm_control is
	 * not set. In that case we cannot do much, we enter C3
	 * without doing anything.
	 */
	if (pr->flags.bm_check && pr->flags.bm_control) {
		spin_lock(&c3_lock);
		c3_cpu_count++;
		/* Disable bus master arbitration when all CPUs are in C3 */
		if (c3_cpu_count == num_online_cpus())
			acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 1);
		spin_unlock(&c3_lock);
	} else if (!pr->flags.bm_check) {
		ACPI_FLUSH_CPU_CACHE();
	}

	acpi_idle_do_entry(cx);

	/* Re-enable bus master arbitration */
	if (pr->flags.bm_check && pr->flags.bm_control) {
		spin_lock(&c3_lock);
		acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 0);
		c3_cpu_count--;
		spin_unlock(&c3_lock);
	}
	kt2 = ktime_get_real();
	idle_time = ktime_to_us(ktime_sub(kt2, kt1));

	sleep_ticks = us_to_pm_timer_ticks(idle_time);
	/* Tell the scheduler how much we idled: */
	sched_clock_idle_wakeup_event(sleep_ticks*PM_TIMER_TICK_NS);

	local_irq_enable();
	current_thread_info()->status |= TS_POLLING;

	cx->usage++;

	acpi_state_timer_broadcast(pr, cx, 0);
	cx->time += sleep_ticks;
	return idle_time;
}

struct cpuidle_driver acpi_idle_driver = {
	.name =		"acpi_idle",
	.owner =	THIS_MODULE,
};

/**
 * acpi_processor_setup_cpuidle - prepares and configures CPUIDLE
 * @pr: the ACPI processor
 */
static int acpi_processor_setup_cpuidle(struct acpi_processor *pr)
{
	int i, count = CPUIDLE_DRIVER_STATE_START;
	struct acpi_processor_cx *cx;
	struct cpuidle_state *state;
	struct cpuidle_device *dev = &pr->power.dev;

	if (!pr->flags.power_setup_done)
		return -EINVAL;

	if (pr->flags.power == 0) {
		return -EINVAL;
	}

	dev->cpu = pr->id;
	for (i = 0; i < CPUIDLE_STATE_MAX; i++) {
		dev->states[i].name[0] = '\0';
		dev->states[i].desc[0] = '\0';
	}

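	/* A limit of zero would disable every state; keep at least C1. */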
	if (max_cstate == 0)
		max_cstate = 1;

	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++) {
		cx = &pr->power.states[i];
		state = &dev->states[count];

		if (!cx->valid)
			continue;

#ifdef CONFIG_HOTPLUG_CPU
		if ((cx->type != ACPI_STATE_C1) && (num_online_cpus() > 1) &&
		    !pr->flags.has_cst &&
		    !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
			continue;
#endif
		cpuidle_set_statedata(state, cx);

		snprintf(state->name, CPUIDLE_NAME_LEN, "C%d", i);
		strncpy(state->desc, cx->desc, CPUIDLE_DESC_LEN);
		state->exit_latency = cx->latency;
		state->target_residency = cx->latency * latency_factor;
		state->power_usage = cx->power;

		state->flags = 0;
		switch (cx->type) {
		case ACPI_STATE_C1:
			state->flags |= CPUIDLE_FLAG_SHALLOW;
			if (cx->entry_method == ACPI_CSTATE_FFH)
				state->flags |= CPUIDLE_FLAG_TIME_VALID;

			state->enter = acpi_idle_enter_c1;
			dev->safe_state = state;
			break;

		case ACPI_STATE_C2:
			state->flags |= CPUIDLE_FLAG_BALANCED;
			state->flags |= CPUIDLE_FLAG_TIME_VALID;
			state->enter = acpi_idle_enter_simple;
			dev->safe_state = state;
			break;

		case ACPI_STATE_C3:
			state->flags |= CPUIDLE_FLAG_DEEP;
			state->flags |= CPUIDLE_FLAG_TIME_VALID;
			state->flags |= CPUIDLE_FLAG_CHECK_BM;
			state->enter = pr->flags.bm_check ?
					acpi_idle_enter_bm :
					acpi_idle_enter_simple;
			break;
		}

		count++;
		if (count == CPUIDLE_STATE_MAX)
			break;
	}

	dev->state_count = count;

	if (!count)
		return -EINVAL;

	return 0;
}

int acpi_processor_cst_has_changed(struct acpi_processor *pr)
{
	int ret = 0;

	if (boot_option_idle_override)
		return 0;

	if (!pr)
		return -EINVAL;

	if (nocst) {
		return -ENODEV;
	}

	if (!pr->flags.power_setup_done)
		return -ENODEV;

	cpuidle_pause_and_lock();
	cpuidle_disable_device(&pr->power.dev);
	acpi_processor_get_power_info(pr);
	if (pr->flags.power) {
		acpi_processor_setup_cpuidle(pr);
		ret = cpuidle_enable_device(&pr->power.dev);
	}
	cpuidle_resume_and_unlock();

	return ret;
}

int __cpuinit acpi_processor_power_init(struct acpi_processor *pr,
					struct acpi_device *device)
{
	acpi_status status = 0;
	static int first_run;
	struct proc_dir_entry *entry = NULL;
	unsigned int i;

	if (boot_option_idle_override)
		return 0;

	if (!first_run) {
		if (idle_halt) {
			/*
			 * When the boot option of "idle=halt" is added, halt
			 * is used for CPU IDLE.
			 * In such case C2/C3 is meaningless. So the max_cstate
			 * is set to one.
			 */
			max_cstate = 1;
		}
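		/* Apply DMI quirks and any additional platform limit to max_cstate. */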
		dmi_check_system(processor_power_dmi_table);
		max_cstate = acpi_processor_cstate_check(max_cstate);
		if (max_cstate < ACPI_C_STATES_MAX)
			printk(KERN_NOTICE
			       "ACPI: processor limited to max C-state %d\n",
			       max_cstate);
		first_run++;
	}

	if (!pr)
		return -EINVAL;

	if (acpi_gbl_FADT.cst_control && !nocst) {
		status =
		    acpi_os_write_port(acpi_gbl_FADT.smi_command, acpi_gbl_FADT.cst_control, 8);
		if (ACPI_FAILURE(status)) {
			ACPI_EXCEPTION((AE_INFO, status,
					"Notifying BIOS of _CST ability failed"));
		}
	}

	acpi_processor_get_power_info(pr);
	pr->flags.power_setup_done = 1;

	/*
	 * Install the idle handler if processor power management is supported.
	 * Note that the previously set idle handler will be used on
	 * platforms that only support C1.
	 */
	if (pr->flags.power) {
		acpi_processor_setup_cpuidle(pr);
		if (cpuidle_register_device(&pr->power.dev))
			return -EIO;

		printk(KERN_INFO PREFIX "CPU%d (power states:", pr->id);
		for (i = 1; i <= pr->power.count; i++)
			if (pr->power.states[i].valid)
				printk(" C%d[C%d]", i,
				       pr->power.states[i].type);
		printk(")\n");
	}

	/* 'power' [R] */
	entry = proc_create_data(ACPI_PROCESSOR_FILE_POWER,
				 S_IRUGO, acpi_device_dir(device),
				 &acpi_processor_power_fops,
				 acpi_driver_data(device));
	if (!entry)
		return -EIO;
	return 0;
}

int acpi_processor_power_exit(struct acpi_processor *pr,
			      struct acpi_device *device)
{
	if (boot_option_idle_override)
		return 0;

	cpuidle_unregister_device(&pr->power.dev);
	pr->flags.power_setup_done = 0;

	if (acpi_device_dir(device))
		remove_proc_entry(ACPI_PROCESSOR_FILE_POWER,
				  acpi_device_dir(device));

	return 0;
}