xref: /openbmc/linux/drivers/acpi/processor_idle.c (revision 9ac8d3fb)
1 /*
2  * processor_idle - idle state submodule to the ACPI processor driver
3  *
4  *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
5  *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
6  *  Copyright (C) 2004, 2005 Dominik Brodowski <linux@brodo.de>
7  *  Copyright (C) 2004  Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
8  *  			- Added processor hotplug support
9  *  Copyright (C) 2005  Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
10  *  			- Added support for C3 on SMP
11  *
12  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
13  *
14  *  This program is free software; you can redistribute it and/or modify
15  *  it under the terms of the GNU General Public License as published by
16  *  the Free Software Foundation; either version 2 of the License, or (at
17  *  your option) any later version.
18  *
19  *  This program is distributed in the hope that it will be useful, but
20  *  WITHOUT ANY WARRANTY; without even the implied warranty of
21  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  *  General Public License for more details.
23  *
24  *  You should have received a copy of the GNU General Public License along
25  *  with this program; if not, write to the Free Software Foundation, Inc.,
26  *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
27  *
28  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
29  */
30 
31 #include <linux/kernel.h>
32 #include <linux/module.h>
33 #include <linux/init.h>
34 #include <linux/cpufreq.h>
35 #include <linux/proc_fs.h>
36 #include <linux/seq_file.h>
37 #include <linux/acpi.h>
38 #include <linux/dmi.h>
39 #include <linux/moduleparam.h>
40 #include <linux/sched.h>	/* need_resched() */
41 #include <linux/pm_qos_params.h>
42 #include <linux/clockchips.h>
43 #include <linux/cpuidle.h>
44 
45 /*
46  * Include the apic definitions for x86 to have the APIC timer related defines
47  * available also for UP (on SMP it gets magically included via linux/smp.h).
48  * asm/acpi.h is not an option, as it would require more include magic. Also
49  * creating an empty asm-ia64/apic.h would just trade pest vs. cholera.
50  */
51 #ifdef CONFIG_X86
52 #include <asm/apic.h>
53 #endif
54 
55 #include <asm/io.h>
56 #include <asm/uaccess.h>
57 
58 #include <acpi/acpi_bus.h>
59 #include <acpi/processor.h>
60 #include <asm/processor.h>
61 
/*
 * Component/class identifiers for this file.  _COMPONENT and the
 * ACPI_MODULE_NAME() invocation presumably tag the ACPI_DEBUG_PRINT output
 * emitted below -- confirm against the ACPICA headers.
 */
#define ACPI_PROCESSOR_COMPONENT        0x01000000
#define ACPI_PROCESSOR_CLASS            "processor"
#define _COMPONENT              ACPI_PROCESSOR_COMPONENT
ACPI_MODULE_NAME("processor_idle");
/* File name "power"; its consumer is outside this part of the file. */
#define ACPI_PROCESSOR_FILE_POWER	"power"
/*
 * Convert microseconds to ACPI PM timer ticks.  The argument is
 * parenthesized so that expression arguments such as "a + b" are converted
 * as a whole instead of only their last operand being scaled.
 */
#define US_TO_PM_TIMER_TICKS(t)		(((t) * (PM_TIMER_FREQUENCY/1000)) / 1000)
/* Duration of one PM timer tick in nanoseconds (integer division). */
#define PM_TIMER_TICK_NS		(1000000000ULL/PM_TIMER_FREQUENCY)
/*
 * Idle-switching overhead that is subtracted from measured sleep time.
 * Without CONFIG_CPU_IDLE the value is in PM timer ticks (it is subtracted
 * from a tick count in acpi_processor_idle()); with CONFIG_CPU_IDLE it is
 * given in microseconds.
 */
#ifndef CONFIG_CPU_IDLE
#define C2_OVERHEAD			4	/* 1us (3.579 ticks per us) */
#define C3_OVERHEAD			4	/* 1us (3.579 ticks per us) */
/*
 * Idle routine that acpi_processor_idle() prefers over acpi_safe_halt()
 * when it is non-NULL.  It is assigned outside this part of the file.
 */
static void (*pm_idle_save) (void) __read_mostly;
#else
#define C2_OVERHEAD			1	/* 1us */
#define C3_OVERHEAD			1	/* 1us */
#endif
/* Convert PM timer ticks to microseconds. */
#define PM_TIMER_TICKS_TO_US(p)		(((p) * 1000)/(PM_TIMER_FREQUENCY/1000))
78 
/*
 * Deepest C-state index that may be used.  set_max_cstate() lowers it for
 * systems matched by the DMI quirk table; the parameter's permission bits
 * differ depending on CONFIG_CPU_IDLE.
 */
static unsigned int max_cstate __read_mostly = ACPI_PROCESSOR_MAX_POWER;
#ifdef CONFIG_CPU_IDLE
module_param(max_cstate, uint, 0000);
#else
module_param(max_cstate, uint, 0644);
#endif
/*
 * When non-zero, acpi_processor_get_power_info_cst() returns -ENODEV
 * without evaluating _CST.
 */
static unsigned int nocst __read_mostly;
module_param(nocst, uint, 0000);
87 
#ifndef CONFIG_CPU_IDLE
/*
 * bm_history -- bit-mask with a bit per jiffy of bus-master activity
 * 1000 HZ: 0xFFFFFFFF: 32 jiffies = 32ms
 * 800 HZ: 0xFFFFFFFF: 32 jiffies = 40ms
 * 100 HZ: 0x0000000F: 4 jiffies = 40ms
 * reduce history for more aggressive entry into C3
 *
 * acpi_processor_set_power_policy() copies this mask into the
 * bus-master thresholds used for demotion from, and promotion to, C3.
 */
static unsigned int bm_history __read_mostly =
    (HZ >= 800 ? 0xFFFFFFFF : ((1U << (HZ / 25)) - 1));
module_param(bm_history, uint, 0644);

/* Forward declaration; the definition follows acpi_processor_idle(). */
static int acpi_processor_set_power_policy(struct acpi_processor *pr);

#else	/* CONFIG_CPU_IDLE */
/* Tunable for the CONFIG_CPU_IDLE path; its use is outside this view. */
static unsigned int latency_factor __read_mostly = 2;
module_param(latency_factor, uint, 0644);
#endif
106 
107 /*
108  * IBM ThinkPad R40e crashes mysteriously when going into C2 or C3.
109  * For now disable this. Probably a bug somewhere else.
110  *
111  * To skip this limit, boot/load with a large max_cstate limit.
112  */
113 static int set_max_cstate(const struct dmi_system_id *id)
114 {
115 	if (max_cstate > ACPI_PROCESSOR_MAX_POWER)
116 		return 0;
117 
118 	printk(KERN_NOTICE PREFIX "%s detected - limiting to C%ld max_cstate."
119 	       " Override with \"processor.max_cstate=%d\"\n", id->ident,
120 	       (long)id->driver_data, ACPI_PROCESSOR_MAX_POWER + 1);
121 
122 	max_cstate = (long)id->driver_data;
123 
124 	return 0;
125 }
126 
127 /* Actually this shouldn't be __cpuinitdata, would be better to fix the
128    callers to only run once -AK */
129 static struct dmi_system_id __cpuinitdata processor_power_dmi_table[] = {
130 	{ set_max_cstate, "IBM ThinkPad R40e", {
131 	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
132 	  DMI_MATCH(DMI_BIOS_VERSION,"1SET70WW")}, (void *)1},
133 	{ set_max_cstate, "IBM ThinkPad R40e", {
134 	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
135 	  DMI_MATCH(DMI_BIOS_VERSION,"1SET60WW")}, (void *)1},
136 	{ set_max_cstate, "IBM ThinkPad R40e", {
137 	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
138 	  DMI_MATCH(DMI_BIOS_VERSION,"1SET43WW") }, (void*)1},
139 	{ set_max_cstate, "IBM ThinkPad R40e", {
140 	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
141 	  DMI_MATCH(DMI_BIOS_VERSION,"1SET45WW") }, (void*)1},
142 	{ set_max_cstate, "IBM ThinkPad R40e", {
143 	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
144 	  DMI_MATCH(DMI_BIOS_VERSION,"1SET47WW") }, (void*)1},
145 	{ set_max_cstate, "IBM ThinkPad R40e", {
146 	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
147 	  DMI_MATCH(DMI_BIOS_VERSION,"1SET50WW") }, (void*)1},
148 	{ set_max_cstate, "IBM ThinkPad R40e", {
149 	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
150 	  DMI_MATCH(DMI_BIOS_VERSION,"1SET52WW") }, (void*)1},
151 	{ set_max_cstate, "IBM ThinkPad R40e", {
152 	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
153 	  DMI_MATCH(DMI_BIOS_VERSION,"1SET55WW") }, (void*)1},
154 	{ set_max_cstate, "IBM ThinkPad R40e", {
155 	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
156 	  DMI_MATCH(DMI_BIOS_VERSION,"1SET56WW") }, (void*)1},
157 	{ set_max_cstate, "IBM ThinkPad R40e", {
158 	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
159 	  DMI_MATCH(DMI_BIOS_VERSION,"1SET59WW") }, (void*)1},
160 	{ set_max_cstate, "IBM ThinkPad R40e", {
161 	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
162 	  DMI_MATCH(DMI_BIOS_VERSION,"1SET60WW") }, (void*)1},
163 	{ set_max_cstate, "IBM ThinkPad R40e", {
164 	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
165 	  DMI_MATCH(DMI_BIOS_VERSION,"1SET61WW") }, (void*)1},
166 	{ set_max_cstate, "IBM ThinkPad R40e", {
167 	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
168 	  DMI_MATCH(DMI_BIOS_VERSION,"1SET62WW") }, (void*)1},
169 	{ set_max_cstate, "IBM ThinkPad R40e", {
170 	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
171 	  DMI_MATCH(DMI_BIOS_VERSION,"1SET64WW") }, (void*)1},
172 	{ set_max_cstate, "IBM ThinkPad R40e", {
173 	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
174 	  DMI_MATCH(DMI_BIOS_VERSION,"1SET65WW") }, (void*)1},
175 	{ set_max_cstate, "IBM ThinkPad R40e", {
176 	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
177 	  DMI_MATCH(DMI_BIOS_VERSION,"1SET68WW") }, (void*)1},
178 	{ set_max_cstate, "Medion 41700", {
179 	  DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"),
180 	  DMI_MATCH(DMI_BIOS_VERSION,"R01-A1J")}, (void *)1},
181 	{ set_max_cstate, "Clevo 5600D", {
182 	  DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"),
183 	  DMI_MATCH(DMI_BIOS_VERSION,"SHE845M0.86C.0013.D.0302131307")},
184 	 (void *)2},
185 	{},
186 };
187 
188 static inline u32 ticks_elapsed(u32 t1, u32 t2)
189 {
190 	if (t2 >= t1)
191 		return (t2 - t1);
192 	else if (!(acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER))
193 		return (((0x00FFFFFF - t1) + t2) & 0x00FFFFFF);
194 	else
195 		return ((0xFFFFFFFF - t1) + t2);
196 }
197 
198 static inline u32 ticks_elapsed_in_us(u32 t1, u32 t2)
199 {
200 	if (t2 >= t1)
201 		return PM_TIMER_TICKS_TO_US(t2 - t1);
202 	else if (!(acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER))
203 		return PM_TIMER_TICKS_TO_US(((0x00FFFFFF - t1) + t2) & 0x00FFFFFF);
204 	else
205 		return PM_TIMER_TICKS_TO_US((0xFFFFFFFF - t1) + t2);
206 }
207 
/*
 * Halt the CPU through safe_halt() unless a reschedule is already pending.
 * TS_POLLING is cleared for the duration of the attempt and set again
 * before returning.
 *
 * Callers should disable interrupts before the call and enable
 * interrupts after return.
 */
static void acpi_safe_halt(void)
{
	current_thread_info()->status &= ~TS_POLLING;
	/*
	 * TS_POLLING-cleared state must be visible before we
	 * test NEED_RESCHED:
	 */
	smp_mb();
	if (!need_resched()) {
		safe_halt();
		/*
		 * Return with interrupts off, as the caller expects.
		 * NOTE(review): safe_halt() presumably returns with
		 * interrupts enabled -- confirm for the architecture.
		 */
		local_irq_disable();
	}
	current_thread_info()->status |= TS_POLLING;
}
226 
227 #ifndef CONFIG_CPU_IDLE
228 
229 static void
230 acpi_processor_power_activate(struct acpi_processor *pr,
231 			      struct acpi_processor_cx *new)
232 {
233 	struct acpi_processor_cx *old;
234 
235 	if (!pr || !new)
236 		return;
237 
238 	old = pr->power.state;
239 
240 	if (old)
241 		old->promotion.count = 0;
242 	new->demotion.count = 0;
243 
244 	/* Cleanup from old state. */
245 	if (old) {
246 		switch (old->type) {
247 		case ACPI_STATE_C3:
248 			/* Disable bus master reload */
249 			if (new->type != ACPI_STATE_C3 && pr->flags.bm_check)
250 				acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
251 			break;
252 		}
253 	}
254 
255 	/* Prepare to use new state. */
256 	switch (new->type) {
257 	case ACPI_STATE_C3:
258 		/* Enable bus master reload */
259 		if (old->type != ACPI_STATE_C3 && pr->flags.bm_check)
260 			acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
261 		break;
262 	}
263 
264 	pr->power.state = new;
265 
266 	return;
267 }
268 
/*
 * Number of CPUs currently on the C3 entry path with bm_check and
 * bm_control set; acpi_processor_idle() disables bus master arbitration
 * once it equals num_online_cpus().
 */
static atomic_t c3_cpu_count;
270 
271 /* Common C-state entry for C2, C3, .. */
272 static void acpi_cstate_enter(struct acpi_processor_cx *cstate)
273 {
274 	/* Don't trace irqs off for idle */
275 	stop_critical_timings();
276 	if (cstate->entry_method == ACPI_CSTATE_FFH) {
277 		/* Call into architectural FFH based C-state */
278 		acpi_processor_ffh_cstate_enter(cstate);
279 	} else {
280 		int unused;
281 		/* IO port based C-state */
282 		inb(cstate->address);
283 		/* Dummy wait op - must do something useless after P_LVL2 read
284 		   because chipsets cannot guarantee that STPCLK# signal
285 		   gets asserted in time to freeze execution properly. */
286 		unused = inl(acpi_gbl_FADT.xpm_timer_block.address);
287 	}
288 	start_critical_timings();
289 }
290 #endif /* !CONFIG_CPU_IDLE */
291 
292 #ifdef ARCH_APICTIMER_STOPS_ON_C3
293 
294 /*
295  * Some BIOS implementations switch to C3 in the published C2 state.
296  * This seems to be a common problem on AMD boxen, but other vendors
297  * are affected too. We pick the most conservative approach: we assume
298  * that the local APIC stops in both C2 and C3.
299  */
300 static void acpi_timer_check_state(int state, struct acpi_processor *pr,
301 				   struct acpi_processor_cx *cx)
302 {
303 	struct acpi_processor_power *pwr = &pr->power;
304 	u8 type = local_apic_timer_c2_ok ? ACPI_STATE_C3 : ACPI_STATE_C2;
305 
306 	/*
307 	 * Check, if one of the previous states already marked the lapic
308 	 * unstable
309 	 */
310 	if (pwr->timer_broadcast_on_state < state)
311 		return;
312 
313 	if (cx->type >= type)
314 		pr->power.timer_broadcast_on_state = state;
315 }
316 
317 static void acpi_propagate_timer_broadcast(struct acpi_processor *pr)
318 {
319 	unsigned long reason;
320 
321 	reason = pr->power.timer_broadcast_on_state < INT_MAX ?
322 		CLOCK_EVT_NOTIFY_BROADCAST_ON : CLOCK_EVT_NOTIFY_BROADCAST_OFF;
323 
324 	clockevents_notify(reason, &pr->id);
325 }
326 
327 /* Power(C) State timer broadcast control */
328 static void acpi_state_timer_broadcast(struct acpi_processor *pr,
329 				       struct acpi_processor_cx *cx,
330 				       int broadcast)
331 {
332 	int state = cx - pr->power.states;
333 
334 	if (state >= pr->power.timer_broadcast_on_state) {
335 		unsigned long reason;
336 
337 		reason = broadcast ?  CLOCK_EVT_NOTIFY_BROADCAST_ENTER :
338 			CLOCK_EVT_NOTIFY_BROADCAST_EXIT;
339 		clockevents_notify(reason, &pr->id);
340 	}
341 }
342 
343 #else
344 
/*
 * No-op variants used when ARCH_APICTIMER_STOPS_ON_C3 is not defined:
 * no timer broadcast bookkeeping is performed.
 */
static void acpi_timer_check_state(int state, struct acpi_processor *pr,
				   struct acpi_processor_cx *cstate)
{
}

static void acpi_propagate_timer_broadcast(struct acpi_processor *pr)
{
}

static void acpi_state_timer_broadcast(struct acpi_processor *pr,
				       struct acpi_processor_cx *cx,
				       int broadcast)
{
}
353 
354 #endif
355 
/*
 * Suspend / resume control
 *
 * While acpi_idle_suspend is non-zero, acpi_processor_idle() skips the
 * C-state logic and uses pm_idle_save()/acpi_safe_halt() instead.
 */
static int acpi_idle_suspend;

/* Suspend hook: set the flag.  Both arguments are unused; returns 0. */
int acpi_processor_suspend(struct acpi_device * device, pm_message_t state)
{
	acpi_idle_suspend = 1;
	return 0;
}

/* Resume hook: clear the flag.  The argument is unused; returns 0. */
int acpi_processor_resume(struct acpi_device * device)
{
	acpi_idle_suspend = 0;
	return 0;
}
372 
373 #if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86)
374 static int tsc_halts_in_c(int state)
375 {
376 	switch (boot_cpu_data.x86_vendor) {
377 	case X86_VENDOR_AMD:
378 		/*
379 		 * AMD Fam10h TSC will tick in all
380 		 * C/P/S0/S1 states when this bit is set.
381 		 */
382 		if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
383 			return 0;
384 		/*FALL THROUGH*/
385 	case X86_VENDOR_INTEL:
386 		/* Several cases known where TSC halts in C2 too */
387 	default:
388 		return state > ACPI_STATE_C1;
389 	}
390 }
391 #endif
392 
393 #ifndef CONFIG_CPU_IDLE
/*
 * acpi_processor_idle - idle routine used when CONFIG_CPU_IDLE is not set
 *
 * Runs with interrupts disabled until the chosen state has been left (or
 * until an early exit).  In order, it:
 *   - bails out if this CPU has no acpi_processor or a reschedule is pending;
 *   - falls back to pm_idle_save()/acpi_safe_halt() if no Cx state has been
 *     selected yet or a suspend is in progress;
 *   - records bus master activity and demotes immediately if required;
 *   - enters the current Cx state (C1, C2 or C3) and measures the time spent
 *     there with the PM timer;
 *   - applies the promotion/demotion policy and activates the next state.
 */
static void acpi_processor_idle(void)
{
	struct acpi_processor *pr = NULL;
	struct acpi_processor_cx *cx = NULL;
	struct acpi_processor_cx *next_state = NULL;
	int sleep_ticks = 0;
	u32 t1, t2 = 0;

	/*
	 * Interrupts must be disabled during bus mastering calculations and
	 * for C2/C3 transitions.
	 */
	local_irq_disable();

	pr = __get_cpu_var(processors);
	if (!pr) {
		local_irq_enable();
		return;
	}

	/*
	 * Check whether we truly need to go idle, or should
	 * reschedule:
	 */
	if (unlikely(need_resched())) {
		local_irq_enable();
		return;
	}

	cx = pr->power.state;
	if (!cx || acpi_idle_suspend) {
		if (pm_idle_save) {
			pm_idle_save(); /* enables IRQs */
		} else {
			acpi_safe_halt();
			local_irq_enable();
		}

		return;
	}

	/*
	 * Check BM Activity
	 * -----------------
	 * Check for bus mastering activity (if required), record, and check
	 * for demotion.
	 */
	if (pr->flags.bm_check) {
		u32 bm_status = 0;
		unsigned long diff = jiffies - pr->power.bm_check_timestamp;

		/* Clamp the shift applied to the activity history below. */
		if (diff > 31)
			diff = 31;

		pr->power.bm_activity <<= diff;

		acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status);
		if (bm_status) {
			pr->power.bm_activity |= 0x1;
			acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);
		}
		/*
		 * PIIX4 Erratum #18: Note that BM_STS doesn't always reflect
		 * the true state of bus mastering activity; forcing us to
		 * manually check the BMIDEA bit of each IDE channel.
		 */
		else if (errata.piix4.bmisx) {
			if ((inb_p(errata.piix4.bmisx + 0x02) & 0x01)
			    || (inb_p(errata.piix4.bmisx + 0x0A) & 0x01))
				pr->power.bm_activity |= 0x1;
		}

		pr->power.bm_check_timestamp = jiffies;

		/*
		 * If bus mastering is or was active this jiffy, demote
		 * to avoid a faulty transition.  Note that the processor
		 * won't enter a low-power state during this call (to this
		 * function) but should upon the next.
		 *
		 * TBD: A better policy might be to fallback to the demotion
		 *      state (use it for this quantum only) instead of
		 *      demoting -- and rely on duration as our sole demotion
		 *      qualification.  This may, however, introduce DMA
		 *      issues (e.g. floppy DMA transfer overrun/underrun).
		 */
		if ((pr->power.bm_activity & 0x1) &&
		    cx->demotion.threshold.bm) {
			local_irq_enable();
			next_state = cx->demotion.state;
			goto end;
		}
	}

#ifdef CONFIG_HOTPLUG_CPU
	/*
	 * Check for P_LVL2_UP flag before entering C2 and above on
	 * an SMP system. We do it here instead of doing it at _CST/P_LVL
	 * detection phase, to work cleanly with logical CPU hotplug.
	 */
	if ((cx->type != ACPI_STATE_C1) && (num_online_cpus() > 1) &&
	    !pr->flags.has_cst && !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
		cx = &pr->power.states[ACPI_STATE_C1];
#endif

	/*
	 * Sleep:
	 * ------
	 * Invoke the current Cx state to put the processor to sleep.
	 */
	if (cx->type == ACPI_STATE_C2 || cx->type == ACPI_STATE_C3) {
		current_thread_info()->status &= ~TS_POLLING;
		/*
		 * TS_POLLING-cleared state must be visible before we
		 * test NEED_RESCHED:
		 */
		smp_mb();
		if (need_resched()) {
			current_thread_info()->status |= TS_POLLING;
			local_irq_enable();
			return;
		}
	}

	switch (cx->type) {

	case ACPI_STATE_C1:
		/*
		 * Invoke C1.
		 * Use the appropriate idle routine, the one that would
		 * be used without acpi C-states.
		 */
		if (pm_idle_save) {
			pm_idle_save(); /* enables IRQs */
		} else {
			acpi_safe_halt();
			local_irq_enable();
		}

		/*
		 * TBD: Can't get time duration while in C1, as resumes
		 *      go to an ISR rather than here.  Need to instrument
		 *      base interrupt handler.
		 *
		 * Note: the TSC better not stop in C1, sched_clock() will
		 *       skew otherwise.
		 */
		/*
		 * NOTE(review): sleep_ticks is an int, so this stores an
		 * out-of-range value; the threshold comparisons below
		 * presumably rely on it converting back to a huge unsigned
		 * number -- confirm the threshold field types.
		 */
		sleep_ticks = 0xFFFFFFFF;

		break;

	case ACPI_STATE_C2:
		/* Get start time (ticks) */
		t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
		/* Tell the scheduler that we are going deep-idle: */
		sched_clock_idle_sleep_event();
		/* Invoke C2 */
		acpi_state_timer_broadcast(pr, cx, 1);
		acpi_cstate_enter(cx);
		/* Get end time (ticks) */
		t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);

#if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86)
		/* TSC halts in C2, so notify users */
		if (tsc_halts_in_c(ACPI_STATE_C2))
			mark_tsc_unstable("possible TSC halt in C2");
#endif
		/* Compute time (ticks) that we were actually asleep */
		sleep_ticks = ticks_elapsed(t1, t2);

		/* Tell the scheduler how much we idled: */
		sched_clock_idle_wakeup_event(sleep_ticks*PM_TIMER_TICK_NS);

		/* Re-enable interrupts */
		local_irq_enable();
		/* Do not account our idle-switching overhead: */
		sleep_ticks -= cx->latency_ticks + C2_OVERHEAD;

		current_thread_info()->status |= TS_POLLING;
		acpi_state_timer_broadcast(pr, cx, 0);
		break;

	case ACPI_STATE_C3:
		acpi_unlazy_tlb(smp_processor_id());
		/*
		 * Must be done before busmaster disable as we might
		 * need to access HPET !
		 */
		acpi_state_timer_broadcast(pr, cx, 1);
		/*
		 * disable bus master
		 * bm_check implies we need ARB_DIS
		 * !bm_check implies we need cache flush
		 * bm_control implies whether we can do ARB_DIS
		 *
		 * That leaves a case where bm_check is set and bm_control is
		 * not set. In that case we cannot do much, we enter C3
		 * without doing anything.
		 */
		if (pr->flags.bm_check && pr->flags.bm_control) {
			if (atomic_inc_return(&c3_cpu_count) ==
			    num_online_cpus()) {
				/*
				 * All CPUs are trying to go to C3
				 * Disable bus master arbitration
				 */
				acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
			}
		} else if (!pr->flags.bm_check) {
			/* SMP with no shared cache... Invalidate cache  */
			ACPI_FLUSH_CPU_CACHE();
		}

		/* Get start time (ticks) */
		t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
		/* Invoke C3 */
		/* Tell the scheduler that we are going deep-idle: */
		sched_clock_idle_sleep_event();
		acpi_cstate_enter(cx);
		/* Get end time (ticks) */
		t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
		if (pr->flags.bm_check && pr->flags.bm_control) {
			/* Enable bus master arbitration */
			atomic_dec(&c3_cpu_count);
			acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
		}

#if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86)
		/* TSC halts in C3, so notify users */
		if (tsc_halts_in_c(ACPI_STATE_C3))
			mark_tsc_unstable("TSC halts in C3");
#endif
		/* Compute time (ticks) that we were actually asleep */
		sleep_ticks = ticks_elapsed(t1, t2);
		/* Tell the scheduler how much we idled: */
		sched_clock_idle_wakeup_event(sleep_ticks*PM_TIMER_TICK_NS);

		/* Re-enable interrupts */
		local_irq_enable();
		/* Do not account our idle-switching overhead: */
		sleep_ticks -= cx->latency_ticks + C3_OVERHEAD;

		current_thread_info()->status |= TS_POLLING;
		acpi_state_timer_broadcast(pr, cx, 0);
		break;

	default:
		local_irq_enable();
		return;
	}
	/* Per-state statistics: entry count and (except for C1) time asleep. */
	cx->usage++;
	if ((cx->type != ACPI_STATE_C1) && (sleep_ticks > 0))
		cx->time += sleep_ticks;

	next_state = pr->power.state;

#ifdef CONFIG_HOTPLUG_CPU
	/* Don't do promotion/demotion */
	if ((cx->type == ACPI_STATE_C1) && (num_online_cpus() > 1) &&
	    !pr->flags.has_cst && !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED)) {
		next_state = cx;
		goto end;
	}
#endif

	/*
	 * Promotion?
	 * ----------
	 * Track the number of longs (time asleep is greater than threshold)
	 * and promote when the count threshold is reached.  Note that bus
	 * mastering activity may prevent promotions.
	 * Do not promote above max_cstate.
	 */
	if (cx->promotion.state &&
	    ((cx->promotion.state - pr->power.states) <= max_cstate)) {
		if (sleep_ticks > cx->promotion.threshold.ticks &&
		  cx->promotion.state->latency <=
				pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)) {
			cx->promotion.count++;
			cx->demotion.count = 0;
			if (cx->promotion.count >=
			    cx->promotion.threshold.count) {
				if (pr->flags.bm_check) {
					if (!
					    (pr->power.bm_activity & cx->
					     promotion.threshold.bm)) {
						next_state =
						    cx->promotion.state;
						goto end;
					}
				} else {
					next_state = cx->promotion.state;
					goto end;
				}
			}
		}
	}

	/*
	 * Demotion?
	 * ---------
	 * Track the number of shorts (time asleep is less than time threshold)
	 * and demote when the usage threshold is reached.
	 */
	if (cx->demotion.state) {
		if (sleep_ticks < cx->demotion.threshold.ticks) {
			cx->demotion.count++;
			cx->promotion.count = 0;
			if (cx->demotion.count >= cx->demotion.threshold.count) {
				next_state = cx->demotion.state;
				goto end;
			}
		}
	}

      end:
	/*
	 * Demote if current state exceeds max_cstate
	 * or if the latency of the current state is unacceptable
	 */
	if ((pr->power.state - pr->power.states) > max_cstate ||
		pr->power.state->latency >
				pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)) {
		if (cx->demotion.state)
			next_state = cx->demotion.state;
	}

	/*
	 * New Cx State?
	 * -------------
	 * If we're going to start using a new Cx state we must clean up
	 * from the previous and prepare to use the new.
	 */
	if (next_state != pr->power.state)
		acpi_processor_power_activate(pr, next_state);
}
730 
731 static int acpi_processor_set_power_policy(struct acpi_processor *pr)
732 {
733 	unsigned int i;
734 	unsigned int state_is_set = 0;
735 	struct acpi_processor_cx *lower = NULL;
736 	struct acpi_processor_cx *higher = NULL;
737 	struct acpi_processor_cx *cx;
738 
739 
740 	if (!pr)
741 		return -EINVAL;
742 
743 	/*
744 	 * This function sets the default Cx state policy (OS idle handler).
745 	 * Our scheme is to promote quickly to C2 but more conservatively
746 	 * to C3.  We're favoring C2  for its characteristics of low latency
747 	 * (quick response), good power savings, and ability to allow bus
748 	 * mastering activity.  Note that the Cx state policy is completely
749 	 * customizable and can be altered dynamically.
750 	 */
751 
752 	/* startup state */
753 	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
754 		cx = &pr->power.states[i];
755 		if (!cx->valid)
756 			continue;
757 
758 		if (!state_is_set)
759 			pr->power.state = cx;
760 		state_is_set++;
761 		break;
762 	}
763 
764 	if (!state_is_set)
765 		return -ENODEV;
766 
767 	/* demotion */
768 	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
769 		cx = &pr->power.states[i];
770 		if (!cx->valid)
771 			continue;
772 
773 		if (lower) {
774 			cx->demotion.state = lower;
775 			cx->demotion.threshold.ticks = cx->latency_ticks;
776 			cx->demotion.threshold.count = 1;
777 			if (cx->type == ACPI_STATE_C3)
778 				cx->demotion.threshold.bm = bm_history;
779 		}
780 
781 		lower = cx;
782 	}
783 
784 	/* promotion */
785 	for (i = (ACPI_PROCESSOR_MAX_POWER - 1); i > 0; i--) {
786 		cx = &pr->power.states[i];
787 		if (!cx->valid)
788 			continue;
789 
790 		if (higher) {
791 			cx->promotion.state = higher;
792 			cx->promotion.threshold.ticks = cx->latency_ticks;
793 			if (cx->type >= ACPI_STATE_C2)
794 				cx->promotion.threshold.count = 4;
795 			else
796 				cx->promotion.threshold.count = 10;
797 			if (higher->type == ACPI_STATE_C3)
798 				cx->promotion.threshold.bm = bm_history;
799 		}
800 
801 		higher = cx;
802 	}
803 
804 	return 0;
805 }
806 #endif /* !CONFIG_CPU_IDLE */
807 
808 static int acpi_processor_get_power_info_fadt(struct acpi_processor *pr)
809 {
810 
811 	if (!pr)
812 		return -EINVAL;
813 
814 	if (!pr->pblk)
815 		return -ENODEV;
816 
817 	/* if info is obtained from pblk/fadt, type equals state */
818 	pr->power.states[ACPI_STATE_C2].type = ACPI_STATE_C2;
819 	pr->power.states[ACPI_STATE_C3].type = ACPI_STATE_C3;
820 
821 #ifndef CONFIG_HOTPLUG_CPU
822 	/*
823 	 * Check for P_LVL2_UP flag before entering C2 and above on
824 	 * an SMP system.
825 	 */
826 	if ((num_online_cpus() > 1) &&
827 	    !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
828 		return -ENODEV;
829 #endif
830 
831 	/* determine C2 and C3 address from pblk */
832 	pr->power.states[ACPI_STATE_C2].address = pr->pblk + 4;
833 	pr->power.states[ACPI_STATE_C3].address = pr->pblk + 5;
834 
835 	/* determine latencies from FADT */
836 	pr->power.states[ACPI_STATE_C2].latency = acpi_gbl_FADT.C2latency;
837 	pr->power.states[ACPI_STATE_C3].latency = acpi_gbl_FADT.C3latency;
838 
839 	ACPI_DEBUG_PRINT((ACPI_DB_INFO,
840 			  "lvl2[0x%08x] lvl3[0x%08x]\n",
841 			  pr->power.states[ACPI_STATE_C2].address,
842 			  pr->power.states[ACPI_STATE_C3].address));
843 
844 	return 0;
845 }
846 
847 static int acpi_processor_get_power_info_default(struct acpi_processor *pr)
848 {
849 	if (!pr->power.states[ACPI_STATE_C1].valid) {
850 		/* set the first C-State to C1 */
851 		/* all processors need to support C1 */
852 		pr->power.states[ACPI_STATE_C1].type = ACPI_STATE_C1;
853 		pr->power.states[ACPI_STATE_C1].valid = 1;
854 		pr->power.states[ACPI_STATE_C1].entry_method = ACPI_CSTATE_HALT;
855 	}
856 	/* the C0 state only exists as a filler in our array */
857 	pr->power.states[ACPI_STATE_C0].valid = 1;
858 	return 0;
859 }
860 
/*
 * acpi_processor_get_power_info_cst - fill pr->power.states from _CST
 * @pr: processor whose _CST object is evaluated
 *
 * Evaluates _CST, validates the returned package and copies every usable
 * entry into pr->power.states[], starting at index 1.  Entries that fail a
 * check are skipped with "continue".
 *
 * Returns 0 on success, -ENODEV if nocst is set or _CST cannot be
 * evaluated, and -EFAULT if the package is malformed or fewer than two
 * power states were found.  Note that the negative errno values travel
 * through the acpi_status variable "status".
 */
static int acpi_processor_get_power_info_cst(struct acpi_processor *pr)
{
	acpi_status status = 0;
	acpi_integer count;
	int current_count;
	int i;
	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
	union acpi_object *cst;


	if (nocst)
		return -ENODEV;

	current_count = 0;

	status = acpi_evaluate_object(pr->handle, "_CST", NULL, &buffer);
	if (ACPI_FAILURE(status)) {
		ACPI_DEBUG_PRINT((ACPI_DB_INFO, "No _CST, giving up\n"));
		return -ENODEV;
	}

	cst = buffer.pointer;

	/* There must be at least 2 elements */
	if (!cst || (cst->type != ACPI_TYPE_PACKAGE) || cst->package.count < 2) {
		printk(KERN_ERR PREFIX "not enough elements in _CST\n");
		status = -EFAULT;
		goto end;
	}

	/* Element 0 holds the number of C-state packages that follow. */
	count = cst->package.elements[0].integer.value;

	/* Validate number of power states. */
	if (count < 1 || count != cst->package.count - 1) {
		printk(KERN_ERR PREFIX "count given by _CST is not valid\n");
		status = -EFAULT;
		goto end;
	}

	/* Tell driver that at least _CST is supported. */
	pr->flags.has_cst = 1;

	for (i = 1; i <= count; i++) {
		union acpi_object *element;
		union acpi_object *obj;
		struct acpi_power_register *reg;
		struct acpi_processor_cx cx;

		memset(&cx, 0, sizeof(cx));

		/* Each entry must be a 4-element package. */
		element = &(cst->package.elements[i]);
		if (element->type != ACPI_TYPE_PACKAGE)
			continue;

		if (element->package.count != 4)
			continue;

		/* Element 0: register buffer */
		obj = &(element->package.elements[0]);

		if (obj->type != ACPI_TYPE_BUFFER)
			continue;

		reg = (struct acpi_power_register *)obj->buffer.pointer;

		if (reg->space_id != ACPI_ADR_SPACE_SYSTEM_IO &&
		    (reg->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE))
			continue;

		/* There should be an easy way to extract an integer... */
		/* Element 1: C-state type */
		obj = &(element->package.elements[1]);
		if (obj->type != ACPI_TYPE_INTEGER)
			continue;

		cx.type = obj->integer.value;
		/*
		 * Some buggy BIOSes won't list C1 in _CST -
		 * Let acpi_processor_get_power_info_default() handle them later
		 */
		if (i == 1 && cx.type != ACPI_STATE_C1)
			current_count++;

		cx.address = reg->address;
		cx.index = current_count + 1;

		cx.entry_method = ACPI_CSTATE_SYSTEMIO;
		if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) {
			if (acpi_processor_ffh_cstate_probe
					(pr->id, &cx, reg) == 0) {
				cx.entry_method = ACPI_CSTATE_FFH;
			} else if (cx.type == ACPI_STATE_C1) {
				/*
				 * C1 is a special case where FIXED_HARDWARE
				 * can be handled in non-MWAIT way as well.
				 * In that case, save this _CST entry info.
				 * Otherwise, ignore this info and continue.
				 */
				cx.entry_method = ACPI_CSTATE_HALT;
				snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT");
			} else {
				continue;
			}
			if (cx.type == ACPI_STATE_C1 &&
					(idle_halt || idle_nomwait)) {
				/*
				 * In most cases the C1 space_id obtained from
				 * _CST object is FIXED_HARDWARE access mode.
				 * But when the option of idle=halt is added,
				 * the entry_method type should be changed from
				 * CSTATE_FFH to CSTATE_HALT.
				 * When the option of idle=nomwait is added,
				 * the C1 entry_method type should be
				 * CSTATE_HALT.
				 */
				cx.entry_method = ACPI_CSTATE_HALT;
				snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT");
			}
		} else {
			snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI IOPORT 0x%x",
				 cx.address);
		}

		/* Only C1 is marked valid here; other types are left unset. */
		if (cx.type == ACPI_STATE_C1) {
			cx.valid = 1;
		}

		/* Element 2: latency */
		obj = &(element->package.elements[2]);
		if (obj->type != ACPI_TYPE_INTEGER)
			continue;

		cx.latency = obj->integer.value;

		/* Element 3: power */
		obj = &(element->package.elements[3]);
		if (obj->type != ACPI_TYPE_INTEGER)
			continue;

		cx.power = obj->integer.value;

		/* Entry accepted: store it in the next slot. */
		current_count++;
		memcpy(&(pr->power.states[current_count]), &cx, sizeof(cx));

		/*
		 * We support total ACPI_PROCESSOR_MAX_POWER - 1
		 * (From 1 through ACPI_PROCESSOR_MAX_POWER - 1)
		 */
		if (current_count >= (ACPI_PROCESSOR_MAX_POWER - 1)) {
			printk(KERN_WARNING
			       "Limiting number of power states to max (%d)\n",
			       ACPI_PROCESSOR_MAX_POWER);
			printk(KERN_WARNING
			       "Please increase ACPI_PROCESSOR_MAX_POWER if needed.\n");
			break;
		}
	}

	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found %d power states\n",
			  current_count));

	/* Validate number of power states discovered */
	if (current_count < 2)
		status = -EFAULT;

      end:
	/* The _CST result buffer was allocated for us (ACPI_ALLOCATE_BUFFER). */
	kfree(buffer.pointer);

	return status;
}
1027 
1028 static void acpi_processor_power_verify_c2(struct acpi_processor_cx *cx)
1029 {
1030 
1031 	if (!cx->address)
1032 		return;
1033 
1034 	/*
1035 	 * C2 latency must be less than or equal to 100
1036 	 * microseconds.
1037 	 */
1038 	else if (cx->latency > ACPI_PROCESSOR_MAX_C2_LATENCY) {
1039 		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
1040 				  "latency too large [%d]\n", cx->latency));
1041 		return;
1042 	}
1043 
1044 	/*
1045 	 * Otherwise we've met all of our C2 requirements.
1046 	 * Normalize the C2 latency to expidite policy
1047 	 */
1048 	cx->valid = 1;
1049 
1050 #ifndef CONFIG_CPU_IDLE
1051 	cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
1052 #else
1053 	cx->latency_ticks = cx->latency;
1054 #endif
1055 
1056 	return;
1057 }
1058 
1059 static void acpi_processor_power_verify_c3(struct acpi_processor *pr,
1060 					   struct acpi_processor_cx *cx)
1061 {
1062 	static int bm_check_flag;
1063 
1064 
1065 	if (!cx->address)
1066 		return;
1067 
1068 	/*
1069 	 * C3 latency must be less than or equal to 1000
1070 	 * microseconds.
1071 	 */
1072 	else if (cx->latency > ACPI_PROCESSOR_MAX_C3_LATENCY) {
1073 		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
1074 				  "latency too large [%d]\n", cx->latency));
1075 		return;
1076 	}
1077 
1078 	/*
1079 	 * PIIX4 Erratum #18: We don't support C3 when Type-F (fast)
1080 	 * DMA transfers are used by any ISA device to avoid livelock.
1081 	 * Note that we could disable Type-F DMA (as recommended by
1082 	 * the erratum), but this is known to disrupt certain ISA
1083 	 * devices thus we take the conservative approach.
1084 	 */
1085 	else if (errata.piix4.fdma) {
1086 		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
1087 				  "C3 not supported on PIIX4 with Type-F DMA\n"));
1088 		return;
1089 	}
1090 
1091 	/* All the logic here assumes flags.bm_check is same across all CPUs */
1092 	if (!bm_check_flag) {
1093 		/* Determine whether bm_check is needed based on CPU  */
1094 		acpi_processor_power_init_bm_check(&(pr->flags), pr->id);
1095 		bm_check_flag = pr->flags.bm_check;
1096 	} else {
1097 		pr->flags.bm_check = bm_check_flag;
1098 	}
1099 
1100 	if (pr->flags.bm_check) {
1101 		if (!pr->flags.bm_control) {
1102 			if (pr->flags.has_cst != 1) {
1103 				/* bus mastering control is necessary */
1104 				ACPI_DEBUG_PRINT((ACPI_DB_INFO,
1105 					"C3 support requires BM control\n"));
1106 				return;
1107 			} else {
1108 				/* Here we enter C3 without bus mastering */
1109 				ACPI_DEBUG_PRINT((ACPI_DB_INFO,
1110 					"C3 support without BM control\n"));
1111 			}
1112 		}
1113 	} else {
1114 		/*
1115 		 * WBINVD should be set in fadt, for C3 state to be
1116 		 * supported on when bm_check is not required.
1117 		 */
1118 		if (!(acpi_gbl_FADT.flags & ACPI_FADT_WBINVD)) {
1119 			ACPI_DEBUG_PRINT((ACPI_DB_INFO,
1120 					  "Cache invalidation should work properly"
1121 					  " for C3 to be enabled on SMP systems\n"));
1122 			return;
1123 		}
1124 		acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
1125 	}
1126 
1127 	/*
1128 	 * Otherwise we've met all of our C3 requirements.
1129 	 * Normalize the C3 latency to expidite policy.  Enable
1130 	 * checking of bus mastering status (bm_check) so we can
1131 	 * use this in our C3 policy
1132 	 */
1133 	cx->valid = 1;
1134 
1135 #ifndef CONFIG_CPU_IDLE
1136 	cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
1137 #else
1138 	cx->latency_ticks = cx->latency;
1139 #endif
1140 
1141 	return;
1142 }
1143 
1144 static int acpi_processor_power_verify(struct acpi_processor *pr)
1145 {
1146 	unsigned int i;
1147 	unsigned int working = 0;
1148 
1149 	pr->power.timer_broadcast_on_state = INT_MAX;
1150 
1151 	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
1152 		struct acpi_processor_cx *cx = &pr->power.states[i];
1153 
1154 		switch (cx->type) {
1155 		case ACPI_STATE_C1:
1156 			cx->valid = 1;
1157 			break;
1158 
1159 		case ACPI_STATE_C2:
1160 			acpi_processor_power_verify_c2(cx);
1161 			if (cx->valid)
1162 				acpi_timer_check_state(i, pr, cx);
1163 			break;
1164 
1165 		case ACPI_STATE_C3:
1166 			acpi_processor_power_verify_c3(pr, cx);
1167 			if (cx->valid)
1168 				acpi_timer_check_state(i, pr, cx);
1169 			break;
1170 		}
1171 
1172 		if (cx->valid)
1173 			working++;
1174 	}
1175 
1176 	acpi_propagate_timer_broadcast(pr);
1177 
1178 	return (working);
1179 }
1180 
1181 static int acpi_processor_get_power_info(struct acpi_processor *pr)
1182 {
1183 	unsigned int i;
1184 	int result;
1185 
1186 
1187 	/* NOTE: the idle thread may not be running while calling
1188 	 * this function */
1189 
1190 	/* Zero initialize all the C-states info. */
1191 	memset(pr->power.states, 0, sizeof(pr->power.states));
1192 
1193 	result = acpi_processor_get_power_info_cst(pr);
1194 	if (result == -ENODEV)
1195 		result = acpi_processor_get_power_info_fadt(pr);
1196 
1197 	if (result)
1198 		return result;
1199 
1200 	acpi_processor_get_power_info_default(pr);
1201 
1202 	pr->power.count = acpi_processor_power_verify(pr);
1203 
1204 #ifndef CONFIG_CPU_IDLE
1205 	/*
1206 	 * Set Default Policy
1207 	 * ------------------
1208 	 * Now that we know which states are supported, set the default
1209 	 * policy.  Note that this policy can be changed dynamically
1210 	 * (e.g. encourage deeper sleeps to conserve battery life when
1211 	 * not on AC).
1212 	 */
1213 	result = acpi_processor_set_power_policy(pr);
1214 	if (result)
1215 		return result;
1216 #endif
1217 
1218 	/*
1219 	 * if one state of type C2 or C3 is available, mark this
1220 	 * CPU as being "idle manageable"
1221 	 */
1222 	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
1223 		if (pr->power.states[i].valid) {
1224 			pr->power.count = i;
1225 			if (pr->power.states[i].type >= ACPI_STATE_C2)
1226 				pr->flags.power = 1;
1227 		}
1228 	}
1229 
1230 	return 0;
1231 }
1232 
1233 static int acpi_processor_power_seq_show(struct seq_file *seq, void *offset)
1234 {
1235 	struct acpi_processor *pr = seq->private;
1236 	unsigned int i;
1237 
1238 
1239 	if (!pr)
1240 		goto end;
1241 
1242 	seq_printf(seq, "active state:            C%zd\n"
1243 		   "max_cstate:              C%d\n"
1244 		   "bus master activity:     %08x\n"
1245 		   "maximum allowed latency: %d usec\n",
1246 		   pr->power.state ? pr->power.state - pr->power.states : 0,
1247 		   max_cstate, (unsigned)pr->power.bm_activity,
1248 		   pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY));
1249 
1250 	seq_puts(seq, "states:\n");
1251 
1252 	for (i = 1; i <= pr->power.count; i++) {
1253 		seq_printf(seq, "   %cC%d:                  ",
1254 			   (&pr->power.states[i] ==
1255 			    pr->power.state ? '*' : ' '), i);
1256 
1257 		if (!pr->power.states[i].valid) {
1258 			seq_puts(seq, "<not supported>\n");
1259 			continue;
1260 		}
1261 
1262 		switch (pr->power.states[i].type) {
1263 		case ACPI_STATE_C1:
1264 			seq_printf(seq, "type[C1] ");
1265 			break;
1266 		case ACPI_STATE_C2:
1267 			seq_printf(seq, "type[C2] ");
1268 			break;
1269 		case ACPI_STATE_C3:
1270 			seq_printf(seq, "type[C3] ");
1271 			break;
1272 		default:
1273 			seq_printf(seq, "type[--] ");
1274 			break;
1275 		}
1276 
1277 		if (pr->power.states[i].promotion.state)
1278 			seq_printf(seq, "promotion[C%zd] ",
1279 				   (pr->power.states[i].promotion.state -
1280 				    pr->power.states));
1281 		else
1282 			seq_puts(seq, "promotion[--] ");
1283 
1284 		if (pr->power.states[i].demotion.state)
1285 			seq_printf(seq, "demotion[C%zd] ",
1286 				   (pr->power.states[i].demotion.state -
1287 				    pr->power.states));
1288 		else
1289 			seq_puts(seq, "demotion[--] ");
1290 
1291 		seq_printf(seq, "latency[%03d] usage[%08d] duration[%020llu]\n",
1292 			   pr->power.states[i].latency,
1293 			   pr->power.states[i].usage,
1294 			   (unsigned long long)pr->power.states[i].time);
1295 	}
1296 
1297       end:
1298 	return 0;
1299 }
1300 
1301 static int acpi_processor_power_open_fs(struct inode *inode, struct file *file)
1302 {
1303 	return single_open(file, acpi_processor_power_seq_show,
1304 			   PDE(inode)->data);
1305 }
1306 
1307 static const struct file_operations acpi_processor_power_fops = {
1308 	.owner = THIS_MODULE,
1309 	.open = acpi_processor_power_open_fs,
1310 	.read = seq_read,
1311 	.llseek = seq_lseek,
1312 	.release = single_release,
1313 };
1314 
1315 #ifndef CONFIG_CPU_IDLE
1316 
1317 int acpi_processor_cst_has_changed(struct acpi_processor *pr)
1318 {
1319 	int result = 0;
1320 
1321 	if (boot_option_idle_override)
1322 		return 0;
1323 
1324 	if (!pr)
1325 		return -EINVAL;
1326 
1327 	if (nocst) {
1328 		return -ENODEV;
1329 	}
1330 
1331 	if (!pr->flags.power_setup_done)
1332 		return -ENODEV;
1333 
1334 	/*
1335 	 * Fall back to the default idle loop, when pm_idle_save had
1336 	 * been initialized.
1337 	 */
1338 	if (pm_idle_save) {
1339 		pm_idle = pm_idle_save;
1340 		/* Relies on interrupts forcing exit from idle. */
1341 		synchronize_sched();
1342 	}
1343 
1344 	pr->flags.power = 0;
1345 	result = acpi_processor_get_power_info(pr);
1346 	if ((pr->flags.power == 1) && (pr->flags.power_setup_done))
1347 		pm_idle = acpi_processor_idle;
1348 
1349 	return result;
1350 }
1351 
1352 #ifdef CONFIG_SMP
1353 static void smp_callback(void *v)
1354 {
1355 	/* we already woke the CPU up, nothing more to do */
1356 }
1357 
1358 /*
1359  * This function gets called when a part of the kernel has a new latency
1360  * requirement.  This means we need to get all processors out of their C-state,
1361  * and then recalculate a new suitable C-state. Just do a cross-cpu IPI; that
1362  * wakes them all right up.
1363  */
1364 static int acpi_processor_latency_notify(struct notifier_block *b,
1365 		unsigned long l, void *v)
1366 {
1367 	smp_call_function(smp_callback, NULL, 1);
1368 	return NOTIFY_OK;
1369 }
1370 
1371 static struct notifier_block acpi_processor_latency_notifier = {
1372 	.notifier_call = acpi_processor_latency_notify,
1373 };
1374 
1375 #endif
1376 
1377 #else /* CONFIG_CPU_IDLE */
1378 
1379 /**
1380  * acpi_idle_bm_check - checks if bus master activity was detected
1381  */
1382 static int acpi_idle_bm_check(void)
1383 {
1384 	u32 bm_status = 0;
1385 
1386 	acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status);
1387 	if (bm_status)
1388 		acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);
1389 	/*
1390 	 * PIIX4 Erratum #18: Note that BM_STS doesn't always reflect
1391 	 * the true state of bus mastering activity; forcing us to
1392 	 * manually check the BMIDEA bit of each IDE channel.
1393 	 */
1394 	else if (errata.piix4.bmisx) {
1395 		if ((inb_p(errata.piix4.bmisx + 0x02) & 0x01)
1396 		    || (inb_p(errata.piix4.bmisx + 0x0A) & 0x01))
1397 			bm_status = 1;
1398 	}
1399 	return bm_status;
1400 }
1401 
1402 /**
1403  * acpi_idle_update_bm_rld - updates the BM_RLD bit depending on target state
1404  * @pr: the processor
1405  * @target: the new target state
1406  */
1407 static inline void acpi_idle_update_bm_rld(struct acpi_processor *pr,
1408 					   struct acpi_processor_cx *target)
1409 {
1410 	if (pr->flags.bm_rld_set && target->type != ACPI_STATE_C3) {
1411 		acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
1412 		pr->flags.bm_rld_set = 0;
1413 	}
1414 
1415 	if (!pr->flags.bm_rld_set && target->type == ACPI_STATE_C3) {
1416 		acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
1417 		pr->flags.bm_rld_set = 1;
1418 	}
1419 }
1420 
1421 /**
1422  * acpi_idle_do_entry - a helper function that does C2 and C3 type entry
1423  * @cx: cstate data
1424  *
1425  * Caller disables interrupt before call and enables interrupt after return.
1426  */
1427 static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx)
1428 {
1429 	/* Don't trace irqs off for idle */
1430 	stop_critical_timings();
1431 	if (cx->entry_method == ACPI_CSTATE_FFH) {
1432 		/* Call into architectural FFH based C-state */
1433 		acpi_processor_ffh_cstate_enter(cx);
1434 	} else if (cx->entry_method == ACPI_CSTATE_HALT) {
1435 		acpi_safe_halt();
1436 	} else {
1437 		int unused;
1438 		/* IO port based C-state */
1439 		inb(cx->address);
1440 		/* Dummy wait op - must do something useless after P_LVL2 read
1441 		   because chipsets cannot guarantee that STPCLK# signal
1442 		   gets asserted in time to freeze execution properly. */
1443 		unused = inl(acpi_gbl_FADT.xpm_timer_block.address);
1444 	}
1445 	start_critical_timings();
1446 }
1447 
1448 /**
1449  * acpi_idle_enter_c1 - enters an ACPI C1 state-type
1450  * @dev: the target CPU
1451  * @state: the state data
1452  *
1453  * This is equivalent to the HALT instruction.
1454  */
1455 static int acpi_idle_enter_c1(struct cpuidle_device *dev,
1456 			      struct cpuidle_state *state)
1457 {
1458 	u32 t1, t2;
1459 	struct acpi_processor *pr;
1460 	struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
1461 
1462 	pr = __get_cpu_var(processors);
1463 
1464 	if (unlikely(!pr))
1465 		return 0;
1466 
1467 	local_irq_disable();
1468 
1469 	/* Do not access any ACPI IO ports in suspend path */
1470 	if (acpi_idle_suspend) {
1471 		acpi_safe_halt();
1472 		local_irq_enable();
1473 		return 0;
1474 	}
1475 
1476 	if (pr->flags.bm_check)
1477 		acpi_idle_update_bm_rld(pr, cx);
1478 
1479 	t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
1480 	acpi_idle_do_entry(cx);
1481 	t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
1482 
1483 	local_irq_enable();
1484 	cx->usage++;
1485 
1486 	return ticks_elapsed_in_us(t1, t2);
1487 }
1488 
1489 /**
1490  * acpi_idle_enter_simple - enters an ACPI state without BM handling
1491  * @dev: the target CPU
1492  * @state: the state data
1493  */
1494 static int acpi_idle_enter_simple(struct cpuidle_device *dev,
1495 				  struct cpuidle_state *state)
1496 {
1497 	struct acpi_processor *pr;
1498 	struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
1499 	u32 t1, t2;
1500 	int sleep_ticks = 0;
1501 
1502 	pr = __get_cpu_var(processors);
1503 
1504 	if (unlikely(!pr))
1505 		return 0;
1506 
1507 	if (acpi_idle_suspend)
1508 		return(acpi_idle_enter_c1(dev, state));
1509 
1510 	local_irq_disable();
1511 	current_thread_info()->status &= ~TS_POLLING;
1512 	/*
1513 	 * TS_POLLING-cleared state must be visible before we test
1514 	 * NEED_RESCHED:
1515 	 */
1516 	smp_mb();
1517 
1518 	if (unlikely(need_resched())) {
1519 		current_thread_info()->status |= TS_POLLING;
1520 		local_irq_enable();
1521 		return 0;
1522 	}
1523 
1524 	/*
1525 	 * Must be done before busmaster disable as we might need to
1526 	 * access HPET !
1527 	 */
1528 	acpi_state_timer_broadcast(pr, cx, 1);
1529 
1530 	if (pr->flags.bm_check)
1531 		acpi_idle_update_bm_rld(pr, cx);
1532 
1533 	if (cx->type == ACPI_STATE_C3)
1534 		ACPI_FLUSH_CPU_CACHE();
1535 
1536 	t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
1537 	/* Tell the scheduler that we are going deep-idle: */
1538 	sched_clock_idle_sleep_event();
1539 	acpi_idle_do_entry(cx);
1540 	t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
1541 
1542 #if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86)
1543 	/* TSC could halt in idle, so notify users */
1544 	if (tsc_halts_in_c(cx->type))
1545 		mark_tsc_unstable("TSC halts in idle");;
1546 #endif
1547 	sleep_ticks = ticks_elapsed(t1, t2);
1548 
1549 	/* Tell the scheduler how much we idled: */
1550 	sched_clock_idle_wakeup_event(sleep_ticks*PM_TIMER_TICK_NS);
1551 
1552 	local_irq_enable();
1553 	current_thread_info()->status |= TS_POLLING;
1554 
1555 	cx->usage++;
1556 
1557 	acpi_state_timer_broadcast(pr, cx, 0);
1558 	cx->time += sleep_ticks;
1559 	return ticks_elapsed_in_us(t1, t2);
1560 }
1561 
1562 static int c3_cpu_count;
1563 static DEFINE_SPINLOCK(c3_lock);
1564 
1565 /**
1566  * acpi_idle_enter_bm - enters C3 with proper BM handling
1567  * @dev: the target CPU
1568  * @state: the state data
1569  *
1570  * If BM is detected, the deepest non-C3 idle state is entered instead.
1571  */
1572 static int acpi_idle_enter_bm(struct cpuidle_device *dev,
1573 			      struct cpuidle_state *state)
1574 {
1575 	struct acpi_processor *pr;
1576 	struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
1577 	u32 t1, t2;
1578 	int sleep_ticks = 0;
1579 
1580 	pr = __get_cpu_var(processors);
1581 
1582 	if (unlikely(!pr))
1583 		return 0;
1584 
1585 	if (acpi_idle_suspend)
1586 		return(acpi_idle_enter_c1(dev, state));
1587 
1588 	if (acpi_idle_bm_check()) {
1589 		if (dev->safe_state) {
1590 			dev->last_state = dev->safe_state;
1591 			return dev->safe_state->enter(dev, dev->safe_state);
1592 		} else {
1593 			local_irq_disable();
1594 			acpi_safe_halt();
1595 			local_irq_enable();
1596 			return 0;
1597 		}
1598 	}
1599 
1600 	local_irq_disable();
1601 	current_thread_info()->status &= ~TS_POLLING;
1602 	/*
1603 	 * TS_POLLING-cleared state must be visible before we test
1604 	 * NEED_RESCHED:
1605 	 */
1606 	smp_mb();
1607 
1608 	if (unlikely(need_resched())) {
1609 		current_thread_info()->status |= TS_POLLING;
1610 		local_irq_enable();
1611 		return 0;
1612 	}
1613 
1614 	acpi_unlazy_tlb(smp_processor_id());
1615 
1616 	/* Tell the scheduler that we are going deep-idle: */
1617 	sched_clock_idle_sleep_event();
1618 	/*
1619 	 * Must be done before busmaster disable as we might need to
1620 	 * access HPET !
1621 	 */
1622 	acpi_state_timer_broadcast(pr, cx, 1);
1623 
1624 	acpi_idle_update_bm_rld(pr, cx);
1625 
1626 	/*
1627 	 * disable bus master
1628 	 * bm_check implies we need ARB_DIS
1629 	 * !bm_check implies we need cache flush
1630 	 * bm_control implies whether we can do ARB_DIS
1631 	 *
1632 	 * That leaves a case where bm_check is set and bm_control is
1633 	 * not set. In that case we cannot do much, we enter C3
1634 	 * without doing anything.
1635 	 */
1636 	if (pr->flags.bm_check && pr->flags.bm_control) {
1637 		spin_lock(&c3_lock);
1638 		c3_cpu_count++;
1639 		/* Disable bus master arbitration when all CPUs are in C3 */
1640 		if (c3_cpu_count == num_online_cpus())
1641 			acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
1642 		spin_unlock(&c3_lock);
1643 	} else if (!pr->flags.bm_check) {
1644 		ACPI_FLUSH_CPU_CACHE();
1645 	}
1646 
1647 	t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
1648 	acpi_idle_do_entry(cx);
1649 	t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
1650 
1651 	/* Re-enable bus master arbitration */
1652 	if (pr->flags.bm_check && pr->flags.bm_control) {
1653 		spin_lock(&c3_lock);
1654 		acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
1655 		c3_cpu_count--;
1656 		spin_unlock(&c3_lock);
1657 	}
1658 
1659 #if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86)
1660 	/* TSC could halt in idle, so notify users */
1661 	if (tsc_halts_in_c(ACPI_STATE_C3))
1662 		mark_tsc_unstable("TSC halts in idle");
1663 #endif
1664 	sleep_ticks = ticks_elapsed(t1, t2);
1665 	/* Tell the scheduler how much we idled: */
1666 	sched_clock_idle_wakeup_event(sleep_ticks*PM_TIMER_TICK_NS);
1667 
1668 	local_irq_enable();
1669 	current_thread_info()->status |= TS_POLLING;
1670 
1671 	cx->usage++;
1672 
1673 	acpi_state_timer_broadcast(pr, cx, 0);
1674 	cx->time += sleep_ticks;
1675 	return ticks_elapsed_in_us(t1, t2);
1676 }
1677 
1678 struct cpuidle_driver acpi_idle_driver = {
1679 	.name =		"acpi_idle",
1680 	.owner =	THIS_MODULE,
1681 };
1682 
1683 /**
1684  * acpi_processor_setup_cpuidle - prepares and configures CPUIDLE
1685  * @pr: the ACPI processor
1686  */
1687 static int acpi_processor_setup_cpuidle(struct acpi_processor *pr)
1688 {
1689 	int i, count = CPUIDLE_DRIVER_STATE_START;
1690 	struct acpi_processor_cx *cx;
1691 	struct cpuidle_state *state;
1692 	struct cpuidle_device *dev = &pr->power.dev;
1693 
1694 	if (!pr->flags.power_setup_done)
1695 		return -EINVAL;
1696 
1697 	if (pr->flags.power == 0) {
1698 		return -EINVAL;
1699 	}
1700 
1701 	dev->cpu = pr->id;
1702 	for (i = 0; i < CPUIDLE_STATE_MAX; i++) {
1703 		dev->states[i].name[0] = '\0';
1704 		dev->states[i].desc[0] = '\0';
1705 	}
1706 
1707 	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++) {
1708 		cx = &pr->power.states[i];
1709 		state = &dev->states[count];
1710 
1711 		if (!cx->valid)
1712 			continue;
1713 
1714 #ifdef CONFIG_HOTPLUG_CPU
1715 		if ((cx->type != ACPI_STATE_C1) && (num_online_cpus() > 1) &&
1716 		    !pr->flags.has_cst &&
1717 		    !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
1718 			continue;
1719 #endif
1720 		cpuidle_set_statedata(state, cx);
1721 
1722 		snprintf(state->name, CPUIDLE_NAME_LEN, "C%d", i);
1723 		strncpy(state->desc, cx->desc, CPUIDLE_DESC_LEN);
1724 		state->exit_latency = cx->latency;
1725 		state->target_residency = cx->latency * latency_factor;
1726 		state->power_usage = cx->power;
1727 
1728 		state->flags = 0;
1729 		switch (cx->type) {
1730 			case ACPI_STATE_C1:
1731 			state->flags |= CPUIDLE_FLAG_SHALLOW;
1732 			if (cx->entry_method == ACPI_CSTATE_FFH)
1733 				state->flags |= CPUIDLE_FLAG_TIME_VALID;
1734 
1735 			state->enter = acpi_idle_enter_c1;
1736 			dev->safe_state = state;
1737 			break;
1738 
1739 			case ACPI_STATE_C2:
1740 			state->flags |= CPUIDLE_FLAG_BALANCED;
1741 			state->flags |= CPUIDLE_FLAG_TIME_VALID;
1742 			state->enter = acpi_idle_enter_simple;
1743 			dev->safe_state = state;
1744 			break;
1745 
1746 			case ACPI_STATE_C3:
1747 			state->flags |= CPUIDLE_FLAG_DEEP;
1748 			state->flags |= CPUIDLE_FLAG_TIME_VALID;
1749 			state->flags |= CPUIDLE_FLAG_CHECK_BM;
1750 			state->enter = pr->flags.bm_check ?
1751 					acpi_idle_enter_bm :
1752 					acpi_idle_enter_simple;
1753 			break;
1754 		}
1755 
1756 		count++;
1757 		if (count == CPUIDLE_STATE_MAX)
1758 			break;
1759 	}
1760 
1761 	dev->state_count = count;
1762 
1763 	if (!count)
1764 		return -EINVAL;
1765 
1766 	return 0;
1767 }
1768 
1769 int acpi_processor_cst_has_changed(struct acpi_processor *pr)
1770 {
1771 	int ret = 0;
1772 
1773 	if (boot_option_idle_override)
1774 		return 0;
1775 
1776 	if (!pr)
1777 		return -EINVAL;
1778 
1779 	if (nocst) {
1780 		return -ENODEV;
1781 	}
1782 
1783 	if (!pr->flags.power_setup_done)
1784 		return -ENODEV;
1785 
1786 	cpuidle_pause_and_lock();
1787 	cpuidle_disable_device(&pr->power.dev);
1788 	acpi_processor_get_power_info(pr);
1789 	if (pr->flags.power) {
1790 		acpi_processor_setup_cpuidle(pr);
1791 		ret = cpuidle_enable_device(&pr->power.dev);
1792 	}
1793 	cpuidle_resume_and_unlock();
1794 
1795 	return ret;
1796 }
1797 
1798 #endif /* CONFIG_CPU_IDLE */
1799 
1800 int __cpuinit acpi_processor_power_init(struct acpi_processor *pr,
1801 			      struct acpi_device *device)
1802 {
1803 	acpi_status status = 0;
1804 	static int first_run;
1805 	struct proc_dir_entry *entry = NULL;
1806 	unsigned int i;
1807 
1808 	if (boot_option_idle_override)
1809 		return 0;
1810 
1811 	if (!first_run) {
1812 		if (idle_halt) {
1813 			/*
1814 			 * When the boot option of "idle=halt" is added, halt
1815 			 * is used for CPU IDLE.
1816 			 * In such case C2/C3 is meaningless. So the max_cstate
1817 			 * is set to one.
1818 			 */
1819 			max_cstate = 1;
1820 		}
1821 		dmi_check_system(processor_power_dmi_table);
1822 		max_cstate = acpi_processor_cstate_check(max_cstate);
1823 		if (max_cstate < ACPI_C_STATES_MAX)
1824 			printk(KERN_NOTICE
1825 			       "ACPI: processor limited to max C-state %d\n",
1826 			       max_cstate);
1827 		first_run++;
1828 #if !defined(CONFIG_CPU_IDLE) && defined(CONFIG_SMP)
1829 		pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY,
1830 				&acpi_processor_latency_notifier);
1831 #endif
1832 	}
1833 
1834 	if (!pr)
1835 		return -EINVAL;
1836 
1837 	if (acpi_gbl_FADT.cst_control && !nocst) {
1838 		status =
1839 		    acpi_os_write_port(acpi_gbl_FADT.smi_command, acpi_gbl_FADT.cst_control, 8);
1840 		if (ACPI_FAILURE(status)) {
1841 			ACPI_EXCEPTION((AE_INFO, status,
1842 					"Notifying BIOS of _CST ability failed"));
1843 		}
1844 	}
1845 
1846 	acpi_processor_get_power_info(pr);
1847 	pr->flags.power_setup_done = 1;
1848 
1849 	/*
1850 	 * Install the idle handler if processor power management is supported.
1851 	 * Note that we use previously set idle handler will be used on
1852 	 * platforms that only support C1.
1853 	 */
1854 	if (pr->flags.power) {
1855 #ifdef CONFIG_CPU_IDLE
1856 		acpi_processor_setup_cpuidle(pr);
1857 		if (cpuidle_register_device(&pr->power.dev))
1858 			return -EIO;
1859 #endif
1860 
1861 		printk(KERN_INFO PREFIX "CPU%d (power states:", pr->id);
1862 		for (i = 1; i <= pr->power.count; i++)
1863 			if (pr->power.states[i].valid)
1864 				printk(" C%d[C%d]", i,
1865 				       pr->power.states[i].type);
1866 		printk(")\n");
1867 
1868 #ifndef CONFIG_CPU_IDLE
1869 		if (pr->id == 0) {
1870 			pm_idle_save = pm_idle;
1871 			pm_idle = acpi_processor_idle;
1872 		}
1873 #endif
1874 	}
1875 
1876 	/* 'power' [R] */
1877 	entry = proc_create_data(ACPI_PROCESSOR_FILE_POWER,
1878 				 S_IRUGO, acpi_device_dir(device),
1879 				 &acpi_processor_power_fops,
1880 				 acpi_driver_data(device));
1881 	if (!entry)
1882 		return -EIO;
1883 	return 0;
1884 }
1885 
1886 int acpi_processor_power_exit(struct acpi_processor *pr,
1887 			      struct acpi_device *device)
1888 {
1889 	if (boot_option_idle_override)
1890 		return 0;
1891 
1892 #ifdef CONFIG_CPU_IDLE
1893 	cpuidle_unregister_device(&pr->power.dev);
1894 #endif
1895 	pr->flags.power_setup_done = 0;
1896 
1897 	if (acpi_device_dir(device))
1898 		remove_proc_entry(ACPI_PROCESSOR_FILE_POWER,
1899 				  acpi_device_dir(device));
1900 
1901 #ifndef CONFIG_CPU_IDLE
1902 
1903 	/* Unregister the idle handler when processor #0 is removed. */
1904 	if (pr->id == 0) {
1905 		if (pm_idle_save)
1906 			pm_idle = pm_idle_save;
1907 
1908 		/*
1909 		 * We are about to unload the current idle thread pm callback
1910 		 * (pm_idle), Wait for all processors to update cached/local
1911 		 * copies of pm_idle before proceeding.
1912 		 */
1913 		cpu_idle_wait();
1914 #ifdef CONFIG_SMP
1915 		pm_qos_remove_notifier(PM_QOS_CPU_DMA_LATENCY,
1916 				&acpi_processor_latency_notifier);
1917 #endif
1918 	}
1919 #endif
1920 
1921 	return 0;
1922 }
1923