xref: /openbmc/linux/drivers/pci/pcie/aspm.c (revision b8bb76713ec50df2f11efee386e16f93d51e1076)
1 /*
2  * File:	drivers/pci/pcie/aspm.c
3  * Enabling PCIE link L0s/L1 state and Clock Power Management
4  *
5  * Copyright (C) 2007 Intel
6  * Copyright (C) Zhang Yanmin (yanmin.zhang@intel.com)
7  * Copyright (C) Shaohua Li (shaohua.li@intel.com)
8  */
9 
10 #include <linux/kernel.h>
11 #include <linux/module.h>
12 #include <linux/moduleparam.h>
13 #include <linux/pci.h>
14 #include <linux/pci_regs.h>
15 #include <linux/errno.h>
16 #include <linux/pm.h>
17 #include <linux/init.h>
18 #include <linux/slab.h>
19 #include <linux/jiffies.h>
20 #include <linux/delay.h>
21 #include <linux/pci-aspm.h>
22 #include "../pci.h"
23 
/*
 * Replace whatever parameter prefix is in effect so that parameters
 * declared in this file use the "pcie_aspm." prefix.
 */
#undef MODULE_PARAM_PREFIX
#define MODULE_PARAM_PREFIX "pcie_aspm."
28 
/* Latency budget (ns) one endpoint function tolerates for L0s and L1 exit */
struct endpoint_state {
	unsigned int l0s_acceptable_latency, l1_acceptable_latency;
};
33 
34 struct pcie_link_state {
35 	struct list_head sibiling;
36 	struct pci_dev *pdev;
37 	bool downstream_has_switch;
38 
39 	struct pcie_link_state *parent;
40 	struct list_head children;
41 	struct list_head link;
42 
43 	/* ASPM state */
44 	unsigned int support_state;
45 	unsigned int enabled_state;
46 	unsigned int bios_aspm_state;
47 	/* upstream component */
48 	unsigned int l0s_upper_latency;
49 	unsigned int l1_upper_latency;
50 	/* downstream component */
51 	unsigned int l0s_down_latency;
52 	unsigned int l1_down_latency;
53 	/* Clock PM state*/
54 	unsigned int clk_pm_capable;
55 	unsigned int clk_pm_enabled;
56 	unsigned int bios_clk_state;
57 
58 	/*
59 	 * A pcie downstream port only has one slot under it, so at most there
60 	 * are 8 functions
61 	 */
62 	struct endpoint_state endpoints[8];
63 };
64 
/* Global switches, set by the "pcie_aspm=" boot option and pcie_no_aspm() */
static int aspm_disabled;
static int aspm_force;
/* Taken around every walk or update of the link state objects below */
static DEFINE_MUTEX(aspm_lock);
/* All pcie_link_state objects, chained through ->sibiling */
static LIST_HEAD(link_list);

/* Policy values; they double as indices into policy_str[] */
#define POLICY_DEFAULT		0	/* BIOS default setting */
#define POLICY_PERFORMANCE	1	/* high performance */
#define POLICY_POWERSAVE	2	/* high power saving */
static int aspm_policy;
static const char *policy_str[] = {
	[POLICY_DEFAULT]	= "default",
	[POLICY_PERFORMANCE]	= "performance",
	[POLICY_POWERSAVE]	= "powersave"
};

/* How long to poll for link retraining to finish, in jiffies */
#define LINK_RETRAIN_TIMEOUT	HZ
80 
81 static int policy_to_aspm_state(struct pci_dev *pdev)
82 {
83 	struct pcie_link_state *link_state = pdev->link_state;
84 
85 	switch (aspm_policy) {
86 	case POLICY_PERFORMANCE:
87 		/* Disable ASPM and Clock PM */
88 		return 0;
89 	case POLICY_POWERSAVE:
90 		/* Enable ASPM L0s/L1 */
91 		return PCIE_LINK_STATE_L0S|PCIE_LINK_STATE_L1;
92 	case POLICY_DEFAULT:
93 		return link_state->bios_aspm_state;
94 	}
95 	return 0;
96 }
97 
98 static int policy_to_clkpm_state(struct pci_dev *pdev)
99 {
100 	struct pcie_link_state *link_state = pdev->link_state;
101 
102 	switch (aspm_policy) {
103 	case POLICY_PERFORMANCE:
104 		/* Disable ASPM and Clock PM */
105 		return 0;
106 	case POLICY_POWERSAVE:
107 		/* Disable Clock PM */
108 		return 1;
109 	case POLICY_DEFAULT:
110 		return link_state->bios_clk_state;
111 	}
112 	return 0;
113 }
114 
115 static void pcie_set_clock_pm(struct pci_dev *pdev, int enable)
116 {
117 	struct pci_dev *child_dev;
118 	int pos;
119 	u16 reg16;
120 	struct pcie_link_state *link_state = pdev->link_state;
121 
122 	list_for_each_entry(child_dev, &pdev->subordinate->devices, bus_list) {
123 		pos = pci_find_capability(child_dev, PCI_CAP_ID_EXP);
124 		if (!pos)
125 			return;
126 		pci_read_config_word(child_dev, pos + PCI_EXP_LNKCTL, &reg16);
127 		if (enable)
128 			reg16 |= PCI_EXP_LNKCTL_CLKREQ_EN;
129 		else
130 			reg16 &= ~PCI_EXP_LNKCTL_CLKREQ_EN;
131 		pci_write_config_word(child_dev, pos + PCI_EXP_LNKCTL, reg16);
132 	}
133 	link_state->clk_pm_enabled = !!enable;
134 }
135 
136 static void pcie_check_clock_pm(struct pci_dev *pdev, int blacklist)
137 {
138 	int pos;
139 	u32 reg32;
140 	u16 reg16;
141 	int capable = 1, enabled = 1;
142 	struct pci_dev *child_dev;
143 	struct pcie_link_state *link_state = pdev->link_state;
144 
145 	/* All functions should have the same cap and state, take the worst */
146 	list_for_each_entry(child_dev, &pdev->subordinate->devices, bus_list) {
147 		pos = pci_find_capability(child_dev, PCI_CAP_ID_EXP);
148 		if (!pos)
149 			return;
150 		pci_read_config_dword(child_dev, pos + PCI_EXP_LNKCAP, &reg32);
151 		if (!(reg32 & PCI_EXP_LNKCAP_CLKPM)) {
152 			capable = 0;
153 			enabled = 0;
154 			break;
155 		}
156 		pci_read_config_word(child_dev, pos + PCI_EXP_LNKCTL, &reg16);
157 		if (!(reg16 & PCI_EXP_LNKCTL_CLKREQ_EN))
158 			enabled = 0;
159 	}
160 	link_state->clk_pm_enabled = enabled;
161 	link_state->bios_clk_state = enabled;
162 	if (!blacklist) {
163 		link_state->clk_pm_capable = capable;
164 		pcie_set_clock_pm(pdev, policy_to_clkpm_state(pdev));
165 	} else {
166 		link_state->clk_pm_capable = 0;
167 		pcie_set_clock_pm(pdev, 0);
168 	}
169 }
170 
171 static bool pcie_aspm_downstream_has_switch(struct pci_dev *pdev)
172 {
173 	struct pci_dev *child_dev;
174 
175 	list_for_each_entry(child_dev, &pdev->subordinate->devices, bus_list) {
176 		if (child_dev->pcie_type == PCI_EXP_TYPE_UPSTREAM)
177 			return true;
178 	}
179 	return false;
180 }
181 
182 /*
183  * pcie_aspm_configure_common_clock: check if the 2 ends of a link
184  *   could use common clock. If they are, configure them to use the
185  *   common clock. That will reduce the ASPM state exit latency.
186  */
187 static void pcie_aspm_configure_common_clock(struct pci_dev *pdev)
188 {
189 	int pos, child_pos, i = 0;
190 	u16 reg16 = 0;
191 	struct pci_dev *child_dev;
192 	int same_clock = 1;
193 	unsigned long start_jiffies;
194 	u16 child_regs[8], parent_reg;
195 	/*
196 	 * all functions of a slot should have the same Slot Clock
197 	 * Configuration, so just check one function
198 	 * */
199 	child_dev = list_entry(pdev->subordinate->devices.next, struct pci_dev,
200 		bus_list);
201 	BUG_ON(!child_dev->is_pcie);
202 
203 	/* Check downstream component if bit Slot Clock Configuration is 1 */
204 	child_pos = pci_find_capability(child_dev, PCI_CAP_ID_EXP);
205 	pci_read_config_word(child_dev, child_pos + PCI_EXP_LNKSTA, &reg16);
206 	if (!(reg16 & PCI_EXP_LNKSTA_SLC))
207 		same_clock = 0;
208 
209 	/* Check upstream component if bit Slot Clock Configuration is 1 */
210 	pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
211 	pci_read_config_word(pdev, pos + PCI_EXP_LNKSTA, &reg16);
212 	if (!(reg16 & PCI_EXP_LNKSTA_SLC))
213 		same_clock = 0;
214 
215 	/* Configure downstream component, all functions */
216 	list_for_each_entry(child_dev, &pdev->subordinate->devices, bus_list) {
217 		child_pos = pci_find_capability(child_dev, PCI_CAP_ID_EXP);
218 		pci_read_config_word(child_dev, child_pos + PCI_EXP_LNKCTL,
219 			&reg16);
220 		child_regs[i] = reg16;
221 		if (same_clock)
222 			reg16 |= PCI_EXP_LNKCTL_CCC;
223 		else
224 			reg16 &= ~PCI_EXP_LNKCTL_CCC;
225 		pci_write_config_word(child_dev, child_pos + PCI_EXP_LNKCTL,
226 			reg16);
227 		i++;
228 	}
229 
230 	/* Configure upstream component */
231 	pci_read_config_word(pdev, pos + PCI_EXP_LNKCTL, &reg16);
232 	parent_reg = reg16;
233 	if (same_clock)
234 		reg16 |= PCI_EXP_LNKCTL_CCC;
235 	else
236 		reg16 &= ~PCI_EXP_LNKCTL_CCC;
237 	pci_write_config_word(pdev, pos + PCI_EXP_LNKCTL, reg16);
238 
239 	/* retrain link */
240 	reg16 |= PCI_EXP_LNKCTL_RL;
241 	pci_write_config_word(pdev, pos + PCI_EXP_LNKCTL, reg16);
242 
243 	/* Wait for link training end */
244 	/* break out after waiting for timeout */
245 	start_jiffies = jiffies;
246 	for (;;) {
247 		pci_read_config_word(pdev, pos + PCI_EXP_LNKSTA, &reg16);
248 		if (!(reg16 & PCI_EXP_LNKSTA_LT))
249 			break;
250 		if (time_after(jiffies, start_jiffies + LINK_RETRAIN_TIMEOUT))
251 			break;
252 		msleep(1);
253 	}
254 	/* training failed -> recover */
255 	if (reg16 & PCI_EXP_LNKSTA_LT) {
256 		dev_printk (KERN_ERR, &pdev->dev, "ASPM: Could not configure"
257 			    " common clock\n");
258 		i = 0;
259 		list_for_each_entry(child_dev, &pdev->subordinate->devices,
260 				    bus_list) {
261 			child_pos = pci_find_capability(child_dev,
262 							PCI_CAP_ID_EXP);
263 			pci_write_config_word(child_dev,
264 					      child_pos + PCI_EXP_LNKCTL,
265 					      child_regs[i]);
266 			i++;
267 		}
268 		pci_write_config_word(pdev, pos + PCI_EXP_LNKCTL, parent_reg);
269 	}
270 }
271 
/*
 * calc_L0S_latency: Convert L0s latency encoding to ns
 *
 * Encodings other than 0x7 map to 64ns << encoding. Encoding 0x7 maps to
 * "unlimited" (-1U) when @ac is set, otherwise to 5us (i.e. more than 4us).
 */
static unsigned int calc_L0S_latency(unsigned int latency_encoding, int ac)
{
	if (latency_encoding != 0x7)
		return 64U << latency_encoding;
	return ac ? -1U : 5 * 1000;
}
288 
/*
 * calc_L1_latency: Convert L1 latency encoding to ns
 *
 * Encodings other than 0x7 map to 1us << encoding. Encoding 0x7 maps to
 * "unlimited" (-1U) when @ac is set, otherwise to 65us (i.e. more than 64us).
 */
static unsigned int calc_L1_latency(unsigned int latency_encoding, int ac)
{
	if (latency_encoding != 0x7)
		return 1000U << latency_encoding;
	return ac ? -1U : 65 * 1000;
}
305 
306 static void pcie_aspm_get_cap_device(struct pci_dev *pdev, u32 *state,
307 	unsigned int *l0s, unsigned int *l1, unsigned int *enabled)
308 {
309 	int pos;
310 	u16 reg16;
311 	u32 reg32;
312 	unsigned int latency;
313 
314 	pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
315 	pci_read_config_dword(pdev, pos + PCI_EXP_LNKCAP, &reg32);
316 	*state = (reg32 & PCI_EXP_LNKCAP_ASPMS) >> 10;
317 	if (*state != PCIE_LINK_STATE_L0S &&
318 		*state != (PCIE_LINK_STATE_L1|PCIE_LINK_STATE_L0S))
319 		*state = 0;
320 	if (*state == 0)
321 		return;
322 
323 	latency = (reg32 & PCI_EXP_LNKCAP_L0SEL) >> 12;
324 	*l0s = calc_L0S_latency(latency, 0);
325 	if (*state & PCIE_LINK_STATE_L1) {
326 		latency = (reg32 & PCI_EXP_LNKCAP_L1EL) >> 15;
327 		*l1 = calc_L1_latency(latency, 0);
328 	}
329 	pci_read_config_word(pdev, pos + PCI_EXP_LNKCTL, &reg16);
330 	*enabled = reg16 & (PCIE_LINK_STATE_L0S|PCIE_LINK_STATE_L1);
331 }
332 
333 static void pcie_aspm_cap_init(struct pci_dev *pdev)
334 {
335 	struct pci_dev *child_dev;
336 	u32 state, tmp;
337 	struct pcie_link_state *link_state = pdev->link_state;
338 
339 	/* upstream component states */
340 	pcie_aspm_get_cap_device(pdev, &link_state->support_state,
341 		&link_state->l0s_upper_latency,
342 		&link_state->l1_upper_latency,
343 		&link_state->enabled_state);
344 	/* downstream component states, all functions have the same setting */
345 	child_dev = list_entry(pdev->subordinate->devices.next, struct pci_dev,
346 		bus_list);
347 	pcie_aspm_get_cap_device(child_dev, &state,
348 		&link_state->l0s_down_latency,
349 		&link_state->l1_down_latency,
350 		&tmp);
351 	link_state->support_state &= state;
352 	if (!link_state->support_state)
353 		return;
354 	link_state->enabled_state &= link_state->support_state;
355 	link_state->bios_aspm_state = link_state->enabled_state;
356 
357 	/* ENDPOINT states*/
358 	list_for_each_entry(child_dev, &pdev->subordinate->devices, bus_list) {
359 		int pos;
360 		u32 reg32;
361 		unsigned int latency;
362 		struct endpoint_state *ep_state =
363 			&link_state->endpoints[PCI_FUNC(child_dev->devfn)];
364 
365 		if (child_dev->pcie_type != PCI_EXP_TYPE_ENDPOINT &&
366 			child_dev->pcie_type != PCI_EXP_TYPE_LEG_END)
367 			continue;
368 
369 		pos = pci_find_capability(child_dev, PCI_CAP_ID_EXP);
370 		pci_read_config_dword(child_dev, pos + PCI_EXP_DEVCAP, &reg32);
371 		latency = (reg32 & PCI_EXP_DEVCAP_L0S) >> 6;
372 		latency = calc_L0S_latency(latency, 1);
373 		ep_state->l0s_acceptable_latency = latency;
374 		if (link_state->support_state & PCIE_LINK_STATE_L1) {
375 			latency = (reg32 & PCI_EXP_DEVCAP_L1) >> 9;
376 			latency = calc_L1_latency(latency, 1);
377 			ep_state->l1_acceptable_latency = latency;
378 		}
379 	}
380 }
381 
382 static unsigned int __pcie_aspm_check_state_one(struct pci_dev *pdev,
383 	unsigned int state)
384 {
385 	struct pci_dev *parent_dev, *tmp_dev;
386 	unsigned int latency, l1_latency = 0;
387 	struct pcie_link_state *link_state;
388 	struct endpoint_state *ep_state;
389 
390 	parent_dev = pdev->bus->self;
391 	link_state = parent_dev->link_state;
392 	state &= link_state->support_state;
393 	if (state == 0)
394 		return 0;
395 	ep_state = &link_state->endpoints[PCI_FUNC(pdev->devfn)];
396 
397 	/*
398 	 * Check latency for endpoint device.
399 	 * TBD: The latency from the endpoint to root complex vary per
400 	 * switch's upstream link state above the device. Here we just do a
401 	 * simple check which assumes all links above the device can be in L1
402 	 * state, that is we just consider the worst case. If switch's upstream
403 	 * link can't be put into L0S/L1, then our check is too strictly.
404 	 */
405 	tmp_dev = pdev;
406 	while (state & (PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1)) {
407 		parent_dev = tmp_dev->bus->self;
408 		link_state = parent_dev->link_state;
409 		if (state & PCIE_LINK_STATE_L0S) {
410 			latency = max_t(unsigned int,
411 					link_state->l0s_upper_latency,
412 					link_state->l0s_down_latency);
413 			if (latency > ep_state->l0s_acceptable_latency)
414 				state &= ~PCIE_LINK_STATE_L0S;
415 		}
416 		if (state & PCIE_LINK_STATE_L1) {
417 			latency = max_t(unsigned int,
418 					link_state->l1_upper_latency,
419 					link_state->l1_down_latency);
420 			if (latency + l1_latency >
421 					ep_state->l1_acceptable_latency)
422 				state &= ~PCIE_LINK_STATE_L1;
423 		}
424 		if (!parent_dev->bus->self) /* parent_dev is a root port */
425 			break;
426 		else {
427 			/*
428 			 * parent_dev is the downstream port of a switch, make
429 			 * tmp_dev the upstream port of the switch
430 			 */
431 			tmp_dev = parent_dev->bus->self;
432 			/*
433 			 * every switch on the path to root complex need 1 more
434 			 * microsecond for L1. Spec doesn't mention L0S.
435 			 */
436 			if (state & PCIE_LINK_STATE_L1)
437 				l1_latency += 1000;
438 		}
439 	}
440 	return state;
441 }
442 
443 static unsigned int pcie_aspm_check_state(struct pci_dev *pdev,
444 	unsigned int state)
445 {
446 	struct pci_dev *child_dev;
447 
448 	/* If no child, ignore the link */
449 	if (list_empty(&pdev->subordinate->devices))
450 		return state;
451 	list_for_each_entry(child_dev, &pdev->subordinate->devices, bus_list) {
452 		if (child_dev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE) {
453 			/*
454 			 * If downstream component of a link is pci bridge, we
455 			 * disable ASPM for now for the link
456 			 * */
457 			state = 0;
458 			break;
459 		}
460 		if ((child_dev->pcie_type != PCI_EXP_TYPE_ENDPOINT &&
461 			child_dev->pcie_type != PCI_EXP_TYPE_LEG_END))
462 			continue;
463 		/* Device not in D0 doesn't need check latency */
464 		if (child_dev->current_state == PCI_D1 ||
465 			child_dev->current_state == PCI_D2 ||
466 			child_dev->current_state == PCI_D3hot ||
467 			child_dev->current_state == PCI_D3cold)
468 			continue;
469 		state = __pcie_aspm_check_state_one(child_dev, state);
470 	}
471 	return state;
472 }
473 
474 static void __pcie_aspm_config_one_dev(struct pci_dev *pdev, unsigned int state)
475 {
476 	u16 reg16;
477 	int pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
478 
479 	pci_read_config_word(pdev, pos + PCI_EXP_LNKCTL, &reg16);
480 	reg16 &= ~0x3;
481 	reg16 |= state;
482 	pci_write_config_word(pdev, pos + PCI_EXP_LNKCTL, reg16);
483 }
484 
485 static void __pcie_aspm_config_link(struct pci_dev *pdev, unsigned int state)
486 {
487 	struct pci_dev *child_dev;
488 	int valid = 1;
489 	struct pcie_link_state *link_state = pdev->link_state;
490 
491 	/* If no child, disable the link */
492 	if (list_empty(&pdev->subordinate->devices))
493 		state = 0;
494 	/*
495 	 * if the downstream component has pci bridge function, don't do ASPM
496 	 * now
497 	 */
498 	list_for_each_entry(child_dev, &pdev->subordinate->devices, bus_list) {
499 		if (child_dev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE) {
500 			valid = 0;
501 			break;
502 		}
503 	}
504 	if (!valid)
505 		return;
506 
507 	/*
508 	 * spec 2.0 suggests all functions should be configured the same
509 	 * setting for ASPM. Enabling ASPM L1 should be done in upstream
510 	 * component first and then downstream, and vice versa for disabling
511 	 * ASPM L1. Spec doesn't mention L0S.
512 	 */
513 	if (state & PCIE_LINK_STATE_L1)
514 		__pcie_aspm_config_one_dev(pdev, state);
515 
516 	list_for_each_entry(child_dev, &pdev->subordinate->devices, bus_list)
517 		__pcie_aspm_config_one_dev(child_dev, state);
518 
519 	if (!(state & PCIE_LINK_STATE_L1))
520 		__pcie_aspm_config_one_dev(pdev, state);
521 
522 	link_state->enabled_state = state;
523 }
524 
525 static struct pcie_link_state *get_root_port_link(struct pcie_link_state *link)
526 {
527 	struct pcie_link_state *root_port_link = link;
528 	while (root_port_link->parent)
529 		root_port_link = root_port_link->parent;
530 	return root_port_link;
531 }
532 
533 /* check the whole hierarchy, and configure each link in the hierarchy */
534 static void __pcie_aspm_configure_link_state(struct pci_dev *pdev,
535 	unsigned int state)
536 {
537 	struct pcie_link_state *link_state = pdev->link_state;
538 	struct pcie_link_state *root_port_link = get_root_port_link(link_state);
539 	struct pcie_link_state *leaf;
540 
541 	state &= PCIE_LINK_STATE_L0S|PCIE_LINK_STATE_L1;
542 
543 	/* check all links who have specific root port link */
544 	list_for_each_entry(leaf, &link_list, sibiling) {
545 		if (!list_empty(&leaf->children) ||
546 			get_root_port_link(leaf) != root_port_link)
547 			continue;
548 		state = pcie_aspm_check_state(leaf->pdev, state);
549 	}
550 	/* check root port link too in case it hasn't children */
551 	state = pcie_aspm_check_state(root_port_link->pdev, state);
552 
553 	if (link_state->enabled_state == state)
554 		return;
555 
556 	/*
557 	 * we must change the hierarchy. See comments in
558 	 * __pcie_aspm_config_link for the order
559 	 **/
560 	if (state & PCIE_LINK_STATE_L1) {
561 		list_for_each_entry(leaf, &link_list, sibiling) {
562 			if (get_root_port_link(leaf) == root_port_link)
563 				__pcie_aspm_config_link(leaf->pdev, state);
564 		}
565 	} else {
566 		list_for_each_entry_reverse(leaf, &link_list, sibiling) {
567 			if (get_root_port_link(leaf) == root_port_link)
568 				__pcie_aspm_config_link(leaf->pdev, state);
569 		}
570 	}
571 }
572 
573 /*
574  * pcie_aspm_configure_link_state: enable/disable PCI express link state
575  * @pdev: the root port or switch downstream port
576  */
577 static void pcie_aspm_configure_link_state(struct pci_dev *pdev,
578 	unsigned int state)
579 {
580 	down_read(&pci_bus_sem);
581 	mutex_lock(&aspm_lock);
582 	__pcie_aspm_configure_link_state(pdev, state);
583 	mutex_unlock(&aspm_lock);
584 	up_read(&pci_bus_sem);
585 }
586 
587 static void free_link_state(struct pci_dev *pdev)
588 {
589 	kfree(pdev->link_state);
590 	pdev->link_state = NULL;
591 }
592 
593 static int pcie_aspm_sanity_check(struct pci_dev *pdev)
594 {
595 	struct pci_dev *child_dev;
596 	int child_pos;
597 	u32 reg32;
598 
599 	/*
600 	 * Some functions in a slot might not all be PCIE functions, very
601 	 * strange. Disable ASPM for the whole slot
602 	 */
603 	list_for_each_entry(child_dev, &pdev->subordinate->devices, bus_list) {
604 		child_pos = pci_find_capability(child_dev, PCI_CAP_ID_EXP);
605 		if (!child_pos)
606 			return -EINVAL;
607 
608 		/*
609 		 * Disable ASPM for pre-1.1 PCIe device, we follow MS to use
610 		 * RBER bit to determine if a function is 1.1 version device
611 		 */
612 		pci_read_config_dword(child_dev, child_pos + PCI_EXP_DEVCAP,
613 			&reg32);
614 		if (!(reg32 & PCI_EXP_DEVCAP_RBER) && !aspm_force) {
615 			dev_printk(KERN_INFO, &child_dev->dev, "disabling ASPM"
616 				" on pre-1.1 PCIe device.  You can enable it"
617 				" with 'pcie_aspm=force'\n");
618 			return -EINVAL;
619 		}
620 	}
621 	return 0;
622 }
623 
624 /*
625  * pcie_aspm_init_link_state: Initiate PCI express link state.
626  * It is called after the pcie and its children devices are scaned.
627  * @pdev: the root port or switch downstream port
628  */
629 void pcie_aspm_init_link_state(struct pci_dev *pdev)
630 {
631 	unsigned int state;
632 	struct pcie_link_state *link_state;
633 	int error = 0;
634 	int blacklist;
635 
636 	if (aspm_disabled || !pdev->is_pcie || pdev->link_state)
637 		return;
638 	if (pdev->pcie_type != PCI_EXP_TYPE_ROOT_PORT &&
639 		pdev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM)
640 		return;
641 	down_read(&pci_bus_sem);
642 	if (list_empty(&pdev->subordinate->devices))
643 		goto out;
644 
645 	blacklist = !!pcie_aspm_sanity_check(pdev);
646 
647 	mutex_lock(&aspm_lock);
648 
649 	link_state = kzalloc(sizeof(*link_state), GFP_KERNEL);
650 	if (!link_state)
651 		goto unlock_out;
652 
653 	link_state->downstream_has_switch = pcie_aspm_downstream_has_switch(pdev);
654 	INIT_LIST_HEAD(&link_state->children);
655 	INIT_LIST_HEAD(&link_state->link);
656 	if (pdev->bus->self) {/* this is a switch */
657 		struct pcie_link_state *parent_link_state;
658 
659 		parent_link_state = pdev->bus->parent->self->link_state;
660 		if (!parent_link_state) {
661 			kfree(link_state);
662 			goto unlock_out;
663 		}
664 		list_add(&link_state->link, &parent_link_state->children);
665 		link_state->parent = parent_link_state;
666 	}
667 
668 	pdev->link_state = link_state;
669 
670 	if (!blacklist) {
671 		pcie_aspm_configure_common_clock(pdev);
672 		pcie_aspm_cap_init(pdev);
673 	} else {
674 		link_state->enabled_state = PCIE_LINK_STATE_L0S|PCIE_LINK_STATE_L1;
675 		link_state->bios_aspm_state = 0;
676 		/* Set support state to 0, so we will disable ASPM later */
677 		link_state->support_state = 0;
678 	}
679 
680 	link_state->pdev = pdev;
681 	list_add(&link_state->sibiling, &link_list);
682 
683 	if (link_state->downstream_has_switch) {
684 		/*
685 		 * If link has switch, delay the link config. The leaf link
686 		 * initialization will config the whole hierarchy. but we must
687 		 * make sure BIOS doesn't set unsupported link state
688 		 **/
689 		state = pcie_aspm_check_state(pdev, link_state->bios_aspm_state);
690 		__pcie_aspm_config_link(pdev, state);
691 	} else
692 		__pcie_aspm_configure_link_state(pdev,
693 			policy_to_aspm_state(pdev));
694 
695 	pcie_check_clock_pm(pdev, blacklist);
696 
697 unlock_out:
698 	if (error)
699 		free_link_state(pdev);
700 	mutex_unlock(&aspm_lock);
701 out:
702 	up_read(&pci_bus_sem);
703 }
704 
705 /* @pdev: the endpoint device */
706 void pcie_aspm_exit_link_state(struct pci_dev *pdev)
707 {
708 	struct pci_dev *parent = pdev->bus->self;
709 	struct pcie_link_state *link_state = parent->link_state;
710 
711 	if (aspm_disabled || !pdev->is_pcie || !parent || !link_state)
712 		return;
713 	if (parent->pcie_type != PCI_EXP_TYPE_ROOT_PORT &&
714 		parent->pcie_type != PCI_EXP_TYPE_DOWNSTREAM)
715 		return;
716 	down_read(&pci_bus_sem);
717 	mutex_lock(&aspm_lock);
718 
719 	/*
720 	 * All PCIe functions are in one slot, remove one function will remove
721 	 * the whole slot, so just wait until we are the last function left.
722 	 */
723 	if (!list_is_last(&pdev->bus_list, &parent->subordinate->devices))
724 		goto out;
725 
726 	/* All functions are removed, so just disable ASPM for the link */
727 	__pcie_aspm_config_one_dev(parent, 0);
728 	list_del(&link_state->sibiling);
729 	list_del(&link_state->link);
730 	/* Clock PM is for endpoint device */
731 
732 	free_link_state(parent);
733 out:
734 	mutex_unlock(&aspm_lock);
735 	up_read(&pci_bus_sem);
736 }
737 
738 /* @pdev: the root port or switch downstream port */
739 void pcie_aspm_pm_state_change(struct pci_dev *pdev)
740 {
741 	struct pcie_link_state *link_state = pdev->link_state;
742 
743 	if (aspm_disabled || !pdev->is_pcie || !pdev->link_state)
744 		return;
745 	if (pdev->pcie_type != PCI_EXP_TYPE_ROOT_PORT &&
746 		pdev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM)
747 		return;
748 	/*
749 	 * devices changed PM state, we should recheck if latency meets all
750 	 * functions' requirement
751 	 */
752 	pcie_aspm_configure_link_state(pdev, link_state->enabled_state);
753 }
754 
755 /*
756  * pci_disable_link_state - disable pci device's link state, so the link will
757  * never enter specific states
758  */
759 void pci_disable_link_state(struct pci_dev *pdev, int state)
760 {
761 	struct pci_dev *parent = pdev->bus->self;
762 	struct pcie_link_state *link_state;
763 
764 	if (aspm_disabled || !pdev->is_pcie)
765 		return;
766 	if (pdev->pcie_type == PCI_EXP_TYPE_ROOT_PORT ||
767 	    pdev->pcie_type == PCI_EXP_TYPE_DOWNSTREAM)
768 		parent = pdev;
769 	if (!parent || !parent->link_state)
770 		return;
771 
772 	down_read(&pci_bus_sem);
773 	mutex_lock(&aspm_lock);
774 	link_state = parent->link_state;
775 	link_state->support_state &=
776 		~(state & (PCIE_LINK_STATE_L0S|PCIE_LINK_STATE_L1));
777 	if (state & PCIE_LINK_STATE_CLKPM)
778 		link_state->clk_pm_capable = 0;
779 
780 	__pcie_aspm_configure_link_state(parent, link_state->enabled_state);
781 	if (!link_state->clk_pm_capable && link_state->clk_pm_enabled)
782 		pcie_set_clock_pm(parent, 0);
783 	mutex_unlock(&aspm_lock);
784 	up_read(&pci_bus_sem);
785 }
786 EXPORT_SYMBOL(pci_disable_link_state);
787 
788 static int pcie_aspm_set_policy(const char *val, struct kernel_param *kp)
789 {
790 	int i;
791 	struct pci_dev *pdev;
792 	struct pcie_link_state *link_state;
793 
794 	for (i = 0; i < ARRAY_SIZE(policy_str); i++)
795 		if (!strncmp(val, policy_str[i], strlen(policy_str[i])))
796 			break;
797 	if (i >= ARRAY_SIZE(policy_str))
798 		return -EINVAL;
799 	if (i == aspm_policy)
800 		return 0;
801 
802 	down_read(&pci_bus_sem);
803 	mutex_lock(&aspm_lock);
804 	aspm_policy = i;
805 	list_for_each_entry(link_state, &link_list, sibiling) {
806 		pdev = link_state->pdev;
807 		__pcie_aspm_configure_link_state(pdev,
808 			policy_to_aspm_state(pdev));
809 		if (link_state->clk_pm_capable &&
810 		    link_state->clk_pm_enabled != policy_to_clkpm_state(pdev))
811 			pcie_set_clock_pm(pdev, policy_to_clkpm_state(pdev));
812 
813 	}
814 	mutex_unlock(&aspm_lock);
815 	up_read(&pci_bus_sem);
816 	return 0;
817 }
818 
819 static int pcie_aspm_get_policy(char *buffer, struct kernel_param *kp)
820 {
821 	int i, cnt = 0;
822 	for (i = 0; i < ARRAY_SIZE(policy_str); i++)
823 		if (i == aspm_policy)
824 			cnt += sprintf(buffer + cnt, "[%s] ", policy_str[i]);
825 		else
826 			cnt += sprintf(buffer + cnt, "%s ", policy_str[i]);
827 	return cnt;
828 }
829 
830 module_param_call(policy, pcie_aspm_set_policy, pcie_aspm_get_policy,
831 	NULL, 0644);
832 
833 #ifdef CONFIG_PCIEASPM_DEBUG
834 static ssize_t link_state_show(struct device *dev,
835 		struct device_attribute *attr,
836 		char *buf)
837 {
838 	struct pci_dev *pci_device = to_pci_dev(dev);
839 	struct pcie_link_state *link_state = pci_device->link_state;
840 
841 	return sprintf(buf, "%d\n", link_state->enabled_state);
842 }
843 
844 static ssize_t link_state_store(struct device *dev,
845 		struct device_attribute *attr,
846 		const char *buf,
847 		size_t n)
848 {
849 	struct pci_dev *pci_device = to_pci_dev(dev);
850 	int state;
851 
852 	if (n < 1)
853 		return -EINVAL;
854 	state = buf[0]-'0';
855 	if (state >= 0 && state <= 3) {
856 		/* setup link aspm state */
857 		pcie_aspm_configure_link_state(pci_device, state);
858 		return n;
859 	}
860 
861 	return -EINVAL;
862 }
863 
864 static ssize_t clk_ctl_show(struct device *dev,
865 		struct device_attribute *attr,
866 		char *buf)
867 {
868 	struct pci_dev *pci_device = to_pci_dev(dev);
869 	struct pcie_link_state *link_state = pci_device->link_state;
870 
871 	return sprintf(buf, "%d\n", link_state->clk_pm_enabled);
872 }
873 
874 static ssize_t clk_ctl_store(struct device *dev,
875 		struct device_attribute *attr,
876 		const char *buf,
877 		size_t n)
878 {
879 	struct pci_dev *pci_device = to_pci_dev(dev);
880 	int state;
881 
882 	if (n < 1)
883 		return -EINVAL;
884 	state = buf[0]-'0';
885 
886 	down_read(&pci_bus_sem);
887 	mutex_lock(&aspm_lock);
888 	pcie_set_clock_pm(pci_device, !!state);
889 	mutex_unlock(&aspm_lock);
890 	up_read(&pci_bus_sem);
891 
892 	return n;
893 }
894 
895 static DEVICE_ATTR(link_state, 0644, link_state_show, link_state_store);
896 static DEVICE_ATTR(clk_ctl, 0644, clk_ctl_show, clk_ctl_store);
897 
898 static char power_group[] = "power";
899 void pcie_aspm_create_sysfs_dev_files(struct pci_dev *pdev)
900 {
901 	struct pcie_link_state *link_state = pdev->link_state;
902 
903 	if (!pdev->is_pcie || (pdev->pcie_type != PCI_EXP_TYPE_ROOT_PORT &&
904 		pdev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM) || !link_state)
905 		return;
906 
907 	if (link_state->support_state)
908 		sysfs_add_file_to_group(&pdev->dev.kobj,
909 			&dev_attr_link_state.attr, power_group);
910 	if (link_state->clk_pm_capable)
911 		sysfs_add_file_to_group(&pdev->dev.kobj,
912 			&dev_attr_clk_ctl.attr, power_group);
913 }
914 
915 void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev)
916 {
917 	struct pcie_link_state *link_state = pdev->link_state;
918 
919 	if (!pdev->is_pcie || (pdev->pcie_type != PCI_EXP_TYPE_ROOT_PORT &&
920 		pdev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM) || !link_state)
921 		return;
922 
923 	if (link_state->support_state)
924 		sysfs_remove_file_from_group(&pdev->dev.kobj,
925 			&dev_attr_link_state.attr, power_group);
926 	if (link_state->clk_pm_capable)
927 		sysfs_remove_file_from_group(&pdev->dev.kobj,
928 			&dev_attr_clk_ctl.attr, power_group);
929 }
930 #endif
931 
932 static int __init pcie_aspm_disable(char *str)
933 {
934 	if (!strcmp(str, "off")) {
935 		aspm_disabled = 1;
936 		printk(KERN_INFO "PCIe ASPM is disabled\n");
937 	} else if (!strcmp(str, "force")) {
938 		aspm_force = 1;
939 		printk(KERN_INFO "PCIe ASPM is forcedly enabled\n");
940 	}
941 	return 1;
942 }
943 
944 __setup("pcie_aspm=", pcie_aspm_disable);
945 
946 void pcie_no_aspm(void)
947 {
948 	if (!aspm_force)
949 		aspm_disabled = 1;
950 }
951 
952 /**
953  * pcie_aspm_enabled - is PCIe ASPM enabled?
954  *
955  * Returns true if ASPM has not been disabled by the command-line option
956  * pcie_aspm=off.
957  **/
958 int pcie_aspm_enabled(void)
959 {
960        return !aspm_disabled;
961 }
962 EXPORT_SYMBOL(pcie_aspm_enabled);
963 
964