xref: /openbmc/linux/drivers/gpu/drm/i915/gt/intel_gt_pm.c (revision f5ad1c74)
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2019 Intel Corporation
5  */
6 
7 #include <linux/suspend.h>
8 
9 #include "i915_drv.h"
10 #include "i915_globals.h"
11 #include "i915_params.h"
12 #include "intel_context.h"
13 #include "intel_engine_pm.h"
14 #include "intel_gt.h"
15 #include "intel_gt_clock_utils.h"
16 #include "intel_gt_pm.h"
17 #include "intel_gt_requests.h"
18 #include "intel_llc.h"
19 #include "intel_pm.h"
20 #include "intel_rc6.h"
21 #include "intel_rps.h"
22 #include "intel_wakeref.h"
23 
24 static void user_forcewake(struct intel_gt *gt, bool suspend)
25 {
26 	int count = atomic_read(&gt->user_wakeref);
27 
28 	/* Inside suspend/resume so single threaded, no races to worry about. */
29 	if (likely(!count))
30 		return;
31 
32 	intel_gt_pm_get(gt);
33 	if (suspend) {
34 		GEM_BUG_ON(count > atomic_read(&gt->wakeref.count));
35 		atomic_sub(count, &gt->wakeref.count);
36 	} else {
37 		atomic_add(count, &gt->wakeref.count);
38 	}
39 	intel_gt_pm_put(gt);
40 }
41 
42 static int __gt_unpark(struct intel_wakeref *wf)
43 {
44 	struct intel_gt *gt = container_of(wf, typeof(*gt), wakeref);
45 	struct drm_i915_private *i915 = gt->i915;
46 
47 	GT_TRACE(gt, "\n");
48 
49 	i915_globals_unpark();
50 
51 	/*
52 	 * It seems that the DMC likes to transition between the DC states a lot
53 	 * when there are no connected displays (no active power domains) during
54 	 * command submission.
55 	 *
56 	 * This activity has negative impact on the performance of the chip with
57 	 * huge latencies observed in the interrupt handler and elsewhere.
58 	 *
59 	 * Work around it by grabbing a GT IRQ power domain whilst there is any
60 	 * GT activity, preventing any DC state transitions.
61 	 */
62 	gt->awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
63 	GEM_BUG_ON(!gt->awake);
64 
65 	intel_rc6_unpark(&gt->rc6);
66 	intel_rps_unpark(&gt->rps);
67 	i915_pmu_gt_unparked(i915);
68 
69 	intel_gt_unpark_requests(gt);
70 
71 	return 0;
72 }
73 
74 static int __gt_park(struct intel_wakeref *wf)
75 {
76 	struct intel_gt *gt = container_of(wf, typeof(*gt), wakeref);
77 	intel_wakeref_t wakeref = fetch_and_zero(&gt->awake);
78 	struct drm_i915_private *i915 = gt->i915;
79 
80 	GT_TRACE(gt, "\n");
81 
82 	intel_gt_park_requests(gt);
83 
84 	i915_vma_parked(gt);
85 	i915_pmu_gt_parked(i915);
86 	intel_rps_park(&gt->rps);
87 	intel_rc6_park(&gt->rc6);
88 
89 	/* Everything switched off, flush any residual interrupt just in case */
90 	intel_synchronize_irq(i915);
91 
92 	/* Defer dropping the display power well for 100ms, it's slow! */
93 	GEM_BUG_ON(!wakeref);
94 	intel_display_power_put_async(i915, POWER_DOMAIN_GT_IRQ, wakeref);
95 
96 	i915_globals_park();
97 
98 	return 0;
99 }
100 
101 static const struct intel_wakeref_ops wf_ops = {
102 	.get = __gt_unpark,
103 	.put = __gt_park,
104 };
105 
106 void intel_gt_pm_init_early(struct intel_gt *gt)
107 {
108 	intel_wakeref_init(&gt->wakeref, gt->uncore->rpm, &wf_ops);
109 }
110 
111 void intel_gt_pm_init(struct intel_gt *gt)
112 {
113 	/*
114 	 * Enabling power-management should be "self-healing". If we cannot
115 	 * enable a feature, simply leave it disabled with a notice to the
116 	 * user.
117 	 */
118 	intel_rc6_init(&gt->rc6);
119 	intel_rps_init(&gt->rps);
120 }
121 
122 static bool reset_engines(struct intel_gt *gt)
123 {
124 	if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
125 		return false;
126 
127 	return __intel_gt_reset(gt, ALL_ENGINES) == 0;
128 }
129 
130 static void gt_sanitize(struct intel_gt *gt, bool force)
131 {
132 	struct intel_engine_cs *engine;
133 	enum intel_engine_id id;
134 	intel_wakeref_t wakeref;
135 
136 	GT_TRACE(gt, "force:%s", yesno(force));
137 
138 	/* Use a raw wakeref to avoid calling intel_display_power_get early */
139 	wakeref = intel_runtime_pm_get(gt->uncore->rpm);
140 	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
141 
142 	intel_gt_check_clock_frequency(gt);
143 
144 	/*
145 	 * As we have just resumed the machine and woken the device up from
146 	 * deep PCI sleep (presumably D3_cold), assume the HW has been reset
147 	 * back to defaults, recovering from whatever wedged state we left it
148 	 * in and so worth trying to use the device once more.
149 	 */
150 	if (intel_gt_is_wedged(gt))
151 		intel_gt_unset_wedged(gt);
152 
153 	intel_uc_sanitize(&gt->uc);
154 
155 	for_each_engine(engine, gt, id)
156 		if (engine->reset.prepare)
157 			engine->reset.prepare(engine);
158 
159 	intel_uc_reset_prepare(&gt->uc);
160 
161 	for_each_engine(engine, gt, id)
162 		if (engine->sanitize)
163 			engine->sanitize(engine);
164 
165 	if (reset_engines(gt) || force) {
166 		for_each_engine(engine, gt, id)
167 			__intel_engine_reset(engine, false);
168 	}
169 
170 	for_each_engine(engine, gt, id)
171 		if (engine->reset.finish)
172 			engine->reset.finish(engine);
173 
174 	intel_rps_sanitize(&gt->rps);
175 
176 	intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
177 	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
178 }
179 
180 void intel_gt_pm_fini(struct intel_gt *gt)
181 {
182 	intel_rc6_fini(&gt->rc6);
183 }
184 
185 int intel_gt_resume(struct intel_gt *gt)
186 {
187 	struct intel_engine_cs *engine;
188 	enum intel_engine_id id;
189 	int err;
190 
191 	err = intel_gt_has_unrecoverable_error(gt);
192 	if (err)
193 		return err;
194 
195 	GT_TRACE(gt, "\n");
196 
197 	/*
198 	 * After resume, we may need to poke into the pinned kernel
199 	 * contexts to paper over any damage caused by the sudden suspend.
200 	 * Only the kernel contexts should remain pinned over suspend,
201 	 * allowing us to fixup the user contexts on their first pin.
202 	 */
203 	gt_sanitize(gt, true);
204 
205 	intel_gt_pm_get(gt);
206 
207 	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
208 	intel_rc6_sanitize(&gt->rc6);
209 	if (intel_gt_is_wedged(gt)) {
210 		err = -EIO;
211 		goto out_fw;
212 	}
213 
214 	/* Only when the HW is re-initialised, can we replay the requests */
215 	err = intel_gt_init_hw(gt);
216 	if (err) {
217 		i915_probe_error(gt->i915,
218 				 "Failed to initialize GPU, declaring it wedged!\n");
219 		goto err_wedged;
220 	}
221 
222 	intel_rps_enable(&gt->rps);
223 	intel_llc_enable(&gt->llc);
224 
225 	for_each_engine(engine, gt, id) {
226 		intel_engine_pm_get(engine);
227 
228 		engine->serial++; /* kernel context lost */
229 		err = intel_engine_resume(engine);
230 
231 		intel_engine_pm_put(engine);
232 		if (err) {
233 			drm_err(&gt->i915->drm,
234 				"Failed to restart %s (%d)\n",
235 				engine->name, err);
236 			goto err_wedged;
237 		}
238 	}
239 
240 	intel_rc6_enable(&gt->rc6);
241 
242 	intel_uc_resume(&gt->uc);
243 
244 	user_forcewake(gt, false);
245 
246 out_fw:
247 	intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
248 	intel_gt_pm_put(gt);
249 	return err;
250 
251 err_wedged:
252 	intel_gt_set_wedged(gt);
253 	goto out_fw;
254 }
255 
256 static void wait_for_suspend(struct intel_gt *gt)
257 {
258 	if (!intel_gt_pm_is_awake(gt))
259 		return;
260 
261 	if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
262 		/*
263 		 * Forcibly cancel outstanding work and leave
264 		 * the gpu quiet.
265 		 */
266 		intel_gt_set_wedged(gt);
267 		intel_gt_retire_requests(gt);
268 	}
269 
270 	intel_gt_pm_wait_for_idle(gt);
271 }
272 
273 void intel_gt_suspend_prepare(struct intel_gt *gt)
274 {
275 	user_forcewake(gt, true);
276 	wait_for_suspend(gt);
277 
278 	intel_uc_suspend(&gt->uc);
279 }
280 
281 static suspend_state_t pm_suspend_target(void)
282 {
283 #if IS_ENABLED(CONFIG_SUSPEND) && IS_ENABLED(CONFIG_PM_SLEEP)
284 	return pm_suspend_target_state;
285 #else
286 	return PM_SUSPEND_TO_IDLE;
287 #endif
288 }
289 
290 void intel_gt_suspend_late(struct intel_gt *gt)
291 {
292 	intel_wakeref_t wakeref;
293 
294 	/* We expect to be idle already; but also want to be independent */
295 	wait_for_suspend(gt);
296 
297 	if (is_mock_gt(gt))
298 		return;
299 
300 	GEM_BUG_ON(gt->awake);
301 
302 	/*
303 	 * On disabling the device, we want to turn off HW access to memory
304 	 * that we no longer own.
305 	 *
306 	 * However, not all suspend-states disable the device. S0 (s2idle)
307 	 * is effectively runtime-suspend, the device is left powered on
308 	 * but needs to be put into a low power state. We need to keep
309 	 * powermanagement enabled, but we also retain system state and so
310 	 * it remains safe to keep on using our allocated memory.
311 	 */
312 	if (pm_suspend_target() == PM_SUSPEND_TO_IDLE)
313 		return;
314 
315 	with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
316 		intel_rps_disable(&gt->rps);
317 		intel_rc6_disable(&gt->rc6);
318 		intel_llc_disable(&gt->llc);
319 	}
320 
321 	gt_sanitize(gt, false);
322 
323 	GT_TRACE(gt, "\n");
324 }
325 
326 void intel_gt_runtime_suspend(struct intel_gt *gt)
327 {
328 	intel_uc_runtime_suspend(&gt->uc);
329 
330 	GT_TRACE(gt, "\n");
331 }
332 
333 int intel_gt_runtime_resume(struct intel_gt *gt)
334 {
335 	GT_TRACE(gt, "\n");
336 	intel_gt_init_swizzling(gt);
337 	intel_ggtt_restore_fences(gt->ggtt);
338 
339 	return intel_uc_runtime_resume(&gt->uc);
340 }
341 
342 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
343 #include "selftest_gt_pm.c"
344 #endif
345