// SPDX-License-Identifier: MIT
/*
 * Copyright © 2018 Intel Corporation
 */

#include <linux/crc32.h>

#include "gem/i915_gem_stolen.h"

#include "i915_memcpy.h"
#include "i915_selftest.h"
#include "selftests/igt_reset.h"
#include "selftests/igt_atomic.h"
#include "selftests/igt_spinner.h"

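/*
 * Check that a GPU/engine reset does not corrupt stolen memory that is still
 * in use: CRC every page of the data stolen memory (DSM) region, keep the
 * engines under test busy with spinners, perform the reset, then CRC the
 * pages again and report any unallocated page that changed. Clobbering above
 * I915_GEM_STOLEN_BIAS (the region we may actually hand out) is fatal.
 */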
static int
__igt_reset_stolen(struct intel_gt *gt,
		   intel_engine_mask_t mask,
		   const char *msg)
{
	struct i915_ggtt *ggtt = &gt->i915->ggtt;
	const struct resource *dsm = &gt->i915->dsm;
	resource_size_t num_pages, page;
	struct intel_engine_cs *engine;
	intel_wakeref_t wakeref;
	enum intel_engine_id id;
	struct igt_spinner spin;
	long max, count;
	void *tmp;
	u32 *crc;
	int err;

	if (!drm_mm_node_allocated(&ggtt->error_capture))
		return 0;

	num_pages = resource_size(dsm) >> PAGE_SHIFT;
	if (!num_pages)
		return 0;

	crc = kmalloc_array(num_pages, sizeof(u32), GFP_KERNEL);
	if (!crc)
		return -ENOMEM;

	tmp = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!tmp) {
		err = -ENOMEM;
		goto err_crc;
	}

	igt_global_reset_lock(gt);
	wakeref = intel_runtime_pm_get(gt->uncore->rpm);

	err = igt_spinner_init(&spin, gt);
	if (err)
		goto err_lock;

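	/*
	 * Submit a spinning request on each engine under test so that the
	 * engines are actively busy when the reset is triggered.
	 */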
	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		struct i915_request *rq;

		if (!(mask & engine->mask))
			continue;

		if (!intel_engine_can_store_dword(engine))
			continue;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto err_spin;
		}
		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
		intel_context_put(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_spin;
		}
		i915_request_add(rq);
	}

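	/*
	 * Take a CRC of every stolen page before the reset, reusing the
	 * reserved error-capture GGTT slot as a window through which to read
	 * each page. Pages not backing a stolen allocation are first stamped
	 * with a known pattern so that any write to them is detectable.
	 */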
	for (page = 0; page < num_pages; page++) {
		dma_addr_t dma = (dma_addr_t)dsm->start + (page << PAGE_SHIFT);
		void __iomem *s;
		void *in;

		ggtt->vm.insert_page(&ggtt->vm, dma,
				     ggtt->error_capture.start,
				     I915_CACHE_NONE, 0);
		mb();

		s = io_mapping_map_wc(&ggtt->iomap,
				      ggtt->error_capture.start,
				      PAGE_SIZE);

		if (!__drm_mm_interval_first(&gt->i915->mm.stolen,
					     page << PAGE_SHIFT,
					     ((page + 1) << PAGE_SHIFT) - 1))
			memset32(s, STACK_MAGIC, PAGE_SIZE / sizeof(u32));

		in = (void __force *)s;
		if (i915_memcpy_from_wc(tmp, in, PAGE_SIZE))
			in = tmp;
		crc[page] = crc32_le(0, in, PAGE_SIZE);

		io_mapping_unmap(s);
	}
	mb();
	ggtt->vm.clear_range(&ggtt->vm, ggtt->error_capture.start, PAGE_SIZE);

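	/* Perform the reset: full GT reset, or each selected engine in turn. */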
	if (mask == ALL_ENGINES) {
		intel_gt_reset(gt, mask, NULL);
	} else {
		for_each_engine(engine, gt, id) {
			if (mask & engine->mask)
				intel_engine_reset(engine, NULL);
		}
	}

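	/*
	 * Re-read every stolen page and compare against the CRC taken before
	 * the reset, noting any unallocated page that was modified.
	 */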
	max = -1;
	count = 0;
	for (page = 0; page < num_pages; page++) {
		dma_addr_t dma = (dma_addr_t)dsm->start + (page << PAGE_SHIFT);
		void __iomem *s;
		void *in;
		u32 x;

		ggtt->vm.insert_page(&ggtt->vm, dma,
				     ggtt->error_capture.start,
				     I915_CACHE_NONE, 0);
		mb();

		s = io_mapping_map_wc(&ggtt->iomap,
				      ggtt->error_capture.start,
				      PAGE_SIZE);

		in = (void __force *)s;
		if (i915_memcpy_from_wc(tmp, in, PAGE_SIZE))
			in = tmp;
		x = crc32_le(0, in, PAGE_SIZE);

		if (x != crc[page] &&
		    !__drm_mm_interval_first(&gt->i915->mm.stolen,
					     page << PAGE_SHIFT,
					     ((page + 1) << PAGE_SHIFT) - 1)) {
			pr_debug("unused stolen page %pa modified by GPU reset\n",
				 &page);
			if (count++ == 0)
				igt_hexdump(in, PAGE_SIZE);
			max = page;
		}

		io_mapping_unmap(s);
	}
	mb();
	ggtt->vm.clear_range(&ggtt->vm, ggtt->error_capture.start, PAGE_SIZE);

	if (count > 0) {
		pr_info("%s reset clobbered %ld pages of stolen, last clobber at page %ld\n",
			msg, count, max);
	}
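	/*
	 * Clobbers confined to the low I915_GEM_STOLEN_BIAS bytes are only
	 * reported, as that region is kept out of use; anything above it may
	 * overlap objects allocated from stolen and so is treated as an error.
	 */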
	if (max >= I915_GEM_STOLEN_BIAS >> PAGE_SHIFT) {
		pr_err("%s reset clobbered unreserved area [above %x] of stolen; may cause severe faults\n",
		       msg, I915_GEM_STOLEN_BIAS);
		err = -EINVAL;
	}

err_spin:
	igt_spinner_fini(&spin);

err_lock:
	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
	igt_global_reset_unlock(gt);

	kfree(tmp);
err_crc:
	kfree(crc);
	return err;
}

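/* Check that a full GT reset leaves the in-use portion of stolen intact. */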
static int igt_reset_device_stolen(void *arg)
{
	return __igt_reset_stolen(arg, ALL_ENGINES, "device");
}

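/* As above, but exercising each individual engine reset in turn. */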
static int igt_reset_engines_stolen(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err;

	if (!intel_has_reset_engine(gt))
		return 0;

	for_each_engine(engine, gt, id) {
		err = __igt_reset_stolen(gt, engine->mask, engine->name);
		if (err)
			return err;
	}

	return 0;
}

static int igt_global_reset(void *arg)
{
	struct intel_gt *gt = arg;
	unsigned int reset_count;
	intel_wakeref_t wakeref;
	int err = 0;

	/* Check that we can issue a global GPU reset */

	igt_global_reset_lock(gt);
	wakeref = intel_runtime_pm_get(gt->uncore->rpm);

	reset_count = i915_reset_count(&gt->i915->gpu_error);

	intel_gt_reset(gt, ALL_ENGINES, NULL);

	if (i915_reset_count(&gt->i915->gpu_error) == reset_count) {
		pr_err("No GPU reset recorded!\n");
		err = -EINVAL;
	}

	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
	igt_global_reset_unlock(gt);

	if (intel_gt_is_wedged(gt))
		err = -EIO;

	return err;
}

static int igt_wedged_reset(void *arg)
{
	struct intel_gt *gt = arg;
	intel_wakeref_t wakeref;

	/* Check that we can recover a wedged device with a GPU reset */

	igt_global_reset_lock(gt);
	wakeref = intel_runtime_pm_get(gt->uncore->rpm);

	intel_gt_set_wedged(gt);

	GEM_BUG_ON(!intel_gt_is_wedged(gt));
	intel_gt_reset(gt, ALL_ENGINES, NULL);

	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
	igt_global_reset_unlock(gt);

	return intel_gt_is_wedged(gt) ? -EIO : 0;
}

static int igt_atomic_reset(void *arg)
{
	struct intel_gt *gt = arg;
	const typeof(*igt_atomic_phases) *p;
	int err = 0;

	/* Check that the resets are usable from atomic context */

	intel_gt_pm_get(gt);
	igt_global_reset_lock(gt);

	/* Flush any requests before we get started and check basics */
	if (!igt_force_reset(gt))
		goto unlock;

	for (p = igt_atomic_phases; p->name; p++) {
		intel_engine_mask_t awake;

		GEM_TRACE("__intel_gt_reset under %s\n", p->name);

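		/*
		 * Only the raw __intel_gt_reset() is exercised inside the
		 * atomic section; the prepare/finish steps happen outside it.
		 */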
		awake = reset_prepare(gt);
		p->critical_section_begin();

		err = __intel_gt_reset(gt, ALL_ENGINES);

		p->critical_section_end();
		reset_finish(gt, awake);

		if (err) {
			pr_err("__intel_gt_reset failed under %s\n", p->name);
			break;
		}
	}

	/* As we poke around the guts, do a full reset before continuing. */
	igt_force_reset(gt);

unlock:
	igt_global_reset_unlock(gt);
	intel_gt_pm_put(gt);

	return err;
}

static int igt_atomic_engine_reset(void *arg)
{
	struct intel_gt *gt = arg;
	const typeof(*igt_atomic_phases) *p;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/* Check that the resets are usable from atomic context */

	if (!intel_has_reset_engine(gt))
		return 0;

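	/* Engine resets are handled by the GuC when it owns submission; skip. */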
	if (intel_uc_uses_guc_submission(&gt->uc))
		return 0;

	intel_gt_pm_get(gt);
	igt_global_reset_lock(gt);

	/* Flush any requests before we get started and check basics */
	if (!igt_force_reset(gt))
		goto out_unlock;

	for_each_engine(engine, gt, id) {
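		/*
		 * Keep the submission tasklet from running while we reset the
		 * engine from within each atomic phase.
		 */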
		tasklet_disable(&engine->execlists.tasklet);
		intel_engine_pm_get(engine);

		for (p = igt_atomic_phases; p->name; p++) {
			GEM_TRACE("intel_engine_reset(%s) under %s\n",
				  engine->name, p->name);

			p->critical_section_begin();
			err = intel_engine_reset(engine, NULL);
			p->critical_section_end();

			if (err) {
				pr_err("intel_engine_reset(%s) failed under %s\n",
				       engine->name, p->name);
				break;
			}
		}

		intel_engine_pm_put(engine);
		tasklet_enable(&engine->execlists.tasklet);
		if (err)
			break;
	}

	/* As we poke around the guts, do a full reset before continuing. */
	igt_force_reset(gt);

out_unlock:
	igt_global_reset_unlock(gt);
	intel_gt_pm_put(gt);

	return err;
}

int intel_reset_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_global_reset), /* attempt to recover GPU first */
		SUBTEST(igt_reset_device_stolen),
		SUBTEST(igt_reset_engines_stolen),
		SUBTEST(igt_wedged_reset),
		SUBTEST(igt_atomic_reset),
		SUBTEST(igt_atomic_engine_reset),
	};
	struct intel_gt *gt = &i915->gt;

	if (!intel_has_gpu_reset(gt))
		return 0;

	if (intel_gt_is_wedged(gt))
		return -EIO; /* we're long past hope of a successful reset */

	return intel_gt_live_subtests(tests, gt);
}