1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 
25 #include <drm/drm_print.h>
26 
27 #include "gem/i915_gem_context.h"
28 
29 #include "i915_drv.h"
30 
31 #include "gt/intel_gt.h"
32 
33 #include "intel_engine.h"
34 #include "intel_engine_pm.h"
35 #include "intel_engine_pool.h"
36 #include "intel_engine_user.h"
37 #include "intel_context.h"
38 #include "intel_lrc.h"
39 #include "intel_reset.h"
40 
/* Haswell does have the CXT_SIZE register however it does not appear to be
 * valid. Now, docs explain in dwords what is in the context object. The full
 * size is 70720 bytes, however, the power context and execlist context will
 * never be saved (power context is stored elsewhere, and execlists don't work
 * on HSW) - so the final size, including the extra state required for the
 * Resource Streamer, is 66944 bytes, which rounds to 17 pages.
 */
#define HSW_CXT_TOTAL_SIZE		(17 * PAGE_SIZE)

/*
 * Render-class context image sizes returned by intel_engine_context_size()
 * for gen8 and later. DEFAULT_LR_CONTEXT_RENDER_SIZE is the fallback that is
 * returned (together with a MISSING_CASE warning) for a gen that has no
 * explicit case of its own.
 */
#define DEFAULT_LR_CONTEXT_RENDER_SIZE	(22 * PAGE_SIZE)
#define GEN8_LR_CONTEXT_RENDER_SIZE	(20 * PAGE_SIZE)
#define GEN9_LR_CONTEXT_RENDER_SIZE	(22 * PAGE_SIZE)
#define GEN10_LR_CONTEXT_RENDER_SIZE	(18 * PAGE_SIZE)
#define GEN11_LR_CONTEXT_RENDER_SIZE	(14 * PAGE_SIZE)

/* Context image size returned for the non-render classes on gen8 and later. */
#define GEN8_LR_CONTEXT_OTHER_SIZE	( 2 * PAGE_SIZE)
57 
/* Number of per-gen mmio base entries that one engine_info can carry. */
#define MAX_MMIO_BASES 3
/*
 * Static description of a single engine: its hardware id, its class/instance
 * pair, and the candidate mmio bases from which __engine_mmio_base() selects
 * one according to the device gen.
 */
struct engine_info {
	unsigned int hw_id;
	u8 class;
	u8 instance;
	/* mmio bases table *must* be sorted in reverse gen order */
	struct engine_mmio_base {
		u32 gen : 8;	/* lowest gen for which this base is selected */
		u32 base : 24;	/* mmio base; zero marks an unused slot */
	} mmio_bases[MAX_MMIO_BASES];
};
69 
/*
 * Table of engine descriptions, indexed by engine id (RCS0, BCS0, ...).
 *
 * Each entry lists its mmio bases from the newest gen to the oldest; slots
 * that are not listed stay zero-initialised. __engine_mmio_base() walks the
 * list and takes the first entry whose gen does not exceed the device gen.
 */
static const struct engine_info intel_engines[] = {
	[RCS0] = {
		.hw_id = RCS0_HW,
		.class = RENDER_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .gen = 1, .base = RENDER_RING_BASE }
		},
	},
	[BCS0] = {
		.hw_id = BCS0_HW,
		.class = COPY_ENGINE_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .gen = 6, .base = BLT_RING_BASE }
		},
	},
	[VCS0] = {
		.hw_id = VCS0_HW,
		.class = VIDEO_DECODE_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_BSD_RING_BASE },
			{ .gen = 6, .base = GEN6_BSD_RING_BASE },
			{ .gen = 4, .base = BSD_RING_BASE }
		},
	},
	[VCS1] = {
		.hw_id = VCS1_HW,
		.class = VIDEO_DECODE_CLASS,
		.instance = 1,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_BSD2_RING_BASE },
			{ .gen = 8, .base = GEN8_BSD2_RING_BASE }
		},
	},
	[VCS2] = {
		.hw_id = VCS2_HW,
		.class = VIDEO_DECODE_CLASS,
		.instance = 2,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_BSD3_RING_BASE }
		},
	},
	[VCS3] = {
		.hw_id = VCS3_HW,
		.class = VIDEO_DECODE_CLASS,
		.instance = 3,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_BSD4_RING_BASE }
		},
	},
	[VECS0] = {
		.hw_id = VECS0_HW,
		.class = VIDEO_ENHANCEMENT_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_VEBOX_RING_BASE },
			{ .gen = 7, .base = VEBOX_RING_BASE }
		},
	},
	[VECS1] = {
		.hw_id = VECS1_HW,
		.class = VIDEO_ENHANCEMENT_CLASS,
		.instance = 1,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_VEBOX2_RING_BASE }
		},
	},
};
140 
141 /**
142  * intel_engine_context_size() - return the size of the context for an engine
143  * @dev_priv: i915 device private
144  * @class: engine class
145  *
146  * Each engine class may require a different amount of space for a context
147  * image.
148  *
149  * Return: size (in bytes) of an engine class specific context image
150  *
151  * Note: this size includes the HWSP, which is part of the context image
152  * in LRC mode, but does not include the "shared data page" used with
153  * GuC submission. The caller should account for this if using the GuC.
154  */
155 u32 intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
156 {
157 	u32 cxt_size;
158 
159 	BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE);
160 
161 	switch (class) {
162 	case RENDER_CLASS:
163 		switch (INTEL_GEN(dev_priv)) {
164 		default:
165 			MISSING_CASE(INTEL_GEN(dev_priv));
166 			return DEFAULT_LR_CONTEXT_RENDER_SIZE;
167 		case 12:
168 		case 11:
169 			return GEN11_LR_CONTEXT_RENDER_SIZE;
170 		case 10:
171 			return GEN10_LR_CONTEXT_RENDER_SIZE;
172 		case 9:
173 			return GEN9_LR_CONTEXT_RENDER_SIZE;
174 		case 8:
175 			return GEN8_LR_CONTEXT_RENDER_SIZE;
176 		case 7:
177 			if (IS_HASWELL(dev_priv))
178 				return HSW_CXT_TOTAL_SIZE;
179 
180 			cxt_size = I915_READ(GEN7_CXT_SIZE);
181 			return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64,
182 					PAGE_SIZE);
183 		case 6:
184 			cxt_size = I915_READ(CXT_SIZE);
185 			return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
186 					PAGE_SIZE);
187 		case 5:
188 		case 4:
189 			/*
190 			 * There is a discrepancy here between the size reported
191 			 * by the register and the size of the context layout
192 			 * in the docs. Both are described as authorative!
193 			 *
194 			 * The discrepancy is on the order of a few cachelines,
195 			 * but the total is under one page (4k), which is our
196 			 * minimum allocation anyway so it should all come
197 			 * out in the wash.
198 			 */
199 			cxt_size = I915_READ(CXT_SIZE) + 1;
200 			DRM_DEBUG_DRIVER("gen%d CXT_SIZE = %d bytes [0x%08x]\n",
201 					 INTEL_GEN(dev_priv),
202 					 cxt_size * 64,
203 					 cxt_size - 1);
204 			return round_up(cxt_size * 64, PAGE_SIZE);
205 		case 3:
206 		case 2:
207 		/* For the special day when i810 gets merged. */
208 		case 1:
209 			return 0;
210 		}
211 		break;
212 	default:
213 		MISSING_CASE(class);
214 		/* fall through */
215 	case VIDEO_DECODE_CLASS:
216 	case VIDEO_ENHANCEMENT_CLASS:
217 	case COPY_ENGINE_CLASS:
218 		if (INTEL_GEN(dev_priv) < 8)
219 			return 0;
220 		return GEN8_LR_CONTEXT_OTHER_SIZE;
221 	}
222 }
223 
224 static u32 __engine_mmio_base(struct drm_i915_private *i915,
225 			      const struct engine_mmio_base *bases)
226 {
227 	int i;
228 
229 	for (i = 0; i < MAX_MMIO_BASES; i++)
230 		if (INTEL_GEN(i915) >= bases[i].gen)
231 			break;
232 
233 	GEM_BUG_ON(i == MAX_MMIO_BASES);
234 	GEM_BUG_ON(!bases[i].base);
235 
236 	return bases[i].base;
237 }
238 
239 static void __sprint_engine_name(struct intel_engine_cs *engine)
240 {
241 	/*
242 	 * Before we know what the uABI name for this engine will be,
243 	 * we still would like to keep track of this engine in the debug logs.
244 	 * We throw in a ' here as a reminder that this isn't its final name.
245 	 */
246 	GEM_WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s'%u",
247 			     intel_engine_class_repr(engine->class),
248 			     engine->instance) >= sizeof(engine->name));
249 }
250 
251 void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask)
252 {
253 	/*
254 	 * Though they added more rings on g4x/ilk, they did not add
255 	 * per-engine HWSTAM until gen6.
256 	 */
257 	if (INTEL_GEN(engine->i915) < 6 && engine->class != RENDER_CLASS)
258 		return;
259 
260 	if (INTEL_GEN(engine->i915) >= 3)
261 		ENGINE_WRITE(engine, RING_HWSTAM, mask);
262 	else
263 		ENGINE_WRITE16(engine, RING_HWSTAM, mask);
264 }
265 
/*
 * Scrub the engine's mmio state; called from intel_engine_setup() while the
 * engine is being created. Currently this only sets every bit of the HWSP
 * write mask.
 */
static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine)
{
	/* Mask off all writes into the unknown HWSP */
	intel_engine_set_hwsp_writemask(engine, ~0u);
}
271 
/*
 * Allocate and register the engine described by intel_engines[id].
 *
 * Fills in the identification fields (id, mask, class/instance, mmio base and
 * provisional name), computes the context size, masks off HWSP writes and
 * finally stores the engine in gt->engine_class[][] and gt->i915->engine[].
 *
 * Returns 0 on success, -EINVAL if the table entry is out of range or its
 * class/instance slot is already occupied, or -ENOMEM if allocation fails.
 */
static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
{
	const struct engine_info *info = &intel_engines[id];
	struct intel_engine_cs *engine;

	BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH));
	BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH));

	/* Reject table entries that would index outside gt->engine_class. */
	if (GEM_DEBUG_WARN_ON(info->class > MAX_ENGINE_CLASS))
		return -EINVAL;

	if (GEM_DEBUG_WARN_ON(info->instance > MAX_ENGINE_INSTANCE))
		return -EINVAL;

	/* Each class/instance pair may only be set up once. */
	if (GEM_DEBUG_WARN_ON(gt->engine_class[info->class][info->instance]))
		return -EINVAL;

	engine = kzalloc(sizeof(*engine), GFP_KERNEL);
	if (!engine)
		return -ENOMEM;

	BUILD_BUG_ON(BITS_PER_TYPE(engine->mask) < I915_NUM_ENGINES);

	engine->id = id;
	engine->mask = BIT(id);
	engine->i915 = gt->i915;
	engine->gt = gt;
	engine->uncore = gt->uncore;
	engine->hw_id = engine->guc_id = info->hw_id;
	engine->mmio_base = __engine_mmio_base(gt->i915, info->mmio_bases);

	engine->class = info->class;
	engine->instance = info->instance;
	__sprint_engine_name(engine);

	/*
	 * To be overridden by the backend on setup. However to facilitate
	 * cleanup on error during setup, we always provide the destroy vfunc.
	 */
	engine->destroy = (typeof(engine->destroy))kfree;

	engine->context_size = intel_engine_context_size(gt->i915,
							 engine->class);
	/* A size above BIT(20) bytes is warned about and treated as zero. */
	if (WARN_ON(engine->context_size > BIT(20)))
		engine->context_size = 0;
	if (engine->context_size)
		DRIVER_CAPS(gt->i915)->has_logical_contexts = true;

	/* Nothing to do here, execute in order of dependencies */
	engine->schedule = NULL;

	seqlock_init(&engine->stats.lock);

	ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);

	/* Scrub mmio state on takeover */
	intel_engine_sanitize_mmio(engine);

	/* Publish the engine in both the per-gt and the per-device lookups. */
	gt->engine_class[info->class][info->instance] = engine;

	intel_engine_add_user(engine);
	gt->i915->engine[id] = engine;

	return 0;
}
337 
338 static void __setup_engine_capabilities(struct intel_engine_cs *engine)
339 {
340 	struct drm_i915_private *i915 = engine->i915;
341 
342 	if (engine->class == VIDEO_DECODE_CLASS) {
343 		/*
344 		 * HEVC support is present on first engine instance
345 		 * before Gen11 and on all instances afterwards.
346 		 */
347 		if (INTEL_GEN(i915) >= 11 ||
348 		    (INTEL_GEN(i915) >= 9 && engine->instance == 0))
349 			engine->uabi_capabilities |=
350 				I915_VIDEO_CLASS_CAPABILITY_HEVC;
351 
352 		/*
353 		 * SFC block is present only on even logical engine
354 		 * instances.
355 		 */
356 		if ((INTEL_GEN(i915) >= 11 &&
357 		     RUNTIME_INFO(i915)->vdbox_sfc_access & engine->mask) ||
358 		    (INTEL_GEN(i915) >= 9 && engine->instance == 0))
359 			engine->uabi_capabilities |=
360 				I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC;
361 	} else if (engine->class == VIDEO_ENHANCEMENT_CLASS) {
362 		if (INTEL_GEN(i915) >= 9)
363 			engine->uabi_capabilities |=
364 				I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC;
365 	}
366 }
367 
/* Run __setup_engine_capabilities() on every engine of the device. */
static void intel_setup_engine_capabilities(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, i915, id)
		__setup_engine_capabilities(engine);
}
376 
/**
 * intel_engines_cleanup() - free the resources allocated for Command Streamers
 * @i915: the i915 device
 *
 * Invokes the destroy vfunc of every engine and clears its slot in
 * @i915->engine[].
 */
void intel_engines_cleanup(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, i915, id) {
		engine->destroy(engine);
		i915->engine[id] = NULL;
	}
}
391 
392 /**
393  * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
394  * @i915: the i915 device
395  *
396  * Return: non-zero if the initialization failed.
397  */
398 int intel_engines_init_mmio(struct drm_i915_private *i915)
399 {
400 	struct intel_device_info *device_info = mkwrite_device_info(i915);
401 	const unsigned int engine_mask = INTEL_INFO(i915)->engine_mask;
402 	unsigned int mask = 0;
403 	unsigned int i;
404 	int err;
405 
406 	WARN_ON(engine_mask == 0);
407 	WARN_ON(engine_mask &
408 		GENMASK(BITS_PER_TYPE(mask) - 1, I915_NUM_ENGINES));
409 
410 	if (i915_inject_probe_failure(i915))
411 		return -ENODEV;
412 
413 	for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
414 		if (!HAS_ENGINE(i915, i))
415 			continue;
416 
417 		err = intel_engine_setup(&i915->gt, i);
418 		if (err)
419 			goto cleanup;
420 
421 		mask |= BIT(i);
422 	}
423 
424 	/*
425 	 * Catch failures to update intel_engines table when the new engines
426 	 * are added to the driver by a warning and disabling the forgotten
427 	 * engines.
428 	 */
429 	if (WARN_ON(mask != engine_mask))
430 		device_info->engine_mask = mask;
431 
432 	RUNTIME_INFO(i915)->num_engines = hweight32(mask);
433 
434 	intel_gt_check_and_clear_faults(&i915->gt);
435 
436 	intel_setup_engine_capabilities(i915);
437 
438 	return 0;
439 
440 cleanup:
441 	intel_engines_cleanup(i915);
442 	return err;
443 }
444 
445 /**
446  * intel_engines_init() - init the Engine Command Streamers
447  * @i915: i915 device private
448  *
449  * Return: non-zero if the initialization failed.
450  */
451 int intel_engines_init(struct drm_i915_private *i915)
452 {
453 	int (*init)(struct intel_engine_cs *engine);
454 	struct intel_engine_cs *engine;
455 	enum intel_engine_id id;
456 	int err;
457 
458 	if (HAS_EXECLISTS(i915))
459 		init = intel_execlists_submission_init;
460 	else
461 		init = intel_ring_submission_init;
462 
463 	for_each_engine(engine, i915, id) {
464 		err = init(engine);
465 		if (err)
466 			goto cleanup;
467 	}
468 
469 	return 0;
470 
471 cleanup:
472 	intel_engines_cleanup(i915);
473 	return err;
474 }
475 
476 void intel_engine_init_execlists(struct intel_engine_cs *engine)
477 {
478 	struct intel_engine_execlists * const execlists = &engine->execlists;
479 
480 	execlists->port_mask = 1;
481 	GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists)));
482 	GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
483 
484 	memset(execlists->pending, 0, sizeof(execlists->pending));
485 	execlists->active =
486 		memset(execlists->inflight, 0, sizeof(execlists->inflight));
487 
488 	execlists->queue_priority_hint = INT_MIN;
489 	execlists->queue = RB_ROOT_CACHED;
490 }
491 
/*
 * Release the engine's status page: mask off HWSP writes, then drop the GGTT
 * pin (where one was taken), the CPU mapping and the object reference.
 * Does nothing beyond masking the writes if no status page vma is set.
 */
static void cleanup_status_page(struct intel_engine_cs *engine)
{
	struct i915_vma *vma;

	/* Prevent writes into HWSP after returning the page to the system */
	intel_engine_set_hwsp_writemask(engine, ~0u);

	/* Take the vma and clear the engine's pointer in one step. */
	vma = fetch_and_zero(&engine->status_page.vma);
	if (!vma)
		return;

	/* Mirrors init_status_page(): only non-physical HWS was pinned. */
	if (!HWS_NEEDS_PHYSICAL(engine->i915))
		i915_vma_unpin(vma);

	i915_gem_object_unpin_map(vma->obj);
	i915_gem_object_put(vma->obj);
}
509 
510 static int pin_ggtt_status_page(struct intel_engine_cs *engine,
511 				struct i915_vma *vma)
512 {
513 	unsigned int flags;
514 
515 	flags = PIN_GLOBAL;
516 	if (!HAS_LLC(engine->i915))
517 		/*
518 		 * On g33, we cannot place HWS above 256MiB, so
519 		 * restrict its pinning to the low mappable arena.
520 		 * Though this restriction is not documented for
521 		 * gen4, gen5, or byt, they also behave similarly
522 		 * and hang if the HWS is placed at the top of the
523 		 * GTT. To generalise, it appears that all !llc
524 		 * platforms have issues with us placing the HWS
525 		 * above the mappable region (even though we never
526 		 * actually map it).
527 		 */
528 		flags |= PIN_MAPPABLE;
529 	else
530 		flags |= PIN_HIGH;
531 
532 	return i915_vma_pin(vma, 0, 0, flags);
533 }
534 
535 static int init_status_page(struct intel_engine_cs *engine)
536 {
537 	struct drm_i915_gem_object *obj;
538 	struct i915_vma *vma;
539 	void *vaddr;
540 	int ret;
541 
542 	/*
543 	 * Though the HWS register does support 36bit addresses, historically
544 	 * we have had hangs and corruption reported due to wild writes if
545 	 * the HWS is placed above 4G. We only allow objects to be allocated
546 	 * in GFP_DMA32 for i965, and no earlier physical address users had
547 	 * access to more than 4G.
548 	 */
549 	obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
550 	if (IS_ERR(obj)) {
551 		DRM_ERROR("Failed to allocate status page\n");
552 		return PTR_ERR(obj);
553 	}
554 
555 	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
556 
557 	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
558 	if (IS_ERR(vma)) {
559 		ret = PTR_ERR(vma);
560 		goto err;
561 	}
562 
563 	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
564 	if (IS_ERR(vaddr)) {
565 		ret = PTR_ERR(vaddr);
566 		goto err;
567 	}
568 
569 	engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE);
570 	engine->status_page.vma = vma;
571 
572 	if (!HWS_NEEDS_PHYSICAL(engine->i915)) {
573 		ret = pin_ggtt_status_page(engine, vma);
574 		if (ret)
575 			goto err_unpin;
576 	}
577 
578 	return 0;
579 
580 err_unpin:
581 	i915_gem_object_unpin_map(obj);
582 err:
583 	i915_gem_object_put(obj);
584 	return ret;
585 }
586 
/*
 * Per-engine setup shared by the submission backends: creates the status
 * page and then initialises the engine's software state (active list,
 * breadcrumbs, execlists bookkeeping, hangcheck, command parser, pm, buffer
 * pool, sseu configuration and workaround lists).
 *
 * Returns 0 on success or the error from init_status_page().
 */
static int intel_engine_setup_common(struct intel_engine_cs *engine)
{
	int err;

	init_llist_head(&engine->barrier_tasks);

	/* The only step here that reports failure. */
	err = init_status_page(engine);
	if (err)
		return err;

	intel_engine_init_active(engine, ENGINE_PHYSICAL);
	intel_engine_init_breadcrumbs(engine);
	intel_engine_init_execlists(engine);
	intel_engine_init_hangcheck(engine);
	intel_engine_init_cmd_parser(engine);
	intel_engine_init__pm(engine);

	intel_engine_pool_init(&engine->pool);

	/* Use the whole device by default */
	engine->sseu =
		intel_sseu_from_device_info(&RUNTIME_INFO(engine->i915)->sseu);

	intel_engine_init_workarounds(engine);
	intel_engine_init_whitelist(engine);
	intel_engine_init_ctx_wa(engine);

	return 0;
}
616 
617 /**
618  * intel_engines_setup- setup engine state not requiring hw access
619  * @i915: Device to setup.
620  *
621  * Initializes engine structure members shared between legacy and execlists
622  * submission modes which do not require hardware access.
623  *
624  * Typically done early in the submission mode specific engine setup stage.
625  */
626 int intel_engines_setup(struct drm_i915_private *i915)
627 {
628 	int (*setup)(struct intel_engine_cs *engine);
629 	struct intel_engine_cs *engine;
630 	enum intel_engine_id id;
631 	int err;
632 
633 	if (HAS_EXECLISTS(i915))
634 		setup = intel_execlists_submission_setup;
635 	else
636 		setup = intel_ring_submission_setup;
637 
638 	for_each_engine(engine, i915, id) {
639 		err = intel_engine_setup_common(engine);
640 		if (err)
641 			goto cleanup;
642 
643 		err = setup(engine);
644 		if (err)
645 			goto cleanup;
646 
647 		/* We expect the backend to take control over its state */
648 		GEM_BUG_ON(engine->destroy == (typeof(engine->destroy))kfree);
649 
650 		GEM_BUG_ON(!engine->cops);
651 	}
652 
653 	return 0;
654 
655 cleanup:
656 	intel_engines_cleanup(i915);
657 	return err;
658 }
659 
/*
 * Throwaway request, timeline and ring used by measure_breadcrumb_dw();
 * cs[] is the buffer into which the breadcrumb is emitted.
 */
struct measure_breadcrumb {
	struct i915_request rq;
	struct intel_timeline timeline;
	struct intel_ring ring;
	u32 cs[1024];
};
666 
/*
 * Measure how many dwords engine->emit_fini_breadcrumb() writes.
 *
 * A temporary request, timeline and ring are set up inside a heap-allocated
 * measure_breadcrumb frame so that the breadcrumb can be emitted into
 * frame->cs; the distance between the pointer returned by the emitter and the
 * start of the buffer is the length in dwords.
 *
 * Returns the dword count on success, the negative error from
 * intel_timeline_pin() if pinning fails, or -ENOMEM if the frame allocation
 * or intel_timeline_init() fails.
 */
static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
{
	struct measure_breadcrumb *frame;
	int dw = -ENOMEM;

	GEM_BUG_ON(!engine->gt->scratch);

	frame = kzalloc(sizeof(*frame), GFP_KERNEL);
	if (!frame)
		return -ENOMEM;

	/* Any failure here is reported as -ENOMEM (dw's initial value). */
	if (intel_timeline_init(&frame->timeline,
				engine->gt,
				engine->status_page.vma))
		goto out_frame;

	mutex_lock(&frame->timeline.mutex);

	/* Back the fake ring with the frame's own cs[] buffer. */
	frame->ring.vaddr = frame->cs;
	frame->ring.size = sizeof(frame->cs);
	frame->ring.effective_size = frame->ring.size;
	intel_ring_update_space(&frame->ring);

	/* Fill in the request fields set up for the emitter. */
	frame->rq.i915 = engine->i915;
	frame->rq.engine = engine;
	frame->rq.ring = &frame->ring;
	rcu_assign_pointer(frame->rq.timeline, &frame->timeline);

	dw = intel_timeline_pin(&frame->timeline);
	if (dw < 0)
		goto out_timeline;

	spin_lock_irq(&engine->active.lock);
	dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs;
	spin_unlock_irq(&engine->active.lock);

	GEM_BUG_ON(dw & 1); /* RING_TAIL must be qword aligned */

	intel_timeline_unpin(&frame->timeline);

out_timeline:
	mutex_unlock(&frame->timeline.mutex);
	intel_timeline_fini(&frame->timeline);
out_frame:
	kfree(frame);
	return dw;
}
714 
/*
 * Initialise engine->active: an empty request list plus its spinlock, whose
 * lockdep subclass is set to @subclass.
 */
void
intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass)
{
	INIT_LIST_HEAD(&engine->active.requests);

	spin_lock_init(&engine->active.lock);
	lockdep_set_subclass(&engine->active.lock, subclass);

	/*
	 * Due to an interesting quirk in lockdep's internal debug tracking,
	 * after setting a subclass we must ensure the lock is used. Otherwise,
	 * nr_unused_locks is incremented once too often.
	 */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	/* Touch only the lockdep map, with irqs off. */
	local_irq_disable();
	lock_map_acquire(&engine->active.lock.dep_map);
	lock_map_release(&engine->active.lock.dep_map);
	local_irq_enable();
#endif
}
735 
736 static struct intel_context *
737 create_kernel_context(struct intel_engine_cs *engine)
738 {
739 	struct intel_context *ce;
740 	int err;
741 
742 	ce = intel_context_create(engine->i915->kernel_context, engine);
743 	if (IS_ERR(ce))
744 		return ce;
745 
746 	ce->ring = __intel_context_ring_size(SZ_4K);
747 
748 	err = intel_context_pin(ce);
749 	if (err) {
750 		intel_context_put(ce);
751 		return ERR_PTR(err);
752 	}
753 
754 	return ce;
755 }
756 
757 /**
758  * intel_engines_init_common - initialize cengine state which might require hw access
759  * @engine: Engine to initialize.
760  *
761  * Initializes @engine@ structure members shared between legacy and execlists
762  * submission modes which do require hardware access.
763  *
764  * Typcally done at later stages of submission mode specific engine setup.
765  *
766  * Returns zero on success or an error code on failure.
767  */
768 int intel_engine_init_common(struct intel_engine_cs *engine)
769 {
770 	struct intel_context *ce;
771 	int ret;
772 
773 	engine->set_default_submission(engine);
774 
775 	/*
776 	 * We may need to do things with the shrinker which
777 	 * require us to immediately switch back to the default
778 	 * context. This can cause a problem as pinning the
779 	 * default context also requires GTT space which may not
780 	 * be available. To avoid this we always pin the default
781 	 * context.
782 	 */
783 	ce = create_kernel_context(engine);
784 	if (IS_ERR(ce))
785 		return PTR_ERR(ce);
786 
787 	engine->kernel_context = ce;
788 
789 	ret = measure_breadcrumb_dw(engine);
790 	if (ret < 0)
791 		goto err_unpin;
792 
793 	engine->emit_fini_breadcrumb_dw = ret;
794 
795 	return 0;
796 
797 err_unpin:
798 	intel_context_unpin(ce);
799 	intel_context_put(ce);
800 	return ret;
801 }
802 
/**
 * intel_engine_cleanup_common - cleans up the engine state created by
 *                               the common initializers.
 * @engine: Engine to cleanup.
 *
 * This cleans up everything created by the common helpers.
 */
void intel_engine_cleanup_common(struct intel_engine_cs *engine)
{
	/* No request may still be on the engine's active list. */
	GEM_BUG_ON(!list_empty(&engine->active.requests));

	cleanup_status_page(engine);

	intel_engine_pool_fini(&engine->pool);
	intel_engine_fini_breadcrumbs(engine);
	intel_engine_cleanup_cmd_parser(engine);

	if (engine->default_state)
		i915_gem_object_put(engine->default_state);

	/* Undo the pin and reference held on the kernel context, if any. */
	if (engine->kernel_context) {
		intel_context_unpin(engine->kernel_context);
		intel_context_put(engine->kernel_context);
	}
	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));

	intel_wa_list_free(&engine->ctx_wa_list);
	intel_wa_list_free(&engine->wa_list);
	intel_wa_list_free(&engine->whitelist);
}
833 
834 u64 intel_engine_get_active_head(const struct intel_engine_cs *engine)
835 {
836 	struct drm_i915_private *i915 = engine->i915;
837 
838 	u64 acthd;
839 
840 	if (INTEL_GEN(i915) >= 8)
841 		acthd = ENGINE_READ64(engine, RING_ACTHD, RING_ACTHD_UDW);
842 	else if (INTEL_GEN(i915) >= 4)
843 		acthd = ENGINE_READ(engine, RING_ACTHD);
844 	else
845 		acthd = ENGINE_READ(engine, ACTHD);
846 
847 	return acthd;
848 }
849 
850 u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine)
851 {
852 	u64 bbaddr;
853 
854 	if (INTEL_GEN(engine->i915) >= 8)
855 		bbaddr = ENGINE_READ64(engine, RING_BBADDR, RING_BBADDR_UDW);
856 	else
857 		bbaddr = ENGINE_READ(engine, RING_BBADDR);
858 
859 	return bbaddr;
860 }
861 
862 int intel_engine_stop_cs(struct intel_engine_cs *engine)
863 {
864 	struct intel_uncore *uncore = engine->uncore;
865 	const u32 base = engine->mmio_base;
866 	const i915_reg_t mode = RING_MI_MODE(base);
867 	int err;
868 
869 	if (INTEL_GEN(engine->i915) < 3)
870 		return -ENODEV;
871 
872 	GEM_TRACE("%s\n", engine->name);
873 
874 	intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING));
875 
876 	err = 0;
877 	if (__intel_wait_for_register_fw(uncore,
878 					 mode, MODE_IDLE, MODE_IDLE,
879 					 1000, 0,
880 					 NULL)) {
881 		GEM_TRACE("%s: timed out on STOP_RING -> IDLE\n", engine->name);
882 		err = -ETIMEDOUT;
883 	}
884 
885 	/* A final mmio read to let GPU writes be hopefully flushed to memory */
886 	intel_uncore_posting_read_fw(uncore, mode);
887 
888 	return err;
889 }
890 
/* Clear STOP_RING in the engine's MI_MODE register again. */
void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine)
{
	GEM_TRACE("%s\n", engine->name);

	ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
}
897 
898 const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
899 {
900 	switch (type) {
901 	case I915_CACHE_NONE: return " uncached";
902 	case I915_CACHE_LLC: return HAS_LLC(i915) ? " LLC" : " snooped";
903 	case I915_CACHE_L3_LLC: return " L3+LLC";
904 	case I915_CACHE_WT: return " WT";
905 	default: return "";
906 	}
907 }
908 
/*
 * Read @reg for one specific @slice/@subslice.
 *
 * The slice/subslice selection bits of GEN8_MCR_SELECTOR are temporarily
 * rewritten to steer the read and are then restored to their previous value.
 * The whole sequence runs under uncore->lock with the forcewake domains
 * needed for both registers held.
 */
static u32
read_subslice_reg(struct intel_engine_cs *engine, int slice, int subslice,
		  i915_reg_t reg)
{
	struct drm_i915_private *i915 = engine->i915;
	struct intel_uncore *uncore = engine->uncore;
	u32 mcr_mask, mcr_ss, mcr, old_mcr, val;
	enum forcewake_domains fw_domains;

	/* The selector field layout differs from gen11 onwards. */
	if (INTEL_GEN(i915) >= 11) {
		mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
		mcr_ss = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
	} else {
		mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
		mcr_ss = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
	}

	fw_domains = intel_uncore_forcewake_for_reg(uncore, reg,
						    FW_REG_READ);
	fw_domains |= intel_uncore_forcewake_for_reg(uncore,
						     GEN8_MCR_SELECTOR,
						     FW_REG_READ | FW_REG_WRITE);

	spin_lock_irq(&uncore->lock);
	intel_uncore_forcewake_get__locked(uncore, fw_domains);

	old_mcr = mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR);

	/* Point the selector at the requested slice/subslice. */
	mcr &= ~mcr_mask;
	mcr |= mcr_ss;
	intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);

	val = intel_uncore_read_fw(uncore, reg);

	/* Put back the selection bits that were there before. */
	mcr &= ~mcr_mask;
	mcr |= old_mcr & mcr_mask;

	intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);

	intel_uncore_forcewake_put__locked(uncore, fw_domains);
	spin_unlock_irq(&uncore->lock);

	return val;
}
953 
/*
 * Fill @instdone with the engine's INSTDONE register values.
 *
 * NB: please notice the memset - @instdone is cleared first, so any field
 * that is not read for the current gen/engine is reported as zero.
 */
void intel_engine_get_instdone(struct intel_engine_cs *engine,
			       struct intel_instdone *instdone)
{
	struct drm_i915_private *i915 = engine->i915;
	const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
	struct intel_uncore *uncore = engine->uncore;
	u32 mmio_base = engine->mmio_base;
	int slice;
	int subslice;

	memset(instdone, 0, sizeof(*instdone));

	switch (INTEL_GEN(i915)) {
	/* Every gen without an explicit case below (i.e. gen8 and later). */
	default:
		instdone->instdone =
			intel_uncore_read(uncore, RING_INSTDONE(mmio_base));

		/* The remaining registers are only read for RCS0. */
		if (engine->id != RCS0)
			break;

		instdone->slice_common =
			intel_uncore_read(uncore, GEN7_SC_INSTDONE);
		/* Sampler/row state is read per slice/subslice pair. */
		for_each_instdone_slice_subslice(i915, sseu, slice, subslice) {
			instdone->sampler[slice][subslice] =
				read_subslice_reg(engine, slice, subslice,
						  GEN7_SAMPLER_INSTDONE);
			instdone->row[slice][subslice] =
				read_subslice_reg(engine, slice, subslice,
						  GEN7_ROW_INSTDONE);
		}
		break;
	case 7:
		instdone->instdone =
			intel_uncore_read(uncore, RING_INSTDONE(mmio_base));

		if (engine->id != RCS0)
			break;

		/* Read directly and stored in element [0][0] only. */
		instdone->slice_common =
			intel_uncore_read(uncore, GEN7_SC_INSTDONE);
		instdone->sampler[0][0] =
			intel_uncore_read(uncore, GEN7_SAMPLER_INSTDONE);
		instdone->row[0][0] =
			intel_uncore_read(uncore, GEN7_ROW_INSTDONE);

		break;
	case 6:
	case 5:
	case 4:
		instdone->instdone =
			intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
		if (engine->id == RCS0)
			/* HACK: Using the wrong struct member */
			instdone->slice_common =
				intel_uncore_read(uncore, GEN4_INSTDONE1);
		break;
	case 3:
	case 2:
		instdone->instdone = intel_uncore_read(uncore, GEN2_INSTDONE);
		break;
	}
}
1017 
/*
 * Check the ring registers to see whether the engine is idle.
 *
 * Returns true if the engine is not awake (no pm reference could be taken),
 * or if the ring head equals the tail and, after gen2, MI_MODE reports
 * MODE_IDLE.
 */
static bool ring_is_idle(struct intel_engine_cs *engine)
{
	bool idle = true;

	if (I915_SELFTEST_ONLY(!engine->mmio_base))
		return true;

	/* Only touch the registers while holding an engine pm reference. */
	if (!intel_engine_pm_get_if_awake(engine))
		return true;

	/* First check that no commands are left in the ring */
	if ((ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) !=
	    (ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR))
		idle = false;

	/* No bit for gen2, so assume the CS parser is idle */
	if (INTEL_GEN(engine->i915) > 2 &&
	    !(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE))
		idle = false;

	intel_engine_pm_put(engine);

	return idle;
}
1042 
/**
 * intel_engine_is_idle() - Report if the engine has finished processing all work
 * @engine: the intel_engine_cs
 *
 * Return true if there are no requests pending, nothing left to be submitted
 * to hardware, and that the engine is idle. A wedged GT or an engine that is
 * not awake is also reported as idle.
 */
bool intel_engine_is_idle(struct intel_engine_cs *engine)
{
	/* More white lies, if wedged, hw state is inconsistent */
	if (intel_gt_is_wedged(engine->gt))
		return true;

	if (!intel_engine_pm_is_awake(engine))
		return true;

	/* Waiting to drain ELSP? */
	if (execlists_active(&engine->execlists)) {
		struct tasklet_struct *t = &engine->execlists.tasklet;

		synchronize_hardirq(engine->i915->drm.pdev->irq);

		/* Run the tasklet body here if nobody else is running it. */
		local_bh_disable();
		if (tasklet_trylock(t)) {
			/* Must wait for any GPU reset in progress. */
			if (__tasklet_is_enabled(t))
				t->func(t->data);
			tasklet_unlock(t);
		}
		local_bh_enable();

		/* Otherwise flush the tasklet if it was on another cpu */
		tasklet_unlock_wait(t);

		/* Still something active after the tasklet ran: not idle. */
		if (execlists_active(&engine->execlists))
			return false;
	}

	/* ELSP is empty, but there are ready requests? E.g. after reset */
	if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root))
		return false;

	/* Ring stopped? */
	return ring_is_idle(engine);
}
1088 
1089 bool intel_engines_are_idle(struct intel_gt *gt)
1090 {
1091 	struct intel_engine_cs *engine;
1092 	enum intel_engine_id id;
1093 
1094 	/*
1095 	 * If the driver is wedged, HW state may be very inconsistent and
1096 	 * report that it is still busy, even though we have stopped using it.
1097 	 */
1098 	if (intel_gt_is_wedged(gt))
1099 		return true;
1100 
1101 	/* Already parked (and passed an idleness test); must still be idle */
1102 	if (!READ_ONCE(gt->awake))
1103 		return true;
1104 
1105 	for_each_engine(engine, gt->i915, id) {
1106 		if (!intel_engine_is_idle(engine))
1107 			return false;
1108 	}
1109 
1110 	return true;
1111 }
1112 
1113 void intel_engines_reset_default_submission(struct intel_gt *gt)
1114 {
1115 	struct intel_engine_cs *engine;
1116 	enum intel_engine_id id;
1117 
1118 	for_each_engine(engine, gt->i915, id)
1119 		engine->set_default_submission(engine);
1120 }
1121 
1122 bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
1123 {
1124 	switch (INTEL_GEN(engine->i915)) {
1125 	case 2:
1126 		return false; /* uses physical not virtual addresses */
1127 	case 3:
1128 		/* maybe only uses physical not virtual addresses */
1129 		return !(IS_I915G(engine->i915) || IS_I915GM(engine->i915));
1130 	case 4:
1131 		return !IS_I965G(engine->i915); /* who knows! */
1132 	case 6:
1133 		return engine->class != VIDEO_DECODE_CLASS; /* b0rked */
1134 	default:
1135 		return true;
1136 	}
1137 }
1138 
1139 static int print_sched_attr(struct drm_i915_private *i915,
1140 			    const struct i915_sched_attr *attr,
1141 			    char *buf, int x, int len)
1142 {
1143 	if (attr->priority == I915_PRIORITY_INVALID)
1144 		return x;
1145 
1146 	x += snprintf(buf + x, len - x,
1147 		      " prio=%d", attr->priority);
1148 
1149 	return x;
1150 }
1151 
1152 static void print_request(struct drm_printer *m,
1153 			  struct i915_request *rq,
1154 			  const char *prefix)
1155 {
1156 	const char *name = rq->fence.ops->get_timeline_name(&rq->fence);
1157 	char buf[80] = "";
1158 	int x = 0;
1159 
1160 	x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf));
1161 
1162 	drm_printf(m, "%s %llx:%llx%s%s %s @ %dms: %s\n",
1163 		   prefix,
1164 		   rq->fence.context, rq->fence.seqno,
1165 		   i915_request_completed(rq) ? "!" :
1166 		   i915_request_started(rq) ? "*" :
1167 		   "",
1168 		   test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
1169 			    &rq->fence.flags) ? "+" :
1170 		   test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
1171 			    &rq->fence.flags) ? "-" :
1172 		   "",
1173 		   buf,
1174 		   jiffies_to_msecs(jiffies - rq->emitted_jiffies),
1175 		   name);
1176 }
1177 
1178 static void hexdump(struct drm_printer *m, const void *buf, size_t len)
1179 {
1180 	const size_t rowsize = 8 * sizeof(u32);
1181 	const void *prev = NULL;
1182 	bool skip = false;
1183 	size_t pos;
1184 
1185 	for (pos = 0; pos < len; pos += rowsize) {
1186 		char line[128];
1187 
1188 		if (prev && !memcmp(prev, buf + pos, rowsize)) {
1189 			if (!skip) {
1190 				drm_printf(m, "*\n");
1191 				skip = true;
1192 			}
1193 			continue;
1194 		}
1195 
1196 		WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
1197 						rowsize, sizeof(u32),
1198 						line, sizeof(line),
1199 						false) >= sizeof(line));
1200 		drm_printf(m, "[%04zx] %s\n", pos, line);
1201 
1202 		prev = buf + pos;
1203 		skip = false;
1204 	}
1205 }
1206 
1207 static struct intel_timeline *get_timeline(struct i915_request *rq)
1208 {
1209 	struct intel_timeline *tl;
1210 
1211 	/*
1212 	 * Even though we are holding the engine->active.lock here, there
1213 	 * is no control over the submission queue per-se and we are
1214 	 * inspecting the active state at a random point in time, with an
1215 	 * unknown queue. Play safe and make sure the timeline remains valid.
1216 	 * (Only being used for pretty printing, one extra kref shouldn't
1217 	 * cause a camel stampede!)
1218 	 */
1219 	rcu_read_lock();
1220 	tl = rcu_dereference(rq->timeline);
1221 	if (!kref_get_unless_zero(&tl->kref))
1222 		tl = NULL;
1223 	rcu_read_unlock();
1224 
1225 	return tl;
1226 }
1227 
1228 static void intel_engine_print_registers(struct intel_engine_cs *engine,
1229 					 struct drm_printer *m)
1230 {
1231 	struct drm_i915_private *dev_priv = engine->i915;
1232 	const struct intel_engine_execlists * const execlists =
1233 		&engine->execlists;
1234 	unsigned long flags;
1235 	u64 addr;
1236 
1237 	if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7))
1238 		drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID));
1239 	drm_printf(m, "\tRING_START: 0x%08x\n",
1240 		   ENGINE_READ(engine, RING_START));
1241 	drm_printf(m, "\tRING_HEAD:  0x%08x\n",
1242 		   ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR);
1243 	drm_printf(m, "\tRING_TAIL:  0x%08x\n",
1244 		   ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR);
1245 	drm_printf(m, "\tRING_CTL:   0x%08x%s\n",
1246 		   ENGINE_READ(engine, RING_CTL),
1247 		   ENGINE_READ(engine, RING_CTL) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? " [waiting]" : "");
1248 	if (INTEL_GEN(engine->i915) > 2) {
1249 		drm_printf(m, "\tRING_MODE:  0x%08x%s\n",
1250 			   ENGINE_READ(engine, RING_MI_MODE),
1251 			   ENGINE_READ(engine, RING_MI_MODE) & (MODE_IDLE) ? " [idle]" : "");
1252 	}
1253 
1254 	if (INTEL_GEN(dev_priv) >= 6) {
1255 		drm_printf(m, "\tRING_IMR: %08x\n",
1256 			   ENGINE_READ(engine, RING_IMR));
1257 	}
1258 
1259 	addr = intel_engine_get_active_head(engine);
1260 	drm_printf(m, "\tACTHD:  0x%08x_%08x\n",
1261 		   upper_32_bits(addr), lower_32_bits(addr));
1262 	addr = intel_engine_get_last_batch_head(engine);
1263 	drm_printf(m, "\tBBADDR: 0x%08x_%08x\n",
1264 		   upper_32_bits(addr), lower_32_bits(addr));
1265 	if (INTEL_GEN(dev_priv) >= 8)
1266 		addr = ENGINE_READ64(engine, RING_DMA_FADD, RING_DMA_FADD_UDW);
1267 	else if (INTEL_GEN(dev_priv) >= 4)
1268 		addr = ENGINE_READ(engine, RING_DMA_FADD);
1269 	else
1270 		addr = ENGINE_READ(engine, DMA_FADD_I8XX);
1271 	drm_printf(m, "\tDMA_FADDR: 0x%08x_%08x\n",
1272 		   upper_32_bits(addr), lower_32_bits(addr));
1273 	if (INTEL_GEN(dev_priv) >= 4) {
1274 		drm_printf(m, "\tIPEIR: 0x%08x\n",
1275 			   ENGINE_READ(engine, RING_IPEIR));
1276 		drm_printf(m, "\tIPEHR: 0x%08x\n",
1277 			   ENGINE_READ(engine, RING_IPEHR));
1278 	} else {
1279 		drm_printf(m, "\tIPEIR: 0x%08x\n", ENGINE_READ(engine, IPEIR));
1280 		drm_printf(m, "\tIPEHR: 0x%08x\n", ENGINE_READ(engine, IPEHR));
1281 	}
1282 
1283 	if (HAS_EXECLISTS(dev_priv)) {
1284 		struct i915_request * const *port, *rq;
1285 		const u32 *hws =
1286 			&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
1287 		const u8 num_entries = execlists->csb_size;
1288 		unsigned int idx;
1289 		u8 read, write;
1290 
1291 		drm_printf(m, "\tExeclist status: 0x%08x %08x, entries %u\n",
1292 			   ENGINE_READ(engine, RING_EXECLIST_STATUS_LO),
1293 			   ENGINE_READ(engine, RING_EXECLIST_STATUS_HI),
1294 			   num_entries);
1295 
1296 		read = execlists->csb_head;
1297 		write = READ_ONCE(*execlists->csb_write);
1298 
1299 		drm_printf(m, "\tExeclist CSB read %d, write %d, tasklet queued? %s (%s)\n",
1300 			   read, write,
1301 			   yesno(test_bit(TASKLET_STATE_SCHED,
1302 					  &engine->execlists.tasklet.state)),
1303 			   enableddisabled(!atomic_read(&engine->execlists.tasklet.count)));
1304 		if (read >= num_entries)
1305 			read = 0;
1306 		if (write >= num_entries)
1307 			write = 0;
1308 		if (read > write)
1309 			write += num_entries;
1310 		while (read < write) {
1311 			idx = ++read % num_entries;
1312 			drm_printf(m, "\tExeclist CSB[%d]: 0x%08x, context: %d\n",
1313 				   idx, hws[idx * 2], hws[idx * 2 + 1]);
1314 		}
1315 
1316 		spin_lock_irqsave(&engine->active.lock, flags);
1317 		for (port = execlists->active; (rq = *port); port++) {
1318 			char hdr[80];
1319 			int len;
1320 
1321 			len = snprintf(hdr, sizeof(hdr),
1322 				       "\t\tActive[%d]: ",
1323 				       (int)(port - execlists->active));
1324 			if (!i915_request_signaled(rq)) {
1325 				struct intel_timeline *tl = get_timeline(rq);
1326 
1327 				len += snprintf(hdr + len, sizeof(hdr) - len,
1328 						"ring:{start:%08x, hwsp:%08x, seqno:%08x}, ",
1329 						i915_ggtt_offset(rq->ring->vma),
1330 						tl ? tl->hwsp_offset : 0,
1331 						hwsp_seqno(rq));
1332 
1333 				if (tl)
1334 					intel_timeline_put(tl);
1335 			}
1336 			snprintf(hdr + len, sizeof(hdr) - len, "rq: ");
1337 			print_request(m, rq, hdr);
1338 		}
1339 		for (port = execlists->pending; (rq = *port); port++) {
1340 			struct intel_timeline *tl = get_timeline(rq);
1341 			char hdr[80];
1342 
1343 			snprintf(hdr, sizeof(hdr),
1344 				 "\t\tPending[%d] ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ",
1345 				 (int)(port - execlists->pending),
1346 				 i915_ggtt_offset(rq->ring->vma),
1347 				 tl ? tl->hwsp_offset : 0,
1348 				 hwsp_seqno(rq));
1349 			print_request(m, rq, hdr);
1350 
1351 			if (tl)
1352 				intel_timeline_put(tl);
1353 		}
1354 		spin_unlock_irqrestore(&engine->active.lock, flags);
1355 	} else if (INTEL_GEN(dev_priv) > 6) {
1356 		drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
1357 			   ENGINE_READ(engine, RING_PP_DIR_BASE));
1358 		drm_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n",
1359 			   ENGINE_READ(engine, RING_PP_DIR_BASE_READ));
1360 		drm_printf(m, "\tPP_DIR_DCLV: 0x%08x\n",
1361 			   ENGINE_READ(engine, RING_PP_DIR_DCLV));
1362 	}
1363 }
1364 
1365 static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
1366 {
1367 	void *ring;
1368 	int size;
1369 
1370 	drm_printf(m,
1371 		   "[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n",
1372 		   rq->head, rq->postfix, rq->tail,
1373 		   rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u,
1374 		   rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u);
1375 
1376 	size = rq->tail - rq->head;
1377 	if (rq->tail < rq->head)
1378 		size += rq->ring->size;
1379 
1380 	ring = kmalloc(size, GFP_ATOMIC);
1381 	if (ring) {
1382 		const void *vaddr = rq->ring->vaddr;
1383 		unsigned int head = rq->head;
1384 		unsigned int len = 0;
1385 
1386 		if (rq->tail < head) {
1387 			len = rq->ring->size - head;
1388 			memcpy(ring, vaddr + head, len);
1389 			head = 0;
1390 		}
1391 		memcpy(ring + len, vaddr + head, size - len);
1392 
1393 		hexdump(m, ring, size);
1394 		kfree(ring);
1395 	}
1396 }
1397 
1398 void intel_engine_dump(struct intel_engine_cs *engine,
1399 		       struct drm_printer *m,
1400 		       const char *header, ...)
1401 {
1402 	struct i915_gpu_error * const error = &engine->i915->gpu_error;
1403 	struct i915_request *rq;
1404 	intel_wakeref_t wakeref;
1405 	unsigned long flags;
1406 
1407 	if (header) {
1408 		va_list ap;
1409 
1410 		va_start(ap, header);
1411 		drm_vprintf(m, header, &ap);
1412 		va_end(ap);
1413 	}
1414 
1415 	if (intel_gt_is_wedged(engine->gt))
1416 		drm_printf(m, "*** WEDGED ***\n");
1417 
1418 	drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count));
1419 	drm_printf(m, "\tHangcheck: %d ms ago\n",
1420 		   jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp));
1421 	drm_printf(m, "\tReset count: %d (global %d)\n",
1422 		   i915_reset_engine_count(error, engine),
1423 		   i915_reset_count(error));
1424 
1425 	drm_printf(m, "\tRequests:\n");
1426 
1427 	spin_lock_irqsave(&engine->active.lock, flags);
1428 	rq = intel_engine_find_active_request(engine);
1429 	if (rq) {
1430 		struct intel_timeline *tl = get_timeline(rq);
1431 
1432 		print_request(m, rq, "\t\tactive ");
1433 
1434 		drm_printf(m, "\t\tring->start:  0x%08x\n",
1435 			   i915_ggtt_offset(rq->ring->vma));
1436 		drm_printf(m, "\t\tring->head:   0x%08x\n",
1437 			   rq->ring->head);
1438 		drm_printf(m, "\t\tring->tail:   0x%08x\n",
1439 			   rq->ring->tail);
1440 		drm_printf(m, "\t\tring->emit:   0x%08x\n",
1441 			   rq->ring->emit);
1442 		drm_printf(m, "\t\tring->space:  0x%08x\n",
1443 			   rq->ring->space);
1444 
1445 		if (tl) {
1446 			drm_printf(m, "\t\tring->hwsp:   0x%08x\n",
1447 				   tl->hwsp_offset);
1448 			intel_timeline_put(tl);
1449 		}
1450 
1451 		print_request_ring(m, rq);
1452 
1453 		if (rq->hw_context->lrc_reg_state) {
1454 			drm_printf(m, "Logical Ring Context:\n");
1455 			hexdump(m, rq->hw_context->lrc_reg_state, PAGE_SIZE);
1456 		}
1457 	}
1458 	spin_unlock_irqrestore(&engine->active.lock, flags);
1459 
1460 	drm_printf(m, "\tMMIO base:  0x%08x\n", engine->mmio_base);
1461 	wakeref = intel_runtime_pm_get_if_in_use(&engine->i915->runtime_pm);
1462 	if (wakeref) {
1463 		intel_engine_print_registers(engine, m);
1464 		intel_runtime_pm_put(&engine->i915->runtime_pm, wakeref);
1465 	} else {
1466 		drm_printf(m, "\tDevice is asleep; skipping register dump\n");
1467 	}
1468 
1469 	intel_execlists_show_requests(engine, m, print_request, 8);
1470 
1471 	drm_printf(m, "HWSP:\n");
1472 	hexdump(m, engine->status_page.addr, PAGE_SIZE);
1473 
1474 	drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine)));
1475 
1476 	intel_engine_print_breadcrumbs(engine, m);
1477 }
1478 
1479 /**
1480  * intel_enable_engine_stats() - Enable engine busy tracking on engine
1481  * @engine: engine to enable stats collection
1482  *
1483  * Start collecting the engine busyness data for @engine.
1484  *
1485  * Returns 0 on success or a negative error code.
1486  */
1487 int intel_enable_engine_stats(struct intel_engine_cs *engine)
1488 {
1489 	struct intel_engine_execlists *execlists = &engine->execlists;
1490 	unsigned long flags;
1491 	int err = 0;
1492 
1493 	if (!intel_engine_supports_stats(engine))
1494 		return -ENODEV;
1495 
1496 	spin_lock_irqsave(&engine->active.lock, flags);
1497 	write_seqlock(&engine->stats.lock);
1498 
1499 	if (unlikely(engine->stats.enabled == ~0)) {
1500 		err = -EBUSY;
1501 		goto unlock;
1502 	}
1503 
1504 	if (engine->stats.enabled++ == 0) {
1505 		struct i915_request * const *port;
1506 		struct i915_request *rq;
1507 
1508 		engine->stats.enabled_at = ktime_get();
1509 
1510 		/* XXX submission method oblivious? */
1511 		for (port = execlists->active; (rq = *port); port++)
1512 			engine->stats.active++;
1513 
1514 		for (port = execlists->pending; (rq = *port); port++) {
1515 			/* Exclude any contexts already counted in active */
1516 			if (!intel_context_inflight_count(rq->hw_context))
1517 				engine->stats.active++;
1518 		}
1519 
1520 		if (engine->stats.active)
1521 			engine->stats.start = engine->stats.enabled_at;
1522 	}
1523 
1524 unlock:
1525 	write_sequnlock(&engine->stats.lock);
1526 	spin_unlock_irqrestore(&engine->active.lock, flags);
1527 
1528 	return err;
1529 }
1530 
1531 static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine)
1532 {
1533 	ktime_t total = engine->stats.total;
1534 
1535 	/*
1536 	 * If the engine is executing something at the moment
1537 	 * add it to the total.
1538 	 */
1539 	if (engine->stats.active)
1540 		total = ktime_add(total,
1541 				  ktime_sub(ktime_get(), engine->stats.start));
1542 
1543 	return total;
1544 }
1545 
1546 /**
1547  * intel_engine_get_busy_time() - Return current accumulated engine busyness
1548  * @engine: engine to report on
1549  *
1550  * Returns accumulated time @engine was busy since engine stats were enabled.
1551  */
1552 ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine)
1553 {
1554 	unsigned int seq;
1555 	ktime_t total;
1556 
1557 	do {
1558 		seq = read_seqbegin(&engine->stats.lock);
1559 		total = __intel_engine_get_busy_time(engine);
1560 	} while (read_seqretry(&engine->stats.lock, seq));
1561 
1562 	return total;
1563 }
1564 
1565 /**
1566  * intel_disable_engine_stats() - Disable engine busy tracking on engine
1567  * @engine: engine to disable stats collection
1568  *
1569  * Stops collecting the engine busyness data for @engine.
1570  */
1571 void intel_disable_engine_stats(struct intel_engine_cs *engine)
1572 {
1573 	unsigned long flags;
1574 
1575 	if (!intel_engine_supports_stats(engine))
1576 		return;
1577 
1578 	write_seqlock_irqsave(&engine->stats.lock, flags);
1579 	WARN_ON_ONCE(engine->stats.enabled == 0);
1580 	if (--engine->stats.enabled == 0) {
1581 		engine->stats.total = __intel_engine_get_busy_time(engine);
1582 		engine->stats.active = 0;
1583 	}
1584 	write_sequnlock_irqrestore(&engine->stats.lock, flags);
1585 }
1586 
1587 static bool match_ring(struct i915_request *rq)
1588 {
1589 	u32 ring = ENGINE_READ(rq->engine, RING_START);
1590 
1591 	return ring == i915_ggtt_offset(rq->ring->vma);
1592 }
1593 
1594 struct i915_request *
1595 intel_engine_find_active_request(struct intel_engine_cs *engine)
1596 {
1597 	struct i915_request *request, *active = NULL;
1598 
1599 	/*
1600 	 * We are called by the error capture, reset and to dump engine
1601 	 * state at random points in time. In particular, note that neither is
1602 	 * crucially ordered with an interrupt. After a hang, the GPU is dead
1603 	 * and we assume that no more writes can happen (we waited long enough
1604 	 * for all writes that were in transaction to be flushed) - adding an
1605 	 * extra delay for a recent interrupt is pointless. Hence, we do
1606 	 * not need an engine->irq_seqno_barrier() before the seqno reads.
1607 	 * At all other times, we must assume the GPU is still running, but
1608 	 * we only care about the snapshot of this moment.
1609 	 */
1610 	lockdep_assert_held(&engine->active.lock);
1611 	list_for_each_entry(request, &engine->active.requests, sched.link) {
1612 		if (i915_request_completed(request))
1613 			continue;
1614 
1615 		if (!i915_request_started(request))
1616 			continue;
1617 
1618 		/* More than one preemptible request may match! */
1619 		if (!match_ring(request))
1620 			continue;
1621 
1622 		active = request;
1623 		break;
1624 	}
1625 
1626 	return active;
1627 }
1628 
1629 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1630 #include "mock_engine.c"
1631 #include "selftest_engine.c"
1632 #include "selftest_engine_cs.c"
1633 #endif
1634