1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 
25 #include <drm/drm_print.h>
26 
27 #include "gem/i915_gem_context.h"
28 
29 #include "i915_drv.h"
30 
31 #include "gt/intel_gt.h"
32 
33 #include "intel_engine.h"
34 #include "intel_engine_pm.h"
35 #include "intel_context.h"
36 #include "intel_lrc.h"
37 #include "intel_reset.h"
38 
39 /* Haswell does have the CXT_SIZE register, however it does not appear to be
40  * valid. The docs describe, in dwords, what is in the context object. The full
41  * size is 70720 bytes; however, the power context and execlist context will
42  * never be saved (the power context is stored elsewhere, and execlists don't
43  * work on HSW) - so the final size, including the extra state required for
44  * the Resource Streamer, is 66944 bytes, which rounds up to 17 pages.
45  */
46 #define HSW_CXT_TOTAL_SIZE		(17 * PAGE_SIZE)
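/*
 * Worked example (a sketch, assuming the usual 4KiB PAGE_SIZE): 66944 bytes
 * of context state span 66944 / 4096 = 16.34... pages, so the allocation is
 * rounded up to 17 pages, i.e. 17 * 4096 = 69632 bytes.
 */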
47 
48 #define DEFAULT_LR_CONTEXT_RENDER_SIZE	(22 * PAGE_SIZE)
49 #define GEN8_LR_CONTEXT_RENDER_SIZE	(20 * PAGE_SIZE)
50 #define GEN9_LR_CONTEXT_RENDER_SIZE	(22 * PAGE_SIZE)
51 #define GEN10_LR_CONTEXT_RENDER_SIZE	(18 * PAGE_SIZE)
52 #define GEN11_LR_CONTEXT_RENDER_SIZE	(14 * PAGE_SIZE)
53 
54 #define GEN8_LR_CONTEXT_OTHER_SIZE	( 2 * PAGE_SIZE)
55 
56 struct engine_class_info {
57 	const char *name;
58 	u8 uabi_class;
59 };
60 
61 static const struct engine_class_info intel_engine_classes[] = {
62 	[RENDER_CLASS] = {
63 		.name = "rcs",
64 		.uabi_class = I915_ENGINE_CLASS_RENDER,
65 	},
66 	[COPY_ENGINE_CLASS] = {
67 		.name = "bcs",
68 		.uabi_class = I915_ENGINE_CLASS_COPY,
69 	},
70 	[VIDEO_DECODE_CLASS] = {
71 		.name = "vcs",
72 		.uabi_class = I915_ENGINE_CLASS_VIDEO,
73 	},
74 	[VIDEO_ENHANCEMENT_CLASS] = {
75 		.name = "vecs",
76 		.uabi_class = I915_ENGINE_CLASS_VIDEO_ENHANCE,
77 	},
78 };
79 
80 #define MAX_MMIO_BASES 3
81 struct engine_info {
82 	unsigned int hw_id;
83 	u8 class;
84 	u8 instance;
85 	/* mmio bases table *must* be sorted in reverse gen order */
86 	struct engine_mmio_base {
87 		u32 gen : 8;
88 		u32 base : 24;
89 	} mmio_bases[MAX_MMIO_BASES];
90 };
91 
92 static const struct engine_info intel_engines[] = {
93 	[RCS0] = {
94 		.hw_id = RCS0_HW,
95 		.class = RENDER_CLASS,
96 		.instance = 0,
97 		.mmio_bases = {
98 			{ .gen = 1, .base = RENDER_RING_BASE }
99 		},
100 	},
101 	[BCS0] = {
102 		.hw_id = BCS0_HW,
103 		.class = COPY_ENGINE_CLASS,
104 		.instance = 0,
105 		.mmio_bases = {
106 			{ .gen = 6, .base = BLT_RING_BASE }
107 		},
108 	},
109 	[VCS0] = {
110 		.hw_id = VCS0_HW,
111 		.class = VIDEO_DECODE_CLASS,
112 		.instance = 0,
113 		.mmio_bases = {
114 			{ .gen = 11, .base = GEN11_BSD_RING_BASE },
115 			{ .gen = 6, .base = GEN6_BSD_RING_BASE },
116 			{ .gen = 4, .base = BSD_RING_BASE }
117 		},
118 	},
119 	[VCS1] = {
120 		.hw_id = VCS1_HW,
121 		.class = VIDEO_DECODE_CLASS,
122 		.instance = 1,
123 		.mmio_bases = {
124 			{ .gen = 11, .base = GEN11_BSD2_RING_BASE },
125 			{ .gen = 8, .base = GEN8_BSD2_RING_BASE }
126 		},
127 	},
128 	[VCS2] = {
129 		.hw_id = VCS2_HW,
130 		.class = VIDEO_DECODE_CLASS,
131 		.instance = 2,
132 		.mmio_bases = {
133 			{ .gen = 11, .base = GEN11_BSD3_RING_BASE }
134 		},
135 	},
136 	[VCS3] = {
137 		.hw_id = VCS3_HW,
138 		.class = VIDEO_DECODE_CLASS,
139 		.instance = 3,
140 		.mmio_bases = {
141 			{ .gen = 11, .base = GEN11_BSD4_RING_BASE }
142 		},
143 	},
144 	[VECS0] = {
145 		.hw_id = VECS0_HW,
146 		.class = VIDEO_ENHANCEMENT_CLASS,
147 		.instance = 0,
148 		.mmio_bases = {
149 			{ .gen = 11, .base = GEN11_VEBOX_RING_BASE },
150 			{ .gen = 7, .base = VEBOX_RING_BASE }
151 		},
152 	},
153 	[VECS1] = {
154 		.hw_id = VECS1_HW,
155 		.class = VIDEO_ENHANCEMENT_CLASS,
156 		.instance = 1,
157 		.mmio_bases = {
158 			{ .gen = 11, .base = GEN11_VEBOX2_RING_BASE }
159 		},
160 	},
161 };
162 
163 /**
164  * intel_engine_context_size() - return the size of the context for an engine
165  * @dev_priv: i915 device private
166  * @class: engine class
167  *
168  * Each engine class may require a different amount of space for a context
169  * image.
170  *
171  * Return: size (in bytes) of an engine class specific context image
172  *
173  * Note: this size includes the HWSP, which is part of the context image
174  * in LRC mode, but does not include the "shared data page" used with
175  * GuC submission. The caller should account for this if using the GuC.
176  */
177 u32 intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
178 {
179 	u32 cxt_size;
180 
181 	BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE);
182 
183 	switch (class) {
184 	case RENDER_CLASS:
185 		switch (INTEL_GEN(dev_priv)) {
186 		default:
187 			MISSING_CASE(INTEL_GEN(dev_priv));
188 			return DEFAULT_LR_CONTEXT_RENDER_SIZE;
189 		case 11:
190 			return GEN11_LR_CONTEXT_RENDER_SIZE;
191 		case 10:
192 			return GEN10_LR_CONTEXT_RENDER_SIZE;
193 		case 9:
194 			return GEN9_LR_CONTEXT_RENDER_SIZE;
195 		case 8:
196 			return GEN8_LR_CONTEXT_RENDER_SIZE;
197 		case 7:
198 			if (IS_HASWELL(dev_priv))
199 				return HSW_CXT_TOTAL_SIZE;
200 
201 			cxt_size = I915_READ(GEN7_CXT_SIZE);
202 			return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64,
203 					PAGE_SIZE);
204 		case 6:
205 			cxt_size = I915_READ(CXT_SIZE);
206 			return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
207 					PAGE_SIZE);
208 		case 5:
209 		case 4:
210 			/*
211 			 * There is a discrepancy here between the size reported
212 			 * by the register and the size of the context layout
213 			 * in the docs. Both are described as authoritative!
214 			 *
215 			 * The discrepancy is on the order of a few cachelines,
216 			 * but the total is under one page (4k), which is our
217 			 * minimum allocation anyway so it should all come
218 			 * out in the wash.
219 			 */
220 			cxt_size = I915_READ(CXT_SIZE) + 1;
221 			DRM_DEBUG_DRIVER("gen%d CXT_SIZE = %d bytes [0x%08x]\n",
222 					 INTEL_GEN(dev_priv),
223 					 cxt_size * 64,
224 					 cxt_size - 1);
225 			return round_up(cxt_size * 64, PAGE_SIZE);
226 		case 3:
227 		case 2:
228 		/* For the special day when i810 gets merged. */
229 		case 1:
230 			return 0;
231 		}
232 		break;
233 	default:
234 		MISSING_CASE(class);
235 		/* fall through */
236 	case VIDEO_DECODE_CLASS:
237 	case VIDEO_ENHANCEMENT_CLASS:
238 	case COPY_ENGINE_CLASS:
239 		if (INTEL_GEN(dev_priv) < 8)
240 			return 0;
241 		return GEN8_LR_CONTEXT_OTHER_SIZE;
242 	}
243 }
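/*
 * Usage sketch (excerpted from intel_engine_setup() below, not a new API):
 * the size returned here is simply recorded per engine and consulted later
 * when the context state object is allocated, e.g.:
 *
 *	engine->context_size = intel_engine_context_size(dev_priv,
 *							 engine->class);
 *	if (engine->context_size)
 *		DRIVER_CAPS(dev_priv)->has_logical_contexts = true;
 */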
244 
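/*
 * Illustrative walk-through (no new behaviour): because mmio_bases[] is
 * sorted in reverse gen order, the first entry whose .gen is <= the
 * device's gen wins. E.g. VCS0 lists { gen 11, gen 6, gen 4 } bases, so on
 * a gen9 part the gen11 entry is skipped (9 < 11) and the loop stops at the
 * gen6 entry, returning GEN6_BSD_RING_BASE.
 */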
245 static u32 __engine_mmio_base(struct drm_i915_private *i915,
246 			      const struct engine_mmio_base *bases)
247 {
248 	int i;
249 
250 	for (i = 0; i < MAX_MMIO_BASES; i++)
251 		if (INTEL_GEN(i915) >= bases[i].gen)
252 			break;
253 
254 	GEM_BUG_ON(i == MAX_MMIO_BASES);
255 	GEM_BUG_ON(!bases[i].base);
256 
257 	return bases[i].base;
258 }
259 
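/*
 * Builds the canonical engine name, e.g. "rcs0", "bcs0" or "vcs1", from the
 * class name and instance, warning if the result would not fit within
 * INTEL_ENGINE_CS_MAX_NAME.
 */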
260 static void __sprint_engine_name(char *name, const struct engine_info *info)
261 {
262 	WARN_ON(snprintf(name, INTEL_ENGINE_CS_MAX_NAME, "%s%u",
263 			 intel_engine_classes[info->class].name,
264 			 info->instance) >= INTEL_ENGINE_CS_MAX_NAME);
265 }
266 
267 void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask)
268 {
269 	/*
270 	 * Though they added more rings on g4x/ilk, they did not add
271 	 * per-engine HWSTAM until gen6.
272 	 */
273 	if (INTEL_GEN(engine->i915) < 6 && engine->class != RENDER_CLASS)
274 		return;
275 
276 	if (INTEL_GEN(engine->i915) >= 3)
277 		ENGINE_WRITE(engine, RING_HWSTAM, mask);
278 	else
279 		ENGINE_WRITE16(engine, RING_HWSTAM, mask);
280 }
281 
282 static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine)
283 {
284 	/* Mask off all writes into the unknown HWSP */
285 	intel_engine_set_hwsp_writemask(engine, ~0u);
286 }
287 
288 static int
289 intel_engine_setup(struct drm_i915_private *dev_priv,
290 		   enum intel_engine_id id)
291 {
292 	const struct engine_info *info = &intel_engines[id];
293 	struct intel_engine_cs *engine;
294 
295 	GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes));
296 
297 	BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH));
298 	BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH));
299 
300 	if (GEM_DEBUG_WARN_ON(info->class > MAX_ENGINE_CLASS))
301 		return -EINVAL;
302 
303 	if (GEM_DEBUG_WARN_ON(info->instance > MAX_ENGINE_INSTANCE))
304 		return -EINVAL;
305 
306 	if (GEM_DEBUG_WARN_ON(dev_priv->engine_class[info->class][info->instance]))
307 		return -EINVAL;
308 
309 	GEM_BUG_ON(dev_priv->engine[id]);
310 	engine = kzalloc(sizeof(*engine), GFP_KERNEL);
311 	if (!engine)
312 		return -ENOMEM;
313 
314 	BUILD_BUG_ON(BITS_PER_TYPE(engine->mask) < I915_NUM_ENGINES);
315 
316 	engine->id = id;
317 	engine->mask = BIT(id);
318 	engine->i915 = dev_priv;
319 	engine->gt = &dev_priv->gt;
320 	engine->uncore = &dev_priv->uncore;
321 	__sprint_engine_name(engine->name, info);
322 	engine->hw_id = engine->guc_id = info->hw_id;
323 	engine->mmio_base = __engine_mmio_base(dev_priv, info->mmio_bases);
324 	engine->class = info->class;
325 	engine->instance = info->instance;
326 
327 	/*
328 	 * To be overridden by the backend on setup. However, to facilitate
329 	 * cleanup on error during setup, we always provide the destroy vfunc.
330 	 */
331 	engine->destroy = (typeof(engine->destroy))kfree;
332 
333 	engine->uabi_class = intel_engine_classes[info->class].uabi_class;
334 
335 	engine->context_size = intel_engine_context_size(dev_priv,
336 							 engine->class);
337 	if (WARN_ON(engine->context_size > BIT(20)))
338 		engine->context_size = 0;
339 	if (engine->context_size)
340 		DRIVER_CAPS(dev_priv)->has_logical_contexts = true;
341 
342 	/* Nothing to do here, execute in order of dependencies */
343 	engine->schedule = NULL;
344 
345 	seqlock_init(&engine->stats.lock);
346 
347 	ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);
348 
349 	/* Scrub mmio state on takeover */
350 	intel_engine_sanitize_mmio(engine);
351 
352 	dev_priv->engine_class[info->class][info->instance] = engine;
353 	dev_priv->engine[id] = engine;
354 	return 0;
355 }
356 
357 static void __setup_engine_capabilities(struct intel_engine_cs *engine)
358 {
359 	struct drm_i915_private *i915 = engine->i915;
360 
361 	if (engine->class == VIDEO_DECODE_CLASS) {
362 		/*
363 		 * HEVC support is present on the first engine instance
364 		 * before Gen11, and on all instances afterwards.
365 		 */
366 		if (INTEL_GEN(i915) >= 11 ||
367 		    (INTEL_GEN(i915) >= 9 && engine->instance == 0))
368 			engine->uabi_capabilities |=
369 				I915_VIDEO_CLASS_CAPABILITY_HEVC;
370 
371 		/*
372 		 * SFC block is present only on even logical engine
373 		 * instances.
374 		 */
375 		if ((INTEL_GEN(i915) >= 11 &&
376 		     RUNTIME_INFO(i915)->vdbox_sfc_access & engine->mask) ||
377 		    (INTEL_GEN(i915) >= 9 && engine->instance == 0))
378 			engine->uabi_capabilities |=
379 				I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC;
380 	} else if (engine->class == VIDEO_ENHANCEMENT_CLASS) {
381 		if (INTEL_GEN(i915) >= 9)
382 			engine->uabi_capabilities |=
383 				I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC;
384 	}
385 }
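/*
 * Illustrative outcome (assuming a gen11 part whose vdbox_sfc_access mask
 * covers only the even vcs instances): vcs0 advertises both
 * I915_VIDEO_CLASS_CAPABILITY_HEVC and
 * I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC, vcs1 advertises HEVC only,
 * and every vecs instance on gen9+ advertises SFC.
 */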
386 
387 static void intel_setup_engine_capabilities(struct drm_i915_private *i915)
388 {
389 	struct intel_engine_cs *engine;
390 	enum intel_engine_id id;
391 
392 	for_each_engine(engine, i915, id)
393 		__setup_engine_capabilities(engine);
394 }
395 
396 /**
397  * intel_engines_cleanup() - free the resources allocated for Command Streamers
398  * @i915: the i915 device
399  */
400 void intel_engines_cleanup(struct drm_i915_private *i915)
401 {
402 	struct intel_engine_cs *engine;
403 	enum intel_engine_id id;
404 
405 	for_each_engine(engine, i915, id) {
406 		engine->destroy(engine);
407 		i915->engine[id] = NULL;
408 	}
409 }
410 
411 /**
412  * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
413  * @i915: the i915 device
414  *
415  * Return: non-zero if the initialization failed.
416  */
417 int intel_engines_init_mmio(struct drm_i915_private *i915)
418 {
419 	struct intel_device_info *device_info = mkwrite_device_info(i915);
420 	const unsigned int engine_mask = INTEL_INFO(i915)->engine_mask;
421 	unsigned int mask = 0;
422 	unsigned int i;
423 	int err;
424 
425 	WARN_ON(engine_mask == 0);
426 	WARN_ON(engine_mask &
427 		GENMASK(BITS_PER_TYPE(mask) - 1, I915_NUM_ENGINES));
428 
429 	if (i915_inject_probe_failure())
430 		return -ENODEV;
431 
432 	for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
433 		if (!HAS_ENGINE(i915, i))
434 			continue;
435 
436 		err = intel_engine_setup(i915, i);
437 		if (err)
438 			goto cleanup;
439 
440 		mask |= BIT(i);
441 	}
442 
443 	/*
444 	 * Catch failures to update the intel_engines table when new engines
445 	 * are added to the driver: warn about the mismatch and disable the
446 	 * forgotten engines.
447 	 */
448 	if (WARN_ON(mask != engine_mask))
449 		device_info->engine_mask = mask;
450 
451 	RUNTIME_INFO(i915)->num_engines = hweight32(mask);
452 
453 	intel_gt_check_and_clear_faults(&i915->gt);
454 
455 	intel_setup_engine_capabilities(i915);
456 
457 	return 0;
458 
459 cleanup:
460 	intel_engines_cleanup(i915);
461 	return err;
462 }
463 
464 /**
465  * intel_engines_init() - init the Engine Command Streamers
466  * @i915: i915 device private
467  *
468  * Return: non-zero if the initialization failed.
469  */
470 int intel_engines_init(struct drm_i915_private *i915)
471 {
472 	int (*init)(struct intel_engine_cs *engine);
473 	struct intel_engine_cs *engine;
474 	enum intel_engine_id id;
475 	int err;
476 
477 	if (HAS_EXECLISTS(i915))
478 		init = intel_execlists_submission_init;
479 	else
480 		init = intel_ring_submission_init;
481 
482 	for_each_engine(engine, i915, id) {
483 		err = init(engine);
484 		if (err)
485 			goto cleanup;
486 	}
487 
488 	return 0;
489 
490 cleanup:
491 	intel_engines_cleanup(i915);
492 	return err;
493 }
494 
495 static void intel_engine_init_batch_pool(struct intel_engine_cs *engine)
496 {
497 	i915_gem_batch_pool_init(&engine->batch_pool, engine);
498 }
499 
500 void intel_engine_init_execlists(struct intel_engine_cs *engine)
501 {
502 	struct intel_engine_execlists * const execlists = &engine->execlists;
503 
504 	execlists->port_mask = 1;
505 	GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists)));
506 	GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
507 
508 	memset(execlists->pending, 0, sizeof(execlists->pending));
509 	execlists->active =
510 		memset(execlists->inflight, 0, sizeof(execlists->inflight));
511 
512 	execlists->queue_priority_hint = INT_MIN;
513 	execlists->queue = RB_ROOT_CACHED;
514 }
515 
516 static void cleanup_status_page(struct intel_engine_cs *engine)
517 {
518 	struct i915_vma *vma;
519 
520 	/* Prevent writes into HWSP after returning the page to the system */
521 	intel_engine_set_hwsp_writemask(engine, ~0u);
522 
523 	vma = fetch_and_zero(&engine->status_page.vma);
524 	if (!vma)
525 		return;
526 
527 	if (!HWS_NEEDS_PHYSICAL(engine->i915))
528 		i915_vma_unpin(vma);
529 
530 	i915_gem_object_unpin_map(vma->obj);
531 	i915_gem_object_put(vma->obj);
532 }
533 
534 static int pin_ggtt_status_page(struct intel_engine_cs *engine,
535 				struct i915_vma *vma)
536 {
537 	unsigned int flags;
538 
539 	flags = PIN_GLOBAL;
540 	if (!HAS_LLC(engine->i915))
541 		/*
542 		 * On g33, we cannot place HWS above 256MiB, so
543 		 * restrict its pinning to the low mappable arena.
544 		 * Though this restriction is not documented for
545 		 * gen4, gen5, or byt, they also behave similarly
546 		 * and hang if the HWS is placed at the top of the
547 		 * GTT. To generalise, it appears that all !llc
548 		 * platforms have issues with us placing the HWS
549 		 * above the mappable region (even though we never
550 		 * actually map it).
551 		 */
552 		flags |= PIN_MAPPABLE;
553 	else
554 		flags |= PIN_HIGH;
555 
556 	return i915_vma_pin(vma, 0, 0, flags);
557 }
558 
559 static int init_status_page(struct intel_engine_cs *engine)
560 {
561 	struct drm_i915_gem_object *obj;
562 	struct i915_vma *vma;
563 	void *vaddr;
564 	int ret;
565 
566 	/*
567 	 * Though the HWS register does support 36bit addresses, historically
568 	 * we have had hangs and corruption reported due to wild writes if
569 	 * the HWS is placed above 4G. We only allow objects to be allocated
570 	 * in GFP_DMA32 for i965, and no earlier physical address users had
571 	 * access to more than 4G.
572 	 */
573 	obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
574 	if (IS_ERR(obj)) {
575 		DRM_ERROR("Failed to allocate status page\n");
576 		return PTR_ERR(obj);
577 	}
578 
579 	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
580 
581 	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
582 	if (IS_ERR(vma)) {
583 		ret = PTR_ERR(vma);
584 		goto err;
585 	}
586 
587 	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
588 	if (IS_ERR(vaddr)) {
589 		ret = PTR_ERR(vaddr);
590 		goto err;
591 	}
592 
593 	engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE);
594 	engine->status_page.vma = vma;
595 
596 	if (!HWS_NEEDS_PHYSICAL(engine->i915)) {
597 		ret = pin_ggtt_status_page(engine, vma);
598 		if (ret)
599 			goto err_unpin;
600 	}
601 
602 	return 0;
603 
604 err_unpin:
605 	i915_gem_object_unpin_map(obj);
606 err:
607 	i915_gem_object_put(obj);
608 	return ret;
609 }
610 
611 static int intel_engine_setup_common(struct intel_engine_cs *engine)
612 {
613 	int err;
614 
615 	init_llist_head(&engine->barrier_tasks);
616 
617 	err = init_status_page(engine);
618 	if (err)
619 		return err;
620 
621 	intel_engine_init_active(engine, ENGINE_PHYSICAL);
622 	intel_engine_init_breadcrumbs(engine);
623 	intel_engine_init_execlists(engine);
624 	intel_engine_init_hangcheck(engine);
625 	intel_engine_init_batch_pool(engine);
626 	intel_engine_init_cmd_parser(engine);
627 	intel_engine_init__pm(engine);
628 
629 	/* Use the whole device by default */
630 	engine->sseu =
631 		intel_sseu_from_device_info(&RUNTIME_INFO(engine->i915)->sseu);
632 
633 	intel_engine_init_workarounds(engine);
634 	intel_engine_init_whitelist(engine);
635 	intel_engine_init_ctx_wa(engine);
636 
637 	return 0;
638 }
639 
640 /**
641  * intel_engines_setup() - setup engine state not requiring hw access
642  * @i915: Device to setup.
643  *
644  * Initializes engine structure members shared between legacy and execlists
645  * submission modes which do not require hardware access.
646  *
647  * Typically done early in the submission mode specific engine setup stage.
648  */
649 int intel_engines_setup(struct drm_i915_private *i915)
650 {
651 	int (*setup)(struct intel_engine_cs *engine);
652 	struct intel_engine_cs *engine;
653 	enum intel_engine_id id;
654 	int err;
655 
656 	if (HAS_EXECLISTS(i915))
657 		setup = intel_execlists_submission_setup;
658 	else
659 		setup = intel_ring_submission_setup;
660 
661 	for_each_engine(engine, i915, id) {
662 		err = intel_engine_setup_common(engine);
663 		if (err)
664 			goto cleanup;
665 
666 		err = setup(engine);
667 		if (err)
668 			goto cleanup;
669 
670 		/* We expect the backend to take control over its state */
671 		GEM_BUG_ON(engine->destroy == (typeof(engine->destroy))kfree);
672 
673 		GEM_BUG_ON(!engine->cops);
674 	}
675 
676 	return 0;
677 
678 cleanup:
679 	intel_engines_cleanup(i915);
680 	return err;
681 }
682 
683 void intel_engines_set_scheduler_caps(struct drm_i915_private *i915)
684 {
685 	static const struct {
686 		u8 engine;
687 		u8 sched;
688 	} map[] = {
689 #define MAP(x, y) { ilog2(I915_ENGINE_##x), ilog2(I915_SCHEDULER_CAP_##y) }
690 		MAP(HAS_PREEMPTION, PREEMPTION),
691 		MAP(HAS_SEMAPHORES, SEMAPHORES),
692 		MAP(SUPPORTS_STATS, ENGINE_BUSY_STATS),
693 #undef MAP
694 	};
695 	struct intel_engine_cs *engine;
696 	enum intel_engine_id id;
697 	u32 enabled, disabled;
698 
699 	enabled = 0;
700 	disabled = 0;
701 	for_each_engine(engine, i915, id) { /* all engines must agree! */
702 		int i;
703 
704 		if (engine->schedule)
705 			enabled |= (I915_SCHEDULER_CAP_ENABLED |
706 				    I915_SCHEDULER_CAP_PRIORITY);
707 		else
708 			disabled |= (I915_SCHEDULER_CAP_ENABLED |
709 				     I915_SCHEDULER_CAP_PRIORITY);
710 
711 		for (i = 0; i < ARRAY_SIZE(map); i++) {
712 			if (engine->flags & BIT(map[i].engine))
713 				enabled |= BIT(map[i].sched);
714 			else
715 				disabled |= BIT(map[i].sched);
716 		}
717 	}
718 
719 	i915->caps.scheduler = enabled & ~disabled;
720 	if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_ENABLED))
721 		i915->caps.scheduler = 0;
722 }
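/*
 * Worked example (hypothetical configuration): if rcs0 sets
 * I915_ENGINE_HAS_SEMAPHORES but vcs0 does not, the SEMAPHORES bit ends up
 * in both enabled and disabled, and "enabled & ~disabled" clears it - the
 * exported scheduler caps advertise only what every engine agrees on.
 */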
723 
724 struct measure_breadcrumb {
725 	struct i915_request rq;
726 	struct intel_timeline timeline;
727 	struct intel_ring ring;
728 	u32 cs[1024];
729 };
730 
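/*
 * Measure how many dwords the engine's emit_fini_breadcrumb() writes by
 * running it against a throwaway frame; intel_engine_init_common() records
 * the result so that enough ring space can be reserved for the final
 * breadcrumb of every real request.
 */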
731 static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
732 {
733 	struct measure_breadcrumb *frame;
734 	int dw = -ENOMEM;
735 
736 	GEM_BUG_ON(!engine->gt->scratch);
737 
738 	frame = kzalloc(sizeof(*frame), GFP_KERNEL);
739 	if (!frame)
740 		return -ENOMEM;
741 
742 	if (intel_timeline_init(&frame->timeline,
743 				engine->gt,
744 				engine->status_page.vma))
745 		goto out_frame;
746 
747 	INIT_LIST_HEAD(&frame->ring.request_list);
748 	frame->ring.timeline = &frame->timeline;
749 	frame->ring.vaddr = frame->cs;
750 	frame->ring.size = sizeof(frame->cs);
751 	frame->ring.effective_size = frame->ring.size;
752 	intel_ring_update_space(&frame->ring);
753 
754 	frame->rq.i915 = engine->i915;
755 	frame->rq.engine = engine;
756 	frame->rq.ring = &frame->ring;
757 	frame->rq.timeline = &frame->timeline;
758 
759 	dw = intel_timeline_pin(&frame->timeline);
760 	if (dw < 0)
761 		goto out_timeline;
762 
763 	dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs;
764 	GEM_BUG_ON(dw & 1); /* RING_TAIL must be qword aligned */
765 
766 	intel_timeline_unpin(&frame->timeline);
767 
768 out_timeline:
769 	intel_timeline_fini(&frame->timeline);
770 out_frame:
771 	kfree(frame);
772 	return dw;
773 }
774 
775 static int pin_context(struct i915_gem_context *ctx,
776 		       struct intel_engine_cs *engine,
777 		       struct intel_context **out)
778 {
779 	struct intel_context *ce;
780 	int err;
781 
782 	ce = i915_gem_context_get_engine(ctx, engine->id);
783 	if (IS_ERR(ce))
784 		return PTR_ERR(ce);
785 
786 	err = intel_context_pin(ce);
787 	intel_context_put(ce);
788 	if (err)
789 		return err;
790 
791 	*out = ce;
792 	return 0;
793 }
794 
795 void
796 intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass)
797 {
798 	INIT_LIST_HEAD(&engine->active.requests);
799 
800 	spin_lock_init(&engine->active.lock);
801 	lockdep_set_subclass(&engine->active.lock, subclass);
802 
803 	/*
804 	 * Due to an interesting quirk in lockdep's internal debug tracking,
805 	 * after setting a subclass we must ensure the lock is used. Otherwise,
806 	 * nr_unused_locks is incremented once too often.
807 	 */
808 #ifdef CONFIG_DEBUG_LOCK_ALLOC
809 	local_irq_disable();
810 	lock_map_acquire(&engine->active.lock.dep_map);
811 	lock_map_release(&engine->active.lock.dep_map);
812 	local_irq_enable();
813 #endif
814 }
815 
816 /**
817  * intel_engine_init_common() - initialize engine state which might require hw access
818  * @engine: Engine to initialize.
819  *
820  * Initializes @engine structure members shared between legacy and execlists
821  * submission modes which do require hardware access.
822  *
823  * Typically done at later stages of submission mode specific engine setup.
824  *
825  * Returns zero on success or an error code on failure.
826  */
827 int intel_engine_init_common(struct intel_engine_cs *engine)
828 {
829 	struct drm_i915_private *i915 = engine->i915;
830 	int ret;
831 
832 	engine->set_default_submission(engine);
833 
834 	/* We may need to do things with the shrinker which
835 	 * require us to immediately switch back to the default
836 	 * context. This can cause a problem as pinning the
837 	 * default context also requires GTT space which may not
838 	 * be available. To avoid this we always pin the default
839 	 * context.
840 	 */
841 	ret = pin_context(i915->kernel_context, engine,
842 			  &engine->kernel_context);
843 	if (ret)
844 		return ret;
845 
846 	ret = measure_breadcrumb_dw(engine);
847 	if (ret < 0)
848 		goto err_unpin;
849 
850 	engine->emit_fini_breadcrumb_dw = ret;
851 
852 	return 0;
853 
854 err_unpin:
855 	intel_context_unpin(engine->kernel_context);
856 	return ret;
857 }
858 
859 /**
860  * intel_engine_cleanup_common() - cleans up the engine state created by
861  *                                 the common initializers.
862  * @engine: Engine to cleanup.
863  *
864  * This cleans up everything created by the common helpers.
865  */
866 void intel_engine_cleanup_common(struct intel_engine_cs *engine)
867 {
868 	GEM_BUG_ON(!list_empty(&engine->active.requests));
869 
870 	cleanup_status_page(engine);
871 
872 	intel_engine_fini_breadcrumbs(engine);
873 	intel_engine_cleanup_cmd_parser(engine);
874 	i915_gem_batch_pool_fini(&engine->batch_pool);
875 
876 	if (engine->default_state)
877 		i915_gem_object_put(engine->default_state);
878 
879 	intel_context_unpin(engine->kernel_context);
880 	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
881 
882 	intel_wa_list_free(&engine->ctx_wa_list);
883 	intel_wa_list_free(&engine->wa_list);
884 	intel_wa_list_free(&engine->whitelist);
885 }
886 
887 u64 intel_engine_get_active_head(const struct intel_engine_cs *engine)
888 {
889 	struct drm_i915_private *i915 = engine->i915;
890 
891 	u64 acthd;
892 
893 	if (INTEL_GEN(i915) >= 8)
894 		acthd = ENGINE_READ64(engine, RING_ACTHD, RING_ACTHD_UDW);
895 	else if (INTEL_GEN(i915) >= 4)
896 		acthd = ENGINE_READ(engine, RING_ACTHD);
897 	else
898 		acthd = ENGINE_READ(engine, ACTHD);
899 
900 	return acthd;
901 }
902 
903 u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine)
904 {
905 	u64 bbaddr;
906 
907 	if (INTEL_GEN(engine->i915) >= 8)
908 		bbaddr = ENGINE_READ64(engine, RING_BBADDR, RING_BBADDR_UDW);
909 	else
910 		bbaddr = ENGINE_READ(engine, RING_BBADDR);
911 
912 	return bbaddr;
913 }
914 
915 int intel_engine_stop_cs(struct intel_engine_cs *engine)
916 {
917 	struct intel_uncore *uncore = engine->uncore;
918 	const u32 base = engine->mmio_base;
919 	const i915_reg_t mode = RING_MI_MODE(base);
920 	int err;
921 
922 	if (INTEL_GEN(engine->i915) < 3)
923 		return -ENODEV;
924 
925 	GEM_TRACE("%s\n", engine->name);
926 
927 	intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING));
928 
929 	err = 0;
930 	if (__intel_wait_for_register_fw(uncore,
931 					 mode, MODE_IDLE, MODE_IDLE,
932 					 1000, 0,
933 					 NULL)) {
934 		GEM_TRACE("%s: timed out on STOP_RING -> IDLE\n", engine->name);
935 		err = -ETIMEDOUT;
936 	}
937 
938 	/* A final mmio read to let GPU writes hopefully be flushed to memory */
939 	intel_uncore_posting_read_fw(uncore, mode);
940 
941 	return err;
942 }
943 
944 void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine)
945 {
946 	GEM_TRACE("%s\n", engine->name);
947 
948 	ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
949 }
950 
951 const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
952 {
953 	switch (type) {
954 	case I915_CACHE_NONE: return " uncached";
955 	case I915_CACHE_LLC: return HAS_LLC(i915) ? " LLC" : " snooped";
956 	case I915_CACHE_L3_LLC: return " L3+LLC";
957 	case I915_CACHE_WT: return " WT";
958 	default: return "";
959 	}
960 }
961 
962 static u32
963 read_subslice_reg(struct intel_engine_cs *engine, int slice, int subslice,
964 		  i915_reg_t reg)
965 {
966 	struct drm_i915_private *i915 = engine->i915;
967 	struct intel_uncore *uncore = engine->uncore;
968 	u32 mcr_mask, mcr_ss, mcr, old_mcr, val;
969 	enum forcewake_domains fw_domains;
970 
971 	if (INTEL_GEN(i915) >= 11) {
972 		mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
973 		mcr_ss = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
974 	} else {
975 		mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
976 		mcr_ss = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
977 	}
978 
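	/*
	 * Steer the multicast register read at the requested slice/subslice
	 * via GEN8_MCR_SELECTOR, then restore the previous steering so that
	 * other users are unaffected; the uncore lock keeps the
	 * select/read/restore sequence atomic.
	 */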
979 	fw_domains = intel_uncore_forcewake_for_reg(uncore, reg,
980 						    FW_REG_READ);
981 	fw_domains |= intel_uncore_forcewake_for_reg(uncore,
982 						     GEN8_MCR_SELECTOR,
983 						     FW_REG_READ | FW_REG_WRITE);
984 
985 	spin_lock_irq(&uncore->lock);
986 	intel_uncore_forcewake_get__locked(uncore, fw_domains);
987 
988 	old_mcr = mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR);
989 
990 	mcr &= ~mcr_mask;
991 	mcr |= mcr_ss;
992 	intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
993 
994 	val = intel_uncore_read_fw(uncore, reg);
995 
996 	mcr &= ~mcr_mask;
997 	mcr |= old_mcr & mcr_mask;
998 
999 	intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
1000 
1001 	intel_uncore_forcewake_put__locked(uncore, fw_domains);
1002 	spin_unlock_irq(&uncore->lock);
1003 
1004 	return val;
1005 }
1006 
1007 /* NB: please notice the memset */
1008 void intel_engine_get_instdone(struct intel_engine_cs *engine,
1009 			       struct intel_instdone *instdone)
1010 {
1011 	struct drm_i915_private *i915 = engine->i915;
1012 	struct intel_uncore *uncore = engine->uncore;
1013 	u32 mmio_base = engine->mmio_base;
1014 	int slice;
1015 	int subslice;
1016 
1017 	memset(instdone, 0, sizeof(*instdone));
1018 
1019 	switch (INTEL_GEN(i915)) {
1020 	default:
1021 		instdone->instdone =
1022 			intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
1023 
1024 		if (engine->id != RCS0)
1025 			break;
1026 
1027 		instdone->slice_common =
1028 			intel_uncore_read(uncore, GEN7_SC_INSTDONE);
1029 		for_each_instdone_slice_subslice(i915, slice, subslice) {
1030 			instdone->sampler[slice][subslice] =
1031 				read_subslice_reg(engine, slice, subslice,
1032 						  GEN7_SAMPLER_INSTDONE);
1033 			instdone->row[slice][subslice] =
1034 				read_subslice_reg(engine, slice, subslice,
1035 						  GEN7_ROW_INSTDONE);
1036 		}
1037 		break;
1038 	case 7:
1039 		instdone->instdone =
1040 			intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
1041 
1042 		if (engine->id != RCS0)
1043 			break;
1044 
1045 		instdone->slice_common =
1046 			intel_uncore_read(uncore, GEN7_SC_INSTDONE);
1047 		instdone->sampler[0][0] =
1048 			intel_uncore_read(uncore, GEN7_SAMPLER_INSTDONE);
1049 		instdone->row[0][0] =
1050 			intel_uncore_read(uncore, GEN7_ROW_INSTDONE);
1051 
1052 		break;
1053 	case 6:
1054 	case 5:
1055 	case 4:
1056 		instdone->instdone =
1057 			intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
1058 		if (engine->id == RCS0)
1059 			/* HACK: Using the wrong struct member */
1060 			instdone->slice_common =
1061 				intel_uncore_read(uncore, GEN4_INSTDONE1);
1062 		break;
1063 	case 3:
1064 	case 2:
1065 		instdone->instdone = intel_uncore_read(uncore, GEN2_INSTDONE);
1066 		break;
1067 	}
1068 }
1069 
1070 static bool ring_is_idle(struct intel_engine_cs *engine)
1071 {
1072 	struct drm_i915_private *dev_priv = engine->i915;
1073 	intel_wakeref_t wakeref;
1074 	bool idle = true;
1075 
1076 	if (I915_SELFTEST_ONLY(!engine->mmio_base))
1077 		return true;
1078 
1079 	/* If the whole device is asleep, the engine must be idle */
1080 	wakeref = intel_runtime_pm_get_if_in_use(&dev_priv->runtime_pm);
1081 	if (!wakeref)
1082 		return true;
1083 
1084 	/* First check that no commands are left in the ring */
1085 	if ((ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) !=
1086 	    (ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR))
1087 		idle = false;
1088 
1089 	/* No bit for gen2, so assume the CS parser is idle */
1090 	if (INTEL_GEN(dev_priv) > 2 &&
1091 	    !(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE))
1092 		idle = false;
1093 
1094 	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
1095 
1096 	return idle;
1097 }
1098 
1099 /**
1100  * intel_engine_is_idle() - Report if the engine has finished processing all work
1101  * @engine: the intel_engine_cs
1102  *
1103  * Return true if there are no requests pending, nothing left to be submitted
1104  * to hardware, and the engine is idle.
1105  */
1106 bool intel_engine_is_idle(struct intel_engine_cs *engine)
1107 {
1108 	/* More white lies, if wedged, hw state is inconsistent */
1109 	if (intel_gt_is_wedged(engine->gt))
1110 		return true;
1111 
1112 	if (!intel_engine_pm_is_awake(engine))
1113 		return true;
1114 
1115 	/* Waiting to drain ELSP? */
1116 	if (execlists_active(&engine->execlists)) {
1117 		struct tasklet_struct *t = &engine->execlists.tasklet;
1118 
1119 		synchronize_hardirq(engine->i915->drm.pdev->irq);
1120 
1121 		local_bh_disable();
1122 		if (tasklet_trylock(t)) {
1123 			/* Must wait for any GPU reset in progress. */
1124 			if (__tasklet_is_enabled(t))
1125 				t->func(t->data);
1126 			tasklet_unlock(t);
1127 		}
1128 		local_bh_enable();
1129 
1130 		/* Otherwise flush the tasklet if it was on another cpu */
1131 		tasklet_unlock_wait(t);
1132 
1133 		if (execlists_active(&engine->execlists))
1134 			return false;
1135 	}
1136 
1137 	/* ELSP is empty, but there are ready requests? E.g. after reset */
1138 	if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root))
1139 		return false;
1140 
1141 	/* Ring stopped? */
1142 	return ring_is_idle(engine);
1143 }
1144 
1145 bool intel_engines_are_idle(struct intel_gt *gt)
1146 {
1147 	struct intel_engine_cs *engine;
1148 	enum intel_engine_id id;
1149 
1150 	/*
1151 	 * If the driver is wedged, HW state may be very inconsistent and
1152 	 * report that it is still busy, even though we have stopped using it.
1153 	 */
1154 	if (intel_gt_is_wedged(gt))
1155 		return true;
1156 
1157 	/* Already parked (and passed an idleness test); must still be idle */
1158 	if (!READ_ONCE(gt->awake))
1159 		return true;
1160 
1161 	for_each_engine(engine, gt->i915, id) {
1162 		if (!intel_engine_is_idle(engine))
1163 			return false;
1164 	}
1165 
1166 	return true;
1167 }
1168 
1169 void intel_engines_reset_default_submission(struct intel_gt *gt)
1170 {
1171 	struct intel_engine_cs *engine;
1172 	enum intel_engine_id id;
1173 
1174 	for_each_engine(engine, gt->i915, id)
1175 		engine->set_default_submission(engine);
1176 }
1177 
1178 bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
1179 {
1180 	switch (INTEL_GEN(engine->i915)) {
1181 	case 2:
1182 		return false; /* uses physical not virtual addresses */
1183 	case 3:
1184 		/* maybe only uses physical not virtual addresses */
1185 		return !(IS_I915G(engine->i915) || IS_I915GM(engine->i915));
1186 	case 6:
1187 		return engine->class != VIDEO_DECODE_CLASS; /* b0rked */
1188 	default:
1189 		return true;
1190 	}
1191 }
1192 
1193 unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915)
1194 {
1195 	struct intel_engine_cs *engine;
1196 	enum intel_engine_id id;
1197 	unsigned int which;
1198 
1199 	which = 0;
1200 	for_each_engine(engine, i915, id)
1201 		if (engine->default_state)
1202 			which |= BIT(engine->uabi_class);
1203 
1204 	return which;
1205 }
1206 
1207 static int print_sched_attr(struct drm_i915_private *i915,
1208 			    const struct i915_sched_attr *attr,
1209 			    char *buf, int x, int len)
1210 {
1211 	if (attr->priority == I915_PRIORITY_INVALID)
1212 		return x;
1213 
1214 	x += snprintf(buf + x, len - x,
1215 		      " prio=%d", attr->priority);
1216 
1217 	return x;
1218 }
1219 
1220 static void print_request(struct drm_printer *m,
1221 			  struct i915_request *rq,
1222 			  const char *prefix)
1223 {
1224 	const char *name = rq->fence.ops->get_timeline_name(&rq->fence);
1225 	char buf[80] = "";
1226 	int x = 0;
1227 
1228 	x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf));
1229 
1230 	drm_printf(m, "%s %llx:%llx%s%s %s @ %dms: %s\n",
1231 		   prefix,
1232 		   rq->fence.context, rq->fence.seqno,
1233 		   i915_request_completed(rq) ? "!" :
1234 		   i915_request_started(rq) ? "*" :
1235 		   "",
1236 		   test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
1237 			    &rq->fence.flags) ? "+" :
1238 		   test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
1239 			    &rq->fence.flags) ? "-" :
1240 		   "",
1241 		   buf,
1242 		   jiffies_to_msecs(jiffies - rq->emitted_jiffies),
1243 		   name);
1244 }
1245 
1246 static void hexdump(struct drm_printer *m, const void *buf, size_t len)
1247 {
1248 	const size_t rowsize = 8 * sizeof(u32);
1249 	const void *prev = NULL;
1250 	bool skip = false;
1251 	size_t pos;
1252 
1253 	for (pos = 0; pos < len; pos += rowsize) {
1254 		char line[128];
1255 
1256 		if (prev && !memcmp(prev, buf + pos, rowsize)) {
1257 			if (!skip) {
1258 				drm_printf(m, "*\n");
1259 				skip = true;
1260 			}
1261 			continue;
1262 		}
1263 
1264 		WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
1265 						rowsize, sizeof(u32),
1266 						line, sizeof(line),
1267 						false) >= sizeof(line));
1268 		drm_printf(m, "[%04zx] %s\n", pos, line);
1269 
1270 		prev = buf + pos;
1271 		skip = false;
1272 	}
1273 }
1274 
1275 static void intel_engine_print_registers(struct intel_engine_cs *engine,
1276 					 struct drm_printer *m)
1277 {
1278 	struct drm_i915_private *dev_priv = engine->i915;
1279 	const struct intel_engine_execlists * const execlists =
1280 		&engine->execlists;
1281 	unsigned long flags;
1282 	u64 addr;
1283 
1284 	if (engine->id == RCS0 && IS_GEN_RANGE(dev_priv, 4, 7))
1285 		drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID));
1286 	drm_printf(m, "\tRING_START: 0x%08x\n",
1287 		   ENGINE_READ(engine, RING_START));
1288 	drm_printf(m, "\tRING_HEAD:  0x%08x\n",
1289 		   ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR);
1290 	drm_printf(m, "\tRING_TAIL:  0x%08x\n",
1291 		   ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR);
1292 	drm_printf(m, "\tRING_CTL:   0x%08x%s\n",
1293 		   ENGINE_READ(engine, RING_CTL),
1294 		   ENGINE_READ(engine, RING_CTL) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? " [waiting]" : "");
1295 	if (INTEL_GEN(engine->i915) > 2) {
1296 		drm_printf(m, "\tRING_MODE:  0x%08x%s\n",
1297 			   ENGINE_READ(engine, RING_MI_MODE),
1298 			   ENGINE_READ(engine, RING_MI_MODE) & (MODE_IDLE) ? " [idle]" : "");
1299 	}
1300 
1301 	if (INTEL_GEN(dev_priv) >= 6) {
1302 		drm_printf(m, "\tRING_IMR: %08x\n",
1303 			   ENGINE_READ(engine, RING_IMR));
1304 	}
1305 
1306 	addr = intel_engine_get_active_head(engine);
1307 	drm_printf(m, "\tACTHD:  0x%08x_%08x\n",
1308 		   upper_32_bits(addr), lower_32_bits(addr));
1309 	addr = intel_engine_get_last_batch_head(engine);
1310 	drm_printf(m, "\tBBADDR: 0x%08x_%08x\n",
1311 		   upper_32_bits(addr), lower_32_bits(addr));
1312 	if (INTEL_GEN(dev_priv) >= 8)
1313 		addr = ENGINE_READ64(engine, RING_DMA_FADD, RING_DMA_FADD_UDW);
1314 	else if (INTEL_GEN(dev_priv) >= 4)
1315 		addr = ENGINE_READ(engine, RING_DMA_FADD);
1316 	else
1317 		addr = ENGINE_READ(engine, DMA_FADD_I8XX);
1318 	drm_printf(m, "\tDMA_FADDR: 0x%08x_%08x\n",
1319 		   upper_32_bits(addr), lower_32_bits(addr));
1320 	if (INTEL_GEN(dev_priv) >= 4) {
1321 		drm_printf(m, "\tIPEIR: 0x%08x\n",
1322 			   ENGINE_READ(engine, RING_IPEIR));
1323 		drm_printf(m, "\tIPEHR: 0x%08x\n",
1324 			   ENGINE_READ(engine, RING_IPEHR));
1325 	} else {
1326 		drm_printf(m, "\tIPEIR: 0x%08x\n", ENGINE_READ(engine, IPEIR));
1327 		drm_printf(m, "\tIPEHR: 0x%08x\n", ENGINE_READ(engine, IPEHR));
1328 	}
1329 
1330 	if (HAS_EXECLISTS(dev_priv)) {
1331 		struct i915_request * const *port, *rq;
1332 		const u32 *hws =
1333 			&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
1334 		const u8 num_entries = execlists->csb_size;
1335 		unsigned int idx;
1336 		u8 read, write;
1337 
1338 		drm_printf(m, "\tExeclist status: 0x%08x %08x, entries %u\n",
1339 			   ENGINE_READ(engine, RING_EXECLIST_STATUS_LO),
1340 			   ENGINE_READ(engine, RING_EXECLIST_STATUS_HI),
1341 			   num_entries);
1342 
1343 		read = execlists->csb_head;
1344 		write = READ_ONCE(*execlists->csb_write);
1345 
1346 		drm_printf(m, "\tExeclist CSB read %d, write %d, tasklet queued? %s (%s)\n",
1347 			   read, write,
1348 			   yesno(test_bit(TASKLET_STATE_SCHED,
1349 					  &engine->execlists.tasklet.state)),
1350 			   enableddisabled(!atomic_read(&engine->execlists.tasklet.count)));
1351 		if (read >= num_entries)
1352 			read = 0;
1353 		if (write >= num_entries)
1354 			write = 0;
1355 		if (read > write)
1356 			write += num_entries;
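		/*
		 * Walk the CSB entries from the last-read index up to the
		 * hardware write pointer, wrapping modulo num_entries; each
		 * entry in the HWSP is a (status, context-id) pair.
		 */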
1357 		while (read < write) {
1358 			idx = ++read % num_entries;
1359 			drm_printf(m, "\tExeclist CSB[%d]: 0x%08x, context: %d\n",
1360 				   idx, hws[idx * 2], hws[idx * 2 + 1]);
1361 		}
1362 
1363 		spin_lock_irqsave(&engine->active.lock, flags);
1364 		for (port = execlists->active; (rq = *port); port++) {
1365 			char hdr[80];
1366 			int len;
1367 
1368 			len = snprintf(hdr, sizeof(hdr),
1369 				       "\t\tActive[%d: ",
1370 				       (int)(port - execlists->active));
1371 			if (!i915_request_signaled(rq))
1372 				len += snprintf(hdr + len, sizeof(hdr) - len,
1373 						"ring:{start:%08x, hwsp:%08x, seqno:%08x}, ",
1374 						i915_ggtt_offset(rq->ring->vma),
1375 						rq->timeline->hwsp_offset,
1376 						hwsp_seqno(rq));
1377 			snprintf(hdr + len, sizeof(hdr) - len, "rq: ");
1378 			print_request(m, rq, hdr);
1379 		}
1380 		for (port = execlists->pending; (rq = *port); port++) {
1381 			char hdr[80];
1382 
1383 			snprintf(hdr, sizeof(hdr),
1384 				 "\t\tPending[%d] ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ",
1385 				 (int)(port - execlists->pending),
1386 				 i915_ggtt_offset(rq->ring->vma),
1387 				 rq->timeline->hwsp_offset,
1388 				 hwsp_seqno(rq));
1389 			print_request(m, rq, hdr);
1390 		}
1391 		spin_unlock_irqrestore(&engine->active.lock, flags);
1392 	} else if (INTEL_GEN(dev_priv) > 6) {
1393 		drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
1394 			   ENGINE_READ(engine, RING_PP_DIR_BASE));
1395 		drm_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n",
1396 			   ENGINE_READ(engine, RING_PP_DIR_BASE_READ));
1397 		drm_printf(m, "\tPP_DIR_DCLV: 0x%08x\n",
1398 			   ENGINE_READ(engine, RING_PP_DIR_DCLV));
1399 	}
1400 }
1401 
1402 static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
1403 {
1404 	void *ring;
1405 	int size;
1406 
1407 	drm_printf(m,
1408 		   "[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n",
1409 		   rq->head, rq->postfix, rq->tail,
1410 		   rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u,
1411 		   rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u);
1412 
1413 	size = rq->tail - rq->head;
1414 	if (rq->tail < rq->head)
1415 		size += rq->ring->size;
1416 
1417 	ring = kmalloc(size, GFP_ATOMIC);
1418 	if (ring) {
1419 		const void *vaddr = rq->ring->vaddr;
1420 		unsigned int head = rq->head;
1421 		unsigned int len = 0;
1422 
1423 		if (rq->tail < head) {
1424 			len = rq->ring->size - head;
1425 			memcpy(ring, vaddr + head, len);
1426 			head = 0;
1427 		}
1428 		memcpy(ring + len, vaddr + head, size - len);
1429 
1430 		hexdump(m, ring, size);
1431 		kfree(ring);
1432 	}
1433 }
1434 
1435 void intel_engine_dump(struct intel_engine_cs *engine,
1436 		       struct drm_printer *m,
1437 		       const char *header, ...)
1438 {
1439 	struct i915_gpu_error * const error = &engine->i915->gpu_error;
1440 	struct i915_request *rq;
1441 	intel_wakeref_t wakeref;
1442 	unsigned long flags;
1443 
1444 	if (header) {
1445 		va_list ap;
1446 
1447 		va_start(ap, header);
1448 		drm_vprintf(m, header, &ap);
1449 		va_end(ap);
1450 	}
1451 
1452 	if (intel_gt_is_wedged(engine->gt))
1453 		drm_printf(m, "*** WEDGED ***\n");
1454 
1455 	drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count));
1456 	drm_printf(m, "\tHangcheck: %d ms ago\n",
1457 		   jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp));
1458 	drm_printf(m, "\tReset count: %d (global %d)\n",
1459 		   i915_reset_engine_count(error, engine),
1460 		   i915_reset_count(error));
1461 
1462 	drm_printf(m, "\tRequests:\n");
1463 
1464 	spin_lock_irqsave(&engine->active.lock, flags);
1465 	rq = intel_engine_find_active_request(engine);
1466 	if (rq) {
1467 		print_request(m, rq, "\t\tactive ");
1468 
1469 		drm_printf(m, "\t\tring->start:  0x%08x\n",
1470 			   i915_ggtt_offset(rq->ring->vma));
1471 		drm_printf(m, "\t\tring->head:   0x%08x\n",
1472 			   rq->ring->head);
1473 		drm_printf(m, "\t\tring->tail:   0x%08x\n",
1474 			   rq->ring->tail);
1475 		drm_printf(m, "\t\tring->emit:   0x%08x\n",
1476 			   rq->ring->emit);
1477 		drm_printf(m, "\t\tring->space:  0x%08x\n",
1478 			   rq->ring->space);
1479 		drm_printf(m, "\t\tring->hwsp:   0x%08x\n",
1480 			   rq->timeline->hwsp_offset);
1481 
1482 		print_request_ring(m, rq);
1483 	}
1484 	spin_unlock_irqrestore(&engine->active.lock, flags);
1485 
1486 	wakeref = intel_runtime_pm_get_if_in_use(&engine->i915->runtime_pm);
1487 	if (wakeref) {
1488 		intel_engine_print_registers(engine, m);
1489 		intel_runtime_pm_put(&engine->i915->runtime_pm, wakeref);
1490 	} else {
1491 		drm_printf(m, "\tDevice is asleep; skipping register dump\n");
1492 	}
1493 
1494 	intel_execlists_show_requests(engine, m, print_request, 8);
1495 
1496 	drm_printf(m, "HWSP:\n");
1497 	hexdump(m, engine->status_page.addr, PAGE_SIZE);
1498 
1499 	drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine)));
1500 
1501 	intel_engine_print_breadcrumbs(engine, m);
1502 }
1503 
1504 static u8 user_class_map[] = {
1505 	[I915_ENGINE_CLASS_RENDER] = RENDER_CLASS,
1506 	[I915_ENGINE_CLASS_COPY] = COPY_ENGINE_CLASS,
1507 	[I915_ENGINE_CLASS_VIDEO] = VIDEO_DECODE_CLASS,
1508 	[I915_ENGINE_CLASS_VIDEO_ENHANCE] = VIDEO_ENHANCEMENT_CLASS,
1509 };
1510 
1511 struct intel_engine_cs *
1512 intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance)
1513 {
1514 	if (class >= ARRAY_SIZE(user_class_map))
1515 		return NULL;
1516 
1517 	class = user_class_map[class];
1518 
1519 	GEM_BUG_ON(class > MAX_ENGINE_CLASS);
1520 
1521 	if (instance > MAX_ENGINE_INSTANCE)
1522 		return NULL;
1523 
1524 	return i915->engine_class[class][instance];
1525 }
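/*
 * Usage sketch (hypothetical caller, using the uABI names above):
 *
 *	engine = intel_engine_lookup_user(i915, I915_ENGINE_CLASS_VIDEO, 1);
 *
 * translates the uABI class through user_class_map[] to VIDEO_DECODE_CLASS
 * and returns the vcs1 engine, or NULL if that instance does not exist.
 */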
1526 
1527 /**
1528  * intel_enable_engine_stats() - Enable engine busy tracking on engine
1529  * @engine: engine to enable stats collection
1530  *
1531  * Start collecting the engine busyness data for @engine.
1532  *
1533  * Returns 0 on success or a negative error code.
1534  */
1535 int intel_enable_engine_stats(struct intel_engine_cs *engine)
1536 {
1537 	struct intel_engine_execlists *execlists = &engine->execlists;
1538 	unsigned long flags;
1539 	int err = 0;
1540 
1541 	if (!intel_engine_supports_stats(engine))
1542 		return -ENODEV;
1543 
1544 	spin_lock_irqsave(&engine->active.lock, flags);
1545 	write_seqlock(&engine->stats.lock);
1546 
1547 	if (unlikely(engine->stats.enabled == ~0)) {
1548 		err = -EBUSY;
1549 		goto unlock;
1550 	}
1551 
1552 	if (engine->stats.enabled++ == 0) {
1553 		struct i915_request * const *port;
1554 		struct i915_request *rq;
1555 
1556 		engine->stats.enabled_at = ktime_get();
1557 
1558 		/* XXX submission method oblivious? */
1559 		for (port = execlists->active; (rq = *port); port++)
1560 			engine->stats.active++;
1561 
1562 		for (port = execlists->pending; (rq = *port); port++) {
1563 			/* Exclude any contexts already counted in active */
1564 			if (intel_context_inflight_count(rq->hw_context) == 1)
1565 				engine->stats.active++;
1566 		}
1567 
1568 		if (engine->stats.active)
1569 			engine->stats.start = engine->stats.enabled_at;
1570 	}
1571 
1572 unlock:
1573 	write_sequnlock(&engine->stats.lock);
1574 	spin_unlock_irqrestore(&engine->active.lock, flags);
1575 
1576 	return err;
1577 }
1578 
1579 static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine)
1580 {
1581 	ktime_t total = engine->stats.total;
1582 
1583 	/*
1584 	 * If the engine is executing something at the moment
1585 	 * add it to the total.
1586 	 */
1587 	if (engine->stats.active)
1588 		total = ktime_add(total,
1589 				  ktime_sub(ktime_get(), engine->stats.start));
1590 
1591 	return total;
1592 }
1593 
1594 /**
1595  * intel_engine_get_busy_time() - Return current accumulated engine busyness
1596  * @engine: engine to report on
1597  *
1598  * Returns accumulated time @engine was busy since engine stats were enabled.
1599  */
1600 ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine)
1601 {
1602 	unsigned int seq;
1603 	ktime_t total;
1604 
1605 	do {
1606 		seq = read_seqbegin(&engine->stats.lock);
1607 		total = __intel_engine_get_busy_time(engine);
1608 	} while (read_seqretry(&engine->stats.lock, seq));
1609 
1610 	return total;
1611 }
1612 
1613 /**
1614  * intel_disable_engine_stats() - Disable engine busy tracking on engine
1615  * @engine: engine to disable stats collection
1616  *
1617  * Stops collecting the engine busyness data for @engine.
1618  */
1619 void intel_disable_engine_stats(struct intel_engine_cs *engine)
1620 {
1621 	unsigned long flags;
1622 
1623 	if (!intel_engine_supports_stats(engine))
1624 		return;
1625 
1626 	write_seqlock_irqsave(&engine->stats.lock, flags);
1627 	WARN_ON_ONCE(engine->stats.enabled == 0);
1628 	if (--engine->stats.enabled == 0) {
1629 		engine->stats.total = __intel_engine_get_busy_time(engine);
1630 		engine->stats.active = 0;
1631 	}
1632 	write_sequnlock_irqrestore(&engine->stats.lock, flags);
1633 }
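/*
 * Usage sketch (hypothetical caller such as the PMU; error handling
 * elided): the busyness tracking is reference counted, so a typical
 * sequence is
 *
 *	if (intel_enable_engine_stats(engine) == 0) {
 *		ktime_t busy = intel_engine_get_busy_time(engine);
 *		...
 *		intel_disable_engine_stats(engine);
 *	}
 */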
1634 
1635 static bool match_ring(struct i915_request *rq)
1636 {
1637 	u32 ring = ENGINE_READ(rq->engine, RING_START);
1638 
1639 	return ring == i915_ggtt_offset(rq->ring->vma);
1640 }
1641 
1642 struct i915_request *
1643 intel_engine_find_active_request(struct intel_engine_cs *engine)
1644 {
1645 	struct i915_request *request, *active = NULL;
1646 
1647 	/*
1648 	 * We are called by the error capture, by reset and by the engine-state
1649 	 * dumpers at random points in time. In particular, note that none is
1650 	 * crucially ordered with an interrupt. After a hang, the GPU is dead
1651 	 * and we assume that no more writes can happen (we waited long enough
1652 	 * for all writes that were in transaction to be flushed) - adding an
1653 	 * extra delay for a recent interrupt is pointless. Hence, we do
1654 	 * not need an engine->irq_seqno_barrier() before the seqno reads.
1655 	 * At all other times, we must assume the GPU is still running, but
1656 	 * we only care about the snapshot of this moment.
1657 	 */
1658 	lockdep_assert_held(&engine->active.lock);
1659 	list_for_each_entry(request, &engine->active.requests, sched.link) {
1660 		if (i915_request_completed(request))
1661 			continue;
1662 
1663 		if (!i915_request_started(request))
1664 			continue;
1665 
1666 		/* More than one preemptible request may match! */
1667 		if (!match_ring(request))
1668 			continue;
1669 
1670 		active = request;
1671 		break;
1672 	}
1673 
1674 	return active;
1675 }
1676 
1677 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1678 #include "selftest_engine_cs.c"
1679 #endif
1680