// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include "gem/i915_gem_lmem.h"

#include "gen8_engine_cs.h"
#include "i915_drv.h"
#include "i915_perf.h"
#include "i915_reg.h"
#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_gt_regs.h"
#include "intel_lrc.h"
#include "intel_lrc_reg.h"
#include "intel_ring.h"
#include "shmem_utils.h"

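/*
 * The register state of a context image is described with a tiny bytecode,
 * decoded by set_offsets() below. Summarising the macros and the decode
 * loop (a reading aid, not authoritative documentation):
 *
 *   NOP(x)          - skip x dwords in the image (bit 7 set in the header)
 *   LRI(count, f)   - emit MI_LOAD_REGISTER_IMM(count), optionally POSTED
 *   REG(x)/REG16(x) - the dword offset (x >> 2) of a register relative to
 *                     engine->mmio_base, packed 7 bits at a time with bit 7
 *                     marking a continuation byte
 *   END             - a zero byte terminates the stream
 *
 * For example, REG16(0x3a8) expands to the bytes 0x81, 0x6a; the decoder
 * rebuilds offset = (0x01 << 7) | 0x6a = 0xea and then writes
 * regs[0] = base + (0xea << 2) = base + 0x3a8.
 */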
static void set_offsets(u32 *regs,
			const u8 *data,
			const struct intel_engine_cs *engine,
			bool close)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
#define END 0
{
	const u32 base = engine->mmio_base;

	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		*regs = MI_LOAD_REGISTER_IMM(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		if (GRAPHICS_VER(engine->i915) >= 11)
			*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		GEM_BUG_ON(!count);
		do {
			u32 offset = 0;
			u8 v;

			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	if (close) {
		/* Close the batch; used mainly by live_lrc_layout() */
		*regs = MI_BATCH_BUFFER_END;
		if (GRAPHICS_VER(engine->i915) >= 11)
			*regs |= BIT(0);
	}
}

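/*
 * The tables below describe, per platform and engine class, at which dword
 * offsets in the context image the hardware expects each register's LRI
 * slot. Only offsets relative to engine->mmio_base are encoded here; the
 * values themselves come from the default state or are filled in later.
 * The layout is cross-checked against the hardware by live_lrc_layout().
 */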
static const u8 gen8_xcs_offsets[] = {
	NOP(1),
	LRI(11, 0),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),

	NOP(9),
	LRI(9, 0),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(2, 0),
	REG16(0x200),
	REG(0x028),

	END
};

static const u8 gen9_xcs_offsets[] = {
	NOP(1),
	LRI(14, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),

	NOP(3),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(1, POSTED),
	REG16(0x200),

	NOP(13),
	LRI(44, POSTED),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),

	END
};

static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	END
};

static const u8 dg2_xcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	END
};

static const u8 mtl_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	NOP(4),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	END
};

static const u8 gen8_rcs_offsets[] = {
	NOP(1),
	LRI(14, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),

	NOP(3),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(1, 0),
	REG(0x0c8),

	END
};

static const u8 gen9_rcs_offsets[] = {
	NOP(1),
	LRI(14, POSTED),
	REG16(0x244),
	REG(0x34),
	REG(0x30),
	REG(0x38),
	REG(0x3c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),

	NOP(3),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(1, 0),
	REG(0xc8),

	NOP(13),
	LRI(44, POSTED),
	REG(0x28),
	REG(0x9c),
	REG(0xc0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x68),

	END
};

static const u8 gen11_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(1, POSTED),
	REG(0x1b0),

	NOP(10),
	LRI(1, 0),
	REG(0x0c8),

	END
};

static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	END
};

static const u8 xehp_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};

static const u8 dg2_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};

static const u8 mtl_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(2),
	LRI(2, POSTED),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};

#undef END
#undef REG16
#undef REG
#undef LRI
#undef NOP

static const u8 *reg_offsets(const struct intel_engine_cs *engine)
{
	/*
	 * The gen12+ lists only have the registers we program in the basic
	 * default state. We rely on the context image using relative
	 * addressing to automatically fix up the register state between the
	 * physical engines backing a virtual engine.
	 */
	GEM_BUG_ON(GRAPHICS_VER(engine->i915) >= 12 &&
		   !intel_engine_has_relative_mmio(engine));

	if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) {
		if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70))
			return mtl_rcs_offsets;
		else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
			return dg2_rcs_offsets;
		else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
			return xehp_rcs_offsets;
		else if (GRAPHICS_VER(engine->i915) >= 12)
			return gen12_rcs_offsets;
		else if (GRAPHICS_VER(engine->i915) >= 11)
			return gen11_rcs_offsets;
		else if (GRAPHICS_VER(engine->i915) >= 9)
			return gen9_rcs_offsets;
		else
			return gen8_rcs_offsets;
	} else {
		if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70))
			return mtl_xcs_offsets;
		else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
			return dg2_xcs_offsets;
		else if (GRAPHICS_VER(engine->i915) >= 12)
			return gen12_xcs_offsets;
		else if (GRAPHICS_VER(engine->i915) >= 9)
			return gen9_xcs_offsets;
		else
			return gen8_xcs_offsets;
	}
}

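/*
 * The lrc_ring_*() helpers below return the dword index of a register's
 * MI_LOAD_REGISTER_IMM slot within the context image (so the register's
 * value lives at index + 1), or -1 if the register is not present in the
 * image for this engine/platform combination.
 */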
static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
		return 0x70;
	else if (GRAPHICS_VER(engine->i915) >= 12)
		return 0x60;
	else if (GRAPHICS_VER(engine->i915) >= 9)
		return 0x54;
	else if (engine->class == RENDER_CLASS)
		return 0x58;
	else
		return -1;
}

static int lrc_ring_bb_offset(const struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
		return 0x80;
	else if (GRAPHICS_VER(engine->i915) >= 12)
		return 0x70;
	else if (GRAPHICS_VER(engine->i915) >= 9)
		return 0x64;
	else if (GRAPHICS_VER(engine->i915) >= 8 &&
		 engine->class == RENDER_CLASS)
		return 0xc4;
	else
		return -1;
}

static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
		return 0x84;
	else if (GRAPHICS_VER(engine->i915) >= 12)
		return 0x74;
	else if (GRAPHICS_VER(engine->i915) >= 9)
		return 0x68;
	else if (engine->class == RENDER_CLASS)
		return 0xd8;
	else
		return -1;
}

static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER(engine->i915) >= 12)
		return 0x12;
	else if (GRAPHICS_VER(engine->i915) >= 9 || engine->class == RENDER_CLASS)
		return 0x18;
	else
		return -1;
}

static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
{
	int x;

	x = lrc_ring_wa_bb_per_ctx(engine);
	if (x < 0)
		return x;

	return x + 2;
}

static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
{
	int x;

	x = lrc_ring_indirect_ptr(engine);
	if (x < 0)
		return x;

	return x + 2;
}

static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
		/*
		 * Note that the CSFE context has a dummy slot for CMD_BUF_CCTL
		 * simply to match the RCS context image layout.
		 */
		return 0xc6;
	else if (engine->class != RENDER_CLASS)
		return -1;
	else if (GRAPHICS_VER(engine->i915) >= 12)
		return 0xb6;
	else if (GRAPHICS_VER(engine->i915) >= 11)
		return 0xaa;
	else
		return -1;
}

static u32
lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER(engine->i915) >= 12)
		return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	else if (GRAPHICS_VER(engine->i915) >= 11)
		return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	else if (GRAPHICS_VER(engine->i915) >= 9)
		return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	else if (GRAPHICS_VER(engine->i915) >= 8)
		return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;

	GEM_BUG_ON(GRAPHICS_VER(engine->i915) < 8);

	return 0;
}

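/*
 * Hardware encoding note: the INDIRECT_CTX pointer packs the GGTT address
 * of the batch together with its length in cachelines in the low bits,
 * while INDIRECT_CTX_OFFSET holds the point (in cachelines, shifted into
 * place) at which the batch is executed during context restore.
 */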
static void
lrc_setup_indirect_ctx(u32 *regs,
		       const struct intel_engine_cs *engine,
		       u32 ctx_bb_ggtt_addr,
		       u32 size)
{
	GEM_BUG_ON(!size);
	GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
	GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
	regs[lrc_ring_indirect_ptr(engine) + 1] =
		ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);

	GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
	regs[lrc_ring_indirect_offset(engine) + 1] =
		lrc_ring_indirect_offset_default(engine) << 6;
}

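/*
 * CTX_CONTEXT_CONTROL is a masked register: the upper 16 bits select which
 * of the lower 16 bits are actually written, which is what the
 * _MASKED_BIT_ENABLE()/_MASKED_BIT_DISABLE() combination below constructs.
 */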
static void init_common_regs(u32 * const regs,
			     const struct intel_context *ce,
			     const struct intel_engine_cs *engine,
			     bool inhibit)
{
	u32 ctl;
	int loc;

	ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
	ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
	if (inhibit)
		ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
	if (GRAPHICS_VER(engine->i915) < 11)
		ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
					   CTX_CTRL_RS_CTX_ENABLE);
	regs[CTX_CONTEXT_CONTROL] = ctl;

	regs[CTX_TIMESTAMP] = ce->stats.runtime.last;

	loc = lrc_ring_bb_offset(engine);
	if (loc != -1)
		regs[loc + 1] = 0;
}

static void init_wa_bb_regs(u32 * const regs,
			    const struct intel_engine_cs *engine)
{
	const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;

	if (wa_ctx->per_ctx.size) {
		const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);

		GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
		regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
			(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
	}

	if (wa_ctx->indirect_ctx.size) {
		lrc_setup_indirect_ctx(regs, engine,
				       i915_ggtt_offset(wa_ctx->vma) +
				       wa_ctx->indirect_ctx.offset,
				       wa_ctx->indirect_ctx.size);
	}
}

static void init_ppgtt_regs(u32 *regs, const struct i915_ppgtt *ppgtt)
{
	if (i915_vm_is_4lvl(&ppgtt->vm)) {
		/*
		 * 64b PPGTT (48bit canonical):
		 * PDP0_DESCRIPTOR contains the base address to PML4 and
		 * other PDP Descriptors are ignored.
		 */
		ASSIGN_CTX_PML4(ppgtt, regs);
	} else {
		ASSIGN_CTX_PDP(ppgtt, regs, 3);
		ASSIGN_CTX_PDP(ppgtt, regs, 2);
		ASSIGN_CTX_PDP(ppgtt, regs, 1);
		ASSIGN_CTX_PDP(ppgtt, regs, 0);
	}
}

static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
{
	if (i915_is_ggtt(vm))
		return i915_vm_to_ggtt(vm)->alias;
	else
		return i915_vm_to_ppgtt(vm);
}

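/*
 * Clear STOP_RING in the context copy of RING_MI_MODE with a masked write:
 * the value bit is cleared while the corresponding mask bit
 * (STOP_RING << 16) is set, so the clear takes effect on context load.
 */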
static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
{
	int x;

	x = lrc_ring_mi_mode(engine);
	if (x != -1) {
		regs[x + 1] &= ~STOP_RING;
		regs[x + 1] |= STOP_RING << 16;
	}
}

static void __lrc_init_regs(u32 *regs,
			    const struct intel_context *ce,
			    const struct intel_engine_cs *engine,
			    bool inhibit)
{
	/*
	 * A context is actually a big batch buffer with several
	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
	 * values we are setting here are only for the first context restore:
	 * on a subsequent save, the GPU will recreate this batch buffer with
	 * new values (including all the missing MI_LOAD_REGISTER_IMM commands
	 * that we are not initializing here).
	 *
	 * This must be kept consistent with virtual_update_register_offsets().
	 */

	if (inhibit)
		memset(regs, 0, PAGE_SIZE);

	set_offsets(regs, reg_offsets(engine), engine, inhibit);

	init_common_regs(regs, ce, engine, inhibit);
	init_ppgtt_regs(regs, vm_alias(ce->vm));

	init_wa_bb_regs(regs, engine);

	__reset_stop_ring(regs, engine);
}

void lrc_init_regs(const struct intel_context *ce,
		   const struct intel_engine_cs *engine,
		   bool inhibit)
{
	__lrc_init_regs(ce->lrc_reg_state, ce, engine, inhibit);
}

void lrc_reset_regs(const struct intel_context *ce,
		    const struct intel_engine_cs *engine)
{
	__reset_stop_ring(ce->lrc_reg_state, engine);
}

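/*
 * With CONFIG_DRM_I915_DEBUG_GEM enabled, the extra page allocated after
 * the context image is filled with a poison pattern here and verified on
 * unpin, to catch anything writing past the end of the image.
 */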
static void
set_redzone(void *vaddr, const struct intel_engine_cs *engine)
{
	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return;

	vaddr += engine->context_size;

	memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
}

static void
check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
{
	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return;

	vaddr += engine->context_size;

	if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
		drm_err_once(&engine->i915->drm,
			     "%s context redzone overwritten!\n",
			     engine->name);
}

static u32 context_wa_bb_offset(const struct intel_context *ce)
{
	return PAGE_SIZE * ce->wa_bb_page;
}

static u32 *context_indirect_bb(const struct intel_context *ce)
{
	void *ptr;

	GEM_BUG_ON(!ce->wa_bb_page);

	ptr = ce->lrc_reg_state;
	ptr -= LRC_STATE_OFFSET; /* back to start of context image */
	ptr += context_wa_bb_offset(ce);

	return ptr;
}

void lrc_init_state(struct intel_context *ce,
		    struct intel_engine_cs *engine,
		    void *state)
{
	bool inhibit = true;

	set_redzone(state, engine);

	if (engine->default_state) {
		shmem_read(engine->default_state, 0,
			   state, engine->context_size);
		__set_bit(CONTEXT_VALID_BIT, &ce->flags);
		inhibit = false;
	}

	/* Clear the ppHWSP (inc. per-context counters) */
	memset(state, 0, PAGE_SIZE);

	/* Clear the indirect wa and storage */
	if (ce->wa_bb_page)
		memset(state + context_wa_bb_offset(ce), 0, PAGE_SIZE);

	/*
	 * The second page of the context object contains some registers which
	 * must be set up prior to the first execution.
	 */
	__lrc_init_regs(state + LRC_STATE_OFFSET, ce, engine, inhibit);
}

u32 lrc_indirect_bb(const struct intel_context *ce)
{
	return i915_ggtt_offset(ce->state) + context_wa_bb_offset(ce);
}

static u32 *setup_predicate_disable_wa(const struct intel_context *ce, u32 *cs)
{
	/* If predication is active, this will be noop'ed */
	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT | (4 - 2);
	*cs++ = lrc_indirect_bb(ce) + DG2_PREDICATE_RESULT_WA;
	*cs++ = 0;
	*cs++ = 0; /* No predication */

	/* predicated end, only terminates if SET_PREDICATE_RESULT:0 is clear */
	*cs++ = MI_BATCH_BUFFER_END | BIT(15);
	*cs++ = MI_SET_PREDICATE | MI_SET_PREDICATE_DISABLE;

	/* Instructions are no longer predicated (disabled), we can proceed */
	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT | (4 - 2);
	*cs++ = lrc_indirect_bb(ce) + DG2_PREDICATE_RESULT_WA;
	*cs++ = 0;
	*cs++ = 1; /* enable predication before the next BB */

	*cs++ = MI_BATCH_BUFFER_END;
	GEM_BUG_ON(offset_in_page(cs) > DG2_PREDICATE_RESULT_WA);

	return cs;
}

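/*
 * Layout of the state object as built up below: the context image itself
 * (rounded up to a page), then a redzone page under DEBUG_GEM, then a
 * wa_bb page on gen12+ for the indirect context workarounds, and finally
 * the parent scratch area when GuC parallel submission is used.
 */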
static struct i915_vma *
__lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 context_size;

	context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);

	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		context_size += I915_GTT_PAGE_SIZE; /* for redzone */

	if (GRAPHICS_VER(engine->i915) >= 12) {
		ce->wa_bb_page = context_size / PAGE_SIZE;
		context_size += PAGE_SIZE;
	}

	if (intel_context_is_parent(ce) && intel_engine_uses_guc(engine)) {
		ce->parallel.guc.parent_page = context_size / PAGE_SIZE;
		context_size += PARENT_SCRATCH_SIZE;
	}

	obj = i915_gem_object_create_lmem(engine->i915, context_size,
					  I915_BO_ALLOC_PM_VOLATILE);
	if (IS_ERR(obj))
		obj = i915_gem_object_create_shmem(engine->i915, context_size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	return vma;
}

static struct intel_timeline *
pinned_timeline(struct intel_context *ce, struct intel_engine_cs *engine)
{
	struct intel_timeline *tl = fetch_and_zero(&ce->timeline);

	return intel_timeline_create_from_engine(engine, page_unmask_bits(tl));
}

int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine)
{
	struct intel_ring *ring;
	struct i915_vma *vma;
	int err;

	GEM_BUG_ON(ce->state);

	vma = __lrc_alloc_state(ce, engine);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	ring = intel_engine_create_ring(engine, ce->ring_size);
	if (IS_ERR(ring)) {
		err = PTR_ERR(ring);
		goto err_vma;
	}

	if (!page_mask_bits(ce->timeline)) {
		struct intel_timeline *tl;

		/*
		 * Use the static global HWSP for the kernel context, and
		 * a dynamically allocated cacheline for everyone else.
		 */
		if (unlikely(ce->timeline))
			tl = pinned_timeline(ce, engine);
		else
			tl = intel_timeline_create(engine->gt);
		if (IS_ERR(tl)) {
			err = PTR_ERR(tl);
			goto err_ring;
		}

		ce->timeline = tl;
	}

	ce->ring = ring;
	ce->state = vma;

	return 0;

err_ring:
	intel_ring_put(ring);
err_vma:
	i915_vma_put(vma);
	return err;
}

void lrc_reset(struct intel_context *ce)
{
	GEM_BUG_ON(!intel_context_is_pinned(ce));

	intel_ring_reset(ce->ring, ce->ring->emit);

	/* Scrub away the garbage */
	lrc_init_regs(ce, ce->engine, true);
	ce->lrc.lrca = lrc_update_regs(ce, ce->engine, ce->ring->tail);
}

int
lrc_pre_pin(struct intel_context *ce,
	    struct intel_engine_cs *engine,
	    struct i915_gem_ww_ctx *ww,
	    void **vaddr)
{
	GEM_BUG_ON(!ce->state);
	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));

	*vaddr = i915_gem_object_pin_map(ce->state->obj,
					 i915_coherent_map_type(ce->engine->i915,
								ce->state->obj,
								false) |
					 I915_MAP_OVERRIDE);

	return PTR_ERR_OR_ZERO(*vaddr);
}

int
lrc_pin(struct intel_context *ce,
	struct intel_engine_cs *engine,
	void *vaddr)
{
	ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;

	if (!__test_and_set_bit(CONTEXT_INIT_BIT, &ce->flags))
		lrc_init_state(ce, engine, vaddr);

	ce->lrc.lrca = lrc_update_regs(ce, engine, ce->ring->tail);
	return 0;
}

void lrc_unpin(struct intel_context *ce)
{
	if (unlikely(ce->parallel.last_rq)) {
		i915_request_put(ce->parallel.last_rq);
		ce->parallel.last_rq = NULL;
	}
	check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
		      ce->engine);
}

void lrc_post_unpin(struct intel_context *ce)
{
	i915_gem_object_unpin_map(ce->state->obj);
}

void lrc_fini(struct intel_context *ce)
{
	if (!ce->state)
		return;

	intel_ring_put(fetch_and_zero(&ce->ring));
	i915_vma_put(fetch_and_zero(&ce->state));
}

void lrc_destroy(struct kref *kref)
{
	struct intel_context *ce = container_of(kref, typeof(*ce), ref);

	GEM_BUG_ON(!i915_active_is_idle(&ce->active));
	GEM_BUG_ON(intel_context_is_pinned(ce));

	lrc_fini(ce);

	intel_context_fini(ce);
	intel_context_free(ce);
}

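/*
 * gen12_emit_timestamp_wa() reloads the saved CTX_TIMESTAMP from the
 * context image into CS GPR0 via MI_LOAD_REGISTER_MEM and then copies
 * GPR0 back into the ring's CTX_TIMESTAMP register twice via
 * MI_LOAD_REGISTER_REG, as part of the gen12 context restore workarounds.
 */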
static u32 *
gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
{
	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
		CTX_TIMESTAMP * sizeof(u32);
	*cs++ = 0;

	*cs++ = MI_LOAD_REGISTER_REG |
		MI_LRR_SOURCE_CS_MMIO |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));

	*cs++ = MI_LOAD_REGISTER_REG |
		MI_LRR_SOURCE_CS_MMIO |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));

	return cs;
}

static u32 *
gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
{
	GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);

	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
		(lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
	*cs++ = 0;

	return cs;
}

static u32 *
gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
{
	GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1);

	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
		(lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32);
	*cs++ = 0;

	*cs++ = MI_LOAD_REGISTER_REG |
		MI_LRR_SOURCE_CS_MMIO |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));

	return cs;
}

/*
 * On DG2, an RCS restore hang is detected during context restore of a
 * preempted context in GPGPU mode. This is extremely timing dependent.
 * To address it, the software wa batch buffer (wabb) below is applied on
 * DG2 A steppings.
 */
static u32 *
dg2_emit_rcs_hang_wabb(const struct intel_context *ce, u32 *cs)
{
	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(GEN12_STATE_ACK_DEBUG);
	*cs++ = 0x21;

	*cs++ = MI_LOAD_REGISTER_REG;
	*cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base));
	*cs++ = i915_mmio_reg_offset(GEN12_CULLBIT1);

	*cs++ = MI_LOAD_REGISTER_REG;
	*cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base));
	*cs++ = i915_mmio_reg_offset(GEN12_CULLBIT2);

	return cs;
}

/*
 * The bspec's tuning guide asks us to program a vertical watermark value of
 * 0x3FF. However this register is not saved/restored properly by the
 * hardware, so we're required to apply the desired value via INDIRECT_CTX
 * batch buffer to ensure the value takes effect properly. All other bits
 * in this register should remain at 0 (the hardware default).
 */
static u32 *
dg2_emit_draw_watermark_setting(u32 *cs)
{
	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(DRAW_WATERMARK);
	*cs++ = REG_FIELD_PREP(VERT_WM_VAL, 0x3FF);

	return cs;
}

static u32 *
gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
{
	cs = gen12_emit_timestamp_wa(ce, cs);
	cs = gen12_emit_cmd_buf_wa(ce, cs);
	cs = gen12_emit_restore_scratch(ce, cs);

	/* Wa_22011450934:dg2 */
	if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_A0, STEP_B0) ||
	    IS_DG2_GRAPHICS_STEP(ce->engine->i915, G11, STEP_A0, STEP_B0))
		cs = dg2_emit_rcs_hang_wabb(ce, cs);

	/* Wa_16013000631:dg2 */
	if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) ||
	    IS_DG2_G11(ce->engine->i915))
		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0);

	/* hsdes: 1809175790 */
	if (!HAS_FLAT_CCS(ce->engine->i915))
		cs = gen12_emit_aux_table_inv(ce->engine->gt,
					      cs, GEN12_GFX_CCS_AUX_NV);

	/* Wa_16014892111 */
	if (IS_DG2(ce->engine->i915))
		cs = dg2_emit_draw_watermark_setting(cs);

	return cs;
}

static u32 *
gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
{
	cs = gen12_emit_timestamp_wa(ce, cs);
	cs = gen12_emit_restore_scratch(ce, cs);

	/* Wa_16013000631:dg2 */
	if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) ||
	    IS_DG2_G11(ce->engine->i915))
		if (ce->engine->class == COMPUTE_CLASS)
			cs = gen8_emit_pipe_control(cs,
						    PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE,
						    0);

	/* hsdes: 1809175790 */
	if (!HAS_FLAT_CCS(ce->engine->i915)) {
		if (ce->engine->class == VIDEO_DECODE_CLASS)
			cs = gen12_emit_aux_table_inv(ce->engine->gt,
						      cs, GEN12_VD0_AUX_NV);
		else if (ce->engine->class == VIDEO_ENHANCEMENT_CLASS)
			cs = gen12_emit_aux_table_inv(ce->engine->gt,
						      cs, GEN12_VE0_AUX_NV);
	}

	return cs;
}

static void
setup_indirect_ctx_bb(const struct intel_context *ce,
		      const struct intel_engine_cs *engine,
		      u32 *(*emit)(const struct intel_context *, u32 *))
{
	u32 * const start = context_indirect_bb(ce);
	u32 *cs;

	cs = emit(ce, start);
	GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
	while ((unsigned long)cs % CACHELINE_BYTES)
		*cs++ = MI_NOOP;

	GEM_BUG_ON(cs - start > DG2_PREDICATE_RESULT_BB / sizeof(*start));
	setup_predicate_disable_wa(ce, start + DG2_PREDICATE_RESULT_BB / sizeof(*start));

	lrc_setup_indirect_ctx(ce->lrc_reg_state, engine,
			       lrc_indirect_bb(ce),
			       (cs - start) * sizeof(*cs));
}

/*
 * The context descriptor encodes various attributes of a context,
 * including its GTT address and some flags. Because it's fairly
 * expensive to calculate, we'll just do it once and cache the result,
 * which remains valid until the context is unpinned.
 *
 * This is what a descriptor looks like, from LSB to MSB::
 *
 *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
 *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
 *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
 *      bits 53-54:    mbz, reserved for use by hardware
 *      bits 55-63:    group ID, currently unused and set to 0
 *
 * Starting from Gen11, the upper dword of the descriptor has a new format:
 *
 *      bits 32-36:    reserved
 *      bits 37-47:    SW context ID
 *      bits 48-53:    engine instance
 *      bit 54:        mbz, reserved for use by hardware
 *      bits 55-60:    SW counter
 *      bits 61-63:    engine class
 *
 * On Xe_HP, the upper dword of the descriptor has a new format:
 *
 *      bits 32-37:    virtual function number
 *      bit 38:        mbz, reserved for use by hardware
 *      bits 39-54:    SW context ID
 *      bits 55-57:    reserved
 *      bits 58-63:    SW counter
 *
 * engine info, SW context ID and SW counter need to form a unique number
 * (Context ID) per lrc.
 */
static u32 lrc_descriptor(const struct intel_context *ce)
{
	u32 desc;

	desc = INTEL_LEGACY_32B_CONTEXT;
	if (i915_vm_is_4lvl(ce->vm))
		desc = INTEL_LEGACY_64B_CONTEXT;
	desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;

	desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
	if (GRAPHICS_VER(ce->vm->i915) == 8)
		desc |= GEN8_CTX_L3LLC_COHERENT;

	return i915_ggtt_offset(ce->state) | desc;
}

u32 lrc_update_regs(const struct intel_context *ce,
		    const struct intel_engine_cs *engine,
		    u32 head)
{
	struct intel_ring *ring = ce->ring;
	u32 *regs = ce->lrc_reg_state;

	GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));

	regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
	regs[CTX_RING_HEAD] = head;
	regs[CTX_RING_TAIL] = ring->tail;
	regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;

	/* RPCS */
	if (engine->class == RENDER_CLASS) {
		regs[CTX_R_PWR_CLK_STATE] =
			intel_sseu_make_rpcs(engine->gt, &ce->sseu);

		i915_oa_init_reg_state(ce, engine);
	}

	if (ce->wa_bb_page) {
		u32 *(*fn)(const struct intel_context *ce, u32 *cs);

		fn = gen12_emit_indirect_ctx_xcs;
		if (ce->engine->class == RENDER_CLASS)
			fn = gen12_emit_indirect_ctx_rcs;

		/* Mutually exclusive wrt the global indirect bb */
		GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
		setup_indirect_ctx_bb(ce, engine, fn);
	}

	return lrc_descriptor(ce) | CTX_DESC_FORCE_RESTORE;
}

void lrc_update_offsets(struct intel_context *ce,
			struct intel_engine_cs *engine)
{
	set_offsets(ce->lrc_reg_state, reg_offsets(engine), engine, false);
}

void lrc_check_regs(const struct intel_context *ce,
		    const struct intel_engine_cs *engine,
		    const char *when)
{
	const struct intel_ring *ring = ce->ring;
	u32 *regs = ce->lrc_reg_state;
	bool valid = true;
	int x;

	if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
		pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
		       engine->name,
		       regs[CTX_RING_START],
		       i915_ggtt_offset(ring->vma));
		regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
		valid = false;
	}

	if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
	    (RING_CTL_SIZE(ring->size) | RING_VALID)) {
		pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
		       engine->name,
		       regs[CTX_RING_CTL],
		       (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
		regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
		valid = false;
	}

	x = lrc_ring_mi_mode(engine);
	if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
		pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
		       engine->name, regs[x + 1]);
		regs[x + 1] &= ~STOP_RING;
		regs[x + 1] |= STOP_RING << 16;
		valid = false;
	}

	WARN_ONCE(!valid, "Invalid lrc state found %s submission\n", when);
}

/*
 * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after the
 * PIPE_CONTROL instruction. This is required for the flush to happen
 * correctly, but there is a slight complication: as this is applied in a WA
 * batch where the values are only initialized once, we cannot read the
 * register value at the beginning and reuse it later; hence we save its
 * value to memory, upload a constant value with bit21 set and then restore
 * it back with the saved value. To simplify the WA, a constant value is
 * formed by using the default value of this register. This shouldn't be a
 * problem because we are only modifying it for a short period and this
 * batch is non-preemptible. We could of course use additional instructions
 * that read the actual value of the register at that time and set our bit
 * of interest, but that would make the WA more complicated.
 *
 * This WA is also required for Gen9, so extracting it as a function avoids
 * code duplication.
 */
static u32 *
gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
{
	/* NB no one else is allowed to scribble over scratch + 256! */
	*batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
	*batch++ = intel_gt_scratch_offset(engine->gt,
					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
	*batch++ = 0;

	*batch++ = MI_LOAD_REGISTER_IMM(1);
	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
	*batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;

	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_CS_STALL |
				       PIPE_CONTROL_DC_FLUSH_ENABLE,
				       0);

	*batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
	*batch++ = intel_gt_scratch_offset(engine->gt,
					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
	*batch++ = 0;

	return batch;
}

/*
 * Typically we only have one indirect_ctx and one per_ctx batch buffer,
 * initialized at the beginning and shared across all contexts, but this
 * field helps us to have multiple batches at different offsets and select
 * them based on some criteria. At the moment this batch always starts at
 * the beginning of the page and we don't have multiple wa_ctx batch
 * buffers.
 *
 * The number of WAs applied is not known at the beginning; we use this
 * field to return the number of DWORDs written.
 *
 * Note that this batch does not contain MI_BATCH_BUFFER_END, so NOOPs are
 * added as padding to make it cacheline aligned. MI_BATCH_BUFFER_END is
 * added to the per-ctx batch, and together they make a complete batch
 * buffer.
 */
static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
	/* WaDisableCtxRestoreArbitration:bdw,chv */
	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	/* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
	if (IS_BROADWELL(engine->i915))
		batch = gen8_emit_flush_coherentl3_wa(engine, batch);

	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
	/* Actual scratch location is at 128 bytes offset */
	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_FLUSH_L3 |
				       PIPE_CONTROL_STORE_DATA_INDEX |
				       PIPE_CONTROL_CS_STALL |
				       PIPE_CONTROL_QW_WRITE,
				       LRC_PPHWSP_SCRATCH_ADDR);

	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	/* Pad to end of cacheline */
	while ((unsigned long)batch % CACHELINE_BYTES)
		*batch++ = MI_NOOP;

	/*
	 * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
	 * execution depends on the length specified in terms of cache lines
	 * in the register CTX_RCS_INDIRECT_CTX
	 */

	return batch;
}

struct lri {
	i915_reg_t reg;
	u32 value;
};

static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
{
	GEM_BUG_ON(!count || count > 63);

	*batch++ = MI_LOAD_REGISTER_IMM(count);
	do {
		*batch++ = i915_mmio_reg_offset(lri->reg);
		*batch++ = lri->value;
	} while (lri++, --count);
	*batch++ = MI_NOOP;

	return batch;
}

static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
	static const struct lri lri[] = {
		/* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
		{
			COMMON_SLICE_CHICKEN2,
			__MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
				       0),
		},

		/* BSpec: 11391 */
		{
			FF_SLICE_CHICKEN,
			__MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
				       FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
		},

		/* BSpec: 11299 */
		{
			_3D_CHICKEN3,
			__MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
				       _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
		}
	};

	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
	batch = gen8_emit_flush_coherentl3_wa(engine, batch);

	/* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_FLUSH_L3 |
				       PIPE_CONTROL_STORE_DATA_INDEX |
				       PIPE_CONTROL_CS_STALL |
				       PIPE_CONTROL_QW_WRITE,
				       LRC_PPHWSP_SCRATCH_ADDR);

	batch = emit_lri(batch, lri, ARRAY_SIZE(lri));

	/* WaMediaPoolStateCmdInWABB:bxt,glk */
	if (HAS_POOLED_EU(engine->i915)) {
		/*
		 * The EU pool configuration is set up along with the golden
		 * context during context initialization. This value depends
		 * on device type (2x6 or 3x6) and needs to be updated based
		 * on which subslice is disabled, especially for 2x6 devices.
		 * However, it is safe to load the default configuration of a
		 * 3x6 device instead of masking off the corresponding bits,
		 * because the HW ignores bits of a disabled subslice and
		 * drops down to the appropriate config. Please see
		 * render_state_setup() in i915_gem_render_state.c for the
		 * possible configurations; to avoid duplication they are not
		 * shown here again.
		 */
		*batch++ = GEN9_MEDIA_POOL_STATE;
		*batch++ = GEN9_MEDIA_POOL_ENABLE;
		*batch++ = 0x00777000;
		*batch++ = 0;
		*batch++ = 0;
		*batch++ = 0;
	}

	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	/* Pad to end of cacheline */
	while ((unsigned long)batch % CACHELINE_BYTES)
		*batch++ = MI_NOOP;

	return batch;
}

#define CTX_WA_BB_SIZE (PAGE_SIZE)

static int lrc_create_wa_ctx(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int err;

	obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	engine->wa_ctx.vma = vma;
	return 0;

err:
	i915_gem_object_put(obj);
	return err;
}

void lrc_fini_wa_ctx(struct intel_engine_cs *engine)
{
	i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
}

typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);

void lrc_init_wa_ctx(struct intel_engine_cs *engine)
{
	struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
	struct i915_wa_ctx_bb *wa_bb[] = {
		&wa_ctx->indirect_ctx, &wa_ctx->per_ctx
	};
	wa_bb_func_t wa_bb_fn[ARRAY_SIZE(wa_bb)];
	struct i915_gem_ww_ctx ww;
	void *batch, *batch_ptr;
	unsigned int i;
	int err;

	if (GRAPHICS_VER(engine->i915) >= 11 ||
	    !(engine->flags & I915_ENGINE_HAS_RCS_REG_STATE))
		return;

	if (GRAPHICS_VER(engine->i915) == 9) {
		wa_bb_fn[0] = gen9_init_indirectctx_bb;
		wa_bb_fn[1] = NULL;
	} else if (GRAPHICS_VER(engine->i915) == 8) {
		wa_bb_fn[0] = gen8_init_indirectctx_bb;
		wa_bb_fn[1] = NULL;
	}

	err = lrc_create_wa_ctx(engine);
	if (err) {
		/*
		 * We continue even if we fail to initialize the WA batch,
		 * because we only expect rare glitches and nothing critical
		 * enough to prevent us from using the GPU.
		 */
		drm_err(&engine->i915->drm,
			"Ignoring context switch w/a allocation error:%d\n",
			err);
		return;
	}

	if (!engine->wa_ctx.vma)
		return;

	i915_gem_ww_ctx_init(&ww, true);
retry:
	err = i915_gem_object_lock(wa_ctx->vma->obj, &ww);
	if (!err)
		err = i915_ggtt_pin(wa_ctx->vma, &ww, 0, PIN_HIGH);
	if (err)
		goto err;

	batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto err_unpin;
	}

	/*
	 * Emit the two workaround batch buffers, recording the offset from the
	 * start of the workaround batch buffer object for each and their
	 * respective sizes.
	 */
	batch_ptr = batch;
	for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
		wa_bb[i]->offset = batch_ptr - batch;
		if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
						  CACHELINE_BYTES))) {
			err = -EINVAL;
			break;
		}
		if (wa_bb_fn[i])
			batch_ptr = wa_bb_fn[i](engine, batch_ptr);
		wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
	}
	GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_SIZE);

	__i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch);
	__i915_gem_object_release_map(wa_ctx->vma->obj);

	/* Verify that we can handle failure to setup the wa_ctx */
	if (!err)
		err = i915_inject_probe_error(engine->i915, -ENODEV);

err_unpin:
	if (err)
		i915_vma_unpin(wa_ctx->vma);
err:
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);

	if (err) {
		i915_vma_put(engine->wa_ctx.vma);

		/* Clear all flags to prevent further use */
		memset(wa_ctx, 0, sizeof(*wa_ctx));
	}
}

static void st_runtime_underflow(struct intel_context_stats *stats, s32 dt)
{
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
	stats->runtime.num_underflow++;
	stats->runtime.max_underflow =
		max_t(u32, stats->runtime.max_underflow, -dt);
#endif
}

static u32 lrc_get_runtime(const struct intel_context *ce)
{
	/*
	 * We can use either ppHWSP[16] which is recorded before the context
	 * switch (and so excludes the cost of context switches) or use the
	 * value from the context image itself, which is saved/restored earlier
	 * and so includes the cost of the save.
	 */
	return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
}

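/*
 * CTX_TIMESTAMP is an unsigned 32-bit counter, so the delta below is
 * computed in u32 arithmetic and then viewed as signed: a wrap of the
 * counter still yields a small positive delta, whereas a negative delta
 * means the saved timestamp went backwards and is reported as underflow.
 */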
void lrc_update_runtime(struct intel_context *ce)
{
	struct intel_context_stats *stats = &ce->stats;
	u32 old;
	s32 dt;

	old = stats->runtime.last;
	stats->runtime.last = lrc_get_runtime(ce);
	dt = stats->runtime.last - old;
	if (!dt)
		return;

	if (unlikely(dt < 0)) {
		CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
			 old, stats->runtime.last, dt);
		st_runtime_underflow(stats, dt);
		return;
	}

	ewma_runtime_add(&stats->runtime.avg, dt);
	stats->runtime.total += dt;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_lrc.c"
#endif