xref: /openbmc/linux/drivers/gpu/drm/i915/gt/intel_lrc.c (revision e93e075d340859af772214c267d27f09f9db3e51)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2014 Intel Corporation
4  */
5 
6 #include "gem/i915_gem_lmem.h"
7 
8 #include "gen8_engine_cs.h"
9 #include "i915_drv.h"
10 #include "i915_perf.h"
11 #include "i915_reg.h"
12 #include "intel_context.h"
13 #include "intel_engine.h"
14 #include "intel_engine_regs.h"
15 #include "intel_gpu_commands.h"
16 #include "intel_gt.h"
17 #include "intel_gt_regs.h"
18 #include "intel_lrc.h"
19 #include "intel_lrc_reg.h"
20 #include "intel_ring.h"
21 #include "shmem_utils.h"
22 
23 /*
24  * The per-platform tables are u8-encoded in @data. Decode @data and set the
25  * addresses' offset and commands in @regs. The following encoding is used
26  * for each byte. There are 2 steps: decoding commands and decoding addresses.
27  *
28  * Commands:
29  * [7]: create NOPs - number of NOPs are set in lower bits
30  * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set
31  *      MI_LRI_FORCE_POSTED
32  * [5:0]: Number of NOPs or registers to set values to in case of
33  *        MI_LOAD_REGISTER_IMM
34  *
35  * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
36  * number of registers. They are set by using the REG/REG16 macros: the former
37  * is used for offsets smaller than 0x200 while the latter is for values bigger
38  * than that. Those macros already set all the bits documented below correctly:
39  *
40  * [7]: When a register offset needs more than 6 bits, use additional bytes, to
41  *      follow, for the lower bits
42  * [6:0]: Register offset, without considering the engine base.
43  *
44  * This function only tweaks the commands and register offsets. Values are not
45  * filled out.
46  */
47 static void set_offsets(u32 *regs,
48 			const u8 *data,
49 			const struct intel_engine_cs *engine,
50 			bool close)
51 #define NOP(x) (BIT(7) | (x))
52 #define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
53 #define POSTED BIT(0)
54 #define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
55 #define REG16(x) \
56 	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
57 	(((x) >> 2) & 0x7f)
58 #define END 0
59 {
60 	const u32 base = engine->mmio_base;
61 
62 	while (*data) {
63 		u8 count, flags;
64 
65 		if (*data & BIT(7)) { /* skip */
66 			count = *data++ & ~BIT(7);
67 			regs += count;
68 			continue;
69 		}
70 
71 		count = *data & 0x3f;
72 		flags = *data >> 6;
73 		data++;
74 
75 		*regs = MI_LOAD_REGISTER_IMM(count);
76 		if (flags & POSTED)
77 			*regs |= MI_LRI_FORCE_POSTED;
78 		if (GRAPHICS_VER(engine->i915) >= 11)
79 			*regs |= MI_LRI_LRM_CS_MMIO;
80 		regs++;
81 
82 		GEM_BUG_ON(!count);
83 		do {
84 			u32 offset = 0;
85 			u8 v;
86 
87 			do {
88 				v = *data++;
89 				offset <<= 7;
90 				offset |= v & ~BIT(7);
91 			} while (v & BIT(7));
92 
93 			regs[0] = base + (offset << 2);
94 			regs += 2;
95 		} while (--count);
96 	}
97 
98 	if (close) {
99 		/* Close the batch; used mainly by live_lrc_layout() */
100 		*regs = MI_BATCH_BUFFER_END;
101 		if (GRAPHICS_VER(engine->i915) >= 11)
102 			*regs |= BIT(0);
103 	}
104 }
105 
106 static const u8 gen8_xcs_offsets[] = {
107 	NOP(1),
108 	LRI(11, 0),
109 	REG16(0x244),
110 	REG(0x034),
111 	REG(0x030),
112 	REG(0x038),
113 	REG(0x03c),
114 	REG(0x168),
115 	REG(0x140),
116 	REG(0x110),
117 	REG(0x11c),
118 	REG(0x114),
119 	REG(0x118),
120 
121 	NOP(9),
122 	LRI(9, 0),
123 	REG16(0x3a8),
124 	REG16(0x28c),
125 	REG16(0x288),
126 	REG16(0x284),
127 	REG16(0x280),
128 	REG16(0x27c),
129 	REG16(0x278),
130 	REG16(0x274),
131 	REG16(0x270),
132 
133 	NOP(13),
134 	LRI(2, 0),
135 	REG16(0x200),
136 	REG(0x028),
137 
138 	END
139 };
140 
141 static const u8 gen9_xcs_offsets[] = {
142 	NOP(1),
143 	LRI(14, POSTED),
144 	REG16(0x244),
145 	REG(0x034),
146 	REG(0x030),
147 	REG(0x038),
148 	REG(0x03c),
149 	REG(0x168),
150 	REG(0x140),
151 	REG(0x110),
152 	REG(0x11c),
153 	REG(0x114),
154 	REG(0x118),
155 	REG(0x1c0),
156 	REG(0x1c4),
157 	REG(0x1c8),
158 
159 	NOP(3),
160 	LRI(9, POSTED),
161 	REG16(0x3a8),
162 	REG16(0x28c),
163 	REG16(0x288),
164 	REG16(0x284),
165 	REG16(0x280),
166 	REG16(0x27c),
167 	REG16(0x278),
168 	REG16(0x274),
169 	REG16(0x270),
170 
171 	NOP(13),
172 	LRI(1, POSTED),
173 	REG16(0x200),
174 
175 	NOP(13),
176 	LRI(44, POSTED),
177 	REG(0x028),
178 	REG(0x09c),
179 	REG(0x0c0),
180 	REG(0x178),
181 	REG(0x17c),
182 	REG16(0x358),
183 	REG(0x170),
184 	REG(0x150),
185 	REG(0x154),
186 	REG(0x158),
187 	REG16(0x41c),
188 	REG16(0x600),
189 	REG16(0x604),
190 	REG16(0x608),
191 	REG16(0x60c),
192 	REG16(0x610),
193 	REG16(0x614),
194 	REG16(0x618),
195 	REG16(0x61c),
196 	REG16(0x620),
197 	REG16(0x624),
198 	REG16(0x628),
199 	REG16(0x62c),
200 	REG16(0x630),
201 	REG16(0x634),
202 	REG16(0x638),
203 	REG16(0x63c),
204 	REG16(0x640),
205 	REG16(0x644),
206 	REG16(0x648),
207 	REG16(0x64c),
208 	REG16(0x650),
209 	REG16(0x654),
210 	REG16(0x658),
211 	REG16(0x65c),
212 	REG16(0x660),
213 	REG16(0x664),
214 	REG16(0x668),
215 	REG16(0x66c),
216 	REG16(0x670),
217 	REG16(0x674),
218 	REG16(0x678),
219 	REG16(0x67c),
220 	REG(0x068),
221 
222 	END
223 };
224 
225 static const u8 gen12_xcs_offsets[] = {
226 	NOP(1),
227 	LRI(13, POSTED),
228 	REG16(0x244),
229 	REG(0x034),
230 	REG(0x030),
231 	REG(0x038),
232 	REG(0x03c),
233 	REG(0x168),
234 	REG(0x140),
235 	REG(0x110),
236 	REG(0x1c0),
237 	REG(0x1c4),
238 	REG(0x1c8),
239 	REG(0x180),
240 	REG16(0x2b4),
241 
242 	NOP(5),
243 	LRI(9, POSTED),
244 	REG16(0x3a8),
245 	REG16(0x28c),
246 	REG16(0x288),
247 	REG16(0x284),
248 	REG16(0x280),
249 	REG16(0x27c),
250 	REG16(0x278),
251 	REG16(0x274),
252 	REG16(0x270),
253 
254 	END
255 };
256 
257 static const u8 dg2_xcs_offsets[] = {
258 	NOP(1),
259 	LRI(15, POSTED),
260 	REG16(0x244),
261 	REG(0x034),
262 	REG(0x030),
263 	REG(0x038),
264 	REG(0x03c),
265 	REG(0x168),
266 	REG(0x140),
267 	REG(0x110),
268 	REG(0x1c0),
269 	REG(0x1c4),
270 	REG(0x1c8),
271 	REG(0x180),
272 	REG16(0x2b4),
273 	REG(0x120),
274 	REG(0x124),
275 
276 	NOP(1),
277 	LRI(9, POSTED),
278 	REG16(0x3a8),
279 	REG16(0x28c),
280 	REG16(0x288),
281 	REG16(0x284),
282 	REG16(0x280),
283 	REG16(0x27c),
284 	REG16(0x278),
285 	REG16(0x274),
286 	REG16(0x270),
287 
288 	END
289 };
290 
291 static const u8 mtl_xcs_offsets[] = {
292 	NOP(1),
293 	LRI(13, POSTED),
294 	REG16(0x244),
295 	REG(0x034),
296 	REG(0x030),
297 	REG(0x038),
298 	REG(0x03c),
299 	REG(0x168),
300 	REG(0x140),
301 	REG(0x110),
302 	REG(0x1c0),
303 	REG(0x1c4),
304 	REG(0x1c8),
305 	REG(0x180),
306 	REG16(0x2b4),
307 	NOP(4),
308 
309 	NOP(1),
310 	LRI(9, POSTED),
311 	REG16(0x3a8),
312 	REG16(0x28c),
313 	REG16(0x288),
314 	REG16(0x284),
315 	REG16(0x280),
316 	REG16(0x27c),
317 	REG16(0x278),
318 	REG16(0x274),
319 	REG16(0x270),
320 
321 	END
322 };
323 
324 static const u8 gen8_rcs_offsets[] = {
325 	NOP(1),
326 	LRI(14, POSTED),
327 	REG16(0x244),
328 	REG(0x034),
329 	REG(0x030),
330 	REG(0x038),
331 	REG(0x03c),
332 	REG(0x168),
333 	REG(0x140),
334 	REG(0x110),
335 	REG(0x11c),
336 	REG(0x114),
337 	REG(0x118),
338 	REG(0x1c0),
339 	REG(0x1c4),
340 	REG(0x1c8),
341 
342 	NOP(3),
343 	LRI(9, POSTED),
344 	REG16(0x3a8),
345 	REG16(0x28c),
346 	REG16(0x288),
347 	REG16(0x284),
348 	REG16(0x280),
349 	REG16(0x27c),
350 	REG16(0x278),
351 	REG16(0x274),
352 	REG16(0x270),
353 
354 	NOP(13),
355 	LRI(1, 0),
356 	REG(0x0c8),
357 
358 	END
359 };
360 
361 static const u8 gen9_rcs_offsets[] = {
362 	NOP(1),
363 	LRI(14, POSTED),
364 	REG16(0x244),
365 	REG(0x34),
366 	REG(0x30),
367 	REG(0x38),
368 	REG(0x3c),
369 	REG(0x168),
370 	REG(0x140),
371 	REG(0x110),
372 	REG(0x11c),
373 	REG(0x114),
374 	REG(0x118),
375 	REG(0x1c0),
376 	REG(0x1c4),
377 	REG(0x1c8),
378 
379 	NOP(3),
380 	LRI(9, POSTED),
381 	REG16(0x3a8),
382 	REG16(0x28c),
383 	REG16(0x288),
384 	REG16(0x284),
385 	REG16(0x280),
386 	REG16(0x27c),
387 	REG16(0x278),
388 	REG16(0x274),
389 	REG16(0x270),
390 
391 	NOP(13),
392 	LRI(1, 0),
393 	REG(0xc8),
394 
395 	NOP(13),
396 	LRI(44, POSTED),
397 	REG(0x28),
398 	REG(0x9c),
399 	REG(0xc0),
400 	REG(0x178),
401 	REG(0x17c),
402 	REG16(0x358),
403 	REG(0x170),
404 	REG(0x150),
405 	REG(0x154),
406 	REG(0x158),
407 	REG16(0x41c),
408 	REG16(0x600),
409 	REG16(0x604),
410 	REG16(0x608),
411 	REG16(0x60c),
412 	REG16(0x610),
413 	REG16(0x614),
414 	REG16(0x618),
415 	REG16(0x61c),
416 	REG16(0x620),
417 	REG16(0x624),
418 	REG16(0x628),
419 	REG16(0x62c),
420 	REG16(0x630),
421 	REG16(0x634),
422 	REG16(0x638),
423 	REG16(0x63c),
424 	REG16(0x640),
425 	REG16(0x644),
426 	REG16(0x648),
427 	REG16(0x64c),
428 	REG16(0x650),
429 	REG16(0x654),
430 	REG16(0x658),
431 	REG16(0x65c),
432 	REG16(0x660),
433 	REG16(0x664),
434 	REG16(0x668),
435 	REG16(0x66c),
436 	REG16(0x670),
437 	REG16(0x674),
438 	REG16(0x678),
439 	REG16(0x67c),
440 	REG(0x68),
441 
442 	END
443 };
444 
445 static const u8 gen11_rcs_offsets[] = {
446 	NOP(1),
447 	LRI(15, POSTED),
448 	REG16(0x244),
449 	REG(0x034),
450 	REG(0x030),
451 	REG(0x038),
452 	REG(0x03c),
453 	REG(0x168),
454 	REG(0x140),
455 	REG(0x110),
456 	REG(0x11c),
457 	REG(0x114),
458 	REG(0x118),
459 	REG(0x1c0),
460 	REG(0x1c4),
461 	REG(0x1c8),
462 	REG(0x180),
463 
464 	NOP(1),
465 	LRI(9, POSTED),
466 	REG16(0x3a8),
467 	REG16(0x28c),
468 	REG16(0x288),
469 	REG16(0x284),
470 	REG16(0x280),
471 	REG16(0x27c),
472 	REG16(0x278),
473 	REG16(0x274),
474 	REG16(0x270),
475 
476 	LRI(1, POSTED),
477 	REG(0x1b0),
478 
479 	NOP(10),
480 	LRI(1, 0),
481 	REG(0x0c8),
482 
483 	END
484 };
485 
486 static const u8 gen12_rcs_offsets[] = {
487 	NOP(1),
488 	LRI(13, POSTED),
489 	REG16(0x244),
490 	REG(0x034),
491 	REG(0x030),
492 	REG(0x038),
493 	REG(0x03c),
494 	REG(0x168),
495 	REG(0x140),
496 	REG(0x110),
497 	REG(0x1c0),
498 	REG(0x1c4),
499 	REG(0x1c8),
500 	REG(0x180),
501 	REG16(0x2b4),
502 
503 	NOP(5),
504 	LRI(9, POSTED),
505 	REG16(0x3a8),
506 	REG16(0x28c),
507 	REG16(0x288),
508 	REG16(0x284),
509 	REG16(0x280),
510 	REG16(0x27c),
511 	REG16(0x278),
512 	REG16(0x274),
513 	REG16(0x270),
514 
515 	LRI(3, POSTED),
516 	REG(0x1b0),
517 	REG16(0x5a8),
518 	REG16(0x5ac),
519 
520 	NOP(6),
521 	LRI(1, 0),
522 	REG(0x0c8),
523 	NOP(3 + 9 + 1),
524 
525 	LRI(51, POSTED),
526 	REG16(0x588),
527 	REG16(0x588),
528 	REG16(0x588),
529 	REG16(0x588),
530 	REG16(0x588),
531 	REG16(0x588),
532 	REG(0x028),
533 	REG(0x09c),
534 	REG(0x0c0),
535 	REG(0x178),
536 	REG(0x17c),
537 	REG16(0x358),
538 	REG(0x170),
539 	REG(0x150),
540 	REG(0x154),
541 	REG(0x158),
542 	REG16(0x41c),
543 	REG16(0x600),
544 	REG16(0x604),
545 	REG16(0x608),
546 	REG16(0x60c),
547 	REG16(0x610),
548 	REG16(0x614),
549 	REG16(0x618),
550 	REG16(0x61c),
551 	REG16(0x620),
552 	REG16(0x624),
553 	REG16(0x628),
554 	REG16(0x62c),
555 	REG16(0x630),
556 	REG16(0x634),
557 	REG16(0x638),
558 	REG16(0x63c),
559 	REG16(0x640),
560 	REG16(0x644),
561 	REG16(0x648),
562 	REG16(0x64c),
563 	REG16(0x650),
564 	REG16(0x654),
565 	REG16(0x658),
566 	REG16(0x65c),
567 	REG16(0x660),
568 	REG16(0x664),
569 	REG16(0x668),
570 	REG16(0x66c),
571 	REG16(0x670),
572 	REG16(0x674),
573 	REG16(0x678),
574 	REG16(0x67c),
575 	REG(0x068),
576 	REG(0x084),
577 	NOP(1),
578 
579 	END
580 };
581 
582 static const u8 xehp_rcs_offsets[] = {
583 	NOP(1),
584 	LRI(13, POSTED),
585 	REG16(0x244),
586 	REG(0x034),
587 	REG(0x030),
588 	REG(0x038),
589 	REG(0x03c),
590 	REG(0x168),
591 	REG(0x140),
592 	REG(0x110),
593 	REG(0x1c0),
594 	REG(0x1c4),
595 	REG(0x1c8),
596 	REG(0x180),
597 	REG16(0x2b4),
598 
599 	NOP(5),
600 	LRI(9, POSTED),
601 	REG16(0x3a8),
602 	REG16(0x28c),
603 	REG16(0x288),
604 	REG16(0x284),
605 	REG16(0x280),
606 	REG16(0x27c),
607 	REG16(0x278),
608 	REG16(0x274),
609 	REG16(0x270),
610 
611 	LRI(3, POSTED),
612 	REG(0x1b0),
613 	REG16(0x5a8),
614 	REG16(0x5ac),
615 
616 	NOP(6),
617 	LRI(1, 0),
618 	REG(0x0c8),
619 
620 	END
621 };
622 
623 static const u8 dg2_rcs_offsets[] = {
624 	NOP(1),
625 	LRI(15, POSTED),
626 	REG16(0x244),
627 	REG(0x034),
628 	REG(0x030),
629 	REG(0x038),
630 	REG(0x03c),
631 	REG(0x168),
632 	REG(0x140),
633 	REG(0x110),
634 	REG(0x1c0),
635 	REG(0x1c4),
636 	REG(0x1c8),
637 	REG(0x180),
638 	REG16(0x2b4),
639 	REG(0x120),
640 	REG(0x124),
641 
642 	NOP(1),
643 	LRI(9, POSTED),
644 	REG16(0x3a8),
645 	REG16(0x28c),
646 	REG16(0x288),
647 	REG16(0x284),
648 	REG16(0x280),
649 	REG16(0x27c),
650 	REG16(0x278),
651 	REG16(0x274),
652 	REG16(0x270),
653 
654 	LRI(3, POSTED),
655 	REG(0x1b0),
656 	REG16(0x5a8),
657 	REG16(0x5ac),
658 
659 	NOP(6),
660 	LRI(1, 0),
661 	REG(0x0c8),
662 
663 	END
664 };
665 
666 static const u8 mtl_rcs_offsets[] = {
667 	NOP(1),
668 	LRI(15, POSTED),
669 	REG16(0x244),
670 	REG(0x034),
671 	REG(0x030),
672 	REG(0x038),
673 	REG(0x03c),
674 	REG(0x168),
675 	REG(0x140),
676 	REG(0x110),
677 	REG(0x1c0),
678 	REG(0x1c4),
679 	REG(0x1c8),
680 	REG(0x180),
681 	REG16(0x2b4),
682 	REG(0x120),
683 	REG(0x124),
684 
685 	NOP(1),
686 	LRI(9, POSTED),
687 	REG16(0x3a8),
688 	REG16(0x28c),
689 	REG16(0x288),
690 	REG16(0x284),
691 	REG16(0x280),
692 	REG16(0x27c),
693 	REG16(0x278),
694 	REG16(0x274),
695 	REG16(0x270),
696 
697 	NOP(2),
698 	LRI(2, POSTED),
699 	REG16(0x5a8),
700 	REG16(0x5ac),
701 
702 	NOP(6),
703 	LRI(1, 0),
704 	REG(0x0c8),
705 
706 	END
707 };
708 
709 #undef END
710 #undef REG16
711 #undef REG
712 #undef LRI
713 #undef NOP
714 
715 static const u8 *reg_offsets(const struct intel_engine_cs *engine)
716 {
717 	/*
718 	 * The gen12+ lists only have the registers we program in the basic
719 	 * default state. We rely on the context image using relative
720 	 * addressing to automatic fixup the register state between the
721 	 * physical engines for virtual engine.
722 	 */
723 	GEM_BUG_ON(GRAPHICS_VER(engine->i915) >= 12 &&
724 		   !intel_engine_has_relative_mmio(engine));
725 
726 	if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) {
727 		if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70))
728 			return mtl_rcs_offsets;
729 		else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
730 			return dg2_rcs_offsets;
731 		else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
732 			return xehp_rcs_offsets;
733 		else if (GRAPHICS_VER(engine->i915) >= 12)
734 			return gen12_rcs_offsets;
735 		else if (GRAPHICS_VER(engine->i915) >= 11)
736 			return gen11_rcs_offsets;
737 		else if (GRAPHICS_VER(engine->i915) >= 9)
738 			return gen9_rcs_offsets;
739 		else
740 			return gen8_rcs_offsets;
741 	} else {
742 		if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70))
743 			return mtl_xcs_offsets;
744 		else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
745 			return dg2_xcs_offsets;
746 		else if (GRAPHICS_VER(engine->i915) >= 12)
747 			return gen12_xcs_offsets;
748 		else if (GRAPHICS_VER(engine->i915) >= 9)
749 			return gen9_xcs_offsets;
750 		else
751 			return gen8_xcs_offsets;
752 	}
753 }
754 
755 static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
756 {
757 	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
758 		return 0x70;
759 	else if (GRAPHICS_VER(engine->i915) >= 12)
760 		return 0x60;
761 	else if (GRAPHICS_VER(engine->i915) >= 9)
762 		return 0x54;
763 	else if (engine->class == RENDER_CLASS)
764 		return 0x58;
765 	else
766 		return -1;
767 }
768 
769 static int lrc_ring_bb_offset(const struct intel_engine_cs *engine)
770 {
771 	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
772 		return 0x80;
773 	else if (GRAPHICS_VER(engine->i915) >= 12)
774 		return 0x70;
775 	else if (GRAPHICS_VER(engine->i915) >= 9)
776 		return 0x64;
777 	else if (GRAPHICS_VER(engine->i915) >= 8 &&
778 		 engine->class == RENDER_CLASS)
779 		return 0xc4;
780 	else
781 		return -1;
782 }
783 
784 static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
785 {
786 	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
787 		return 0x84;
788 	else if (GRAPHICS_VER(engine->i915) >= 12)
789 		return 0x74;
790 	else if (GRAPHICS_VER(engine->i915) >= 9)
791 		return 0x68;
792 	else if (engine->class == RENDER_CLASS)
793 		return 0xd8;
794 	else
795 		return -1;
796 }
797 
798 static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine)
799 {
800 	if (GRAPHICS_VER(engine->i915) >= 12)
801 		return 0x12;
802 	else if (GRAPHICS_VER(engine->i915) >= 9 || engine->class == RENDER_CLASS)
803 		return 0x18;
804 	else
805 		return -1;
806 }
807 
/*
 * Return the register-state index of the indirect context pointer: two dwords
 * after the per-context workaround batch entry. A negative result from
 * lrc_ring_wa_bb_per_ctx() is passed through unchanged.
 */
static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
{
	const int per_ctx = lrc_ring_wa_bb_per_ctx(engine);

	return per_ctx < 0 ? per_ctx : per_ctx + 2;
}
818 
/*
 * Return the register-state index of the indirect context offset: two dwords
 * after the indirect context pointer entry. A negative result from
 * lrc_ring_indirect_ptr() is passed through unchanged.
 */
static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
{
	const int indirect_ptr = lrc_ring_indirect_ptr(engine);

	return indirect_ptr < 0 ? indirect_ptr : indirect_ptr + 2;
}
829 
830 static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
831 {
832 
833 	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
834 		/*
835 		 * Note that the CSFE context has a dummy slot for CMD_BUF_CCTL
836 		 * simply to match the RCS context image layout.
837 		 */
838 		return 0xc6;
839 	else if (engine->class != RENDER_CLASS)
840 		return -1;
841 	else if (GRAPHICS_VER(engine->i915) >= 12)
842 		return 0xb6;
843 	else if (GRAPHICS_VER(engine->i915) >= 11)
844 		return 0xaa;
845 	else
846 		return -1;
847 }
848 
849 static u32
850 lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
851 {
852 	if (GRAPHICS_VER(engine->i915) >= 12)
853 		return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
854 	else if (GRAPHICS_VER(engine->i915) >= 11)
855 		return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
856 	else if (GRAPHICS_VER(engine->i915) >= 9)
857 		return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
858 	else if (GRAPHICS_VER(engine->i915) >= 8)
859 		return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
860 
861 	GEM_BUG_ON(GRAPHICS_VER(engine->i915) < 8);
862 
863 	return 0;
864 }
865 
866 static void
867 lrc_setup_indirect_ctx(u32 *regs,
868 		       const struct intel_engine_cs *engine,
869 		       u32 ctx_bb_ggtt_addr,
870 		       u32 size)
871 {
872 	GEM_BUG_ON(!size);
873 	GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
874 	GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
875 	regs[lrc_ring_indirect_ptr(engine) + 1] =
876 		ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);
877 
878 	GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
879 	regs[lrc_ring_indirect_offset(engine) + 1] =
880 		lrc_ring_indirect_offset_default(engine) << 6;
881 }
882 
883 static void init_common_regs(u32 * const regs,
884 			     const struct intel_context *ce,
885 			     const struct intel_engine_cs *engine,
886 			     bool inhibit)
887 {
888 	u32 ctl;
889 	int loc;
890 
891 	ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
892 	ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
893 	if (inhibit)
894 		ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
895 	if (GRAPHICS_VER(engine->i915) < 11)
896 		ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
897 					   CTX_CTRL_RS_CTX_ENABLE);
898 	regs[CTX_CONTEXT_CONTROL] = ctl;
899 
900 	regs[CTX_TIMESTAMP] = ce->stats.runtime.last;
901 
902 	loc = lrc_ring_bb_offset(engine);
903 	if (loc != -1)
904 		regs[loc + 1] = 0;
905 }
906 
907 static void init_wa_bb_regs(u32 * const regs,
908 			    const struct intel_engine_cs *engine)
909 {
910 	const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
911 
912 	if (wa_ctx->per_ctx.size) {
913 		const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
914 
915 		GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
916 		regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
917 			(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
918 	}
919 
920 	if (wa_ctx->indirect_ctx.size) {
921 		lrc_setup_indirect_ctx(regs, engine,
922 				       i915_ggtt_offset(wa_ctx->vma) +
923 				       wa_ctx->indirect_ctx.offset,
924 				       wa_ctx->indirect_ctx.size);
925 	}
926 }
927 
928 static void init_ppgtt_regs(u32 *regs, const struct i915_ppgtt *ppgtt)
929 {
930 	if (i915_vm_is_4lvl(&ppgtt->vm)) {
931 		/* 64b PPGTT (48bit canonical)
932 		 * PDP0_DESCRIPTOR contains the base address to PML4 and
933 		 * other PDP Descriptors are ignored.
934 		 */
935 		ASSIGN_CTX_PML4(ppgtt, regs);
936 	} else {
937 		ASSIGN_CTX_PDP(ppgtt, regs, 3);
938 		ASSIGN_CTX_PDP(ppgtt, regs, 2);
939 		ASSIGN_CTX_PDP(ppgtt, regs, 1);
940 		ASSIGN_CTX_PDP(ppgtt, regs, 0);
941 	}
942 }
943 
944 static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
945 {
946 	if (i915_is_ggtt(vm))
947 		return i915_vm_to_ggtt(vm)->alias;
948 	else
949 		return i915_vm_to_ppgtt(vm);
950 }
951 
952 static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
953 {
954 	int x;
955 
956 	x = lrc_ring_mi_mode(engine);
957 	if (x != -1) {
958 		regs[x + 1] &= ~STOP_RING;
959 		regs[x + 1] |= STOP_RING << 16;
960 	}
961 }
962 
963 static void __lrc_init_regs(u32 *regs,
964 			    const struct intel_context *ce,
965 			    const struct intel_engine_cs *engine,
966 			    bool inhibit)
967 {
968 	/*
969 	 * A context is actually a big batch buffer with several
970 	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
971 	 * values we are setting here are only for the first context restore:
972 	 * on a subsequent save, the GPU will recreate this batchbuffer with new
973 	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
974 	 * we are not initializing here).
975 	 *
976 	 * Must keep consistent with virtual_update_register_offsets().
977 	 */
978 
979 	if (inhibit)
980 		memset(regs, 0, PAGE_SIZE);
981 
982 	set_offsets(regs, reg_offsets(engine), engine, inhibit);
983 
984 	init_common_regs(regs, ce, engine, inhibit);
985 	init_ppgtt_regs(regs, vm_alias(ce->vm));
986 
987 	init_wa_bb_regs(regs, engine);
988 
989 	__reset_stop_ring(regs, engine);
990 }
991 
992 void lrc_init_regs(const struct intel_context *ce,
993 		   const struct intel_engine_cs *engine,
994 		   bool inhibit)
995 {
996 	__lrc_init_regs(ce->lrc_reg_state, ce, engine, inhibit);
997 }
998 
999 void lrc_reset_regs(const struct intel_context *ce,
1000 		    const struct intel_engine_cs *engine)
1001 {
1002 	__reset_stop_ring(ce->lrc_reg_state, engine);
1003 }
1004 
1005 static void
1006 set_redzone(void *vaddr, const struct intel_engine_cs *engine)
1007 {
1008 	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
1009 		return;
1010 
1011 	vaddr += engine->context_size;
1012 
1013 	memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
1014 }
1015 
1016 static void
1017 check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
1018 {
1019 	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
1020 		return;
1021 
1022 	vaddr += engine->context_size;
1023 
1024 	if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
1025 		drm_err_once(&engine->i915->drm,
1026 			     "%s context redzone overwritten!\n",
1027 			     engine->name);
1028 }
1029 
1030 static u32 context_wa_bb_offset(const struct intel_context *ce)
1031 {
1032 	return PAGE_SIZE * ce->wa_bb_page;
1033 }
1034 
1035 static u32 *context_indirect_bb(const struct intel_context *ce)
1036 {
1037 	void *ptr;
1038 
1039 	GEM_BUG_ON(!ce->wa_bb_page);
1040 
1041 	ptr = ce->lrc_reg_state;
1042 	ptr -= LRC_STATE_OFFSET; /* back to start of context image */
1043 	ptr += context_wa_bb_offset(ce);
1044 
1045 	return ptr;
1046 }
1047 
1048 void lrc_init_state(struct intel_context *ce,
1049 		    struct intel_engine_cs *engine,
1050 		    void *state)
1051 {
1052 	bool inhibit = true;
1053 
1054 	set_redzone(state, engine);
1055 
1056 	if (engine->default_state) {
1057 		shmem_read(engine->default_state, 0,
1058 			   state, engine->context_size);
1059 		__set_bit(CONTEXT_VALID_BIT, &ce->flags);
1060 		inhibit = false;
1061 	}
1062 
1063 	/* Clear the ppHWSP (inc. per-context counters) */
1064 	memset(state, 0, PAGE_SIZE);
1065 
1066 	/* Clear the indirect wa and storage */
1067 	if (ce->wa_bb_page)
1068 		memset(state + context_wa_bb_offset(ce), 0, PAGE_SIZE);
1069 
1070 	/*
1071 	 * The second page of the context object contains some registers which
1072 	 * must be set up prior to the first execution.
1073 	 */
1074 	__lrc_init_regs(state + LRC_STATE_OFFSET, ce, engine, inhibit);
1075 }
1076 
1077 u32 lrc_indirect_bb(const struct intel_context *ce)
1078 {
1079 	return i915_ggtt_offset(ce->state) + context_wa_bb_offset(ce);
1080 }
1081 
1082 static u32 *setup_predicate_disable_wa(const struct intel_context *ce, u32 *cs)
1083 {
1084 	/* If predication is active, this will be noop'ed */
1085 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT | (4 - 2);
1086 	*cs++ = lrc_indirect_bb(ce) + DG2_PREDICATE_RESULT_WA;
1087 	*cs++ = 0;
1088 	*cs++ = 0; /* No predication */
1089 
1090 	/* predicated end, only terminates if SET_PREDICATE_RESULT:0 is clear */
1091 	*cs++ = MI_BATCH_BUFFER_END | BIT(15);
1092 	*cs++ = MI_SET_PREDICATE | MI_SET_PREDICATE_DISABLE;
1093 
1094 	/* Instructions are no longer predicated (disabled), we can proceed */
1095 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT | (4 - 2);
1096 	*cs++ = lrc_indirect_bb(ce) + DG2_PREDICATE_RESULT_WA;
1097 	*cs++ = 0;
1098 	*cs++ = 1; /* enable predication before the next BB */
1099 
1100 	*cs++ = MI_BATCH_BUFFER_END;
1101 	GEM_BUG_ON(offset_in_page(cs) > DG2_PREDICATE_RESULT_WA);
1102 
1103 	return cs;
1104 }
1105 
1106 static struct i915_vma *
1107 __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
1108 {
1109 	struct drm_i915_gem_object *obj;
1110 	struct i915_vma *vma;
1111 	u32 context_size;
1112 
1113 	context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
1114 
1115 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
1116 		context_size += I915_GTT_PAGE_SIZE; /* for redzone */
1117 
1118 	if (GRAPHICS_VER(engine->i915) >= 12) {
1119 		ce->wa_bb_page = context_size / PAGE_SIZE;
1120 		context_size += PAGE_SIZE;
1121 	}
1122 
1123 	if (intel_context_is_parent(ce) && intel_engine_uses_guc(engine)) {
1124 		ce->parallel.guc.parent_page = context_size / PAGE_SIZE;
1125 		context_size += PARENT_SCRATCH_SIZE;
1126 	}
1127 
1128 	obj = i915_gem_object_create_lmem(engine->i915, context_size,
1129 					  I915_BO_ALLOC_PM_VOLATILE);
1130 	if (IS_ERR(obj))
1131 		obj = i915_gem_object_create_shmem(engine->i915, context_size);
1132 	if (IS_ERR(obj))
1133 		return ERR_CAST(obj);
1134 
1135 	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
1136 	if (IS_ERR(vma)) {
1137 		i915_gem_object_put(obj);
1138 		return vma;
1139 	}
1140 
1141 	return vma;
1142 }
1143 
1144 static struct intel_timeline *
1145 pinned_timeline(struct intel_context *ce, struct intel_engine_cs *engine)
1146 {
1147 	struct intel_timeline *tl = fetch_and_zero(&ce->timeline);
1148 
1149 	return intel_timeline_create_from_engine(engine, page_unmask_bits(tl));
1150 }
1151 
1152 int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine)
1153 {
1154 	struct intel_ring *ring;
1155 	struct i915_vma *vma;
1156 	int err;
1157 
1158 	GEM_BUG_ON(ce->state);
1159 
1160 	vma = __lrc_alloc_state(ce, engine);
1161 	if (IS_ERR(vma))
1162 		return PTR_ERR(vma);
1163 
1164 	ring = intel_engine_create_ring(engine, ce->ring_size);
1165 	if (IS_ERR(ring)) {
1166 		err = PTR_ERR(ring);
1167 		goto err_vma;
1168 	}
1169 
1170 	if (!page_mask_bits(ce->timeline)) {
1171 		struct intel_timeline *tl;
1172 
1173 		/*
1174 		 * Use the static global HWSP for the kernel context, and
1175 		 * a dynamically allocated cacheline for everyone else.
1176 		 */
1177 		if (unlikely(ce->timeline))
1178 			tl = pinned_timeline(ce, engine);
1179 		else
1180 			tl = intel_timeline_create(engine->gt);
1181 		if (IS_ERR(tl)) {
1182 			err = PTR_ERR(tl);
1183 			goto err_ring;
1184 		}
1185 
1186 		ce->timeline = tl;
1187 	}
1188 
1189 	ce->ring = ring;
1190 	ce->state = vma;
1191 
1192 	return 0;
1193 
1194 err_ring:
1195 	intel_ring_put(ring);
1196 err_vma:
1197 	i915_vma_put(vma);
1198 	return err;
1199 }
1200 
1201 void lrc_reset(struct intel_context *ce)
1202 {
1203 	GEM_BUG_ON(!intel_context_is_pinned(ce));
1204 
1205 	intel_ring_reset(ce->ring, ce->ring->emit);
1206 
1207 	/* Scrub away the garbage */
1208 	lrc_init_regs(ce, ce->engine, true);
1209 	ce->lrc.lrca = lrc_update_regs(ce, ce->engine, ce->ring->tail);
1210 }
1211 
1212 int
1213 lrc_pre_pin(struct intel_context *ce,
1214 	    struct intel_engine_cs *engine,
1215 	    struct i915_gem_ww_ctx *ww,
1216 	    void **vaddr)
1217 {
1218 	GEM_BUG_ON(!ce->state);
1219 	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
1220 
1221 	*vaddr = i915_gem_object_pin_map(ce->state->obj,
1222 					 i915_coherent_map_type(ce->engine->i915,
1223 								ce->state->obj,
1224 								false) |
1225 					 I915_MAP_OVERRIDE);
1226 
1227 	return PTR_ERR_OR_ZERO(*vaddr);
1228 }
1229 
1230 int
1231 lrc_pin(struct intel_context *ce,
1232 	struct intel_engine_cs *engine,
1233 	void *vaddr)
1234 {
1235 	ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;
1236 
1237 	if (!__test_and_set_bit(CONTEXT_INIT_BIT, &ce->flags))
1238 		lrc_init_state(ce, engine, vaddr);
1239 
1240 	ce->lrc.lrca = lrc_update_regs(ce, engine, ce->ring->tail);
1241 	return 0;
1242 }
1243 
1244 void lrc_unpin(struct intel_context *ce)
1245 {
1246 	if (unlikely(ce->parallel.last_rq)) {
1247 		i915_request_put(ce->parallel.last_rq);
1248 		ce->parallel.last_rq = NULL;
1249 	}
1250 	check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
1251 		      ce->engine);
1252 }
1253 
1254 void lrc_post_unpin(struct intel_context *ce)
1255 {
1256 	i915_gem_object_unpin_map(ce->state->obj);
1257 }
1258 
1259 void lrc_fini(struct intel_context *ce)
1260 {
1261 	if (!ce->state)
1262 		return;
1263 
1264 	intel_ring_put(fetch_and_zero(&ce->ring));
1265 	i915_vma_put(fetch_and_zero(&ce->state));
1266 }
1267 
1268 void lrc_destroy(struct kref *kref)
1269 {
1270 	struct intel_context *ce = container_of(kref, typeof(*ce), ref);
1271 
1272 	GEM_BUG_ON(!i915_active_is_idle(&ce->active));
1273 	GEM_BUG_ON(intel_context_is_pinned(ce));
1274 
1275 	lrc_fini(ce);
1276 
1277 	intel_context_fini(ce);
1278 	intel_context_free(ce);
1279 }
1280 
1281 static u32 *
1282 gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
1283 {
1284 	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
1285 		MI_SRM_LRM_GLOBAL_GTT |
1286 		MI_LRI_LRM_CS_MMIO;
1287 	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
1288 	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
1289 		CTX_TIMESTAMP * sizeof(u32);
1290 	*cs++ = 0;
1291 
1292 	*cs++ = MI_LOAD_REGISTER_REG |
1293 		MI_LRR_SOURCE_CS_MMIO |
1294 		MI_LRI_LRM_CS_MMIO;
1295 	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
1296 	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
1297 
1298 	*cs++ = MI_LOAD_REGISTER_REG |
1299 		MI_LRR_SOURCE_CS_MMIO |
1300 		MI_LRI_LRM_CS_MMIO;
1301 	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
1302 	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
1303 
1304 	return cs;
1305 }
1306 
1307 static u32 *
1308 gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
1309 {
1310 	GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);
1311 
1312 	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
1313 		MI_SRM_LRM_GLOBAL_GTT |
1314 		MI_LRI_LRM_CS_MMIO;
1315 	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
1316 	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
1317 		(lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
1318 	*cs++ = 0;
1319 
1320 	return cs;
1321 }
1322 
1323 static u32 *
1324 gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
1325 {
1326 	GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1);
1327 
1328 	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
1329 		MI_SRM_LRM_GLOBAL_GTT |
1330 		MI_LRI_LRM_CS_MMIO;
1331 	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
1332 	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
1333 		(lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32);
1334 	*cs++ = 0;
1335 
1336 	*cs++ = MI_LOAD_REGISTER_REG |
1337 		MI_LRR_SOURCE_CS_MMIO |
1338 		MI_LRI_LRM_CS_MMIO;
1339 	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
1340 	*cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));
1341 
1342 	return cs;
1343 }
1344 
1345 /*
1346  * On DG2 during context restore of a preempted context in GPGPU mode,
1347  * RCS restore hang is detected. This is extremely timing dependent.
1348  * To address this below sw wabb is implemented for DG2 A steppings.
1349  */
1350 static u32 *
1351 dg2_emit_rcs_hang_wabb(const struct intel_context *ce, u32 *cs)
1352 {
1353 	*cs++ = MI_LOAD_REGISTER_IMM(1);
1354 	*cs++ = i915_mmio_reg_offset(GEN12_STATE_ACK_DEBUG);
1355 	*cs++ = 0x21;
1356 
1357 	*cs++ = MI_LOAD_REGISTER_REG;
1358 	*cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base));
1359 	*cs++ = i915_mmio_reg_offset(GEN12_CULLBIT1);
1360 
1361 	*cs++ = MI_LOAD_REGISTER_REG;
1362 	*cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base));
1363 	*cs++ = i915_mmio_reg_offset(GEN12_CULLBIT2);
1364 
1365 	return cs;
1366 }
1367 
1368 /*
1369  * The bspec's tuning guide asks us to program a vertical watermark value of
1370  * 0x3FF.  However this register is not saved/restored properly by the
1371  * hardware, so we're required to apply the desired value via INDIRECT_CTX
1372  * batch buffer to ensure the value takes effect properly.  All other bits
1373  * in this register should remain at 0 (the hardware default).
1374  */
1375 static u32 *
1376 dg2_emit_draw_watermark_setting(u32 *cs)
1377 {
1378 	*cs++ = MI_LOAD_REGISTER_IMM(1);
1379 	*cs++ = i915_mmio_reg_offset(DRAW_WATERMARK);
1380 	*cs++ = REG_FIELD_PREP(VERT_WM_VAL, 0x3FF);
1381 
1382 	return cs;
1383 }
1384 
1385 static u32 *
1386 gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
1387 {
1388 	cs = gen12_emit_timestamp_wa(ce, cs);
1389 	cs = gen12_emit_cmd_buf_wa(ce, cs);
1390 	cs = gen12_emit_restore_scratch(ce, cs);
1391 
1392 	/* Wa_22011450934:dg2 */
1393 	if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_A0, STEP_B0) ||
1394 	    IS_DG2_GRAPHICS_STEP(ce->engine->i915, G11, STEP_A0, STEP_B0))
1395 		cs = dg2_emit_rcs_hang_wabb(ce, cs);
1396 
1397 	/* Wa_16013000631:dg2 */
1398 	if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) ||
1399 	    IS_DG2_G11(ce->engine->i915))
1400 		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0);
1401 
1402 	/* hsdes: 1809175790 */
1403 	if (!HAS_FLAT_CCS(ce->engine->i915))
1404 		cs = gen12_emit_aux_table_inv(ce->engine->gt,
1405 					      cs, GEN12_GFX_CCS_AUX_NV);
1406 
1407 	/* Wa_16014892111 */
1408 	if (IS_DG2(ce->engine->i915))
1409 		cs = dg2_emit_draw_watermark_setting(cs);
1410 
1411 	return cs;
1412 }
1413 
1414 static u32 *
1415 gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
1416 {
1417 	cs = gen12_emit_timestamp_wa(ce, cs);
1418 	cs = gen12_emit_restore_scratch(ce, cs);
1419 
1420 	/* Wa_16013000631:dg2 */
1421 	if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) ||
1422 	    IS_DG2_G11(ce->engine->i915))
1423 		if (ce->engine->class == COMPUTE_CLASS)
1424 			cs = gen8_emit_pipe_control(cs,
1425 						    PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE,
1426 						    0);
1427 
1428 	/* hsdes: 1809175790 */
1429 	if (!HAS_FLAT_CCS(ce->engine->i915)) {
1430 		if (ce->engine->class == VIDEO_DECODE_CLASS)
1431 			cs = gen12_emit_aux_table_inv(ce->engine->gt,
1432 						      cs, GEN12_VD0_AUX_NV);
1433 		else if (ce->engine->class == VIDEO_ENHANCEMENT_CLASS)
1434 			cs = gen12_emit_aux_table_inv(ce->engine->gt,
1435 						      cs, GEN12_VE0_AUX_NV);
1436 	}
1437 
1438 	return cs;
1439 }
1440 
1441 static void
1442 setup_indirect_ctx_bb(const struct intel_context *ce,
1443 		      const struct intel_engine_cs *engine,
1444 		      u32 *(*emit)(const struct intel_context *, u32 *))
1445 {
1446 	u32 * const start = context_indirect_bb(ce);
1447 	u32 *cs;
1448 
1449 	cs = emit(ce, start);
1450 	GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
1451 	while ((unsigned long)cs % CACHELINE_BYTES)
1452 		*cs++ = MI_NOOP;
1453 
1454 	GEM_BUG_ON(cs - start > DG2_PREDICATE_RESULT_BB / sizeof(*start));
1455 	setup_predicate_disable_wa(ce, start + DG2_PREDICATE_RESULT_BB / sizeof(*start));
1456 
1457 	lrc_setup_indirect_ctx(ce->lrc_reg_state, engine,
1458 			       lrc_indirect_bb(ce),
1459 			       (cs - start) * sizeof(*cs));
1460 }
1461 
1462 /*
1463  * The context descriptor encodes various attributes of a context,
1464  * including its GTT address and some flags. Because it's fairly
1465  * expensive to calculate, we'll just do it once and cache the result,
1466  * which remains valid until the context is unpinned.
1467  *
1468  * This is what a descriptor looks like, from LSB to MSB::
1469  *
1470  *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
1471  *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
1472  *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
1473  *      bits 53-54:    mbz, reserved for use by hardware
1474  *      bits 55-63:    group ID, currently unused and set to 0
1475  *
1476  * Starting from Gen11, the upper dword of the descriptor has a new format:
1477  *
1478  *      bits 32-36:    reserved
1479  *      bits 37-47:    SW context ID
1480  *      bits 48:53:    engine instance
1481  *      bit 54:        mbz, reserved for use by hardware
1482  *      bits 55-60:    SW counter
1483  *      bits 61-63:    engine class
1484  *
1485  * On Xe_HP, the upper dword of the descriptor has a new format:
1486  *
1487  *      bits 32-37:    virtual function number
1488  *      bit 38:        mbz, reserved for use by hardware
1489  *      bits 39-54:    SW context ID
1490  *      bits 55-57:    reserved
1491  *      bits 58-63:    SW counter
1492  *
1493  * engine info, SW context ID and SW counter need to form a unique number
1494  * (Context ID) per lrc.
1495  */
1496 static u32 lrc_descriptor(const struct intel_context *ce)
1497 {
1498 	u32 desc;
1499 
1500 	desc = INTEL_LEGACY_32B_CONTEXT;
1501 	if (i915_vm_is_4lvl(ce->vm))
1502 		desc = INTEL_LEGACY_64B_CONTEXT;
1503 	desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
1504 
1505 	desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
1506 	if (GRAPHICS_VER(ce->vm->i915) == 8)
1507 		desc |= GEN8_CTX_L3LLC_COHERENT;
1508 
1509 	return i915_ggtt_offset(ce->state) | desc;
1510 }
1511 
1512 u32 lrc_update_regs(const struct intel_context *ce,
1513 		    const struct intel_engine_cs *engine,
1514 		    u32 head)
1515 {
1516 	struct intel_ring *ring = ce->ring;
1517 	u32 *regs = ce->lrc_reg_state;
1518 
1519 	GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
1520 	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
1521 
1522 	regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
1523 	regs[CTX_RING_HEAD] = head;
1524 	regs[CTX_RING_TAIL] = ring->tail;
1525 	regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
1526 
1527 	/* RPCS */
1528 	if (engine->class == RENDER_CLASS) {
1529 		regs[CTX_R_PWR_CLK_STATE] =
1530 			intel_sseu_make_rpcs(engine->gt, &ce->sseu);
1531 
1532 		i915_oa_init_reg_state(ce, engine);
1533 	}
1534 
1535 	if (ce->wa_bb_page) {
1536 		u32 *(*fn)(const struct intel_context *ce, u32 *cs);
1537 
1538 		fn = gen12_emit_indirect_ctx_xcs;
1539 		if (ce->engine->class == RENDER_CLASS)
1540 			fn = gen12_emit_indirect_ctx_rcs;
1541 
1542 		/* Mutually exclusive wrt to global indirect bb */
1543 		GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
1544 		setup_indirect_ctx_bb(ce, engine, fn);
1545 	}
1546 
1547 	return lrc_descriptor(ce) | CTX_DESC_FORCE_RESTORE;
1548 }
1549 
1550 void lrc_update_offsets(struct intel_context *ce,
1551 			struct intel_engine_cs *engine)
1552 {
1553 	set_offsets(ce->lrc_reg_state, reg_offsets(engine), engine, false);
1554 }
1555 
1556 void lrc_check_regs(const struct intel_context *ce,
1557 		    const struct intel_engine_cs *engine,
1558 		    const char *when)
1559 {
1560 	const struct intel_ring *ring = ce->ring;
1561 	u32 *regs = ce->lrc_reg_state;
1562 	bool valid = true;
1563 	int x;
1564 
1565 	if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
1566 		pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
1567 		       engine->name,
1568 		       regs[CTX_RING_START],
1569 		       i915_ggtt_offset(ring->vma));
1570 		regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
1571 		valid = false;
1572 	}
1573 
1574 	if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
1575 	    (RING_CTL_SIZE(ring->size) | RING_VALID)) {
1576 		pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
1577 		       engine->name,
1578 		       regs[CTX_RING_CTL],
1579 		       (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
1580 		regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
1581 		valid = false;
1582 	}
1583 
1584 	x = lrc_ring_mi_mode(engine);
1585 	if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
1586 		pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
1587 		       engine->name, regs[x + 1]);
1588 		regs[x + 1] &= ~STOP_RING;
1589 		regs[x + 1] |= STOP_RING << 16;
1590 		valid = false;
1591 	}
1592 
1593 	WARN_ONCE(!valid, "Invalid lrc state found %s submission\n", when);
1594 }
1595 
1596 /*
1597  * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
1598  * PIPE_CONTROL instruction. This is required for the flush to happen correctly
1599  * but there is a slight complication as this is applied in WA batch where the
1600  * values are only initialized once so we cannot take register value at the
1601  * beginning and reuse it further; hence we save its value to memory, upload a
1602  * constant value with bit21 set and then we restore it back with the saved value.
1603  * To simplify the WA, a constant value is formed by using the default value
1604  * of this register. This shouldn't be a problem because we are only modifying
1605  * it for a short period and this batch in non-premptible. We can ofcourse
1606  * use additional instructions that read the actual value of the register
1607  * at that time and set our bit of interest but it makes the WA complicated.
1608  *
1609  * This WA is also required for Gen9 so extracting as a function avoids
1610  * code duplication.
1611  */
1612 static u32 *
1613 gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
1614 {
1615 	/* NB no one else is allowed to scribble over scratch + 256! */
1616 	*batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
1617 	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
1618 	*batch++ = intel_gt_scratch_offset(engine->gt,
1619 					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
1620 	*batch++ = 0;
1621 
1622 	*batch++ = MI_LOAD_REGISTER_IMM(1);
1623 	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
1624 	*batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
1625 
1626 	batch = gen8_emit_pipe_control(batch,
1627 				       PIPE_CONTROL_CS_STALL |
1628 				       PIPE_CONTROL_DC_FLUSH_ENABLE,
1629 				       0);
1630 
1631 	*batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
1632 	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
1633 	*batch++ = intel_gt_scratch_offset(engine->gt,
1634 					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
1635 	*batch++ = 0;
1636 
1637 	return batch;
1638 }
1639 
1640 /*
1641  * Typically we only have one indirect_ctx and per_ctx batch buffer which are
1642  * initialized at the beginning and shared across all contexts but this field
1643  * helps us to have multiple batches at different offsets and select them based
1644  * on a criteria. At the moment this batch always start at the beginning of the page
1645  * and at this point we don't have multiple wa_ctx batch buffers.
1646  *
1647  * The number of WA applied are not known at the beginning; we use this field
1648  * to return the no of DWORDS written.
1649  *
1650  * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
1651  * so it adds NOOPs as padding to make it cacheline aligned.
1652  * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together
1653  * makes a complete batch buffer.
1654  */
1655 static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
1656 {
1657 	/* WaDisableCtxRestoreArbitration:bdw,chv */
1658 	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
1659 
1660 	/* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
1661 	if (IS_BROADWELL(engine->i915))
1662 		batch = gen8_emit_flush_coherentl3_wa(engine, batch);
1663 
1664 	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
1665 	/* Actual scratch location is at 128 bytes offset */
1666 	batch = gen8_emit_pipe_control(batch,
1667 				       PIPE_CONTROL_FLUSH_L3 |
1668 				       PIPE_CONTROL_STORE_DATA_INDEX |
1669 				       PIPE_CONTROL_CS_STALL |
1670 				       PIPE_CONTROL_QW_WRITE,
1671 				       LRC_PPHWSP_SCRATCH_ADDR);
1672 
1673 	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1674 
1675 	/* Pad to end of cacheline */
1676 	while ((unsigned long)batch % CACHELINE_BYTES)
1677 		*batch++ = MI_NOOP;
1678 
1679 	/*
1680 	 * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
1681 	 * execution depends on the length specified in terms of cache lines
1682 	 * in the register CTX_RCS_INDIRECT_CTX
1683 	 */
1684 
1685 	return batch;
1686 }
1687 
1688 struct lri {
1689 	i915_reg_t reg;
1690 	u32 value;
1691 };
1692 
1693 static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
1694 {
1695 	GEM_BUG_ON(!count || count > 63);
1696 
1697 	*batch++ = MI_LOAD_REGISTER_IMM(count);
1698 	do {
1699 		*batch++ = i915_mmio_reg_offset(lri->reg);
1700 		*batch++ = lri->value;
1701 	} while (lri++, --count);
1702 	*batch++ = MI_NOOP;
1703 
1704 	return batch;
1705 }
1706 
1707 static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
1708 {
1709 	static const struct lri lri[] = {
1710 		/* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
1711 		{
1712 			COMMON_SLICE_CHICKEN2,
1713 			__MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
1714 				       0),
1715 		},
1716 
1717 		/* BSpec: 11391 */
1718 		{
1719 			FF_SLICE_CHICKEN,
1720 			__MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
1721 				       FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
1722 		},
1723 
1724 		/* BSpec: 11299 */
1725 		{
1726 			_3D_CHICKEN3,
1727 			__MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
1728 				       _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
1729 		}
1730 	};
1731 
1732 	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
1733 
1734 	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
1735 	batch = gen8_emit_flush_coherentl3_wa(engine, batch);
1736 
1737 	/* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
1738 	batch = gen8_emit_pipe_control(batch,
1739 				       PIPE_CONTROL_FLUSH_L3 |
1740 				       PIPE_CONTROL_STORE_DATA_INDEX |
1741 				       PIPE_CONTROL_CS_STALL |
1742 				       PIPE_CONTROL_QW_WRITE,
1743 				       LRC_PPHWSP_SCRATCH_ADDR);
1744 
1745 	batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
1746 
1747 	/* WaMediaPoolStateCmdInWABB:bxt,glk */
1748 	if (HAS_POOLED_EU(engine->i915)) {
1749 		/*
1750 		 * EU pool configuration is setup along with golden context
1751 		 * during context initialization. This value depends on
1752 		 * device type (2x6 or 3x6) and needs to be updated based
1753 		 * on which subslice is disabled especially for 2x6
1754 		 * devices, however it is safe to load default
1755 		 * configuration of 3x6 device instead of masking off
1756 		 * corresponding bits because HW ignores bits of a disabled
1757 		 * subslice and drops down to appropriate config. Please
1758 		 * see render_state_setup() in i915_gem_render_state.c for
1759 		 * possible configurations, to avoid duplication they are
1760 		 * not shown here again.
1761 		 */
1762 		*batch++ = GEN9_MEDIA_POOL_STATE;
1763 		*batch++ = GEN9_MEDIA_POOL_ENABLE;
1764 		*batch++ = 0x00777000;
1765 		*batch++ = 0;
1766 		*batch++ = 0;
1767 		*batch++ = 0;
1768 	}
1769 
1770 	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1771 
1772 	/* Pad to end of cacheline */
1773 	while ((unsigned long)batch % CACHELINE_BYTES)
1774 		*batch++ = MI_NOOP;
1775 
1776 	return batch;
1777 }
1778 
1779 #define CTX_WA_BB_SIZE (PAGE_SIZE)
1780 
1781 static int lrc_create_wa_ctx(struct intel_engine_cs *engine)
1782 {
1783 	struct drm_i915_gem_object *obj;
1784 	struct i915_vma *vma;
1785 	int err;
1786 
1787 	obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_SIZE);
1788 	if (IS_ERR(obj))
1789 		return PTR_ERR(obj);
1790 
1791 	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
1792 	if (IS_ERR(vma)) {
1793 		err = PTR_ERR(vma);
1794 		goto err;
1795 	}
1796 
1797 	engine->wa_ctx.vma = vma;
1798 	return 0;
1799 
1800 err:
1801 	i915_gem_object_put(obj);
1802 	return err;
1803 }
1804 
1805 void lrc_fini_wa_ctx(struct intel_engine_cs *engine)
1806 {
1807 	i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
1808 }
1809 
1810 typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
1811 
1812 void lrc_init_wa_ctx(struct intel_engine_cs *engine)
1813 {
1814 	struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
1815 	struct i915_wa_ctx_bb *wa_bb[] = {
1816 		&wa_ctx->indirect_ctx, &wa_ctx->per_ctx
1817 	};
1818 	wa_bb_func_t wa_bb_fn[ARRAY_SIZE(wa_bb)];
1819 	struct i915_gem_ww_ctx ww;
1820 	void *batch, *batch_ptr;
1821 	unsigned int i;
1822 	int err;
1823 
1824 	if (GRAPHICS_VER(engine->i915) >= 11 ||
1825 	    !(engine->flags & I915_ENGINE_HAS_RCS_REG_STATE))
1826 		return;
1827 
1828 	if (GRAPHICS_VER(engine->i915) == 9) {
1829 		wa_bb_fn[0] = gen9_init_indirectctx_bb;
1830 		wa_bb_fn[1] = NULL;
1831 	} else if (GRAPHICS_VER(engine->i915) == 8) {
1832 		wa_bb_fn[0] = gen8_init_indirectctx_bb;
1833 		wa_bb_fn[1] = NULL;
1834 	}
1835 
1836 	err = lrc_create_wa_ctx(engine);
1837 	if (err) {
1838 		/*
1839 		 * We continue even if we fail to initialize WA batch
1840 		 * because we only expect rare glitches but nothing
1841 		 * critical to prevent us from using GPU
1842 		 */
1843 		drm_err(&engine->i915->drm,
1844 			"Ignoring context switch w/a allocation error:%d\n",
1845 			err);
1846 		return;
1847 	}
1848 
1849 	if (!engine->wa_ctx.vma)
1850 		return;
1851 
1852 	i915_gem_ww_ctx_init(&ww, true);
1853 retry:
1854 	err = i915_gem_object_lock(wa_ctx->vma->obj, &ww);
1855 	if (!err)
1856 		err = i915_ggtt_pin(wa_ctx->vma, &ww, 0, PIN_HIGH);
1857 	if (err)
1858 		goto err;
1859 
1860 	batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB);
1861 	if (IS_ERR(batch)) {
1862 		err = PTR_ERR(batch);
1863 		goto err_unpin;
1864 	}
1865 
1866 	/*
1867 	 * Emit the two workaround batch buffers, recording the offset from the
1868 	 * start of the workaround batch buffer object for each and their
1869 	 * respective sizes.
1870 	 */
1871 	batch_ptr = batch;
1872 	for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
1873 		wa_bb[i]->offset = batch_ptr - batch;
1874 		if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
1875 						  CACHELINE_BYTES))) {
1876 			err = -EINVAL;
1877 			break;
1878 		}
1879 		if (wa_bb_fn[i])
1880 			batch_ptr = wa_bb_fn[i](engine, batch_ptr);
1881 		wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
1882 	}
1883 	GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_SIZE);
1884 
1885 	__i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch);
1886 	__i915_gem_object_release_map(wa_ctx->vma->obj);
1887 
1888 	/* Verify that we can handle failure to setup the wa_ctx */
1889 	if (!err)
1890 		err = i915_inject_probe_error(engine->i915, -ENODEV);
1891 
1892 err_unpin:
1893 	if (err)
1894 		i915_vma_unpin(wa_ctx->vma);
1895 err:
1896 	if (err == -EDEADLK) {
1897 		err = i915_gem_ww_ctx_backoff(&ww);
1898 		if (!err)
1899 			goto retry;
1900 	}
1901 	i915_gem_ww_ctx_fini(&ww);
1902 
1903 	if (err) {
1904 		i915_vma_put(engine->wa_ctx.vma);
1905 
1906 		/* Clear all flags to prevent further use */
1907 		memset(wa_ctx, 0, sizeof(*wa_ctx));
1908 	}
1909 }
1910 
1911 static void st_runtime_underflow(struct intel_context_stats *stats, s32 dt)
1912 {
1913 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1914 	stats->runtime.num_underflow++;
1915 	stats->runtime.max_underflow =
1916 		max_t(u32, stats->runtime.max_underflow, -dt);
1917 #endif
1918 }
1919 
1920 static u32 lrc_get_runtime(const struct intel_context *ce)
1921 {
1922 	/*
1923 	 * We can use either ppHWSP[16] which is recorded before the context
1924 	 * switch (and so excludes the cost of context switches) or use the
1925 	 * value from the context image itself, which is saved/restored earlier
1926 	 * and so includes the cost of the save.
1927 	 */
1928 	return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
1929 }
1930 
1931 void lrc_update_runtime(struct intel_context *ce)
1932 {
1933 	struct intel_context_stats *stats = &ce->stats;
1934 	u32 old;
1935 	s32 dt;
1936 
1937 	old = stats->runtime.last;
1938 	stats->runtime.last = lrc_get_runtime(ce);
1939 	dt = stats->runtime.last - old;
1940 	if (!dt)
1941 		return;
1942 
1943 	if (unlikely(dt < 0)) {
1944 		CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
1945 			 old, stats->runtime.last, dt);
1946 		st_runtime_underflow(stats, dt);
1947 		return;
1948 	}
1949 
1950 	ewma_runtime_add(&stats->runtime.avg, dt);
1951 	stats->runtime.total += dt;
1952 }
1953 
1954 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1955 #include "selftest_lrc.c"
1956 #endif
1957