1a0d3fdb6SChris Wilson // SPDX-License-Identifier: MIT
2a0d3fdb6SChris Wilson /*
3a0d3fdb6SChris Wilson * Copyright © 2014 Intel Corporation
4a0d3fdb6SChris Wilson */
5a0d3fdb6SChris Wilson
6ba485bc8SMatthew Auld #include "gem/i915_gem_lmem.h"
7ba485bc8SMatthew Auld
8a0d3fdb6SChris Wilson #include "gen8_engine_cs.h"
9a0d3fdb6SChris Wilson #include "i915_drv.h"
10a0d3fdb6SChris Wilson #include "i915_perf.h"
112bb116c7SJani Nikula #include "i915_reg.h"
122bb116c7SJani Nikula #include "intel_context.h"
13a0d3fdb6SChris Wilson #include "intel_engine.h"
14202b1f4cSMatt Roper #include "intel_engine_regs.h"
15a0d3fdb6SChris Wilson #include "intel_gpu_commands.h"
16a0d3fdb6SChris Wilson #include "intel_gt.h"
170d6419e9SMatt Roper #include "intel_gt_regs.h"
18a0d3fdb6SChris Wilson #include "intel_lrc.h"
19a0d3fdb6SChris Wilson #include "intel_lrc_reg.h"
20a0d3fdb6SChris Wilson #include "intel_ring.h"
21a0d3fdb6SChris Wilson #include "shmem_utils.h"
22a0d3fdb6SChris Wilson
23b1f80a5aSLucas De Marchi /*
24b1f80a5aSLucas De Marchi * The per-platform tables are u8-encoded in @data. Decode @data and set the
25b1f80a5aSLucas De Marchi * addresses' offset and commands in @regs. The following encoding is used
26b1f80a5aSLucas De Marchi * for each byte. There are 2 steps: decoding commands and decoding addresses.
27b1f80a5aSLucas De Marchi *
28b1f80a5aSLucas De Marchi * Commands:
29b1f80a5aSLucas De Marchi * [7]: create NOPs - number of NOPs are set in lower bits
30b1f80a5aSLucas De Marchi * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set
31b1f80a5aSLucas De Marchi * MI_LRI_FORCE_POSTED
32b1f80a5aSLucas De Marchi * [5:0]: Number of NOPs or registers to set values to in case of
33b1f80a5aSLucas De Marchi * MI_LOAD_REGISTER_IMM
34b1f80a5aSLucas De Marchi *
35b1f80a5aSLucas De Marchi * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
36b1f80a5aSLucas De Marchi * number of registers. They are set by using the REG/REG16 macros: the former
37b1f80a5aSLucas De Marchi * is used for offsets smaller than 0x200 while the latter is for values bigger
38b1f80a5aSLucas De Marchi * than that. Those macros already set all the bits documented below correctly:
39b1f80a5aSLucas De Marchi *
40b1f80a5aSLucas De Marchi * [7]: When a register offset needs more than 6 bits, use additional bytes, to
41b1f80a5aSLucas De Marchi * follow, for the lower bits
42b1f80a5aSLucas De Marchi * [6:0]: Register offset, without considering the engine base.
43b1f80a5aSLucas De Marchi *
44b1f80a5aSLucas De Marchi * This function only tweaks the commands and register offsets. Values are not
45b1f80a5aSLucas De Marchi * filled out.
46b1f80a5aSLucas De Marchi */
set_offsets(u32 * regs,const u8 * data,const struct intel_engine_cs * engine,bool close)47a0d3fdb6SChris Wilson static void set_offsets(u32 *regs,
48a0d3fdb6SChris Wilson const u8 *data,
49a0d3fdb6SChris Wilson const struct intel_engine_cs *engine,
50a0d3fdb6SChris Wilson bool close)
51a0d3fdb6SChris Wilson #define NOP(x) (BIT(7) | (x))
52a0d3fdb6SChris Wilson #define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
53a0d3fdb6SChris Wilson #define POSTED BIT(0)
54a0d3fdb6SChris Wilson #define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
55a0d3fdb6SChris Wilson #define REG16(x) \
56a0d3fdb6SChris Wilson (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
57a0d3fdb6SChris Wilson (((x) >> 2) & 0x7f)
58a0d3fdb6SChris Wilson #define END 0
59a0d3fdb6SChris Wilson {
60a0d3fdb6SChris Wilson const u32 base = engine->mmio_base;
61a0d3fdb6SChris Wilson
62a0d3fdb6SChris Wilson while (*data) {
63a0d3fdb6SChris Wilson u8 count, flags;
64a0d3fdb6SChris Wilson
65a0d3fdb6SChris Wilson if (*data & BIT(7)) { /* skip */
66a0d3fdb6SChris Wilson count = *data++ & ~BIT(7);
67a0d3fdb6SChris Wilson regs += count;
68a0d3fdb6SChris Wilson continue;
69a0d3fdb6SChris Wilson }
70a0d3fdb6SChris Wilson
71a0d3fdb6SChris Wilson count = *data & 0x3f;
72a0d3fdb6SChris Wilson flags = *data >> 6;
73a0d3fdb6SChris Wilson data++;
74a0d3fdb6SChris Wilson
75a0d3fdb6SChris Wilson *regs = MI_LOAD_REGISTER_IMM(count);
76a0d3fdb6SChris Wilson if (flags & POSTED)
77a0d3fdb6SChris Wilson *regs |= MI_LRI_FORCE_POSTED;
78c816723bSLucas De Marchi if (GRAPHICS_VER(engine->i915) >= 11)
79a0d3fdb6SChris Wilson *regs |= MI_LRI_LRM_CS_MMIO;
80a0d3fdb6SChris Wilson regs++;
81a0d3fdb6SChris Wilson
82a0d3fdb6SChris Wilson GEM_BUG_ON(!count);
83a0d3fdb6SChris Wilson do {
84a0d3fdb6SChris Wilson u32 offset = 0;
85a0d3fdb6SChris Wilson u8 v;
86a0d3fdb6SChris Wilson
87a0d3fdb6SChris Wilson do {
88a0d3fdb6SChris Wilson v = *data++;
89a0d3fdb6SChris Wilson offset <<= 7;
90a0d3fdb6SChris Wilson offset |= v & ~BIT(7);
91a0d3fdb6SChris Wilson } while (v & BIT(7));
92a0d3fdb6SChris Wilson
93a0d3fdb6SChris Wilson regs[0] = base + (offset << 2);
94a0d3fdb6SChris Wilson regs += 2;
95a0d3fdb6SChris Wilson } while (--count);
96a0d3fdb6SChris Wilson }
97a0d3fdb6SChris Wilson
98a0d3fdb6SChris Wilson if (close) {
99a0d3fdb6SChris Wilson /* Close the batch; used mainly by live_lrc_layout() */
100a0d3fdb6SChris Wilson *regs = MI_BATCH_BUFFER_END;
1016266992cSLucas De Marchi if (GRAPHICS_VER(engine->i915) >= 11)
102a0d3fdb6SChris Wilson *regs |= BIT(0);
103a0d3fdb6SChris Wilson }
104a0d3fdb6SChris Wilson }
105a0d3fdb6SChris Wilson
106a0d3fdb6SChris Wilson static const u8 gen8_xcs_offsets[] = {
107a0d3fdb6SChris Wilson NOP(1),
108a0d3fdb6SChris Wilson LRI(11, 0),
109a0d3fdb6SChris Wilson REG16(0x244),
110a0d3fdb6SChris Wilson REG(0x034),
111a0d3fdb6SChris Wilson REG(0x030),
112a0d3fdb6SChris Wilson REG(0x038),
113a0d3fdb6SChris Wilson REG(0x03c),
114a0d3fdb6SChris Wilson REG(0x168),
115a0d3fdb6SChris Wilson REG(0x140),
116a0d3fdb6SChris Wilson REG(0x110),
117a0d3fdb6SChris Wilson REG(0x11c),
118a0d3fdb6SChris Wilson REG(0x114),
119a0d3fdb6SChris Wilson REG(0x118),
120a0d3fdb6SChris Wilson
121a0d3fdb6SChris Wilson NOP(9),
122a0d3fdb6SChris Wilson LRI(9, 0),
123a0d3fdb6SChris Wilson REG16(0x3a8),
124a0d3fdb6SChris Wilson REG16(0x28c),
125a0d3fdb6SChris Wilson REG16(0x288),
126a0d3fdb6SChris Wilson REG16(0x284),
127a0d3fdb6SChris Wilson REG16(0x280),
128a0d3fdb6SChris Wilson REG16(0x27c),
129a0d3fdb6SChris Wilson REG16(0x278),
130a0d3fdb6SChris Wilson REG16(0x274),
131a0d3fdb6SChris Wilson REG16(0x270),
132a0d3fdb6SChris Wilson
133a0d3fdb6SChris Wilson NOP(13),
134a0d3fdb6SChris Wilson LRI(2, 0),
135a0d3fdb6SChris Wilson REG16(0x200),
136a0d3fdb6SChris Wilson REG(0x028),
137a0d3fdb6SChris Wilson
138a0d3fdb6SChris Wilson END
139a0d3fdb6SChris Wilson };
140a0d3fdb6SChris Wilson
141a0d3fdb6SChris Wilson static const u8 gen9_xcs_offsets[] = {
142a0d3fdb6SChris Wilson NOP(1),
143a0d3fdb6SChris Wilson LRI(14, POSTED),
144a0d3fdb6SChris Wilson REG16(0x244),
145a0d3fdb6SChris Wilson REG(0x034),
146a0d3fdb6SChris Wilson REG(0x030),
147a0d3fdb6SChris Wilson REG(0x038),
148a0d3fdb6SChris Wilson REG(0x03c),
149a0d3fdb6SChris Wilson REG(0x168),
150a0d3fdb6SChris Wilson REG(0x140),
151a0d3fdb6SChris Wilson REG(0x110),
152a0d3fdb6SChris Wilson REG(0x11c),
153a0d3fdb6SChris Wilson REG(0x114),
154a0d3fdb6SChris Wilson REG(0x118),
155a0d3fdb6SChris Wilson REG(0x1c0),
156a0d3fdb6SChris Wilson REG(0x1c4),
157a0d3fdb6SChris Wilson REG(0x1c8),
158a0d3fdb6SChris Wilson
159a0d3fdb6SChris Wilson NOP(3),
160a0d3fdb6SChris Wilson LRI(9, POSTED),
161a0d3fdb6SChris Wilson REG16(0x3a8),
162a0d3fdb6SChris Wilson REG16(0x28c),
163a0d3fdb6SChris Wilson REG16(0x288),
164a0d3fdb6SChris Wilson REG16(0x284),
165a0d3fdb6SChris Wilson REG16(0x280),
166a0d3fdb6SChris Wilson REG16(0x27c),
167a0d3fdb6SChris Wilson REG16(0x278),
168a0d3fdb6SChris Wilson REG16(0x274),
169a0d3fdb6SChris Wilson REG16(0x270),
170a0d3fdb6SChris Wilson
171a0d3fdb6SChris Wilson NOP(13),
172a0d3fdb6SChris Wilson LRI(1, POSTED),
173a0d3fdb6SChris Wilson REG16(0x200),
174a0d3fdb6SChris Wilson
175a0d3fdb6SChris Wilson NOP(13),
176a0d3fdb6SChris Wilson LRI(44, POSTED),
177a0d3fdb6SChris Wilson REG(0x028),
178a0d3fdb6SChris Wilson REG(0x09c),
179a0d3fdb6SChris Wilson REG(0x0c0),
180a0d3fdb6SChris Wilson REG(0x178),
181a0d3fdb6SChris Wilson REG(0x17c),
182a0d3fdb6SChris Wilson REG16(0x358),
183a0d3fdb6SChris Wilson REG(0x170),
184a0d3fdb6SChris Wilson REG(0x150),
185a0d3fdb6SChris Wilson REG(0x154),
186a0d3fdb6SChris Wilson REG(0x158),
187a0d3fdb6SChris Wilson REG16(0x41c),
188a0d3fdb6SChris Wilson REG16(0x600),
189a0d3fdb6SChris Wilson REG16(0x604),
190a0d3fdb6SChris Wilson REG16(0x608),
191a0d3fdb6SChris Wilson REG16(0x60c),
192a0d3fdb6SChris Wilson REG16(0x610),
193a0d3fdb6SChris Wilson REG16(0x614),
194a0d3fdb6SChris Wilson REG16(0x618),
195a0d3fdb6SChris Wilson REG16(0x61c),
196a0d3fdb6SChris Wilson REG16(0x620),
197a0d3fdb6SChris Wilson REG16(0x624),
198a0d3fdb6SChris Wilson REG16(0x628),
199a0d3fdb6SChris Wilson REG16(0x62c),
200a0d3fdb6SChris Wilson REG16(0x630),
201a0d3fdb6SChris Wilson REG16(0x634),
202a0d3fdb6SChris Wilson REG16(0x638),
203a0d3fdb6SChris Wilson REG16(0x63c),
204a0d3fdb6SChris Wilson REG16(0x640),
205a0d3fdb6SChris Wilson REG16(0x644),
206a0d3fdb6SChris Wilson REG16(0x648),
207a0d3fdb6SChris Wilson REG16(0x64c),
208a0d3fdb6SChris Wilson REG16(0x650),
209a0d3fdb6SChris Wilson REG16(0x654),
210a0d3fdb6SChris Wilson REG16(0x658),
211a0d3fdb6SChris Wilson REG16(0x65c),
212a0d3fdb6SChris Wilson REG16(0x660),
213a0d3fdb6SChris Wilson REG16(0x664),
214a0d3fdb6SChris Wilson REG16(0x668),
215a0d3fdb6SChris Wilson REG16(0x66c),
216a0d3fdb6SChris Wilson REG16(0x670),
217a0d3fdb6SChris Wilson REG16(0x674),
218a0d3fdb6SChris Wilson REG16(0x678),
219a0d3fdb6SChris Wilson REG16(0x67c),
220a0d3fdb6SChris Wilson REG(0x068),
221a0d3fdb6SChris Wilson
222a0d3fdb6SChris Wilson END
223a0d3fdb6SChris Wilson };
224a0d3fdb6SChris Wilson
225a0d3fdb6SChris Wilson static const u8 gen12_xcs_offsets[] = {
226a0d3fdb6SChris Wilson NOP(1),
227a0d3fdb6SChris Wilson LRI(13, POSTED),
228a0d3fdb6SChris Wilson REG16(0x244),
229a0d3fdb6SChris Wilson REG(0x034),
230a0d3fdb6SChris Wilson REG(0x030),
231a0d3fdb6SChris Wilson REG(0x038),
232a0d3fdb6SChris Wilson REG(0x03c),
233a0d3fdb6SChris Wilson REG(0x168),
234a0d3fdb6SChris Wilson REG(0x140),
235a0d3fdb6SChris Wilson REG(0x110),
236a0d3fdb6SChris Wilson REG(0x1c0),
237a0d3fdb6SChris Wilson REG(0x1c4),
238a0d3fdb6SChris Wilson REG(0x1c8),
239a0d3fdb6SChris Wilson REG(0x180),
240a0d3fdb6SChris Wilson REG16(0x2b4),
241a0d3fdb6SChris Wilson
242a0d3fdb6SChris Wilson NOP(5),
243a0d3fdb6SChris Wilson LRI(9, POSTED),
244a0d3fdb6SChris Wilson REG16(0x3a8),
245a0d3fdb6SChris Wilson REG16(0x28c),
246a0d3fdb6SChris Wilson REG16(0x288),
247a0d3fdb6SChris Wilson REG16(0x284),
248a0d3fdb6SChris Wilson REG16(0x280),
249a0d3fdb6SChris Wilson REG16(0x27c),
250a0d3fdb6SChris Wilson REG16(0x278),
251a0d3fdb6SChris Wilson REG16(0x274),
252a0d3fdb6SChris Wilson REG16(0x270),
253a0d3fdb6SChris Wilson
254a0d3fdb6SChris Wilson END
255a0d3fdb6SChris Wilson };
256a0d3fdb6SChris Wilson
257ae4b0eacSAkeem G Abodunrin static const u8 dg2_xcs_offsets[] = {
258ae4b0eacSAkeem G Abodunrin NOP(1),
259ae4b0eacSAkeem G Abodunrin LRI(15, POSTED),
260ae4b0eacSAkeem G Abodunrin REG16(0x244),
261ae4b0eacSAkeem G Abodunrin REG(0x034),
262ae4b0eacSAkeem G Abodunrin REG(0x030),
263ae4b0eacSAkeem G Abodunrin REG(0x038),
264ae4b0eacSAkeem G Abodunrin REG(0x03c),
265ae4b0eacSAkeem G Abodunrin REG(0x168),
266ae4b0eacSAkeem G Abodunrin REG(0x140),
267ae4b0eacSAkeem G Abodunrin REG(0x110),
268ae4b0eacSAkeem G Abodunrin REG(0x1c0),
269ae4b0eacSAkeem G Abodunrin REG(0x1c4),
270ae4b0eacSAkeem G Abodunrin REG(0x1c8),
271ae4b0eacSAkeem G Abodunrin REG(0x180),
272ae4b0eacSAkeem G Abodunrin REG16(0x2b4),
273ae4b0eacSAkeem G Abodunrin REG(0x120),
274ae4b0eacSAkeem G Abodunrin REG(0x124),
275ae4b0eacSAkeem G Abodunrin
276ae4b0eacSAkeem G Abodunrin NOP(1),
277ae4b0eacSAkeem G Abodunrin LRI(9, POSTED),
278ae4b0eacSAkeem G Abodunrin REG16(0x3a8),
279ae4b0eacSAkeem G Abodunrin REG16(0x28c),
280ae4b0eacSAkeem G Abodunrin REG16(0x288),
281ae4b0eacSAkeem G Abodunrin REG16(0x284),
282ae4b0eacSAkeem G Abodunrin REG16(0x280),
283ae4b0eacSAkeem G Abodunrin REG16(0x27c),
284ae4b0eacSAkeem G Abodunrin REG16(0x278),
285ae4b0eacSAkeem G Abodunrin REG16(0x274),
286ae4b0eacSAkeem G Abodunrin REG16(0x270),
287ae4b0eacSAkeem G Abodunrin
288ae4b0eacSAkeem G Abodunrin END
289ae4b0eacSAkeem G Abodunrin };
290ae4b0eacSAkeem G Abodunrin
291a0d3fdb6SChris Wilson static const u8 gen8_rcs_offsets[] = {
292a0d3fdb6SChris Wilson NOP(1),
293a0d3fdb6SChris Wilson LRI(14, POSTED),
294a0d3fdb6SChris Wilson REG16(0x244),
295a0d3fdb6SChris Wilson REG(0x034),
296a0d3fdb6SChris Wilson REG(0x030),
297a0d3fdb6SChris Wilson REG(0x038),
298a0d3fdb6SChris Wilson REG(0x03c),
299a0d3fdb6SChris Wilson REG(0x168),
300a0d3fdb6SChris Wilson REG(0x140),
301a0d3fdb6SChris Wilson REG(0x110),
302a0d3fdb6SChris Wilson REG(0x11c),
303a0d3fdb6SChris Wilson REG(0x114),
304a0d3fdb6SChris Wilson REG(0x118),
305a0d3fdb6SChris Wilson REG(0x1c0),
306a0d3fdb6SChris Wilson REG(0x1c4),
307a0d3fdb6SChris Wilson REG(0x1c8),
308a0d3fdb6SChris Wilson
309a0d3fdb6SChris Wilson NOP(3),
310a0d3fdb6SChris Wilson LRI(9, POSTED),
311a0d3fdb6SChris Wilson REG16(0x3a8),
312a0d3fdb6SChris Wilson REG16(0x28c),
313a0d3fdb6SChris Wilson REG16(0x288),
314a0d3fdb6SChris Wilson REG16(0x284),
315a0d3fdb6SChris Wilson REG16(0x280),
316a0d3fdb6SChris Wilson REG16(0x27c),
317a0d3fdb6SChris Wilson REG16(0x278),
318a0d3fdb6SChris Wilson REG16(0x274),
319a0d3fdb6SChris Wilson REG16(0x270),
320a0d3fdb6SChris Wilson
321a0d3fdb6SChris Wilson NOP(13),
322a0d3fdb6SChris Wilson LRI(1, 0),
323a0d3fdb6SChris Wilson REG(0x0c8),
324a0d3fdb6SChris Wilson
325a0d3fdb6SChris Wilson END
326a0d3fdb6SChris Wilson };
327a0d3fdb6SChris Wilson
328a0d3fdb6SChris Wilson static const u8 gen9_rcs_offsets[] = {
329a0d3fdb6SChris Wilson NOP(1),
330a0d3fdb6SChris Wilson LRI(14, POSTED),
331a0d3fdb6SChris Wilson REG16(0x244),
332a0d3fdb6SChris Wilson REG(0x34),
333a0d3fdb6SChris Wilson REG(0x30),
334a0d3fdb6SChris Wilson REG(0x38),
335a0d3fdb6SChris Wilson REG(0x3c),
336a0d3fdb6SChris Wilson REG(0x168),
337a0d3fdb6SChris Wilson REG(0x140),
338a0d3fdb6SChris Wilson REG(0x110),
339a0d3fdb6SChris Wilson REG(0x11c),
340a0d3fdb6SChris Wilson REG(0x114),
341a0d3fdb6SChris Wilson REG(0x118),
342a0d3fdb6SChris Wilson REG(0x1c0),
343a0d3fdb6SChris Wilson REG(0x1c4),
344a0d3fdb6SChris Wilson REG(0x1c8),
345a0d3fdb6SChris Wilson
346a0d3fdb6SChris Wilson NOP(3),
347a0d3fdb6SChris Wilson LRI(9, POSTED),
348a0d3fdb6SChris Wilson REG16(0x3a8),
349a0d3fdb6SChris Wilson REG16(0x28c),
350a0d3fdb6SChris Wilson REG16(0x288),
351a0d3fdb6SChris Wilson REG16(0x284),
352a0d3fdb6SChris Wilson REG16(0x280),
353a0d3fdb6SChris Wilson REG16(0x27c),
354a0d3fdb6SChris Wilson REG16(0x278),
355a0d3fdb6SChris Wilson REG16(0x274),
356a0d3fdb6SChris Wilson REG16(0x270),
357a0d3fdb6SChris Wilson
358a0d3fdb6SChris Wilson NOP(13),
359a0d3fdb6SChris Wilson LRI(1, 0),
360a0d3fdb6SChris Wilson REG(0xc8),
361a0d3fdb6SChris Wilson
362a0d3fdb6SChris Wilson NOP(13),
363a0d3fdb6SChris Wilson LRI(44, POSTED),
364a0d3fdb6SChris Wilson REG(0x28),
365a0d3fdb6SChris Wilson REG(0x9c),
366a0d3fdb6SChris Wilson REG(0xc0),
367a0d3fdb6SChris Wilson REG(0x178),
368a0d3fdb6SChris Wilson REG(0x17c),
369a0d3fdb6SChris Wilson REG16(0x358),
370a0d3fdb6SChris Wilson REG(0x170),
371a0d3fdb6SChris Wilson REG(0x150),
372a0d3fdb6SChris Wilson REG(0x154),
373a0d3fdb6SChris Wilson REG(0x158),
374a0d3fdb6SChris Wilson REG16(0x41c),
375a0d3fdb6SChris Wilson REG16(0x600),
376a0d3fdb6SChris Wilson REG16(0x604),
377a0d3fdb6SChris Wilson REG16(0x608),
378a0d3fdb6SChris Wilson REG16(0x60c),
379a0d3fdb6SChris Wilson REG16(0x610),
380a0d3fdb6SChris Wilson REG16(0x614),
381a0d3fdb6SChris Wilson REG16(0x618),
382a0d3fdb6SChris Wilson REG16(0x61c),
383a0d3fdb6SChris Wilson REG16(0x620),
384a0d3fdb6SChris Wilson REG16(0x624),
385a0d3fdb6SChris Wilson REG16(0x628),
386a0d3fdb6SChris Wilson REG16(0x62c),
387a0d3fdb6SChris Wilson REG16(0x630),
388a0d3fdb6SChris Wilson REG16(0x634),
389a0d3fdb6SChris Wilson REG16(0x638),
390a0d3fdb6SChris Wilson REG16(0x63c),
391a0d3fdb6SChris Wilson REG16(0x640),
392a0d3fdb6SChris Wilson REG16(0x644),
393a0d3fdb6SChris Wilson REG16(0x648),
394a0d3fdb6SChris Wilson REG16(0x64c),
395a0d3fdb6SChris Wilson REG16(0x650),
396a0d3fdb6SChris Wilson REG16(0x654),
397a0d3fdb6SChris Wilson REG16(0x658),
398a0d3fdb6SChris Wilson REG16(0x65c),
399a0d3fdb6SChris Wilson REG16(0x660),
400a0d3fdb6SChris Wilson REG16(0x664),
401a0d3fdb6SChris Wilson REG16(0x668),
402a0d3fdb6SChris Wilson REG16(0x66c),
403a0d3fdb6SChris Wilson REG16(0x670),
404a0d3fdb6SChris Wilson REG16(0x674),
405a0d3fdb6SChris Wilson REG16(0x678),
406a0d3fdb6SChris Wilson REG16(0x67c),
407a0d3fdb6SChris Wilson REG(0x68),
408a0d3fdb6SChris Wilson
409a0d3fdb6SChris Wilson END
410a0d3fdb6SChris Wilson };
411a0d3fdb6SChris Wilson
412a0d3fdb6SChris Wilson static const u8 gen11_rcs_offsets[] = {
413a0d3fdb6SChris Wilson NOP(1),
414a0d3fdb6SChris Wilson LRI(15, POSTED),
415a0d3fdb6SChris Wilson REG16(0x244),
416a0d3fdb6SChris Wilson REG(0x034),
417a0d3fdb6SChris Wilson REG(0x030),
418a0d3fdb6SChris Wilson REG(0x038),
419a0d3fdb6SChris Wilson REG(0x03c),
420a0d3fdb6SChris Wilson REG(0x168),
421a0d3fdb6SChris Wilson REG(0x140),
422a0d3fdb6SChris Wilson REG(0x110),
423a0d3fdb6SChris Wilson REG(0x11c),
424a0d3fdb6SChris Wilson REG(0x114),
425a0d3fdb6SChris Wilson REG(0x118),
426a0d3fdb6SChris Wilson REG(0x1c0),
427a0d3fdb6SChris Wilson REG(0x1c4),
428a0d3fdb6SChris Wilson REG(0x1c8),
429a0d3fdb6SChris Wilson REG(0x180),
430a0d3fdb6SChris Wilson
431a0d3fdb6SChris Wilson NOP(1),
432a0d3fdb6SChris Wilson LRI(9, POSTED),
433a0d3fdb6SChris Wilson REG16(0x3a8),
434a0d3fdb6SChris Wilson REG16(0x28c),
435a0d3fdb6SChris Wilson REG16(0x288),
436a0d3fdb6SChris Wilson REG16(0x284),
437a0d3fdb6SChris Wilson REG16(0x280),
438a0d3fdb6SChris Wilson REG16(0x27c),
439a0d3fdb6SChris Wilson REG16(0x278),
440a0d3fdb6SChris Wilson REG16(0x274),
441a0d3fdb6SChris Wilson REG16(0x270),
442a0d3fdb6SChris Wilson
443a0d3fdb6SChris Wilson LRI(1, POSTED),
444a0d3fdb6SChris Wilson REG(0x1b0),
445a0d3fdb6SChris Wilson
446a0d3fdb6SChris Wilson NOP(10),
447a0d3fdb6SChris Wilson LRI(1, 0),
448a0d3fdb6SChris Wilson REG(0x0c8),
449a0d3fdb6SChris Wilson
450a0d3fdb6SChris Wilson END
451a0d3fdb6SChris Wilson };
452a0d3fdb6SChris Wilson
453a0d3fdb6SChris Wilson static const u8 gen12_rcs_offsets[] = {
454a0d3fdb6SChris Wilson NOP(1),
455a0d3fdb6SChris Wilson LRI(13, POSTED),
456a0d3fdb6SChris Wilson REG16(0x244),
457a0d3fdb6SChris Wilson REG(0x034),
458a0d3fdb6SChris Wilson REG(0x030),
459a0d3fdb6SChris Wilson REG(0x038),
460a0d3fdb6SChris Wilson REG(0x03c),
461a0d3fdb6SChris Wilson REG(0x168),
462a0d3fdb6SChris Wilson REG(0x140),
463a0d3fdb6SChris Wilson REG(0x110),
464a0d3fdb6SChris Wilson REG(0x1c0),
465a0d3fdb6SChris Wilson REG(0x1c4),
466a0d3fdb6SChris Wilson REG(0x1c8),
467a0d3fdb6SChris Wilson REG(0x180),
468a0d3fdb6SChris Wilson REG16(0x2b4),
469a0d3fdb6SChris Wilson
470a0d3fdb6SChris Wilson NOP(5),
471a0d3fdb6SChris Wilson LRI(9, POSTED),
472a0d3fdb6SChris Wilson REG16(0x3a8),
473a0d3fdb6SChris Wilson REG16(0x28c),
474a0d3fdb6SChris Wilson REG16(0x288),
475a0d3fdb6SChris Wilson REG16(0x284),
476a0d3fdb6SChris Wilson REG16(0x280),
477a0d3fdb6SChris Wilson REG16(0x27c),
478a0d3fdb6SChris Wilson REG16(0x278),
479a0d3fdb6SChris Wilson REG16(0x274),
480a0d3fdb6SChris Wilson REG16(0x270),
481a0d3fdb6SChris Wilson
482a0d3fdb6SChris Wilson LRI(3, POSTED),
483a0d3fdb6SChris Wilson REG(0x1b0),
484a0d3fdb6SChris Wilson REG16(0x5a8),
485a0d3fdb6SChris Wilson REG16(0x5ac),
486a0d3fdb6SChris Wilson
487a0d3fdb6SChris Wilson NOP(6),
488a0d3fdb6SChris Wilson LRI(1, 0),
489a0d3fdb6SChris Wilson REG(0x0c8),
490a0d3fdb6SChris Wilson NOP(3 + 9 + 1),
491a0d3fdb6SChris Wilson
492a0d3fdb6SChris Wilson LRI(51, POSTED),
493a0d3fdb6SChris Wilson REG16(0x588),
494a0d3fdb6SChris Wilson REG16(0x588),
495a0d3fdb6SChris Wilson REG16(0x588),
496a0d3fdb6SChris Wilson REG16(0x588),
497a0d3fdb6SChris Wilson REG16(0x588),
498a0d3fdb6SChris Wilson REG16(0x588),
499a0d3fdb6SChris Wilson REG(0x028),
500a0d3fdb6SChris Wilson REG(0x09c),
501a0d3fdb6SChris Wilson REG(0x0c0),
502a0d3fdb6SChris Wilson REG(0x178),
503a0d3fdb6SChris Wilson REG(0x17c),
504a0d3fdb6SChris Wilson REG16(0x358),
505a0d3fdb6SChris Wilson REG(0x170),
506a0d3fdb6SChris Wilson REG(0x150),
507a0d3fdb6SChris Wilson REG(0x154),
508a0d3fdb6SChris Wilson REG(0x158),
509a0d3fdb6SChris Wilson REG16(0x41c),
510a0d3fdb6SChris Wilson REG16(0x600),
511a0d3fdb6SChris Wilson REG16(0x604),
512a0d3fdb6SChris Wilson REG16(0x608),
513a0d3fdb6SChris Wilson REG16(0x60c),
514a0d3fdb6SChris Wilson REG16(0x610),
515a0d3fdb6SChris Wilson REG16(0x614),
516a0d3fdb6SChris Wilson REG16(0x618),
517a0d3fdb6SChris Wilson REG16(0x61c),
518a0d3fdb6SChris Wilson REG16(0x620),
519a0d3fdb6SChris Wilson REG16(0x624),
520a0d3fdb6SChris Wilson REG16(0x628),
521a0d3fdb6SChris Wilson REG16(0x62c),
522a0d3fdb6SChris Wilson REG16(0x630),
523a0d3fdb6SChris Wilson REG16(0x634),
524a0d3fdb6SChris Wilson REG16(0x638),
525a0d3fdb6SChris Wilson REG16(0x63c),
526a0d3fdb6SChris Wilson REG16(0x640),
527a0d3fdb6SChris Wilson REG16(0x644),
528a0d3fdb6SChris Wilson REG16(0x648),
529a0d3fdb6SChris Wilson REG16(0x64c),
530a0d3fdb6SChris Wilson REG16(0x650),
531a0d3fdb6SChris Wilson REG16(0x654),
532a0d3fdb6SChris Wilson REG16(0x658),
533a0d3fdb6SChris Wilson REG16(0x65c),
534a0d3fdb6SChris Wilson REG16(0x660),
535a0d3fdb6SChris Wilson REG16(0x664),
536a0d3fdb6SChris Wilson REG16(0x668),
537a0d3fdb6SChris Wilson REG16(0x66c),
538a0d3fdb6SChris Wilson REG16(0x670),
539a0d3fdb6SChris Wilson REG16(0x674),
540a0d3fdb6SChris Wilson REG16(0x678),
541a0d3fdb6SChris Wilson REG16(0x67c),
542a0d3fdb6SChris Wilson REG(0x068),
543a0d3fdb6SChris Wilson REG(0x084),
544a0d3fdb6SChris Wilson NOP(1),
545a0d3fdb6SChris Wilson
546a0d3fdb6SChris Wilson END
547a0d3fdb6SChris Wilson };
548a0d3fdb6SChris Wilson
5497fc37efdSPrathap Kumar Valsan static const u8 xehp_rcs_offsets[] = {
5507fc37efdSPrathap Kumar Valsan NOP(1),
5517fc37efdSPrathap Kumar Valsan LRI(13, POSTED),
5527fc37efdSPrathap Kumar Valsan REG16(0x244),
5537fc37efdSPrathap Kumar Valsan REG(0x034),
5547fc37efdSPrathap Kumar Valsan REG(0x030),
5557fc37efdSPrathap Kumar Valsan REG(0x038),
5567fc37efdSPrathap Kumar Valsan REG(0x03c),
5577fc37efdSPrathap Kumar Valsan REG(0x168),
5587fc37efdSPrathap Kumar Valsan REG(0x140),
5597fc37efdSPrathap Kumar Valsan REG(0x110),
5607fc37efdSPrathap Kumar Valsan REG(0x1c0),
5617fc37efdSPrathap Kumar Valsan REG(0x1c4),
5627fc37efdSPrathap Kumar Valsan REG(0x1c8),
5637fc37efdSPrathap Kumar Valsan REG(0x180),
5647fc37efdSPrathap Kumar Valsan REG16(0x2b4),
5657fc37efdSPrathap Kumar Valsan
5667fc37efdSPrathap Kumar Valsan NOP(5),
5677fc37efdSPrathap Kumar Valsan LRI(9, POSTED),
5687fc37efdSPrathap Kumar Valsan REG16(0x3a8),
5697fc37efdSPrathap Kumar Valsan REG16(0x28c),
5707fc37efdSPrathap Kumar Valsan REG16(0x288),
5717fc37efdSPrathap Kumar Valsan REG16(0x284),
5727fc37efdSPrathap Kumar Valsan REG16(0x280),
5737fc37efdSPrathap Kumar Valsan REG16(0x27c),
5747fc37efdSPrathap Kumar Valsan REG16(0x278),
5757fc37efdSPrathap Kumar Valsan REG16(0x274),
5767fc37efdSPrathap Kumar Valsan REG16(0x270),
5777fc37efdSPrathap Kumar Valsan
5787fc37efdSPrathap Kumar Valsan LRI(3, POSTED),
5797fc37efdSPrathap Kumar Valsan REG(0x1b0),
5807fc37efdSPrathap Kumar Valsan REG16(0x5a8),
5817fc37efdSPrathap Kumar Valsan REG16(0x5ac),
5827fc37efdSPrathap Kumar Valsan
5837fc37efdSPrathap Kumar Valsan NOP(6),
5847fc37efdSPrathap Kumar Valsan LRI(1, 0),
5857fc37efdSPrathap Kumar Valsan REG(0x0c8),
5867fc37efdSPrathap Kumar Valsan
5877fc37efdSPrathap Kumar Valsan END
5887fc37efdSPrathap Kumar Valsan };
5897fc37efdSPrathap Kumar Valsan
590ae4b0eacSAkeem G Abodunrin static const u8 dg2_rcs_offsets[] = {
591ae4b0eacSAkeem G Abodunrin NOP(1),
592ae4b0eacSAkeem G Abodunrin LRI(15, POSTED),
593ae4b0eacSAkeem G Abodunrin REG16(0x244),
594ae4b0eacSAkeem G Abodunrin REG(0x034),
595ae4b0eacSAkeem G Abodunrin REG(0x030),
596ae4b0eacSAkeem G Abodunrin REG(0x038),
597ae4b0eacSAkeem G Abodunrin REG(0x03c),
598ae4b0eacSAkeem G Abodunrin REG(0x168),
599ae4b0eacSAkeem G Abodunrin REG(0x140),
600ae4b0eacSAkeem G Abodunrin REG(0x110),
601ae4b0eacSAkeem G Abodunrin REG(0x1c0),
602ae4b0eacSAkeem G Abodunrin REG(0x1c4),
603ae4b0eacSAkeem G Abodunrin REG(0x1c8),
604ae4b0eacSAkeem G Abodunrin REG(0x180),
605ae4b0eacSAkeem G Abodunrin REG16(0x2b4),
606ae4b0eacSAkeem G Abodunrin REG(0x120),
607ae4b0eacSAkeem G Abodunrin REG(0x124),
608ae4b0eacSAkeem G Abodunrin
609ae4b0eacSAkeem G Abodunrin NOP(1),
610ae4b0eacSAkeem G Abodunrin LRI(9, POSTED),
611ae4b0eacSAkeem G Abodunrin REG16(0x3a8),
612ae4b0eacSAkeem G Abodunrin REG16(0x28c),
613ae4b0eacSAkeem G Abodunrin REG16(0x288),
614ae4b0eacSAkeem G Abodunrin REG16(0x284),
615ae4b0eacSAkeem G Abodunrin REG16(0x280),
616ae4b0eacSAkeem G Abodunrin REG16(0x27c),
617ae4b0eacSAkeem G Abodunrin REG16(0x278),
618ae4b0eacSAkeem G Abodunrin REG16(0x274),
619ae4b0eacSAkeem G Abodunrin REG16(0x270),
620ae4b0eacSAkeem G Abodunrin
621ae4b0eacSAkeem G Abodunrin LRI(3, POSTED),
622ae4b0eacSAkeem G Abodunrin REG(0x1b0),
623ae4b0eacSAkeem G Abodunrin REG16(0x5a8),
624ae4b0eacSAkeem G Abodunrin REG16(0x5ac),
625ae4b0eacSAkeem G Abodunrin
626ae4b0eacSAkeem G Abodunrin NOP(6),
627ae4b0eacSAkeem G Abodunrin LRI(1, 0),
628ae4b0eacSAkeem G Abodunrin REG(0x0c8),
629ae4b0eacSAkeem G Abodunrin
630ae4b0eacSAkeem G Abodunrin END
631ae4b0eacSAkeem G Abodunrin };
632ae4b0eacSAkeem G Abodunrin
6330d0e7d1eSMatt Roper static const u8 mtl_rcs_offsets[] = {
6340d0e7d1eSMatt Roper NOP(1),
6350d0e7d1eSMatt Roper LRI(15, POSTED),
6360d0e7d1eSMatt Roper REG16(0x244),
6370d0e7d1eSMatt Roper REG(0x034),
6380d0e7d1eSMatt Roper REG(0x030),
6390d0e7d1eSMatt Roper REG(0x038),
6400d0e7d1eSMatt Roper REG(0x03c),
6410d0e7d1eSMatt Roper REG(0x168),
6420d0e7d1eSMatt Roper REG(0x140),
6430d0e7d1eSMatt Roper REG(0x110),
6440d0e7d1eSMatt Roper REG(0x1c0),
6450d0e7d1eSMatt Roper REG(0x1c4),
6460d0e7d1eSMatt Roper REG(0x1c8),
6470d0e7d1eSMatt Roper REG(0x180),
6480d0e7d1eSMatt Roper REG16(0x2b4),
6490d0e7d1eSMatt Roper REG(0x120),
6500d0e7d1eSMatt Roper REG(0x124),
6510d0e7d1eSMatt Roper
6520d0e7d1eSMatt Roper NOP(1),
6530d0e7d1eSMatt Roper LRI(9, POSTED),
6540d0e7d1eSMatt Roper REG16(0x3a8),
6550d0e7d1eSMatt Roper REG16(0x28c),
6560d0e7d1eSMatt Roper REG16(0x288),
6570d0e7d1eSMatt Roper REG16(0x284),
6580d0e7d1eSMatt Roper REG16(0x280),
6590d0e7d1eSMatt Roper REG16(0x27c),
6600d0e7d1eSMatt Roper REG16(0x278),
6610d0e7d1eSMatt Roper REG16(0x274),
6620d0e7d1eSMatt Roper REG16(0x270),
6630d0e7d1eSMatt Roper
6640d0e7d1eSMatt Roper NOP(2),
6650d0e7d1eSMatt Roper LRI(2, POSTED),
6660d0e7d1eSMatt Roper REG16(0x5a8),
6670d0e7d1eSMatt Roper REG16(0x5ac),
6680d0e7d1eSMatt Roper
6690d0e7d1eSMatt Roper NOP(6),
6700d0e7d1eSMatt Roper LRI(1, 0),
6710d0e7d1eSMatt Roper REG(0x0c8),
6720d0e7d1eSMatt Roper
6730d0e7d1eSMatt Roper END
6740d0e7d1eSMatt Roper };
6750d0e7d1eSMatt Roper
/*
 * The table-encoding helpers defined alongside set_offsets() are dropped
 * here, after the last table. POSTED is not undefined by this list.
 */
#undef END
#undef REG16
#undef REG
#undef LRI
#undef NOP
681a0d3fdb6SChris Wilson
reg_offsets(const struct intel_engine_cs * engine)682a0d3fdb6SChris Wilson static const u8 *reg_offsets(const struct intel_engine_cs *engine)
683a0d3fdb6SChris Wilson {
684a0d3fdb6SChris Wilson /*
685a0d3fdb6SChris Wilson * The gen12+ lists only have the registers we program in the basic
686a0d3fdb6SChris Wilson * default state. We rely on the context image using relative
687a0d3fdb6SChris Wilson * addressing to automatic fixup the register state between the
688a0d3fdb6SChris Wilson * physical engines for virtual engine.
689a0d3fdb6SChris Wilson */
690c816723bSLucas De Marchi GEM_BUG_ON(GRAPHICS_VER(engine->i915) >= 12 &&
691a0d3fdb6SChris Wilson !intel_engine_has_relative_mmio(engine));
692a0d3fdb6SChris Wilson
693c674c5b9SMatt Roper if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) {
6940d0e7d1eSMatt Roper if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70))
6950d0e7d1eSMatt Roper return mtl_rcs_offsets;
6960d0e7d1eSMatt Roper else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
697ae4b0eacSAkeem G Abodunrin return dg2_rcs_offsets;
698ae4b0eacSAkeem G Abodunrin else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
6997fc37efdSPrathap Kumar Valsan return xehp_rcs_offsets;
7007fc37efdSPrathap Kumar Valsan else if (GRAPHICS_VER(engine->i915) >= 12)
701a0d3fdb6SChris Wilson return gen12_rcs_offsets;
702c816723bSLucas De Marchi else if (GRAPHICS_VER(engine->i915) >= 11)
703a0d3fdb6SChris Wilson return gen11_rcs_offsets;
704c816723bSLucas De Marchi else if (GRAPHICS_VER(engine->i915) >= 9)
705a0d3fdb6SChris Wilson return gen9_rcs_offsets;
706a0d3fdb6SChris Wilson else
707a0d3fdb6SChris Wilson return gen8_rcs_offsets;
708a0d3fdb6SChris Wilson } else {
709ca54a9a3SLucas De Marchi if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
710ae4b0eacSAkeem G Abodunrin return dg2_xcs_offsets;
711ae4b0eacSAkeem G Abodunrin else if (GRAPHICS_VER(engine->i915) >= 12)
712a0d3fdb6SChris Wilson return gen12_xcs_offsets;
713c816723bSLucas De Marchi else if (GRAPHICS_VER(engine->i915) >= 9)
714a0d3fdb6SChris Wilson return gen9_xcs_offsets;
715a0d3fdb6SChris Wilson else
716a0d3fdb6SChris Wilson return gen8_xcs_offsets;
717a0d3fdb6SChris Wilson }
718a0d3fdb6SChris Wilson }
719a0d3fdb6SChris Wilson
lrc_ring_mi_mode(const struct intel_engine_cs * engine)720a0d3fdb6SChris Wilson static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
721a0d3fdb6SChris Wilson {
7227fc37efdSPrathap Kumar Valsan if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
7237fc37efdSPrathap Kumar Valsan return 0x70;
7247fc37efdSPrathap Kumar Valsan else if (GRAPHICS_VER(engine->i915) >= 12)
725a0d3fdb6SChris Wilson return 0x60;
726c816723bSLucas De Marchi else if (GRAPHICS_VER(engine->i915) >= 9)
727a0d3fdb6SChris Wilson return 0x54;
728a0d3fdb6SChris Wilson else if (engine->class == RENDER_CLASS)
729a0d3fdb6SChris Wilson return 0x58;
730a0d3fdb6SChris Wilson else
731a0d3fdb6SChris Wilson return -1;
732a0d3fdb6SChris Wilson }
733a0d3fdb6SChris Wilson
lrc_ring_bb_offset(const struct intel_engine_cs * engine)734c9424fa1SChris Wilson static int lrc_ring_bb_offset(const struct intel_engine_cs *engine)
735c9424fa1SChris Wilson {
736c9424fa1SChris Wilson if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
737c9424fa1SChris Wilson return 0x80;
738c9424fa1SChris Wilson else if (GRAPHICS_VER(engine->i915) >= 12)
739c9424fa1SChris Wilson return 0x70;
740c9424fa1SChris Wilson else if (GRAPHICS_VER(engine->i915) >= 9)
741c9424fa1SChris Wilson return 0x64;
742c9424fa1SChris Wilson else if (GRAPHICS_VER(engine->i915) >= 8 &&
743c9424fa1SChris Wilson engine->class == RENDER_CLASS)
744c9424fa1SChris Wilson return 0xc4;
745c9424fa1SChris Wilson else
746c9424fa1SChris Wilson return -1;
747c9424fa1SChris Wilson }
748c9424fa1SChris Wilson
lrc_ring_gpr0(const struct intel_engine_cs * engine)749a0d3fdb6SChris Wilson static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
750a0d3fdb6SChris Wilson {
7517fc37efdSPrathap Kumar Valsan if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
7527fc37efdSPrathap Kumar Valsan return 0x84;
7537fc37efdSPrathap Kumar Valsan else if (GRAPHICS_VER(engine->i915) >= 12)
754a0d3fdb6SChris Wilson return 0x74;
755c816723bSLucas De Marchi else if (GRAPHICS_VER(engine->i915) >= 9)
756a0d3fdb6SChris Wilson return 0x68;
757a0d3fdb6SChris Wilson else if (engine->class == RENDER_CLASS)
758a0d3fdb6SChris Wilson return 0xd8;
759a0d3fdb6SChris Wilson else
760a0d3fdb6SChris Wilson return -1;
761a0d3fdb6SChris Wilson }
762a0d3fdb6SChris Wilson
lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs * engine)763a0d3fdb6SChris Wilson static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine)
764a0d3fdb6SChris Wilson {
765c816723bSLucas De Marchi if (GRAPHICS_VER(engine->i915) >= 12)
766a0d3fdb6SChris Wilson return 0x12;
767c816723bSLucas De Marchi else if (GRAPHICS_VER(engine->i915) >= 9 || engine->class == RENDER_CLASS)
768a0d3fdb6SChris Wilson return 0x18;
769a0d3fdb6SChris Wilson else
770a0d3fdb6SChris Wilson return -1;
771a0d3fdb6SChris Wilson }
772a0d3fdb6SChris Wilson
/*
 * The indirect-ctx pointer slot sits two dwords after the per-ctx wa_bb
 * slot; a negative result from lrc_ring_wa_bb_per_ctx() is passed through
 * unchanged.
 */
static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
{
	const int per_ctx = lrc_ring_wa_bb_per_ctx(engine);

	return per_ctx < 0 ? per_ctx : per_ctx + 2;
}
783a0d3fdb6SChris Wilson
/*
 * The indirect-ctx offset slot sits two dwords after the indirect-ctx
 * pointer slot; a negative result from lrc_ring_indirect_ptr() is passed
 * through unchanged.
 */
static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
{
	const int ptr = lrc_ring_indirect_ptr(engine);

	return ptr < 0 ? ptr : ptr + 2;
}
794a0d3fdb6SChris Wilson
lrc_ring_cmd_buf_cctl(const struct intel_engine_cs * engine)795a0d3fdb6SChris Wilson static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
796a0d3fdb6SChris Wilson {
797a0d3fdb6SChris Wilson
7987fc37efdSPrathap Kumar Valsan if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
7997fc37efdSPrathap Kumar Valsan /*
8007fc37efdSPrathap Kumar Valsan * Note that the CSFE context has a dummy slot for CMD_BUF_CCTL
8017fc37efdSPrathap Kumar Valsan * simply to match the RCS context image layout.
8027fc37efdSPrathap Kumar Valsan */
8037fc37efdSPrathap Kumar Valsan return 0xc6;
8047fc37efdSPrathap Kumar Valsan else if (engine->class != RENDER_CLASS)
8057fc37efdSPrathap Kumar Valsan return -1;
8067fc37efdSPrathap Kumar Valsan else if (GRAPHICS_VER(engine->i915) >= 12)
807a0d3fdb6SChris Wilson return 0xb6;
808c816723bSLucas De Marchi else if (GRAPHICS_VER(engine->i915) >= 11)
809a0d3fdb6SChris Wilson return 0xaa;
810a0d3fdb6SChris Wilson else
811a0d3fdb6SChris Wilson return -1;
812a0d3fdb6SChris Wilson }
813a0d3fdb6SChris Wilson
814a0d3fdb6SChris Wilson static u32
lrc_ring_indirect_offset_default(const struct intel_engine_cs * engine)815a0d3fdb6SChris Wilson lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
816a0d3fdb6SChris Wilson {
817c3d5cfe7SLucas De Marchi if (GRAPHICS_VER(engine->i915) >= 12)
818a0d3fdb6SChris Wilson return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
819c3d5cfe7SLucas De Marchi else if (GRAPHICS_VER(engine->i915) >= 11)
820a0d3fdb6SChris Wilson return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
821c3d5cfe7SLucas De Marchi else if (GRAPHICS_VER(engine->i915) >= 9)
822a0d3fdb6SChris Wilson return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
823c3d5cfe7SLucas De Marchi else if (GRAPHICS_VER(engine->i915) >= 8)
824a0d3fdb6SChris Wilson return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
825c3d5cfe7SLucas De Marchi
826c3d5cfe7SLucas De Marchi GEM_BUG_ON(GRAPHICS_VER(engine->i915) < 8);
827c3d5cfe7SLucas De Marchi
828c3d5cfe7SLucas De Marchi return 0;
829a0d3fdb6SChris Wilson }
830a0d3fdb6SChris Wilson
831a0d3fdb6SChris Wilson static void
lrc_setup_indirect_ctx(u32 * regs,const struct intel_engine_cs * engine,u32 ctx_bb_ggtt_addr,u32 size)832a0d3fdb6SChris Wilson lrc_setup_indirect_ctx(u32 *regs,
833a0d3fdb6SChris Wilson const struct intel_engine_cs *engine,
834a0d3fdb6SChris Wilson u32 ctx_bb_ggtt_addr,
835a0d3fdb6SChris Wilson u32 size)
836a0d3fdb6SChris Wilson {
837a0d3fdb6SChris Wilson GEM_BUG_ON(!size);
838a0d3fdb6SChris Wilson GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
839a0d3fdb6SChris Wilson GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
840a0d3fdb6SChris Wilson regs[lrc_ring_indirect_ptr(engine) + 1] =
841a0d3fdb6SChris Wilson ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);
842a0d3fdb6SChris Wilson
843a0d3fdb6SChris Wilson GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
844a0d3fdb6SChris Wilson regs[lrc_ring_indirect_offset(engine) + 1] =
845a0d3fdb6SChris Wilson lrc_ring_indirect_offset_default(engine) << 6;
846a0d3fdb6SChris Wilson }
847a0d3fdb6SChris Wilson
/*
 * Fill in the register-state entries that every context needs: the context
 * control word, the saved timestamp and (where the slot exists) a zeroed
 * bb_offset value.
 *
 * @inhibit: when true, additionally set CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT
 *           in the control word.
 */
static void init_common_regs(u32 * const regs,
			     const struct intel_context *ce,
			     const struct intel_engine_cs *engine,
			     bool inhibit)
{
	const int bb_offset = lrc_ring_bb_offset(engine);
	u32 ctx_ctrl;

	ctx_ctrl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH) |
		   _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
	if (inhibit)
		ctx_ctrl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
	if (GRAPHICS_VER(engine->i915) < 11)
		ctx_ctrl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
						CTX_CTRL_RS_CTX_ENABLE);
	regs[CTX_CONTEXT_CONTROL] = ctx_ctrl;

	/* Carry over the last runtime recorded for this context. */
	regs[CTX_TIMESTAMP] = ce->stats.runtime.last;

	if (bb_offset != -1)
		regs[bb_offset + 1] = 0;
}
871a0d3fdb6SChris Wilson
/*
 * Point the register state at the engine's workaround batches, if any.
 * The vma is only dereferenced inside the branches guarded by a non-zero
 * batch size.
 */
static void init_wa_bb_regs(u32 * const regs,
			    const struct intel_engine_cs *engine)
{
	const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;

	if (wa_ctx->per_ctx.size) {
		const u32 per_ctx_addr =
			i915_ggtt_offset(wa_ctx->vma) + wa_ctx->per_ctx.offset;

		GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
		/* Bit 0 is set alongside the batch address. */
		regs[lrc_ring_wa_bb_per_ctx(engine) + 1] = per_ctx_addr | 0x01;
	}

	if (wa_ctx->indirect_ctx.size) {
		const u32 indirect_addr =
			i915_ggtt_offset(wa_ctx->vma) +
			wa_ctx->indirect_ctx.offset;

		lrc_setup_indirect_ctx(regs, engine, indirect_addr,
				       wa_ctx->indirect_ctx.size);
	}
}
892a0d3fdb6SChris Wilson
/* Write the page-table root(s) of @ppgtt into the register state @regs. */
static void init_ppgtt_regs(u32 *regs, const struct i915_ppgtt *ppgtt)
{
	if (!i915_vm_is_4lvl(&ppgtt->vm)) {
		/* Not 4-level: program all four PDP descriptors. */
		ASSIGN_CTX_PDP(ppgtt, regs, 3);
		ASSIGN_CTX_PDP(ppgtt, regs, 2);
		ASSIGN_CTX_PDP(ppgtt, regs, 1);
		ASSIGN_CTX_PDP(ppgtt, regs, 0);
		return;
	}

	/*
	 * 64b PPGTT (48bit canonical)
	 * PDP0_DESCRIPTOR contains the base address to PML4 and
	 * other PDP Descriptors are ignored.
	 */
	ASSIGN_CTX_PML4(ppgtt, regs);
}
908a0d3fdb6SChris Wilson
vm_alias(struct i915_address_space * vm)909a0d3fdb6SChris Wilson static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
910a0d3fdb6SChris Wilson {
911a0d3fdb6SChris Wilson if (i915_is_ggtt(vm))
912a0d3fdb6SChris Wilson return i915_vm_to_ggtt(vm)->alias;
913a0d3fdb6SChris Wilson else
914a0d3fdb6SChris Wilson return i915_vm_to_ppgtt(vm);
915a0d3fdb6SChris Wilson }
916a0d3fdb6SChris Wilson
/*
 * Clear STOP_RING in the value stored at the MI_MODE slot and set the same
 * bit shifted up by 16. Does nothing when the engine has no MI_MODE slot.
 */
static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
{
	const int loc = lrc_ring_mi_mode(engine);
	u32 *mi_mode;

	if (loc == -1)
		return;

	mi_mode = &regs[loc + 1];
	*mi_mode &= ~STOP_RING;
	*mi_mode |= STOP_RING << 16;
}
927a0d3fdb6SChris Wilson
/*
 * Build the initial register state in @regs for context @ce on @engine.
 *
 * @inhibit: when true the first page of @regs is zeroed before being
 *           populated; the flag is also forwarded to set_offsets() and
 *           init_common_regs().
 */
static void __lrc_init_regs(u32 *regs,
			    const struct intel_context *ce,
			    const struct intel_engine_cs *engine,
			    bool inhibit)
{
	/*
	 * A context is actually a big batch buffer with several
	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
	 * values we are setting here are only for the first context restore:
	 * on a subsequent save, the GPU will recreate this batchbuffer with new
	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
	 * we are not initializing here).
	 *
	 * Must keep consistent with virtual_update_register_offsets().
	 */

	if (inhibit)
		memset(regs, 0, PAGE_SIZE);

	/* Lay down the command/register skeleton first ... */
	set_offsets(regs, reg_offsets(engine), engine, inhibit);

	/* ... then fill in the values that depend on this context. */
	init_common_regs(regs, ce, engine, inhibit);
	init_ppgtt_regs(regs, vm_alias(ce->vm));

	init_wa_bb_regs(regs, engine);

	__reset_stop_ring(regs, engine);
}
956a0d3fdb6SChris Wilson
/* (Re)initialise the register state already attached to @ce. */
void lrc_init_regs(const struct intel_context *ce,
		   const struct intel_engine_cs *engine,
		   bool inhibit)
{
	u32 * const regs = ce->lrc_reg_state;

	__lrc_init_regs(regs, ce, engine, inhibit);
}
963a0d3fdb6SChris Wilson
lrc_reset_regs(const struct intel_context * ce,const struct intel_engine_cs * engine)964a0d3fdb6SChris Wilson void lrc_reset_regs(const struct intel_context *ce,
965a0d3fdb6SChris Wilson const struct intel_engine_cs *engine)
966a0d3fdb6SChris Wilson {
967a0d3fdb6SChris Wilson __reset_stop_ring(ce->lrc_reg_state, engine);
968a0d3fdb6SChris Wilson }
969a0d3fdb6SChris Wilson
970a0d3fdb6SChris Wilson static void
set_redzone(void * vaddr,const struct intel_engine_cs * engine)971a0d3fdb6SChris Wilson set_redzone(void *vaddr, const struct intel_engine_cs *engine)
972a0d3fdb6SChris Wilson {
973a0d3fdb6SChris Wilson if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
974a0d3fdb6SChris Wilson return;
975a0d3fdb6SChris Wilson
976a0d3fdb6SChris Wilson vaddr += engine->context_size;
977a0d3fdb6SChris Wilson
978a0d3fdb6SChris Wilson memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
979a0d3fdb6SChris Wilson }
980a0d3fdb6SChris Wilson
981a0d3fdb6SChris Wilson static void
check_redzone(const void * vaddr,const struct intel_engine_cs * engine)982a0d3fdb6SChris Wilson check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
983a0d3fdb6SChris Wilson {
984a0d3fdb6SChris Wilson if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
985a0d3fdb6SChris Wilson return;
986a0d3fdb6SChris Wilson
987a0d3fdb6SChris Wilson vaddr += engine->context_size;
988a0d3fdb6SChris Wilson
989a0d3fdb6SChris Wilson if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
990a0d3fdb6SChris Wilson drm_err_once(&engine->i915->drm,
991a0d3fdb6SChris Wilson "%s context redzone overwritten!\n",
992a0d3fdb6SChris Wilson engine->name);
993a0d3fdb6SChris Wilson }
994a0d3fdb6SChris Wilson
context_wa_bb_offset(const struct intel_context * ce)995166c44e6SChris Wilson static u32 context_wa_bb_offset(const struct intel_context *ce)
996166c44e6SChris Wilson {
997166c44e6SChris Wilson return PAGE_SIZE * ce->wa_bb_page;
998166c44e6SChris Wilson }
999166c44e6SChris Wilson
context_indirect_bb(const struct intel_context * ce)1000166c44e6SChris Wilson static u32 *context_indirect_bb(const struct intel_context *ce)
1001166c44e6SChris Wilson {
1002166c44e6SChris Wilson void *ptr;
1003166c44e6SChris Wilson
1004166c44e6SChris Wilson GEM_BUG_ON(!ce->wa_bb_page);
1005166c44e6SChris Wilson
1006166c44e6SChris Wilson ptr = ce->lrc_reg_state;
1007166c44e6SChris Wilson ptr -= LRC_STATE_OFFSET; /* back to start of context image */
1008166c44e6SChris Wilson ptr += context_wa_bb_offset(ce);
1009166c44e6SChris Wilson
1010166c44e6SChris Wilson return ptr;
1011166c44e6SChris Wilson }
1012166c44e6SChris Wilson
lrc_init_state(struct intel_context * ce,struct intel_engine_cs * engine,void * state)1013a0d3fdb6SChris Wilson void lrc_init_state(struct intel_context *ce,
1014a0d3fdb6SChris Wilson struct intel_engine_cs *engine,
1015a0d3fdb6SChris Wilson void *state)
1016a0d3fdb6SChris Wilson {
1017a0d3fdb6SChris Wilson bool inhibit = true;
1018a0d3fdb6SChris Wilson
1019a0d3fdb6SChris Wilson set_redzone(state, engine);
1020a0d3fdb6SChris Wilson
1021a0d3fdb6SChris Wilson if (engine->default_state) {
1022a0d3fdb6SChris Wilson shmem_read(engine->default_state, 0,
1023a0d3fdb6SChris Wilson state, engine->context_size);
1024a0d3fdb6SChris Wilson __set_bit(CONTEXT_VALID_BIT, &ce->flags);
1025a0d3fdb6SChris Wilson inhibit = false;
1026a0d3fdb6SChris Wilson }
1027a0d3fdb6SChris Wilson
1028a0d3fdb6SChris Wilson /* Clear the ppHWSP (inc. per-context counters) */
1029a0d3fdb6SChris Wilson memset(state, 0, PAGE_SIZE);
1030a0d3fdb6SChris Wilson
1031166c44e6SChris Wilson /* Clear the indirect wa and storage */
1032166c44e6SChris Wilson if (ce->wa_bb_page)
1033166c44e6SChris Wilson memset(state + context_wa_bb_offset(ce), 0, PAGE_SIZE);
1034166c44e6SChris Wilson
1035a0d3fdb6SChris Wilson /*
1036a0d3fdb6SChris Wilson * The second page of the context object contains some registers which
1037a0d3fdb6SChris Wilson * must be set up prior to the first execution.
1038a0d3fdb6SChris Wilson */
1039a0d3fdb6SChris Wilson __lrc_init_regs(state + LRC_STATE_OFFSET, ce, engine, inhibit);
1040a0d3fdb6SChris Wilson }
1041a0d3fdb6SChris Wilson
lrc_indirect_bb(const struct intel_context * ce)1042166c44e6SChris Wilson u32 lrc_indirect_bb(const struct intel_context *ce)
1043166c44e6SChris Wilson {
1044166c44e6SChris Wilson return i915_ggtt_offset(ce->state) + context_wa_bb_offset(ce);
1045166c44e6SChris Wilson }
1046166c44e6SChris Wilson
/*
 * Emit the predicate-disable batch at @cs. It stores 0, then 1, into the
 * DG2_PREDICATE_RESULT_WA dword of the wa_bb page, with a predicated batch
 * end and a predicate disable in between. Returns the advanced @cs.
 */
static u32 *setup_predicate_disable_wa(const struct intel_context *ce, u32 *cs)
{
	const u32 result_addr = lrc_indirect_bb(ce) + DG2_PREDICATE_RESULT_WA;

	/* If predication is active, this will be noop'ed */
	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT | (4 - 2);
	*cs++ = result_addr;
	*cs++ = 0;
	*cs++ = 0; /* No predication */

	/* predicated end, only terminates if SET_PREDICATE_RESULT:0 is clear */
	*cs++ = MI_BATCH_BUFFER_END | BIT(15);
	*cs++ = MI_SET_PREDICATE | MI_SET_PREDICATE_DISABLE;

	/* Instructions are no longer predicated (disabled), we can proceed */
	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT | (4 - 2);
	*cs++ = result_addr;
	*cs++ = 0;
	*cs++ = 1; /* enable predication before the next BB */

	*cs++ = MI_BATCH_BUFFER_END;

	/* The batch must not run into the result dword it writes to. */
	GEM_BUG_ON(offset_in_page(cs) > DG2_PREDICATE_RESULT_WA);

	return cs;
}
1070166c44e6SChris Wilson
1071a0d3fdb6SChris Wilson static struct i915_vma *
__lrc_alloc_state(struct intel_context * ce,struct intel_engine_cs * engine)1072a0d3fdb6SChris Wilson __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
1073a0d3fdb6SChris Wilson {
1074a0d3fdb6SChris Wilson struct drm_i915_gem_object *obj;
1075a0d3fdb6SChris Wilson struct i915_vma *vma;
1076a0d3fdb6SChris Wilson u32 context_size;
1077a0d3fdb6SChris Wilson
1078a0d3fdb6SChris Wilson context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
1079a0d3fdb6SChris Wilson
1080a0d3fdb6SChris Wilson if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
1081a0d3fdb6SChris Wilson context_size += I915_GTT_PAGE_SIZE; /* for redzone */
1082a0d3fdb6SChris Wilson
1083c3d5cfe7SLucas De Marchi if (GRAPHICS_VER(engine->i915) >= 12) {
1084a0d3fdb6SChris Wilson ce->wa_bb_page = context_size / PAGE_SIZE;
1085a0d3fdb6SChris Wilson context_size += PAGE_SIZE;
1086a0d3fdb6SChris Wilson }
1087a0d3fdb6SChris Wilson
1088c2aa552fSMatthew Brost if (intel_context_is_parent(ce) && intel_engine_uses_guc(engine)) {
1089c2aa552fSMatthew Brost ce->parallel.guc.parent_page = context_size / PAGE_SIZE;
1090c2aa552fSMatthew Brost context_size += PARENT_SCRATCH_SIZE;
1091c2aa552fSMatthew Brost }
1092c2aa552fSMatthew Brost
10930d8ee5baSThomas Hellström obj = i915_gem_object_create_lmem(engine->i915, context_size,
10940d8ee5baSThomas Hellström I915_BO_ALLOC_PM_VOLATILE);
109543aa755eSZhanjun Dong if (IS_ERR(obj)) {
1096a0d3fdb6SChris Wilson obj = i915_gem_object_create_shmem(engine->i915, context_size);
1097f17cc0f1SDan Carpenter if (IS_ERR(obj))
1098f17cc0f1SDan Carpenter return ERR_CAST(obj);
1099f17cc0f1SDan Carpenter
110043aa755eSZhanjun Dong /*
1101f1530f91SJonathan Cavitt * Wa_22016122933: For Media version 13.0, all Media GT shared
1102f1530f91SJonathan Cavitt * memory needs to be mapped as WC on CPU side and UC (PAT
1103f1530f91SJonathan Cavitt * index 2) on GPU side.
110443aa755eSZhanjun Dong */
1105f1530f91SJonathan Cavitt if (intel_gt_needs_wa_22016122933(engine->gt))
110643aa755eSZhanjun Dong i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE);
110743aa755eSZhanjun Dong }
1108a0d3fdb6SChris Wilson
1109a0d3fdb6SChris Wilson vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
1110a0d3fdb6SChris Wilson if (IS_ERR(vma)) {
1111a0d3fdb6SChris Wilson i915_gem_object_put(obj);
1112a0d3fdb6SChris Wilson return vma;
1113a0d3fdb6SChris Wilson }
1114a0d3fdb6SChris Wilson
1115a0d3fdb6SChris Wilson return vma;
1116a0d3fdb6SChris Wilson }
1117a0d3fdb6SChris Wilson
1118a0d3fdb6SChris Wilson static struct intel_timeline *
pinned_timeline(struct intel_context * ce,struct intel_engine_cs * engine)1119a0d3fdb6SChris Wilson pinned_timeline(struct intel_context *ce, struct intel_engine_cs *engine)
1120a0d3fdb6SChris Wilson {
1121a0d3fdb6SChris Wilson struct intel_timeline *tl = fetch_and_zero(&ce->timeline);
1122a0d3fdb6SChris Wilson
1123a0d3fdb6SChris Wilson return intel_timeline_create_from_engine(engine, page_unmask_bits(tl));
1124a0d3fdb6SChris Wilson }
1125a0d3fdb6SChris Wilson
lrc_alloc(struct intel_context * ce,struct intel_engine_cs * engine)1126a0d3fdb6SChris Wilson int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine)
1127a0d3fdb6SChris Wilson {
1128a0d3fdb6SChris Wilson struct intel_ring *ring;
1129a0d3fdb6SChris Wilson struct i915_vma *vma;
1130a0d3fdb6SChris Wilson int err;
1131a0d3fdb6SChris Wilson
1132a0d3fdb6SChris Wilson GEM_BUG_ON(ce->state);
1133a0d3fdb6SChris Wilson
1134a0d3fdb6SChris Wilson vma = __lrc_alloc_state(ce, engine);
1135a0d3fdb6SChris Wilson if (IS_ERR(vma))
1136a0d3fdb6SChris Wilson return PTR_ERR(vma);
1137a0d3fdb6SChris Wilson
113874e4b909SJason Ekstrand ring = intel_engine_create_ring(engine, ce->ring_size);
1139a0d3fdb6SChris Wilson if (IS_ERR(ring)) {
1140a0d3fdb6SChris Wilson err = PTR_ERR(ring);
1141a0d3fdb6SChris Wilson goto err_vma;
1142a0d3fdb6SChris Wilson }
1143a0d3fdb6SChris Wilson
1144a0d3fdb6SChris Wilson if (!page_mask_bits(ce->timeline)) {
1145a0d3fdb6SChris Wilson struct intel_timeline *tl;
1146a0d3fdb6SChris Wilson
1147a0d3fdb6SChris Wilson /*
1148a0d3fdb6SChris Wilson * Use the static global HWSP for the kernel context, and
1149a0d3fdb6SChris Wilson * a dynamically allocated cacheline for everyone else.
1150a0d3fdb6SChris Wilson */
1151a0d3fdb6SChris Wilson if (unlikely(ce->timeline))
1152a0d3fdb6SChris Wilson tl = pinned_timeline(ce, engine);
1153a0d3fdb6SChris Wilson else
1154a0d3fdb6SChris Wilson tl = intel_timeline_create(engine->gt);
1155a0d3fdb6SChris Wilson if (IS_ERR(tl)) {
1156a0d3fdb6SChris Wilson err = PTR_ERR(tl);
1157a0d3fdb6SChris Wilson goto err_ring;
1158a0d3fdb6SChris Wilson }
1159a0d3fdb6SChris Wilson
1160a0d3fdb6SChris Wilson ce->timeline = tl;
1161a0d3fdb6SChris Wilson }
1162a0d3fdb6SChris Wilson
1163a0d3fdb6SChris Wilson ce->ring = ring;
1164a0d3fdb6SChris Wilson ce->state = vma;
1165a0d3fdb6SChris Wilson
1166a0d3fdb6SChris Wilson return 0;
1167a0d3fdb6SChris Wilson
1168a0d3fdb6SChris Wilson err_ring:
1169a0d3fdb6SChris Wilson intel_ring_put(ring);
1170a0d3fdb6SChris Wilson err_vma:
1171a0d3fdb6SChris Wilson i915_vma_put(vma);
1172a0d3fdb6SChris Wilson return err;
1173a0d3fdb6SChris Wilson }
1174a0d3fdb6SChris Wilson
lrc_reset(struct intel_context * ce)1175a0d3fdb6SChris Wilson void lrc_reset(struct intel_context *ce)
1176a0d3fdb6SChris Wilson {
1177a0d3fdb6SChris Wilson GEM_BUG_ON(!intel_context_is_pinned(ce));
1178a0d3fdb6SChris Wilson
1179a0d3fdb6SChris Wilson intel_ring_reset(ce->ring, ce->ring->emit);
1180a0d3fdb6SChris Wilson
1181a0d3fdb6SChris Wilson /* Scrub away the garbage */
1182a0d3fdb6SChris Wilson lrc_init_regs(ce, ce->engine, true);
1183a0d3fdb6SChris Wilson ce->lrc.lrca = lrc_update_regs(ce, ce->engine, ce->ring->tail);
1184a0d3fdb6SChris Wilson }
1185a0d3fdb6SChris Wilson
1186a0d3fdb6SChris Wilson int
lrc_pre_pin(struct intel_context * ce,struct intel_engine_cs * engine,struct i915_gem_ww_ctx * ww,void ** vaddr)1187a0d3fdb6SChris Wilson lrc_pre_pin(struct intel_context *ce,
1188a0d3fdb6SChris Wilson struct intel_engine_cs *engine,
1189a0d3fdb6SChris Wilson struct i915_gem_ww_ctx *ww,
1190a0d3fdb6SChris Wilson void **vaddr)
1191a0d3fdb6SChris Wilson {
1192a0d3fdb6SChris Wilson GEM_BUG_ON(!ce->state);
1193a0d3fdb6SChris Wilson GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
1194a0d3fdb6SChris Wilson
1195a0d3fdb6SChris Wilson *vaddr = i915_gem_object_pin_map(ce->state->obj,
1196115cdccaSJonathan Cavitt intel_gt_coherent_map_type(ce->engine->gt,
1197fa85bfd1SVenkata Sandeep Dhanalakota ce->state->obj,
1198fa85bfd1SVenkata Sandeep Dhanalakota false) |
1199a0d3fdb6SChris Wilson I915_MAP_OVERRIDE);
1200a0d3fdb6SChris Wilson
1201a0d3fdb6SChris Wilson return PTR_ERR_OR_ZERO(*vaddr);
1202a0d3fdb6SChris Wilson }
1203a0d3fdb6SChris Wilson
1204a0d3fdb6SChris Wilson int
lrc_pin(struct intel_context * ce,struct intel_engine_cs * engine,void * vaddr)1205a0d3fdb6SChris Wilson lrc_pin(struct intel_context *ce,
1206a0d3fdb6SChris Wilson struct intel_engine_cs *engine,
1207a0d3fdb6SChris Wilson void *vaddr)
1208a0d3fdb6SChris Wilson {
1209a0d3fdb6SChris Wilson ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;
1210093a0beaSMaarten Lankhorst
1211093a0beaSMaarten Lankhorst if (!__test_and_set_bit(CONTEXT_INIT_BIT, &ce->flags))
1212093a0beaSMaarten Lankhorst lrc_init_state(ce, engine, vaddr);
1213093a0beaSMaarten Lankhorst
1214a0d3fdb6SChris Wilson ce->lrc.lrca = lrc_update_regs(ce, engine, ce->ring->tail);
1215a0d3fdb6SChris Wilson return 0;
1216a0d3fdb6SChris Wilson }
1217a0d3fdb6SChris Wilson
lrc_unpin(struct intel_context * ce)1218a0d3fdb6SChris Wilson void lrc_unpin(struct intel_context *ce)
1219a0d3fdb6SChris Wilson {
1220a88afcfaSMatthew Brost if (unlikely(ce->parallel.last_rq)) {
1221a88afcfaSMatthew Brost i915_request_put(ce->parallel.last_rq);
1222a88afcfaSMatthew Brost ce->parallel.last_rq = NULL;
1223a88afcfaSMatthew Brost }
1224a0d3fdb6SChris Wilson check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
1225a0d3fdb6SChris Wilson ce->engine);
1226a0d3fdb6SChris Wilson }
1227a0d3fdb6SChris Wilson
lrc_post_unpin(struct intel_context * ce)1228a0d3fdb6SChris Wilson void lrc_post_unpin(struct intel_context *ce)
1229a0d3fdb6SChris Wilson {
1230a0d3fdb6SChris Wilson i915_gem_object_unpin_map(ce->state->obj);
1231a0d3fdb6SChris Wilson }
1232a0d3fdb6SChris Wilson
lrc_fini(struct intel_context * ce)1233a0d3fdb6SChris Wilson void lrc_fini(struct intel_context *ce)
1234a0d3fdb6SChris Wilson {
1235a0d3fdb6SChris Wilson if (!ce->state)
1236a0d3fdb6SChris Wilson return;
1237a0d3fdb6SChris Wilson
1238a0d3fdb6SChris Wilson intel_ring_put(fetch_and_zero(&ce->ring));
1239a0d3fdb6SChris Wilson i915_vma_put(fetch_and_zero(&ce->state));
1240a0d3fdb6SChris Wilson }
1241a0d3fdb6SChris Wilson
lrc_destroy(struct kref * kref)1242a0d3fdb6SChris Wilson void lrc_destroy(struct kref *kref)
1243a0d3fdb6SChris Wilson {
1244a0d3fdb6SChris Wilson struct intel_context *ce = container_of(kref, typeof(*ce), ref);
1245a0d3fdb6SChris Wilson
1246a0d3fdb6SChris Wilson GEM_BUG_ON(!i915_active_is_idle(&ce->active));
1247a0d3fdb6SChris Wilson GEM_BUG_ON(intel_context_is_pinned(ce));
1248a0d3fdb6SChris Wilson
1249a0d3fdb6SChris Wilson lrc_fini(ce);
1250a0d3fdb6SChris Wilson
1251a0d3fdb6SChris Wilson intel_context_fini(ce);
1252a0d3fdb6SChris Wilson intel_context_free(ce);
1253a0d3fdb6SChris Wilson }
1254a0d3fdb6SChris Wilson
1255a0d3fdb6SChris Wilson static u32 *
gen12_emit_timestamp_wa(const struct intel_context * ce,u32 * cs)1256a0d3fdb6SChris Wilson gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
1257a0d3fdb6SChris Wilson {
1258a0d3fdb6SChris Wilson *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
1259a0d3fdb6SChris Wilson MI_SRM_LRM_GLOBAL_GTT |
1260a0d3fdb6SChris Wilson MI_LRI_LRM_CS_MMIO;
1261a0d3fdb6SChris Wilson *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
1262a0d3fdb6SChris Wilson *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
1263a0d3fdb6SChris Wilson CTX_TIMESTAMP * sizeof(u32);
1264a0d3fdb6SChris Wilson *cs++ = 0;
1265a0d3fdb6SChris Wilson
1266a0d3fdb6SChris Wilson *cs++ = MI_LOAD_REGISTER_REG |
1267a0d3fdb6SChris Wilson MI_LRR_SOURCE_CS_MMIO |
1268a0d3fdb6SChris Wilson MI_LRI_LRM_CS_MMIO;
1269a0d3fdb6SChris Wilson *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
1270a0d3fdb6SChris Wilson *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
1271a0d3fdb6SChris Wilson
1272a0d3fdb6SChris Wilson *cs++ = MI_LOAD_REGISTER_REG |
1273a0d3fdb6SChris Wilson MI_LRR_SOURCE_CS_MMIO |
1274a0d3fdb6SChris Wilson MI_LRI_LRM_CS_MMIO;
1275a0d3fdb6SChris Wilson *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
1276a0d3fdb6SChris Wilson *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
1277a0d3fdb6SChris Wilson
1278a0d3fdb6SChris Wilson return cs;
1279a0d3fdb6SChris Wilson }
1280a0d3fdb6SChris Wilson
1281a0d3fdb6SChris Wilson static u32 *
gen12_emit_restore_scratch(const struct intel_context * ce,u32 * cs)1282a0d3fdb6SChris Wilson gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
1283a0d3fdb6SChris Wilson {
1284a0d3fdb6SChris Wilson GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);
1285a0d3fdb6SChris Wilson
1286a0d3fdb6SChris Wilson *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
1287a0d3fdb6SChris Wilson MI_SRM_LRM_GLOBAL_GTT |
1288a0d3fdb6SChris Wilson MI_LRI_LRM_CS_MMIO;
1289a0d3fdb6SChris Wilson *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
1290a0d3fdb6SChris Wilson *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
1291a0d3fdb6SChris Wilson (lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
1292a0d3fdb6SChris Wilson *cs++ = 0;
1293a0d3fdb6SChris Wilson
1294a0d3fdb6SChris Wilson return cs;
1295a0d3fdb6SChris Wilson }
1296a0d3fdb6SChris Wilson
1297a0d3fdb6SChris Wilson static u32 *
gen12_emit_cmd_buf_wa(const struct intel_context * ce,u32 * cs)1298a0d3fdb6SChris Wilson gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
1299a0d3fdb6SChris Wilson {
1300a0d3fdb6SChris Wilson GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1);
1301a0d3fdb6SChris Wilson
1302a0d3fdb6SChris Wilson *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
1303a0d3fdb6SChris Wilson MI_SRM_LRM_GLOBAL_GTT |
1304a0d3fdb6SChris Wilson MI_LRI_LRM_CS_MMIO;
1305a0d3fdb6SChris Wilson *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
1306a0d3fdb6SChris Wilson *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
1307a0d3fdb6SChris Wilson (lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32);
1308a0d3fdb6SChris Wilson *cs++ = 0;
1309a0d3fdb6SChris Wilson
1310a0d3fdb6SChris Wilson *cs++ = MI_LOAD_REGISTER_REG |
1311a0d3fdb6SChris Wilson MI_LRR_SOURCE_CS_MMIO |
1312a0d3fdb6SChris Wilson MI_LRI_LRM_CS_MMIO;
1313a0d3fdb6SChris Wilson *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
1314a0d3fdb6SChris Wilson *cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));
1315a0d3fdb6SChris Wilson
1316a0d3fdb6SChris Wilson return cs;
1317a0d3fdb6SChris Wilson }
1318a0d3fdb6SChris Wilson
131988d23edaSRamalingam C /*
132025bcc828SMatt Roper * The bspec's tuning guide asks us to program a vertical watermark value of
132125bcc828SMatt Roper * 0x3FF. However this register is not saved/restored properly by the
132225bcc828SMatt Roper * hardware, so we're required to apply the desired value via INDIRECT_CTX
132325bcc828SMatt Roper * batch buffer to ensure the value takes effect properly. All other bits
132425bcc828SMatt Roper * in this register should remain at 0 (the hardware default).
132525bcc828SMatt Roper */
132625bcc828SMatt Roper static u32 *
dg2_emit_draw_watermark_setting(u32 * cs)132725bcc828SMatt Roper dg2_emit_draw_watermark_setting(u32 *cs)
132825bcc828SMatt Roper {
132925bcc828SMatt Roper *cs++ = MI_LOAD_REGISTER_IMM(1);
133025bcc828SMatt Roper *cs++ = i915_mmio_reg_offset(DRAW_WATERMARK);
133125bcc828SMatt Roper *cs++ = REG_FIELD_PREP(VERT_WM_VAL, 0x3FF);
133225bcc828SMatt Roper
133325bcc828SMatt Roper return cs;
133425bcc828SMatt Roper }
133525bcc828SMatt Roper
1336a0d3fdb6SChris Wilson static u32 *
gen12_emit_indirect_ctx_rcs(const struct intel_context * ce,u32 * cs)1337a0d3fdb6SChris Wilson gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
1338a0d3fdb6SChris Wilson {
1339a0d3fdb6SChris Wilson cs = gen12_emit_timestamp_wa(ce, cs);
1340a0d3fdb6SChris Wilson cs = gen12_emit_cmd_buf_wa(ce, cs);
1341a0d3fdb6SChris Wilson cs = gen12_emit_restore_scratch(ce, cs);
1342a0d3fdb6SChris Wilson
13434b19f6b7SRamalingam C /* Wa_16013000631:dg2 */
13440a9901fdSMatt Roper if (IS_DG2_G11(ce->engine->i915))
13454b19f6b7SRamalingam C cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0);
13464b19f6b7SRamalingam C
134776ff7789SAndi Shyti cs = gen12_emit_aux_table_inv(ce->engine, cs);
1348d8b93201SFei Yang
134925bcc828SMatt Roper /* Wa_16014892111 */
1350*b3749611SMatt Roper if (IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
1351*b3749611SMatt Roper IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
13521a365a2bSRadhakrishna Sripada IS_DG2(ce->engine->i915))
135325bcc828SMatt Roper cs = dg2_emit_draw_watermark_setting(cs);
135425bcc828SMatt Roper
1355a0d3fdb6SChris Wilson return cs;
1356a0d3fdb6SChris Wilson }
1357a0d3fdb6SChris Wilson
1358a0d3fdb6SChris Wilson static u32 *
gen12_emit_indirect_ctx_xcs(const struct intel_context * ce,u32 * cs)1359a0d3fdb6SChris Wilson gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
1360a0d3fdb6SChris Wilson {
1361a0d3fdb6SChris Wilson cs = gen12_emit_timestamp_wa(ce, cs);
1362a0d3fdb6SChris Wilson cs = gen12_emit_restore_scratch(ce, cs);
1363a0d3fdb6SChris Wilson
1364ff6b19d3SMatt Roper /* Wa_16013000631:dg2 */
13650a9901fdSMatt Roper if (IS_DG2_G11(ce->engine->i915))
1366ff6b19d3SMatt Roper if (ce->engine->class == COMPUTE_CLASS)
1367ff6b19d3SMatt Roper cs = gen8_emit_pipe_control(cs,
1368ff6b19d3SMatt Roper PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE,
1369ff6b19d3SMatt Roper 0);
1370ff6b19d3SMatt Roper
137176ff7789SAndi Shyti return gen12_emit_aux_table_inv(ce->engine, cs);
1372a0d3fdb6SChris Wilson }
1373a0d3fdb6SChris Wilson
1374a0d3fdb6SChris Wilson static void
setup_indirect_ctx_bb(const struct intel_context * ce,const struct intel_engine_cs * engine,u32 * (* emit)(const struct intel_context *,u32 *))1375a0d3fdb6SChris Wilson setup_indirect_ctx_bb(const struct intel_context *ce,
1376a0d3fdb6SChris Wilson const struct intel_engine_cs *engine,
1377a0d3fdb6SChris Wilson u32 *(*emit)(const struct intel_context *, u32 *))
1378a0d3fdb6SChris Wilson {
1379a0d3fdb6SChris Wilson u32 * const start = context_indirect_bb(ce);
1380a0d3fdb6SChris Wilson u32 *cs;
1381a0d3fdb6SChris Wilson
1382a0d3fdb6SChris Wilson cs = emit(ce, start);
1383a0d3fdb6SChris Wilson GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
1384a0d3fdb6SChris Wilson while ((unsigned long)cs % CACHELINE_BYTES)
1385a0d3fdb6SChris Wilson *cs++ = MI_NOOP;
1386a0d3fdb6SChris Wilson
1387166c44e6SChris Wilson GEM_BUG_ON(cs - start > DG2_PREDICATE_RESULT_BB / sizeof(*start));
1388166c44e6SChris Wilson setup_predicate_disable_wa(ce, start + DG2_PREDICATE_RESULT_BB / sizeof(*start));
1389166c44e6SChris Wilson
1390a0d3fdb6SChris Wilson lrc_setup_indirect_ctx(ce->lrc_reg_state, engine,
1391166c44e6SChris Wilson lrc_indirect_bb(ce),
1392a0d3fdb6SChris Wilson (cs - start) * sizeof(*cs));
1393a0d3fdb6SChris Wilson }
1394a0d3fdb6SChris Wilson
1395a0d3fdb6SChris Wilson /*
1396a0d3fdb6SChris Wilson * The context descriptor encodes various attributes of a context,
1397a0d3fdb6SChris Wilson * including its GTT address and some flags. Because it's fairly
1398a0d3fdb6SChris Wilson * expensive to calculate, we'll just do it once and cache the result,
1399a0d3fdb6SChris Wilson * which remains valid until the context is unpinned.
1400a0d3fdb6SChris Wilson *
1401a0d3fdb6SChris Wilson * This is what a descriptor looks like, from LSB to MSB::
1402a0d3fdb6SChris Wilson *
1403a0d3fdb6SChris Wilson * bits 0-11: flags, GEN8_CTX_* (cached in ctx->desc_template)
1404a0d3fdb6SChris Wilson * bits 12-31: LRCA, GTT address of (the HWSP of) this context
1405a0d3fdb6SChris Wilson * bits 32-52: ctx ID, a globally unique tag (highest bit used by GuC)
1406a0d3fdb6SChris Wilson * bits 53-54: mbz, reserved for use by hardware
1407a0d3fdb6SChris Wilson * bits 55-63: group ID, currently unused and set to 0
1408a0d3fdb6SChris Wilson *
1409a0d3fdb6SChris Wilson * Starting from Gen11, the upper dword of the descriptor has a new format:
1410a0d3fdb6SChris Wilson *
1411a0d3fdb6SChris Wilson * bits 32-36: reserved
1412a0d3fdb6SChris Wilson * bits 37-47: SW context ID
1413a0d3fdb6SChris Wilson * bits 48:53: engine instance
1414a0d3fdb6SChris Wilson * bit 54: mbz, reserved for use by hardware
1415a0d3fdb6SChris Wilson * bits 55-60: SW counter
1416a0d3fdb6SChris Wilson * bits 61-63: engine class
1417a0d3fdb6SChris Wilson *
141850a9ea08SStuart Summers * On Xe_HP, the upper dword of the descriptor has a new format:
141950a9ea08SStuart Summers *
142050a9ea08SStuart Summers * bits 32-37: virtual function number
142150a9ea08SStuart Summers * bit 38: mbz, reserved for use by hardware
142250a9ea08SStuart Summers * bits 39-54: SW context ID
142350a9ea08SStuart Summers * bits 55-57: reserved
142450a9ea08SStuart Summers * bits 58-63: SW counter
142550a9ea08SStuart Summers *
1426a0d3fdb6SChris Wilson * engine info, SW context ID and SW counter need to form a unique number
1427a0d3fdb6SChris Wilson * (Context ID) per lrc.
1428a0d3fdb6SChris Wilson */
lrc_descriptor(const struct intel_context * ce)14299834dfefSChris Wilson static u32 lrc_descriptor(const struct intel_context *ce)
1430a0d3fdb6SChris Wilson {
1431a0d3fdb6SChris Wilson u32 desc;
1432a0d3fdb6SChris Wilson
1433a0d3fdb6SChris Wilson desc = INTEL_LEGACY_32B_CONTEXT;
1434a0d3fdb6SChris Wilson if (i915_vm_is_4lvl(ce->vm))
1435a0d3fdb6SChris Wilson desc = INTEL_LEGACY_64B_CONTEXT;
1436a0d3fdb6SChris Wilson desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
1437a0d3fdb6SChris Wilson
1438a0d3fdb6SChris Wilson desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
1439c816723bSLucas De Marchi if (GRAPHICS_VER(ce->vm->i915) == 8)
1440a0d3fdb6SChris Wilson desc |= GEN8_CTX_L3LLC_COHERENT;
1441a0d3fdb6SChris Wilson
1442a0d3fdb6SChris Wilson return i915_ggtt_offset(ce->state) | desc;
1443a0d3fdb6SChris Wilson }
1444a0d3fdb6SChris Wilson
/*
 * Refresh the ring registers (and, for the render class, the RPCS and OA
 * state) stored in the context's register state, and rebuild the per-context
 * indirect batch buffer when the context has a wa_bb_page.
 *
 * @ce: context whose register state is updated
 * @engine: engine the context is being set up for
 * @head: ring head offset to store; must be a valid offset for ce->ring
 *
 * Returns the context descriptor with CTX_DESC_FORCE_RESTORE set.
 */
u32 lrc_update_regs(const struct intel_context *ce,
		    const struct intel_engine_cs *engine,
		    u32 head)
{
	struct intel_ring *ring = ce->ring;
	u32 *regs = ce->lrc_reg_state;

	GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));

	regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
	regs[CTX_RING_HEAD] = head;
	regs[CTX_RING_TAIL] = ring->tail;
	regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;

	/* RPCS */
	if (engine->class == RENDER_CLASS) {
		regs[CTX_R_PWR_CLK_STATE] =
			intel_sseu_make_rpcs(engine->gt, &ce->sseu);

		i915_oa_init_reg_state(ce, engine);
	}

	if (ce->wa_bb_page) {
		u32 *(*emit)(const struct intel_context *ce, u32 *cs);

		/* Render contexts get the RCS flavour, everything else XCS */
		emit = ce->engine->class == RENDER_CLASS ?
			gen12_emit_indirect_ctx_rcs :
			gen12_emit_indirect_ctx_xcs;

		/* Mutually exclusive with the global indirect bb */
		GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
		setup_indirect_ctx_bb(ce, engine, emit);
	}

	return lrc_descriptor(ce) | CTX_DESC_FORCE_RESTORE;
}

lrc_update_offsets(struct intel_context * ce,struct intel_engine_cs * engine)1483a0d3fdb6SChris Wilson void lrc_update_offsets(struct intel_context *ce,
1484a0d3fdb6SChris Wilson struct intel_engine_cs *engine)
1485a0d3fdb6SChris Wilson {
1486a0d3fdb6SChris Wilson set_offsets(ce->lrc_reg_state, reg_offsets(engine), engine, false);
1487a0d3fdb6SChris Wilson }
1488a0d3fdb6SChris Wilson
lrc_check_regs(const struct intel_context * ce,const struct intel_engine_cs * engine,const char * when)1489a0d3fdb6SChris Wilson void lrc_check_regs(const struct intel_context *ce,
1490a0d3fdb6SChris Wilson const struct intel_engine_cs *engine,
1491a0d3fdb6SChris Wilson const char *when)
1492a0d3fdb6SChris Wilson {
1493a0d3fdb6SChris Wilson const struct intel_ring *ring = ce->ring;
1494a0d3fdb6SChris Wilson u32 *regs = ce->lrc_reg_state;
1495a0d3fdb6SChris Wilson bool valid = true;
1496a0d3fdb6SChris Wilson int x;
1497a0d3fdb6SChris Wilson
1498a0d3fdb6SChris Wilson if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
1499a0d3fdb6SChris Wilson pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
1500a0d3fdb6SChris Wilson engine->name,
1501a0d3fdb6SChris Wilson regs[CTX_RING_START],
1502a0d3fdb6SChris Wilson i915_ggtt_offset(ring->vma));
1503a0d3fdb6SChris Wilson regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
1504a0d3fdb6SChris Wilson valid = false;
1505a0d3fdb6SChris Wilson }
1506a0d3fdb6SChris Wilson
1507a0d3fdb6SChris Wilson if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
1508a0d3fdb6SChris Wilson (RING_CTL_SIZE(ring->size) | RING_VALID)) {
1509a0d3fdb6SChris Wilson pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
1510a0d3fdb6SChris Wilson engine->name,
1511a0d3fdb6SChris Wilson regs[CTX_RING_CTL],
1512a0d3fdb6SChris Wilson (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
1513a0d3fdb6SChris Wilson regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
1514a0d3fdb6SChris Wilson valid = false;
1515a0d3fdb6SChris Wilson }
1516a0d3fdb6SChris Wilson
1517a0d3fdb6SChris Wilson x = lrc_ring_mi_mode(engine);
1518a0d3fdb6SChris Wilson if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
1519a0d3fdb6SChris Wilson pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
1520a0d3fdb6SChris Wilson engine->name, regs[x + 1]);
1521a0d3fdb6SChris Wilson regs[x + 1] &= ~STOP_RING;
1522a0d3fdb6SChris Wilson regs[x + 1] |= STOP_RING << 16;
1523a0d3fdb6SChris Wilson valid = false;
1524a0d3fdb6SChris Wilson }
1525a0d3fdb6SChris Wilson
1526a0d3fdb6SChris Wilson WARN_ONCE(!valid, "Invalid lrc state found %s submission\n", when);
1527a0d3fdb6SChris Wilson }
1528a0d3fdb6SChris Wilson
1529a0d3fdb6SChris Wilson /*
1530a0d3fdb6SChris Wilson * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
1531a0d3fdb6SChris Wilson * PIPE_CONTROL instruction. This is required for the flush to happen correctly
1532a0d3fdb6SChris Wilson * but there is a slight complication as this is applied in WA batch where the
1533a0d3fdb6SChris Wilson * values are only initialized once so we cannot take register value at the
1534a0d3fdb6SChris Wilson * beginning and reuse it further; hence we save its value to memory, upload a
1535a0d3fdb6SChris Wilson * constant value with bit21 set and then we restore it back with the saved value.
1536a0d3fdb6SChris Wilson * To simplify the WA, a constant value is formed by using the default value
1537a0d3fdb6SChris Wilson * of this register. This shouldn't be a problem because we are only modifying
1538a0d3fdb6SChris Wilson * it for a short period and this batch in non-premptible. We can ofcourse
1539a0d3fdb6SChris Wilson * use additional instructions that read the actual value of the register
1540a0d3fdb6SChris Wilson * at that time and set our bit of interest but it makes the WA complicated.
1541a0d3fdb6SChris Wilson *
1542a0d3fdb6SChris Wilson * This WA is also required for Gen9 so extracting as a function avoids
1543a0d3fdb6SChris Wilson * code duplication.
1544a0d3fdb6SChris Wilson */
1545a0d3fdb6SChris Wilson static u32 *
gen8_emit_flush_coherentl3_wa(struct intel_engine_cs * engine,u32 * batch)1546a0d3fdb6SChris Wilson gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
1547a0d3fdb6SChris Wilson {
1548a0d3fdb6SChris Wilson /* NB no one else is allowed to scribble over scratch + 256! */
1549a0d3fdb6SChris Wilson *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
1550a0d3fdb6SChris Wilson *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
1551a0d3fdb6SChris Wilson *batch++ = intel_gt_scratch_offset(engine->gt,
1552a0d3fdb6SChris Wilson INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
1553a0d3fdb6SChris Wilson *batch++ = 0;
1554a0d3fdb6SChris Wilson
1555a0d3fdb6SChris Wilson *batch++ = MI_LOAD_REGISTER_IMM(1);
1556a0d3fdb6SChris Wilson *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
1557a0d3fdb6SChris Wilson *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
1558a0d3fdb6SChris Wilson
1559a0d3fdb6SChris Wilson batch = gen8_emit_pipe_control(batch,
1560a0d3fdb6SChris Wilson PIPE_CONTROL_CS_STALL |
1561a0d3fdb6SChris Wilson PIPE_CONTROL_DC_FLUSH_ENABLE,
1562a0d3fdb6SChris Wilson 0);
1563a0d3fdb6SChris Wilson
1564a0d3fdb6SChris Wilson *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
1565a0d3fdb6SChris Wilson *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
1566a0d3fdb6SChris Wilson *batch++ = intel_gt_scratch_offset(engine->gt,
1567a0d3fdb6SChris Wilson INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
1568a0d3fdb6SChris Wilson *batch++ = 0;
1569a0d3fdb6SChris Wilson
1570a0d3fdb6SChris Wilson return batch;
1571a0d3fdb6SChris Wilson }
1572a0d3fdb6SChris Wilson
1573a0d3fdb6SChris Wilson /*
1574a0d3fdb6SChris Wilson * Typically we only have one indirect_ctx and per_ctx batch buffer which are
1575a0d3fdb6SChris Wilson * initialized at the beginning and shared across all contexts but this field
1576a0d3fdb6SChris Wilson * helps us to have multiple batches at different offsets and select them based
1577a0d3fdb6SChris Wilson * on a criteria. At the moment this batch always start at the beginning of the page
1578a0d3fdb6SChris Wilson * and at this point we don't have multiple wa_ctx batch buffers.
1579a0d3fdb6SChris Wilson *
1580a0d3fdb6SChris Wilson * The number of WA applied are not known at the beginning; we use this field
1581a0d3fdb6SChris Wilson * to return the no of DWORDS written.
1582a0d3fdb6SChris Wilson *
1583a0d3fdb6SChris Wilson * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
1584a0d3fdb6SChris Wilson * so it adds NOOPs as padding to make it cacheline aligned.
1585a0d3fdb6SChris Wilson * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together
1586a0d3fdb6SChris Wilson * makes a complete batch buffer.
1587a0d3fdb6SChris Wilson */
gen8_init_indirectctx_bb(struct intel_engine_cs * engine,u32 * batch)1588a0d3fdb6SChris Wilson static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
1589a0d3fdb6SChris Wilson {
1590a0d3fdb6SChris Wilson /* WaDisableCtxRestoreArbitration:bdw,chv */
1591a0d3fdb6SChris Wilson *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
1592a0d3fdb6SChris Wilson
1593a0d3fdb6SChris Wilson /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
1594a0d3fdb6SChris Wilson if (IS_BROADWELL(engine->i915))
1595a0d3fdb6SChris Wilson batch = gen8_emit_flush_coherentl3_wa(engine, batch);
1596a0d3fdb6SChris Wilson
1597a0d3fdb6SChris Wilson /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
1598a0d3fdb6SChris Wilson /* Actual scratch location is at 128 bytes offset */
1599a0d3fdb6SChris Wilson batch = gen8_emit_pipe_control(batch,
1600a0d3fdb6SChris Wilson PIPE_CONTROL_FLUSH_L3 |
1601a0d3fdb6SChris Wilson PIPE_CONTROL_STORE_DATA_INDEX |
1602a0d3fdb6SChris Wilson PIPE_CONTROL_CS_STALL |
1603a0d3fdb6SChris Wilson PIPE_CONTROL_QW_WRITE,
1604a0d3fdb6SChris Wilson LRC_PPHWSP_SCRATCH_ADDR);
1605a0d3fdb6SChris Wilson
1606a0d3fdb6SChris Wilson *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1607a0d3fdb6SChris Wilson
1608a0d3fdb6SChris Wilson /* Pad to end of cacheline */
1609a0d3fdb6SChris Wilson while ((unsigned long)batch % CACHELINE_BYTES)
1610a0d3fdb6SChris Wilson *batch++ = MI_NOOP;
1611a0d3fdb6SChris Wilson
1612a0d3fdb6SChris Wilson /*
1613a0d3fdb6SChris Wilson * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
1614a0d3fdb6SChris Wilson * execution depends on the length specified in terms of cache lines
1615a0d3fdb6SChris Wilson * in the register CTX_RCS_INDIRECT_CTX
1616a0d3fdb6SChris Wilson */
1617a0d3fdb6SChris Wilson
1618a0d3fdb6SChris Wilson return batch;
1619a0d3fdb6SChris Wilson }
1620a0d3fdb6SChris Wilson
1621a0d3fdb6SChris Wilson struct lri {
1622a0d3fdb6SChris Wilson i915_reg_t reg;
1623a0d3fdb6SChris Wilson u32 value;
1624a0d3fdb6SChris Wilson };
1625a0d3fdb6SChris Wilson
emit_lri(u32 * batch,const struct lri * lri,unsigned int count)1626a0d3fdb6SChris Wilson static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
1627a0d3fdb6SChris Wilson {
1628a0d3fdb6SChris Wilson GEM_BUG_ON(!count || count > 63);
1629a0d3fdb6SChris Wilson
1630a0d3fdb6SChris Wilson *batch++ = MI_LOAD_REGISTER_IMM(count);
1631a0d3fdb6SChris Wilson do {
1632a0d3fdb6SChris Wilson *batch++ = i915_mmio_reg_offset(lri->reg);
1633a0d3fdb6SChris Wilson *batch++ = lri->value;
1634a0d3fdb6SChris Wilson } while (lri++, --count);
1635a0d3fdb6SChris Wilson *batch++ = MI_NOOP;
1636a0d3fdb6SChris Wilson
1637a0d3fdb6SChris Wilson return batch;
1638a0d3fdb6SChris Wilson }
1639a0d3fdb6SChris Wilson
/*
 * Build the indirect context workaround batch selected for graphics
 * version 9 by lrc_init_wa_ctx().
 *
 * Arbitration is disabled around the workarounds and the result is padded
 * with MI_NOOP up to a CACHELINE_BYTES boundary.
 *
 * @engine: engine the batch is built for
 * @batch: location to start writing commands at
 *
 * Returns the batch pointer advanced past everything emitted.
 */
static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
	static const struct lri chicken_regs[] = {
		/* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
		{
			.reg = COMMON_SLICE_CHICKEN2,
			.value = __MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
						0),
		},

		/* BSpec: 11391 */
		{
			.reg = FF_SLICE_CHICKEN,
			.value = __MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
						FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
		},

		/* BSpec: 11299 */
		{
			.reg = _3D_CHICKEN3,
			.value = __MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
						_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
		}
	};
	const u32 slm_flush_flags = PIPE_CONTROL_FLUSH_L3 |
				    PIPE_CONTROL_STORE_DATA_INDEX |
				    PIPE_CONTROL_CS_STALL |
				    PIPE_CONTROL_QW_WRITE;

	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
	batch = gen8_emit_flush_coherentl3_wa(engine, batch);

	/* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
	batch = gen8_emit_pipe_control(batch, slm_flush_flags,
				       LRC_PPHWSP_SCRATCH_ADDR);

	batch = emit_lri(batch, chicken_regs, ARRAY_SIZE(chicken_regs));

	/* WaMediaPoolStateCmdInWABB:bxt,glk */
	if (HAS_POOLED_EU(engine->i915)) {
		/*
		 * EU pool configuration is set up along with the golden
		 * context during context initialization. This value depends
		 * on the device type (2x6 or 3x6) and needs to be updated
		 * based on which subslice is disabled, especially for 2x6
		 * devices. However, it is safe to load the default
		 * configuration of a 3x6 device instead of masking off the
		 * corresponding bits, because HW ignores bits of a disabled
		 * subslice and drops down to the appropriate config. Please
		 * see render_state_setup() in i915_gem_render_state.c for
		 * possible configurations; to avoid duplication they are
		 * not shown here again.
		 */
		*batch++ = GEN9_MEDIA_POOL_STATE;
		*batch++ = GEN9_MEDIA_POOL_ENABLE;
		*batch++ = 0x00777000;
		*batch++ = 0;
		*batch++ = 0;
		*batch++ = 0;
	}

	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	/* Pad to end of cacheline */
	while ((unsigned long)batch % CACHELINE_BYTES)
		*batch++ = MI_NOOP;

	return batch;
}

1712a0d3fdb6SChris Wilson #define CTX_WA_BB_SIZE (PAGE_SIZE)
1713a0d3fdb6SChris Wilson
lrc_create_wa_ctx(struct intel_engine_cs * engine)17145ace5e96SMaarten Lankhorst static int lrc_create_wa_ctx(struct intel_engine_cs *engine)
1715a0d3fdb6SChris Wilson {
1716a0d3fdb6SChris Wilson struct drm_i915_gem_object *obj;
1717a0d3fdb6SChris Wilson struct i915_vma *vma;
1718a0d3fdb6SChris Wilson int err;
1719a0d3fdb6SChris Wilson
1720a0d3fdb6SChris Wilson obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_SIZE);
1721a0d3fdb6SChris Wilson if (IS_ERR(obj))
1722a0d3fdb6SChris Wilson return PTR_ERR(obj);
1723a0d3fdb6SChris Wilson
1724a0d3fdb6SChris Wilson vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
1725a0d3fdb6SChris Wilson if (IS_ERR(vma)) {
1726a0d3fdb6SChris Wilson err = PTR_ERR(vma);
1727a0d3fdb6SChris Wilson goto err;
1728a0d3fdb6SChris Wilson }
1729a0d3fdb6SChris Wilson
1730a0d3fdb6SChris Wilson engine->wa_ctx.vma = vma;
1731a0d3fdb6SChris Wilson return 0;
1732a0d3fdb6SChris Wilson
1733a0d3fdb6SChris Wilson err:
1734a0d3fdb6SChris Wilson i915_gem_object_put(obj);
1735a0d3fdb6SChris Wilson return err;
1736a0d3fdb6SChris Wilson }
1737a0d3fdb6SChris Wilson
lrc_fini_wa_ctx(struct intel_engine_cs * engine)1738a0d3fdb6SChris Wilson void lrc_fini_wa_ctx(struct intel_engine_cs *engine)
1739a0d3fdb6SChris Wilson {
1740a0d3fdb6SChris Wilson i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
1741a0d3fdb6SChris Wilson }
1742a0d3fdb6SChris Wilson
1743a0d3fdb6SChris Wilson typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
1744a0d3fdb6SChris Wilson
lrc_init_wa_ctx(struct intel_engine_cs * engine)17459a437ccbSChris Wilson void lrc_init_wa_ctx(struct intel_engine_cs *engine)
1746a0d3fdb6SChris Wilson {
1747a0d3fdb6SChris Wilson struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
1748a0d3fdb6SChris Wilson struct i915_wa_ctx_bb *wa_bb[] = {
1749a0d3fdb6SChris Wilson &wa_ctx->indirect_ctx, &wa_ctx->per_ctx
1750a0d3fdb6SChris Wilson };
1751a0d3fdb6SChris Wilson wa_bb_func_t wa_bb_fn[ARRAY_SIZE(wa_bb)];
17525ace5e96SMaarten Lankhorst struct i915_gem_ww_ctx ww;
1753a0d3fdb6SChris Wilson void *batch, *batch_ptr;
1754a0d3fdb6SChris Wilson unsigned int i;
17559a437ccbSChris Wilson int err;
1756a0d3fdb6SChris Wilson
1757783f6f85SLucas De Marchi if (GRAPHICS_VER(engine->i915) >= 11 ||
1758783f6f85SLucas De Marchi !(engine->flags & I915_ENGINE_HAS_RCS_REG_STATE))
17599a437ccbSChris Wilson return;
1760a0d3fdb6SChris Wilson
1761783f6f85SLucas De Marchi if (GRAPHICS_VER(engine->i915) == 9) {
1762a0d3fdb6SChris Wilson wa_bb_fn[0] = gen9_init_indirectctx_bb;
1763a0d3fdb6SChris Wilson wa_bb_fn[1] = NULL;
1764783f6f85SLucas De Marchi } else if (GRAPHICS_VER(engine->i915) == 8) {
1765a0d3fdb6SChris Wilson wa_bb_fn[0] = gen8_init_indirectctx_bb;
1766a0d3fdb6SChris Wilson wa_bb_fn[1] = NULL;
1767a0d3fdb6SChris Wilson }
1768a0d3fdb6SChris Wilson
17695ace5e96SMaarten Lankhorst err = lrc_create_wa_ctx(engine);
17709a437ccbSChris Wilson if (err) {
17719a437ccbSChris Wilson /*
17729a437ccbSChris Wilson * We continue even if we fail to initialize WA batch
17739a437ccbSChris Wilson * because we only expect rare glitches but nothing
17749a437ccbSChris Wilson * critical to prevent us from using GPU
17759a437ccbSChris Wilson */
17769a437ccbSChris Wilson drm_err(&engine->i915->drm,
17779a437ccbSChris Wilson "Ignoring context switch w/a allocation error:%d\n",
17789a437ccbSChris Wilson err);
17799a437ccbSChris Wilson return;
1780a0d3fdb6SChris Wilson }
1781a0d3fdb6SChris Wilson
17825ace5e96SMaarten Lankhorst if (!engine->wa_ctx.vma)
17835ace5e96SMaarten Lankhorst return;
17845ace5e96SMaarten Lankhorst
17855ace5e96SMaarten Lankhorst i915_gem_ww_ctx_init(&ww, true);
17865ace5e96SMaarten Lankhorst retry:
17875ace5e96SMaarten Lankhorst err = i915_gem_object_lock(wa_ctx->vma->obj, &ww);
17885ace5e96SMaarten Lankhorst if (!err)
17895ace5e96SMaarten Lankhorst err = i915_ggtt_pin(wa_ctx->vma, &ww, 0, PIN_HIGH);
17905ace5e96SMaarten Lankhorst if (err)
17915ace5e96SMaarten Lankhorst goto err;
17925ace5e96SMaarten Lankhorst
1793a0d3fdb6SChris Wilson batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB);
17945ace5e96SMaarten Lankhorst if (IS_ERR(batch)) {
17955ace5e96SMaarten Lankhorst err = PTR_ERR(batch);
17965ace5e96SMaarten Lankhorst goto err_unpin;
17975ace5e96SMaarten Lankhorst }
1798a0d3fdb6SChris Wilson
1799a0d3fdb6SChris Wilson /*
1800a0d3fdb6SChris Wilson * Emit the two workaround batch buffers, recording the offset from the
1801a0d3fdb6SChris Wilson * start of the workaround batch buffer object for each and their
1802a0d3fdb6SChris Wilson * respective sizes.
1803a0d3fdb6SChris Wilson */
1804a0d3fdb6SChris Wilson batch_ptr = batch;
1805a0d3fdb6SChris Wilson for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
1806a0d3fdb6SChris Wilson wa_bb[i]->offset = batch_ptr - batch;
1807a0d3fdb6SChris Wilson if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
1808a0d3fdb6SChris Wilson CACHELINE_BYTES))) {
18099a437ccbSChris Wilson err = -EINVAL;
1810a0d3fdb6SChris Wilson break;
1811a0d3fdb6SChris Wilson }
1812a0d3fdb6SChris Wilson if (wa_bb_fn[i])
1813a0d3fdb6SChris Wilson batch_ptr = wa_bb_fn[i](engine, batch_ptr);
1814a0d3fdb6SChris Wilson wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
1815a0d3fdb6SChris Wilson }
1816a0d3fdb6SChris Wilson GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_SIZE);
1817a0d3fdb6SChris Wilson
1818a0d3fdb6SChris Wilson __i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch);
1819a0d3fdb6SChris Wilson __i915_gem_object_release_map(wa_ctx->vma->obj);
1820a0d3fdb6SChris Wilson
18219a437ccbSChris Wilson /* Verify that we can handle failure to setup the wa_ctx */
18225ace5e96SMaarten Lankhorst if (!err)
18235ace5e96SMaarten Lankhorst err = i915_inject_probe_error(engine->i915, -ENODEV);
18245ace5e96SMaarten Lankhorst
18255ace5e96SMaarten Lankhorst err_unpin:
18265ace5e96SMaarten Lankhorst if (err)
18275ace5e96SMaarten Lankhorst i915_vma_unpin(wa_ctx->vma);
18285ace5e96SMaarten Lankhorst err:
18295ace5e96SMaarten Lankhorst if (err == -EDEADLK) {
18305ace5e96SMaarten Lankhorst err = i915_gem_ww_ctx_backoff(&ww);
18315ace5e96SMaarten Lankhorst if (!err)
18325ace5e96SMaarten Lankhorst goto retry;
18335ace5e96SMaarten Lankhorst }
18345ace5e96SMaarten Lankhorst i915_gem_ww_ctx_fini(&ww);
18355ace5e96SMaarten Lankhorst
18365ace5e96SMaarten Lankhorst if (err) {
18375ace5e96SMaarten Lankhorst i915_vma_put(engine->wa_ctx.vma);
18385ace5e96SMaarten Lankhorst
18395ace5e96SMaarten Lankhorst /* Clear all flags to prevent further use */
18405ace5e96SMaarten Lankhorst memset(wa_ctx, 0, sizeof(*wa_ctx));
18415ace5e96SMaarten Lankhorst }
1842a0d3fdb6SChris Wilson }
1843a0d3fdb6SChris Wilson
st_runtime_underflow(struct intel_context_stats * stats,s32 dt)1844bb6287cbSTvrtko Ursulin static void st_runtime_underflow(struct intel_context_stats *stats, s32 dt)
1845a0d3fdb6SChris Wilson {
1846a0d3fdb6SChris Wilson #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1847bb6287cbSTvrtko Ursulin stats->runtime.num_underflow++;
1848bb6287cbSTvrtko Ursulin stats->runtime.max_underflow =
1849bb6287cbSTvrtko Ursulin max_t(u32, stats->runtime.max_underflow, -dt);
1850a0d3fdb6SChris Wilson #endif
1851a0d3fdb6SChris Wilson }
1852a0d3fdb6SChris Wilson
lrc_get_runtime(const struct intel_context * ce)1853dd4821baSJani Nikula static u32 lrc_get_runtime(const struct intel_context *ce)
1854dd4821baSJani Nikula {
1855dd4821baSJani Nikula /*
1856dd4821baSJani Nikula * We can use either ppHWSP[16] which is recorded before the context
1857dd4821baSJani Nikula * switch (and so excludes the cost of context switches) or use the
1858dd4821baSJani Nikula * value from the context image itself, which is saved/restored earlier
1859dd4821baSJani Nikula * and so includes the cost of the save.
1860dd4821baSJani Nikula */
1861dd4821baSJani Nikula return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
1862dd4821baSJani Nikula }
1863dd4821baSJani Nikula
lrc_update_runtime(struct intel_context * ce)1864a0d3fdb6SChris Wilson void lrc_update_runtime(struct intel_context *ce)
1865a0d3fdb6SChris Wilson {
1866bb6287cbSTvrtko Ursulin struct intel_context_stats *stats = &ce->stats;
1867a0d3fdb6SChris Wilson u32 old;
1868a0d3fdb6SChris Wilson s32 dt;
1869a0d3fdb6SChris Wilson
1870bb6287cbSTvrtko Ursulin old = stats->runtime.last;
1871bb6287cbSTvrtko Ursulin stats->runtime.last = lrc_get_runtime(ce);
1872bb6287cbSTvrtko Ursulin dt = stats->runtime.last - old;
1873bb6287cbSTvrtko Ursulin if (!dt)
1874a0d3fdb6SChris Wilson return;
1875a0d3fdb6SChris Wilson
1876a0d3fdb6SChris Wilson if (unlikely(dt < 0)) {
1877a0d3fdb6SChris Wilson CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
1878bb6287cbSTvrtko Ursulin old, stats->runtime.last, dt);
1879bb6287cbSTvrtko Ursulin st_runtime_underflow(stats, dt);
1880a0d3fdb6SChris Wilson return;
1881a0d3fdb6SChris Wilson }
1882a0d3fdb6SChris Wilson
1883bb6287cbSTvrtko Ursulin ewma_runtime_add(&stats->runtime.avg, dt);
1884bb6287cbSTvrtko Ursulin stats->runtime.total += dt;
1885a0d3fdb6SChris Wilson }
1886a0d3fdb6SChris Wilson
1887a0d3fdb6SChris Wilson #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1888a0d3fdb6SChris Wilson #include "selftest_lrc.c"
1889a0d3fdb6SChris Wilson #endif
1890