// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include "gen6_engine_cs.h"
#include "intel_engine.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_gt_irq.h"
#include "intel_gt_pm_irq.h"
#include "intel_ring.h"

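/*
 * Byte offset of the scratch dword within the hardware status page,
 * used as the target for dummy post-sync writes (see mi_flush_dw()).
 */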
#define HWS_SCRATCH_ADDR	(I915_GEM_HWS_SCRATCH * sizeof(u32))

/*
 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
 * implementing two workarounds on gen6. From section 1.4.7.1
 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
 *
 *   [DevSNB-C+{W/A}] Before any depth stall flush (including those
 *   produced by non-pipelined state commands), software needs to first
 *   send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
 *   0.
 *
 *   [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
 *   =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
 *
 * And the workaround for these two requires this workaround first:
 *
 *   [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
 *   BEFORE the pipe-control with a post-sync op and no write-cache
 *   flushes.
 *
 * And this last workaround is tricky because of the requirements on
 * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
 * volume 2 part 1:
 *
 *     "1 of the following must also be set:
 *      - Render Target Cache Flush Enable ([12] of DW1)
 *      - Depth Cache Flush Enable ([0] of DW1)
 *      - Stall at Pixel Scoreboard ([1] of DW1)
 *      - Depth Stall ([13] of DW1)
 *      - Post-Sync Operation ([13] of DW1)
 *      - Notify Enable ([8] of DW1)"
 *
 * The cache flushes require the workaround flush that triggered this
 * one, so we can't use it. Depth stall would trigger the same.
 * Post-sync nonzero is what triggered this second workaround, so we
 * can't use that one either. Notify enable is IRQs, which aren't
 * really our business. That leaves only stall at scoreboard.
 */
static int
gen6_emit_post_sync_nonzero_flush(struct i915_request *rq)
{
	u32 scratch_addr =
		intel_gt_scratch_offset(rq->engine->gt,
					INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
	u32 *cs;

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = GFX_OP_PIPE_CONTROL(5);
	*cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
	*cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
	*cs++ = 0; /* low dword */
	*cs++ = 0; /* high dword */
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = GFX_OP_PIPE_CONTROL(5);
	*cs++ = PIPE_CONTROL_QW_WRITE;
	*cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	return 0;
}

int gen6_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
	u32 scratch_addr =
		intel_gt_scratch_offset(rq->engine->gt,
					INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
	u32 *cs, flags = 0;
	int ret;

	/* Force SNB workarounds for PIPE_CONTROL flushes */
	ret = gen6_emit_post_sync_nonzero_flush(rq);
	if (ret)
		return ret;

	/*
	 * Just flush everything. Experiments have shown that reducing the
	 * number of bits based on the write domains has little performance
	 * impact. And when rearranging requests, the order of flushes is
	 * unknown.
	 */
	if (mode & EMIT_FLUSH) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		/*
		 * Ensure that any following seqno writes only happen
		 * when the render cache is indeed flushed.
		 */
		flags |= PIPE_CONTROL_CS_STALL;
	}
	if (mode & EMIT_INVALIDATE) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		/*
		 * TLB invalidate requires a post-sync write.
		 */
		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
	}

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = GFX_OP_PIPE_CONTROL(4);
	*cs++ = flags;
	*cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
	*cs++ = 0;
	intel_ring_advance(rq, cs);

	return 0;
}

u32 *gen6_emit_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
	/* First we do the gen6_emit_post_sync_nonzero_flush w/a */
	*cs++ = GFX_OP_PIPE_CONTROL(4);
	*cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
	*cs++ = 0;
	*cs++ = 0;

	*cs++ = GFX_OP_PIPE_CONTROL(4);
	*cs++ = PIPE_CONTROL_QW_WRITE;
	*cs++ = intel_gt_scratch_offset(rq->engine->gt,
					INTEL_GT_SCRATCH_FIELD_DEFAULT) |
		PIPE_CONTROL_GLOBAL_GTT;
	*cs++ = 0;

	/* Finally we can flush and with it emit the breadcrumb */
	*cs++ = GFX_OP_PIPE_CONTROL(4);
	*cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
		 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
		 PIPE_CONTROL_DC_FLUSH_ENABLE |
		 PIPE_CONTROL_QW_WRITE |
		 PIPE_CONTROL_CS_STALL);
	*cs++ = i915_request_active_seqno(rq) |
		PIPE_CONTROL_GLOBAL_GTT;
	*cs++ = rq->fence.seqno;

	*cs++ = MI_USER_INTERRUPT;
	*cs++ = MI_NOOP;

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return cs;
}

static int mi_flush_dw(struct i915_request *rq, u32 flags)
{
	u32 cmd, *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	cmd = MI_FLUSH_DW;

	/*
	 * We always require a command barrier so that subsequent
	 * commands, such as breadcrumb interrupts, are strictly ordered
	 * wrt the contents of the write cache being flushed to memory
	 * (and thus being coherent from the CPU).
	 */
	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;

	/*
	 * Bspec vol 1c.3 - blitter engine command streamer:
	 * "If ENABLED, all TLBs will be invalidated once the flush
	 * operation is complete. This bit is only valid when the
	 * Post-Sync Operation field is a value of 1h or 3h."
	 */
	cmd |= flags;

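	/*
	 * The post-sync write lands in the HWSP scratch slot; only the
	 * write itself matters as an ordering point, the value is ignored.
	 */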
	*cs++ = cmd;
	*cs++ = HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
	*cs++ = 0;
	*cs++ = MI_NOOP;

	intel_ring_advance(rq, cs);

	return 0;
}

static int gen6_flush_dw(struct i915_request *rq, u32 mode, u32 invflags)
{
	return mi_flush_dw(rq, mode & EMIT_INVALIDATE ? invflags : 0);
}

int gen6_emit_flush_xcs(struct i915_request *rq, u32 mode)
{
	return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB);
}

int gen6_emit_flush_vcs(struct i915_request *rq, u32 mode)
{
	return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB | MI_INVALIDATE_BSD);
}

int gen6_emit_bb_start(struct i915_request *rq,
		       u64 offset, u32 len,
		       unsigned int dispatch_flags)
{
	u32 security;
	u32 *cs;

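	/*
	 * Batches are non-privileged by default; a secure dispatch clears
	 * the bit so the batch executes with privileges.
	 */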
	security = MI_BATCH_NON_SECURE_I965;
	if (dispatch_flags & I915_DISPATCH_SECURE)
		security = 0;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	cs = __gen6_emit_bb_start(cs, offset, security);
	intel_ring_advance(rq, cs);

	return 0;
}

int
hsw_emit_bb_start(struct i915_request *rq,
		  u64 offset, u32 len,
		  unsigned int dispatch_flags)
{
	u32 security;
	u32 *cs;

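	/*
	 * As above, but on Haswell non-secure batches also run from the
	 * PPGTT; a secure dispatch clears both bits.
	 */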
	security = MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW;
	if (dispatch_flags & I915_DISPATCH_SECURE)
		security = 0;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	cs = __gen6_emit_bb_start(cs, offset, security);
	intel_ring_advance(rq, cs);

	return 0;
}

static int gen7_stall_cs(struct i915_request *rq)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = GFX_OP_PIPE_CONTROL(4);
	*cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
	*cs++ = 0;
	*cs++ = 0;
	intel_ring_advance(rq, cs);

	return 0;
}

int gen7_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
	u32 scratch_addr =
		intel_gt_scratch_offset(rq->engine->gt,
					INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
	u32 *cs, flags = 0;

	/*
	 * Ensure that any following seqno writes only happen when the render
	 * cache is indeed flushed.
	 *
	 * Workaround: 4th PIPE_CONTROL command (except the ones with only
	 * read-cache invalidate bits set) must have the CS_STALL bit set. We
	 * don't try to be clever and just set it unconditionally.
	 */
	flags |= PIPE_CONTROL_CS_STALL;

	/*
	 * CS_STALL suggests at least a post-sync write.
	 */
	flags |= PIPE_CONTROL_QW_WRITE;
	flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;

	/*
	 * Just flush everything. Experiments have shown that reducing the
	 * number of bits based on the write domains has little performance
	 * impact.
	 */
	if (mode & EMIT_FLUSH) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
		flags |= PIPE_CONTROL_FLUSH_ENABLE;
	}
	if (mode & EMIT_INVALIDATE) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR;

		/*
		 * Workaround: we must issue a pipe_control with CS-stall bit
		 * set before a pipe_control command that has the state cache
		 * invalidate bit set.
		 */
		gen7_stall_cs(rq);
	}

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = GFX_OP_PIPE_CONTROL(4);
	*cs++ = flags;
	*cs++ = scratch_addr;
	*cs++ = 0;
	intel_ring_advance(rq, cs);

	return 0;
}

u32 *gen7_emit_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
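	/*
	 * Flush the render caches and post-sync write the breadcrumb seqno
	 * in a single PIPE_CONTROL, then signal with a user interrupt.
	 */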
	*cs++ = GFX_OP_PIPE_CONTROL(4);
	*cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
		 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
		 PIPE_CONTROL_DC_FLUSH_ENABLE |
		 PIPE_CONTROL_FLUSH_ENABLE |
		 PIPE_CONTROL_QW_WRITE |
		 PIPE_CONTROL_GLOBAL_GTT_IVB |
		 PIPE_CONTROL_CS_STALL);
	*cs++ = i915_request_active_seqno(rq);
	*cs++ = rq->fence.seqno;

	*cs++ = MI_USER_INTERRUPT;
	*cs++ = MI_NOOP;

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return cs;
}

u32 *gen6_emit_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
{
	GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
	GEM_BUG_ON(offset_in_page(rq->hwsp_seqno) != I915_GEM_HWS_SEQNO_ADDR);

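	/*
	 * MI_FLUSH_DW with a post-sync write of the seqno into the HWSP,
	 * followed by the user interrupt.
	 */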
	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
	*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
	*cs++ = rq->fence.seqno;

	*cs++ = MI_USER_INTERRUPT;

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return cs;
}

#define GEN7_XCS_WA 32
u32 *gen7_emit_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
{
	int i;

	GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
	GEM_BUG_ON(offset_in_page(rq->hwsp_seqno) != I915_GEM_HWS_SEQNO_ADDR);

	*cs++ = MI_FLUSH_DW | MI_INVALIDATE_TLB |
		MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
	*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
	*cs++ = rq->fence.seqno;

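	/*
	 * Gen7 xcs w/a: repeat the seqno store (and follow with another
	 * MI_FLUSH_DW) so the breadcrumb write above is posted before the
	 * MI_USER_INTERRUPT below is raised.
	 */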
	for (i = 0; i < GEN7_XCS_WA; i++) {
		*cs++ = MI_STORE_DWORD_INDEX;
		*cs++ = I915_GEM_HWS_SEQNO_ADDR;
		*cs++ = rq->fence.seqno;
	}

	*cs++ = MI_FLUSH_DW;
	*cs++ = 0;
	*cs++ = 0;

	*cs++ = MI_USER_INTERRUPT;
	*cs++ = MI_NOOP;

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return cs;
}
#undef GEN7_XCS_WA

void gen6_irq_enable(struct intel_engine_cs *engine)
{
	ENGINE_WRITE(engine, RING_IMR,
		     ~(engine->irq_enable_mask | engine->irq_keep_mask));

	/* Flush/delay to ensure the RING_IMR is active before the GT IMR */
	ENGINE_POSTING_READ(engine, RING_IMR);

	gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask);
}

void gen6_irq_disable(struct intel_engine_cs *engine)
{
	ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
	gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask);
}

void hsw_irq_enable_vecs(struct intel_engine_cs *engine)
{
	ENGINE_WRITE(engine, RING_IMR, ~engine->irq_enable_mask);

	/* Flush/delay to ensure the RING_IMR is active before the GT IMR */
	ENGINE_POSTING_READ(engine, RING_IMR);

	gen6_gt_pm_unmask_irq(engine->gt, engine->irq_enable_mask);
}

void hsw_irq_disable_vecs(struct intel_engine_cs *engine)
{
	ENGINE_WRITE(engine, RING_IMR, ~0);
	gen6_gt_pm_mask_irq(engine->gt, engine->irq_enable_mask);
}