/* SPDX-License-Identifier: MIT */
#ifndef _INTEL_RINGBUFFER_H_
#define _INTEL_RINGBUFFER_H_

#include <drm/drm_util.h>

#include <linux/hashtable.h>
#include <linux/irq_work.h>
#include <linux/random.h>
#include <linux/seqlock.h>

#include "i915_pmu.h"
#include "i915_reg.h"
#include "i915_request.h"
#include "i915_selftest.h"
#include "gt/intel_timeline.h"
#include "intel_engine_types.h"
#include "intel_gpu_commands.h"
#include "intel_workarounds.h"

struct drm_printer;

/* Early gen2 devices have a cacheline of just 32 bytes; using 64 is overkill
 * but keeps the logic simple. Indeed, the whole purpose of this macro is just
 * to give some indication as to some of the magic values used in the various
 * workarounds!
 */
#define CACHELINE_BYTES 64
#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32))

/*
 * The register defines to be used with the following macros need to accept a
 * base param, e.g.:
 *
 * REG_FOO(base) _MMIO((base) + <relative offset>)
 * ENGINE_READ(engine, REG_FOO);
 *
 * Register arrays are to be defined and accessed as follows:
 *
 * REG_BAR(base, i) _MMIO((base) + <relative offset> + (i) * <shift>)
 * ENGINE_READ_IDX(engine, REG_BAR, i)
 */

#define __ENGINE_REG_OP(op__, engine__, ...) \
	intel_uncore_##op__((engine__)->uncore, __VA_ARGS__)

#define __ENGINE_READ_OP(op__, engine__, reg__) \
	__ENGINE_REG_OP(op__, (engine__), reg__((engine__)->mmio_base))

#define ENGINE_READ16(...)	__ENGINE_READ_OP(read16, __VA_ARGS__)
#define ENGINE_READ(...)	__ENGINE_READ_OP(read, __VA_ARGS__)
#define ENGINE_READ_FW(...)	__ENGINE_READ_OP(read_fw, __VA_ARGS__)
#define ENGINE_POSTING_READ(...) __ENGINE_READ_OP(posting_read_fw, __VA_ARGS__)
#define ENGINE_POSTING_READ16(...) __ENGINE_READ_OP(posting_read16, __VA_ARGS__)

#define ENGINE_READ64(engine__, lower_reg__, upper_reg__) \
	__ENGINE_REG_OP(read64_2x32, (engine__), \
			lower_reg__((engine__)->mmio_base), \
			upper_reg__((engine__)->mmio_base))

#define ENGINE_READ_IDX(engine__, reg__, idx__) \
	__ENGINE_REG_OP(read, (engine__), reg__((engine__)->mmio_base, (idx__)))

#define __ENGINE_WRITE_OP(op__, engine__, reg__, val__) \
	__ENGINE_REG_OP(op__, (engine__), reg__((engine__)->mmio_base), (val__))

#define ENGINE_WRITE16(...)	__ENGINE_WRITE_OP(write16, __VA_ARGS__)
#define ENGINE_WRITE(...)	__ENGINE_WRITE_OP(write, __VA_ARGS__)
#define ENGINE_WRITE_FW(...)	__ENGINE_WRITE_OP(write_fw, __VA_ARGS__)
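
/*
 * Illustrative sketch (not part of this header): how a register define that
 * takes a base parameter plugs into the accessors above. RING_FOO and
 * RING_BAR are hypothetical names used purely for the example.
 *
 *	#define RING_FOO(base)		_MMIO((base) + 0x64)
 *	#define RING_BAR(base, i)	_MMIO((base) + 0x80 + (i) * 4)
 *
 *	u32 foo = ENGINE_READ(engine, RING_FOO);
 *	u32 bar = ENGINE_READ_IDX(engine, RING_BAR, 2);
 *
 *	ENGINE_WRITE(engine, RING_FOO, foo | BIT(0));
 */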

#define GEN6_RING_FAULT_REG_READ(engine__) \
	intel_uncore_read((engine__)->uncore, RING_FAULT_REG(engine__))

#define GEN6_RING_FAULT_REG_POSTING_READ(engine__) \
	intel_uncore_posting_read((engine__)->uncore, RING_FAULT_REG(engine__))

#define GEN6_RING_FAULT_REG_RMW(engine__, clear__, set__) \
({ \
	u32 __val; \
\
	__val = intel_uncore_read((engine__)->uncore, \
				  RING_FAULT_REG(engine__)); \
	__val &= ~(clear__); \
	__val |= (set__); \
	intel_uncore_write((engine__)->uncore, RING_FAULT_REG(engine__), \
			   __val); \
})
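
/*
 * Illustrative sketch (hypothetical caller): acknowledging a sticky fault
 * reported on an engine by clearing RING_FAULT_VALID while leaving the
 * remaining bits untouched.
 *
 *	if (GEN6_RING_FAULT_REG_READ(engine) & RING_FAULT_VALID)
 *		GEN6_RING_FAULT_REG_RMW(engine, RING_FAULT_VALID, 0);
 *	GEN6_RING_FAULT_REG_POSTING_READ(engine);
 */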

/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
 * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
 */
enum intel_engine_hangcheck_action {
	ENGINE_IDLE = 0,
	ENGINE_WAIT,
	ENGINE_ACTIVE_SEQNO,
	ENGINE_ACTIVE_HEAD,
	ENGINE_ACTIVE_SUBUNITS,
	ENGINE_WAIT_KICK,
	ENGINE_DEAD,
};

static inline const char *
hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
{
	switch (a) {
	case ENGINE_IDLE:
		return "idle";
	case ENGINE_WAIT:
		return "wait";
	case ENGINE_ACTIVE_SEQNO:
		return "active seqno";
	case ENGINE_ACTIVE_HEAD:
		return "active head";
	case ENGINE_ACTIVE_SUBUNITS:
		return "active subunits";
	case ENGINE_WAIT_KICK:
		return "wait kick";
	case ENGINE_DEAD:
		return "dead";
	}

	return "unknown";
}

static inline unsigned int
execlists_num_ports(const struct intel_engine_execlists * const execlists)
{
	return execlists->port_mask + 1;
}

static inline struct i915_request *
execlists_active(const struct intel_engine_execlists *execlists)
{
	GEM_BUG_ON(execlists->active - execlists->inflight >
		   execlists_num_ports(execlists));
	return READ_ONCE(*execlists->active);
}

struct i915_request *
execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);

static inline u32
intel_read_status_page(const struct intel_engine_cs *engine, int reg)
{
	/* Ensure that the compiler doesn't optimize away the load. */
	return READ_ONCE(engine->status_page.addr[reg]);
}

static inline void
intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
{
	/* Writing into the status page should be done sparingly. Since we
	 * only do so when we are uncertain of the device state, we take a
	 * bit of extra paranoia to try and ensure that the HWS takes the
	 * value we give and that it doesn't end up trapped inside the CPU!
	 */
	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
		mb();
		clflush(&engine->status_page.addr[reg]);
		engine->status_page.addr[reg] = value;
		clflush(&engine->status_page.addr[reg]);
		mb();
	} else {
		WRITE_ONCE(engine->status_page.addr[reg], value);
	}
}

/*
 * Reads a dword out of the status page, which is written to from the command
 * queue by automatic updates, MI_REPORT_HEAD, MI_STORE_DATA_INDEX, or
 * MI_STORE_DATA_IMM.
 *
 * The following dwords have a reserved meaning:
 * 0x00: ISR copy, updated when an ISR bit not set in the HWSTAM changes.
 * 0x04: ring 0 head pointer
 * 0x05: ring 1 head pointer (915-class)
 * 0x06: ring 2 head pointer (915-class)
 * 0x10-0x1b: Context status DWords (GM45)
 * 0x1f: Last written status offset. (GM45)
 * 0x20-0x2f: Reserved (Gen6+)
 *
 * The area from dword 0x30 to 0x3ff is available for driver usage.
 */
#define I915_GEM_HWS_PREEMPT		0x32
#define I915_GEM_HWS_PREEMPT_ADDR	(I915_GEM_HWS_PREEMPT * sizeof(u32))
#define I915_GEM_HWS_SEQNO		0x40
#define I915_GEM_HWS_SEQNO_ADDR		(I915_GEM_HWS_SEQNO * sizeof(u32))
#define I915_GEM_HWS_SCRATCH		0x80
#define I915_GEM_HWS_SCRATCH_ADDR	(I915_GEM_HWS_SCRATCH * sizeof(u32))

#define I915_HWS_CSB_BUF0_INDEX		0x10
#define I915_HWS_CSB_WRITE_INDEX	0x1f
#define CNL_HWS_CSB_WRITE_INDEX		0x2f
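
/*
 * Illustrative sketch: the dword indices above are meant for the status-page
 * accessors, e.g. sampling the breadcrumb slot and scribbling a debug value
 * into the driver-owned scratch slot (the values here are placeholders).
 *
 *	u32 seqno = intel_read_status_page(engine, I915_GEM_HWS_SEQNO);
 *	intel_write_status_page(engine, I915_GEM_HWS_SCRATCH, 0xdeadbeef);
 *
 * The *_ADDR variants give the byte offset of the same slot, for use when a
 * GPU command writes into the status page (see gen8_emit_ggtt_write() below).
 */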

struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine, int size);
int intel_ring_pin(struct intel_ring *ring);
void intel_ring_reset(struct intel_ring *ring, u32 tail);
unsigned int intel_ring_update_space(struct intel_ring *ring);
void intel_ring_unpin(struct intel_ring *ring);
void intel_ring_free(struct kref *ref);

static inline struct intel_ring *intel_ring_get(struct intel_ring *ring)
{
	kref_get(&ring->ref);
	return ring;
}

static inline void intel_ring_put(struct intel_ring *ring)
{
	kref_put(&ring->ref, intel_ring_free);
}

void intel_engine_stop(struct intel_engine_cs *engine);
void intel_engine_cleanup(struct intel_engine_cs *engine);

int __must_check intel_ring_cacheline_align(struct i915_request *rq);

u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n);

static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
{
	/* Dummy function.
	 *
	 * This serves as a placeholder in the code so that the reader
	 * can compare against the preceding intel_ring_begin() and
	 * check that the number of dwords emitted matches the space
	 * reserved for the command packet (i.e. the value passed to
	 * intel_ring_begin()).
	 */
	GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
}
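
/*
 * Illustrative sketch of the begin/emit/advance pattern that the GEM_BUG_ON()
 * above guards (error handling trimmed; the emitted dwords are placeholders):
 *
 *	u32 *cs;
 *
 *	cs = intel_ring_begin(rq, 4);
 *	if (IS_ERR(cs))
 *		return PTR_ERR(cs);
 *
 *	*cs++ = MI_NOOP;
 *	*cs++ = MI_NOOP;
 *	*cs++ = MI_NOOP;
 *	*cs++ = MI_NOOP;
 *
 *	intel_ring_advance(rq, cs);
 *
 * Emitting fewer or more dwords than were reserved with intel_ring_begin()
 * trips the assertion.
 */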

static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
{
	return pos & (ring->size - 1);
}

static inline bool
intel_ring_offset_valid(const struct intel_ring *ring,
			unsigned int pos)
{
	if (pos & -ring->size) /* must be strictly within the ring */
		return false;

	if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */
		return false;

	return true;
}

static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
{
	/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
	u32 offset = addr - rq->ring->vaddr;
	GEM_BUG_ON(offset > rq->ring->size);
	return intel_ring_wrap(rq->ring, offset);
}
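
/*
 * Worked example (assuming a 4 KiB ring, so size - 1 == 0xfff): an emission
 * that ends exactly at the top of the ring gives offset == 0x1000, which
 * intel_ring_wrap() masks down to 0, so we report 0 rather than ring->size,
 * per the comment above.
 */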

static inline void
assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
{
	GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));

	/*
	 * "Ring Buffer Use"
	 *	Gen2 BSpec "1. Programming Environment" / 1.4.4.6
	 *	Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
	 *	Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
	 * same cacheline, the Head Pointer must not be greater than the Tail
	 * Pointer."
	 *
	 * We use ring->head as the last known location of the actual
	 * RING_HEAD; it may have advanced since, but in the worst case it is
	 * still equal to ring->head, and so we should never program RING_TAIL
	 * to advance into the same cacheline as ring->head.
	 */
#define cacheline(a) round_down(a, CACHELINE_BYTES)
	GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
		   tail < ring->head);
#undef cacheline
}
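
/*
 * Worked example (CACHELINE_BYTES == 64): with ring->head == 0x84, the head
 * cacheline spans [0x80, 0xc0). Programming tail == 0x80 would put the tail
 * in that same cacheline while numerically behind the head, which is exactly
 * the case the assertion above rejects; tail == 0xc0 is fine.
 */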

static inline unsigned int
intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
{
	/* Whilst writes to the tail are strictly ordered, there is no
	 * serialisation between readers and the writers. The tail may be
	 * read by i915_request_retire() just as it is being updated
	 * by execlists, as although the breadcrumb is complete, the context
	 * switch hasn't been seen.
	 */
	assert_ring_tail_valid(ring, tail);
	ring->tail = tail;
	return tail;
}

static inline unsigned int
__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
{
	/*
	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
	 * same cacheline, the Head Pointer must not be greater than the Tail
	 * Pointer."
	 */
	GEM_BUG_ON(!is_power_of_2(size));
	return (head - tail - CACHELINE_BYTES) & (size - 1);
}
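
/*
 * Worked example: with size == 0x1000, head == 0x40 and tail == 0x800,
 * __intel_ring_space() returns (0x40 - 0x800 - 64) & 0xfff == 0x800, i.e.
 * the bytes free between tail and head minus one spare cacheline, so that
 * the tail can never be advanced right up into the head's cacheline.
 */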

int intel_engines_init_mmio(struct drm_i915_private *i915);
int intel_engines_setup(struct drm_i915_private *i915);
int intel_engines_init(struct drm_i915_private *i915);
void intel_engines_cleanup(struct drm_i915_private *i915);

int intel_engine_init_common(struct intel_engine_cs *engine);
void intel_engine_cleanup_common(struct intel_engine_cs *engine);

int intel_ring_submission_setup(struct intel_engine_cs *engine);
int intel_ring_submission_init(struct intel_engine_cs *engine);

int intel_engine_stop_cs(struct intel_engine_cs *engine);
void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine);

void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask);

u64 intel_engine_get_active_head(const struct intel_engine_cs *engine);
u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine);

void intel_engine_get_instdone(struct intel_engine_cs *engine,
			       struct intel_instdone *instdone);

void intel_engine_init_execlists(struct intel_engine_cs *engine);

void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);

void intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);

static inline void
intel_engine_queue_breadcrumbs(struct intel_engine_cs *engine)
{
	irq_work_queue(&engine->breadcrumbs.irq_work);
}

void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine);

void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);

void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
				    struct drm_printer *p);

static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
{
	memset(batch, 0, 6 * sizeof(u32));

	batch[0] = GFX_OP_PIPE_CONTROL(6);
	batch[1] = flags;
	batch[2] = offset;

	return batch + 6;
}

static inline u32 *
gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
{
	/* We're using qword write, offset should be aligned to 8 bytes. */
	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));

	/* w/a: post-sync ops following a GPGPU operation need a prior
	 * CS_STALL, which is emitted by the flush following the batch.
	 */
	*cs++ = GFX_OP_PIPE_CONTROL(6);
	*cs++ = flags | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB;
	*cs++ = gtt_offset;
	*cs++ = 0;
	*cs++ = value;
	/* We're thrashing one dword of HWS. */
	*cs++ = 0;

	return cs;
}

static inline u32 *
gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
{
	/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
	GEM_BUG_ON(gtt_offset & (1 << 5));
	/* Offset should be aligned to 8 bytes for both (QW/DW) write types */
	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));

	*cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW | flags;
	*cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT;
	*cs++ = 0;
	*cs++ = value;

	return cs;
}
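
/*
 * Illustrative sketch: emitting a breadcrumb write with the helper above.
 * hwsp_offset() is a hypothetical stand-in for however the caller obtains
 * the GGTT address of the request's seqno slot.
 *
 *	cs = gen8_emit_ggtt_write(cs, rq->fence.seqno, hwsp_offset(rq), 0);
 *	*cs++ = MI_USER_INTERRUPT;
 *
 * On the render engine, gen8_emit_ggtt_write_rcs() is the PIPE_CONTROL
 * based equivalent.
 */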

static inline void __intel_engine_reset(struct intel_engine_cs *engine,
					bool stalled)
{
	if (engine->reset.reset)
		engine->reset.reset(engine, stalled);
	engine->serial++; /* contexts lost */
}

bool intel_engine_is_idle(struct intel_engine_cs *engine);
bool intel_engines_are_idle(struct intel_gt *gt);

void intel_engines_reset_default_submission(struct intel_gt *gt);

bool intel_engine_can_store_dword(struct intel_engine_cs *engine);

__printf(3, 4)
void intel_engine_dump(struct intel_engine_cs *engine,
		       struct drm_printer *m,
		       const char *header, ...);

static inline void intel_engine_context_in(struct intel_engine_cs *engine)
{
	unsigned long flags;

	if (READ_ONCE(engine->stats.enabled) == 0)
		return;

	write_seqlock_irqsave(&engine->stats.lock, flags);

	if (engine->stats.enabled > 0) {
		if (engine->stats.active++ == 0)
			engine->stats.start = ktime_get();
		GEM_BUG_ON(engine->stats.active == 0);
	}

	write_sequnlock_irqrestore(&engine->stats.lock, flags);
}

static inline void intel_engine_context_out(struct intel_engine_cs *engine)
{
	unsigned long flags;

	if (READ_ONCE(engine->stats.enabled) == 0)
		return;

	write_seqlock_irqsave(&engine->stats.lock, flags);

	if (engine->stats.enabled > 0) {
		ktime_t last;

		if (engine->stats.active && --engine->stats.active == 0) {
			/*
			 * The decrement above dropped the active context
			 * count to zero, so the engine is now idle: add the
			 * elapsed busy time to the running total.
			 */
			last = ktime_sub(ktime_get(), engine->stats.start);

			engine->stats.total = ktime_add(engine->stats.total,
							last);
		} else if (engine->stats.active == 0) {
			/*
			 * After turning on engine stats, context out might be
			 * the first event in which case we account from the
			 * time stats gathering was turned on.
			 */
			last = ktime_sub(ktime_get(), engine->stats.enabled_at);

			engine->stats.total = ktime_add(engine->stats.total,
							last);
		}
	}

	write_sequnlock_irqrestore(&engine->stats.lock, flags);
}

int intel_enable_engine_stats(struct intel_engine_cs *engine);
void intel_disable_engine_stats(struct intel_engine_cs *engine);

ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine);
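
/*
 * Illustrative sketch: the context in/out hooks above only accumulate busy
 * time once statistics have been enabled, e.g. by a PMU-style user:
 *
 *	err = intel_enable_engine_stats(engine);
 *	if (err)
 *		return err;
 *
 *	busy = intel_engine_get_busy_time(engine);
 *	...
 *	intel_disable_engine_stats(engine);
 */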

struct i915_request *
intel_engine_find_active_request(struct intel_engine_cs *engine);

u32 intel_engine_context_size(struct drm_i915_private *i915, u8 class);

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)

static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
{
	if (!execlists->preempt_hang.inject_hang)
		return false;

	complete(&execlists->preempt_hang.completion);
	return true;
}

#else

static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
{
	return false;
}

#endif

void intel_engine_init_active(struct intel_engine_cs *engine,
			      unsigned int subclass);
#define ENGINE_PHYSICAL	0
#define ENGINE_MOCK	1
#define ENGINE_VIRTUAL	2

#endif /* _INTEL_RINGBUFFER_H_ */