/* SPDX-License-Identifier: MIT */
#ifndef _INTEL_RINGBUFFER_H_
#define _INTEL_RINGBUFFER_H_

#include <drm/drm_util.h>

#include <linux/hashtable.h>
#include <linux/irq_work.h>
#include <linux/random.h>
#include <linux/seqlock.h>

#include "i915_gem_batch_pool.h"
#include "i915_pmu.h"
#include "i915_reg.h"
#include "i915_request.h"
#include "i915_selftest.h"
#include "i915_timeline.h"
#include "intel_engine_types.h"
#include "intel_gpu_commands.h"
#include "intel_workarounds.h"

struct drm_printer;

/* Early gen2 devices have a cacheline of just 32 bytes; using 64 is overkill,
 * but keeps the logic simple. Indeed, the whole purpose of this macro is just
 * to give some indication as to some of the magic values used in the various
 * workarounds!
 */
#define CACHELINE_BYTES 64
#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32))
/*
 * The register defines to be used with the following macros need to accept a
 * base param, e.g.:
 *
 * REG_FOO(base) _MMIO((base) + <relative offset>)
 * ENGINE_READ(engine, REG_FOO);
 *
 * Register arrays are to be defined and accessed as follows:
 *
 * REG_BAR(base, i) _MMIO((base) + <relative offset> + (i) * <shift>)
 * ENGINE_READ_IDX(engine, REG_BAR, i)
 */

#define __ENGINE_REG_OP(op__, engine__, ...) \
	intel_uncore_##op__((engine__)->uncore, __VA_ARGS__)

#define __ENGINE_READ_OP(op__, engine__, reg__) \
	__ENGINE_REG_OP(op__, (engine__), reg__((engine__)->mmio_base))

#define ENGINE_READ16(...)	__ENGINE_READ_OP(read16, __VA_ARGS__)
#define ENGINE_READ(...)	__ENGINE_READ_OP(read, __VA_ARGS__)
#define ENGINE_READ_FW(...)	__ENGINE_READ_OP(read_fw, __VA_ARGS__)
#define ENGINE_POSTING_READ(...) __ENGINE_READ_OP(posting_read, __VA_ARGS__)

#define ENGINE_READ64(engine__, lower_reg__, upper_reg__) \
	__ENGINE_REG_OP(read64_2x32, (engine__), \
			lower_reg__((engine__)->mmio_base), \
			upper_reg__((engine__)->mmio_base))

#define ENGINE_READ_IDX(engine__, reg__, idx__) \
	__ENGINE_REG_OP(read, (engine__), reg__((engine__)->mmio_base, (idx__)))

#define __ENGINE_WRITE_OP(op__, engine__, reg__, val__) \
	__ENGINE_REG_OP(op__, (engine__), reg__((engine__)->mmio_base), (val__))

#define ENGINE_WRITE16(...)	__ENGINE_WRITE_OP(write16, __VA_ARGS__)
#define ENGINE_WRITE(...)	__ENGINE_WRITE_OP(write, __VA_ARGS__)
#define ENGINE_WRITE_FW(...)	__ENGINE_WRITE_OP(write_fw, __VA_ARGS__)
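
/*
 * Illustrative sketch only: RING_TAIL(base) and TAIL_ADDR are used here
 * purely as an example of a base-relative register define of the form
 * described above.
 *
 *	u32 tail = ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR;
 *	ENGINE_WRITE(engine, RING_TAIL, tail);
 *
 * expands into intel_uncore_read()/intel_uncore_write() on engine->uncore
 * against RING_TAIL(engine->mmio_base).
 */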

enum intel_engine_hangcheck_action {
	ENGINE_IDLE = 0,
	ENGINE_WAIT,
	ENGINE_ACTIVE_SEQNO,
	ENGINE_ACTIVE_HEAD,
	ENGINE_ACTIVE_SUBUNITS,
	ENGINE_WAIT_KICK,
	ENGINE_DEAD,
};

static inline const char *
hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
{
	switch (a) {
	case ENGINE_IDLE:
		return "idle";
	case ENGINE_WAIT:
		return "wait";
	case ENGINE_ACTIVE_SEQNO:
		return "active seqno";
	case ENGINE_ACTIVE_HEAD:
		return "active head";
	case ENGINE_ACTIVE_SUBUNITS:
		return "active subunits";
	case ENGINE_WAIT_KICK:
		return "wait kick";
	case ENGINE_DEAD:
		return "dead";
	}

	return "unknown";
}

void intel_engines_set_scheduler_caps(struct drm_i915_private *i915);

static inline void
execlists_set_active(struct intel_engine_execlists *execlists,
		     unsigned int bit)
{
	__set_bit(bit, (unsigned long *)&execlists->active);
}

static inline bool
execlists_set_active_once(struct intel_engine_execlists *execlists,
			  unsigned int bit)
{
	return !__test_and_set_bit(bit, (unsigned long *)&execlists->active);
}

static inline void
execlists_clear_active(struct intel_engine_execlists *execlists,
		       unsigned int bit)
{
	__clear_bit(bit, (unsigned long *)&execlists->active);
}

static inline void
execlists_clear_all_active(struct intel_engine_execlists *execlists)
{
	execlists->active = 0;
}

static inline bool
execlists_is_active(const struct intel_engine_execlists *execlists,
		    unsigned int bit)
{
	return test_bit(bit, (unsigned long *)&execlists->active);
}
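
/*
 * Example (sketch): the EXECLISTS_ACTIVE_* bits from intel_engine_types.h
 * are tracked with the helpers above, e.g. to check whether a preemption
 * is still in flight before touching the ports:
 *
 *	if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
 *		return;
 */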

void execlists_user_begin(struct intel_engine_execlists *execlists,
			  const struct execlist_port *port);
void execlists_user_end(struct intel_engine_execlists *execlists);

void
execlists_cancel_port_requests(struct intel_engine_execlists * const execlists);

struct i915_request *
execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);

static inline unsigned int
execlists_num_ports(const struct intel_engine_execlists * const execlists)
{
	return execlists->port_mask + 1;
}

static inline struct execlist_port *
execlists_port_complete(struct intel_engine_execlists * const execlists,
			struct execlist_port * const port)
{
	const unsigned int m = execlists->port_mask;

	GEM_BUG_ON(port_index(port, execlists) != 0);
	GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));

	memmove(port, port + 1, m * sizeof(struct execlist_port));
	memset(port + m, 0, sizeof(struct execlist_port));

	return port;
}
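
/*
 * Sketch of the intended usage, assuming the port_request() accessor from
 * the engine types header: once the request occupying ELSP[0] has
 * completed, the remaining ports are shuffled down by one.
 *
 *	rq = port_request(port);
 *	if (i915_request_completed(rq))
 *		execlists_port_complete(execlists, port);
 */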

static inline u32
intel_read_status_page(const struct intel_engine_cs *engine, int reg)
{
	/* Ensure that the compiler doesn't optimize away the load. */
	return READ_ONCE(engine->status_page.addr[reg]);
}

static inline void
intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
{
	/* Writing into the status page should be done sparingly. Since
	 * we only do so when we are uncertain of the device state, we take
	 * a bit of extra paranoia to try and ensure that the HWS takes the
	 * value we give and that it doesn't end up trapped inside the CPU!
	 */
	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
		mb();
		clflush(&engine->status_page.addr[reg]);
		engine->status_page.addr[reg] = value;
		clflush(&engine->status_page.addr[reg]);
		mb();
	} else {
		WRITE_ONCE(engine->status_page.addr[reg], value);
	}
}

/*
 * Reads a dword out of the status page, which is written to from the command
 * queue by automatic updates, MI_REPORT_HEAD, MI_STORE_DATA_INDEX, or
 * MI_STORE_DATA_IMM.
 *
 * The following dwords have a reserved meaning:
 * 0x00: ISR copy, updated when an ISR bit not set in the HWSTAM changes.
 * 0x04: ring 0 head pointer
 * 0x05: ring 1 head pointer (915-class)
 * 0x06: ring 2 head pointer (915-class)
 * 0x10-0x1b: Context status DWords (GM45)
 * 0x1f: Last written status offset. (GM45)
 * 0x20-0x2f: Reserved (Gen6+)
 *
 * The area from dword 0x30 to 0x3ff is available for driver usage.
 */
#define I915_GEM_HWS_PREEMPT		0x32
#define I915_GEM_HWS_PREEMPT_ADDR	(I915_GEM_HWS_PREEMPT * sizeof(u32))
/* The seqno is actually only a uint32, but since we plan to use MI_FLUSH_DW
 * to do the writes, and that must have qw aligned offsets, simply pretend
 * it's 8b.
 */
#define I915_GEM_HWS_SEQNO		0x40
#define I915_GEM_HWS_SEQNO_ADDR		(I915_GEM_HWS_SEQNO * sizeof(u32))
#define I915_GEM_HWS_SCRATCH		0x80
#define I915_GEM_HWS_SCRATCH_ADDR	(I915_GEM_HWS_SCRATCH * sizeof(u32))
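
/*
 * For example (sketch), the breadcrumb emitted by a request can be read
 * back from the status page with the accessors above:
 *
 *	u32 seqno = intel_read_status_page(engine, I915_GEM_HWS_SEQNO);
 */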

#define I915_HWS_CSB_BUF0_INDEX		0x10
#define I915_HWS_CSB_WRITE_INDEX	0x1f
#define CNL_HWS_CSB_WRITE_INDEX		0x2f

struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine,
			 struct i915_timeline *timeline,
			 int size);
int intel_ring_pin(struct intel_ring *ring);
void intel_ring_reset(struct intel_ring *ring, u32 tail);
unsigned int intel_ring_update_space(struct intel_ring *ring);
void intel_ring_unpin(struct intel_ring *ring);
void intel_ring_free(struct kref *ref);

static inline struct intel_ring *intel_ring_get(struct intel_ring *ring)
{
	kref_get(&ring->ref);
	return ring;
}

static inline void intel_ring_put(struct intel_ring *ring)
{
	kref_put(&ring->ref, intel_ring_free);
}

void intel_engine_stop(struct intel_engine_cs *engine);
void intel_engine_cleanup(struct intel_engine_cs *engine);

int __must_check intel_ring_cacheline_align(struct i915_request *rq);

u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n);

static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
{
	/* Dummy function.
	 *
	 * This serves as a placeholder in the code so that the reader
	 * can compare against the preceding intel_ring_begin() and
	 * check that the number of dwords emitted matches the space
	 * reserved for the command packet (i.e. the value passed to
	 * intel_ring_begin()).
	 */
	GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
}
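
/*
 * A minimal sketch of the emit pattern the comment above refers to (error
 * handling elided); the number of dwords written must match the count
 * passed to intel_ring_begin():
 *
 *	cs = intel_ring_begin(rq, 4);
 *	if (IS_ERR(cs))
 *		return PTR_ERR(cs);
 *
 *	*cs++ = MI_NOOP;
 *	*cs++ = MI_NOOP;
 *	*cs++ = MI_NOOP;
 *	*cs++ = MI_NOOP;
 *
 *	intel_ring_advance(rq, cs);
 */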

static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
{
	return pos & (ring->size - 1);
}

static inline bool
intel_ring_offset_valid(const struct intel_ring *ring,
			unsigned int pos)
{
	if (pos & -ring->size) /* must be strictly within the ring */
		return false;

	if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */
		return false;

	return true;
}

static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
{
	/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
	u32 offset = addr - rq->ring->vaddr;
	GEM_BUG_ON(offset > rq->ring->size);
	return intel_ring_wrap(rq->ring, offset);
}

static inline void
assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
{
	GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));

	/*
	 * "Ring Buffer Use"
	 *	Gen2 BSpec "1. Programming Environment" / 1.4.4.6
	 *	Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
	 *	Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
	 * same cacheline, the Head Pointer must not be greater than the Tail
	 * Pointer."
	 *
	 * We use ring->head as the last known location of the actual
	 * RING_HEAD; it may have advanced since, but in the worst case it is
	 * still equal to ring->head, and so we should never program RING_TAIL
	 * to advance into the same cacheline as ring->head.
	 */
#define cacheline(a) round_down(a, CACHELINE_BYTES)
	GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
		   tail < ring->head);
#undef cacheline
}

static inline unsigned int
intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
{
	/* Whilst writes to the tail are strictly ordered, there is no
	 * serialisation between readers and the writers. The tail may be
	 * read by i915_request_retire() just as it is being updated
	 * by execlists, as although the breadcrumb is complete, the context
	 * switch hasn't been seen.
	 */
	assert_ring_tail_valid(ring, tail);
	ring->tail = tail;
	return tail;
}

static inline unsigned int
__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
{
	/*
	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
	 * same cacheline, the Head Pointer must not be greater than the Tail
	 * Pointer."
	 */
	GEM_BUG_ON(!is_power_of_2(size));
	return (head - tail - CACHELINE_BYTES) & (size - 1);
}
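
/*
 * Worked example: with a 4096 byte ring and head == tail (ring empty),
 * __intel_ring_space() reports 4096 - 64 = 4032 bytes, i.e. one cacheline
 * is always held in reserve so that the tail never advances into the same
 * cacheline as the head (see assert_ring_tail_valid() above).
 */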

int intel_engines_init_mmio(struct drm_i915_private *i915);
int intel_engines_setup(struct drm_i915_private *i915);
int intel_engines_init(struct drm_i915_private *i915);
void intel_engines_cleanup(struct drm_i915_private *i915);

int intel_engine_init_common(struct intel_engine_cs *engine);
void intel_engine_cleanup_common(struct intel_engine_cs *engine);

int intel_ring_submission_setup(struct intel_engine_cs *engine);
int intel_ring_submission_init(struct intel_engine_cs *engine);

int intel_engine_stop_cs(struct intel_engine_cs *engine);
void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine);

void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask);

u64 intel_engine_get_active_head(const struct intel_engine_cs *engine);
u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine);

void intel_engine_get_instdone(struct intel_engine_cs *engine,
			       struct intel_instdone *instdone);

void intel_engine_init_execlists(struct intel_engine_cs *engine);

void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);

void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine);
void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine);

void intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);

static inline void
intel_engine_queue_breadcrumbs(struct intel_engine_cs *engine)
{
	irq_work_queue(&engine->breadcrumbs.irq_work);
}

void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine);

void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);

void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
				    struct drm_printer *p);

static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
{
	memset(batch, 0, 6 * sizeof(u32));

	batch[0] = GFX_OP_PIPE_CONTROL(6);
	batch[1] = flags;
	batch[2] = offset;

	return batch + 6;
}

static inline u32 *
gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
{
	/* We're using qword write, offset should be aligned to 8 bytes. */
	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));

	/* w/a: post-sync ops following a GPGPU operation need a prior
	 * CS_STALL, which is emitted by the flush following the batch.
	 */
	*cs++ = GFX_OP_PIPE_CONTROL(6);
	*cs++ = flags | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB;
	*cs++ = gtt_offset;
	*cs++ = 0;
	*cs++ = value;
	/* We're trashing one dword of HWS. */
	*cs++ = 0;

	return cs;
}

static inline u32 *
gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
{
	/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
	GEM_BUG_ON(gtt_offset & (1 << 5));
	/* Offset should be aligned to 8 bytes for both (QW/DW) write types */
	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));

	*cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW | flags;
	*cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT;
	*cs++ = 0;
	*cs++ = value;

	return cs;
}
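
/*
 * Sketch: emitting a breadcrumb write for a request. hwsp_ggtt_offset is a
 * hypothetical name here; the real GGTT address of the seqno slot depends on
 * where the HWSP vma is pinned.
 *
 *	cs = gen8_emit_ggtt_write(cs, rq->fence.seqno,
 *				  hwsp_ggtt_offset + I915_GEM_HWS_SEQNO_ADDR,
 *				  0);
 */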

static inline void intel_engine_reset(struct intel_engine_cs *engine,
				      bool stalled)
{
	if (engine->reset.reset)
		engine->reset.reset(engine, stalled);
	engine->serial++; /* contexts lost */
}

bool intel_engine_is_idle(struct intel_engine_cs *engine);
bool intel_engines_are_idle(struct drm_i915_private *dev_priv);

void intel_engine_lost_context(struct intel_engine_cs *engine);

void intel_engines_reset_default_submission(struct drm_i915_private *i915);
unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);

bool intel_engine_can_store_dword(struct intel_engine_cs *engine);

__printf(3, 4)
void intel_engine_dump(struct intel_engine_cs *engine,
		       struct drm_printer *m,
		       const char *header, ...);

struct intel_engine_cs *
intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);

static inline void intel_engine_context_in(struct intel_engine_cs *engine)
{
	unsigned long flags;

	if (READ_ONCE(engine->stats.enabled) == 0)
		return;

	write_seqlock_irqsave(&engine->stats.lock, flags);

	if (engine->stats.enabled > 0) {
		if (engine->stats.active++ == 0)
			engine->stats.start = ktime_get();
		GEM_BUG_ON(engine->stats.active == 0);
	}

	write_sequnlock_irqrestore(&engine->stats.lock, flags);
}

static inline void intel_engine_context_out(struct intel_engine_cs *engine)
{
	unsigned long flags;

	if (READ_ONCE(engine->stats.enabled) == 0)
		return;

	write_seqlock_irqsave(&engine->stats.lock, flags);

	if (engine->stats.enabled > 0) {
		ktime_t last;

		if (engine->stats.active && --engine->stats.active == 0) {
			/*
			 * Decrement the active context count and, in case the
			 * GPU is now idle, add the elapsed time to the
			 * running total.
			 */
			last = ktime_sub(ktime_get(), engine->stats.start);

			engine->stats.total = ktime_add(engine->stats.total,
							last);
		} else if (engine->stats.active == 0) {
			/*
			 * After turning on engine stats, context out might be
			 * the first event, in which case we account from the
			 * time stats gathering was turned on.
			 */
			last = ktime_sub(ktime_get(), engine->stats.enabled_at);

			engine->stats.total = ktime_add(engine->stats.total,
							last);
		}
	}

	write_sequnlock_irqrestore(&engine->stats.lock, flags);
}

int intel_enable_engine_stats(struct intel_engine_cs *engine);
void intel_disable_engine_stats(struct intel_engine_cs *engine);

ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine);
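
/*
 * A sketch of the intended enable/sample/disable pairing: stats must be
 * enabled before the reported busy time is meaningful, and disabled again
 * when the caller is done sampling.
 *
 *	if (intel_enable_engine_stats(engine) == 0) {
 *		ktime_t busy = intel_engine_get_busy_time(engine);
 *		...
 *		intel_disable_engine_stats(engine);
 *	}
 */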

struct i915_request *
intel_engine_find_active_request(struct intel_engine_cs *engine);

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)

static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
{
	if (!execlists->preempt_hang.inject_hang)
		return false;

	complete(&execlists->preempt_hang.completion);
	return true;
}

#else

static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
{
	return false;
}

#endif

#endif /* _INTEL_RINGBUFFER_H_ */