xref: /openbmc/linux/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c (revision a8f4fcdd8ba7d191c29ae87a2315906fe90368d6)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2014 Intel Corporation
4  */
5 
6 #include <linux/circ_buf.h>
7 
8 #include "gem/i915_gem_context.h"
9 #include "gt/gen8_engine_cs.h"
10 #include "gt/intel_breadcrumbs.h"
11 #include "gt/intel_context.h"
12 #include "gt/intel_engine_pm.h"
13 #include "gt/intel_engine_heartbeat.h"
14 #include "gt/intel_gpu_commands.h"
15 #include "gt/intel_gt.h"
16 #include "gt/intel_gt_irq.h"
17 #include "gt/intel_gt_pm.h"
18 #include "gt/intel_gt_requests.h"
19 #include "gt/intel_lrc.h"
20 #include "gt/intel_lrc_reg.h"
21 #include "gt/intel_mocs.h"
22 #include "gt/intel_ring.h"
23 
24 #include "intel_guc_submission.h"
25 
26 #include "i915_drv.h"
27 #include "i915_trace.h"
28 
29 /**
30  * DOC: GuC-based command submission
31  *
32  * The Scratch registers:
 * There are 16 MMIO-based registers starting from 0xC180. The kernel driver
 * writes a value to the action register (SOFT_SCRATCH_0) along with any data.
 * It then triggers an interrupt on the GuC via another register write (0xC4C8).
 * Firmware writes a success/fail code back to the action register after it
 * processes the request. The kernel driver polls waiting for this update and
 * then proceeds.
39  *
40  * Command Transport buffers (CTBs):
41  * Covered in detail in other sections but CTBs (Host to GuC - H2G, GuC to Host
42  * - G2H) are a message interface between the i915 and GuC.
43  *
44  * Context registration:
45  * Before a context can be submitted it must be registered with the GuC via a
46  * H2G. A unique guc_id is associated with each context. The context is either
47  * registered at request creation time (normal operation) or at submission time
48  * (abnormal operation, e.g. after a reset).
49  *
50  * Context submission:
51  * The i915 updates the LRC tail value in memory. The i915 must enable the
52  * scheduling of the context within the GuC for the GuC to actually consider it.
53  * Therefore, the first time a disabled context is submitted we use a schedule
54  * enable H2G, while follow up submissions are done via the context submit H2G,
55  * which informs the GuC that a previously enabled context has new work
56  * available.
57  *
58  * Context unpin:
59  * To unpin a context a H2G is used to disable scheduling. When the
60  * corresponding G2H returns indicating the scheduling disable operation has
61  * completed it is safe to unpin the context. While a disable is in flight it
62  * isn't safe to resubmit the context so a fence is used to stall all future
63  * requests of that context until the G2H is returned.
64  *
65  * Context deregistration:
66  * Before a context can be destroyed or if we steal its guc_id we must
67  * deregister the context with the GuC via H2G. If stealing the guc_id it isn't
68  * safe to submit anything to this guc_id until the deregister completes so a
69  * fence is used to stall all requests associated with this guc_id until the
70  * corresponding G2H returns indicating the guc_id has been deregistered.
71  *
72  * submission_state.guc_ids:
73  * Unique number associated with private GuC context data passed in during
74  * context registration / submission / deregistration. 64k available. Simple ida
75  * is used for allocation.
76  *
77  * Stealing guc_ids:
78  * If no guc_ids are available they can be stolen from another context at
79  * request creation time if that context is unpinned. If a guc_id can't be found
80  * we punt this problem to the user as we believe this is near impossible to hit
81  * during normal use cases.
82  *
83  * Locking:
84  * In the GuC submission code we have 3 basic spin locks which protect
85  * everything. Details about each below.
86  *
87  * sched_engine->lock
88  * This is the submission lock for all contexts that share an i915 schedule
89  * engine (sched_engine), thus only one of the contexts which share a
90  * sched_engine can be submitting at a time. Currently only one sched_engine is
91  * used for all of GuC submission but that could change in the future.
92  *
93  * guc->submission_state.lock
94  * Global lock for GuC submission state. Protects guc_ids and destroyed contexts
95  * list.
96  *
97  * ce->guc_state.lock
98  * Protects everything under ce->guc_state. Ensures that a context is in the
99  * correct state before issuing a H2G. e.g. We don't issue a schedule disable
100  * on a disabled context (bad idea), we don't issue a schedule enable when a
101  * schedule disable is in flight, etc... Also protects list of inflight requests
102  * on the context and the priority management state. Lock is individual to each
103  * context.
104  *
105  * Lock ordering rules:
106  * sched_engine->lock -> ce->guc_state.lock
107  * guc->submission_state.lock -> ce->guc_state.lock
108  *
109  * Reset races:
110  * When a full GT reset is triggered it is assumed that some G2H responses to
111  * H2Gs can be lost as the GuC is also reset. Losing these G2H can prove to be
112  * fatal as we do certain operations upon receiving a G2H (e.g. destroy
113  * contexts, release guc_ids, etc...). When this occurs we can scrub the
 * context state and clean up appropriately, however this is quite racy.
115  * To avoid races, the reset code must disable submission before scrubbing for
116  * the missing G2H, while the submission code must check for submission being
117  * disabled and skip sending H2Gs and updating context states when it is. Both
118  * sides must also make sure to hold the relevant locks.
119  */
120 
/*
 * GuC Virtual Engine
 *
 * Pairs an engine structure with a context structure in one object.
 */
struct guc_virtual_engine {
	struct intel_engine_cs base;	/* engine structure */
	struct intel_context context;	/* context embedded next to the engine */
};
126 
/*
 * Forward declarations of file-local (static) functions that each return a
 * struct intel_context pointer.
 */
static struct intel_context *
guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
		   unsigned long flags);

static struct intel_context *
guc_create_parallel(struct intel_engine_cs **engines,
		    unsigned int num_siblings,
		    unsigned int width);
135 
136 #define GUC_REQUEST_SIZE 64 /* bytes */
137 
/*
 * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
 * per the GuC submission interface. A different allocation algorithm is used
 * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
 * partition the guc_id space. We believe the number of multi-lrc contexts in
 * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
 * multi-lrc.
 *
 * NOTE(review): the macro below is a plain division by 16; the "minimum of
 * 32" is not enforced here and presumably relies on the value of
 * GUC_MAX_LRC_DESCRIPTORS - confirm.
 */
#define NUMBER_MULTI_LRC_GUC_ID		(GUC_MAX_LRC_DESCRIPTORS / 16)
147 
/*
 * Below is a set of functions which control the GuC scheduling state which
 * require a lock.
 *
 * Layout of ce->guc_state.sched_state as used by the helpers that follow:
 *   bits 0-6:	independent boolean flags, one per SCHED_STATE_* bit
 *   bits 7-18:	12-bit "blocked" counter (SCHED_STATE_BLOCKED_MASK), adjusted
 *		in steps of SCHED_STATE_BLOCKED by incr_context_blocked() and
 *		decr_context_blocked()
 */
#define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER	BIT(0)
#define SCHED_STATE_DESTROYED				BIT(1)
#define SCHED_STATE_PENDING_DISABLE			BIT(2)
#define SCHED_STATE_BANNED				BIT(3)
#define SCHED_STATE_ENABLED				BIT(4)
#define SCHED_STATE_PENDING_ENABLE			BIT(5)
#define SCHED_STATE_REGISTERED				BIT(6)
/* Position of the least significant bit of the blocked counter */
#define SCHED_STATE_BLOCKED_SHIFT			7
/* One unit of the blocked counter */
#define SCHED_STATE_BLOCKED		BIT(SCHED_STATE_BLOCKED_SHIFT)
/* All bits of the blocked counter */
#define SCHED_STATE_BLOCKED_MASK	(0xfff << SCHED_STATE_BLOCKED_SHIFT)
162 
163 static inline void init_sched_state(struct intel_context *ce)
164 {
165 	lockdep_assert_held(&ce->guc_state.lock);
166 	ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK;
167 }
168 
169 __maybe_unused
170 static bool sched_state_is_init(struct intel_context *ce)
171 {
172 	/*
173 	 * XXX: Kernel contexts can have SCHED_STATE_NO_LOCK_REGISTERED after
174 	 * suspend.
175 	 */
176 	return !(ce->guc_state.sched_state &=
177 		 ~(SCHED_STATE_BLOCKED_MASK | SCHED_STATE_REGISTERED));
178 }
179 
180 static inline bool
181 context_wait_for_deregister_to_register(struct intel_context *ce)
182 {
183 	return ce->guc_state.sched_state &
184 		SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
185 }
186 
187 static inline void
188 set_context_wait_for_deregister_to_register(struct intel_context *ce)
189 {
190 	lockdep_assert_held(&ce->guc_state.lock);
191 	ce->guc_state.sched_state |=
192 		SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
193 }
194 
195 static inline void
196 clr_context_wait_for_deregister_to_register(struct intel_context *ce)
197 {
198 	lockdep_assert_held(&ce->guc_state.lock);
199 	ce->guc_state.sched_state &=
200 		~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
201 }
202 
203 static inline bool
204 context_destroyed(struct intel_context *ce)
205 {
206 	return ce->guc_state.sched_state & SCHED_STATE_DESTROYED;
207 }
208 
209 static inline void
210 set_context_destroyed(struct intel_context *ce)
211 {
212 	lockdep_assert_held(&ce->guc_state.lock);
213 	ce->guc_state.sched_state |= SCHED_STATE_DESTROYED;
214 }
215 
216 static inline bool context_pending_disable(struct intel_context *ce)
217 {
218 	return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE;
219 }
220 
221 static inline void set_context_pending_disable(struct intel_context *ce)
222 {
223 	lockdep_assert_held(&ce->guc_state.lock);
224 	ce->guc_state.sched_state |= SCHED_STATE_PENDING_DISABLE;
225 }
226 
227 static inline void clr_context_pending_disable(struct intel_context *ce)
228 {
229 	lockdep_assert_held(&ce->guc_state.lock);
230 	ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_DISABLE;
231 }
232 
233 static inline bool context_banned(struct intel_context *ce)
234 {
235 	return ce->guc_state.sched_state & SCHED_STATE_BANNED;
236 }
237 
238 static inline void set_context_banned(struct intel_context *ce)
239 {
240 	lockdep_assert_held(&ce->guc_state.lock);
241 	ce->guc_state.sched_state |= SCHED_STATE_BANNED;
242 }
243 
244 static inline void clr_context_banned(struct intel_context *ce)
245 {
246 	lockdep_assert_held(&ce->guc_state.lock);
247 	ce->guc_state.sched_state &= ~SCHED_STATE_BANNED;
248 }
249 
250 static inline bool context_enabled(struct intel_context *ce)
251 {
252 	return ce->guc_state.sched_state & SCHED_STATE_ENABLED;
253 }
254 
255 static inline void set_context_enabled(struct intel_context *ce)
256 {
257 	lockdep_assert_held(&ce->guc_state.lock);
258 	ce->guc_state.sched_state |= SCHED_STATE_ENABLED;
259 }
260 
261 static inline void clr_context_enabled(struct intel_context *ce)
262 {
263 	lockdep_assert_held(&ce->guc_state.lock);
264 	ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED;
265 }
266 
267 static inline bool context_pending_enable(struct intel_context *ce)
268 {
269 	return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE;
270 }
271 
272 static inline void set_context_pending_enable(struct intel_context *ce)
273 {
274 	lockdep_assert_held(&ce->guc_state.lock);
275 	ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE;
276 }
277 
278 static inline void clr_context_pending_enable(struct intel_context *ce)
279 {
280 	lockdep_assert_held(&ce->guc_state.lock);
281 	ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE;
282 }
283 
284 static inline bool context_registered(struct intel_context *ce)
285 {
286 	return ce->guc_state.sched_state & SCHED_STATE_REGISTERED;
287 }
288 
289 static inline void set_context_registered(struct intel_context *ce)
290 {
291 	lockdep_assert_held(&ce->guc_state.lock);
292 	ce->guc_state.sched_state |= SCHED_STATE_REGISTERED;
293 }
294 
295 static inline void clr_context_registered(struct intel_context *ce)
296 {
297 	lockdep_assert_held(&ce->guc_state.lock);
298 	ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED;
299 }
300 
301 static inline u32 context_blocked(struct intel_context *ce)
302 {
303 	return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >>
304 		SCHED_STATE_BLOCKED_SHIFT;
305 }
306 
307 static inline void incr_context_blocked(struct intel_context *ce)
308 {
309 	lockdep_assert_held(&ce->guc_state.lock);
310 
311 	ce->guc_state.sched_state += SCHED_STATE_BLOCKED;
312 
313 	GEM_BUG_ON(!context_blocked(ce));	/* Overflow check */
314 }
315 
316 static inline void decr_context_blocked(struct intel_context *ce)
317 {
318 	lockdep_assert_held(&ce->guc_state.lock);
319 
320 	GEM_BUG_ON(!context_blocked(ce));	/* Underflow check */
321 
322 	ce->guc_state.sched_state -= SCHED_STATE_BLOCKED;
323 }
324 
325 static inline bool context_has_committed_requests(struct intel_context *ce)
326 {
327 	return !!ce->guc_state.number_committed_requests;
328 }
329 
330 static inline void incr_context_committed_requests(struct intel_context *ce)
331 {
332 	lockdep_assert_held(&ce->guc_state.lock);
333 	++ce->guc_state.number_committed_requests;
334 	GEM_BUG_ON(ce->guc_state.number_committed_requests < 0);
335 }
336 
337 static inline void decr_context_committed_requests(struct intel_context *ce)
338 {
339 	lockdep_assert_held(&ce->guc_state.lock);
340 	--ce->guc_state.number_committed_requests;
341 	GEM_BUG_ON(ce->guc_state.number_committed_requests < 0);
342 }
343 
344 static struct intel_context *
345 request_to_scheduling_context(struct i915_request *rq)
346 {
347 	return intel_context_to_parent(rq->context);
348 }
349 
350 static inline bool context_guc_id_invalid(struct intel_context *ce)
351 {
352 	return ce->guc_id.id == GUC_INVALID_LRC_ID;
353 }
354 
355 static inline void set_context_guc_id_invalid(struct intel_context *ce)
356 {
357 	ce->guc_id.id = GUC_INVALID_LRC_ID;
358 }
359 
360 static inline struct intel_guc *ce_to_guc(struct intel_context *ce)
361 {
362 	return &ce->engine->gt->uc.guc;
363 }
364 
365 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
366 {
367 	return rb_entry(rb, struct i915_priolist, node);
368 }
369 
370 /*
371  * When using multi-lrc submission a scratch memory area is reserved in the
372  * parent's context state for the process descriptor, work queue, and handshake
373  * between the parent + children contexts to insert safe preemption points
374  * between each of the BBs. Currently the scratch area is sized to a page.
375  *
376  * The layout of this scratch area is below:
377  * 0						guc_process_desc
378  * + sizeof(struct guc_process_desc)		child go
379  * + CACHELINE_BYTES				child join[0]
380  * ...
381  * + CACHELINE_BYTES				child join[n - 1]
382  * ...						unused
383  * PARENT_SCRATCH_SIZE / 2			work queue start
384  * ...						work queue
385  * PARENT_SCRATCH_SIZE - 1			work queue end
386  */
/* Work queue size in bytes: half of the parent scratch area */
#define WQ_SIZE			(PARENT_SCRATCH_SIZE / 2)
/* Byte offset of the work queue: it ends at the end of the scratch area */
#define WQ_OFFSET		(PARENT_SCRATCH_SIZE - WQ_SIZE)
389 
/* A 32-bit semaphore value padded so the structure spans CACHELINE_BYTES. */
struct sync_semaphore {
	u32 semaphore;
	/* Padding up to CACHELINE_BYTES */
	u8 unused[CACHELINE_BYTES - sizeof(u32)];
};
394 
/*
 * In-memory layout of the parent context scratch area: process descriptor,
 * "go" semaphore, per-child "join" semaphores, padding, then the work queue
 * starting at WQ_OFFSET.
 */
struct parent_scratch {
	struct guc_process_desc pdesc;

	struct sync_semaphore go;
	struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1];

	/* Padding so that wq[] begins exactly at WQ_OFFSET */
	u8 unused[WQ_OFFSET - sizeof(struct guc_process_desc) -
		sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)];

	/* Work queue storage, WQ_SIZE bytes addressed as dwords */
	u32 wq[WQ_SIZE / sizeof(u32)];
};
406 
407 static u32 __get_parent_scratch_offset(struct intel_context *ce)
408 {
409 	GEM_BUG_ON(!ce->parallel.guc.parent_page);
410 
411 	return ce->parallel.guc.parent_page * PAGE_SIZE;
412 }
413 
414 static u32 __get_wq_offset(struct intel_context *ce)
415 {
416 	BUILD_BUG_ON(offsetof(struct parent_scratch, wq) != WQ_OFFSET);
417 
418 	return __get_parent_scratch_offset(ce) + WQ_OFFSET;
419 }
420 
421 static struct parent_scratch *
422 __get_parent_scratch(struct intel_context *ce)
423 {
424 	BUILD_BUG_ON(sizeof(struct parent_scratch) != PARENT_SCRATCH_SIZE);
425 	BUILD_BUG_ON(sizeof(struct sync_semaphore) != CACHELINE_BYTES);
426 
427 	/*
428 	 * Need to subtract LRC_STATE_OFFSET here as the
429 	 * parallel.guc.parent_page is the offset into ce->state while
430 	 * ce->lrc_reg_reg is ce->state + LRC_STATE_OFFSET.
431 	 */
432 	return (struct parent_scratch *)
433 		(ce->lrc_reg_state +
434 		 ((__get_parent_scratch_offset(ce) -
435 		   LRC_STATE_OFFSET) / sizeof(u32)));
436 }
437 
438 static struct guc_process_desc *
439 __get_process_desc(struct intel_context *ce)
440 {
441 	struct parent_scratch *ps = __get_parent_scratch(ce);
442 
443 	return &ps->pdesc;
444 }
445 
446 static u32 *get_wq_pointer(struct guc_process_desc *desc,
447 			   struct intel_context *ce,
448 			   u32 wqi_size)
449 {
450 	/*
451 	 * Check for space in work queue. Caching a value of head pointer in
452 	 * intel_context structure in order reduce the number accesses to shared
453 	 * GPU memory which may be across a PCIe bus.
454 	 */
455 #define AVAILABLE_SPACE	\
456 	CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE)
457 	if (wqi_size > AVAILABLE_SPACE) {
458 		ce->parallel.guc.wqi_head = READ_ONCE(desc->head);
459 
460 		if (wqi_size > AVAILABLE_SPACE)
461 			return NULL;
462 	}
463 #undef AVAILABLE_SPACE
464 
465 	return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)];
466 }
467 
468 static struct guc_lrc_desc *__get_lrc_desc(struct intel_guc *guc, u32 index)
469 {
470 	struct guc_lrc_desc *base = guc->lrc_desc_pool_vaddr;
471 
472 	GEM_BUG_ON(index >= GUC_MAX_LRC_DESCRIPTORS);
473 
474 	return &base[index];
475 }
476 
477 static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id)
478 {
479 	struct intel_context *ce = xa_load(&guc->context_lookup, id);
480 
481 	GEM_BUG_ON(id >= GUC_MAX_LRC_DESCRIPTORS);
482 
483 	return ce;
484 }
485 
486 static int guc_lrc_desc_pool_create(struct intel_guc *guc)
487 {
488 	u32 size;
489 	int ret;
490 
491 	size = PAGE_ALIGN(sizeof(struct guc_lrc_desc) *
492 			  GUC_MAX_LRC_DESCRIPTORS);
493 	ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool,
494 					     (void **)&guc->lrc_desc_pool_vaddr);
495 	if (ret)
496 		return ret;
497 
498 	return 0;
499 }
500 
501 static void guc_lrc_desc_pool_destroy(struct intel_guc *guc)
502 {
503 	guc->lrc_desc_pool_vaddr = NULL;
504 	i915_vma_unpin_and_release(&guc->lrc_desc_pool, I915_VMA_RELEASE_MAP);
505 }
506 
507 static inline bool guc_submission_initialized(struct intel_guc *guc)
508 {
509 	return !!guc->lrc_desc_pool_vaddr;
510 }
511 
512 static inline void reset_lrc_desc(struct intel_guc *guc, u32 id)
513 {
514 	if (likely(guc_submission_initialized(guc))) {
515 		struct guc_lrc_desc *desc = __get_lrc_desc(guc, id);
516 		unsigned long flags;
517 
518 		memset(desc, 0, sizeof(*desc));
519 
520 		/*
521 		 * xarray API doesn't have xa_erase_irqsave wrapper, so calling
522 		 * the lower level functions directly.
523 		 */
524 		xa_lock_irqsave(&guc->context_lookup, flags);
525 		__xa_erase(&guc->context_lookup, id);
526 		xa_unlock_irqrestore(&guc->context_lookup, flags);
527 	}
528 }
529 
530 static inline bool lrc_desc_registered(struct intel_guc *guc, u32 id)
531 {
532 	return __get_context(guc, id);
533 }
534 
535 static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id,
536 					   struct intel_context *ce)
537 {
538 	unsigned long flags;
539 
540 	/*
541 	 * xarray API doesn't have xa_save_irqsave wrapper, so calling the
542 	 * lower level functions directly.
543 	 */
544 	xa_lock_irqsave(&guc->context_lookup, flags);
545 	__xa_store(&guc->context_lookup, id, ce, GFP_ATOMIC);
546 	xa_unlock_irqrestore(&guc->context_lookup, flags);
547 }
548 
549 static void decr_outstanding_submission_g2h(struct intel_guc *guc)
550 {
551 	if (atomic_dec_and_test(&guc->outstanding_submission_g2h))
552 		wake_up_all(&guc->ct.wq);
553 }
554 
555 static int guc_submission_send_busy_loop(struct intel_guc *guc,
556 					 const u32 *action,
557 					 u32 len,
558 					 u32 g2h_len_dw,
559 					 bool loop)
560 {
561 	/*
562 	 * We always loop when a send requires a reply (i.e. g2h_len_dw > 0),
563 	 * so we don't handle the case where we don't get a reply because we
564 	 * aborted the send due to the channel being busy.
565 	 */
566 	GEM_BUG_ON(g2h_len_dw && !loop);
567 
568 	if (g2h_len_dw)
569 		atomic_inc(&guc->outstanding_submission_g2h);
570 
571 	return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
572 }
573 
/*
 * Sleep on guc->ct.wq until *@wait_var reads zero.
 *
 * @guc: GuC whose CT wait queue is used
 * @wait_var: counter to wait on
 * @interruptible: wait in TASK_INTERRUPTIBLE rather than TASK_UNINTERRUPTIBLE
 * @timeout: maximum time to wait, as passed to io_schedule_timeout(); must
 *	     not be negative
 *
 * Return: 0 once *@wait_var is zero, -ETIME if the timeout is zero or
 * expires first, -EINTR if a signal is pending for the chosen task state.
 */
int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
				   atomic_t *wait_var,
				   bool interruptible,
				   long timeout)
{
	const int state = interruptible ?
		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
	DEFINE_WAIT(wait);

	might_sleep();
	GEM_BUG_ON(timeout < 0);

	/* Nothing pending: no need to touch the wait queue */
	if (!atomic_read(wait_var))
		return 0;

	/* Something is pending but the caller does not want to wait */
	if (!timeout)
		return -ETIME;

	for (;;) {
		/* Queue ourselves before re-checking the condition */
		prepare_to_wait(&guc->ct.wq, &wait, state);

		if (!atomic_read(wait_var))
			break;

		if (signal_pending_state(state, current)) {
			timeout = -EINTR;
			break;
		}

		/* Remaining time ran out on a previous iteration */
		if (!timeout) {
			timeout = -ETIME;
			break;
		}

		/* Sleep; the return value is the time left */
		timeout = io_schedule_timeout(timeout);
	}
	finish_wait(&guc->ct.wq, &wait);

	/* Negative values are error codes; leftover time maps to success */
	return (timeout < 0) ? timeout : 0;
}
614 
615 int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout)
616 {
617 	if (!intel_uc_uses_guc_submission(&guc_to_gt(guc)->uc))
618 		return 0;
619 
620 	return intel_guc_wait_for_pending_msg(guc,
621 					      &guc->outstanding_submission_g2h,
622 					      true, timeout);
623 }
624 
/* Forward declaration of a file-local function. */
static int guc_lrc_desc_pin(struct intel_context *ce, bool loop);
626 
/*
 * Submit @rq's scheduling context to the GuC with a non-blocking H2G.
 *
 * A context that is neither enabled nor blocked gets a schedule-enable
 * message (which expects a G2H reply); otherwise a plain context-submit
 * message is sent. Must be called with the sched_engine lock held; takes
 * ce->guc_state.lock internally.
 *
 * Return: 0 on success (including the banned and blocked early exits), or
 * the error returned by intel_guc_send_nb().
 */
static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq)
{
	int err = 0;
	struct intel_context *ce = request_to_scheduling_context(rq);
	u32 action[3];
	int len = 0;
	u32 g2h_len_dw = 0;
	bool enabled;

	lockdep_assert_held(&rq->engine->sched_engine->lock);

	/*
	 * Corner case where requests were sitting in the priority list or a
	 * request resubmitted after the context was banned.
	 */
	if (unlikely(intel_context_is_banned(ce))) {
		i915_request_put(i915_request_mark_eio(rq));
		intel_engine_signal_breadcrumbs(ce->engine);
		return 0;
	}

	GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
	GEM_BUG_ON(context_guc_id_invalid(ce));

	spin_lock(&ce->guc_state.lock);

	/*
	 * The request / context will be run on the hardware when scheduling
	 * gets enabled in the unblock. For multi-lrc we still submit the
	 * context to move the LRC tails.
	 */
	if (unlikely(context_blocked(ce) && !intel_context_is_parent(ce)))
		goto out;

	/* A blocked context is treated as enabled: no enable H2G is sent */
	enabled = context_enabled(ce) || context_blocked(ce);

	if (!enabled) {
		action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
		action[len++] = ce->guc_id.id;
		action[len++] = GUC_CONTEXT_ENABLE;
		set_context_pending_enable(ce);
		/* Reference is dropped below if the send fails */
		intel_context_get(ce);
		g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
	} else {
		action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = ce->guc_id.id;
	}

	err = intel_guc_send_nb(guc, action, len, g2h_len_dw);
	if (!enabled && !err) {
		/* Enable message went out: account for the expected G2H */
		trace_intel_context_sched_enable(ce);
		atomic_inc(&guc->outstanding_submission_g2h);
		set_context_enabled(ce);

		/*
		 * Without multi-lrc KMD does the submission step (moving the
		 * lrc tail) so enabling scheduling is sufficient to submit the
		 * context. This isn't the case in multi-lrc submission as the
		 * GuC needs to move the tails, hence the need for another H2G
		 * to submit a multi-lrc context after enabling scheduling.
		 */
		if (intel_context_is_parent(ce)) {
			/* Reuse action[1] (guc_id); drop the mode argument */
			action[0] = INTEL_GUC_ACTION_SCHED_CONTEXT;
			err = intel_guc_send_nb(guc, action, len - 1, 0);
		}
	} else if (!enabled) {
		/* Enable message failed: undo the pending state and the ref */
		clr_context_pending_enable(ce);
		intel_context_put(ce);
	}
	if (likely(!err))
		trace_i915_request_guc_submit(rq);

out:
	spin_unlock(&ce->guc_state.lock);
	return err;
}
703 
704 static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
705 {
706 	int ret = __guc_add_request(guc, rq);
707 
708 	if (unlikely(ret == -EBUSY)) {
709 		guc->stalled_request = rq;
710 		guc->submission_stall_reason = STALL_ADD_REQUEST;
711 	}
712 
713 	return ret;
714 }
715 
716 static inline void guc_set_lrc_tail(struct i915_request *rq)
717 {
718 	rq->context->lrc_reg_state[CTX_RING_TAIL] =
719 		intel_ring_set_tail(rq->ring, rq->tail);
720 }
721 
722 static inline int rq_prio(const struct i915_request *rq)
723 {
724 	return rq->sched.attr.priority;
725 }
726 
727 static bool is_multi_lrc_rq(struct i915_request *rq)
728 {
729 	return intel_context_is_parallel(rq->context);
730 }
731 
732 static bool can_merge_rq(struct i915_request *rq,
733 			 struct i915_request *last)
734 {
735 	return request_to_scheduling_context(rq) ==
736 		request_to_scheduling_context(last);
737 }
738 
739 static u32 wq_space_until_wrap(struct intel_context *ce)
740 {
741 	return (WQ_SIZE - ce->parallel.guc.wqi_tail);
742 }
743 
744 static void write_wqi(struct guc_process_desc *desc,
745 		      struct intel_context *ce,
746 		      u32 wqi_size)
747 {
748 	BUILD_BUG_ON(!is_power_of_2(WQ_SIZE));
749 
750 	/*
751 	 * Ensure WQI are visible before updating tail
752 	 */
753 	intel_guc_write_barrier(ce_to_guc(ce));
754 
755 	ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) &
756 		(WQ_SIZE - 1);
757 	WRITE_ONCE(desc->tail, ce->parallel.guc.wqi_tail);
758 }
759 
760 static int guc_wq_noop_append(struct intel_context *ce)
761 {
762 	struct guc_process_desc *desc = __get_process_desc(ce);
763 	u32 *wqi = get_wq_pointer(desc, ce, wq_space_until_wrap(ce));
764 	u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1;
765 
766 	if (!wqi)
767 		return -EBUSY;
768 
769 	GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));
770 
771 	*wqi = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
772 		FIELD_PREP(WQ_LEN_MASK, len_dw);
773 	ce->parallel.guc.wqi_tail = 0;
774 
775 	return 0;
776 }
777 
/*
 * Append a multi-lrc work queue item for @rq's scheduling context.
 *
 * The item consists of four fixed dwords (header, parent LRCA, guc_id plus
 * parent ring tail, fence_id) followed by one ring-tail dword per child.
 *
 * Return: 0 on success, -EBUSY if the work queue has insufficient space.
 */
static int __guc_wq_item_append(struct i915_request *rq)
{
	struct intel_context *ce = request_to_scheduling_context(rq);
	struct intel_context *child;
	struct guc_process_desc *desc = __get_process_desc(ce);
	/* Four fixed dwords plus one per child */
	unsigned int wqi_size = (ce->parallel.number_children + 4) *
		sizeof(u32);
	u32 *wqi;
	/* Length field does not count the header dword */
	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
	int ret;

	/* Ensure context is in correct state updating work queue */
	GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
	GEM_BUG_ON(context_guc_id_invalid(ce));
	GEM_BUG_ON(context_wait_for_deregister_to_register(ce));
	GEM_BUG_ON(!lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id));

	/* Insert NOOP if this work queue item will wrap the tail pointer. */
	if (wqi_size > wq_space_until_wrap(ce)) {
		ret = guc_wq_noop_append(ce);
		if (ret)
			return ret;
	}

	wqi = get_wq_pointer(desc, ce, wqi_size);
	if (!wqi)
		return -EBUSY;

	GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));

	/* Header: item type and length */
	*wqi++ = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
		FIELD_PREP(WQ_LEN_MASK, len_dw);
	*wqi++ = ce->lrc.lrca;
	/* Ring tails are written in units of sizeof(u64) */
	*wqi++ = FIELD_PREP(WQ_GUC_ID_MASK, ce->guc_id.id) |
	       FIELD_PREP(WQ_RING_TAIL_MASK, ce->ring->tail / sizeof(u64));
	*wqi++ = 0;	/* fence_id */
	for_each_child(ce, child)
		*wqi++ = child->ring->tail / sizeof(u64);

	/* Make the item visible and advance the tail */
	write_wqi(desc, ce, wqi_size);

	return 0;
}
821 
822 static int guc_wq_item_append(struct intel_guc *guc,
823 			      struct i915_request *rq)
824 {
825 	struct intel_context *ce = request_to_scheduling_context(rq);
826 	int ret = 0;
827 
828 	if (likely(!intel_context_is_banned(ce))) {
829 		ret = __guc_wq_item_append(rq);
830 
831 		if (unlikely(ret == -EBUSY)) {
832 			guc->stalled_request = rq;
833 			guc->submission_stall_reason = STALL_MOVE_LRC_TAIL;
834 		}
835 	}
836 
837 	return ret;
838 }
839 
840 static bool multi_lrc_submit(struct i915_request *rq)
841 {
842 	struct intel_context *ce = request_to_scheduling_context(rq);
843 
844 	intel_ring_set_tail(rq->ring, rq->tail);
845 
846 	/*
847 	 * We expect the front end (execbuf IOCTL) to set this flag on the last
848 	 * request generated from a multi-BB submission. This indicates to the
849 	 * backend (GuC interface) that we should submit this context thus
850 	 * submitting all the requests generated in parallel.
851 	 */
852 	return test_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, &rq->fence.flags) ||
853 		intel_context_is_banned(ce);
854 }
855 
/*
 * Dequeue requests belonging to a single scheduling context from the
 * priority queue and submit that context to the GuC.
 *
 * If an earlier call stalled (guc->stalled_request is set), processing
 * resumes at the step named by guc->submission_stall_reason instead of
 * dequeuing new requests. Must be called with sched_engine->lock held.
 *
 * Return: non-zero if a context was submitted (the caller loops again),
 * zero if nothing was submitted, the submission stalled on -EBUSY (the
 * tasklet is rescheduled) or an unrecoverable error occurred (the tasklet is
 * disabled).
 */
static int guc_dequeue_one_context(struct intel_guc *guc)
{
	struct i915_sched_engine * const sched_engine = guc->sched_engine;
	struct i915_request *last = NULL;
	bool submit = false;
	struct rb_node *rb;
	int ret;

	lockdep_assert_held(&sched_engine->lock);

	/* Resume a previously stalled submission at the step that failed */
	if (guc->stalled_request) {
		submit = true;
		last = guc->stalled_request;

		switch (guc->submission_stall_reason) {
		case STALL_REGISTER_CONTEXT:
			goto register_context;
		case STALL_MOVE_LRC_TAIL:
			goto move_lrc_tail;
		case STALL_ADD_REQUEST:
			goto add_request;
		default:
			MISSING_CASE(guc->submission_stall_reason);
		}
	}

	while ((rb = rb_first_cached(&sched_engine->queue))) {
		struct i915_priolist *p = to_priolist(rb);
		struct i915_request *rq, *rn;

		priolist_for_each_request_consume(rq, rn, p) {
			/* Stop at the first request of a different context */
			if (last && !can_merge_rq(rq, last))
				goto register_context;

			list_del_init(&rq->sched.link);

			__i915_request_submit(rq);

			trace_i915_request_in(rq, 0);
			last = rq;

			if (is_multi_lrc_rq(rq)) {
				/*
				 * We need to coalesce all multi-lrc requests in
				 * a relationship into a single H2G. We are
				 * guaranteed that all of these requests will be
				 * submitted sequentially.
				 */
				if (multi_lrc_submit(rq)) {
					submit = true;
					goto register_context;
				}
			} else {
				submit = true;
			}
		}

		/* Priority list fully consumed: remove it from the queue */
		rb_erase_cached(&p->node, &sched_engine->queue);
		i915_priolist_free(p);
	}

register_context:
	if (submit) {
		struct intel_context *ce = request_to_scheduling_context(last);

		/* Register the context first if its guc_id is not yet known */
		if (unlikely(!lrc_desc_registered(guc, ce->guc_id.id) &&
			     !intel_context_is_banned(ce))) {
			ret = guc_lrc_desc_pin(ce, false);
			if (unlikely(ret == -EPIPE)) {
				goto deadlk;
			} else if (ret == -EBUSY) {
				guc->stalled_request = last;
				guc->submission_stall_reason =
					STALL_REGISTER_CONTEXT;
				goto schedule_tasklet;
			} else if (ret != 0) {
				GEM_WARN_ON(ret);	/* Unexpected */
				goto deadlk;
			}
		}

move_lrc_tail:
		if (is_multi_lrc_rq(last)) {
			/* Stall bookkeeping on -EBUSY is done by the callee */
			ret = guc_wq_item_append(guc, last);
			if (ret == -EBUSY) {
				goto schedule_tasklet;
			} else if (ret != 0) {
				GEM_WARN_ON(ret);	/* Unexpected */
				goto deadlk;
			}
		} else {
			guc_set_lrc_tail(last);
		}

add_request:
		/* Stall bookkeeping on -EBUSY is done by the callee */
		ret = guc_add_request(guc, last);
		if (unlikely(ret == -EPIPE)) {
			goto deadlk;
		} else if (ret == -EBUSY) {
			goto schedule_tasklet;
		} else if (ret != 0) {
			GEM_WARN_ON(ret);	/* Unexpected */
			goto deadlk;
		}
	}

	/* Success (or nothing to do): clear any recorded stall */
	guc->stalled_request = NULL;
	guc->submission_stall_reason = STALL_NONE;
	return submit;

deadlk:
	/* Unrecoverable: stop the submission tasklet from running again */
	sched_engine->tasklet.callback = NULL;
	tasklet_disable_nosync(&sched_engine->tasklet);
	return false;

schedule_tasklet:
	/* Channel busy: retry later from the recorded stall point */
	tasklet_schedule(&sched_engine->tasklet);
	return false;
}
975 
976 static void guc_submission_tasklet(struct tasklet_struct *t)
977 {
978 	struct i915_sched_engine *sched_engine =
979 		from_tasklet(sched_engine, t, tasklet);
980 	unsigned long flags;
981 	bool loop;
982 
983 	spin_lock_irqsave(&sched_engine->lock, flags);
984 
985 	do {
986 		loop = guc_dequeue_one_context(sched_engine->private_data);
987 	} while (loop);
988 
989 	i915_sched_engine_reset_on_empty(sched_engine);
990 
991 	spin_unlock_irqrestore(&sched_engine->lock, flags);
992 }
993 
994 static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir)
995 {
996 	if (iir & GT_RENDER_USER_INTERRUPT)
997 		intel_engine_signal_breadcrumbs(engine);
998 }
999 
1000 static void __guc_context_destroy(struct intel_context *ce);
1001 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce);
1002 static void guc_signal_context_fence(struct intel_context *ce);
1003 static void guc_cancel_context_requests(struct intel_context *ce);
1004 static void guc_blocked_fence_complete(struct intel_context *ce);
1005 
1006 static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
1007 {
1008 	struct intel_context *ce;
1009 	unsigned long index, flags;
1010 	bool pending_disable, pending_enable, deregister, destroyed, banned;
1011 
1012 	xa_lock_irqsave(&guc->context_lookup, flags);
1013 	xa_for_each(&guc->context_lookup, index, ce) {
1014 		/*
1015 		 * Corner case where the ref count on the object is zero but and
1016 		 * deregister G2H was lost. In this case we don't touch the ref
1017 		 * count and finish the destroy of the context.
1018 		 */
1019 		bool do_put = kref_get_unless_zero(&ce->ref);
1020 
1021 		xa_unlock(&guc->context_lookup);
1022 
1023 		spin_lock(&ce->guc_state.lock);
1024 
1025 		/*
1026 		 * Once we are at this point submission_disabled() is guaranteed
1027 		 * to be visible to all callers who set the below flags (see above
1028 		 * flush and flushes in reset_prepare). If submission_disabled()
1029 		 * is set, the caller shouldn't set these flags.
1030 		 */
1031 
1032 		destroyed = context_destroyed(ce);
1033 		pending_enable = context_pending_enable(ce);
1034 		pending_disable = context_pending_disable(ce);
1035 		deregister = context_wait_for_deregister_to_register(ce);
1036 		banned = context_banned(ce);
1037 		init_sched_state(ce);
1038 
1039 		spin_unlock(&ce->guc_state.lock);
1040 
1041 		GEM_BUG_ON(!do_put && !destroyed);
1042 
1043 		if (pending_enable || destroyed || deregister) {
1044 			decr_outstanding_submission_g2h(guc);
1045 			if (deregister)
1046 				guc_signal_context_fence(ce);
1047 			if (destroyed) {
1048 				intel_gt_pm_put_async(guc_to_gt(guc));
1049 				release_guc_id(guc, ce);
1050 				__guc_context_destroy(ce);
1051 			}
1052 			if (pending_enable || deregister)
1053 				intel_context_put(ce);
1054 		}
1055 
1056 		/* Not mutualy exclusive with above if statement. */
1057 		if (pending_disable) {
1058 			guc_signal_context_fence(ce);
1059 			if (banned) {
1060 				guc_cancel_context_requests(ce);
1061 				intel_engine_signal_breadcrumbs(ce->engine);
1062 			}
1063 			intel_context_sched_disable_unpin(ce);
1064 			decr_outstanding_submission_g2h(guc);
1065 
1066 			spin_lock(&ce->guc_state.lock);
1067 			guc_blocked_fence_complete(ce);
1068 			spin_unlock(&ce->guc_state.lock);
1069 
1070 			intel_context_put(ce);
1071 		}
1072 
1073 		if (do_put)
1074 			intel_context_put(ce);
1075 		xa_lock(&guc->context_lookup);
1076 	}
1077 	xa_unlock_irqrestore(&guc->context_lookup, flags);
1078 }
1079 
1080 static inline bool
1081 submission_disabled(struct intel_guc *guc)
1082 {
1083 	struct i915_sched_engine * const sched_engine = guc->sched_engine;
1084 
1085 	return unlikely(!sched_engine ||
1086 			!__tasklet_is_enabled(&sched_engine->tasklet));
1087 }
1088 
1089 static void disable_submission(struct intel_guc *guc)
1090 {
1091 	struct i915_sched_engine * const sched_engine = guc->sched_engine;
1092 
1093 	if (__tasklet_is_enabled(&sched_engine->tasklet)) {
1094 		GEM_BUG_ON(!guc->ct.enabled);
1095 		__tasklet_disable_sync_once(&sched_engine->tasklet);
1096 		sched_engine->tasklet.callback = NULL;
1097 	}
1098 }
1099 
1100 static void enable_submission(struct intel_guc *guc)
1101 {
1102 	struct i915_sched_engine * const sched_engine = guc->sched_engine;
1103 	unsigned long flags;
1104 
1105 	spin_lock_irqsave(&guc->sched_engine->lock, flags);
1106 	sched_engine->tasklet.callback = guc_submission_tasklet;
1107 	wmb();	/* Make sure callback visible */
1108 	if (!__tasklet_is_enabled(&sched_engine->tasklet) &&
1109 	    __tasklet_enable(&sched_engine->tasklet)) {
1110 		GEM_BUG_ON(!guc->ct.enabled);
1111 
1112 		/* And kick in case we missed a new request submission. */
1113 		tasklet_hi_schedule(&sched_engine->tasklet);
1114 	}
1115 	spin_unlock_irqrestore(&guc->sched_engine->lock, flags);
1116 }
1117 
1118 static void guc_flush_submissions(struct intel_guc *guc)
1119 {
1120 	struct i915_sched_engine * const sched_engine = guc->sched_engine;
1121 	unsigned long flags;
1122 
1123 	spin_lock_irqsave(&sched_engine->lock, flags);
1124 	spin_unlock_irqrestore(&sched_engine->lock, flags);
1125 }
1126 
1127 static void guc_flush_destroyed_contexts(struct intel_guc *guc);
1128 
1129 void intel_guc_submission_reset_prepare(struct intel_guc *guc)
1130 {
1131 	int i;
1132 
1133 	if (unlikely(!guc_submission_initialized(guc))) {
1134 		/* Reset called during driver load? GuC not yet initialised! */
1135 		return;
1136 	}
1137 
1138 	intel_gt_park_heartbeats(guc_to_gt(guc));
1139 	disable_submission(guc);
1140 	guc->interrupts.disable(guc);
1141 
1142 	/* Flush IRQ handler */
1143 	spin_lock_irq(&guc_to_gt(guc)->irq_lock);
1144 	spin_unlock_irq(&guc_to_gt(guc)->irq_lock);
1145 
1146 	guc_flush_submissions(guc);
1147 	guc_flush_destroyed_contexts(guc);
1148 
1149 	/*
1150 	 * Handle any outstanding G2Hs before reset. Call IRQ handler directly
1151 	 * each pass as interrupt have been disabled. We always scrub for
1152 	 * outstanding G2H as it is possible for outstanding_submission_g2h to
1153 	 * be incremented after the context state update.
1154 	 */
1155 	for (i = 0; i < 4 && atomic_read(&guc->outstanding_submission_g2h); ++i) {
1156 		intel_guc_to_host_event_handler(guc);
1157 #define wait_for_reset(guc, wait_var) \
1158 		intel_guc_wait_for_pending_msg(guc, wait_var, false, (HZ / 20))
1159 		do {
1160 			wait_for_reset(guc, &guc->outstanding_submission_g2h);
1161 		} while (!list_empty(&guc->ct.requests.incoming));
1162 	}
1163 
1164 	scrub_guc_desc_for_outstanding_g2h(guc);
1165 }
1166 
1167 static struct intel_engine_cs *
1168 guc_virtual_get_sibling(struct intel_engine_cs *ve, unsigned int sibling)
1169 {
1170 	struct intel_engine_cs *engine;
1171 	intel_engine_mask_t tmp, mask = ve->mask;
1172 	unsigned int num_siblings = 0;
1173 
1174 	for_each_engine_masked(engine, ve->gt, mask, tmp)
1175 		if (num_siblings++ == sibling)
1176 			return engine;
1177 
1178 	return NULL;
1179 }
1180 
1181 static inline struct intel_engine_cs *
1182 __context_to_physical_engine(struct intel_context *ce)
1183 {
1184 	struct intel_engine_cs *engine = ce->engine;
1185 
1186 	if (intel_engine_is_virtual(engine))
1187 		engine = guc_virtual_get_sibling(engine, 0);
1188 
1189 	return engine;
1190 }
1191 
1192 static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub)
1193 {
1194 	struct intel_engine_cs *engine = __context_to_physical_engine(ce);
1195 
1196 	if (intel_context_is_banned(ce))
1197 		return;
1198 
1199 	GEM_BUG_ON(!intel_context_is_pinned(ce));
1200 
1201 	/*
1202 	 * We want a simple context + ring to execute the breadcrumb update.
1203 	 * We cannot rely on the context being intact across the GPU hang,
1204 	 * so clear it and rebuild just what we need for the breadcrumb.
1205 	 * All pending requests for this context will be zapped, and any
1206 	 * future request will be after userspace has had the opportunity
1207 	 * to recreate its own state.
1208 	 */
1209 	if (scrub)
1210 		lrc_init_regs(ce, engine, true);
1211 
1212 	/* Rerun the request; its payload has been neutered (if guilty). */
1213 	lrc_update_regs(ce, engine, head);
1214 }
1215 
/* Intentionally empty engine reset hook: performs no work. */
static void guc_reset_nop(struct intel_engine_cs *engine)
{
	/* Nothing to do. */
}
1219 
1220 static void guc_rewind_nop(struct intel_engine_cs *engine, bool stalled)
1221 {
1222 }
1223 
1224 static void
1225 __unwind_incomplete_requests(struct intel_context *ce)
1226 {
1227 	struct i915_request *rq, *rn;
1228 	struct list_head *pl;
1229 	int prio = I915_PRIORITY_INVALID;
1230 	struct i915_sched_engine * const sched_engine =
1231 		ce->engine->sched_engine;
1232 	unsigned long flags;
1233 
1234 	spin_lock_irqsave(&sched_engine->lock, flags);
1235 	spin_lock(&ce->guc_state.lock);
1236 	list_for_each_entry_safe_reverse(rq, rn,
1237 					 &ce->guc_state.requests,
1238 					 sched.link) {
1239 		if (i915_request_completed(rq))
1240 			continue;
1241 
1242 		list_del_init(&rq->sched.link);
1243 		__i915_request_unsubmit(rq);
1244 
1245 		/* Push the request back into the queue for later resubmission. */
1246 		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
1247 		if (rq_prio(rq) != prio) {
1248 			prio = rq_prio(rq);
1249 			pl = i915_sched_lookup_priolist(sched_engine, prio);
1250 		}
1251 		GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine));
1252 
1253 		list_add(&rq->sched.link, pl);
1254 		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
1255 	}
1256 	spin_unlock(&ce->guc_state.lock);
1257 	spin_unlock_irqrestore(&sched_engine->lock, flags);
1258 }
1259 
1260 static void __guc_reset_context(struct intel_context *ce, bool stalled)
1261 {
1262 	bool local_stalled;
1263 	struct i915_request *rq;
1264 	unsigned long flags;
1265 	u32 head;
1266 	int i, number_children = ce->parallel.number_children;
1267 	bool skip = false;
1268 	struct intel_context *parent = ce;
1269 
1270 	GEM_BUG_ON(intel_context_is_child(ce));
1271 
1272 	intel_context_get(ce);
1273 
1274 	/*
1275 	 * GuC will implicitly mark the context as non-schedulable when it sends
1276 	 * the reset notification. Make sure our state reflects this change. The
1277 	 * context will be marked enabled on resubmission.
1278 	 *
1279 	 * XXX: If the context is reset as a result of the request cancellation
1280 	 * this G2H is received after the schedule disable complete G2H which is
1281 	 * wrong as this creates a race between the request cancellation code
1282 	 * re-submitting the context and this G2H handler. This is a bug in the
1283 	 * GuC but can be worked around in the meantime but converting this to a
1284 	 * NOP if a pending enable is in flight as this indicates that a request
1285 	 * cancellation has occurred.
1286 	 */
1287 	spin_lock_irqsave(&ce->guc_state.lock, flags);
1288 	if (likely(!context_pending_enable(ce)))
1289 		clr_context_enabled(ce);
1290 	else
1291 		skip = true;
1292 	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
1293 	if (unlikely(skip))
1294 		goto out_put;
1295 
1296 	/*
1297 	 * For each context in the relationship find the hanging request
1298 	 * resetting each context / request as needed
1299 	 */
1300 	for (i = 0; i < number_children + 1; ++i) {
1301 		if (!intel_context_is_pinned(ce))
1302 			goto next_context;
1303 
1304 		local_stalled = false;
1305 		rq = intel_context_find_active_request(ce);
1306 		if (!rq) {
1307 			head = ce->ring->tail;
1308 			goto out_replay;
1309 		}
1310 
1311 		if (i915_request_started(rq))
1312 			local_stalled = true;
1313 
1314 		GEM_BUG_ON(i915_active_is_idle(&ce->active));
1315 		head = intel_ring_wrap(ce->ring, rq->head);
1316 
1317 		__i915_request_reset(rq, local_stalled && stalled);
1318 out_replay:
1319 		guc_reset_state(ce, head, local_stalled && stalled);
1320 next_context:
1321 		if (i != number_children)
1322 			ce = list_next_entry(ce, parallel.child_link);
1323 	}
1324 
1325 	__unwind_incomplete_requests(parent);
1326 out_put:
1327 	intel_context_put(parent);
1328 }
1329 
1330 void intel_guc_submission_reset(struct intel_guc *guc, bool stalled)
1331 {
1332 	struct intel_context *ce;
1333 	unsigned long index;
1334 	unsigned long flags;
1335 
1336 	if (unlikely(!guc_submission_initialized(guc))) {
1337 		/* Reset called during driver load? GuC not yet initialised! */
1338 		return;
1339 	}
1340 
1341 	xa_lock_irqsave(&guc->context_lookup, flags);
1342 	xa_for_each(&guc->context_lookup, index, ce) {
1343 		if (!kref_get_unless_zero(&ce->ref))
1344 			continue;
1345 
1346 		xa_unlock(&guc->context_lookup);
1347 
1348 		if (intel_context_is_pinned(ce) &&
1349 		    !intel_context_is_child(ce))
1350 			__guc_reset_context(ce, stalled);
1351 
1352 		intel_context_put(ce);
1353 
1354 		xa_lock(&guc->context_lookup);
1355 	}
1356 	xa_unlock_irqrestore(&guc->context_lookup, flags);
1357 
1358 	/* GuC is blown away, drop all references to contexts */
1359 	xa_destroy(&guc->context_lookup);
1360 }
1361 
1362 static void guc_cancel_context_requests(struct intel_context *ce)
1363 {
1364 	struct i915_sched_engine *sched_engine = ce_to_guc(ce)->sched_engine;
1365 	struct i915_request *rq;
1366 	unsigned long flags;
1367 
1368 	/* Mark all executing requests as skipped. */
1369 	spin_lock_irqsave(&sched_engine->lock, flags);
1370 	spin_lock(&ce->guc_state.lock);
1371 	list_for_each_entry(rq, &ce->guc_state.requests, sched.link)
1372 		i915_request_put(i915_request_mark_eio(rq));
1373 	spin_unlock(&ce->guc_state.lock);
1374 	spin_unlock_irqrestore(&sched_engine->lock, flags);
1375 }
1376 
1377 static void
1378 guc_cancel_sched_engine_requests(struct i915_sched_engine *sched_engine)
1379 {
1380 	struct i915_request *rq, *rn;
1381 	struct rb_node *rb;
1382 	unsigned long flags;
1383 
1384 	/* Can be called during boot if GuC fails to load */
1385 	if (!sched_engine)
1386 		return;
1387 
1388 	/*
1389 	 * Before we call engine->cancel_requests(), we should have exclusive
1390 	 * access to the submission state. This is arranged for us by the
1391 	 * caller disabling the interrupt generation, the tasklet and other
1392 	 * threads that may then access the same state, giving us a free hand
1393 	 * to reset state. However, we still need to let lockdep be aware that
1394 	 * we know this state may be accessed in hardirq context, so we
1395 	 * disable the irq around this manipulation and we want to keep
1396 	 * the spinlock focused on its duties and not accidentally conflate
1397 	 * coverage to the submission's irq state. (Similarly, although we
1398 	 * shouldn't need to disable irq around the manipulation of the
1399 	 * submission's irq state, we also wish to remind ourselves that
1400 	 * it is irq state.)
1401 	 */
1402 	spin_lock_irqsave(&sched_engine->lock, flags);
1403 
1404 	/* Flush the queued requests to the timeline list (for retiring). */
1405 	while ((rb = rb_first_cached(&sched_engine->queue))) {
1406 		struct i915_priolist *p = to_priolist(rb);
1407 
1408 		priolist_for_each_request_consume(rq, rn, p) {
1409 			list_del_init(&rq->sched.link);
1410 
1411 			__i915_request_submit(rq);
1412 
1413 			i915_request_put(i915_request_mark_eio(rq));
1414 		}
1415 
1416 		rb_erase_cached(&p->node, &sched_engine->queue);
1417 		i915_priolist_free(p);
1418 	}
1419 
1420 	/* Remaining _unready_ requests will be nop'ed when submitted */
1421 
1422 	sched_engine->queue_priority_hint = INT_MIN;
1423 	sched_engine->queue = RB_ROOT_CACHED;
1424 
1425 	spin_unlock_irqrestore(&sched_engine->lock, flags);
1426 }
1427 
1428 void intel_guc_submission_cancel_requests(struct intel_guc *guc)
1429 {
1430 	struct intel_context *ce;
1431 	unsigned long index;
1432 	unsigned long flags;
1433 
1434 	xa_lock_irqsave(&guc->context_lookup, flags);
1435 	xa_for_each(&guc->context_lookup, index, ce) {
1436 		if (!kref_get_unless_zero(&ce->ref))
1437 			continue;
1438 
1439 		xa_unlock(&guc->context_lookup);
1440 
1441 		if (intel_context_is_pinned(ce) &&
1442 		    !intel_context_is_child(ce))
1443 			guc_cancel_context_requests(ce);
1444 
1445 		intel_context_put(ce);
1446 
1447 		xa_lock(&guc->context_lookup);
1448 	}
1449 	xa_unlock_irqrestore(&guc->context_lookup, flags);
1450 
1451 	guc_cancel_sched_engine_requests(guc->sched_engine);
1452 
1453 	/* GuC is blown away, drop all references to contexts */
1454 	xa_destroy(&guc->context_lookup);
1455 }
1456 
1457 void intel_guc_submission_reset_finish(struct intel_guc *guc)
1458 {
1459 	/* Reset called during driver load or during wedge? */
1460 	if (unlikely(!guc_submission_initialized(guc) ||
1461 		     test_bit(I915_WEDGED, &guc_to_gt(guc)->reset.flags))) {
1462 		return;
1463 	}
1464 
1465 	/*
1466 	 * Technically possible for either of these values to be non-zero here,
1467 	 * but very unlikely + harmless. Regardless let's add a warn so we can
1468 	 * see in CI if this happens frequently / a precursor to taking down the
1469 	 * machine.
1470 	 */
1471 	GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h));
1472 	atomic_set(&guc->outstanding_submission_g2h, 0);
1473 
1474 	intel_guc_global_policies_update(guc);
1475 	enable_submission(guc);
1476 	intel_gt_unpark_heartbeats(guc_to_gt(guc));
1477 }
1478 
1479 static void destroyed_worker_func(struct work_struct *w);
1480 
1481 /*
1482  * Set up the memory resources to be shared with the GuC (via the GGTT)
1483  * at firmware loading time.
1484  */
1485 int intel_guc_submission_init(struct intel_guc *guc)
1486 {
1487 	int ret;
1488 
1489 	if (guc->lrc_desc_pool)
1490 		return 0;
1491 
1492 	ret = guc_lrc_desc_pool_create(guc);
1493 	if (ret)
1494 		return ret;
1495 	/*
1496 	 * Keep static analysers happy, let them know that we allocated the
1497 	 * vma after testing that it didn't exist earlier.
1498 	 */
1499 	GEM_BUG_ON(!guc->lrc_desc_pool);
1500 
1501 	xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ);
1502 
1503 	spin_lock_init(&guc->submission_state.lock);
1504 	INIT_LIST_HEAD(&guc->submission_state.guc_id_list);
1505 	ida_init(&guc->submission_state.guc_ids);
1506 	INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts);
1507 	INIT_WORK(&guc->submission_state.destroyed_worker,
1508 		  destroyed_worker_func);
1509 
1510 	guc->submission_state.guc_ids_bitmap =
1511 		bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID, GFP_KERNEL);
1512 	if (!guc->submission_state.guc_ids_bitmap)
1513 		return -ENOMEM;
1514 
1515 	return 0;
1516 }
1517 
1518 void intel_guc_submission_fini(struct intel_guc *guc)
1519 {
1520 	if (!guc->lrc_desc_pool)
1521 		return;
1522 
1523 	guc_flush_destroyed_contexts(guc);
1524 	guc_lrc_desc_pool_destroy(guc);
1525 	i915_sched_engine_put(guc->sched_engine);
1526 	bitmap_free(guc->submission_state.guc_ids_bitmap);
1527 }
1528 
1529 static inline void queue_request(struct i915_sched_engine *sched_engine,
1530 				 struct i915_request *rq,
1531 				 int prio)
1532 {
1533 	GEM_BUG_ON(!list_empty(&rq->sched.link));
1534 	list_add_tail(&rq->sched.link,
1535 		      i915_sched_lookup_priolist(sched_engine, prio));
1536 	set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
1537 	tasklet_hi_schedule(&sched_engine->tasklet);
1538 }
1539 
1540 static int guc_bypass_tasklet_submit(struct intel_guc *guc,
1541 				     struct i915_request *rq)
1542 {
1543 	int ret = 0;
1544 
1545 	__i915_request_submit(rq);
1546 
1547 	trace_i915_request_in(rq, 0);
1548 
1549 	if (is_multi_lrc_rq(rq)) {
1550 		if (multi_lrc_submit(rq)) {
1551 			ret = guc_wq_item_append(guc, rq);
1552 			if (!ret)
1553 				ret = guc_add_request(guc, rq);
1554 		}
1555 	} else {
1556 		guc_set_lrc_tail(rq);
1557 		ret = guc_add_request(guc, rq);
1558 	}
1559 
1560 	if (unlikely(ret == -EPIPE))
1561 		disable_submission(guc);
1562 
1563 	return ret;
1564 }
1565 
1566 static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq)
1567 {
1568 	struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
1569 	struct intel_context *ce = request_to_scheduling_context(rq);
1570 
1571 	return submission_disabled(guc) || guc->stalled_request ||
1572 		!i915_sched_engine_is_empty(sched_engine) ||
1573 		!lrc_desc_registered(guc, ce->guc_id.id);
1574 }
1575 
1576 static void guc_submit_request(struct i915_request *rq)
1577 {
1578 	struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
1579 	struct intel_guc *guc = &rq->engine->gt->uc.guc;
1580 	unsigned long flags;
1581 
1582 	/* Will be called from irq-context when using foreign fences. */
1583 	spin_lock_irqsave(&sched_engine->lock, flags);
1584 
1585 	if (need_tasklet(guc, rq))
1586 		queue_request(sched_engine, rq, rq_prio(rq));
1587 	else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY)
1588 		tasklet_hi_schedule(&sched_engine->tasklet);
1589 
1590 	spin_unlock_irqrestore(&sched_engine->lock, flags);
1591 }
1592 
1593 static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
1594 {
1595 	int ret;
1596 
1597 	GEM_BUG_ON(intel_context_is_child(ce));
1598 
1599 	if (intel_context_is_parent(ce))
1600 		ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
1601 					      NUMBER_MULTI_LRC_GUC_ID,
1602 					      order_base_2(ce->parallel.number_children
1603 							   + 1));
1604 	else
1605 		ret = ida_simple_get(&guc->submission_state.guc_ids,
1606 				     NUMBER_MULTI_LRC_GUC_ID,
1607 				     GUC_MAX_LRC_DESCRIPTORS,
1608 				     GFP_KERNEL | __GFP_RETRY_MAYFAIL |
1609 				     __GFP_NOWARN);
1610 	if (unlikely(ret < 0))
1611 		return ret;
1612 
1613 	ce->guc_id.id = ret;
1614 	return 0;
1615 }
1616 
1617 static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce)
1618 {
1619 	GEM_BUG_ON(intel_context_is_child(ce));
1620 
1621 	if (!context_guc_id_invalid(ce)) {
1622 		if (intel_context_is_parent(ce))
1623 			bitmap_release_region(guc->submission_state.guc_ids_bitmap,
1624 					      ce->guc_id.id,
1625 					      order_base_2(ce->parallel.number_children
1626 							   + 1));
1627 		else
1628 			ida_simple_remove(&guc->submission_state.guc_ids,
1629 					  ce->guc_id.id);
1630 		reset_lrc_desc(guc, ce->guc_id.id);
1631 		set_context_guc_id_invalid(ce);
1632 	}
1633 	if (!list_empty(&ce->guc_id.link))
1634 		list_del_init(&ce->guc_id.link);
1635 }
1636 
1637 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce)
1638 {
1639 	unsigned long flags;
1640 
1641 	spin_lock_irqsave(&guc->submission_state.lock, flags);
1642 	__release_guc_id(guc, ce);
1643 	spin_unlock_irqrestore(&guc->submission_state.lock, flags);
1644 }
1645 
1646 static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce)
1647 {
1648 	struct intel_context *cn;
1649 
1650 	lockdep_assert_held(&guc->submission_state.lock);
1651 	GEM_BUG_ON(intel_context_is_child(ce));
1652 	GEM_BUG_ON(intel_context_is_parent(ce));
1653 
1654 	if (!list_empty(&guc->submission_state.guc_id_list)) {
1655 		cn = list_first_entry(&guc->submission_state.guc_id_list,
1656 				      struct intel_context,
1657 				      guc_id.link);
1658 
1659 		GEM_BUG_ON(atomic_read(&cn->guc_id.ref));
1660 		GEM_BUG_ON(context_guc_id_invalid(cn));
1661 		GEM_BUG_ON(intel_context_is_child(cn));
1662 		GEM_BUG_ON(intel_context_is_parent(cn));
1663 
1664 		list_del_init(&cn->guc_id.link);
1665 		ce->guc_id = cn->guc_id;
1666 
1667 		spin_lock(&ce->guc_state.lock);
1668 		clr_context_registered(cn);
1669 		spin_unlock(&ce->guc_state.lock);
1670 
1671 		set_context_guc_id_invalid(cn);
1672 
1673 		return 0;
1674 	} else {
1675 		return -EAGAIN;
1676 	}
1677 }
1678 
1679 static int assign_guc_id(struct intel_guc *guc, struct intel_context *ce)
1680 {
1681 	int ret;
1682 
1683 	lockdep_assert_held(&guc->submission_state.lock);
1684 	GEM_BUG_ON(intel_context_is_child(ce));
1685 
1686 	ret = new_guc_id(guc, ce);
1687 	if (unlikely(ret < 0)) {
1688 		if (intel_context_is_parent(ce))
1689 			return -ENOSPC;
1690 
1691 		ret = steal_guc_id(guc, ce);
1692 		if (ret < 0)
1693 			return ret;
1694 	}
1695 
1696 	if (intel_context_is_parent(ce)) {
1697 		struct intel_context *child;
1698 		int i = 1;
1699 
1700 		for_each_child(ce, child)
1701 			child->guc_id.id = ce->guc_id.id + i++;
1702 	}
1703 
1704 	return 0;
1705 }
1706 
1707 #define PIN_GUC_ID_TRIES	4
1708 static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
1709 {
1710 	int ret = 0;
1711 	unsigned long flags, tries = PIN_GUC_ID_TRIES;
1712 
1713 	GEM_BUG_ON(atomic_read(&ce->guc_id.ref));
1714 
1715 try_again:
1716 	spin_lock_irqsave(&guc->submission_state.lock, flags);
1717 
1718 	might_lock(&ce->guc_state.lock);
1719 
1720 	if (context_guc_id_invalid(ce)) {
1721 		ret = assign_guc_id(guc, ce);
1722 		if (ret)
1723 			goto out_unlock;
1724 		ret = 1;	/* Indidcates newly assigned guc_id */
1725 	}
1726 	if (!list_empty(&ce->guc_id.link))
1727 		list_del_init(&ce->guc_id.link);
1728 	atomic_inc(&ce->guc_id.ref);
1729 
1730 out_unlock:
1731 	spin_unlock_irqrestore(&guc->submission_state.lock, flags);
1732 
1733 	/*
1734 	 * -EAGAIN indicates no guc_id are available, let's retire any
1735 	 * outstanding requests to see if that frees up a guc_id. If the first
1736 	 * retire didn't help, insert a sleep with the timeslice duration before
1737 	 * attempting to retire more requests. Double the sleep period each
1738 	 * subsequent pass before finally giving up. The sleep period has max of
1739 	 * 100ms and minimum of 1ms.
1740 	 */
1741 	if (ret == -EAGAIN && --tries) {
1742 		if (PIN_GUC_ID_TRIES - tries > 1) {
1743 			unsigned int timeslice_shifted =
1744 				ce->engine->props.timeslice_duration_ms <<
1745 				(PIN_GUC_ID_TRIES - tries - 2);
1746 			unsigned int max = min_t(unsigned int, 100,
1747 						 timeslice_shifted);
1748 
1749 			msleep(max_t(unsigned int, max, 1));
1750 		}
1751 		intel_gt_retire_requests(guc_to_gt(guc));
1752 		goto try_again;
1753 	}
1754 
1755 	return ret;
1756 }
1757 
1758 static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce)
1759 {
1760 	unsigned long flags;
1761 
1762 	GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0);
1763 	GEM_BUG_ON(intel_context_is_child(ce));
1764 
1765 	if (unlikely(context_guc_id_invalid(ce) ||
1766 		     intel_context_is_parent(ce)))
1767 		return;
1768 
1769 	spin_lock_irqsave(&guc->submission_state.lock, flags);
1770 	if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) &&
1771 	    !atomic_read(&ce->guc_id.ref))
1772 		list_add_tail(&ce->guc_id.link,
1773 			      &guc->submission_state.guc_id_list);
1774 	spin_unlock_irqrestore(&guc->submission_state.lock, flags);
1775 }
1776 
1777 static int __guc_action_register_multi_lrc(struct intel_guc *guc,
1778 					   struct intel_context *ce,
1779 					   u32 guc_id,
1780 					   u32 offset,
1781 					   bool loop)
1782 {
1783 	struct intel_context *child;
1784 	u32 action[4 + MAX_ENGINE_INSTANCE];
1785 	int len = 0;
1786 
1787 	GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);
1788 
1789 	action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
1790 	action[len++] = guc_id;
1791 	action[len++] = ce->parallel.number_children + 1;
1792 	action[len++] = offset;
1793 	for_each_child(ce, child) {
1794 		offset += sizeof(struct guc_lrc_desc);
1795 		action[len++] = offset;
1796 	}
1797 
1798 	return guc_submission_send_busy_loop(guc, action, len, 0, loop);
1799 }
1800 
1801 static int __guc_action_register_context(struct intel_guc *guc,
1802 					 u32 guc_id,
1803 					 u32 offset,
1804 					 bool loop)
1805 {
1806 	u32 action[] = {
1807 		INTEL_GUC_ACTION_REGISTER_CONTEXT,
1808 		guc_id,
1809 		offset,
1810 	};
1811 
1812 	return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
1813 					     0, loop);
1814 }
1815 
1816 static int register_context(struct intel_context *ce, bool loop)
1817 {
1818 	struct intel_guc *guc = ce_to_guc(ce);
1819 	u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool) +
1820 		ce->guc_id.id * sizeof(struct guc_lrc_desc);
1821 	int ret;
1822 
1823 	GEM_BUG_ON(intel_context_is_child(ce));
1824 	trace_intel_context_register(ce);
1825 
1826 	if (intel_context_is_parent(ce))
1827 		ret = __guc_action_register_multi_lrc(guc, ce, ce->guc_id.id,
1828 						      offset, loop);
1829 	else
1830 		ret = __guc_action_register_context(guc, ce->guc_id.id, offset,
1831 						    loop);
1832 	if (likely(!ret)) {
1833 		unsigned long flags;
1834 
1835 		spin_lock_irqsave(&ce->guc_state.lock, flags);
1836 		set_context_registered(ce);
1837 		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
1838 	}
1839 
1840 	return ret;
1841 }
1842 
1843 static int __guc_action_deregister_context(struct intel_guc *guc,
1844 					   u32 guc_id)
1845 {
1846 	u32 action[] = {
1847 		INTEL_GUC_ACTION_DEREGISTER_CONTEXT,
1848 		guc_id,
1849 	};
1850 
1851 	return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
1852 					     G2H_LEN_DW_DEREGISTER_CONTEXT,
1853 					     true);
1854 }
1855 
1856 static int deregister_context(struct intel_context *ce, u32 guc_id)
1857 {
1858 	struct intel_guc *guc = ce_to_guc(ce);
1859 
1860 	GEM_BUG_ON(intel_context_is_child(ce));
1861 	trace_intel_context_deregister(ce);
1862 
1863 	return __guc_action_deregister_context(guc, guc_id);
1864 }
1865 
1866 static inline void clear_children_join_go_memory(struct intel_context *ce)
1867 {
1868 	struct parent_scratch *ps = __get_parent_scratch(ce);
1869 	int i;
1870 
1871 	ps->go.semaphore = 0;
1872 	for (i = 0; i < ce->parallel.number_children + 1; ++i)
1873 		ps->join[i].semaphore = 0;
1874 }
1875 
1876 static inline u32 get_children_go_value(struct intel_context *ce)
1877 {
1878 	return __get_parent_scratch(ce)->go.semaphore;
1879 }
1880 
1881 static inline u32 get_children_join_value(struct intel_context *ce,
1882 					  u8 child_index)
1883 {
1884 	return __get_parent_scratch(ce)->join[child_index].semaphore;
1885 }
1886 
1887 static void guc_context_policy_init(struct intel_engine_cs *engine,
1888 				    struct guc_lrc_desc *desc)
1889 {
1890 	desc->policy_flags = 0;
1891 
1892 	if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
1893 		desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE;
1894 
1895 	/* NB: For both of these, zero means disabled. */
1896 	desc->execution_quantum = engine->props.timeslice_duration_ms * 1000;
1897 	desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
1898 }
1899 
1900 static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
1901 {
1902 	struct intel_engine_cs *engine = ce->engine;
1903 	struct intel_runtime_pm *runtime_pm = engine->uncore->rpm;
1904 	struct intel_guc *guc = &engine->gt->uc.guc;
1905 	u32 desc_idx = ce->guc_id.id;
1906 	struct guc_lrc_desc *desc;
1907 	bool context_registered;
1908 	intel_wakeref_t wakeref;
1909 	struct intel_context *child;
1910 	int ret = 0;
1911 
1912 	GEM_BUG_ON(!engine->mask);
1913 	GEM_BUG_ON(!sched_state_is_init(ce));
1914 
1915 	/*
1916 	 * Ensure LRC + CT vmas are is same region as write barrier is done
1917 	 * based on CT vma region.
1918 	 */
1919 	GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
1920 		   i915_gem_object_is_lmem(ce->ring->vma->obj));
1921 
1922 	context_registered = lrc_desc_registered(guc, desc_idx);
1923 
1924 	reset_lrc_desc(guc, desc_idx);
1925 	set_lrc_desc_registered(guc, desc_idx, ce);
1926 
1927 	desc = __get_lrc_desc(guc, desc_idx);
1928 	desc->engine_class = engine_class_to_guc_class(engine->class);
1929 	desc->engine_submit_mask = engine->logical_mask;
1930 	desc->hw_context_desc = ce->lrc.lrca;
1931 	desc->priority = ce->guc_state.prio;
1932 	desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
1933 	guc_context_policy_init(engine, desc);
1934 
1935 	/*
1936 	 * If context is a parent, we need to register a process descriptor
1937 	 * describing a work queue and register all child contexts.
1938 	 */
1939 	if (intel_context_is_parent(ce)) {
1940 		struct guc_process_desc *pdesc;
1941 
1942 		ce->parallel.guc.wqi_tail = 0;
1943 		ce->parallel.guc.wqi_head = 0;
1944 
1945 		desc->process_desc = i915_ggtt_offset(ce->state) +
1946 			__get_parent_scratch_offset(ce);
1947 		desc->wq_addr = i915_ggtt_offset(ce->state) +
1948 			__get_wq_offset(ce);
1949 		desc->wq_size = WQ_SIZE;
1950 
1951 		pdesc = __get_process_desc(ce);
1952 		memset(pdesc, 0, sizeof(*(pdesc)));
1953 		pdesc->stage_id = ce->guc_id.id;
1954 		pdesc->wq_base_addr = desc->wq_addr;
1955 		pdesc->wq_size_bytes = desc->wq_size;
1956 		pdesc->wq_status = WQ_STATUS_ACTIVE;
1957 
1958 		for_each_child(ce, child) {
1959 			desc = __get_lrc_desc(guc, child->guc_id.id);
1960 
1961 			desc->engine_class =
1962 				engine_class_to_guc_class(engine->class);
1963 			desc->hw_context_desc = child->lrc.lrca;
1964 			desc->priority = ce->guc_state.prio;
1965 			desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
1966 			guc_context_policy_init(engine, desc);
1967 		}
1968 
1969 		clear_children_join_go_memory(ce);
1970 	}
1971 
1972 	/*
1973 	 * The context_lookup xarray is used to determine if the hardware
1974 	 * context is currently registered. There are two cases in which it
1975 	 * could be registered either the guc_id has been stolen from another
1976 	 * context or the lrc descriptor address of this context has changed. In
1977 	 * either case the context needs to be deregistered with the GuC before
1978 	 * registering this context.
1979 	 */
1980 	if (context_registered) {
1981 		bool disabled;
1982 		unsigned long flags;
1983 
1984 		trace_intel_context_steal_guc_id(ce);
1985 		GEM_BUG_ON(!loop);
1986 
1987 		/* Seal race with Reset */
1988 		spin_lock_irqsave(&ce->guc_state.lock, flags);
1989 		disabled = submission_disabled(guc);
1990 		if (likely(!disabled)) {
1991 			set_context_wait_for_deregister_to_register(ce);
1992 			intel_context_get(ce);
1993 		}
1994 		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
1995 		if (unlikely(disabled)) {
1996 			reset_lrc_desc(guc, desc_idx);
1997 			return 0;	/* Will get registered later */
1998 		}
1999 
2000 		/*
2001 		 * If stealing the guc_id, this ce has the same guc_id as the
2002 		 * context whose guc_id was stolen.
2003 		 */
2004 		with_intel_runtime_pm(runtime_pm, wakeref)
2005 			ret = deregister_context(ce, ce->guc_id.id);
2006 		if (unlikely(ret == -ENODEV))
2007 			ret = 0;	/* Will get registered later */
2008 	} else {
2009 		with_intel_runtime_pm(runtime_pm, wakeref)
2010 			ret = register_context(ce, loop);
2011 		if (unlikely(ret == -EBUSY)) {
2012 			reset_lrc_desc(guc, desc_idx);
2013 		} else if (unlikely(ret == -ENODEV)) {
2014 			reset_lrc_desc(guc, desc_idx);
2015 			ret = 0;	/* Will get registered later */
2016 		}
2017 	}
2018 
2019 	return ret;
2020 }
2021 
/*
 * Shared pre-pin step for GuC contexts. The caller picks the engine; the
 * work itself is delegated to lrc_pre_pin() and its result is returned.
 */
static int __guc_context_pre_pin(struct intel_context *ce,
				 struct intel_engine_cs *engine,
				 struct i915_gem_ww_ctx *ww,
				 void **vaddr)
{
	int err;

	err = lrc_pre_pin(ce, engine, ww, vaddr);

	return err;
}
2029 
2030 static int __guc_context_pin(struct intel_context *ce,
2031 			     struct intel_engine_cs *engine,
2032 			     void *vaddr)
2033 {
2034 	if (i915_ggtt_offset(ce->state) !=
2035 	    (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK))
2036 		set_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
2037 
2038 	/*
2039 	 * GuC context gets pinned in guc_request_alloc. See that function for
2040 	 * explaination of why.
2041 	 */
2042 
2043 	return lrc_pin(ce, engine, vaddr);
2044 }
2045 
2046 static int guc_context_pre_pin(struct intel_context *ce,
2047 			       struct i915_gem_ww_ctx *ww,
2048 			       void **vaddr)
2049 {
2050 	return __guc_context_pre_pin(ce, ce->engine, ww, vaddr);
2051 }
2052 
2053 static int guc_context_pin(struct intel_context *ce, void *vaddr)
2054 {
2055 	int ret = __guc_context_pin(ce, ce->engine, vaddr);
2056 
2057 	if (likely(!ret && !intel_context_is_barrier(ce)))
2058 		intel_engine_pm_get(ce->engine);
2059 
2060 	return ret;
2061 }
2062 
2063 static void guc_context_unpin(struct intel_context *ce)
2064 {
2065 	struct intel_guc *guc = ce_to_guc(ce);
2066 
2067 	unpin_guc_id(guc, ce);
2068 	lrc_unpin(ce);
2069 
2070 	if (likely(!intel_context_is_barrier(ce)))
2071 		intel_engine_pm_put_async(ce->engine);
2072 }
2073 
/* .post_unpin hook: nothing GuC specific, just forward to lrc_post_unpin(). */
static void guc_context_post_unpin(struct intel_context *ce)
{
	lrc_post_unpin(ce);
}
2078 
2079 static void __guc_context_sched_enable(struct intel_guc *guc,
2080 				       struct intel_context *ce)
2081 {
2082 	u32 action[] = {
2083 		INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
2084 		ce->guc_id.id,
2085 		GUC_CONTEXT_ENABLE
2086 	};
2087 
2088 	trace_intel_context_sched_enable(ce);
2089 
2090 	guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2091 				      G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true);
2092 }
2093 
2094 static void __guc_context_sched_disable(struct intel_guc *guc,
2095 					struct intel_context *ce,
2096 					u16 guc_id)
2097 {
2098 	u32 action[] = {
2099 		INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
2100 		guc_id,	/* ce->guc_id.id not stable */
2101 		GUC_CONTEXT_DISABLE
2102 	};
2103 
2104 	GEM_BUG_ON(guc_id == GUC_INVALID_LRC_ID);
2105 
2106 	GEM_BUG_ON(intel_context_is_child(ce));
2107 	trace_intel_context_sched_disable(ce);
2108 
2109 	guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2110 				      G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true);
2111 }
2112 
2113 static void guc_blocked_fence_complete(struct intel_context *ce)
2114 {
2115 	lockdep_assert_held(&ce->guc_state.lock);
2116 
2117 	if (!i915_sw_fence_done(&ce->guc_state.blocked))
2118 		i915_sw_fence_complete(&ce->guc_state.blocked);
2119 }
2120 
2121 static void guc_blocked_fence_reinit(struct intel_context *ce)
2122 {
2123 	lockdep_assert_held(&ce->guc_state.lock);
2124 	GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_state.blocked));
2125 
2126 	/*
2127 	 * This fence is always complete unless a pending schedule disable is
2128 	 * outstanding. We arm the fence here and complete it when we receive
2129 	 * the pending schedule disable complete message.
2130 	 */
2131 	i915_sw_fence_fini(&ce->guc_state.blocked);
2132 	i915_sw_fence_reinit(&ce->guc_state.blocked);
2133 	i915_sw_fence_await(&ce->guc_state.blocked);
2134 	i915_sw_fence_commit(&ce->guc_state.blocked);
2135 }
2136 
2137 static u16 prep_context_pending_disable(struct intel_context *ce)
2138 {
2139 	lockdep_assert_held(&ce->guc_state.lock);
2140 
2141 	set_context_pending_disable(ce);
2142 	clr_context_enabled(ce);
2143 	guc_blocked_fence_reinit(ce);
2144 	intel_context_get(ce);
2145 
2146 	return ce->guc_id.id;
2147 }
2148 
2149 static struct i915_sw_fence *guc_context_block(struct intel_context *ce)
2150 {
2151 	struct intel_guc *guc = ce_to_guc(ce);
2152 	unsigned long flags;
2153 	struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
2154 	intel_wakeref_t wakeref;
2155 	u16 guc_id;
2156 	bool enabled;
2157 
2158 	GEM_BUG_ON(intel_context_is_child(ce));
2159 
2160 	spin_lock_irqsave(&ce->guc_state.lock, flags);
2161 
2162 	incr_context_blocked(ce);
2163 
2164 	enabled = context_enabled(ce);
2165 	if (unlikely(!enabled || submission_disabled(guc))) {
2166 		if (enabled)
2167 			clr_context_enabled(ce);
2168 		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2169 		return &ce->guc_state.blocked;
2170 	}
2171 
2172 	/*
2173 	 * We add +2 here as the schedule disable complete CTB handler calls
2174 	 * intel_context_sched_disable_unpin (-2 to pin_count).
2175 	 */
2176 	atomic_add(2, &ce->pin_count);
2177 
2178 	guc_id = prep_context_pending_disable(ce);
2179 
2180 	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2181 
2182 	with_intel_runtime_pm(runtime_pm, wakeref)
2183 		__guc_context_sched_disable(guc, ce, guc_id);
2184 
2185 	return &ce->guc_state.blocked;
2186 }
2187 
/* SCHED_STATE_BLOCKED_MASK with the SCHED_STATE_BLOCKED bit masked out. */
#define SCHED_STATE_MULTI_BLOCKED_MASK \
	(SCHED_STATE_BLOCKED_MASK & ~SCHED_STATE_BLOCKED)
/* sched_state bits that, when set, stop context_cant_unblock() allowing an unblock. */
#define SCHED_STATE_NO_UNBLOCK \
	(SCHED_STATE_BANNED | \
	 SCHED_STATE_PENDING_DISABLE | \
	 SCHED_STATE_MULTI_BLOCKED_MASK)
2194 
2195 static bool context_cant_unblock(struct intel_context *ce)
2196 {
2197 	lockdep_assert_held(&ce->guc_state.lock);
2198 
2199 	return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) ||
2200 		context_guc_id_invalid(ce) ||
2201 		!lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id) ||
2202 		!intel_context_is_pinned(ce);
2203 }
2204 
2205 static void guc_context_unblock(struct intel_context *ce)
2206 {
2207 	struct intel_guc *guc = ce_to_guc(ce);
2208 	unsigned long flags;
2209 	struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
2210 	intel_wakeref_t wakeref;
2211 	bool enable;
2212 
2213 	GEM_BUG_ON(context_enabled(ce));
2214 	GEM_BUG_ON(intel_context_is_child(ce));
2215 
2216 	spin_lock_irqsave(&ce->guc_state.lock, flags);
2217 
2218 	if (unlikely(submission_disabled(guc) ||
2219 		     context_cant_unblock(ce))) {
2220 		enable = false;
2221 	} else {
2222 		enable = true;
2223 		set_context_pending_enable(ce);
2224 		set_context_enabled(ce);
2225 		intel_context_get(ce);
2226 	}
2227 
2228 	decr_context_blocked(ce);
2229 
2230 	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2231 
2232 	if (enable) {
2233 		with_intel_runtime_pm(runtime_pm, wakeref)
2234 			__guc_context_sched_enable(guc, ce);
2235 	}
2236 }
2237 
2238 static void guc_context_cancel_request(struct intel_context *ce,
2239 				       struct i915_request *rq)
2240 {
2241 	struct intel_context *block_context =
2242 		request_to_scheduling_context(rq);
2243 
2244 	if (i915_sw_fence_signaled(&rq->submit)) {
2245 		struct i915_sw_fence *fence;
2246 
2247 		intel_context_get(ce);
2248 		fence = guc_context_block(block_context);
2249 		i915_sw_fence_wait(fence);
2250 		if (!i915_request_completed(rq)) {
2251 			__i915_request_skip(rq);
2252 			guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head),
2253 					true);
2254 		}
2255 
2256 		/*
2257 		 * XXX: Racey if context is reset, see comment in
2258 		 * __guc_reset_context().
2259 		 */
2260 		flush_work(&ce_to_guc(ce)->ct.requests.worker);
2261 
2262 		guc_context_unblock(block_context);
2263 		intel_context_put(ce);
2264 	}
2265 }
2266 
2267 static void __guc_context_set_preemption_timeout(struct intel_guc *guc,
2268 						 u16 guc_id,
2269 						 u32 preemption_timeout)
2270 {
2271 	u32 action[] = {
2272 		INTEL_GUC_ACTION_SET_CONTEXT_PREEMPTION_TIMEOUT,
2273 		guc_id,
2274 		preemption_timeout
2275 	};
2276 
2277 	intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
2278 }
2279 
2280 static void guc_context_ban(struct intel_context *ce, struct i915_request *rq)
2281 {
2282 	struct intel_guc *guc = ce_to_guc(ce);
2283 	struct intel_runtime_pm *runtime_pm =
2284 		&ce->engine->gt->i915->runtime_pm;
2285 	intel_wakeref_t wakeref;
2286 	unsigned long flags;
2287 
2288 	GEM_BUG_ON(intel_context_is_child(ce));
2289 
2290 	guc_flush_submissions(guc);
2291 
2292 	spin_lock_irqsave(&ce->guc_state.lock, flags);
2293 	set_context_banned(ce);
2294 
2295 	if (submission_disabled(guc) ||
2296 	    (!context_enabled(ce) && !context_pending_disable(ce))) {
2297 		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2298 
2299 		guc_cancel_context_requests(ce);
2300 		intel_engine_signal_breadcrumbs(ce->engine);
2301 	} else if (!context_pending_disable(ce)) {
2302 		u16 guc_id;
2303 
2304 		/*
2305 		 * We add +2 here as the schedule disable complete CTB handler
2306 		 * calls intel_context_sched_disable_unpin (-2 to pin_count).
2307 		 */
2308 		atomic_add(2, &ce->pin_count);
2309 
2310 		guc_id = prep_context_pending_disable(ce);
2311 		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2312 
2313 		/*
2314 		 * In addition to disabling scheduling, set the preemption
2315 		 * timeout to the minimum value (1 us) so the banned context
2316 		 * gets kicked off the HW ASAP.
2317 		 */
2318 		with_intel_runtime_pm(runtime_pm, wakeref) {
2319 			__guc_context_set_preemption_timeout(guc, guc_id, 1);
2320 			__guc_context_sched_disable(guc, ce, guc_id);
2321 		}
2322 	} else {
2323 		if (!context_guc_id_invalid(ce))
2324 			with_intel_runtime_pm(runtime_pm, wakeref)
2325 				__guc_context_set_preemption_timeout(guc,
2326 								     ce->guc_id.id,
2327 								     1);
2328 		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2329 	}
2330 }
2331 
2332 static void guc_context_sched_disable(struct intel_context *ce)
2333 {
2334 	struct intel_guc *guc = ce_to_guc(ce);
2335 	unsigned long flags;
2336 	struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm;
2337 	intel_wakeref_t wakeref;
2338 	u16 guc_id;
2339 
2340 	GEM_BUG_ON(intel_context_is_child(ce));
2341 
2342 	spin_lock_irqsave(&ce->guc_state.lock, flags);
2343 
2344 	/*
2345 	 * We have to check if the context has been disabled by another thread,
2346 	 * check if submssion has been disabled to seal a race with reset and
2347 	 * finally check if any more requests have been committed to the
2348 	 * context ensursing that a request doesn't slip through the
2349 	 * 'context_pending_disable' fence.
2350 	 */
2351 	if (unlikely(!context_enabled(ce) || submission_disabled(guc) ||
2352 		     context_has_committed_requests(ce))) {
2353 		clr_context_enabled(ce);
2354 		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2355 		goto unpin;
2356 	}
2357 	guc_id = prep_context_pending_disable(ce);
2358 
2359 	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2360 
2361 	with_intel_runtime_pm(runtime_pm, wakeref)
2362 		__guc_context_sched_disable(guc, ce, guc_id);
2363 
2364 	return;
2365 unpin:
2366 	intel_context_sched_disable_unpin(ce);
2367 }
2368 
2369 static inline void guc_lrc_desc_unpin(struct intel_context *ce)
2370 {
2371 	struct intel_guc *guc = ce_to_guc(ce);
2372 	struct intel_gt *gt = guc_to_gt(guc);
2373 	unsigned long flags;
2374 	bool disabled;
2375 
2376 	lockdep_assert_held(&guc->submission_state.lock);
2377 	GEM_BUG_ON(!intel_gt_pm_is_awake(gt));
2378 	GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id.id));
2379 	GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id));
2380 	GEM_BUG_ON(context_enabled(ce));
2381 
2382 	/* Seal race with Reset */
2383 	spin_lock_irqsave(&ce->guc_state.lock, flags);
2384 	disabled = submission_disabled(guc);
2385 	if (likely(!disabled)) {
2386 		__intel_gt_pm_get(gt);
2387 		set_context_destroyed(ce);
2388 		clr_context_registered(ce);
2389 	}
2390 	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2391 	if (unlikely(disabled)) {
2392 		__release_guc_id(guc, ce);
2393 		__guc_context_destroy(ce);
2394 		return;
2395 	}
2396 
2397 	deregister_context(ce, ce->guc_id.id);
2398 }
2399 
2400 static void __guc_context_destroy(struct intel_context *ce)
2401 {
2402 	GEM_BUG_ON(ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] ||
2403 		   ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] ||
2404 		   ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] ||
2405 		   ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]);
2406 	GEM_BUG_ON(ce->guc_state.number_committed_requests);
2407 
2408 	lrc_fini(ce);
2409 	intel_context_fini(ce);
2410 
2411 	if (intel_engine_is_virtual(ce->engine)) {
2412 		struct guc_virtual_engine *ve =
2413 			container_of(ce, typeof(*ve), context);
2414 
2415 		if (ve->base.breadcrumbs)
2416 			intel_breadcrumbs_put(ve->base.breadcrumbs);
2417 
2418 		kfree(ve);
2419 	} else {
2420 		intel_context_free(ce);
2421 	}
2422 }
2423 
2424 static void guc_flush_destroyed_contexts(struct intel_guc *guc)
2425 {
2426 	struct intel_context *ce, *cn;
2427 	unsigned long flags;
2428 
2429 	GEM_BUG_ON(!submission_disabled(guc) &&
2430 		   guc_submission_initialized(guc));
2431 
2432 	spin_lock_irqsave(&guc->submission_state.lock, flags);
2433 	list_for_each_entry_safe(ce, cn,
2434 				 &guc->submission_state.destroyed_contexts,
2435 				 destroyed_link) {
2436 		list_del_init(&ce->destroyed_link);
2437 		__release_guc_id(guc, ce);
2438 		__guc_context_destroy(ce);
2439 	}
2440 	spin_unlock_irqrestore(&guc->submission_state.lock, flags);
2441 }
2442 
2443 static void deregister_destroyed_contexts(struct intel_guc *guc)
2444 {
2445 	struct intel_context *ce, *cn;
2446 	unsigned long flags;
2447 
2448 	spin_lock_irqsave(&guc->submission_state.lock, flags);
2449 	list_for_each_entry_safe(ce, cn,
2450 				 &guc->submission_state.destroyed_contexts,
2451 				 destroyed_link) {
2452 		list_del_init(&ce->destroyed_link);
2453 		guc_lrc_desc_unpin(ce);
2454 	}
2455 	spin_unlock_irqrestore(&guc->submission_state.lock, flags);
2456 }
2457 
2458 static void destroyed_worker_func(struct work_struct *w)
2459 {
2460 	struct intel_guc *guc = container_of(w, struct intel_guc,
2461 					     submission_state.destroyed_worker);
2462 	struct intel_gt *gt = guc_to_gt(guc);
2463 	int tmp;
2464 
2465 	with_intel_gt_pm(gt, tmp)
2466 		deregister_destroyed_contexts(guc);
2467 }
2468 
2469 static void guc_context_destroy(struct kref *kref)
2470 {
2471 	struct intel_context *ce = container_of(kref, typeof(*ce), ref);
2472 	struct intel_guc *guc = ce_to_guc(ce);
2473 	unsigned long flags;
2474 	bool destroy;
2475 
2476 	/*
2477 	 * If the guc_id is invalid this context has been stolen and we can free
2478 	 * it immediately. Also can be freed immediately if the context is not
2479 	 * registered with the GuC or the GuC is in the middle of a reset.
2480 	 */
2481 	spin_lock_irqsave(&guc->submission_state.lock, flags);
2482 	destroy = submission_disabled(guc) || context_guc_id_invalid(ce) ||
2483 		!lrc_desc_registered(guc, ce->guc_id.id);
2484 	if (likely(!destroy)) {
2485 		if (!list_empty(&ce->guc_id.link))
2486 			list_del_init(&ce->guc_id.link);
2487 		list_add_tail(&ce->destroyed_link,
2488 			      &guc->submission_state.destroyed_contexts);
2489 	} else {
2490 		__release_guc_id(guc, ce);
2491 	}
2492 	spin_unlock_irqrestore(&guc->submission_state.lock, flags);
2493 	if (unlikely(destroy)) {
2494 		__guc_context_destroy(ce);
2495 		return;
2496 	}
2497 
2498 	/*
2499 	 * We use a worker to issue the H2G to deregister the context as we can
2500 	 * take the GT PM for the first time which isn't allowed from an atomic
2501 	 * context.
2502 	 */
2503 	queue_work(system_unbound_wq, &guc->submission_state.destroyed_worker);
2504 }
2505 
2506 static int guc_context_alloc(struct intel_context *ce)
2507 {
2508 	return lrc_alloc(ce, ce->engine);
2509 }
2510 
2511 static void guc_context_set_prio(struct intel_guc *guc,
2512 				 struct intel_context *ce,
2513 				 u8 prio)
2514 {
2515 	u32 action[] = {
2516 		INTEL_GUC_ACTION_SET_CONTEXT_PRIORITY,
2517 		ce->guc_id.id,
2518 		prio,
2519 	};
2520 
2521 	GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH ||
2522 		   prio > GUC_CLIENT_PRIORITY_NORMAL);
2523 	lockdep_assert_held(&ce->guc_state.lock);
2524 
2525 	if (ce->guc_state.prio == prio || submission_disabled(guc) ||
2526 	    !context_registered(ce)) {
2527 		ce->guc_state.prio = prio;
2528 		return;
2529 	}
2530 
2531 	guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
2532 
2533 	ce->guc_state.prio = prio;
2534 	trace_intel_context_set_prio(ce);
2535 }
2536 
2537 static inline u8 map_i915_prio_to_guc_prio(int prio)
2538 {
2539 	if (prio == I915_PRIORITY_NORMAL)
2540 		return GUC_CLIENT_PRIORITY_KMD_NORMAL;
2541 	else if (prio < I915_PRIORITY_NORMAL)
2542 		return GUC_CLIENT_PRIORITY_NORMAL;
2543 	else if (prio < I915_PRIORITY_DISPLAY)
2544 		return GUC_CLIENT_PRIORITY_HIGH;
2545 	else
2546 		return GUC_CLIENT_PRIORITY_KMD_HIGH;
2547 }
2548 
2549 static inline void add_context_inflight_prio(struct intel_context *ce,
2550 					     u8 guc_prio)
2551 {
2552 	lockdep_assert_held(&ce->guc_state.lock);
2553 	GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count));
2554 
2555 	++ce->guc_state.prio_count[guc_prio];
2556 
2557 	/* Overflow protection */
2558 	GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]);
2559 }
2560 
2561 static inline void sub_context_inflight_prio(struct intel_context *ce,
2562 					     u8 guc_prio)
2563 {
2564 	lockdep_assert_held(&ce->guc_state.lock);
2565 	GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count));
2566 
2567 	/* Underflow protection */
2568 	GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]);
2569 
2570 	--ce->guc_state.prio_count[guc_prio];
2571 }
2572 
2573 static inline void update_context_prio(struct intel_context *ce)
2574 {
2575 	struct intel_guc *guc = &ce->engine->gt->uc.guc;
2576 	int i;
2577 
2578 	BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0);
2579 	BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL);
2580 
2581 	lockdep_assert_held(&ce->guc_state.lock);
2582 
2583 	for (i = 0; i < ARRAY_SIZE(ce->guc_state.prio_count); ++i) {
2584 		if (ce->guc_state.prio_count[i]) {
2585 			guc_context_set_prio(guc, ce, i);
2586 			break;
2587 		}
2588 	}
2589 }
2590 
2591 static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio)
2592 {
2593 	/* Lower value is higher priority */
2594 	return new_guc_prio < old_guc_prio;
2595 }
2596 
2597 static void add_to_context(struct i915_request *rq)
2598 {
2599 	struct intel_context *ce = request_to_scheduling_context(rq);
2600 	u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq));
2601 
2602 	GEM_BUG_ON(intel_context_is_child(ce));
2603 	GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI);
2604 
2605 	spin_lock(&ce->guc_state.lock);
2606 	list_move_tail(&rq->sched.link, &ce->guc_state.requests);
2607 
2608 	if (rq->guc_prio == GUC_PRIO_INIT) {
2609 		rq->guc_prio = new_guc_prio;
2610 		add_context_inflight_prio(ce, rq->guc_prio);
2611 	} else if (new_guc_prio_higher(rq->guc_prio, new_guc_prio)) {
2612 		sub_context_inflight_prio(ce, rq->guc_prio);
2613 		rq->guc_prio = new_guc_prio;
2614 		add_context_inflight_prio(ce, rq->guc_prio);
2615 	}
2616 	update_context_prio(ce);
2617 
2618 	spin_unlock(&ce->guc_state.lock);
2619 }
2620 
2621 static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce)
2622 {
2623 	lockdep_assert_held(&ce->guc_state.lock);
2624 
2625 	if (rq->guc_prio != GUC_PRIO_INIT &&
2626 	    rq->guc_prio != GUC_PRIO_FINI) {
2627 		sub_context_inflight_prio(ce, rq->guc_prio);
2628 		update_context_prio(ce);
2629 	}
2630 	rq->guc_prio = GUC_PRIO_FINI;
2631 }
2632 
2633 static void remove_from_context(struct i915_request *rq)
2634 {
2635 	struct intel_context *ce = request_to_scheduling_context(rq);
2636 
2637 	GEM_BUG_ON(intel_context_is_child(ce));
2638 
2639 	spin_lock_irq(&ce->guc_state.lock);
2640 
2641 	list_del_init(&rq->sched.link);
2642 	clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2643 
2644 	/* Prevent further __await_execution() registering a cb, then flush */
2645 	set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
2646 
2647 	guc_prio_fini(rq, ce);
2648 
2649 	decr_context_committed_requests(ce);
2650 
2651 	spin_unlock_irq(&ce->guc_state.lock);
2652 
2653 	atomic_dec(&ce->guc_id.ref);
2654 	i915_request_notify_execute_cb_imm(rq);
2655 }
2656 
2657 static const struct intel_context_ops guc_context_ops = {
2658 	.alloc = guc_context_alloc,
2659 
2660 	.pre_pin = guc_context_pre_pin,
2661 	.pin = guc_context_pin,
2662 	.unpin = guc_context_unpin,
2663 	.post_unpin = guc_context_post_unpin,
2664 
2665 	.ban = guc_context_ban,
2666 
2667 	.cancel_request = guc_context_cancel_request,
2668 
2669 	.enter = intel_context_enter_engine,
2670 	.exit = intel_context_exit_engine,
2671 
2672 	.sched_disable = guc_context_sched_disable,
2673 
2674 	.reset = lrc_reset,
2675 	.destroy = guc_context_destroy,
2676 
2677 	.create_virtual = guc_create_virtual,
2678 	.create_parallel = guc_create_parallel,
2679 };
2680 
2681 static void submit_work_cb(struct irq_work *wrk)
2682 {
2683 	struct i915_request *rq = container_of(wrk, typeof(*rq), submit_work);
2684 
2685 	might_lock(&rq->engine->sched_engine->lock);
2686 	i915_sw_fence_complete(&rq->submit);
2687 }
2688 
2689 static void __guc_signal_context_fence(struct intel_context *ce)
2690 {
2691 	struct i915_request *rq, *rn;
2692 
2693 	lockdep_assert_held(&ce->guc_state.lock);
2694 
2695 	if (!list_empty(&ce->guc_state.fences))
2696 		trace_intel_context_fence_release(ce);
2697 
2698 	/*
2699 	 * Use an IRQ to ensure locking order of sched_engine->lock ->
2700 	 * ce->guc_state.lock is preserved.
2701 	 */
2702 	list_for_each_entry_safe(rq, rn, &ce->guc_state.fences,
2703 				 guc_fence_link) {
2704 		list_del(&rq->guc_fence_link);
2705 		irq_work_queue(&rq->submit_work);
2706 	}
2707 
2708 	INIT_LIST_HEAD(&ce->guc_state.fences);
2709 }
2710 
2711 static void guc_signal_context_fence(struct intel_context *ce)
2712 {
2713 	unsigned long flags;
2714 
2715 	GEM_BUG_ON(intel_context_is_child(ce));
2716 
2717 	spin_lock_irqsave(&ce->guc_state.lock, flags);
2718 	clr_context_wait_for_deregister_to_register(ce);
2719 	__guc_signal_context_fence(ce);
2720 	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2721 }
2722 
2723 static bool context_needs_register(struct intel_context *ce, bool new_guc_id)
2724 {
2725 	return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) ||
2726 		!lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id)) &&
2727 		!submission_disabled(ce_to_guc(ce));
2728 }
2729 
2730 static void guc_context_init(struct intel_context *ce)
2731 {
2732 	const struct i915_gem_context *ctx;
2733 	int prio = I915_CONTEXT_DEFAULT_PRIORITY;
2734 
2735 	rcu_read_lock();
2736 	ctx = rcu_dereference(ce->gem_context);
2737 	if (ctx)
2738 		prio = ctx->sched.priority;
2739 	rcu_read_unlock();
2740 
2741 	ce->guc_state.prio = map_i915_prio_to_guc_prio(prio);
2742 	set_bit(CONTEXT_GUC_INIT, &ce->flags);
2743 }
2744 
2745 static int guc_request_alloc(struct i915_request *rq)
2746 {
2747 	struct intel_context *ce = request_to_scheduling_context(rq);
2748 	struct intel_guc *guc = ce_to_guc(ce);
2749 	unsigned long flags;
2750 	int ret;
2751 
2752 	GEM_BUG_ON(!intel_context_is_pinned(rq->context));
2753 
2754 	/*
2755 	 * Flush enough space to reduce the likelihood of waiting after
2756 	 * we start building the request - in which case we will just
2757 	 * have to repeat work.
2758 	 */
2759 	rq->reserved_space += GUC_REQUEST_SIZE;
2760 
2761 	/*
2762 	 * Note that after this point, we have committed to using
2763 	 * this request as it is being used to both track the
2764 	 * state of engine initialisation and liveness of the
2765 	 * golden renderstate above. Think twice before you try
2766 	 * to cancel/unwind this request now.
2767 	 */
2768 
2769 	/* Unconditionally invalidate GPU caches and TLBs. */
2770 	ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
2771 	if (ret)
2772 		return ret;
2773 
2774 	rq->reserved_space -= GUC_REQUEST_SIZE;
2775 
2776 	if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags)))
2777 		guc_context_init(ce);
2778 
2779 	/*
2780 	 * Call pin_guc_id here rather than in the pinning step as with
2781 	 * dma_resv, contexts can be repeatedly pinned / unpinned trashing the
2782 	 * guc_id and creating horrible race conditions. This is especially bad
2783 	 * when guc_id are being stolen due to over subscription. By the time
2784 	 * this function is reached, it is guaranteed that the guc_id will be
2785 	 * persistent until the generated request is retired. Thus, sealing these
2786 	 * race conditions. It is still safe to fail here if guc_id are
2787 	 * exhausted and return -EAGAIN to the user indicating that they can try
2788 	 * again in the future.
2789 	 *
2790 	 * There is no need for a lock here as the timeline mutex ensures at
2791 	 * most one context can be executing this code path at once. The
2792 	 * guc_id_ref is incremented once for every request in flight and
2793 	 * decremented on each retire. When it is zero, a lock around the
2794 	 * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id.
2795 	 */
2796 	if (atomic_add_unless(&ce->guc_id.ref, 1, 0))
2797 		goto out;
2798 
2799 	ret = pin_guc_id(guc, ce);	/* returns 1 if new guc_id assigned */
2800 	if (unlikely(ret < 0))
2801 		return ret;
2802 	if (context_needs_register(ce, !!ret)) {
2803 		ret = guc_lrc_desc_pin(ce, true);
2804 		if (unlikely(ret)) {	/* unwind */
2805 			if (ret == -EPIPE) {
2806 				disable_submission(guc);
2807 				goto out;	/* GPU will be reset */
2808 			}
2809 			atomic_dec(&ce->guc_id.ref);
2810 			unpin_guc_id(guc, ce);
2811 			return ret;
2812 		}
2813 	}
2814 
2815 	clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
2816 
2817 out:
2818 	/*
2819 	 * We block all requests on this context if a G2H is pending for a
2820 	 * schedule disable or context deregistration as the GuC will fail a
2821 	 * schedule enable or context registration if either G2H is pending
2822 	 * respectfully. Once a G2H returns, the fence is released that is
2823 	 * blocking these requests (see guc_signal_context_fence).
2824 	 */
2825 	spin_lock_irqsave(&ce->guc_state.lock, flags);
2826 	if (context_wait_for_deregister_to_register(ce) ||
2827 	    context_pending_disable(ce)) {
2828 		init_irq_work(&rq->submit_work, submit_work_cb);
2829 		i915_sw_fence_await(&rq->submit);
2830 
2831 		list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences);
2832 	}
2833 	incr_context_committed_requests(ce);
2834 	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2835 
2836 	return 0;
2837 }
2838 
2839 static int guc_virtual_context_pre_pin(struct intel_context *ce,
2840 				       struct i915_gem_ww_ctx *ww,
2841 				       void **vaddr)
2842 {
2843 	struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
2844 
2845 	return __guc_context_pre_pin(ce, engine, ww, vaddr);
2846 }
2847 
2848 static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr)
2849 {
2850 	struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
2851 	int ret = __guc_context_pin(ce, engine, vaddr);
2852 	intel_engine_mask_t tmp, mask = ce->engine->mask;
2853 
2854 	if (likely(!ret))
2855 		for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
2856 			intel_engine_pm_get(engine);
2857 
2858 	return ret;
2859 }
2860 
2861 static void guc_virtual_context_unpin(struct intel_context *ce)
2862 {
2863 	intel_engine_mask_t tmp, mask = ce->engine->mask;
2864 	struct intel_engine_cs *engine;
2865 	struct intel_guc *guc = ce_to_guc(ce);
2866 
2867 	GEM_BUG_ON(context_enabled(ce));
2868 	GEM_BUG_ON(intel_context_is_barrier(ce));
2869 
2870 	unpin_guc_id(guc, ce);
2871 	lrc_unpin(ce);
2872 
2873 	for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
2874 		intel_engine_pm_put_async(engine);
2875 }
2876 
2877 static void guc_virtual_context_enter(struct intel_context *ce)
2878 {
2879 	intel_engine_mask_t tmp, mask = ce->engine->mask;
2880 	struct intel_engine_cs *engine;
2881 
2882 	for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
2883 		intel_engine_pm_get(engine);
2884 
2885 	intel_timeline_enter(ce->timeline);
2886 }
2887 
2888 static void guc_virtual_context_exit(struct intel_context *ce)
2889 {
2890 	intel_engine_mask_t tmp, mask = ce->engine->mask;
2891 	struct intel_engine_cs *engine;
2892 
2893 	for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
2894 		intel_engine_pm_put(engine);
2895 
2896 	intel_timeline_exit(ce->timeline);
2897 }
2898 
2899 static int guc_virtual_context_alloc(struct intel_context *ce)
2900 {
2901 	struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
2902 
2903 	return lrc_alloc(ce, engine);
2904 }
2905 
2906 static const struct intel_context_ops virtual_guc_context_ops = {
2907 	.alloc = guc_virtual_context_alloc,
2908 
2909 	.pre_pin = guc_virtual_context_pre_pin,
2910 	.pin = guc_virtual_context_pin,
2911 	.unpin = guc_virtual_context_unpin,
2912 	.post_unpin = guc_context_post_unpin,
2913 
2914 	.ban = guc_context_ban,
2915 
2916 	.cancel_request = guc_context_cancel_request,
2917 
2918 	.enter = guc_virtual_context_enter,
2919 	.exit = guc_virtual_context_exit,
2920 
2921 	.sched_disable = guc_context_sched_disable,
2922 
2923 	.destroy = guc_context_destroy,
2924 
2925 	.get_sibling = guc_virtual_get_sibling,
2926 };
2927 
2928 static int guc_parent_context_pin(struct intel_context *ce, void *vaddr)
2929 {
2930 	struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
2931 	struct intel_guc *guc = ce_to_guc(ce);
2932 	int ret;
2933 
2934 	GEM_BUG_ON(!intel_context_is_parent(ce));
2935 	GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
2936 
2937 	ret = pin_guc_id(guc, ce);
2938 	if (unlikely(ret < 0))
2939 		return ret;
2940 
2941 	return __guc_context_pin(ce, engine, vaddr);
2942 }
2943 
2944 static int guc_child_context_pin(struct intel_context *ce, void *vaddr)
2945 {
2946 	struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
2947 
2948 	GEM_BUG_ON(!intel_context_is_child(ce));
2949 	GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
2950 
2951 	__intel_context_pin(ce->parallel.parent);
2952 	return __guc_context_pin(ce, engine, vaddr);
2953 }
2954 
2955 static void guc_parent_context_unpin(struct intel_context *ce)
2956 {
2957 	struct intel_guc *guc = ce_to_guc(ce);
2958 
2959 	GEM_BUG_ON(context_enabled(ce));
2960 	GEM_BUG_ON(intel_context_is_barrier(ce));
2961 	GEM_BUG_ON(!intel_context_is_parent(ce));
2962 	GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
2963 
2964 	if (ce->parallel.last_rq)
2965 		i915_request_put(ce->parallel.last_rq);
2966 	unpin_guc_id(guc, ce);
2967 	lrc_unpin(ce);
2968 }
2969 
2970 static void guc_child_context_unpin(struct intel_context *ce)
2971 {
2972 	GEM_BUG_ON(context_enabled(ce));
2973 	GEM_BUG_ON(intel_context_is_barrier(ce));
2974 	GEM_BUG_ON(!intel_context_is_child(ce));
2975 	GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
2976 
2977 	lrc_unpin(ce);
2978 }
2979 
2980 static void guc_child_context_post_unpin(struct intel_context *ce)
2981 {
2982 	GEM_BUG_ON(!intel_context_is_child(ce));
2983 	GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent));
2984 	GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
2985 
2986 	lrc_post_unpin(ce);
2987 	intel_context_unpin(ce->parallel.parent);
2988 }
2989 
2990 static void guc_child_context_destroy(struct kref *kref)
2991 {
2992 	struct intel_context *ce = container_of(kref, typeof(*ce), ref);
2993 
2994 	__guc_context_destroy(ce);
2995 }
2996 
2997 static const struct intel_context_ops virtual_parent_context_ops = {
2998 	.alloc = guc_virtual_context_alloc,
2999 
3000 	.pre_pin = guc_context_pre_pin,
3001 	.pin = guc_parent_context_pin,
3002 	.unpin = guc_parent_context_unpin,
3003 	.post_unpin = guc_context_post_unpin,
3004 
3005 	.ban = guc_context_ban,
3006 
3007 	.cancel_request = guc_context_cancel_request,
3008 
3009 	.enter = guc_virtual_context_enter,
3010 	.exit = guc_virtual_context_exit,
3011 
3012 	.sched_disable = guc_context_sched_disable,
3013 
3014 	.destroy = guc_context_destroy,
3015 
3016 	.get_sibling = guc_virtual_get_sibling,
3017 };
3018 
3019 static const struct intel_context_ops virtual_child_context_ops = {
3020 	.alloc = guc_virtual_context_alloc,
3021 
3022 	.pre_pin = guc_context_pre_pin,
3023 	.pin = guc_child_context_pin,
3024 	.unpin = guc_child_context_unpin,
3025 	.post_unpin = guc_child_context_post_unpin,
3026 
3027 	.cancel_request = guc_context_cancel_request,
3028 
3029 	.enter = guc_virtual_context_enter,
3030 	.exit = guc_virtual_context_exit,
3031 
3032 	.destroy = guc_child_context_destroy,
3033 
3034 	.get_sibling = guc_virtual_get_sibling,
3035 };
3036 
3037 /*
3038  * The below override of the breadcrumbs is enabled when the user configures a
3039  * context for parallel submission (multi-lrc, parent-child).
3040  *
3041  * The overridden breadcrumbs implements an algorithm which allows the GuC to
3042  * safely preempt all the hw contexts configured for parallel submission
3043  * between each BB. The contract between the i915 and GuC is if the parent
3044  * context can be preempted, all the children can be preempted, and the GuC will
3045  * always try to preempt the parent before the children. A handshake between the
3046  * parent / children breadcrumbs ensures the i915 holds up its end of the deal
3047  * creating a window to preempt between each set of BBs.
3048  */
3049 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq,
3050 						     u64 offset, u32 len,
3051 						     const unsigned int flags);
3052 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
3053 						    u64 offset, u32 len,
3054 						    const unsigned int flags);
3055 static u32 *
3056 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
3057 						 u32 *cs);
3058 static u32 *
3059 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
3060 						u32 *cs);
3061 
3062 static struct intel_context *
3063 guc_create_parallel(struct intel_engine_cs **engines,
3064 		    unsigned int num_siblings,
3065 		    unsigned int width)
3066 {
3067 	struct intel_engine_cs **siblings = NULL;
3068 	struct intel_context *parent = NULL, *ce, *err;
3069 	int i, j;
3070 
3071 	siblings = kmalloc_array(num_siblings,
3072 				 sizeof(*siblings),
3073 				 GFP_KERNEL);
3074 	if (!siblings)
3075 		return ERR_PTR(-ENOMEM);
3076 
3077 	for (i = 0; i < width; ++i) {
3078 		for (j = 0; j < num_siblings; ++j)
3079 			siblings[j] = engines[i * num_siblings + j];
3080 
3081 		ce = intel_engine_create_virtual(siblings, num_siblings,
3082 						 FORCE_VIRTUAL);
3083 		if (IS_ERR(ce)) {
3084 			err = ERR_CAST(ce);
3085 			goto unwind;
3086 		}
3087 
3088 		if (i == 0) {
3089 			parent = ce;
3090 			parent->ops = &virtual_parent_context_ops;
3091 		} else {
3092 			ce->ops = &virtual_child_context_ops;
3093 			intel_context_bind_parent_child(parent, ce);
3094 		}
3095 	}
3096 
3097 	parent->parallel.fence_context = dma_fence_context_alloc(1);
3098 
3099 	parent->engine->emit_bb_start =
3100 		emit_bb_start_parent_no_preempt_mid_batch;
3101 	parent->engine->emit_fini_breadcrumb =
3102 		emit_fini_breadcrumb_parent_no_preempt_mid_batch;
3103 	parent->engine->emit_fini_breadcrumb_dw =
3104 		12 + 4 * parent->parallel.number_children;
3105 	for_each_child(parent, ce) {
3106 		ce->engine->emit_bb_start =
3107 			emit_bb_start_child_no_preempt_mid_batch;
3108 		ce->engine->emit_fini_breadcrumb =
3109 			emit_fini_breadcrumb_child_no_preempt_mid_batch;
3110 		ce->engine->emit_fini_breadcrumb_dw = 16;
3111 	}
3112 
3113 	kfree(siblings);
3114 	return parent;
3115 
3116 unwind:
3117 	if (parent)
3118 		intel_context_put(parent);
3119 	kfree(siblings);
3120 	return err;
3121 }
3122 
3123 static bool
3124 guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b)
3125 {
3126 	struct intel_engine_cs *sibling;
3127 	intel_engine_mask_t tmp, mask = b->engine_mask;
3128 	bool result = false;
3129 
3130 	for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp)
3131 		result |= intel_engine_irq_enable(sibling);
3132 
3133 	return result;
3134 }
3135 
3136 static void
3137 guc_irq_disable_breadcrumbs(struct intel_breadcrumbs *b)
3138 {
3139 	struct intel_engine_cs *sibling;
3140 	intel_engine_mask_t tmp, mask = b->engine_mask;
3141 
3142 	for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp)
3143 		intel_engine_irq_disable(sibling);
3144 }
3145 
3146 static void guc_init_breadcrumbs(struct intel_engine_cs *engine)
3147 {
3148 	int i;
3149 
3150 	/*
3151 	 * In GuC submission mode we do not know which physical engine a request
3152 	 * will be scheduled on, this creates a problem because the breadcrumb
3153 	 * interrupt is per physical engine. To work around this we attach
3154 	 * requests and direct all breadcrumb interrupts to the first instance
3155 	 * of an engine per class. In addition all breadcrumb interrupts are
3156 	 * enabled / disabled across an engine class in unison.
3157 	 */
3158 	for (i = 0; i < MAX_ENGINE_INSTANCE; ++i) {
3159 		struct intel_engine_cs *sibling =
3160 			engine->gt->engine_class[engine->class][i];
3161 
3162 		if (sibling) {
3163 			if (engine->breadcrumbs != sibling->breadcrumbs) {
3164 				intel_breadcrumbs_put(engine->breadcrumbs);
3165 				engine->breadcrumbs =
3166 					intel_breadcrumbs_get(sibling->breadcrumbs);
3167 			}
3168 			break;
3169 		}
3170 	}
3171 
3172 	if (engine->breadcrumbs) {
3173 		engine->breadcrumbs->engine_mask |= engine->mask;
3174 		engine->breadcrumbs->irq_enable = guc_irq_enable_breadcrumbs;
3175 		engine->breadcrumbs->irq_disable = guc_irq_disable_breadcrumbs;
3176 	}
3177 }
3178 
3179 static void guc_bump_inflight_request_prio(struct i915_request *rq,
3180 					   int prio)
3181 {
3182 	struct intel_context *ce = request_to_scheduling_context(rq);
3183 	u8 new_guc_prio = map_i915_prio_to_guc_prio(prio);
3184 
3185 	/* Short circuit function */
3186 	if (prio < I915_PRIORITY_NORMAL ||
3187 	    rq->guc_prio == GUC_PRIO_FINI ||
3188 	    (rq->guc_prio != GUC_PRIO_INIT &&
3189 	     !new_guc_prio_higher(rq->guc_prio, new_guc_prio)))
3190 		return;
3191 
3192 	spin_lock(&ce->guc_state.lock);
3193 	if (rq->guc_prio != GUC_PRIO_FINI) {
3194 		if (rq->guc_prio != GUC_PRIO_INIT)
3195 			sub_context_inflight_prio(ce, rq->guc_prio);
3196 		rq->guc_prio = new_guc_prio;
3197 		add_context_inflight_prio(ce, rq->guc_prio);
3198 		update_context_prio(ce);
3199 	}
3200 	spin_unlock(&ce->guc_state.lock);
3201 }
3202 
3203 static void guc_retire_inflight_request_prio(struct i915_request *rq)
3204 {
3205 	struct intel_context *ce = request_to_scheduling_context(rq);
3206 
3207 	spin_lock(&ce->guc_state.lock);
3208 	guc_prio_fini(rq, ce);
3209 	spin_unlock(&ce->guc_state.lock);
3210 }
3211 
3212 static void sanitize_hwsp(struct intel_engine_cs *engine)
3213 {
3214 	struct intel_timeline *tl;
3215 
3216 	list_for_each_entry(tl, &engine->status_page.timelines, engine_link)
3217 		intel_timeline_reset_seqno(tl);
3218 }
3219 
3220 static void guc_sanitize(struct intel_engine_cs *engine)
3221 {
3222 	/*
3223 	 * Poison residual state on resume, in case the suspend didn't!
3224 	 *
3225 	 * We have to assume that across suspend/resume (or other loss
3226 	 * of control) that the contents of our pinned buffers has been
3227 	 * lost, replaced by garbage. Since this doesn't always happen,
3228 	 * let's poison such state so that we more quickly spot when
3229 	 * we falsely assume it has been preserved.
3230 	 */
3231 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
3232 		memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);
3233 
3234 	/*
3235 	 * The kernel_context HWSP is stored in the status_page. As above,
3236 	 * that may be lost on resume/initialisation, and so we need to
3237 	 * reset the value in the HWSP.
3238 	 */
3239 	sanitize_hwsp(engine);
3240 
3241 	/* And scrub the dirty cachelines for the HWSP */
3242 	clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
3243 
3244 	intel_engine_reset_pinned_contexts(engine);
3245 }
3246 
3247 static void setup_hwsp(struct intel_engine_cs *engine)
3248 {
3249 	intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
3250 
3251 	ENGINE_WRITE_FW(engine,
3252 			RING_HWS_PGA,
3253 			i915_ggtt_offset(engine->status_page.vma));
3254 }
3255 
3256 static void start_engine(struct intel_engine_cs *engine)
3257 {
3258 	ENGINE_WRITE_FW(engine,
3259 			RING_MODE_GEN7,
3260 			_MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));
3261 
3262 	ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
3263 	ENGINE_POSTING_READ(engine, RING_MI_MODE);
3264 }
3265 
3266 static int guc_resume(struct intel_engine_cs *engine)
3267 {
3268 	assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
3269 
3270 	intel_mocs_init_engine(engine);
3271 
3272 	intel_breadcrumbs_reset(engine->breadcrumbs);
3273 
3274 	setup_hwsp(engine);
3275 	start_engine(engine);
3276 
3277 	return 0;
3278 }
3279 
3280 static bool guc_sched_engine_disabled(struct i915_sched_engine *sched_engine)
3281 {
3282 	return !sched_engine->tasklet.callback;
3283 }
3284 
3285 static void guc_set_default_submission(struct intel_engine_cs *engine)
3286 {
3287 	engine->submit_request = guc_submit_request;
3288 }
3289 
3290 static inline void guc_kernel_context_pin(struct intel_guc *guc,
3291 					  struct intel_context *ce)
3292 {
3293 	if (context_guc_id_invalid(ce))
3294 		pin_guc_id(guc, ce);
3295 	guc_lrc_desc_pin(ce, true);
3296 }
3297 
3298 static inline void guc_init_lrc_mapping(struct intel_guc *guc)
3299 {
3300 	struct intel_gt *gt = guc_to_gt(guc);
3301 	struct intel_engine_cs *engine;
3302 	enum intel_engine_id id;
3303 
3304 	/* make sure all descriptors are clean... */
3305 	xa_destroy(&guc->context_lookup);
3306 
3307 	/*
3308 	 * Some contexts might have been pinned before we enabled GuC
3309 	 * submission, so we need to add them to the GuC bookeeping.
3310 	 * Also, after a reset the of the GuC we want to make sure that the
3311 	 * information shared with GuC is properly reset. The kernel LRCs are
3312 	 * not attached to the gem_context, so they need to be added separately.
3313 	 *
3314 	 * Note: we purposefully do not check the return of guc_lrc_desc_pin,
3315 	 * because that function can only fail if a reset is just starting. This
3316 	 * is at the end of reset so presumably another reset isn't happening
3317 	 * and even it did this code would be run again.
3318 	 */
3319 
3320 	for_each_engine(engine, gt, id) {
3321 		struct intel_context *ce;
3322 
3323 		list_for_each_entry(ce, &engine->pinned_contexts_list,
3324 				    pinned_contexts_link)
3325 			guc_kernel_context_pin(guc, ce);
3326 	}
3327 }
3328 
3329 static void guc_release(struct intel_engine_cs *engine)
3330 {
3331 	engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
3332 
3333 	intel_engine_cleanup_common(engine);
3334 	lrc_fini_wa_ctx(engine);
3335 }
3336 
3337 static void virtual_guc_bump_serial(struct intel_engine_cs *engine)
3338 {
3339 	struct intel_engine_cs *e;
3340 	intel_engine_mask_t tmp, mask = engine->mask;
3341 
3342 	for_each_engine_masked(e, engine->gt, mask, tmp)
3343 		e->serial++;
3344 }
3345 
3346 static void guc_default_vfuncs(struct intel_engine_cs *engine)
3347 {
3348 	/* Default vfuncs which can be overridden by each engine. */
3349 
3350 	engine->resume = guc_resume;
3351 
3352 	engine->cops = &guc_context_ops;
3353 	engine->request_alloc = guc_request_alloc;
3354 	engine->add_active_request = add_to_context;
3355 	engine->remove_active_request = remove_from_context;
3356 
3357 	engine->sched_engine->schedule = i915_schedule;
3358 
3359 	engine->reset.prepare = guc_reset_nop;
3360 	engine->reset.rewind = guc_rewind_nop;
3361 	engine->reset.cancel = guc_reset_nop;
3362 	engine->reset.finish = guc_reset_nop;
3363 
3364 	engine->emit_flush = gen8_emit_flush_xcs;
3365 	engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
3366 	engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs;
3367 	if (GRAPHICS_VER(engine->i915) >= 12) {
3368 		engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs;
3369 		engine->emit_flush = gen12_emit_flush_xcs;
3370 	}
3371 	engine->set_default_submission = guc_set_default_submission;
3372 
3373 	engine->flags |= I915_ENGINE_HAS_PREEMPTION;
3374 	engine->flags |= I915_ENGINE_HAS_TIMESLICES;
3375 
3376 	/*
3377 	 * TODO: GuC supports timeslicing and semaphores as well, but they're
3378 	 * handled by the firmware so some minor tweaks are required before
3379 	 * enabling.
3380 	 *
3381 	 * engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
3382 	 */
3383 
3384 	engine->emit_bb_start = gen8_emit_bb_start;
3385 }
3386 
3387 static void rcs_submission_override(struct intel_engine_cs *engine)
3388 {
3389 	switch (GRAPHICS_VER(engine->i915)) {
3390 	case 12:
3391 		engine->emit_flush = gen12_emit_flush_rcs;
3392 		engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
3393 		break;
3394 	case 11:
3395 		engine->emit_flush = gen11_emit_flush_rcs;
3396 		engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
3397 		break;
3398 	default:
3399 		engine->emit_flush = gen8_emit_flush_rcs;
3400 		engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
3401 		break;
3402 	}
3403 }
3404 
3405 static inline void guc_default_irqs(struct intel_engine_cs *engine)
3406 {
3407 	engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT;
3408 	intel_engine_set_irq_handler(engine, cs_irq_handler);
3409 }
3410 
3411 static void guc_sched_engine_destroy(struct kref *kref)
3412 {
3413 	struct i915_sched_engine *sched_engine =
3414 		container_of(kref, typeof(*sched_engine), ref);
3415 	struct intel_guc *guc = sched_engine->private_data;
3416 
3417 	guc->sched_engine = NULL;
3418 	tasklet_kill(&sched_engine->tasklet); /* flush the callback */
3419 	kfree(sched_engine);
3420 }
3421 
3422 int intel_guc_submission_setup(struct intel_engine_cs *engine)
3423 {
3424 	struct drm_i915_private *i915 = engine->i915;
3425 	struct intel_guc *guc = &engine->gt->uc.guc;
3426 
3427 	/*
3428 	 * The setup relies on several assumptions (e.g. irqs always enabled)
3429 	 * that are only valid on gen11+
3430 	 */
3431 	GEM_BUG_ON(GRAPHICS_VER(i915) < 11);
3432 
3433 	if (!guc->sched_engine) {
3434 		guc->sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL);
3435 		if (!guc->sched_engine)
3436 			return -ENOMEM;
3437 
3438 		guc->sched_engine->schedule = i915_schedule;
3439 		guc->sched_engine->disabled = guc_sched_engine_disabled;
3440 		guc->sched_engine->private_data = guc;
3441 		guc->sched_engine->destroy = guc_sched_engine_destroy;
3442 		guc->sched_engine->bump_inflight_request_prio =
3443 			guc_bump_inflight_request_prio;
3444 		guc->sched_engine->retire_inflight_request_prio =
3445 			guc_retire_inflight_request_prio;
3446 		tasklet_setup(&guc->sched_engine->tasklet,
3447 			      guc_submission_tasklet);
3448 	}
3449 	i915_sched_engine_put(engine->sched_engine);
3450 	engine->sched_engine = i915_sched_engine_get(guc->sched_engine);
3451 
3452 	guc_default_vfuncs(engine);
3453 	guc_default_irqs(engine);
3454 	guc_init_breadcrumbs(engine);
3455 
3456 	if (engine->class == RENDER_CLASS)
3457 		rcs_submission_override(engine);
3458 
3459 	lrc_init_wa_ctx(engine);
3460 
3461 	/* Finally, take ownership and responsibility for cleanup! */
3462 	engine->sanitize = guc_sanitize;
3463 	engine->release = guc_release;
3464 
3465 	return 0;
3466 }
3467 
/* Enable GuC submission; currently this only rebuilds the LRC mapping. */
void intel_guc_submission_enable(struct intel_guc *guc)
{
	guc_init_lrc_mapping(guc);
}
3472 
/* Disable GuC submission; intentionally does nothing at present. */
void intel_guc_submission_disable(struct intel_guc *guc)
{
	/* Note: By the time we're here, GuC may have already been reset */
}
3477 
3478 static bool __guc_submission_supported(struct intel_guc *guc)
3479 {
3480 	/* GuC submission is unavailable for pre-Gen11 */
3481 	return intel_guc_is_supported(guc) &&
3482 	       GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11;
3483 }
3484 
3485 static bool __guc_submission_selected(struct intel_guc *guc)
3486 {
3487 	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
3488 
3489 	if (!intel_guc_submission_is_supported(guc))
3490 		return false;
3491 
3492 	return i915->params.enable_guc & ENABLE_GUC_SUBMISSION;
3493 }
3494 
3495 void intel_guc_submission_init_early(struct intel_guc *guc)
3496 {
3497 	guc->submission_supported = __guc_submission_supported(guc);
3498 	guc->submission_selected = __guc_submission_selected(guc);
3499 }
3500 
3501 static inline struct intel_context *
3502 g2h_context_lookup(struct intel_guc *guc, u32 desc_idx)
3503 {
3504 	struct intel_context *ce;
3505 
3506 	if (unlikely(desc_idx >= GUC_MAX_LRC_DESCRIPTORS)) {
3507 		drm_err(&guc_to_gt(guc)->i915->drm,
3508 			"Invalid desc_idx %u", desc_idx);
3509 		return NULL;
3510 	}
3511 
3512 	ce = __get_context(guc, desc_idx);
3513 	if (unlikely(!ce)) {
3514 		drm_err(&guc_to_gt(guc)->i915->drm,
3515 			"Context is NULL, desc_idx %u", desc_idx);
3516 		return NULL;
3517 	}
3518 
3519 	if (unlikely(intel_context_is_child(ce))) {
3520 		drm_err(&guc_to_gt(guc)->i915->drm,
3521 			"Context is child, desc_idx %u", desc_idx);
3522 		return NULL;
3523 	}
3524 
3525 	return ce;
3526 }
3527 
3528 int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
3529 					  const u32 *msg,
3530 					  u32 len)
3531 {
3532 	struct intel_context *ce;
3533 	u32 desc_idx = msg[0];
3534 
3535 	if (unlikely(len < 1)) {
3536 		drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
3537 		return -EPROTO;
3538 	}
3539 
3540 	ce = g2h_context_lookup(guc, desc_idx);
3541 	if (unlikely(!ce))
3542 		return -EPROTO;
3543 
3544 	trace_intel_context_deregister_done(ce);
3545 
3546 #ifdef CONFIG_DRM_I915_SELFTEST
3547 	if (unlikely(ce->drop_deregister)) {
3548 		ce->drop_deregister = false;
3549 		return 0;
3550 	}
3551 #endif
3552 
3553 	if (context_wait_for_deregister_to_register(ce)) {
3554 		struct intel_runtime_pm *runtime_pm =
3555 			&ce->engine->gt->i915->runtime_pm;
3556 		intel_wakeref_t wakeref;
3557 
3558 		/*
3559 		 * Previous owner of this guc_id has been deregistered, now safe
3560 		 * register this context.
3561 		 */
3562 		with_intel_runtime_pm(runtime_pm, wakeref)
3563 			register_context(ce, true);
3564 		guc_signal_context_fence(ce);
3565 		intel_context_put(ce);
3566 	} else if (context_destroyed(ce)) {
3567 		/* Context has been destroyed */
3568 		intel_gt_pm_put_async(guc_to_gt(guc));
3569 		release_guc_id(guc, ce);
3570 		__guc_context_destroy(ce);
3571 	}
3572 
3573 	decr_outstanding_submission_g2h(guc);
3574 
3575 	return 0;
3576 }
3577 
3578 int intel_guc_sched_done_process_msg(struct intel_guc *guc,
3579 				     const u32 *msg,
3580 				     u32 len)
3581 {
3582 	struct intel_context *ce;
3583 	unsigned long flags;
3584 	u32 desc_idx = msg[0];
3585 
3586 	if (unlikely(len < 2)) {
3587 		drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
3588 		return -EPROTO;
3589 	}
3590 
3591 	ce = g2h_context_lookup(guc, desc_idx);
3592 	if (unlikely(!ce))
3593 		return -EPROTO;
3594 
3595 	if (unlikely(context_destroyed(ce) ||
3596 		     (!context_pending_enable(ce) &&
3597 		     !context_pending_disable(ce)))) {
3598 		drm_err(&guc_to_gt(guc)->i915->drm,
3599 			"Bad context sched_state 0x%x, desc_idx %u",
3600 			ce->guc_state.sched_state, desc_idx);
3601 		return -EPROTO;
3602 	}
3603 
3604 	trace_intel_context_sched_done(ce);
3605 
3606 	if (context_pending_enable(ce)) {
3607 #ifdef CONFIG_DRM_I915_SELFTEST
3608 		if (unlikely(ce->drop_schedule_enable)) {
3609 			ce->drop_schedule_enable = false;
3610 			return 0;
3611 		}
3612 #endif
3613 
3614 		spin_lock_irqsave(&ce->guc_state.lock, flags);
3615 		clr_context_pending_enable(ce);
3616 		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3617 	} else if (context_pending_disable(ce)) {
3618 		bool banned;
3619 
3620 #ifdef CONFIG_DRM_I915_SELFTEST
3621 		if (unlikely(ce->drop_schedule_disable)) {
3622 			ce->drop_schedule_disable = false;
3623 			return 0;
3624 		}
3625 #endif
3626 
3627 		/*
3628 		 * Unpin must be done before __guc_signal_context_fence,
3629 		 * otherwise a race exists between the requests getting
3630 		 * submitted + retired before this unpin completes resulting in
3631 		 * the pin_count going to zero and the context still being
3632 		 * enabled.
3633 		 */
3634 		intel_context_sched_disable_unpin(ce);
3635 
3636 		spin_lock_irqsave(&ce->guc_state.lock, flags);
3637 		banned = context_banned(ce);
3638 		clr_context_banned(ce);
3639 		clr_context_pending_disable(ce);
3640 		__guc_signal_context_fence(ce);
3641 		guc_blocked_fence_complete(ce);
3642 		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3643 
3644 		if (banned) {
3645 			guc_cancel_context_requests(ce);
3646 			intel_engine_signal_breadcrumbs(ce->engine);
3647 		}
3648 	}
3649 
3650 	decr_outstanding_submission_g2h(guc);
3651 	intel_context_put(ce);
3652 
3653 	return 0;
3654 }
3655 
3656 static void capture_error_state(struct intel_guc *guc,
3657 				struct intel_context *ce)
3658 {
3659 	struct intel_gt *gt = guc_to_gt(guc);
3660 	struct drm_i915_private *i915 = gt->i915;
3661 	struct intel_engine_cs *engine = __context_to_physical_engine(ce);
3662 	intel_wakeref_t wakeref;
3663 
3664 	intel_engine_set_hung_context(engine, ce);
3665 	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
3666 		i915_capture_error_state(gt, engine->mask);
3667 	atomic_inc(&i915->gpu_error.reset_engine_count[engine->uabi_class]);
3668 }
3669 
3670 static void guc_context_replay(struct intel_context *ce)
3671 {
3672 	struct i915_sched_engine *sched_engine = ce->engine->sched_engine;
3673 
3674 	__guc_reset_context(ce, true);
3675 	tasklet_hi_schedule(&sched_engine->tasklet);
3676 }
3677 
/*
 * React to a context reset notification: unless the context is banned or
 * blocked, capture the error state and replay the context.
 */
static void guc_handle_context_reset(struct intel_guc *guc,
				     struct intel_context *ce)
{
	trace_intel_context_reset(ce);

	/*
	 * XXX: Racey if request cancellation has occurred, see comment in
	 * __guc_reset_context().
	 */
	if (unlikely(intel_context_is_banned(ce) || context_blocked(ce)))
		return;

	capture_error_state(guc, ce);
	guc_context_replay(ce);
}
3693 
3694 int intel_guc_context_reset_process_msg(struct intel_guc *guc,
3695 					const u32 *msg, u32 len)
3696 {
3697 	struct intel_context *ce;
3698 	int desc_idx;
3699 
3700 	if (unlikely(len != 1)) {
3701 		drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
3702 		return -EPROTO;
3703 	}
3704 
3705 	desc_idx = msg[0];
3706 	ce = g2h_context_lookup(guc, desc_idx);
3707 	if (unlikely(!ce))
3708 		return -EPROTO;
3709 
3710 	guc_handle_context_reset(guc, ce);
3711 
3712 	return 0;
3713 }
3714 
3715 static struct intel_engine_cs *
3716 guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance)
3717 {
3718 	struct intel_gt *gt = guc_to_gt(guc);
3719 	u8 engine_class = guc_class_to_engine_class(guc_class);
3720 
3721 	/* Class index is checked in class converter */
3722 	GEM_BUG_ON(instance > MAX_ENGINE_INSTANCE);
3723 
3724 	return gt->engine_class[engine_class][instance];
3725 }
3726 
3727 int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
3728 					 const u32 *msg, u32 len)
3729 {
3730 	struct intel_engine_cs *engine;
3731 	u8 guc_class, instance;
3732 	u32 reason;
3733 
3734 	if (unlikely(len != 3)) {
3735 		drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
3736 		return -EPROTO;
3737 	}
3738 
3739 	guc_class = msg[0];
3740 	instance = msg[1];
3741 	reason = msg[2];
3742 
3743 	engine = guc_lookup_engine(guc, guc_class, instance);
3744 	if (unlikely(!engine)) {
3745 		drm_err(&guc_to_gt(guc)->i915->drm,
3746 			"Invalid engine %d:%d", guc_class, instance);
3747 		return -EPROTO;
3748 	}
3749 
3750 	intel_gt_handle_error(guc_to_gt(guc), engine->mask,
3751 			      I915_ERROR_CAPTURE,
3752 			      "GuC failed to reset %s (reason=0x%08x)\n",
3753 			      engine->name, reason);
3754 
3755 	return 0;
3756 }
3757 
3758 void intel_guc_find_hung_context(struct intel_engine_cs *engine)
3759 {
3760 	struct intel_guc *guc = &engine->gt->uc.guc;
3761 	struct intel_context *ce;
3762 	struct i915_request *rq;
3763 	unsigned long index;
3764 	unsigned long flags;
3765 
3766 	/* Reset called during driver load? GuC not yet initialised! */
3767 	if (unlikely(!guc_submission_initialized(guc)))
3768 		return;
3769 
3770 	xa_lock_irqsave(&guc->context_lookup, flags);
3771 	xa_for_each(&guc->context_lookup, index, ce) {
3772 		if (!kref_get_unless_zero(&ce->ref))
3773 			continue;
3774 
3775 		xa_unlock(&guc->context_lookup);
3776 
3777 		if (!intel_context_is_pinned(ce))
3778 			goto next;
3779 
3780 		if (intel_engine_is_virtual(ce->engine)) {
3781 			if (!(ce->engine->mask & engine->mask))
3782 				goto next;
3783 		} else {
3784 			if (ce->engine != engine)
3785 				goto next;
3786 		}
3787 
3788 		list_for_each_entry(rq, &ce->guc_state.requests, sched.link) {
3789 			if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE)
3790 				continue;
3791 
3792 			intel_engine_set_hung_context(engine, ce);
3793 
3794 			/* Can only cope with one hang at a time... */
3795 			intel_context_put(ce);
3796 			xa_lock(&guc->context_lookup);
3797 			goto done;
3798 		}
3799 next:
3800 		intel_context_put(ce);
3801 		xa_lock(&guc->context_lookup);
3802 	}
3803 done:
3804 	xa_unlock_irqrestore(&guc->context_lookup, flags);
3805 }
3806 
3807 void intel_guc_dump_active_requests(struct intel_engine_cs *engine,
3808 				    struct i915_request *hung_rq,
3809 				    struct drm_printer *m)
3810 {
3811 	struct intel_guc *guc = &engine->gt->uc.guc;
3812 	struct intel_context *ce;
3813 	unsigned long index;
3814 	unsigned long flags;
3815 
3816 	/* Reset called during driver load? GuC not yet initialised! */
3817 	if (unlikely(!guc_submission_initialized(guc)))
3818 		return;
3819 
3820 	xa_lock_irqsave(&guc->context_lookup, flags);
3821 	xa_for_each(&guc->context_lookup, index, ce) {
3822 		if (!kref_get_unless_zero(&ce->ref))
3823 			continue;
3824 
3825 		xa_unlock(&guc->context_lookup);
3826 
3827 		if (!intel_context_is_pinned(ce))
3828 			goto next;
3829 
3830 		if (intel_engine_is_virtual(ce->engine)) {
3831 			if (!(ce->engine->mask & engine->mask))
3832 				goto next;
3833 		} else {
3834 			if (ce->engine != engine)
3835 				goto next;
3836 		}
3837 
3838 		spin_lock(&ce->guc_state.lock);
3839 		intel_engine_dump_active_requests(&ce->guc_state.requests,
3840 						  hung_rq, m);
3841 		spin_unlock(&ce->guc_state.lock);
3842 
3843 next:
3844 		intel_context_put(ce);
3845 		xa_lock(&guc->context_lookup);
3846 	}
3847 	xa_unlock_irqrestore(&guc->context_lookup, flags);
3848 }
3849 
3850 void intel_guc_submission_print_info(struct intel_guc *guc,
3851 				     struct drm_printer *p)
3852 {
3853 	struct i915_sched_engine *sched_engine = guc->sched_engine;
3854 	struct rb_node *rb;
3855 	unsigned long flags;
3856 
3857 	if (!sched_engine)
3858 		return;
3859 
3860 	drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n",
3861 		   atomic_read(&guc->outstanding_submission_g2h));
3862 	drm_printf(p, "GuC tasklet count: %u\n\n",
3863 		   atomic_read(&sched_engine->tasklet.count));
3864 
3865 	spin_lock_irqsave(&sched_engine->lock, flags);
3866 	drm_printf(p, "Requests in GuC submit tasklet:\n");
3867 	for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) {
3868 		struct i915_priolist *pl = to_priolist(rb);
3869 		struct i915_request *rq;
3870 
3871 		priolist_for_each_request(rq, pl)
3872 			drm_printf(p, "guc_id=%u, seqno=%llu\n",
3873 				   rq->context->guc_id.id,
3874 				   rq->fence.seqno);
3875 	}
3876 	spin_unlock_irqrestore(&sched_engine->lock, flags);
3877 	drm_printf(p, "\n");
3878 }
3879 
3880 static inline void guc_log_context_priority(struct drm_printer *p,
3881 					    struct intel_context *ce)
3882 {
3883 	int i;
3884 
3885 	drm_printf(p, "\t\tPriority: %d\n", ce->guc_state.prio);
3886 	drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n");
3887 	for (i = GUC_CLIENT_PRIORITY_KMD_HIGH;
3888 	     i < GUC_CLIENT_PRIORITY_NUM; ++i) {
3889 		drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n",
3890 			   i, ce->guc_state.prio_count[i]);
3891 	}
3892 	drm_printf(p, "\n");
3893 }
3894 
3895 static inline void guc_log_context(struct drm_printer *p,
3896 				   struct intel_context *ce)
3897 {
3898 	drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id);
3899 	drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca);
3900 	drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
3901 		   ce->ring->head,
3902 		   ce->lrc_reg_state[CTX_RING_HEAD]);
3903 	drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n",
3904 		   ce->ring->tail,
3905 		   ce->lrc_reg_state[CTX_RING_TAIL]);
3906 	drm_printf(p, "\t\tContext Pin Count: %u\n",
3907 		   atomic_read(&ce->pin_count));
3908 	drm_printf(p, "\t\tGuC ID Ref Count: %u\n",
3909 		   atomic_read(&ce->guc_id.ref));
3910 	drm_printf(p, "\t\tSchedule State: 0x%x\n\n",
3911 		   ce->guc_state.sched_state);
3912 }
3913 
3914 void intel_guc_submission_print_context_info(struct intel_guc *guc,
3915 					     struct drm_printer *p)
3916 {
3917 	struct intel_context *ce;
3918 	unsigned long index;
3919 	unsigned long flags;
3920 
3921 	xa_lock_irqsave(&guc->context_lookup, flags);
3922 	xa_for_each(&guc->context_lookup, index, ce) {
3923 		GEM_BUG_ON(intel_context_is_child(ce));
3924 
3925 		guc_log_context(p, ce);
3926 		guc_log_context_priority(p, ce);
3927 
3928 		if (intel_context_is_parent(ce)) {
3929 			struct guc_process_desc *desc = __get_process_desc(ce);
3930 			struct intel_context *child;
3931 
3932 			drm_printf(p, "\t\tNumber children: %u\n",
3933 				   ce->parallel.number_children);
3934 			drm_printf(p, "\t\tWQI Head: %u\n",
3935 				   READ_ONCE(desc->head));
3936 			drm_printf(p, "\t\tWQI Tail: %u\n",
3937 				   READ_ONCE(desc->tail));
3938 			drm_printf(p, "\t\tWQI Status: %u\n\n",
3939 				   READ_ONCE(desc->wq_status));
3940 
3941 			if (ce->engine->emit_bb_start ==
3942 			    emit_bb_start_parent_no_preempt_mid_batch) {
3943 				u8 i;
3944 
3945 				drm_printf(p, "\t\tChildren Go: %u\n\n",
3946 					   get_children_go_value(ce));
3947 				for (i = 0; i < ce->parallel.number_children; ++i)
3948 					drm_printf(p, "\t\tChildren Join: %u\n",
3949 						   get_children_join_value(ce, i));
3950 			}
3951 
3952 			for_each_child(ce, child)
3953 				guc_log_context(p, child);
3954 		}
3955 	}
3956 	xa_unlock_irqrestore(&guc->context_lookup, flags);
3957 }
3958 
3959 static inline u32 get_children_go_addr(struct intel_context *ce)
3960 {
3961 	GEM_BUG_ON(!intel_context_is_parent(ce));
3962 
3963 	return i915_ggtt_offset(ce->state) +
3964 		__get_parent_scratch_offset(ce) +
3965 		offsetof(struct parent_scratch, go.semaphore);
3966 }
3967 
3968 static inline u32 get_children_join_addr(struct intel_context *ce,
3969 					 u8 child_index)
3970 {
3971 	GEM_BUG_ON(!intel_context_is_parent(ce));
3972 
3973 	return i915_ggtt_offset(ce->state) +
3974 		__get_parent_scratch_offset(ce) +
3975 		offsetof(struct parent_scratch, join[child_index].semaphore);
3976 }
3977 
/*
 * Semaphore values used by the parent/child handshake.
 *
 * PARENT_GO_*: value each child writes to its join semaphore and the parent
 * waits for (equality compare) on every child's join semaphore.
 * CHILD_GO_*: value the parent writes to the go semaphore and each child
 * waits for.
 *
 * The *_BB values are used by the handshake emitted before the batch buffer
 * start, the *_FINI_BREADCRUMB values by the handshake emitted ahead of the
 * fini breadcrumb.
 */
#define PARENT_GO_BB			1
#define PARENT_GO_FINI_BREADCRUMB	0
#define CHILD_GO_BB			1
#define CHILD_GO_FINI_BREADCRUMB	0
3982 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq,
3983 						     u64 offset, u32 len,
3984 						     const unsigned int flags)
3985 {
3986 	struct intel_context *ce = rq->context;
3987 	u32 *cs;
3988 	u8 i;
3989 
3990 	GEM_BUG_ON(!intel_context_is_parent(ce));
3991 
3992 	cs = intel_ring_begin(rq, 10 + 4 * ce->parallel.number_children);
3993 	if (IS_ERR(cs))
3994 		return PTR_ERR(cs);
3995 
3996 	/* Wait on children */
3997 	for (i = 0; i < ce->parallel.number_children; ++i) {
3998 		*cs++ = (MI_SEMAPHORE_WAIT |
3999 			 MI_SEMAPHORE_GLOBAL_GTT |
4000 			 MI_SEMAPHORE_POLL |
4001 			 MI_SEMAPHORE_SAD_EQ_SDD);
4002 		*cs++ = PARENT_GO_BB;
4003 		*cs++ = get_children_join_addr(ce, i);
4004 		*cs++ = 0;
4005 	}
4006 
4007 	/* Turn off preemption */
4008 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
4009 	*cs++ = MI_NOOP;
4010 
4011 	/* Tell children go */
4012 	cs = gen8_emit_ggtt_write(cs,
4013 				  CHILD_GO_BB,
4014 				  get_children_go_addr(ce),
4015 				  0);
4016 
4017 	/* Jump to batch */
4018 	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
4019 		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
4020 	*cs++ = lower_32_bits(offset);
4021 	*cs++ = upper_32_bits(offset);
4022 	*cs++ = MI_NOOP;
4023 
4024 	intel_ring_advance(rq, cs);
4025 
4026 	return 0;
4027 }
4028 
4029 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
4030 						    u64 offset, u32 len,
4031 						    const unsigned int flags)
4032 {
4033 	struct intel_context *ce = rq->context;
4034 	struct intel_context *parent = intel_context_to_parent(ce);
4035 	u32 *cs;
4036 
4037 	GEM_BUG_ON(!intel_context_is_child(ce));
4038 
4039 	cs = intel_ring_begin(rq, 12);
4040 	if (IS_ERR(cs))
4041 		return PTR_ERR(cs);
4042 
4043 	/* Signal parent */
4044 	cs = gen8_emit_ggtt_write(cs,
4045 				  PARENT_GO_BB,
4046 				  get_children_join_addr(parent,
4047 							 ce->parallel.child_index),
4048 				  0);
4049 
4050 	/* Wait on parent for go */
4051 	*cs++ = (MI_SEMAPHORE_WAIT |
4052 		 MI_SEMAPHORE_GLOBAL_GTT |
4053 		 MI_SEMAPHORE_POLL |
4054 		 MI_SEMAPHORE_SAD_EQ_SDD);
4055 	*cs++ = CHILD_GO_BB;
4056 	*cs++ = get_children_go_addr(parent);
4057 	*cs++ = 0;
4058 
4059 	/* Turn off preemption */
4060 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
4061 
4062 	/* Jump to batch */
4063 	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
4064 		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
4065 	*cs++ = lower_32_bits(offset);
4066 	*cs++ = upper_32_bits(offset);
4067 
4068 	intel_ring_advance(rq, cs);
4069 
4070 	return 0;
4071 }
4072 
4073 static u32 *
4074 __emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
4075 						   u32 *cs)
4076 {
4077 	struct intel_context *ce = rq->context;
4078 	u8 i;
4079 
4080 	GEM_BUG_ON(!intel_context_is_parent(ce));
4081 
4082 	/* Wait on children */
4083 	for (i = 0; i < ce->parallel.number_children; ++i) {
4084 		*cs++ = (MI_SEMAPHORE_WAIT |
4085 			 MI_SEMAPHORE_GLOBAL_GTT |
4086 			 MI_SEMAPHORE_POLL |
4087 			 MI_SEMAPHORE_SAD_EQ_SDD);
4088 		*cs++ = PARENT_GO_FINI_BREADCRUMB;
4089 		*cs++ = get_children_join_addr(ce, i);
4090 		*cs++ = 0;
4091 	}
4092 
4093 	/* Turn on preemption */
4094 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4095 	*cs++ = MI_NOOP;
4096 
4097 	/* Tell children go */
4098 	cs = gen8_emit_ggtt_write(cs,
4099 				  CHILD_GO_FINI_BREADCRUMB,
4100 				  get_children_go_addr(ce),
4101 				  0);
4102 
4103 	return cs;
4104 }
4105 
/*
 * If this is true, a submission of multi-lrc requests had an error and the
 * requests need to be skipped. The front end (execbuf IOCTL) should've called
 * i915_request_skip which squashes the BB but we still need to emit the fini
 * breadcrumbs seqno write. At this point we don't know how many of the
 * requests in the multi-lrc submission were generated so we can't do the
 * handshake between the parent and children (e.g. if 4 requests should be
 * generated but 2nd hit an error only 1 would be seen by the GuC backend).
 * Simply skip the handshake, but still emit the breadcrumb seqno, if an error
 * has occurred on any of the requests in submission / relationship.
 */
4117 static inline bool skip_handshake(struct i915_request *rq)
4118 {
4119 	return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags);
4120 }
4121 
4122 static u32 *
4123 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
4124 						 u32 *cs)
4125 {
4126 	struct intel_context *ce = rq->context;
4127 
4128 	GEM_BUG_ON(!intel_context_is_parent(ce));
4129 
4130 	if (unlikely(skip_handshake(rq))) {
4131 		/*
4132 		 * NOP everything in __emit_fini_breadcrumb_parent_no_preempt_mid_batch,
4133 		 * the -6 comes from the length of the emits below.
4134 		 */
4135 		memset(cs, 0, sizeof(u32) *
4136 		       (ce->engine->emit_fini_breadcrumb_dw - 6));
4137 		cs += ce->engine->emit_fini_breadcrumb_dw - 6;
4138 	} else {
4139 		cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs);
4140 	}
4141 
4142 	/* Emit fini breadcrumb */
4143 	cs = gen8_emit_ggtt_write(cs,
4144 				  rq->fence.seqno,
4145 				  i915_request_active_timeline(rq)->hwsp_offset,
4146 				  0);
4147 
4148 	/* User interrupt */
4149 	*cs++ = MI_USER_INTERRUPT;
4150 	*cs++ = MI_NOOP;
4151 
4152 	rq->tail = intel_ring_offset(rq, cs);
4153 
4154 	return cs;
4155 }
4156 
4157 static u32 *
4158 __emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
4159 						  u32 *cs)
4160 {
4161 	struct intel_context *ce = rq->context;
4162 	struct intel_context *parent = intel_context_to_parent(ce);
4163 
4164 	GEM_BUG_ON(!intel_context_is_child(ce));
4165 
4166 	/* Turn on preemption */
4167 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4168 	*cs++ = MI_NOOP;
4169 
4170 	/* Signal parent */
4171 	cs = gen8_emit_ggtt_write(cs,
4172 				  PARENT_GO_FINI_BREADCRUMB,
4173 				  get_children_join_addr(parent,
4174 							 ce->parallel.child_index),
4175 				  0);
4176 
4177 	/* Wait parent on for go */
4178 	*cs++ = (MI_SEMAPHORE_WAIT |
4179 		 MI_SEMAPHORE_GLOBAL_GTT |
4180 		 MI_SEMAPHORE_POLL |
4181 		 MI_SEMAPHORE_SAD_EQ_SDD);
4182 	*cs++ = CHILD_GO_FINI_BREADCRUMB;
4183 	*cs++ = get_children_go_addr(parent);
4184 	*cs++ = 0;
4185 
4186 	return cs;
4187 }
4188 
4189 static u32 *
4190 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
4191 						u32 *cs)
4192 {
4193 	struct intel_context *ce = rq->context;
4194 
4195 	GEM_BUG_ON(!intel_context_is_child(ce));
4196 
4197 	if (unlikely(skip_handshake(rq))) {
4198 		/*
4199 		 * NOP everything in __emit_fini_breadcrumb_child_no_preempt_mid_batch,
4200 		 * the -6 comes from the length of the emits below.
4201 		 */
4202 		memset(cs, 0, sizeof(u32) *
4203 		       (ce->engine->emit_fini_breadcrumb_dw - 6));
4204 		cs += ce->engine->emit_fini_breadcrumb_dw - 6;
4205 	} else {
4206 		cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs);
4207 	}
4208 
4209 	/* Emit fini breadcrumb */
4210 	cs = gen8_emit_ggtt_write(cs,
4211 				  rq->fence.seqno,
4212 				  i915_request_active_timeline(rq)->hwsp_offset,
4213 				  0);
4214 
4215 	/* User interrupt */
4216 	*cs++ = MI_USER_INTERRUPT;
4217 	*cs++ = MI_NOOP;
4218 
4219 	rq->tail = intel_ring_offset(rq, cs);
4220 
4221 	return cs;
4222 }
4223 
4224 static struct intel_context *
4225 guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
4226 		   unsigned long flags)
4227 {
4228 	struct guc_virtual_engine *ve;
4229 	struct intel_guc *guc;
4230 	unsigned int n;
4231 	int err;
4232 
4233 	ve = kzalloc(sizeof(*ve), GFP_KERNEL);
4234 	if (!ve)
4235 		return ERR_PTR(-ENOMEM);
4236 
4237 	guc = &siblings[0]->gt->uc.guc;
4238 
4239 	ve->base.i915 = siblings[0]->i915;
4240 	ve->base.gt = siblings[0]->gt;
4241 	ve->base.uncore = siblings[0]->uncore;
4242 	ve->base.id = -1;
4243 
4244 	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
4245 	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
4246 	ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
4247 	ve->base.saturated = ALL_ENGINES;
4248 
4249 	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
4250 
4251 	ve->base.sched_engine = i915_sched_engine_get(guc->sched_engine);
4252 
4253 	ve->base.cops = &virtual_guc_context_ops;
4254 	ve->base.request_alloc = guc_request_alloc;
4255 	ve->base.bump_serial = virtual_guc_bump_serial;
4256 
4257 	ve->base.submit_request = guc_submit_request;
4258 
4259 	ve->base.flags = I915_ENGINE_IS_VIRTUAL;
4260 
4261 	intel_context_init(&ve->context, &ve->base);
4262 
4263 	for (n = 0; n < count; n++) {
4264 		struct intel_engine_cs *sibling = siblings[n];
4265 
4266 		GEM_BUG_ON(!is_power_of_2(sibling->mask));
4267 		if (sibling->mask & ve->base.mask) {
4268 			DRM_DEBUG("duplicate %s entry in load balancer\n",
4269 				  sibling->name);
4270 			err = -EINVAL;
4271 			goto err_put;
4272 		}
4273 
4274 		ve->base.mask |= sibling->mask;
4275 		ve->base.logical_mask |= sibling->logical_mask;
4276 
4277 		if (n != 0 && ve->base.class != sibling->class) {
4278 			DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
4279 				  sibling->class, ve->base.class);
4280 			err = -EINVAL;
4281 			goto err_put;
4282 		} else if (n == 0) {
4283 			ve->base.class = sibling->class;
4284 			ve->base.uabi_class = sibling->uabi_class;
4285 			snprintf(ve->base.name, sizeof(ve->base.name),
4286 				 "v%dx%d", ve->base.class, count);
4287 			ve->base.context_size = sibling->context_size;
4288 
4289 			ve->base.add_active_request =
4290 				sibling->add_active_request;
4291 			ve->base.remove_active_request =
4292 				sibling->remove_active_request;
4293 			ve->base.emit_bb_start = sibling->emit_bb_start;
4294 			ve->base.emit_flush = sibling->emit_flush;
4295 			ve->base.emit_init_breadcrumb =
4296 				sibling->emit_init_breadcrumb;
4297 			ve->base.emit_fini_breadcrumb =
4298 				sibling->emit_fini_breadcrumb;
4299 			ve->base.emit_fini_breadcrumb_dw =
4300 				sibling->emit_fini_breadcrumb_dw;
4301 			ve->base.breadcrumbs =
4302 				intel_breadcrumbs_get(sibling->breadcrumbs);
4303 
4304 			ve->base.flags |= sibling->flags;
4305 
4306 			ve->base.props.timeslice_duration_ms =
4307 				sibling->props.timeslice_duration_ms;
4308 			ve->base.props.preempt_timeout_ms =
4309 				sibling->props.preempt_timeout_ms;
4310 		}
4311 	}
4312 
4313 	return &ve->context;
4314 
4315 err_put:
4316 	intel_context_put(&ve->context);
4317 	return ERR_PTR(err);
4318 }
4319 
4320 bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve)
4321 {
4322 	struct intel_engine_cs *engine;
4323 	intel_engine_mask_t tmp, mask = ve->mask;
4324 
4325 	for_each_engine_masked(engine, ve->gt, mask, tmp)
4326 		if (READ_ONCE(engine->props.heartbeat_interval_ms))
4327 			return true;
4328 
4329 	return false;
4330 }
4331 
4332 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
4333 #include "selftest_guc.c"
4334 #include "selftest_guc_multi_lrc.c"
4335 #endif
4336