1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2014 Intel Corporation
4 */
5
6 #include <linux/circ_buf.h>
7
8 #include "gem/i915_gem_context.h"
9 #include "gem/i915_gem_lmem.h"
10 #include "gt/gen8_engine_cs.h"
11 #include "gt/intel_breadcrumbs.h"
12 #include "gt/intel_context.h"
13 #include "gt/intel_engine_heartbeat.h"
14 #include "gt/intel_engine_pm.h"
15 #include "gt/intel_engine_regs.h"
16 #include "gt/intel_gpu_commands.h"
17 #include "gt/intel_gt.h"
18 #include "gt/intel_gt_clock_utils.h"
19 #include "gt/intel_gt_irq.h"
20 #include "gt/intel_gt_pm.h"
21 #include "gt/intel_gt_regs.h"
22 #include "gt/intel_gt_requests.h"
23 #include "gt/intel_lrc.h"
24 #include "gt/intel_lrc_reg.h"
25 #include "gt/intel_mocs.h"
26 #include "gt/intel_ring.h"
27
28 #include "intel_guc_ads.h"
29 #include "intel_guc_capture.h"
30 #include "intel_guc_print.h"
31 #include "intel_guc_submission.h"
32
33 #include "i915_drv.h"
34 #include "i915_reg.h"
35 #include "i915_trace.h"
36
37 /**
38 * DOC: GuC-based command submission
39 *
40 * The Scratch registers:
 * There are 16 MMIO-based registers starting from 0xC180. The kernel driver
 * writes a value to the action register (SOFT_SCRATCH_0) along with any data.
 * It then triggers an interrupt on the GuC via another register write
 * (0xC4C8). Firmware writes a success/fail code back to the action register
 * after it processes the request. The kernel driver polls waiting for this
 * update and then proceeds.
47 *
48 * Command Transport buffers (CTBs):
49 * Covered in detail in other sections but CTBs (Host to GuC - H2G, GuC to Host
50 * - G2H) are a message interface between the i915 and GuC.
51 *
52 * Context registration:
53 * Before a context can be submitted it must be registered with the GuC via a
54 * H2G. A unique guc_id is associated with each context. The context is either
55 * registered at request creation time (normal operation) or at submission time
56 * (abnormal operation, e.g. after a reset).
57 *
58 * Context submission:
59 * The i915 updates the LRC tail value in memory. The i915 must enable the
60 * scheduling of the context within the GuC for the GuC to actually consider it.
61 * Therefore, the first time a disabled context is submitted we use a schedule
62 * enable H2G, while follow up submissions are done via the context submit H2G,
63 * which informs the GuC that a previously enabled context has new work
64 * available.
65 *
66 * Context unpin:
67 * To unpin a context a H2G is used to disable scheduling. When the
68 * corresponding G2H returns indicating the scheduling disable operation has
69 * completed it is safe to unpin the context. While a disable is in flight it
70 * isn't safe to resubmit the context so a fence is used to stall all future
71 * requests of that context until the G2H is returned. Because this interaction
72 * with the GuC takes a non-zero amount of time we delay the disabling of
73 * scheduling after the pin count goes to zero by a configurable period of time
74 * (see SCHED_DISABLE_DELAY_MS). The thought is this gives the user a window of
75 * time to resubmit something on the context before doing this costly operation.
76 * This delay is only done if the context isn't closed and the guc_id usage is
77 * less than a threshold (see NUM_SCHED_DISABLE_GUC_IDS_THRESHOLD).
78 *
79 * Context deregistration:
80 * Before a context can be destroyed or if we steal its guc_id we must
81 * deregister the context with the GuC via H2G. If stealing the guc_id it isn't
82 * safe to submit anything to this guc_id until the deregister completes so a
83 * fence is used to stall all requests associated with this guc_id until the
84 * corresponding G2H returns indicating the guc_id has been deregistered.
85 *
86 * submission_state.guc_ids:
87 * Unique number associated with private GuC context data passed in during
88 * context registration / submission / deregistration. 64k available. Simple ida
89 * is used for allocation.
90 *
91 * Stealing guc_ids:
92 * If no guc_ids are available they can be stolen from another context at
93 * request creation time if that context is unpinned. If a guc_id can't be found
94 * we punt this problem to the user as we believe this is near impossible to hit
95 * during normal use cases.
96 *
97 * Locking:
98 * In the GuC submission code we have 3 basic spin locks which protect
99 * everything. Details about each below.
100 *
101 * sched_engine->lock
102 * This is the submission lock for all contexts that share an i915 schedule
103 * engine (sched_engine), thus only one of the contexts which share a
104 * sched_engine can be submitting at a time. Currently only one sched_engine is
105 * used for all of GuC submission but that could change in the future.
106 *
107 * guc->submission_state.lock
108 * Global lock for GuC submission state. Protects guc_ids and destroyed contexts
109 * list.
110 *
111 * ce->guc_state.lock
112 * Protects everything under ce->guc_state. Ensures that a context is in the
113 * correct state before issuing a H2G. e.g. We don't issue a schedule disable
114 * on a disabled context (bad idea), we don't issue a schedule enable when a
115 * schedule disable is in flight, etc... Also protects list of inflight requests
116 * on the context and the priority management state. Lock is individual to each
117 * context.
118 *
119 * Lock ordering rules:
120 * sched_engine->lock -> ce->guc_state.lock
121 * guc->submission_state.lock -> ce->guc_state.lock
122 *
123 * Reset races:
124 * When a full GT reset is triggered it is assumed that some G2H responses to
125 * H2Gs can be lost as the GuC is also reset. Losing these G2H can prove to be
126 * fatal as we do certain operations upon receiving a G2H (e.g. destroy
127 * contexts, release guc_ids, etc...). When this occurs we can scrub the
 * context state and cleanup appropriately, however this is quite racy.
129 * To avoid races, the reset code must disable submission before scrubbing for
130 * the missing G2H, while the submission code must check for submission being
131 * disabled and skip sending H2Gs and updating context states when it is. Both
132 * sides must also make sure to hold the relevant locks.
133 */
134
135 /* GuC Virtual Engine */
136 struct guc_virtual_engine {
137 struct intel_engine_cs base;
138 struct intel_context context;
139 };
140
/* Forward declarations of the virtual / parallel context constructors. */
static struct intel_context *guc_create_virtual(struct intel_engine_cs **siblings,
						unsigned int count,
						unsigned long flags);

static struct intel_context *guc_create_parallel(struct intel_engine_cs **engines,
						 unsigned int num_siblings,
						 unsigned int width);
149
/* Size, in bytes, of a GuC request. */
#define GUC_REQUEST_SIZE		64

/*
 * The guc_id space is partitioned: 1/16 of the ids are set aside for
 * multi-lrc, because the GuC submission interface requires those ids to be
 * contiguous. Multi-lrc and single-lrc use different allocators (bitmap vs.
 * ida), which is why the space has to be split. The number of multi-lrc
 * contexts in use is expected to be low, so 1/16 should be sufficient.
 * Minimum of 32 guc_ids for multi-lrc.
 *
 * NOTE(review): this macro only divides by 16; it does not enforce the
 * minimum of 32 itself - confirm num_guc_ids is bounded elsewhere.
 */
#define NUMBER_MULTI_LRC_GUC_ID(guc) ((guc)->submission_state.num_guc_ids / 16)
162
163 /*
164 * Below is a set of functions which control the GuC scheduling state which
165 * require a lock.
166 */
/* Single-bit flags stored in ce->guc_state.sched_state (bits 0-8). */
#define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER	BIT(0)
#define SCHED_STATE_DESTROYED				BIT(1)
#define SCHED_STATE_PENDING_DISABLE			BIT(2)
#define SCHED_STATE_BANNED				BIT(3)
#define SCHED_STATE_ENABLED				BIT(4)
#define SCHED_STATE_PENDING_ENABLE			BIT(5)
#define SCHED_STATE_REGISTERED				BIT(6)
#define SCHED_STATE_POLICY_REQUIRED			BIT(7)
#define SCHED_STATE_CLOSED				BIT(8)
/*
 * 12-bit "blocked" counter occupying bits 9-20; SCHED_STATE_BLOCKED is the
 * counter's unit increment.
 */
#define SCHED_STATE_BLOCKED_SHIFT			9
#define SCHED_STATE_BLOCKED				BIT(SCHED_STATE_BLOCKED_SHIFT)
#define SCHED_STATE_BLOCKED_MASK			(0xfff << SCHED_STATE_BLOCKED_SHIFT)
179
init_sched_state(struct intel_context * ce)180 static inline void init_sched_state(struct intel_context *ce)
181 {
182 lockdep_assert_held(&ce->guc_state.lock);
183 ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK;
184 }
185
186 /*
187 * Kernel contexts can have SCHED_STATE_REGISTERED after suspend.
188 * A context close can race with the submission path, so SCHED_STATE_CLOSED
189 * can be set immediately before we try to register.
190 */
#define SCHED_STATE_VALID_INIT		\
	(SCHED_STATE_REGISTERED |	\
	 SCHED_STATE_CLOSED |		\
	 SCHED_STATE_BLOCKED_MASK)
195
196 __maybe_unused
sched_state_is_init(struct intel_context * ce)197 static bool sched_state_is_init(struct intel_context *ce)
198 {
199 return !(ce->guc_state.sched_state & ~SCHED_STATE_VALID_INIT);
200 }
201
202 static inline bool
context_wait_for_deregister_to_register(struct intel_context * ce)203 context_wait_for_deregister_to_register(struct intel_context *ce)
204 {
205 return ce->guc_state.sched_state &
206 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
207 }
208
209 static inline void
set_context_wait_for_deregister_to_register(struct intel_context * ce)210 set_context_wait_for_deregister_to_register(struct intel_context *ce)
211 {
212 lockdep_assert_held(&ce->guc_state.lock);
213 ce->guc_state.sched_state |=
214 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
215 }
216
217 static inline void
clr_context_wait_for_deregister_to_register(struct intel_context * ce)218 clr_context_wait_for_deregister_to_register(struct intel_context *ce)
219 {
220 lockdep_assert_held(&ce->guc_state.lock);
221 ce->guc_state.sched_state &=
222 ~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
223 }
224
225 static inline bool
context_destroyed(struct intel_context * ce)226 context_destroyed(struct intel_context *ce)
227 {
228 return ce->guc_state.sched_state & SCHED_STATE_DESTROYED;
229 }
230
231 static inline void
set_context_destroyed(struct intel_context * ce)232 set_context_destroyed(struct intel_context *ce)
233 {
234 lockdep_assert_held(&ce->guc_state.lock);
235 ce->guc_state.sched_state |= SCHED_STATE_DESTROYED;
236 }
237
context_pending_disable(struct intel_context * ce)238 static inline bool context_pending_disable(struct intel_context *ce)
239 {
240 return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE;
241 }
242
set_context_pending_disable(struct intel_context * ce)243 static inline void set_context_pending_disable(struct intel_context *ce)
244 {
245 lockdep_assert_held(&ce->guc_state.lock);
246 ce->guc_state.sched_state |= SCHED_STATE_PENDING_DISABLE;
247 }
248
clr_context_pending_disable(struct intel_context * ce)249 static inline void clr_context_pending_disable(struct intel_context *ce)
250 {
251 lockdep_assert_held(&ce->guc_state.lock);
252 ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_DISABLE;
253 }
254
context_banned(struct intel_context * ce)255 static inline bool context_banned(struct intel_context *ce)
256 {
257 return ce->guc_state.sched_state & SCHED_STATE_BANNED;
258 }
259
set_context_banned(struct intel_context * ce)260 static inline void set_context_banned(struct intel_context *ce)
261 {
262 lockdep_assert_held(&ce->guc_state.lock);
263 ce->guc_state.sched_state |= SCHED_STATE_BANNED;
264 }
265
clr_context_banned(struct intel_context * ce)266 static inline void clr_context_banned(struct intel_context *ce)
267 {
268 lockdep_assert_held(&ce->guc_state.lock);
269 ce->guc_state.sched_state &= ~SCHED_STATE_BANNED;
270 }
271
context_enabled(struct intel_context * ce)272 static inline bool context_enabled(struct intel_context *ce)
273 {
274 return ce->guc_state.sched_state & SCHED_STATE_ENABLED;
275 }
276
set_context_enabled(struct intel_context * ce)277 static inline void set_context_enabled(struct intel_context *ce)
278 {
279 lockdep_assert_held(&ce->guc_state.lock);
280 ce->guc_state.sched_state |= SCHED_STATE_ENABLED;
281 }
282
clr_context_enabled(struct intel_context * ce)283 static inline void clr_context_enabled(struct intel_context *ce)
284 {
285 lockdep_assert_held(&ce->guc_state.lock);
286 ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED;
287 }
288
context_pending_enable(struct intel_context * ce)289 static inline bool context_pending_enable(struct intel_context *ce)
290 {
291 return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE;
292 }
293
set_context_pending_enable(struct intel_context * ce)294 static inline void set_context_pending_enable(struct intel_context *ce)
295 {
296 lockdep_assert_held(&ce->guc_state.lock);
297 ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE;
298 }
299
clr_context_pending_enable(struct intel_context * ce)300 static inline void clr_context_pending_enable(struct intel_context *ce)
301 {
302 lockdep_assert_held(&ce->guc_state.lock);
303 ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE;
304 }
305
context_registered(struct intel_context * ce)306 static inline bool context_registered(struct intel_context *ce)
307 {
308 return ce->guc_state.sched_state & SCHED_STATE_REGISTERED;
309 }
310
set_context_registered(struct intel_context * ce)311 static inline void set_context_registered(struct intel_context *ce)
312 {
313 lockdep_assert_held(&ce->guc_state.lock);
314 ce->guc_state.sched_state |= SCHED_STATE_REGISTERED;
315 }
316
clr_context_registered(struct intel_context * ce)317 static inline void clr_context_registered(struct intel_context *ce)
318 {
319 lockdep_assert_held(&ce->guc_state.lock);
320 ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED;
321 }
322
context_policy_required(struct intel_context * ce)323 static inline bool context_policy_required(struct intel_context *ce)
324 {
325 return ce->guc_state.sched_state & SCHED_STATE_POLICY_REQUIRED;
326 }
327
set_context_policy_required(struct intel_context * ce)328 static inline void set_context_policy_required(struct intel_context *ce)
329 {
330 lockdep_assert_held(&ce->guc_state.lock);
331 ce->guc_state.sched_state |= SCHED_STATE_POLICY_REQUIRED;
332 }
333
clr_context_policy_required(struct intel_context * ce)334 static inline void clr_context_policy_required(struct intel_context *ce)
335 {
336 lockdep_assert_held(&ce->guc_state.lock);
337 ce->guc_state.sched_state &= ~SCHED_STATE_POLICY_REQUIRED;
338 }
339
context_close_done(struct intel_context * ce)340 static inline bool context_close_done(struct intel_context *ce)
341 {
342 return ce->guc_state.sched_state & SCHED_STATE_CLOSED;
343 }
344
set_context_close_done(struct intel_context * ce)345 static inline void set_context_close_done(struct intel_context *ce)
346 {
347 lockdep_assert_held(&ce->guc_state.lock);
348 ce->guc_state.sched_state |= SCHED_STATE_CLOSED;
349 }
350
context_blocked(struct intel_context * ce)351 static inline u32 context_blocked(struct intel_context *ce)
352 {
353 return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >>
354 SCHED_STATE_BLOCKED_SHIFT;
355 }
356
incr_context_blocked(struct intel_context * ce)357 static inline void incr_context_blocked(struct intel_context *ce)
358 {
359 lockdep_assert_held(&ce->guc_state.lock);
360
361 ce->guc_state.sched_state += SCHED_STATE_BLOCKED;
362
363 GEM_BUG_ON(!context_blocked(ce)); /* Overflow check */
364 }
365
decr_context_blocked(struct intel_context * ce)366 static inline void decr_context_blocked(struct intel_context *ce)
367 {
368 lockdep_assert_held(&ce->guc_state.lock);
369
370 GEM_BUG_ON(!context_blocked(ce)); /* Underflow check */
371
372 ce->guc_state.sched_state -= SCHED_STATE_BLOCKED;
373 }
374
375 static struct intel_context *
request_to_scheduling_context(struct i915_request * rq)376 request_to_scheduling_context(struct i915_request *rq)
377 {
378 return intel_context_to_parent(rq->context);
379 }
380
context_guc_id_invalid(struct intel_context * ce)381 static inline bool context_guc_id_invalid(struct intel_context *ce)
382 {
383 return ce->guc_id.id == GUC_INVALID_CONTEXT_ID;
384 }
385
set_context_guc_id_invalid(struct intel_context * ce)386 static inline void set_context_guc_id_invalid(struct intel_context *ce)
387 {
388 ce->guc_id.id = GUC_INVALID_CONTEXT_ID;
389 }
390
ce_to_guc(struct intel_context * ce)391 static inline struct intel_guc *ce_to_guc(struct intel_context *ce)
392 {
393 return &ce->engine->gt->uc.guc;
394 }
395
to_priolist(struct rb_node * rb)396 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
397 {
398 return rb_entry(rb, struct i915_priolist, node);
399 }
400
401 /*
402 * When using multi-lrc submission a scratch memory area is reserved in the
403 * parent's context state for the process descriptor, work queue, and handshake
404 * between the parent + children contexts to insert safe preemption points
405 * between each of the BBs. Currently the scratch area is sized to a page.
406 *
407 * The layout of this scratch area is below:
408 * 0 guc_process_desc
409 * + sizeof(struct guc_process_desc) child go
410 * + CACHELINE_BYTES child join[0]
411 * ...
412 * + CACHELINE_BYTES child join[n - 1]
413 * ... unused
414 * PARENT_SCRATCH_SIZE / 2 work queue start
415 * ... work queue
416 * PARENT_SCRATCH_SIZE - 1 work queue end
417 */
/* The work queue occupies the second half of the parent scratch area. */
#define WQ_SIZE		(PARENT_SCRATCH_SIZE / 2)
#define WQ_OFFSET	(PARENT_SCRATCH_SIZE - WQ_SIZE)
420
421 struct sync_semaphore {
422 u32 semaphore;
423 u8 unused[CACHELINE_BYTES - sizeof(u32)];
424 };
425
426 struct parent_scratch {
427 union guc_descs {
428 struct guc_sched_wq_desc wq_desc;
429 struct guc_process_desc_v69 pdesc;
430 } descs;
431
432 struct sync_semaphore go;
433 struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1];
434
435 u8 unused[WQ_OFFSET - sizeof(union guc_descs) -
436 sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)];
437
438 u32 wq[WQ_SIZE / sizeof(u32)];
439 };
440
__get_parent_scratch_offset(struct intel_context * ce)441 static u32 __get_parent_scratch_offset(struct intel_context *ce)
442 {
443 GEM_BUG_ON(!ce->parallel.guc.parent_page);
444
445 return ce->parallel.guc.parent_page * PAGE_SIZE;
446 }
447
__get_wq_offset(struct intel_context * ce)448 static u32 __get_wq_offset(struct intel_context *ce)
449 {
450 BUILD_BUG_ON(offsetof(struct parent_scratch, wq) != WQ_OFFSET);
451
452 return __get_parent_scratch_offset(ce) + WQ_OFFSET;
453 }
454
455 static struct parent_scratch *
__get_parent_scratch(struct intel_context * ce)456 __get_parent_scratch(struct intel_context *ce)
457 {
458 BUILD_BUG_ON(sizeof(struct parent_scratch) != PARENT_SCRATCH_SIZE);
459 BUILD_BUG_ON(sizeof(struct sync_semaphore) != CACHELINE_BYTES);
460
461 /*
462 * Need to subtract LRC_STATE_OFFSET here as the
463 * parallel.guc.parent_page is the offset into ce->state while
464 * ce->lrc_reg_reg is ce->state + LRC_STATE_OFFSET.
465 */
466 return (struct parent_scratch *)
467 (ce->lrc_reg_state +
468 ((__get_parent_scratch_offset(ce) -
469 LRC_STATE_OFFSET) / sizeof(u32)));
470 }
471
472 static struct guc_process_desc_v69 *
__get_process_desc_v69(struct intel_context * ce)473 __get_process_desc_v69(struct intel_context *ce)
474 {
475 struct parent_scratch *ps = __get_parent_scratch(ce);
476
477 return &ps->descs.pdesc;
478 }
479
480 static struct guc_sched_wq_desc *
__get_wq_desc_v70(struct intel_context * ce)481 __get_wq_desc_v70(struct intel_context *ce)
482 {
483 struct parent_scratch *ps = __get_parent_scratch(ce);
484
485 return &ps->descs.wq_desc;
486 }
487
/*
 * Return a pointer to the work queue slot at the current tail if at least
 * @wqi_size bytes are free, or NULL when the queue is too full.
 *
 * The head pointer is cached in the intel_context structure in order to
 * reduce the number of accesses to shared GPU memory, which may be across a
 * PCIe bus; the shared head is only re-read when the cached value says there
 * is not enough room.
 */
static u32 *get_wq_pointer(struct intel_context *ce, u32 wqi_size)
{
	u32 space = CIRC_SPACE(ce->parallel.guc.wqi_tail,
			       ce->parallel.guc.wqi_head, WQ_SIZE);

	if (wqi_size > space) {
		/* Refresh the cached head and check one more time. */
		ce->parallel.guc.wqi_head = READ_ONCE(*ce->parallel.guc.wq_head);

		space = CIRC_SPACE(ce->parallel.guc.wqi_tail,
				   ce->parallel.guc.wqi_head, WQ_SIZE);
		if (wqi_size > space)
			return NULL;
	}

	return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)];
}
507
/*
 * Look up the context stored under @id in guc->context_lookup.
 * Returns whatever xa_load() yields (NULL when nothing is stored).
 */
static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id)
{
	GEM_BUG_ON(id >= GUC_MAX_CONTEXT_ID);

	return xa_load(&guc->context_lookup, id);
}
516
/*
 * Return the v69 LRC descriptor at @index in the descriptor pool, or NULL
 * when the pool has no CPU mapping.
 */
static struct guc_lrc_desc_v69 *__get_lrc_desc_v69(struct intel_guc *guc, u32 index)
{
	struct guc_lrc_desc_v69 *pool = guc->lrc_desc_pool_vaddr_v69;

	if (pool == NULL)
		return NULL;

	GEM_BUG_ON(index >= GUC_MAX_CONTEXT_ID);

	return pool + index;
}
528
guc_lrc_desc_pool_create_v69(struct intel_guc * guc)529 static int guc_lrc_desc_pool_create_v69(struct intel_guc *guc)
530 {
531 u32 size;
532 int ret;
533
534 size = PAGE_ALIGN(sizeof(struct guc_lrc_desc_v69) *
535 GUC_MAX_CONTEXT_ID);
536 ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool_v69,
537 (void **)&guc->lrc_desc_pool_vaddr_v69);
538 if (ret)
539 return ret;
540
541 return 0;
542 }
543
guc_lrc_desc_pool_destroy_v69(struct intel_guc * guc)544 static void guc_lrc_desc_pool_destroy_v69(struct intel_guc *guc)
545 {
546 if (!guc->lrc_desc_pool_vaddr_v69)
547 return;
548
549 guc->lrc_desc_pool_vaddr_v69 = NULL;
550 i915_vma_unpin_and_release(&guc->lrc_desc_pool_v69, I915_VMA_RELEASE_MAP);
551 }
552
guc_submission_initialized(struct intel_guc * guc)553 static inline bool guc_submission_initialized(struct intel_guc *guc)
554 {
555 return guc->submission_initialized;
556 }
557
/* Zero the v69 LRC descriptor for @id, if a descriptor pool is mapped. */
static inline void _reset_lrc_desc_v69(struct intel_guc *guc, u32 id)
{
	struct guc_lrc_desc_v69 *desc = __get_lrc_desc_v69(guc, id);

	if (!desc)
		return;

	memset(desc, 0, sizeof(*desc));
}
565
ctx_id_mapped(struct intel_guc * guc,u32 id)566 static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id)
567 {
568 return __get_context(guc, id);
569 }
570
/* Store @ce under @id in guc->context_lookup with interrupts disabled. */
static inline void set_ctx_id_mapping(struct intel_guc *guc, u32 id,
				      struct intel_context *ce)
{
	unsigned long irqflags;

	/*
	 * There is no xa_save_irqsave wrapper in the xarray API, hence the
	 * explicit lock / __xa_store / unlock sequence.
	 */
	xa_lock_irqsave(&guc->context_lookup, irqflags);
	__xa_store(&guc->context_lookup, id, ce, GFP_ATOMIC);
	xa_unlock_irqrestore(&guc->context_lookup, irqflags);
}
584
/*
 * Remove the mapping for @id: zero its v69 LRC descriptor (if any) and erase
 * the entry from guc->context_lookup. No-op when submission has not been
 * initialized.
 */
static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id)
{
	unsigned long irqflags;

	if (unlikely(!guc_submission_initialized(guc)))
		return;

	_reset_lrc_desc_v69(guc, id);

	/*
	 * There is no xa_erase_irqsave wrapper in the xarray API, hence the
	 * explicit lock / __xa_erase / unlock sequence.
	 */
	xa_lock_irqsave(&guc->context_lookup, irqflags);
	__xa_erase(&guc->context_lookup, id);
	xa_unlock_irqrestore(&guc->context_lookup, irqflags);
}
602
decr_outstanding_submission_g2h(struct intel_guc * guc)603 static void decr_outstanding_submission_g2h(struct intel_guc *guc)
604 {
605 if (atomic_dec_and_test(&guc->outstanding_submission_g2h))
606 wake_up_all(&guc->ct.wq);
607 }
608
/*
 * Send an H2G via intel_guc_send_busy_loop(), accounting for the expected
 * G2H reply (if @g2h_len_dw is non-zero) in guc->outstanding_submission_g2h
 * before the send.
 *
 * Returns the result of intel_guc_send_busy_loop().
 */
static int guc_submission_send_busy_loop(struct intel_guc *guc,
					 const u32 *action,
					 u32 len,
					 u32 g2h_len_dw,
					 bool loop)
{
	const bool expects_reply = g2h_len_dw != 0;

	/*
	 * A send that requires a reply must always loop: there is no handling
	 * for a reply that never arrives because the send was aborted on a
	 * busy channel.
	 */
	GEM_BUG_ON(expects_reply && !loop);

	if (expects_reply)
		atomic_inc(&guc->outstanding_submission_g2h);

	return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
}
627
/*
 * Sleep on guc->ct.wq until @wait_var reads zero.
 *
 * @interruptible selects TASK_INTERRUPTIBLE vs. TASK_UNINTERRUPTIBLE.
 * @timeout is the remaining time handed to io_schedule_timeout(); it must
 * not be negative.
 *
 * Returns 0 once @wait_var is zero, -ETIME when the timeout expires first
 * (including a zero @timeout with a non-zero @wait_var), or -EINTR when a
 * signal is pending for the chosen task state.
 */
int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
				   atomic_t *wait_var,
				   bool interruptible,
				   long timeout)
{
	const int state = interruptible ?
		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
	DEFINE_WAIT(wait);
	int err = 0;

	might_sleep();
	GEM_BUG_ON(timeout < 0);

	/* Fast paths: nothing to wait for, or no time allowed to wait. */
	if (!atomic_read(wait_var))
		return 0;

	if (!timeout)
		return -ETIME;

	while (1) {
		prepare_to_wait(&guc->ct.wq, &wait, state);

		if (!atomic_read(wait_var))
			break;

		if (signal_pending_state(state, current)) {
			err = -EINTR;
			break;
		}

		if (!timeout) {
			err = -ETIME;
			break;
		}

		timeout = io_schedule_timeout(timeout);
	}
	finish_wait(&guc->ct.wq, &wait);

	return err;
}
668
intel_guc_wait_for_idle(struct intel_guc * guc,long timeout)669 int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout)
670 {
671 if (!intel_uc_uses_guc_submission(&guc_to_gt(guc)->uc))
672 return 0;
673
674 return intel_guc_wait_for_pending_msg(guc,
675 &guc->outstanding_submission_g2h,
676 true, timeout);
677 }
678
679 static int guc_context_policy_init_v70(struct intel_context *ce, bool loop);
680 static int try_context_registration(struct intel_context *ce, bool loop);
681
__guc_add_request(struct intel_guc * guc,struct i915_request * rq)682 static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq)
683 {
684 int err = 0;
685 struct intel_context *ce = request_to_scheduling_context(rq);
686 u32 action[3];
687 int len = 0;
688 u32 g2h_len_dw = 0;
689 bool enabled;
690
691 lockdep_assert_held(&rq->engine->sched_engine->lock);
692
693 /*
694 * Corner case where requests were sitting in the priority list or a
695 * request resubmitted after the context was banned.
696 */
697 if (unlikely(!intel_context_is_schedulable(ce))) {
698 i915_request_put(i915_request_mark_eio(rq));
699 intel_engine_signal_breadcrumbs(ce->engine);
700 return 0;
701 }
702
703 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
704 GEM_BUG_ON(context_guc_id_invalid(ce));
705
706 if (context_policy_required(ce)) {
707 err = guc_context_policy_init_v70(ce, false);
708 if (err)
709 return err;
710 }
711
712 spin_lock(&ce->guc_state.lock);
713
714 /*
715 * The request / context will be run on the hardware when scheduling
716 * gets enabled in the unblock. For multi-lrc we still submit the
717 * context to move the LRC tails.
718 */
719 if (unlikely(context_blocked(ce) && !intel_context_is_parent(ce)))
720 goto out;
721
722 enabled = context_enabled(ce) || context_blocked(ce);
723
724 if (!enabled) {
725 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
726 action[len++] = ce->guc_id.id;
727 action[len++] = GUC_CONTEXT_ENABLE;
728 set_context_pending_enable(ce);
729 intel_context_get(ce);
730 g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
731 } else {
732 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT;
733 action[len++] = ce->guc_id.id;
734 }
735
736 err = intel_guc_send_nb(guc, action, len, g2h_len_dw);
737 if (!enabled && !err) {
738 trace_intel_context_sched_enable(ce);
739 atomic_inc(&guc->outstanding_submission_g2h);
740 set_context_enabled(ce);
741
742 /*
743 * Without multi-lrc KMD does the submission step (moving the
744 * lrc tail) so enabling scheduling is sufficient to submit the
745 * context. This isn't the case in multi-lrc submission as the
746 * GuC needs to move the tails, hence the need for another H2G
747 * to submit a multi-lrc context after enabling scheduling.
748 */
749 if (intel_context_is_parent(ce)) {
750 action[0] = INTEL_GUC_ACTION_SCHED_CONTEXT;
751 err = intel_guc_send_nb(guc, action, len - 1, 0);
752 }
753 } else if (!enabled) {
754 clr_context_pending_enable(ce);
755 intel_context_put(ce);
756 }
757 if (likely(!err))
758 trace_i915_request_guc_submit(rq);
759
760 out:
761 spin_unlock(&ce->guc_state.lock);
762 return err;
763 }
764
guc_add_request(struct intel_guc * guc,struct i915_request * rq)765 static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
766 {
767 int ret = __guc_add_request(guc, rq);
768
769 if (unlikely(ret == -EBUSY)) {
770 guc->stalled_request = rq;
771 guc->submission_stall_reason = STALL_ADD_REQUEST;
772 }
773
774 return ret;
775 }
776
guc_set_lrc_tail(struct i915_request * rq)777 static inline void guc_set_lrc_tail(struct i915_request *rq)
778 {
779 rq->context->lrc_reg_state[CTX_RING_TAIL] =
780 intel_ring_set_tail(rq->ring, rq->tail);
781 }
782
rq_prio(const struct i915_request * rq)783 static inline int rq_prio(const struct i915_request *rq)
784 {
785 return rq->sched.attr.priority;
786 }
787
is_multi_lrc_rq(struct i915_request * rq)788 static bool is_multi_lrc_rq(struct i915_request *rq)
789 {
790 return intel_context_is_parallel(rq->context);
791 }
792
can_merge_rq(struct i915_request * rq,struct i915_request * last)793 static bool can_merge_rq(struct i915_request *rq,
794 struct i915_request *last)
795 {
796 return request_to_scheduling_context(rq) ==
797 request_to_scheduling_context(last);
798 }
799
wq_space_until_wrap(struct intel_context * ce)800 static u32 wq_space_until_wrap(struct intel_context *ce)
801 {
802 return (WQ_SIZE - ce->parallel.guc.wqi_tail);
803 }
804
/*
 * Publish a work queue item of @wqi_size bytes: advance the cached tail
 * (wrapping at WQ_SIZE) and write the new value to the shared tail pointer.
 */
static void write_wqi(struct intel_context *ce, u32 wqi_size)
{
	/* The wrap below is a mask, which only works for power-of-2 sizes. */
	BUILD_BUG_ON(!is_power_of_2(WQ_SIZE));

	/* The WQI contents must be visible before the tail update is. */
	intel_guc_write_barrier(ce_to_guc(ce));

	ce->parallel.guc.wqi_tail =
		(ce->parallel.guc.wqi_tail + wqi_size) & (WQ_SIZE - 1);
	WRITE_ONCE(*ce->parallel.guc.wq_tail, ce->parallel.guc.wqi_tail);
}
818
guc_wq_noop_append(struct intel_context * ce)819 static int guc_wq_noop_append(struct intel_context *ce)
820 {
821 u32 *wqi = get_wq_pointer(ce, wq_space_until_wrap(ce));
822 u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1;
823
824 if (!wqi)
825 return -EBUSY;
826
827 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));
828
829 *wqi = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
830 FIELD_PREP(WQ_LEN_MASK, len_dw);
831 ce->parallel.guc.wqi_tail = 0;
832
833 return 0;
834 }
835
__guc_wq_item_append(struct i915_request * rq)836 static int __guc_wq_item_append(struct i915_request *rq)
837 {
838 struct intel_context *ce = request_to_scheduling_context(rq);
839 struct intel_context *child;
840 unsigned int wqi_size = (ce->parallel.number_children + 4) *
841 sizeof(u32);
842 u32 *wqi;
843 u32 len_dw = (wqi_size / sizeof(u32)) - 1;
844 int ret;
845
846 /* Ensure context is in correct state updating work queue */
847 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
848 GEM_BUG_ON(context_guc_id_invalid(ce));
849 GEM_BUG_ON(context_wait_for_deregister_to_register(ce));
850 GEM_BUG_ON(!ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id));
851
852 /* Insert NOOP if this work queue item will wrap the tail pointer. */
853 if (wqi_size > wq_space_until_wrap(ce)) {
854 ret = guc_wq_noop_append(ce);
855 if (ret)
856 return ret;
857 }
858
859 wqi = get_wq_pointer(ce, wqi_size);
860 if (!wqi)
861 return -EBUSY;
862
863 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));
864
865 *wqi++ = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
866 FIELD_PREP(WQ_LEN_MASK, len_dw);
867 *wqi++ = ce->lrc.lrca;
868 *wqi++ = FIELD_PREP(WQ_GUC_ID_MASK, ce->guc_id.id) |
869 FIELD_PREP(WQ_RING_TAIL_MASK, ce->ring->tail / sizeof(u64));
870 *wqi++ = 0; /* fence_id */
871 for_each_child(ce, child)
872 *wqi++ = child->ring->tail / sizeof(u64);
873
874 write_wqi(ce, wqi_size);
875
876 return 0;
877 }
878
guc_wq_item_append(struct intel_guc * guc,struct i915_request * rq)879 static int guc_wq_item_append(struct intel_guc *guc,
880 struct i915_request *rq)
881 {
882 struct intel_context *ce = request_to_scheduling_context(rq);
883 int ret;
884
885 if (unlikely(!intel_context_is_schedulable(ce)))
886 return 0;
887
888 ret = __guc_wq_item_append(rq);
889 if (unlikely(ret == -EBUSY)) {
890 guc->stalled_request = rq;
891 guc->submission_stall_reason = STALL_MOVE_LRC_TAIL;
892 }
893
894 return ret;
895 }
896
multi_lrc_submit(struct i915_request * rq)897 static bool multi_lrc_submit(struct i915_request *rq)
898 {
899 struct intel_context *ce = request_to_scheduling_context(rq);
900
901 intel_ring_set_tail(rq->ring, rq->tail);
902
903 /*
904 * We expect the front end (execbuf IOCTL) to set this flag on the last
905 * request generated from a multi-BB submission. This indicates to the
906 * backend (GuC interface) that we should submit this context thus
907 * submitting all the requests generated in parallel.
908 */
909 return test_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, &rq->fence.flags) ||
910 !intel_context_is_schedulable(ce);
911 }
912
/*
 * Pull requests off the scheduling engine's priority queue and push at most
 * one context's worth of work to the GuC.
 *
 * The work is done in three steps (register context, move LRC tail, add
 * request). If a step previously stalled, guc->stalled_request and
 * guc->submission_stall_reason are used to resume at that step instead of
 * dequeuing new requests.
 *
 * Must be called with sched_engine->lock held.
 *
 * Returns non-zero if a submission was made, and 0 if nothing was submitted,
 * if a step returned -EBUSY (the tasklet is rescheduled), or if the tasklet
 * was disabled because of -EPIPE or an unexpected error.
 */
static int guc_dequeue_one_context(struct intel_guc *guc)
{
	struct i915_sched_engine * const sched_engine = guc->sched_engine;
	struct i915_request *last = NULL;
	bool submit = false;
	struct rb_node *rb;
	int ret;

	lockdep_assert_held(&sched_engine->lock);

	/* Resume a stalled request at the step recorded when it stalled. */
	if (guc->stalled_request) {
		submit = true;
		last = guc->stalled_request;

		switch (guc->submission_stall_reason) {
		case STALL_REGISTER_CONTEXT:
			goto register_context;
		case STALL_MOVE_LRC_TAIL:
			goto move_lrc_tail;
		case STALL_ADD_REQUEST:
			goto add_request;
		default:
			/* Unknown reason: warn, then fall into the dequeue loop. */
			MISSING_CASE(guc->submission_stall_reason);
		}
	}

	/* Walk the priority lists, starting from the first node in the tree. */
	while ((rb = rb_first_cached(&sched_engine->queue))) {
		struct i915_priolist *p = to_priolist(rb);
		struct i915_request *rq, *rn;

		priolist_for_each_request_consume(rq, rn, p) {
			/* Stop at the first request that cannot be merged with last. */
			if (last && !can_merge_rq(rq, last))
				goto register_context;

			list_del_init(&rq->sched.link);

			__i915_request_submit(rq);

			trace_i915_request_in(rq, 0);
			last = rq;

			if (is_multi_lrc_rq(rq)) {
				/*
				 * We need to coalesce all multi-lrc requests in
				 * a relationship into a single H2G. We are
				 * guaranteed that all of these requests will be
				 * submitted sequentially.
				 */
				if (multi_lrc_submit(rq)) {
					submit = true;
					goto register_context;
				}
			} else {
				submit = true;
			}
		}

		/* This priority list has been fully consumed; drop it. */
		rb_erase_cached(&p->node, &sched_engine->queue);
		i915_priolist_free(p);
	}

register_context:
	if (submit) {
		struct intel_context *ce = request_to_scheduling_context(last);

		/* Register the context first if its guc_id is not mapped yet. */
		if (unlikely(!ctx_id_mapped(guc, ce->guc_id.id) &&
			     intel_context_is_schedulable(ce))) {
			ret = try_context_registration(ce, false);
			if (unlikely(ret == -EPIPE)) {
				goto deadlk;
			} else if (ret == -EBUSY) {
				guc->stalled_request = last;
				guc->submission_stall_reason =
					STALL_REGISTER_CONTEXT;
				goto schedule_tasklet;
			} else if (ret != 0) {
				GEM_WARN_ON(ret);	/* Unexpected */
				goto deadlk;
			}
		}

move_lrc_tail:
		if (is_multi_lrc_rq(last)) {
			/* On -EBUSY guc_wq_item_append() records the stall itself. */
			ret = guc_wq_item_append(guc, last);
			if (ret == -EBUSY) {
				goto schedule_tasklet;
			} else if (ret != 0) {
				GEM_WARN_ON(ret);	/* Unexpected */
				goto deadlk;
			}
		} else {
			guc_set_lrc_tail(last);
		}

add_request:
		/* NOTE(review): the stall state for -EBUSY is presumably set inside guc_add_request() - confirm. */
		ret = guc_add_request(guc, last);
		if (unlikely(ret == -EPIPE)) {
			goto deadlk;
		} else if (ret == -EBUSY) {
			goto schedule_tasklet;
		} else if (ret != 0) {
			GEM_WARN_ON(ret);	/* Unexpected */
			goto deadlk;
		}
	}

	/* All steps completed (or there was nothing to do): clear stall state. */
	guc->stalled_request = NULL;
	guc->submission_stall_reason = STALL_NONE;
	return submit;

deadlk:
	/* Clear the tasklet callback and disable the tasklet without waiting. */
	sched_engine->tasklet.callback = NULL;
	tasklet_disable_nosync(&sched_engine->tasklet);
	return false;

schedule_tasklet:
	/* Try again from the tasklet; the stall state tells it where to resume. */
	tasklet_schedule(&sched_engine->tasklet);
	return false;
}
1032
guc_submission_tasklet(struct tasklet_struct * t)1033 static void guc_submission_tasklet(struct tasklet_struct *t)
1034 {
1035 struct i915_sched_engine *sched_engine =
1036 from_tasklet(sched_engine, t, tasklet);
1037 unsigned long flags;
1038 bool loop;
1039
1040 spin_lock_irqsave(&sched_engine->lock, flags);
1041
1042 do {
1043 loop = guc_dequeue_one_context(sched_engine->private_data);
1044 } while (loop);
1045
1046 i915_sched_engine_reset_on_empty(sched_engine);
1047
1048 spin_unlock_irqrestore(&sched_engine->lock, flags);
1049 }
1050
/*
 * Engine interrupt handler: signal the engine's breadcrumbs when @iir has
 * GT_RENDER_USER_INTERRUPT set; every other bit is ignored.
 */
static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir)
{
	if (!(iir & GT_RENDER_USER_INTERRUPT))
		return;

	intel_engine_signal_breadcrumbs(engine);
}
1056
/* Forward declarations of static helpers that are used before their definitions. */
static void __guc_context_destroy(struct intel_context *ce);
static void release_guc_id(struct intel_guc *guc, struct intel_context *ce);
static void guc_signal_context_fence(struct intel_context *ce);
static void guc_cancel_context_requests(struct intel_context *ce);
static void guc_blocked_fence_complete(struct intel_context *ce);
1062
/*
 * Walk every context in guc->context_lookup, snapshot its scheduling-state
 * flags, reset that state with init_sched_state(), and then release whatever
 * each recorded flag was still holding: outstanding-G2H counts, context
 * references, fences, pins and, for destroyed contexts, the guc_id and the
 * context itself.
 *
 * The xarray lock is dropped while each context is processed and re-taken
 * before advancing to the next entry.
 */
static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
{
	struct intel_context *ce;
	unsigned long index, flags;
	bool pending_disable, pending_enable, deregister, destroyed, banned;

	xa_lock_irqsave(&guc->context_lookup, flags);
	xa_for_each(&guc->context_lookup, index, ce) {
		/*
		 * Corner case where the ref count on the object is zero but a
		 * deregister G2H was lost. In this case we don't touch the ref
		 * count and finish the destroy of the context.
		 */
		bool do_put = kref_get_unless_zero(&ce->ref);

		xa_unlock(&guc->context_lookup);

		if (test_bit(CONTEXT_GUC_INIT, &ce->flags) &&
		    (cancel_delayed_work(&ce->guc_state.sched_disable_delay_work))) {
			/* successful cancel so jump straight to close it */
			intel_context_sched_disable_unpin(ce);
		}

		spin_lock(&ce->guc_state.lock);

		/*
		 * Once we are at this point submission_disabled() is guaranteed
		 * to be visible to all callers who set the below flags (see above
		 * flush and flushes in reset_prepare). If submission_disabled()
		 * is set, the caller shouldn't set these flags.
		 */

		/* Snapshot the state flags, then reset the state under the lock. */
		destroyed = context_destroyed(ce);
		pending_enable = context_pending_enable(ce);
		pending_disable = context_pending_disable(ce);
		deregister = context_wait_for_deregister_to_register(ce);
		banned = context_banned(ce);
		init_sched_state(ce);

		spin_unlock(&ce->guc_state.lock);

		if (pending_enable || destroyed || deregister) {
			decr_outstanding_submission_g2h(guc);
			if (deregister)
				guc_signal_context_fence(ce);
			if (destroyed) {
				intel_gt_pm_put_async(guc_to_gt(guc));
				release_guc_id(guc, ce);
				__guc_context_destroy(ce);
			}
			if (pending_enable || deregister)
				intel_context_put(ce);
		}

		/* Not mutually exclusive with above if statement. */
		if (pending_disable) {
			guc_signal_context_fence(ce);
			if (banned) {
				guc_cancel_context_requests(ce);
				intel_engine_signal_breadcrumbs(ce->engine);
			}
			intel_context_sched_disable_unpin(ce);
			decr_outstanding_submission_g2h(guc);

			spin_lock(&ce->guc_state.lock);
			guc_blocked_fence_complete(ce);
			spin_unlock(&ce->guc_state.lock);

			intel_context_put(ce);
		}

		/* Drop the reference taken at the top of the iteration, if any. */
		if (do_put)
			intel_context_put(ce);
		xa_lock(&guc->context_lookup);
	}
	xa_unlock_irqrestore(&guc->context_lookup, flags);
}
1140
1141 /*
1142 * GuC stores busyness stats for each engine at context in/out boundaries. A
1143 * context 'in' logs execution start time, 'out' adds in -> out delta to total.
1144 * i915/kmd accesses 'start', 'total' and 'context id' from memory shared with
1145 * GuC.
1146 *
1147 * __i915_pmu_event_read samples engine busyness. When sampling, if context id
1148 * is valid (!= ~0) and start is non-zero, the engine is considered to be
1149 * active. For an active engine total busyness = total + (now - start), where
1150 * 'now' is the time at which the busyness is sampled. For inactive engine,
1151 * total busyness = total.
1152 *
1153 * All times are captured from GUCPMTIMESTAMP reg and are in gt clock domain.
1154 *
1155 * The start and total values provided by GuC are 32 bits and wrap around in a
1156 * few minutes. Since perf pmu provides busyness as 64 bit monotonically
1157 * increasing ns values, there is a need for this implementation to account for
1158 * overflows and extend the GuC provided values to 64 bits before returning
1159 * busyness to the user. In order to do that, a worker runs periodically at
1160 * frequency = 1/8th the time it takes for the timestamp to wrap (i.e. once in
1161 * 27 seconds for a gt clock frequency of 19.2 MHz).
1162 */
1163
/* Largest value of the 32-bit timestamp before it wraps, in gt clocks. */
#define WRAP_TIME_CLKS U32_MAX
/* One eighth of WRAP_TIME_CLKS. */
#define POLL_TIME_CLKS (WRAP_TIME_CLKS >> 3)
1166
1167 static void
__extend_last_switch(struct intel_guc * guc,u64 * prev_start,u32 new_start)1168 __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
1169 {
1170 u32 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
1171 u32 gt_stamp_last = lower_32_bits(guc->timestamp.gt_stamp);
1172
1173 if (new_start == lower_32_bits(*prev_start))
1174 return;
1175
1176 /*
1177 * When gt is unparked, we update the gt timestamp and start the ping
1178 * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt
1179 * is unparked, all switched in contexts will have a start time that is
1180 * within +/- POLL_TIME_CLKS of the most recent gt_stamp.
1181 *
1182 * If neither gt_stamp nor new_start has rolled over, then the
1183 * gt_stamp_hi does not need to be adjusted, however if one of them has
1184 * rolled over, we need to adjust gt_stamp_hi accordingly.
1185 *
1186 * The below conditions address the cases of new_start rollover and
1187 * gt_stamp_last rollover respectively.
1188 */
1189 if (new_start < gt_stamp_last &&
1190 (new_start - gt_stamp_last) <= POLL_TIME_CLKS)
1191 gt_stamp_hi++;
1192
1193 if (new_start > gt_stamp_last &&
1194 (gt_stamp_last - new_start) <= POLL_TIME_CLKS && gt_stamp_hi)
1195 gt_stamp_hi--;
1196
1197 *prev_start = ((u64)gt_stamp_hi << 32) | new_start;
1198 }
1199
/* Read @field_ of the struct guc_engine_usage_record at offset 0 of @map_. */
#define record_read(map_, field_) \
	iosys_map_rd_field(map_, 0, struct guc_engine_usage_record, field_)
1202
1203 /*
1204 * GuC updates shared memory and KMD reads it. Since this is not synchronized,
1205 * we run into a race where the value read is inconsistent. Sometimes the
1206 * inconsistency is in reading the upper MSB bytes of the last_in value when
1207 * this race occurs. 2 types of cases are seen - upper 8 bits are zero and upper
1208 * 24 bits are zero. Since these are non-zero values, it is non-trivial to
1209 * determine validity of these values. Instead we read the values multiple times
 * until they are consistent. In test runs, 3 attempts result in consistent
 * values. The upper bound is set to 6 attempts and may need to be tuned as per
 * any new occurrences.
1213 */
static void __get_engine_usage_record(struct intel_engine_cs *engine,
				      u32 *last_in, u32 *id, u32 *total)
{
	struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine);
	int attempt;

	/*
	 * Sample the three fields, then read each one again. The sample is
	 * accepted once all three re-reads match; otherwise try again, up to
	 * six attempts in total. The last sample is kept even if it never
	 * verified.
	 */
	for (attempt = 0; attempt < 6; attempt++) {
		*last_in = record_read(&rec_map, last_switch_in_stamp);
		*id = record_read(&rec_map, current_context_index);
		*total = record_read(&rec_map, total_runtime);

		if (record_read(&rec_map, last_switch_in_stamp) != *last_in)
			continue;
		if (record_read(&rec_map, current_context_index) != *id)
			continue;
		if (record_read(&rec_map, total_runtime) != *total)
			continue;

		break;
	}
}
1231
guc_update_engine_gt_clks(struct intel_engine_cs * engine)1232 static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
1233 {
1234 struct intel_engine_guc_stats *stats = &engine->stats.guc;
1235 struct intel_guc *guc = &engine->gt->uc.guc;
1236 u32 last_switch, ctx_id, total;
1237
1238 lockdep_assert_held(&guc->timestamp.lock);
1239
1240 __get_engine_usage_record(engine, &last_switch, &ctx_id, &total);
1241
1242 stats->running = ctx_id != ~0U && last_switch;
1243 if (stats->running)
1244 __extend_last_switch(guc, &stats->start_gt_clk, last_switch);
1245
1246 /*
1247 * Instead of adjusting the total for overflow, just add the
1248 * difference from previous sample stats->total_gt_clks
1249 */
1250 if (total && total != ~0U) {
1251 stats->total_gt_clks += (u32)(total - stats->prev_total);
1252 stats->prev_total = total;
1253 }
1254 }
1255
gpm_timestamp_shift(struct intel_gt * gt)1256 static u32 gpm_timestamp_shift(struct intel_gt *gt)
1257 {
1258 intel_wakeref_t wakeref;
1259 u32 reg, shift;
1260
1261 with_intel_runtime_pm(gt->uncore->rpm, wakeref)
1262 reg = intel_uncore_read(gt->uncore, RPM_CONFIG0);
1263
1264 shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
1265 GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT;
1266
1267 return 3 - shift;
1268 }
1269
/*
 * Refresh guc->timestamp.gt_stamp from the MISC_STATUS0/1 register pair
 * (shifted right by guc->timestamp.shift) and store the CPU time of the
 * sample in @now.
 *
 * Only the lower 32 bits of the register value are used; the upper half of
 * gt_stamp is incremented whenever the new lower half is smaller than the
 * previous one.
 *
 * Must be called with guc->timestamp.lock held.
 */
static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
{
	struct intel_gt *gt = guc_to_gt(guc);
	u32 stamp_lo, stamp_hi;
	u64 prev_stamp, gpm_ts;

	lockdep_assert_held(&guc->timestamp.lock);

	prev_stamp = guc->timestamp.gt_stamp;

	gpm_ts = intel_uncore_read64_2x32(gt->uncore, MISC_STATUS0,
					  MISC_STATUS1);
	gpm_ts >>= guc->timestamp.shift;
	*now = ktime_get();

	stamp_lo = lower_32_bits(gpm_ts);
	stamp_hi = upper_32_bits(prev_stamp);

	/* The lower half went backwards, so it wrapped. */
	if (stamp_lo < lower_32_bits(prev_stamp))
		stamp_hi++;

	guc->timestamp.gt_stamp = ((u64)stamp_hi << 32) | stamp_lo;
}
1289
1290 /*
1291 * Unlike the execlist mode of submission total and active times are in terms of
1292 * gt clocks. The *now parameter is retained to return the cpu time at which the
1293 * busyness was sampled.
1294 */
guc_engine_busyness(struct intel_engine_cs * engine,ktime_t * now)1295 static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
1296 {
1297 struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc;
1298 struct i915_gpu_error *gpu_error = &engine->i915->gpu_error;
1299 struct intel_gt *gt = engine->gt;
1300 struct intel_guc *guc = >->uc.guc;
1301 u64 total, gt_stamp_saved;
1302 unsigned long flags;
1303 u32 reset_count;
1304 bool in_reset;
1305
1306 spin_lock_irqsave(&guc->timestamp.lock, flags);
1307
1308 /*
1309 * If a reset happened, we risk reading partially updated engine
1310 * busyness from GuC, so we just use the driver stored copy of busyness.
1311 * Synchronize with gt reset using reset_count and the
1312 * I915_RESET_BACKOFF flag. Note that reset flow updates the reset_count
1313 * after I915_RESET_BACKOFF flag, so ensure that the reset_count is
1314 * usable by checking the flag afterwards.
1315 */
1316 reset_count = i915_reset_count(gpu_error);
1317 in_reset = test_bit(I915_RESET_BACKOFF, >->reset.flags);
1318
1319 *now = ktime_get();
1320
1321 /*
1322 * The active busyness depends on start_gt_clk and gt_stamp.
1323 * gt_stamp is updated by i915 only when gt is awake and the
1324 * start_gt_clk is derived from GuC state. To get a consistent
1325 * view of activity, we query the GuC state only if gt is awake.
1326 */
1327 if (!in_reset && intel_gt_pm_get_if_awake(gt)) {
1328 stats_saved = *stats;
1329 gt_stamp_saved = guc->timestamp.gt_stamp;
1330 /*
1331 * Update gt_clks, then gt timestamp to simplify the 'gt_stamp -
1332 * start_gt_clk' calculation below for active engines.
1333 */
1334 guc_update_engine_gt_clks(engine);
1335 guc_update_pm_timestamp(guc, now);
1336 intel_gt_pm_put_async(gt);
1337 if (i915_reset_count(gpu_error) != reset_count) {
1338 *stats = stats_saved;
1339 guc->timestamp.gt_stamp = gt_stamp_saved;
1340 }
1341 }
1342
1343 total = intel_gt_clock_interval_to_ns(gt, stats->total_gt_clks);
1344 if (stats->running) {
1345 u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk;
1346
1347 total += intel_gt_clock_interval_to_ns(gt, clk);
1348 }
1349
1350 spin_unlock_irqrestore(&guc->timestamp.lock, flags);
1351
1352 return ns_to_ktime(total);
1353 }
1354
guc_enable_busyness_worker(struct intel_guc * guc)1355 static void guc_enable_busyness_worker(struct intel_guc *guc)
1356 {
1357 mod_delayed_work(system_highpri_wq, &guc->timestamp.work, guc->timestamp.ping_delay);
1358 }
1359
guc_cancel_busyness_worker(struct intel_guc * guc)1360 static void guc_cancel_busyness_worker(struct intel_guc *guc)
1361 {
1362 cancel_delayed_work_sync(&guc->timestamp.work);
1363 }
1364
__reset_guc_busyness_stats(struct intel_guc * guc)1365 static void __reset_guc_busyness_stats(struct intel_guc *guc)
1366 {
1367 struct intel_gt *gt = guc_to_gt(guc);
1368 struct intel_engine_cs *engine;
1369 enum intel_engine_id id;
1370 unsigned long flags;
1371 ktime_t unused;
1372
1373 guc_cancel_busyness_worker(guc);
1374
1375 spin_lock_irqsave(&guc->timestamp.lock, flags);
1376
1377 guc_update_pm_timestamp(guc, &unused);
1378 for_each_engine(engine, gt, id) {
1379 guc_update_engine_gt_clks(engine);
1380 engine->stats.guc.prev_total = 0;
1381 }
1382
1383 spin_unlock_irqrestore(&guc->timestamp.lock, flags);
1384 }
1385
__update_guc_busyness_stats(struct intel_guc * guc)1386 static void __update_guc_busyness_stats(struct intel_guc *guc)
1387 {
1388 struct intel_gt *gt = guc_to_gt(guc);
1389 struct intel_engine_cs *engine;
1390 enum intel_engine_id id;
1391 unsigned long flags;
1392 ktime_t unused;
1393
1394 guc->timestamp.last_stat_jiffies = jiffies;
1395
1396 spin_lock_irqsave(&guc->timestamp.lock, flags);
1397
1398 guc_update_pm_timestamp(guc, &unused);
1399 for_each_engine(engine, gt, id)
1400 guc_update_engine_gt_clks(engine);
1401
1402 spin_unlock_irqrestore(&guc->timestamp.lock, flags);
1403 }
1404
__guc_context_update_stats(struct intel_context * ce)1405 static void __guc_context_update_stats(struct intel_context *ce)
1406 {
1407 struct intel_guc *guc = ce_to_guc(ce);
1408 unsigned long flags;
1409
1410 spin_lock_irqsave(&guc->timestamp.lock, flags);
1411 lrc_update_runtime(ce);
1412 spin_unlock_irqrestore(&guc->timestamp.lock, flags);
1413 }
1414
/*
 * Update the runtime stats of @ce, but only when
 * intel_context_pin_if_active() succeeds; the pin it takes is dropped again
 * afterwards.
 */
static void guc_context_update_stats(struct intel_context *ce)
{
	if (intel_context_pin_if_active(ce)) {
		__guc_context_update_stats(ce);
		intel_context_unpin(ce);
	}
}
1423
guc_timestamp_ping(struct work_struct * wrk)1424 static void guc_timestamp_ping(struct work_struct *wrk)
1425 {
1426 struct intel_guc *guc = container_of(wrk, typeof(*guc),
1427 timestamp.work.work);
1428 struct intel_uc *uc = container_of(guc, typeof(*uc), guc);
1429 struct intel_gt *gt = guc_to_gt(guc);
1430 struct intel_context *ce;
1431 intel_wakeref_t wakeref;
1432 unsigned long index;
1433 int srcu, ret;
1434
1435 /*
1436 * Ideally the busyness worker should take a gt pm wakeref because the
1437 * worker only needs to be active while gt is awake. However, the
1438 * gt_park path cancels the worker synchronously and this complicates
1439 * the flow if the worker is also running at the same time. The cancel
1440 * waits for the worker and when the worker releases the wakeref, that
1441 * would call gt_park and would lead to a deadlock.
1442 *
1443 * The resolution is to take the global pm wakeref if runtime pm is
1444 * already active. If not, we don't need to update the busyness stats as
1445 * the stats would already be updated when the gt was parked.
1446 *
1447 * Note:
1448 * - We do not requeue the worker if we cannot take a reference to runtime
1449 * pm since intel_guc_busyness_unpark would requeue the worker in the
1450 * resume path.
1451 *
1452 * - If the gt was parked longer than time taken for GT timestamp to roll
1453 * over, we ignore those rollovers since we don't care about tracking
1454 * the exact GT time. We only care about roll overs when the gt is
1455 * active and running workloads.
1456 *
1457 * - There is a window of time between gt_park and runtime suspend,
1458 * where the worker may run. This is acceptable since the worker will
1459 * not find any new data to update busyness.
1460 */
1461 wakeref = intel_runtime_pm_get_if_active(>->i915->runtime_pm);
1462 if (!wakeref)
1463 return;
1464
1465 /*
1466 * Synchronize with gt reset to make sure the worker does not
1467 * corrupt the engine/guc stats. NB: can't actually block waiting
1468 * for a reset to complete as the reset requires flushing out
1469 * this worker thread if started. So waiting would deadlock.
1470 */
1471 ret = intel_gt_reset_trylock(gt, &srcu);
1472 if (ret)
1473 goto err_trylock;
1474
1475 __update_guc_busyness_stats(guc);
1476
1477 /* adjust context stats for overflow */
1478 xa_for_each(&guc->context_lookup, index, ce)
1479 guc_context_update_stats(ce);
1480
1481 intel_gt_reset_unlock(gt, srcu);
1482
1483 guc_enable_busyness_worker(guc);
1484
1485 err_trylock:
1486 intel_runtime_pm_put(>->i915->runtime_pm, wakeref);
1487 }
1488
guc_action_enable_usage_stats(struct intel_guc * guc)1489 static int guc_action_enable_usage_stats(struct intel_guc *guc)
1490 {
1491 u32 offset = intel_guc_engine_usage_offset(guc);
1492 u32 action[] = {
1493 INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF,
1494 offset,
1495 0,
1496 };
1497
1498 return intel_guc_send(guc, action, ARRAY_SIZE(action));
1499 }
1500
guc_init_engine_stats(struct intel_guc * guc)1501 static int guc_init_engine_stats(struct intel_guc *guc)
1502 {
1503 struct intel_gt *gt = guc_to_gt(guc);
1504 intel_wakeref_t wakeref;
1505 int ret;
1506
1507 with_intel_runtime_pm(>->i915->runtime_pm, wakeref)
1508 ret = guc_action_enable_usage_stats(guc);
1509
1510 if (ret)
1511 guc_err(guc, "Failed to enable usage stats: %pe\n", ERR_PTR(ret));
1512 else
1513 guc_enable_busyness_worker(guc);
1514
1515 return ret;
1516 }
1517
/* Tear down engine stats: all that is needed is to stop the busyness worker. */
static void guc_fini_engine_stats(struct intel_guc *guc)
{
	guc_cancel_busyness_worker(guc);
}
1522
intel_guc_busyness_park(struct intel_gt * gt)1523 void intel_guc_busyness_park(struct intel_gt *gt)
1524 {
1525 struct intel_guc *guc = >->uc.guc;
1526
1527 if (!guc_submission_initialized(guc))
1528 return;
1529
1530 /*
1531 * There is a race with suspend flow where the worker runs after suspend
1532 * and causes an unclaimed register access warning. Cancel the worker
1533 * synchronously here.
1534 */
1535 guc_cancel_busyness_worker(guc);
1536
1537 /*
1538 * Before parking, we should sample engine busyness stats if we need to.
1539 * We can skip it if we are less than half a ping from the last time we
1540 * sampled the busyness stats.
1541 */
1542 if (guc->timestamp.last_stat_jiffies &&
1543 !time_after(jiffies, guc->timestamp.last_stat_jiffies +
1544 (guc->timestamp.ping_delay / 2)))
1545 return;
1546
1547 __update_guc_busyness_stats(guc);
1548 }
1549
intel_guc_busyness_unpark(struct intel_gt * gt)1550 void intel_guc_busyness_unpark(struct intel_gt *gt)
1551 {
1552 struct intel_guc *guc = >->uc.guc;
1553 unsigned long flags;
1554 ktime_t unused;
1555
1556 if (!guc_submission_initialized(guc))
1557 return;
1558
1559 spin_lock_irqsave(&guc->timestamp.lock, flags);
1560 guc_update_pm_timestamp(guc, &unused);
1561 spin_unlock_irqrestore(&guc->timestamp.lock, flags);
1562 guc_enable_busyness_worker(guc);
1563 }
1564
1565 static inline bool
submission_disabled(struct intel_guc * guc)1566 submission_disabled(struct intel_guc *guc)
1567 {
1568 struct i915_sched_engine * const sched_engine = guc->sched_engine;
1569
1570 return unlikely(!sched_engine ||
1571 !__tasklet_is_enabled(&sched_engine->tasklet) ||
1572 intel_gt_is_wedged(guc_to_gt(guc)));
1573 }
1574
disable_submission(struct intel_guc * guc)1575 static void disable_submission(struct intel_guc *guc)
1576 {
1577 struct i915_sched_engine * const sched_engine = guc->sched_engine;
1578
1579 if (__tasklet_is_enabled(&sched_engine->tasklet)) {
1580 GEM_BUG_ON(!guc->ct.enabled);
1581 __tasklet_disable_sync_once(&sched_engine->tasklet);
1582 sched_engine->tasklet.callback = NULL;
1583 }
1584 }
1585
enable_submission(struct intel_guc * guc)1586 static void enable_submission(struct intel_guc *guc)
1587 {
1588 struct i915_sched_engine * const sched_engine = guc->sched_engine;
1589 unsigned long flags;
1590
1591 spin_lock_irqsave(&guc->sched_engine->lock, flags);
1592 sched_engine->tasklet.callback = guc_submission_tasklet;
1593 wmb(); /* Make sure callback visible */
1594 if (!__tasklet_is_enabled(&sched_engine->tasklet) &&
1595 __tasklet_enable(&sched_engine->tasklet)) {
1596 GEM_BUG_ON(!guc->ct.enabled);
1597
1598 /* And kick in case we missed a new request submission. */
1599 tasklet_hi_schedule(&sched_engine->tasklet);
1600 }
1601 spin_unlock_irqrestore(&guc->sched_engine->lock, flags);
1602 }
1603
guc_flush_submissions(struct intel_guc * guc)1604 static void guc_flush_submissions(struct intel_guc *guc)
1605 {
1606 struct i915_sched_engine * const sched_engine = guc->sched_engine;
1607 unsigned long flags;
1608
1609 spin_lock_irqsave(&sched_engine->lock, flags);
1610 spin_unlock_irqrestore(&sched_engine->lock, flags);
1611 }
1612
/* Forward declaration of a static helper that is used before its definition. */
static void guc_flush_destroyed_contexts(struct intel_guc *guc);
1614
intel_guc_submission_reset_prepare(struct intel_guc * guc)1615 void intel_guc_submission_reset_prepare(struct intel_guc *guc)
1616 {
1617 if (unlikely(!guc_submission_initialized(guc))) {
1618 /* Reset called during driver load? GuC not yet initialised! */
1619 return;
1620 }
1621
1622 intel_gt_park_heartbeats(guc_to_gt(guc));
1623 disable_submission(guc);
1624 guc->interrupts.disable(guc);
1625 __reset_guc_busyness_stats(guc);
1626
1627 /* Flush IRQ handler */
1628 spin_lock_irq(guc_to_gt(guc)->irq_lock);
1629 spin_unlock_irq(guc_to_gt(guc)->irq_lock);
1630
1631 guc_flush_submissions(guc);
1632 guc_flush_destroyed_contexts(guc);
1633 flush_work(&guc->ct.requests.worker);
1634
1635 scrub_guc_desc_for_outstanding_g2h(guc);
1636 }
1637
1638 static struct intel_engine_cs *
guc_virtual_get_sibling(struct intel_engine_cs * ve,unsigned int sibling)1639 guc_virtual_get_sibling(struct intel_engine_cs *ve, unsigned int sibling)
1640 {
1641 struct intel_engine_cs *engine;
1642 intel_engine_mask_t tmp, mask = ve->mask;
1643 unsigned int num_siblings = 0;
1644
1645 for_each_engine_masked(engine, ve->gt, mask, tmp)
1646 if (num_siblings++ == sibling)
1647 return engine;
1648
1649 return NULL;
1650 }
1651
1652 static inline struct intel_engine_cs *
__context_to_physical_engine(struct intel_context * ce)1653 __context_to_physical_engine(struct intel_context *ce)
1654 {
1655 struct intel_engine_cs *engine = ce->engine;
1656
1657 if (intel_engine_is_virtual(engine))
1658 engine = guc_virtual_get_sibling(engine, 0);
1659
1660 return engine;
1661 }
1662
/*
 * Rewrite the register state of @ce so that it restarts at ring offset
 * @head. When @scrub is set the register state is re-initialised first.
 *
 * Contexts that are not schedulable are left untouched.
 */
static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub)
{
	struct intel_engine_cs *phys = __context_to_physical_engine(ce);

	if (!intel_context_is_schedulable(ce))
		return;

	GEM_BUG_ON(!intel_context_is_pinned(ce));

	/*
	 * The context cannot be trusted to be intact across the GPU hang, so
	 * clear it and rebuild only what the breadcrumb update needs: a simple
	 * context + ring. All pending requests for this context will be
	 * zapped, and any future request will come after userspace has had
	 * the opportunity to recreate its own state.
	 */
	if (scrub) {
		lrc_init_regs(ce, phys, true);
	}

	/* Rerun the request; its payload has been neutered (if guilty). */
	lrc_update_regs(ce, phys, head);
}
1686
guc_engine_reset_prepare(struct intel_engine_cs * engine)1687 static void guc_engine_reset_prepare(struct intel_engine_cs *engine)
1688 {
1689 /*
1690 * Wa_22011802037: In addition to stopping the cs, we need
1691 * to wait for any pending mi force wakeups
1692 */
1693 if (intel_engine_reset_needs_wa_22011802037(engine->gt)) {
1694 intel_engine_stop_cs(engine);
1695 intel_engine_wait_for_pending_mi_fw(engine);
1696 }
1697 }
1698
/* Intentionally empty: takes an engine and does nothing. */
static void guc_reset_nop(struct intel_engine_cs *engine)
{
}
1702
/* Intentionally empty: takes an engine and a stalled flag and does nothing. */
static void guc_rewind_nop(struct intel_engine_cs *engine, bool stalled)
{
}
1706
/*
 * Walk ce->guc_state.requests in reverse and move every request that has not
 * completed back onto the scheduling engine's priority lists, so that it can
 * be resubmitted later.
 *
 * Takes sched_engine->lock (irqsave) and, nested inside it,
 * ce->guc_state.lock.
 */
static void
__unwind_incomplete_requests(struct intel_context *ce)
{
	struct i915_request *rq, *rn;
	struct list_head *pl;
	int prio = I915_PRIORITY_INVALID;
	struct i915_sched_engine * const sched_engine =
		ce->engine->sched_engine;
	unsigned long flags;

	spin_lock_irqsave(&sched_engine->lock, flags);
	spin_lock(&ce->guc_state.lock);
	list_for_each_entry_safe_reverse(rq, rn,
					 &ce->guc_state.requests,
					 sched.link) {
		/* Completed requests stay where they are. */
		if (i915_request_completed(rq))
			continue;

		list_del_init(&rq->sched.link);
		__i915_request_unsubmit(rq);

		/* Push the request back into the queue for later resubmission. */
		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
		/*
		 * Look up the priority list only when the priority changes.
		 * prio starts as I915_PRIORITY_INVALID, which the assertion
		 * above rules out for any request, so the first request always
		 * triggers the lookup that sets pl.
		 */
		if (rq_prio(rq) != prio) {
			prio = rq_prio(rq);
			pl = i915_sched_lookup_priolist(sched_engine, prio);
		}
		GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine));

		/* Adding at the head while walking in reverse keeps the original order. */
		list_add(&rq->sched.link, pl);
		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
	}
	spin_unlock(&ce->guc_state.lock);
	spin_unlock_irqrestore(&sched_engine->lock, flags);
}
1742
/*
 * Reset a non-child context @ce and each of its parallel children, then
 * unwind the incomplete requests of @ce.
 *
 * For every pinned context in the relationship the active request, if any,
 * is reset and the context's register state is rewound to that request's
 * head (or to the ring tail when there is no active request). A request is
 * treated as guilty only if it has started and its context's engine is in
 * the @stalled mask.
 *
 * A reference on @ce is held for the duration of the call.
 */
static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t stalled)
{
	bool guilty;
	struct i915_request *rq;
	unsigned long flags;
	u32 head;
	int i, number_children = ce->parallel.number_children;
	struct intel_context *parent = ce;

	GEM_BUG_ON(intel_context_is_child(ce));

	intel_context_get(ce);

	/*
	 * GuC will implicitly mark the context as non-schedulable when it sends
	 * the reset notification. Make sure our state reflects this change. The
	 * context will be marked enabled on resubmission.
	 */
	spin_lock_irqsave(&ce->guc_state.lock, flags);
	clr_context_enabled(ce);
	spin_unlock_irqrestore(&ce->guc_state.lock, flags);

	/*
	 * For each context in the relationship find the hanging request
	 * resetting each context / request as needed
	 */
	for (i = 0; i < number_children + 1; ++i) {
		/* Contexts that are not pinned are skipped. */
		if (!intel_context_is_pinned(ce))
			goto next_context;

		guilty = false;
		rq = intel_context_get_active_request(ce);
		if (!rq) {
			/* No active request: restart from the current ring tail. */
			head = ce->ring->tail;
			goto out_replay;
		}

		if (i915_request_started(rq))
			guilty = stalled & ce->engine->mask;

		GEM_BUG_ON(i915_active_is_idle(&ce->active));
		head = intel_ring_wrap(ce->ring, rq->head);

		__i915_request_reset(rq, guilty);
		i915_request_put(rq);
out_replay:
		/* guilty doubles as the scrub argument of guc_reset_state(). */
		guc_reset_state(ce, head, guilty);
next_context:
		/* Advance to the next child, except after the last context. */
		if (i != number_children)
			ce = list_next_entry(ce, parallel.child_link);
	}

	__unwind_incomplete_requests(parent);
	intel_context_put(parent);
}
1798
/*
 * Reset every pinned, non-child context found in guc->context_lookup and
 * then destroy the lookup xarray.
 *
 * @stalled is passed through to __guc_reset_context() for each context.
 * Does nothing if GuC submission is not initialized.
 */
void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled)
{
	struct intel_context *ce;
	unsigned long index;
	unsigned long flags;

	if (unlikely(!guc_submission_initialized(guc))) {
		/* Reset called during driver load? GuC not yet initialised! */
		return;
	}

	xa_lock_irqsave(&guc->context_lookup, flags);
	xa_for_each(&guc->context_lookup, index, ce) {
		/* Skip contexts whose reference count has already reached zero. */
		if (!kref_get_unless_zero(&ce->ref))
			continue;

		/* The xarray lock is dropped while the context is being reset. */
		xa_unlock(&guc->context_lookup);

		if (intel_context_is_pinned(ce) &&
		    !intel_context_is_child(ce))
			__guc_reset_context(ce, stalled);

		intel_context_put(ce);

		xa_lock(&guc->context_lookup);
	}
	xa_unlock_irqrestore(&guc->context_lookup, flags);

	/* GuC is blown away, drop all references to contexts */
	xa_destroy(&guc->context_lookup);
}
1830
guc_cancel_context_requests(struct intel_context * ce)1831 static void guc_cancel_context_requests(struct intel_context *ce)
1832 {
1833 struct i915_sched_engine *sched_engine = ce_to_guc(ce)->sched_engine;
1834 struct i915_request *rq;
1835 unsigned long flags;
1836
1837 /* Mark all executing requests as skipped. */
1838 spin_lock_irqsave(&sched_engine->lock, flags);
1839 spin_lock(&ce->guc_state.lock);
1840 list_for_each_entry(rq, &ce->guc_state.requests, sched.link)
1841 i915_request_put(i915_request_mark_eio(rq));
1842 spin_unlock(&ce->guc_state.lock);
1843 spin_unlock_irqrestore(&sched_engine->lock, flags);
1844 }
1845
1846 static void
guc_cancel_sched_engine_requests(struct i915_sched_engine * sched_engine)1847 guc_cancel_sched_engine_requests(struct i915_sched_engine *sched_engine)
1848 {
1849 struct i915_request *rq, *rn;
1850 struct rb_node *rb;
1851 unsigned long flags;
1852
1853 /* Can be called during boot if GuC fails to load */
1854 if (!sched_engine)
1855 return;
1856
1857 /*
1858 * Before we call engine->cancel_requests(), we should have exclusive
1859 * access to the submission state. This is arranged for us by the
1860 * caller disabling the interrupt generation, the tasklet and other
1861 * threads that may then access the same state, giving us a free hand
1862 * to reset state. However, we still need to let lockdep be aware that
1863 * we know this state may be accessed in hardirq context, so we
1864 * disable the irq around this manipulation and we want to keep
1865 * the spinlock focused on its duties and not accidentally conflate
1866 * coverage to the submission's irq state. (Similarly, although we
1867 * shouldn't need to disable irq around the manipulation of the
1868 * submission's irq state, we also wish to remind ourselves that
1869 * it is irq state.)
1870 */
1871 spin_lock_irqsave(&sched_engine->lock, flags);
1872
1873 /* Flush the queued requests to the timeline list (for retiring). */
1874 while ((rb = rb_first_cached(&sched_engine->queue))) {
1875 struct i915_priolist *p = to_priolist(rb);
1876
1877 priolist_for_each_request_consume(rq, rn, p) {
1878 list_del_init(&rq->sched.link);
1879
1880 __i915_request_submit(rq);
1881
1882 i915_request_put(i915_request_mark_eio(rq));
1883 }
1884
1885 rb_erase_cached(&p->node, &sched_engine->queue);
1886 i915_priolist_free(p);
1887 }
1888
1889 /* Remaining _unready_ requests will be nop'ed when submitted */
1890
1891 sched_engine->queue_priority_hint = INT_MIN;
1892 sched_engine->queue = RB_ROOT_CACHED;
1893
1894 spin_unlock_irqrestore(&sched_engine->lock, flags);
1895 }
1896
intel_guc_submission_cancel_requests(struct intel_guc * guc)1897 void intel_guc_submission_cancel_requests(struct intel_guc *guc)
1898 {
1899 struct intel_context *ce;
1900 unsigned long index;
1901 unsigned long flags;
1902
1903 xa_lock_irqsave(&guc->context_lookup, flags);
1904 xa_for_each(&guc->context_lookup, index, ce) {
1905 if (!kref_get_unless_zero(&ce->ref))
1906 continue;
1907
1908 xa_unlock(&guc->context_lookup);
1909
1910 if (intel_context_is_pinned(ce) &&
1911 !intel_context_is_child(ce))
1912 guc_cancel_context_requests(ce);
1913
1914 intel_context_put(ce);
1915
1916 xa_lock(&guc->context_lookup);
1917 }
1918 xa_unlock_irqrestore(&guc->context_lookup, flags);
1919
1920 guc_cancel_sched_engine_requests(guc->sched_engine);
1921
1922 /* GuC is blown away, drop all references to contexts */
1923 xa_destroy(&guc->context_lookup);
1924 }
1925
intel_guc_submission_reset_finish(struct intel_guc * guc)1926 void intel_guc_submission_reset_finish(struct intel_guc *guc)
1927 {
1928 /* Reset called during driver load or during wedge? */
1929 if (unlikely(!guc_submission_initialized(guc) ||
1930 intel_gt_is_wedged(guc_to_gt(guc)))) {
1931 return;
1932 }
1933
1934 /*
1935 * Technically possible for either of these values to be non-zero here,
1936 * but very unlikely + harmless. Regardless let's add a warn so we can
1937 * see in CI if this happens frequently / a precursor to taking down the
1938 * machine.
1939 */
1940 GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h));
1941 atomic_set(&guc->outstanding_submission_g2h, 0);
1942
1943 intel_guc_global_policies_update(guc);
1944 enable_submission(guc);
1945 intel_gt_unpark_heartbeats(guc_to_gt(guc));
1946 }
1947
/* Forward declarations for two handlers that each take a struct work_struct. */
static void destroyed_worker_func(struct work_struct *w);
static void reset_fail_worker_func(struct work_struct *w);
1950
1951 /*
1952 * Set up the memory resources to be shared with the GuC (via the GGTT)
1953 * at firmware loading time.
1954 */
intel_guc_submission_init(struct intel_guc * guc)1955 int intel_guc_submission_init(struct intel_guc *guc)
1956 {
1957 struct intel_gt *gt = guc_to_gt(guc);
1958 int ret;
1959
1960 if (guc->submission_initialized)
1961 return 0;
1962
1963 if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 0, 0)) {
1964 ret = guc_lrc_desc_pool_create_v69(guc);
1965 if (ret)
1966 return ret;
1967 }
1968
1969 guc->submission_state.guc_ids_bitmap =
1970 bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
1971 if (!guc->submission_state.guc_ids_bitmap) {
1972 ret = -ENOMEM;
1973 goto destroy_pool;
1974 }
1975
1976 guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
1977 guc->timestamp.shift = gpm_timestamp_shift(gt);
1978 guc->submission_initialized = true;
1979
1980 return 0;
1981
1982 destroy_pool:
1983 guc_lrc_desc_pool_destroy_v69(guc);
1984
1985 return ret;
1986 }
1987
intel_guc_submission_fini(struct intel_guc * guc)1988 void intel_guc_submission_fini(struct intel_guc *guc)
1989 {
1990 if (!guc->submission_initialized)
1991 return;
1992
1993 guc_flush_destroyed_contexts(guc);
1994 guc_lrc_desc_pool_destroy_v69(guc);
1995 i915_sched_engine_put(guc->sched_engine);
1996 bitmap_free(guc->submission_state.guc_ids_bitmap);
1997 guc->submission_initialized = false;
1998 }
1999
queue_request(struct i915_sched_engine * sched_engine,struct i915_request * rq,int prio)2000 static inline void queue_request(struct i915_sched_engine *sched_engine,
2001 struct i915_request *rq,
2002 int prio)
2003 {
2004 GEM_BUG_ON(!list_empty(&rq->sched.link));
2005 list_add_tail(&rq->sched.link,
2006 i915_sched_lookup_priolist(sched_engine, prio));
2007 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2008 tasklet_hi_schedule(&sched_engine->tasklet);
2009 }
2010
guc_bypass_tasklet_submit(struct intel_guc * guc,struct i915_request * rq)2011 static int guc_bypass_tasklet_submit(struct intel_guc *guc,
2012 struct i915_request *rq)
2013 {
2014 int ret = 0;
2015
2016 __i915_request_submit(rq);
2017
2018 trace_i915_request_in(rq, 0);
2019
2020 if (is_multi_lrc_rq(rq)) {
2021 if (multi_lrc_submit(rq)) {
2022 ret = guc_wq_item_append(guc, rq);
2023 if (!ret)
2024 ret = guc_add_request(guc, rq);
2025 }
2026 } else {
2027 guc_set_lrc_tail(rq);
2028 ret = guc_add_request(guc, rq);
2029 }
2030
2031 if (unlikely(ret == -EPIPE))
2032 disable_submission(guc);
2033
2034 return ret;
2035 }
2036
need_tasklet(struct intel_guc * guc,struct i915_request * rq)2037 static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq)
2038 {
2039 struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
2040 struct intel_context *ce = request_to_scheduling_context(rq);
2041
2042 return submission_disabled(guc) || guc->stalled_request ||
2043 !i915_sched_engine_is_empty(sched_engine) ||
2044 !ctx_id_mapped(guc, ce->guc_id.id);
2045 }
2046
guc_submit_request(struct i915_request * rq)2047 static void guc_submit_request(struct i915_request *rq)
2048 {
2049 struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
2050 struct intel_guc *guc = &rq->engine->gt->uc.guc;
2051 unsigned long flags;
2052
2053 /* Will be called from irq-context when using foreign fences. */
2054 spin_lock_irqsave(&sched_engine->lock, flags);
2055
2056 if (need_tasklet(guc, rq))
2057 queue_request(sched_engine, rq, rq_prio(rq));
2058 else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY)
2059 tasklet_hi_schedule(&sched_engine->tasklet);
2060
2061 spin_unlock_irqrestore(&sched_engine->lock, flags);
2062 }
2063
new_guc_id(struct intel_guc * guc,struct intel_context * ce)2064 static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
2065 {
2066 int ret;
2067
2068 GEM_BUG_ON(intel_context_is_child(ce));
2069
2070 if (intel_context_is_parent(ce))
2071 ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
2072 NUMBER_MULTI_LRC_GUC_ID(guc),
2073 order_base_2(ce->parallel.number_children
2074 + 1));
2075 else
2076 ret = ida_simple_get(&guc->submission_state.guc_ids,
2077 NUMBER_MULTI_LRC_GUC_ID(guc),
2078 guc->submission_state.num_guc_ids,
2079 GFP_KERNEL | __GFP_RETRY_MAYFAIL |
2080 __GFP_NOWARN);
2081 if (unlikely(ret < 0))
2082 return ret;
2083
2084 if (!intel_context_is_parent(ce))
2085 ++guc->submission_state.guc_ids_in_use;
2086
2087 ce->guc_id.id = ret;
2088 return 0;
2089 }
2090
__release_guc_id(struct intel_guc * guc,struct intel_context * ce)2091 static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce)
2092 {
2093 GEM_BUG_ON(intel_context_is_child(ce));
2094
2095 if (!context_guc_id_invalid(ce)) {
2096 if (intel_context_is_parent(ce)) {
2097 bitmap_release_region(guc->submission_state.guc_ids_bitmap,
2098 ce->guc_id.id,
2099 order_base_2(ce->parallel.number_children
2100 + 1));
2101 } else {
2102 --guc->submission_state.guc_ids_in_use;
2103 ida_simple_remove(&guc->submission_state.guc_ids,
2104 ce->guc_id.id);
2105 }
2106 clr_ctx_id_mapping(guc, ce->guc_id.id);
2107 set_context_guc_id_invalid(ce);
2108 }
2109 if (!list_empty(&ce->guc_id.link))
2110 list_del_init(&ce->guc_id.link);
2111 }
2112
release_guc_id(struct intel_guc * guc,struct intel_context * ce)2113 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce)
2114 {
2115 unsigned long flags;
2116
2117 spin_lock_irqsave(&guc->submission_state.lock, flags);
2118 __release_guc_id(guc, ce);
2119 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
2120 }
2121
steal_guc_id(struct intel_guc * guc,struct intel_context * ce)2122 static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce)
2123 {
2124 struct intel_context *cn;
2125
2126 lockdep_assert_held(&guc->submission_state.lock);
2127 GEM_BUG_ON(intel_context_is_child(ce));
2128 GEM_BUG_ON(intel_context_is_parent(ce));
2129
2130 if (!list_empty(&guc->submission_state.guc_id_list)) {
2131 cn = list_first_entry(&guc->submission_state.guc_id_list,
2132 struct intel_context,
2133 guc_id.link);
2134
2135 GEM_BUG_ON(atomic_read(&cn->guc_id.ref));
2136 GEM_BUG_ON(context_guc_id_invalid(cn));
2137 GEM_BUG_ON(intel_context_is_child(cn));
2138 GEM_BUG_ON(intel_context_is_parent(cn));
2139
2140 list_del_init(&cn->guc_id.link);
2141 ce->guc_id.id = cn->guc_id.id;
2142
2143 spin_lock(&cn->guc_state.lock);
2144 clr_context_registered(cn);
2145 spin_unlock(&cn->guc_state.lock);
2146
2147 set_context_guc_id_invalid(cn);
2148
2149 #ifdef CONFIG_DRM_I915_SELFTEST
2150 guc->number_guc_id_stolen++;
2151 #endif
2152
2153 return 0;
2154 } else {
2155 return -EAGAIN;
2156 }
2157 }
2158
assign_guc_id(struct intel_guc * guc,struct intel_context * ce)2159 static int assign_guc_id(struct intel_guc *guc, struct intel_context *ce)
2160 {
2161 int ret;
2162
2163 lockdep_assert_held(&guc->submission_state.lock);
2164 GEM_BUG_ON(intel_context_is_child(ce));
2165
2166 ret = new_guc_id(guc, ce);
2167 if (unlikely(ret < 0)) {
2168 if (intel_context_is_parent(ce))
2169 return -ENOSPC;
2170
2171 ret = steal_guc_id(guc, ce);
2172 if (ret < 0)
2173 return ret;
2174 }
2175
2176 if (intel_context_is_parent(ce)) {
2177 struct intel_context *child;
2178 int i = 1;
2179
2180 for_each_child(ce, child)
2181 child->guc_id.id = ce->guc_id.id + i++;
2182 }
2183
2184 return 0;
2185 }
2186
2187 #define PIN_GUC_ID_TRIES 4
pin_guc_id(struct intel_guc * guc,struct intel_context * ce)2188 static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
2189 {
2190 int ret = 0;
2191 unsigned long flags, tries = PIN_GUC_ID_TRIES;
2192
2193 GEM_BUG_ON(atomic_read(&ce->guc_id.ref));
2194
2195 try_again:
2196 spin_lock_irqsave(&guc->submission_state.lock, flags);
2197
2198 might_lock(&ce->guc_state.lock);
2199
2200 if (context_guc_id_invalid(ce)) {
2201 ret = assign_guc_id(guc, ce);
2202 if (ret)
2203 goto out_unlock;
2204 ret = 1; /* Indidcates newly assigned guc_id */
2205 }
2206 if (!list_empty(&ce->guc_id.link))
2207 list_del_init(&ce->guc_id.link);
2208 atomic_inc(&ce->guc_id.ref);
2209
2210 out_unlock:
2211 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
2212
2213 /*
2214 * -EAGAIN indicates no guc_id are available, let's retire any
2215 * outstanding requests to see if that frees up a guc_id. If the first
2216 * retire didn't help, insert a sleep with the timeslice duration before
2217 * attempting to retire more requests. Double the sleep period each
2218 * subsequent pass before finally giving up. The sleep period has max of
2219 * 100ms and minimum of 1ms.
2220 */
2221 if (ret == -EAGAIN && --tries) {
2222 if (PIN_GUC_ID_TRIES - tries > 1) {
2223 unsigned int timeslice_shifted =
2224 ce->engine->props.timeslice_duration_ms <<
2225 (PIN_GUC_ID_TRIES - tries - 2);
2226 unsigned int max = min_t(unsigned int, 100,
2227 timeslice_shifted);
2228
2229 msleep(max_t(unsigned int, max, 1));
2230 }
2231 intel_gt_retire_requests(guc_to_gt(guc));
2232 goto try_again;
2233 }
2234
2235 return ret;
2236 }
2237
unpin_guc_id(struct intel_guc * guc,struct intel_context * ce)2238 static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce)
2239 {
2240 unsigned long flags;
2241
2242 GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0);
2243 GEM_BUG_ON(intel_context_is_child(ce));
2244
2245 if (unlikely(context_guc_id_invalid(ce) ||
2246 intel_context_is_parent(ce)))
2247 return;
2248
2249 spin_lock_irqsave(&guc->submission_state.lock, flags);
2250 if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) &&
2251 !atomic_read(&ce->guc_id.ref))
2252 list_add_tail(&ce->guc_id.link,
2253 &guc->submission_state.guc_id_list);
2254 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
2255 }
2256
/*
 * Build and send the v69 multi-LRC registration H2G for parent context @ce.
 *
 * The message is: action code, @guc_id, number of contexts (children + 1),
 * the parent's descriptor @offset, then one descriptor offset per child, each
 * sizeof(struct guc_lrc_desc_v69) past the previous one.
 *
 * @loop is forwarded to guc_submission_send_busy_loop(); its return value is
 * returned unchanged.
 */
static int __guc_action_register_multi_lrc_v69(struct intel_guc *guc,
					       struct intel_context *ce,
					       u32 guc_id,
					       u32 offset,
					       bool loop)
{
	struct intel_context *child;
	u32 action[4 + MAX_ENGINE_INSTANCE];
	int len = 0;

	GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);

	action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
	action[len++] = guc_id;
	action[len++] = ce->parallel.number_children + 1;
	action[len++] = offset;
	for_each_child(ce, child) {
		offset += sizeof(struct guc_lrc_desc_v69);
		action[len++] = offset;
	}

	/*
	 * The loop above is driven by the child list rather than by
	 * number_children, so verify the message actually fits, matching the
	 * check done by the v70 variant.
	 */
	GEM_BUG_ON(len > ARRAY_SIZE(action));

	return guc_submission_send_busy_loop(guc, action, len, 0, loop);
}
2280
/*
 * Build and send the v70 multi-LRC registration H2G for parent context @ce.
 *
 * The message carries the fields of @info, the number of contexts
 * (children + 1), the parent's LRCA and then the lower/upper 32 bits of each
 * child's LRCA. Children are expected to hold consecutive guc_ids starting
 * at info->context_idx + 1.
 *
 * @loop is forwarded to guc_submission_send_busy_loop(); its return value is
 * returned unchanged.
 */
static int __guc_action_register_multi_lrc_v70(struct intel_guc *guc,
					       struct intel_context *ce,
					       struct guc_ctxt_registration_info *info,
					       bool loop)
{
	struct intel_context *child;
	u32 action[13 + (MAX_ENGINE_INSTANCE * 2)];
	int len = 0;
	u32 next_id;

	GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);

	action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
	action[len++] = info->flags;
	action[len++] = info->context_idx;
	action[len++] = info->engine_class;
	action[len++] = info->engine_submit_mask;
	action[len++] = info->wq_desc_lo;
	action[len++] = info->wq_desc_hi;
	action[len++] = info->wq_base_lo;
	action[len++] = info->wq_base_hi;
	action[len++] = info->wq_size;
	action[len++] = ce->parallel.number_children + 1;
	action[len++] = info->hwlrca_lo;
	action[len++] = info->hwlrca_hi;

	next_id = info->context_idx + 1;
	for_each_child(ce, child) {
		/*
		 * Keep the increment outside the assertion so the expression
		 * handed to GEM_BUG_ON() has no side effects.
		 */
		GEM_BUG_ON(next_id != child->guc_id.id);
		next_id++;

		/*
		 * NB: GuC interface supports 64 bit LRCA even though i915/HW
		 * only supports 32 bit currently.
		 */
		action[len++] = lower_32_bits(child->lrc.lrca);
		action[len++] = upper_32_bits(child->lrc.lrca);
	}

	GEM_BUG_ON(len > ARRAY_SIZE(action));

	return guc_submission_send_busy_loop(guc, action, len, 0, loop);
}
2323
/*
 * Send the v69 single-context registration H2G: action code, @guc_id and the
 * descriptor @offset. @loop is forwarded to the send helper, whose return
 * value is returned unchanged.
 */
static int __guc_action_register_context_v69(struct intel_guc *guc,
					     u32 guc_id,
					     u32 offset,
					     bool loop)
{
	u32 msg[] = {
		INTEL_GUC_ACTION_REGISTER_CONTEXT,
		guc_id,
		offset,
	};
	int err;

	err = guc_submission_send_busy_loop(guc, msg, ARRAY_SIZE(msg),
					    0, loop);

	return err;
}
2338
/*
 * Send the v70 single-context registration H2G, built from the fields of
 * @info in the order laid out below. @loop is forwarded to the send helper,
 * whose return value is returned unchanged.
 */
static int __guc_action_register_context_v70(struct intel_guc *guc,
					     struct guc_ctxt_registration_info *info,
					     bool loop)
{
	u32 msg[] = {
		INTEL_GUC_ACTION_REGISTER_CONTEXT,
		info->flags,
		info->context_idx,
		info->engine_class,
		info->engine_submit_mask,
		info->wq_desc_lo,
		info->wq_desc_hi,
		info->wq_base_lo,
		info->wq_base_hi,
		info->wq_size,
		info->hwlrca_lo,
		info->hwlrca_hi,
	};
	int err;

	err = guc_submission_send_busy_loop(guc, msg, ARRAY_SIZE(msg),
					    0, loop);

	return err;
}
2361
/*
 * Forward declarations: register_context_v69() and register_context_v70()
 * call these helpers ahead of their definitions.
 */
static void prepare_context_registration_info_v69(struct intel_context *ce);
static void prepare_context_registration_info_v70(struct intel_context *ce,
						  struct guc_ctxt_registration_info *info);
2365
2366 static int
register_context_v69(struct intel_guc * guc,struct intel_context * ce,bool loop)2367 register_context_v69(struct intel_guc *guc, struct intel_context *ce, bool loop)
2368 {
2369 u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool_v69) +
2370 ce->guc_id.id * sizeof(struct guc_lrc_desc_v69);
2371
2372 prepare_context_registration_info_v69(ce);
2373
2374 if (intel_context_is_parent(ce))
2375 return __guc_action_register_multi_lrc_v69(guc, ce, ce->guc_id.id,
2376 offset, loop);
2377 else
2378 return __guc_action_register_context_v69(guc, ce->guc_id.id,
2379 offset, loop);
2380 }
2381
2382 static int
register_context_v70(struct intel_guc * guc,struct intel_context * ce,bool loop)2383 register_context_v70(struct intel_guc *guc, struct intel_context *ce, bool loop)
2384 {
2385 struct guc_ctxt_registration_info info;
2386
2387 prepare_context_registration_info_v70(ce, &info);
2388
2389 if (intel_context_is_parent(ce))
2390 return __guc_action_register_multi_lrc_v70(guc, ce, &info, loop);
2391 else
2392 return __guc_action_register_context_v70(guc, &info, loop);
2393 }
2394
/*
 * Register @ce with the GuC using the v70 path when the submission version
 * is at least 1.0.0 and the v69 path otherwise.
 *
 * On success the context is marked registered under its guc_state.lock and,
 * on the v70 path, its policies are sent as well (that call's result is not
 * propagated). Returns the result of the registration itself.
 */
static int register_context(struct intel_context *ce, bool loop)
{
	struct intel_guc *guc = ce_to_guc(ce);
	unsigned long irq_flags;
	int err;

	GEM_BUG_ON(intel_context_is_child(ce));
	trace_intel_context_register(ce);

	if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 0, 0))
		err = register_context_v69(guc, ce, loop);
	else
		err = register_context_v70(guc, ce, loop);

	if (unlikely(err))
		return err;

	spin_lock_irqsave(&ce->guc_state.lock, irq_flags);
	set_context_registered(ce);
	spin_unlock_irqrestore(&ce->guc_state.lock, irq_flags);

	if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0))
		guc_context_policy_init_v70(ce, loop);

	return err;
}
2421
/*
 * Send the deregister-context H2G for @guc_id. G2H_LEN_DW_DEREGISTER_CONTEXT
 * is passed as the G2H length and the send helper is always asked to loop.
 */
static int __guc_action_deregister_context(struct intel_guc *guc,
					   u32 guc_id)
{
	u32 msg[] = {
		INTEL_GUC_ACTION_DEREGISTER_CONTEXT,
		guc_id,
	};
	int err;

	err = guc_submission_send_busy_loop(guc, msg, ARRAY_SIZE(msg),
					    G2H_LEN_DW_DEREGISTER_CONTEXT,
					    true);

	return err;
}
2434
/*
 * Trace and send a deregistration for @guc_id on the GuC that owns @ce.
 * Must not be called for a child context.
 */
static int deregister_context(struct intel_context *ce, u32 guc_id)
{
	GEM_BUG_ON(intel_context_is_child(ce));
	trace_intel_context_deregister(ce);

	return __guc_action_deregister_context(ce_to_guc(ce), guc_id);
}
2444
clear_children_join_go_memory(struct intel_context * ce)2445 static inline void clear_children_join_go_memory(struct intel_context *ce)
2446 {
2447 struct parent_scratch *ps = __get_parent_scratch(ce);
2448 int i;
2449
2450 ps->go.semaphore = 0;
2451 for (i = 0; i < ce->parallel.number_children + 1; ++i)
2452 ps->join[i].semaphore = 0;
2453 }
2454
get_children_go_value(struct intel_context * ce)2455 static inline u32 get_children_go_value(struct intel_context *ce)
2456 {
2457 return __get_parent_scratch(ce)->go.semaphore;
2458 }
2459
get_children_join_value(struct intel_context * ce,u8 child_index)2460 static inline u32 get_children_join_value(struct intel_context *ce,
2461 u8 child_index)
2462 {
2463 return __get_parent_scratch(ce)->join[child_index].semaphore;
2464 }
2465
/*
 * Builder for an update-context-policies H2G: the message itself plus a
 * count of how many of its KLV entries have been filled in so far.
 */
struct context_policy {
	/* Number of h2g.klv[] entries populated; also sizes the message sent. */
	u32 count;
	/* The message body; sent to the GuC as an array of u32. */
	struct guc_update_context_policy h2g;
};
2470
__guc_context_policy_action_size(struct context_policy * policy)2471 static u32 __guc_context_policy_action_size(struct context_policy *policy)
2472 {
2473 size_t bytes = sizeof(policy->h2g.header) +
2474 (sizeof(policy->h2g.klv[0]) * policy->count);
2475
2476 return bytes / sizeof(u32);
2477 }
2478
/*
 * Begin a new policy message for @guc_id: reset the KLV count and fill in
 * the header's action code and context id.
 */
static void __guc_context_policy_start_klv(struct context_policy *policy, u16 guc_id)
{
	policy->count = 0;

	policy->h2g.header.ctx_id = guc_id;
	policy->h2g.header.action = INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
}
2485
/*
 * Generate __guc_context_policy_add_<func>(policy, data), which appends one
 * KLV entry to @policy: the key is GUC_CONTEXT_POLICIES_KLV_ID_<id>, the
 * length field is 1 and the value is @data. The entry count is then bumped.
 * A GEM_BUG_ON() guards against exceeding GUC_CONTEXT_POLICIES_KLV_NUM_IDS
 * entries.
 */
#define MAKE_CONTEXT_POLICY_ADD(func, id) \
static void __guc_context_policy_add_##func(struct context_policy *policy, u32 data) \
{ \
	GEM_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
	policy->h2g.klv[policy->count].kl = \
		FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
		FIELD_PREP(GUC_KLV_0_LEN, 1); \
	policy->h2g.klv[policy->count].value = data; \
	policy->count++; \
}

/* One adder per policy key used in this file. */
MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY)
MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY)

/* The generator is not needed past this point. */
#undef MAKE_CONTEXT_POLICY_ADD
2503
2504 static int __guc_context_set_context_policies(struct intel_guc *guc,
2505 struct context_policy *policy,
2506 bool loop)
2507 {
2508 return guc_submission_send_busy_loop(guc, (u32 *)&policy->h2g,
2509 __guc_context_policy_action_size(policy),
2510 0, loop);
2511 }
2512
/*
 * Send @ce's scheduling policies through the v70 policy-update H2G.
 *
 * The KLVs are added in this order: priority, execution quantum, preemption
 * timeout and, if the engine has I915_ENGINE_WANT_FORCED_PREEMPTION set,
 * preempt-to-idle. The two time values are the engine's millisecond
 * properties multiplied by 1000.
 *
 * The context's policy_required state is cleared when the send succeeds and
 * set when it fails. Returns the result of the send.
 */
static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
{
	struct intel_engine_cs *engine = ce->engine;
	struct intel_guc *guc = &engine->gt->uc.guc;
	struct context_policy policy;
	unsigned long irq_flags;
	u32 quantum;
	u32 preempt_timeout;
	int err;

	/* NB: For both of these, zero means disabled. */
	GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000,
				  quantum));
	GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000,
				  preempt_timeout));
	quantum = engine->props.timeslice_duration_ms * 1000;
	preempt_timeout = engine->props.preempt_timeout_ms * 1000;

	__guc_context_policy_start_klv(&policy, ce->guc_id.id);

	__guc_context_policy_add_priority(&policy, ce->guc_state.prio);
	__guc_context_policy_add_execution_quantum(&policy, quantum);
	__guc_context_policy_add_preemption_timeout(&policy, preempt_timeout);

	if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
		__guc_context_policy_add_preempt_to_idle(&policy, 1);

	err = __guc_context_set_context_policies(guc, &policy, loop);

	/* Record whether the policies still need to be delivered. */
	spin_lock_irqsave(&ce->guc_state.lock, irq_flags);
	if (err == 0)
		clr_context_policy_required(ce);
	else
		set_context_policy_required(ce);
	spin_unlock_irqrestore(&ce->guc_state.lock, irq_flags);

	return err;
}
2551
guc_context_policy_init_v69(struct intel_engine_cs * engine,struct guc_lrc_desc_v69 * desc)2552 static void guc_context_policy_init_v69(struct intel_engine_cs *engine,
2553 struct guc_lrc_desc_v69 *desc)
2554 {
2555 desc->policy_flags = 0;
2556
2557 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
2558 desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69;
2559
2560 /* NB: For both of these, zero means disabled. */
2561 GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000,
2562 desc->execution_quantum));
2563 GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000,
2564 desc->preemption_timeout));
2565 desc->execution_quantum = engine->props.timeslice_duration_ms * 1000;
2566 desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
2567 }
2568
map_guc_prio_to_lrc_desc_prio(u8 prio)2569 static u32 map_guc_prio_to_lrc_desc_prio(u8 prio)
2570 {
2571 /*
2572 * this matches the mapping we do in map_i915_prio_to_guc_prio()
2573 * (e.g. prio < I915_PRIORITY_NORMAL maps to GUC_CLIENT_PRIORITY_NORMAL)
2574 */
2575 switch (prio) {
2576 default:
2577 MISSING_CASE(prio);
2578 fallthrough;
2579 case GUC_CLIENT_PRIORITY_KMD_NORMAL:
2580 return GEN12_CTX_PRIORITY_NORMAL;
2581 case GUC_CLIENT_PRIORITY_NORMAL:
2582 return GEN12_CTX_PRIORITY_LOW;
2583 case GUC_CLIENT_PRIORITY_HIGH:
2584 case GUC_CLIENT_PRIORITY_KMD_HIGH:
2585 return GEN12_CTX_PRIORITY_HIGH;
2586 }
2587 }
2588
prepare_context_registration_info_v69(struct intel_context * ce)2589 static void prepare_context_registration_info_v69(struct intel_context *ce)
2590 {
2591 struct intel_engine_cs *engine = ce->engine;
2592 struct intel_guc *guc = &engine->gt->uc.guc;
2593 u32 ctx_id = ce->guc_id.id;
2594 struct guc_lrc_desc_v69 *desc;
2595 struct intel_context *child;
2596
2597 GEM_BUG_ON(!engine->mask);
2598
2599 /*
2600 * Ensure LRC + CT vmas are is same region as write barrier is done
2601 * based on CT vma region.
2602 */
2603 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
2604 i915_gem_object_is_lmem(ce->ring->vma->obj));
2605
2606 desc = __get_lrc_desc_v69(guc, ctx_id);
2607 GEM_BUG_ON(!desc);
2608 desc->engine_class = engine_class_to_guc_class(engine->class);
2609 desc->engine_submit_mask = engine->logical_mask;
2610 desc->hw_context_desc = ce->lrc.lrca;
2611 desc->priority = ce->guc_state.prio;
2612 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
2613 guc_context_policy_init_v69(engine, desc);
2614
2615 /*
2616 * If context is a parent, we need to register a process descriptor
2617 * describing a work queue and register all child contexts.
2618 */
2619 if (intel_context_is_parent(ce)) {
2620 struct guc_process_desc_v69 *pdesc;
2621
2622 ce->parallel.guc.wqi_tail = 0;
2623 ce->parallel.guc.wqi_head = 0;
2624
2625 desc->process_desc = i915_ggtt_offset(ce->state) +
2626 __get_parent_scratch_offset(ce);
2627 desc->wq_addr = i915_ggtt_offset(ce->state) +
2628 __get_wq_offset(ce);
2629 desc->wq_size = WQ_SIZE;
2630
2631 pdesc = __get_process_desc_v69(ce);
2632 memset(pdesc, 0, sizeof(*(pdesc)));
2633 pdesc->stage_id = ce->guc_id.id;
2634 pdesc->wq_base_addr = desc->wq_addr;
2635 pdesc->wq_size_bytes = desc->wq_size;
2636 pdesc->wq_status = WQ_STATUS_ACTIVE;
2637
2638 ce->parallel.guc.wq_head = &pdesc->head;
2639 ce->parallel.guc.wq_tail = &pdesc->tail;
2640 ce->parallel.guc.wq_status = &pdesc->wq_status;
2641
2642 for_each_child(ce, child) {
2643 desc = __get_lrc_desc_v69(guc, child->guc_id.id);
2644
2645 desc->engine_class =
2646 engine_class_to_guc_class(engine->class);
2647 desc->hw_context_desc = child->lrc.lrca;
2648 desc->priority = ce->guc_state.prio;
2649 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
2650 guc_context_policy_init_v69(engine, desc);
2651 }
2652
2653 clear_children_join_go_memory(ce);
2654 }
2655 }
2656
prepare_context_registration_info_v70(struct intel_context * ce,struct guc_ctxt_registration_info * info)2657 static void prepare_context_registration_info_v70(struct intel_context *ce,
2658 struct guc_ctxt_registration_info *info)
2659 {
2660 struct intel_engine_cs *engine = ce->engine;
2661 struct intel_guc *guc = &engine->gt->uc.guc;
2662 u32 ctx_id = ce->guc_id.id;
2663
2664 GEM_BUG_ON(!engine->mask);
2665
2666 /*
2667 * Ensure LRC + CT vmas are is same region as write barrier is done
2668 * based on CT vma region.
2669 */
2670 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
2671 i915_gem_object_is_lmem(ce->ring->vma->obj));
2672
2673 memset(info, 0, sizeof(*info));
2674 info->context_idx = ctx_id;
2675 info->engine_class = engine_class_to_guc_class(engine->class);
2676 info->engine_submit_mask = engine->logical_mask;
2677 /*
2678 * NB: GuC interface supports 64 bit LRCA even though i915/HW
2679 * only supports 32 bit currently.
2680 */
2681 info->hwlrca_lo = lower_32_bits(ce->lrc.lrca);
2682 info->hwlrca_hi = upper_32_bits(ce->lrc.lrca);
2683 if (engine->flags & I915_ENGINE_HAS_EU_PRIORITY)
2684 info->hwlrca_lo |= map_guc_prio_to_lrc_desc_prio(ce->guc_state.prio);
2685 info->flags = CONTEXT_REGISTRATION_FLAG_KMD;
2686
2687 /*
2688 * If context is a parent, we need to register a process descriptor
2689 * describing a work queue and register all child contexts.
2690 */
2691 if (intel_context_is_parent(ce)) {
2692 struct guc_sched_wq_desc *wq_desc;
2693 u64 wq_desc_offset, wq_base_offset;
2694
2695 ce->parallel.guc.wqi_tail = 0;
2696 ce->parallel.guc.wqi_head = 0;
2697
2698 wq_desc_offset = (u64)i915_ggtt_offset(ce->state) +
2699 __get_parent_scratch_offset(ce);
2700 wq_base_offset = (u64)i915_ggtt_offset(ce->state) +
2701 __get_wq_offset(ce);
2702 info->wq_desc_lo = lower_32_bits(wq_desc_offset);
2703 info->wq_desc_hi = upper_32_bits(wq_desc_offset);
2704 info->wq_base_lo = lower_32_bits(wq_base_offset);
2705 info->wq_base_hi = upper_32_bits(wq_base_offset);
2706 info->wq_size = WQ_SIZE;
2707
2708 wq_desc = __get_wq_desc_v70(ce);
2709 memset(wq_desc, 0, sizeof(*wq_desc));
2710 wq_desc->wq_status = WQ_STATUS_ACTIVE;
2711
2712 ce->parallel.guc.wq_head = &wq_desc->head;
2713 ce->parallel.guc.wq_tail = &wq_desc->tail;
2714 ce->parallel.guc.wq_status = &wq_desc->wq_status;
2715
2716 clear_children_join_go_memory(ce);
2717 }
2718 }
2719
/*
 * Try to get @ce registered with the GuC under its current guc_id.
 *
 * If the guc_id was not mapped, the context is registered directly. If it
 * was, a deregistration is sent first and the context is flagged as waiting
 * for that deregistration to complete (unless submission is disabled, in
 * which case the mapping is dropped and nothing is sent).
 *
 * Returns 0 on success, including the "will get registered later" cases, or
 * a negative error code from the register/deregister call.
 */
static int try_context_registration(struct intel_context *ce, bool loop)
{
	struct intel_engine_cs *engine = ce->engine;
	struct intel_runtime_pm *runtime_pm = engine->uncore->rpm;
	struct intel_guc *guc = &engine->gt->uc.guc;
	intel_wakeref_t wakeref;
	u32 ctx_id = ce->guc_id.id;
	unsigned long irq_flags;
	bool was_mapped;
	bool disabled;
	int err = 0;

	GEM_BUG_ON(!sched_state_is_init(ce));

	was_mapped = ctx_id_mapped(guc, ctx_id);

	clr_ctx_id_mapping(guc, ctx_id);
	set_ctx_id_mapping(guc, ctx_id, ce);

	if (!was_mapped) {
		with_intel_runtime_pm(runtime_pm, wakeref)
			err = register_context(ce, loop);

		if (unlikely(err == -EBUSY)) {
			clr_ctx_id_mapping(guc, ctx_id);
		} else if (unlikely(err == -ENODEV)) {
			clr_ctx_id_mapping(guc, ctx_id);
			err = 0; /* Will get registered later */
		}

		return err;
	}

	/*
	 * The context_lookup xarray tells us whether the hardware context is
	 * currently registered. It can be registered for one of two reasons:
	 * the guc_id was stolen from another context, or this context's lrc
	 * descriptor address has changed. Either way the old registration
	 * has to be removed from the GuC before this context is registered.
	 */
	trace_intel_context_steal_guc_id(ce);
	GEM_BUG_ON(!loop);

	/* Seal race with Reset */
	spin_lock_irqsave(&ce->guc_state.lock, irq_flags);
	disabled = submission_disabled(guc);
	if (likely(!disabled)) {
		set_context_wait_for_deregister_to_register(ce);
		intel_context_get(ce);
	}
	spin_unlock_irqrestore(&ce->guc_state.lock, irq_flags);

	if (unlikely(disabled)) {
		clr_ctx_id_mapping(guc, ctx_id);
		return 0; /* Will get registered later */
	}

	/*
	 * If stealing the guc_id, this ce has the same guc_id as the
	 * context whose guc_id was stolen.
	 */
	with_intel_runtime_pm(runtime_pm, wakeref)
		err = deregister_context(ce, ce->guc_id.id);

	if (unlikely(err == -ENODEV))
		err = 0; /* Will get registered later */

	return err;
}
2786
/*
 * Pre-pin step shared by the GuC context flavours: defers entirely to
 * lrc_pre_pin() using the explicitly supplied @engine.
 */
static int __guc_context_pre_pin(struct intel_context *ce,
				 struct intel_engine_cs *engine,
				 struct i915_gem_ww_ctx *ww,
				 void **vaddr)
{
	int err;

	err = lrc_pre_pin(ce, engine, ww, vaddr);

	return err;
}
2794
__guc_context_pin(struct intel_context * ce,struct intel_engine_cs * engine,void * vaddr)2795 static int __guc_context_pin(struct intel_context *ce,
2796 struct intel_engine_cs *engine,
2797 void *vaddr)
2798 {
2799 if (i915_ggtt_offset(ce->state) !=
2800 (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK))
2801 set_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
2802
2803 /*
2804 * GuC context gets pinned in guc_request_alloc. See that function for
2805 * explaination of why.
2806 */
2807
2808 return lrc_pin(ce, engine, vaddr);
2809 }
2810
guc_context_pre_pin(struct intel_context * ce,struct i915_gem_ww_ctx * ww,void ** vaddr)2811 static int guc_context_pre_pin(struct intel_context *ce,
2812 struct i915_gem_ww_ctx *ww,
2813 void **vaddr)
2814 {
2815 return __guc_context_pre_pin(ce, ce->engine, ww, vaddr);
2816 }
2817
guc_context_pin(struct intel_context * ce,void * vaddr)2818 static int guc_context_pin(struct intel_context *ce, void *vaddr)
2819 {
2820 int ret = __guc_context_pin(ce, ce->engine, vaddr);
2821
2822 if (likely(!ret && !intel_context_is_barrier(ce)))
2823 intel_engine_pm_get(ce->engine);
2824
2825 return ret;
2826 }
2827
guc_context_unpin(struct intel_context * ce)2828 static void guc_context_unpin(struct intel_context *ce)
2829 {
2830 struct intel_guc *guc = ce_to_guc(ce);
2831
2832 __guc_context_update_stats(ce);
2833 unpin_guc_id(guc, ce);
2834 lrc_unpin(ce);
2835
2836 if (likely(!intel_context_is_barrier(ce)))
2837 intel_engine_pm_put_async(ce->engine);
2838 }
2839
/* .post_unpin hook: nothing GuC specific, defer to the lrc helper. */
static void guc_context_post_unpin(struct intel_context *ce)
{
	lrc_post_unpin(ce);
}
2844
__guc_context_sched_enable(struct intel_guc * guc,struct intel_context * ce)2845 static void __guc_context_sched_enable(struct intel_guc *guc,
2846 struct intel_context *ce)
2847 {
2848 u32 action[] = {
2849 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
2850 ce->guc_id.id,
2851 GUC_CONTEXT_ENABLE
2852 };
2853
2854 trace_intel_context_sched_enable(ce);
2855
2856 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2857 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true);
2858 }
2859
/*
 * Send the SCHED_CONTEXT_MODE_SET H2G asking the GuC to disable scheduling.
 *
 * @guc_id is passed explicitly because ce->guc_id.id is not stable at this
 * point; callers hand in the id they sampled earlier. Must not be used on a
 * child context or with GUC_INVALID_CONTEXT_ID.
 */
static void __guc_context_sched_disable(struct intel_guc *guc,
					struct intel_context *ce,
					u16 guc_id)
{
	u32 h2g[] = {
		INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
		guc_id,
		GUC_CONTEXT_DISABLE
	};

	GEM_BUG_ON(guc_id == GUC_INVALID_CONTEXT_ID);
	GEM_BUG_ON(intel_context_is_child(ce));

	trace_intel_context_sched_disable(ce);

	guc_submission_send_busy_loop(guc, h2g, ARRAY_SIZE(h2g),
				      G2H_LEN_DW_SCHED_CONTEXT_MODE_SET,
				      true);
}
2878
guc_blocked_fence_complete(struct intel_context * ce)2879 static void guc_blocked_fence_complete(struct intel_context *ce)
2880 {
2881 lockdep_assert_held(&ce->guc_state.lock);
2882
2883 if (!i915_sw_fence_done(&ce->guc_state.blocked))
2884 i915_sw_fence_complete(&ce->guc_state.blocked);
2885 }
2886
guc_blocked_fence_reinit(struct intel_context * ce)2887 static void guc_blocked_fence_reinit(struct intel_context *ce)
2888 {
2889 lockdep_assert_held(&ce->guc_state.lock);
2890 GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_state.blocked));
2891
2892 /*
2893 * This fence is always complete unless a pending schedule disable is
2894 * outstanding. We arm the fence here and complete it when we receive
2895 * the pending schedule disable complete message.
2896 */
2897 i915_sw_fence_fini(&ce->guc_state.blocked);
2898 i915_sw_fence_reinit(&ce->guc_state.blocked);
2899 i915_sw_fence_await(&ce->guc_state.blocked);
2900 i915_sw_fence_commit(&ce->guc_state.blocked);
2901 }
2902
prep_context_pending_disable(struct intel_context * ce)2903 static u16 prep_context_pending_disable(struct intel_context *ce)
2904 {
2905 lockdep_assert_held(&ce->guc_state.lock);
2906
2907 set_context_pending_disable(ce);
2908 clr_context_enabled(ce);
2909 guc_blocked_fence_reinit(ce);
2910 intel_context_get(ce);
2911
2912 return ce->guc_id.id;
2913 }
2914
guc_context_block(struct intel_context * ce)2915 static struct i915_sw_fence *guc_context_block(struct intel_context *ce)
2916 {
2917 struct intel_guc *guc = ce_to_guc(ce);
2918 unsigned long flags;
2919 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
2920 intel_wakeref_t wakeref;
2921 u16 guc_id;
2922 bool enabled;
2923
2924 GEM_BUG_ON(intel_context_is_child(ce));
2925
2926 spin_lock_irqsave(&ce->guc_state.lock, flags);
2927
2928 incr_context_blocked(ce);
2929
2930 enabled = context_enabled(ce);
2931 if (unlikely(!enabled || submission_disabled(guc))) {
2932 if (enabled)
2933 clr_context_enabled(ce);
2934 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2935 return &ce->guc_state.blocked;
2936 }
2937
2938 /*
2939 * We add +2 here as the schedule disable complete CTB handler calls
2940 * intel_context_sched_disable_unpin (-2 to pin_count).
2941 */
2942 atomic_add(2, &ce->pin_count);
2943
2944 guc_id = prep_context_pending_disable(ce);
2945
2946 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2947
2948 with_intel_runtime_pm(runtime_pm, wakeref)
2949 __guc_context_sched_disable(guc, ce, guc_id);
2950
2951 return &ce->guc_state.blocked;
2952 }
2953
2954 #define SCHED_STATE_MULTI_BLOCKED_MASK \
2955 (SCHED_STATE_BLOCKED_MASK & ~SCHED_STATE_BLOCKED)
2956 #define SCHED_STATE_NO_UNBLOCK \
2957 (SCHED_STATE_MULTI_BLOCKED_MASK | \
2958 SCHED_STATE_PENDING_DISABLE | \
2959 SCHED_STATE_BANNED)
2960
context_cant_unblock(struct intel_context * ce)2961 static bool context_cant_unblock(struct intel_context *ce)
2962 {
2963 lockdep_assert_held(&ce->guc_state.lock);
2964
2965 return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) ||
2966 context_guc_id_invalid(ce) ||
2967 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id) ||
2968 !intel_context_is_pinned(ce);
2969 }
2970
guc_context_unblock(struct intel_context * ce)2971 static void guc_context_unblock(struct intel_context *ce)
2972 {
2973 struct intel_guc *guc = ce_to_guc(ce);
2974 unsigned long flags;
2975 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
2976 intel_wakeref_t wakeref;
2977 bool enable;
2978
2979 GEM_BUG_ON(context_enabled(ce));
2980 GEM_BUG_ON(intel_context_is_child(ce));
2981
2982 spin_lock_irqsave(&ce->guc_state.lock, flags);
2983
2984 if (unlikely(submission_disabled(guc) ||
2985 context_cant_unblock(ce))) {
2986 enable = false;
2987 } else {
2988 enable = true;
2989 set_context_pending_enable(ce);
2990 set_context_enabled(ce);
2991 intel_context_get(ce);
2992 }
2993
2994 decr_context_blocked(ce);
2995
2996 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2997
2998 if (enable) {
2999 with_intel_runtime_pm(runtime_pm, wakeref)
3000 __guc_context_sched_enable(guc, ce);
3001 }
3002 }
3003
guc_context_cancel_request(struct intel_context * ce,struct i915_request * rq)3004 static void guc_context_cancel_request(struct intel_context *ce,
3005 struct i915_request *rq)
3006 {
3007 struct intel_context *block_context =
3008 request_to_scheduling_context(rq);
3009
3010 if (i915_sw_fence_signaled(&rq->submit)) {
3011 struct i915_sw_fence *fence;
3012
3013 intel_context_get(ce);
3014 fence = guc_context_block(block_context);
3015 i915_sw_fence_wait(fence);
3016 if (!i915_request_completed(rq)) {
3017 __i915_request_skip(rq);
3018 guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head),
3019 true);
3020 }
3021
3022 guc_context_unblock(block_context);
3023 intel_context_put(ce);
3024 }
3025 }
3026
/*
 * Tell the GuC to use @preemption_timeout for the context identified by
 * @guc_id.
 *
 * Submission interface versions below 1.0.0 use the dedicated V69 action;
 * newer ones go through a context policy KLV update.
 */
static void __guc_context_set_preemption_timeout(struct intel_guc *guc,
						 u16 guc_id,
						 u32 preemption_timeout)
{
	struct context_policy policy;

	if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 0, 0)) {
		u32 h2g[] = {
			INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT,
			guc_id,
			preemption_timeout
		};

		intel_guc_send_busy_loop(guc, h2g, ARRAY_SIZE(h2g), 0, true);
		return;
	}

	__guc_context_policy_start_klv(&policy, guc_id);
	__guc_context_policy_add_preemption_timeout(&policy,
						    preemption_timeout);
	__guc_context_set_context_policies(guc, &policy, true);
}
3047
3048 static void
guc_context_revoke(struct intel_context * ce,struct i915_request * rq,unsigned int preempt_timeout_ms)3049 guc_context_revoke(struct intel_context *ce, struct i915_request *rq,
3050 unsigned int preempt_timeout_ms)
3051 {
3052 struct intel_guc *guc = ce_to_guc(ce);
3053 struct intel_runtime_pm *runtime_pm =
3054 &ce->engine->gt->i915->runtime_pm;
3055 intel_wakeref_t wakeref;
3056 unsigned long flags;
3057
3058 GEM_BUG_ON(intel_context_is_child(ce));
3059
3060 guc_flush_submissions(guc);
3061
3062 spin_lock_irqsave(&ce->guc_state.lock, flags);
3063 set_context_banned(ce);
3064
3065 if (submission_disabled(guc) ||
3066 (!context_enabled(ce) && !context_pending_disable(ce))) {
3067 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3068
3069 guc_cancel_context_requests(ce);
3070 intel_engine_signal_breadcrumbs(ce->engine);
3071 } else if (!context_pending_disable(ce)) {
3072 u16 guc_id;
3073
3074 /*
3075 * We add +2 here as the schedule disable complete CTB handler
3076 * calls intel_context_sched_disable_unpin (-2 to pin_count).
3077 */
3078 atomic_add(2, &ce->pin_count);
3079
3080 guc_id = prep_context_pending_disable(ce);
3081 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3082
3083 /*
3084 * In addition to disabling scheduling, set the preemption
3085 * timeout to the minimum value (1 us) so the banned context
3086 * gets kicked off the HW ASAP.
3087 */
3088 with_intel_runtime_pm(runtime_pm, wakeref) {
3089 __guc_context_set_preemption_timeout(guc, guc_id,
3090 preempt_timeout_ms);
3091 __guc_context_sched_disable(guc, ce, guc_id);
3092 }
3093 } else {
3094 if (!context_guc_id_invalid(ce))
3095 with_intel_runtime_pm(runtime_pm, wakeref)
3096 __guc_context_set_preemption_timeout(guc,
3097 ce->guc_id.id,
3098 preempt_timeout_ms);
3099 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3100 }
3101 }
3102
do_sched_disable(struct intel_guc * guc,struct intel_context * ce,unsigned long flags)3103 static void do_sched_disable(struct intel_guc *guc, struct intel_context *ce,
3104 unsigned long flags)
3105 __releases(ce->guc_state.lock)
3106 {
3107 struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm;
3108 intel_wakeref_t wakeref;
3109 u16 guc_id;
3110
3111 lockdep_assert_held(&ce->guc_state.lock);
3112 guc_id = prep_context_pending_disable(ce);
3113
3114 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3115
3116 with_intel_runtime_pm(runtime_pm, wakeref)
3117 __guc_context_sched_disable(guc, ce, guc_id);
3118 }
3119
bypass_sched_disable(struct intel_guc * guc,struct intel_context * ce)3120 static bool bypass_sched_disable(struct intel_guc *guc,
3121 struct intel_context *ce)
3122 {
3123 lockdep_assert_held(&ce->guc_state.lock);
3124 GEM_BUG_ON(intel_context_is_child(ce));
3125
3126 if (submission_disabled(guc) || context_guc_id_invalid(ce) ||
3127 !ctx_id_mapped(guc, ce->guc_id.id)) {
3128 clr_context_enabled(ce);
3129 return true;
3130 }
3131
3132 return !context_enabled(ce);
3133 }
3134
__delay_sched_disable(struct work_struct * wrk)3135 static void __delay_sched_disable(struct work_struct *wrk)
3136 {
3137 struct intel_context *ce =
3138 container_of(wrk, typeof(*ce), guc_state.sched_disable_delay_work.work);
3139 struct intel_guc *guc = ce_to_guc(ce);
3140 unsigned long flags;
3141
3142 spin_lock_irqsave(&ce->guc_state.lock, flags);
3143
3144 if (bypass_sched_disable(guc, ce)) {
3145 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3146 intel_context_sched_disable_unpin(ce);
3147 } else {
3148 do_sched_disable(guc, ce, flags);
3149 }
3150 }
3151
guc_id_pressure(struct intel_guc * guc,struct intel_context * ce)3152 static bool guc_id_pressure(struct intel_guc *guc, struct intel_context *ce)
3153 {
3154 /*
3155 * parent contexts are perma-pinned, if we are unpinning do schedule
3156 * disable immediately.
3157 */
3158 if (intel_context_is_parent(ce))
3159 return true;
3160
3161 /*
3162 * If we are beyond the threshold for avail guc_ids, do schedule disable immediately.
3163 */
3164 return guc->submission_state.guc_ids_in_use >
3165 guc->submission_state.sched_disable_gucid_threshold;
3166 }
3167
guc_context_sched_disable(struct intel_context * ce)3168 static void guc_context_sched_disable(struct intel_context *ce)
3169 {
3170 struct intel_guc *guc = ce_to_guc(ce);
3171 u64 delay = guc->submission_state.sched_disable_delay_ms;
3172 unsigned long flags;
3173
3174 spin_lock_irqsave(&ce->guc_state.lock, flags);
3175
3176 if (bypass_sched_disable(guc, ce)) {
3177 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3178 intel_context_sched_disable_unpin(ce);
3179 } else if (!intel_context_is_closed(ce) && !guc_id_pressure(guc, ce) &&
3180 delay) {
3181 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3182 mod_delayed_work(system_unbound_wq,
3183 &ce->guc_state.sched_disable_delay_work,
3184 msecs_to_jiffies(delay));
3185 } else {
3186 do_sched_disable(guc, ce, flags);
3187 }
3188 }
3189
guc_context_close(struct intel_context * ce)3190 static void guc_context_close(struct intel_context *ce)
3191 {
3192 unsigned long flags;
3193
3194 if (test_bit(CONTEXT_GUC_INIT, &ce->flags) &&
3195 cancel_delayed_work(&ce->guc_state.sched_disable_delay_work))
3196 __delay_sched_disable(&ce->guc_state.sched_disable_delay_work.work);
3197
3198 spin_lock_irqsave(&ce->guc_state.lock, flags);
3199 set_context_close_done(ce);
3200 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3201 }
3202
guc_lrc_desc_unpin(struct intel_context * ce)3203 static inline void guc_lrc_desc_unpin(struct intel_context *ce)
3204 {
3205 struct intel_guc *guc = ce_to_guc(ce);
3206 struct intel_gt *gt = guc_to_gt(guc);
3207 unsigned long flags;
3208 bool disabled;
3209
3210 GEM_BUG_ON(!intel_gt_pm_is_awake(gt));
3211 GEM_BUG_ON(!ctx_id_mapped(guc, ce->guc_id.id));
3212 GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id));
3213 GEM_BUG_ON(context_enabled(ce));
3214
3215 /* Seal race with Reset */
3216 spin_lock_irqsave(&ce->guc_state.lock, flags);
3217 disabled = submission_disabled(guc);
3218 if (likely(!disabled)) {
3219 __intel_gt_pm_get(gt);
3220 set_context_destroyed(ce);
3221 clr_context_registered(ce);
3222 }
3223 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3224 if (unlikely(disabled)) {
3225 release_guc_id(guc, ce);
3226 __guc_context_destroy(ce);
3227 return;
3228 }
3229
3230 deregister_context(ce, ce->guc_id.id);
3231 }
3232
__guc_context_destroy(struct intel_context * ce)3233 static void __guc_context_destroy(struct intel_context *ce)
3234 {
3235 GEM_BUG_ON(ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] ||
3236 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] ||
3237 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] ||
3238 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]);
3239
3240 lrc_fini(ce);
3241 intel_context_fini(ce);
3242
3243 if (intel_engine_is_virtual(ce->engine)) {
3244 struct guc_virtual_engine *ve =
3245 container_of(ce, typeof(*ve), context);
3246
3247 if (ve->base.breadcrumbs)
3248 intel_breadcrumbs_put(ve->base.breadcrumbs);
3249
3250 kfree(ve);
3251 } else {
3252 intel_context_free(ce);
3253 }
3254 }
3255
guc_flush_destroyed_contexts(struct intel_guc * guc)3256 static void guc_flush_destroyed_contexts(struct intel_guc *guc)
3257 {
3258 struct intel_context *ce;
3259 unsigned long flags;
3260
3261 GEM_BUG_ON(!submission_disabled(guc) &&
3262 guc_submission_initialized(guc));
3263
3264 while (!list_empty(&guc->submission_state.destroyed_contexts)) {
3265 spin_lock_irqsave(&guc->submission_state.lock, flags);
3266 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
3267 struct intel_context,
3268 destroyed_link);
3269 if (ce)
3270 list_del_init(&ce->destroyed_link);
3271 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
3272
3273 if (!ce)
3274 break;
3275
3276 release_guc_id(guc, ce);
3277 __guc_context_destroy(ce);
3278 }
3279 }
3280
deregister_destroyed_contexts(struct intel_guc * guc)3281 static void deregister_destroyed_contexts(struct intel_guc *guc)
3282 {
3283 struct intel_context *ce;
3284 unsigned long flags;
3285
3286 while (!list_empty(&guc->submission_state.destroyed_contexts)) {
3287 spin_lock_irqsave(&guc->submission_state.lock, flags);
3288 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
3289 struct intel_context,
3290 destroyed_link);
3291 if (ce)
3292 list_del_init(&ce->destroyed_link);
3293 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
3294
3295 if (!ce)
3296 break;
3297
3298 guc_lrc_desc_unpin(ce);
3299 }
3300 }
3301
destroyed_worker_func(struct work_struct * w)3302 static void destroyed_worker_func(struct work_struct *w)
3303 {
3304 struct intel_guc *guc = container_of(w, struct intel_guc,
3305 submission_state.destroyed_worker);
3306 struct intel_gt *gt = guc_to_gt(guc);
3307 int tmp;
3308
3309 with_intel_gt_pm(gt, tmp)
3310 deregister_destroyed_contexts(guc);
3311 }
3312
guc_context_destroy(struct kref * kref)3313 static void guc_context_destroy(struct kref *kref)
3314 {
3315 struct intel_context *ce = container_of(kref, typeof(*ce), ref);
3316 struct intel_guc *guc = ce_to_guc(ce);
3317 unsigned long flags;
3318 bool destroy;
3319
3320 /*
3321 * If the guc_id is invalid this context has been stolen and we can free
3322 * it immediately. Also can be freed immediately if the context is not
3323 * registered with the GuC or the GuC is in the middle of a reset.
3324 */
3325 spin_lock_irqsave(&guc->submission_state.lock, flags);
3326 destroy = submission_disabled(guc) || context_guc_id_invalid(ce) ||
3327 !ctx_id_mapped(guc, ce->guc_id.id);
3328 if (likely(!destroy)) {
3329 if (!list_empty(&ce->guc_id.link))
3330 list_del_init(&ce->guc_id.link);
3331 list_add_tail(&ce->destroyed_link,
3332 &guc->submission_state.destroyed_contexts);
3333 } else {
3334 __release_guc_id(guc, ce);
3335 }
3336 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
3337 if (unlikely(destroy)) {
3338 __guc_context_destroy(ce);
3339 return;
3340 }
3341
3342 /*
3343 * We use a worker to issue the H2G to deregister the context as we can
3344 * take the GT PM for the first time which isn't allowed from an atomic
3345 * context.
3346 */
3347 queue_work(system_unbound_wq, &guc->submission_state.destroyed_worker);
3348 }
3349
guc_context_alloc(struct intel_context * ce)3350 static int guc_context_alloc(struct intel_context *ce)
3351 {
3352 return lrc_alloc(ce, ce->engine);
3353 }
3354
__guc_context_set_prio(struct intel_guc * guc,struct intel_context * ce)3355 static void __guc_context_set_prio(struct intel_guc *guc,
3356 struct intel_context *ce)
3357 {
3358 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) {
3359 struct context_policy policy;
3360
3361 __guc_context_policy_start_klv(&policy, ce->guc_id.id);
3362 __guc_context_policy_add_priority(&policy, ce->guc_state.prio);
3363 __guc_context_set_context_policies(guc, &policy, true);
3364 } else {
3365 u32 action[] = {
3366 INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY,
3367 ce->guc_id.id,
3368 ce->guc_state.prio,
3369 };
3370
3371 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
3372 }
3373 }
3374
/*
 * Record @prio as the context's GuC priority and, if it actually changed
 * while submission is enabled and the context is registered, forward it to
 * the GuC.
 *
 * @prio must lie within [GUC_CLIENT_PRIORITY_KMD_HIGH,
 * GUC_CLIENT_PRIORITY_NORMAL]. Caller must hold ce->guc_state.lock.
 */
static void guc_context_set_prio(struct intel_guc *guc,
				 struct intel_context *ce,
				 u8 prio)
{
	bool notify_guc;

	GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH ||
		   prio > GUC_CLIENT_PRIORITY_NORMAL);
	lockdep_assert_held(&ce->guc_state.lock);

	/* Decide before the cached value is overwritten below. */
	notify_guc = ce->guc_state.prio != prio &&
		     !submission_disabled(guc) &&
		     context_registered(ce);

	ce->guc_state.prio = prio;
	if (!notify_guc)
		return;

	__guc_context_set_prio(guc, ce);

	trace_intel_context_set_prio(ce);
}
3394
map_i915_prio_to_guc_prio(int prio)3395 static inline u8 map_i915_prio_to_guc_prio(int prio)
3396 {
3397 if (prio == I915_PRIORITY_NORMAL)
3398 return GUC_CLIENT_PRIORITY_KMD_NORMAL;
3399 else if (prio < I915_PRIORITY_NORMAL)
3400 return GUC_CLIENT_PRIORITY_NORMAL;
3401 else if (prio < I915_PRIORITY_DISPLAY)
3402 return GUC_CLIENT_PRIORITY_HIGH;
3403 else
3404 return GUC_CLIENT_PRIORITY_KMD_HIGH;
3405 }
3406
/*
 * Account one more in-flight request at GuC priority @guc_prio on @ce.
 * Caller must hold ce->guc_state.lock.
 */
static inline void add_context_inflight_prio(struct intel_context *ce,
					     u8 guc_prio)
{
	lockdep_assert_held(&ce->guc_state.lock);
	GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count));

	ce->guc_state.prio_count[guc_prio]++;

	/* A counter that reads zero right after the increment has wrapped. */
	GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]);
}
3418
/*
 * Drop one in-flight request at GuC priority @guc_prio from @ce's
 * accounting. Caller must hold ce->guc_state.lock.
 */
static inline void sub_context_inflight_prio(struct intel_context *ce,
					     u8 guc_prio)
{
	lockdep_assert_held(&ce->guc_state.lock);
	GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count));

	/* Decrementing a counter that is already zero would underflow. */
	GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]);

	ce->guc_state.prio_count[guc_prio]--;
}
3430
update_context_prio(struct intel_context * ce)3431 static inline void update_context_prio(struct intel_context *ce)
3432 {
3433 struct intel_guc *guc = &ce->engine->gt->uc.guc;
3434 int i;
3435
3436 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0);
3437 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL);
3438
3439 lockdep_assert_held(&ce->guc_state.lock);
3440
3441 for (i = 0; i < ARRAY_SIZE(ce->guc_state.prio_count); ++i) {
3442 if (ce->guc_state.prio_count[i]) {
3443 guc_context_set_prio(guc, ce, i);
3444 break;
3445 }
3446 }
3447 }
3448
new_guc_prio_higher(u8 old_guc_prio,u8 new_guc_prio)3449 static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio)
3450 {
3451 /* Lower value is higher priority */
3452 return new_guc_prio < old_guc_prio;
3453 }
3454
add_to_context(struct i915_request * rq)3455 static void add_to_context(struct i915_request *rq)
3456 {
3457 struct intel_context *ce = request_to_scheduling_context(rq);
3458 u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq));
3459
3460 GEM_BUG_ON(intel_context_is_child(ce));
3461 GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI);
3462
3463 spin_lock(&ce->guc_state.lock);
3464 list_move_tail(&rq->sched.link, &ce->guc_state.requests);
3465
3466 if (rq->guc_prio == GUC_PRIO_INIT) {
3467 rq->guc_prio = new_guc_prio;
3468 add_context_inflight_prio(ce, rq->guc_prio);
3469 } else if (new_guc_prio_higher(rq->guc_prio, new_guc_prio)) {
3470 sub_context_inflight_prio(ce, rq->guc_prio);
3471 rq->guc_prio = new_guc_prio;
3472 add_context_inflight_prio(ce, rq->guc_prio);
3473 }
3474 update_context_prio(ce);
3475
3476 spin_unlock(&ce->guc_state.lock);
3477 }
3478
guc_prio_fini(struct i915_request * rq,struct intel_context * ce)3479 static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce)
3480 {
3481 lockdep_assert_held(&ce->guc_state.lock);
3482
3483 if (rq->guc_prio != GUC_PRIO_INIT &&
3484 rq->guc_prio != GUC_PRIO_FINI) {
3485 sub_context_inflight_prio(ce, rq->guc_prio);
3486 update_context_prio(ce);
3487 }
3488 rq->guc_prio = GUC_PRIO_FINI;
3489 }
3490
remove_from_context(struct i915_request * rq)3491 static void remove_from_context(struct i915_request *rq)
3492 {
3493 struct intel_context *ce = request_to_scheduling_context(rq);
3494
3495 GEM_BUG_ON(intel_context_is_child(ce));
3496
3497 spin_lock_irq(&ce->guc_state.lock);
3498
3499 list_del_init(&rq->sched.link);
3500 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
3501
3502 /* Prevent further __await_execution() registering a cb, then flush */
3503 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
3504
3505 guc_prio_fini(rq, ce);
3506
3507 spin_unlock_irq(&ce->guc_state.lock);
3508
3509 atomic_dec(&ce->guc_id.ref);
3510 i915_request_notify_execute_cb_imm(rq);
3511 }
3512
3513 static const struct intel_context_ops guc_context_ops = {
3514 .flags = COPS_RUNTIME_CYCLES,
3515 .alloc = guc_context_alloc,
3516
3517 .close = guc_context_close,
3518
3519 .pre_pin = guc_context_pre_pin,
3520 .pin = guc_context_pin,
3521 .unpin = guc_context_unpin,
3522 .post_unpin = guc_context_post_unpin,
3523
3524 .revoke = guc_context_revoke,
3525
3526 .cancel_request = guc_context_cancel_request,
3527
3528 .enter = intel_context_enter_engine,
3529 .exit = intel_context_exit_engine,
3530
3531 .sched_disable = guc_context_sched_disable,
3532
3533 .update_stats = guc_context_update_stats,
3534
3535 .reset = lrc_reset,
3536 .destroy = guc_context_destroy,
3537
3538 .create_virtual = guc_create_virtual,
3539 .create_parallel = guc_create_parallel,
3540 };
3541
submit_work_cb(struct irq_work * wrk)3542 static void submit_work_cb(struct irq_work *wrk)
3543 {
3544 struct i915_request *rq = container_of(wrk, typeof(*rq), submit_work);
3545
3546 might_lock(&rq->engine->sched_engine->lock);
3547 i915_sw_fence_complete(&rq->submit);
3548 }
3549
__guc_signal_context_fence(struct intel_context * ce)3550 static void __guc_signal_context_fence(struct intel_context *ce)
3551 {
3552 struct i915_request *rq, *rn;
3553
3554 lockdep_assert_held(&ce->guc_state.lock);
3555
3556 if (!list_empty(&ce->guc_state.fences))
3557 trace_intel_context_fence_release(ce);
3558
3559 /*
3560 * Use an IRQ to ensure locking order of sched_engine->lock ->
3561 * ce->guc_state.lock is preserved.
3562 */
3563 list_for_each_entry_safe(rq, rn, &ce->guc_state.fences,
3564 guc_fence_link) {
3565 list_del(&rq->guc_fence_link);
3566 irq_work_queue(&rq->submit_work);
3567 }
3568
3569 INIT_LIST_HEAD(&ce->guc_state.fences);
3570 }
3571
guc_signal_context_fence(struct intel_context * ce)3572 static void guc_signal_context_fence(struct intel_context *ce)
3573 {
3574 unsigned long flags;
3575
3576 GEM_BUG_ON(intel_context_is_child(ce));
3577
3578 spin_lock_irqsave(&ce->guc_state.lock, flags);
3579 clr_context_wait_for_deregister_to_register(ce);
3580 __guc_signal_context_fence(ce);
3581 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3582 }
3583
/*
 * Decide whether @ce has to be (re)registered with the GuC.
 *
 * Registration is needed when a new guc_id was just assigned, the LRCA is
 * flagged dirty or the guc_id is not mapped - but never while submission is
 * disabled.
 */
static bool context_needs_register(struct intel_context *ce, bool new_guc_id)
{
	struct intel_guc *guc = ce_to_guc(ce);

	if (!new_guc_id &&
	    !test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) &&
	    ctx_id_mapped(guc, ce->guc_id.id))
		return false;

	return !submission_disabled(guc);
}
3590
guc_context_init(struct intel_context * ce)3591 static void guc_context_init(struct intel_context *ce)
3592 {
3593 const struct i915_gem_context *ctx;
3594 int prio = I915_CONTEXT_DEFAULT_PRIORITY;
3595
3596 rcu_read_lock();
3597 ctx = rcu_dereference(ce->gem_context);
3598 if (ctx)
3599 prio = ctx->sched.priority;
3600 rcu_read_unlock();
3601
3602 ce->guc_state.prio = map_i915_prio_to_guc_prio(prio);
3603
3604 INIT_DELAYED_WORK(&ce->guc_state.sched_disable_delay_work,
3605 __delay_sched_disable);
3606
3607 set_bit(CONTEXT_GUC_INIT, &ce->flags);
3608 }
3609
guc_request_alloc(struct i915_request * rq)3610 static int guc_request_alloc(struct i915_request *rq)
3611 {
3612 struct intel_context *ce = request_to_scheduling_context(rq);
3613 struct intel_guc *guc = ce_to_guc(ce);
3614 unsigned long flags;
3615 int ret;
3616
3617 GEM_BUG_ON(!intel_context_is_pinned(rq->context));
3618
3619 /*
3620 * Flush enough space to reduce the likelihood of waiting after
3621 * we start building the request - in which case we will just
3622 * have to repeat work.
3623 */
3624 rq->reserved_space += GUC_REQUEST_SIZE;
3625
3626 /*
3627 * Note that after this point, we have committed to using
3628 * this request as it is being used to both track the
3629 * state of engine initialisation and liveness of the
3630 * golden renderstate above. Think twice before you try
3631 * to cancel/unwind this request now.
3632 */
3633
3634 /* Unconditionally invalidate GPU caches and TLBs. */
3635 ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
3636 if (ret)
3637 return ret;
3638
3639 rq->reserved_space -= GUC_REQUEST_SIZE;
3640
3641 if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags)))
3642 guc_context_init(ce);
3643
3644 /*
3645 * If the context gets closed while the execbuf is ongoing, the context
3646 * close code will race with the below code to cancel the delayed work.
3647 * If the context close wins the race and cancels the work, it will
3648 * immediately call the sched disable (see guc_context_close), so there
3649 * is a chance we can get past this check while the sched_disable code
3650 * is being executed. To make sure that code completes before we check
3651 * the status further down, we wait for the close process to complete.
3652 * Else, this code path could send a request down thinking that the
3653 * context is still in a schedule-enable mode while the GuC ends up
3654 * dropping the request completely because the disable did go from the
3655 * context_close path right to GuC just prior. In the event the CT is
3656 * full, we could potentially need to wait up to 1.5 seconds.
3657 */
3658 if (cancel_delayed_work_sync(&ce->guc_state.sched_disable_delay_work))
3659 intel_context_sched_disable_unpin(ce);
3660 else if (intel_context_is_closed(ce))
3661 if (wait_for(context_close_done(ce), 1500))
3662 guc_warn(guc, "timed out waiting on context sched close before realloc\n");
3663 /*
3664 * Call pin_guc_id here rather than in the pinning step as with
3665 * dma_resv, contexts can be repeatedly pinned / unpinned trashing the
3666 * guc_id and creating horrible race conditions. This is especially bad
3667 * when guc_id are being stolen due to over subscription. By the time
3668 * this function is reached, it is guaranteed that the guc_id will be
3669 * persistent until the generated request is retired. Thus, sealing these
3670 * race conditions. It is still safe to fail here if guc_id are
3671 * exhausted and return -EAGAIN to the user indicating that they can try
3672 * again in the future.
3673 *
3674 * There is no need for a lock here as the timeline mutex ensures at
3675 * most one context can be executing this code path at once. The
3676 * guc_id_ref is incremented once for every request in flight and
3677 * decremented on each retire. When it is zero, a lock around the
3678 * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id.
3679 */
3680 if (atomic_add_unless(&ce->guc_id.ref, 1, 0))
3681 goto out;
3682
3683 ret = pin_guc_id(guc, ce); /* returns 1 if new guc_id assigned */
3684 if (unlikely(ret < 0))
3685 return ret;
3686 if (context_needs_register(ce, !!ret)) {
3687 ret = try_context_registration(ce, true);
3688 if (unlikely(ret)) { /* unwind */
3689 if (ret == -EPIPE) {
3690 disable_submission(guc);
3691 goto out; /* GPU will be reset */
3692 }
3693 atomic_dec(&ce->guc_id.ref);
3694 unpin_guc_id(guc, ce);
3695 return ret;
3696 }
3697 }
3698
3699 clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
3700
3701 out:
3702 /*
3703 * We block all requests on this context if a G2H is pending for a
3704 * schedule disable or context deregistration as the GuC will fail a
3705 * schedule enable or context registration if either G2H is pending
	 * respectively. Once a G2H returns, the fence is released that is
3707 * blocking these requests (see guc_signal_context_fence).
3708 */
3709 spin_lock_irqsave(&ce->guc_state.lock, flags);
3710 if (context_wait_for_deregister_to_register(ce) ||
3711 context_pending_disable(ce)) {
3712 init_irq_work(&rq->submit_work, submit_work_cb);
3713 i915_sw_fence_await(&rq->submit);
3714
3715 list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences);
3716 }
3717 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3718
3719 return 0;
3720 }
3721
guc_virtual_context_pre_pin(struct intel_context * ce,struct i915_gem_ww_ctx * ww,void ** vaddr)3722 static int guc_virtual_context_pre_pin(struct intel_context *ce,
3723 struct i915_gem_ww_ctx *ww,
3724 void **vaddr)
3725 {
3726 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3727
3728 return __guc_context_pre_pin(ce, engine, ww, vaddr);
3729 }
3730
guc_virtual_context_pin(struct intel_context * ce,void * vaddr)3731 static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr)
3732 {
3733 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3734 int ret = __guc_context_pin(ce, engine, vaddr);
3735 intel_engine_mask_t tmp, mask = ce->engine->mask;
3736
3737 if (likely(!ret))
3738 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3739 intel_engine_pm_get(engine);
3740
3741 return ret;
3742 }
3743
guc_virtual_context_unpin(struct intel_context * ce)3744 static void guc_virtual_context_unpin(struct intel_context *ce)
3745 {
3746 intel_engine_mask_t tmp, mask = ce->engine->mask;
3747 struct intel_engine_cs *engine;
3748 struct intel_guc *guc = ce_to_guc(ce);
3749
3750 GEM_BUG_ON(context_enabled(ce));
3751 GEM_BUG_ON(intel_context_is_barrier(ce));
3752
3753 unpin_guc_id(guc, ce);
3754 lrc_unpin(ce);
3755
3756 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3757 intel_engine_pm_put_async(engine);
3758 }
3759
guc_virtual_context_enter(struct intel_context * ce)3760 static void guc_virtual_context_enter(struct intel_context *ce)
3761 {
3762 intel_engine_mask_t tmp, mask = ce->engine->mask;
3763 struct intel_engine_cs *engine;
3764
3765 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3766 intel_engine_pm_get(engine);
3767
3768 intel_timeline_enter(ce->timeline);
3769 }
3770
guc_virtual_context_exit(struct intel_context * ce)3771 static void guc_virtual_context_exit(struct intel_context *ce)
3772 {
3773 intel_engine_mask_t tmp, mask = ce->engine->mask;
3774 struct intel_engine_cs *engine;
3775
3776 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3777 intel_engine_pm_put(engine);
3778
3779 intel_timeline_exit(ce->timeline);
3780 }
3781
guc_virtual_context_alloc(struct intel_context * ce)3782 static int guc_virtual_context_alloc(struct intel_context *ce)
3783 {
3784 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3785
3786 return lrc_alloc(ce, engine);
3787 }
3788
3789 static const struct intel_context_ops virtual_guc_context_ops = {
3790 .flags = COPS_RUNTIME_CYCLES,
3791 .alloc = guc_virtual_context_alloc,
3792
3793 .close = guc_context_close,
3794
3795 .pre_pin = guc_virtual_context_pre_pin,
3796 .pin = guc_virtual_context_pin,
3797 .unpin = guc_virtual_context_unpin,
3798 .post_unpin = guc_context_post_unpin,
3799
3800 .revoke = guc_context_revoke,
3801
3802 .cancel_request = guc_context_cancel_request,
3803
3804 .enter = guc_virtual_context_enter,
3805 .exit = guc_virtual_context_exit,
3806
3807 .sched_disable = guc_context_sched_disable,
3808 .update_stats = guc_context_update_stats,
3809
3810 .destroy = guc_context_destroy,
3811
3812 .get_sibling = guc_virtual_get_sibling,
3813 };
3814
guc_parent_context_pin(struct intel_context * ce,void * vaddr)3815 static int guc_parent_context_pin(struct intel_context *ce, void *vaddr)
3816 {
3817 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3818 struct intel_guc *guc = ce_to_guc(ce);
3819 int ret;
3820
3821 GEM_BUG_ON(!intel_context_is_parent(ce));
3822 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
3823
3824 ret = pin_guc_id(guc, ce);
3825 if (unlikely(ret < 0))
3826 return ret;
3827
3828 return __guc_context_pin(ce, engine, vaddr);
3829 }
3830
guc_child_context_pin(struct intel_context * ce,void * vaddr)3831 static int guc_child_context_pin(struct intel_context *ce, void *vaddr)
3832 {
3833 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3834
3835 GEM_BUG_ON(!intel_context_is_child(ce));
3836 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
3837
3838 __intel_context_pin(ce->parallel.parent);
3839 return __guc_context_pin(ce, engine, vaddr);
3840 }
3841
guc_parent_context_unpin(struct intel_context * ce)3842 static void guc_parent_context_unpin(struct intel_context *ce)
3843 {
3844 struct intel_guc *guc = ce_to_guc(ce);
3845
3846 GEM_BUG_ON(context_enabled(ce));
3847 GEM_BUG_ON(intel_context_is_barrier(ce));
3848 GEM_BUG_ON(!intel_context_is_parent(ce));
3849 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
3850
3851 unpin_guc_id(guc, ce);
3852 lrc_unpin(ce);
3853 }
3854
/*
 * Unpin hook for a child of a parallel context: only lrc_unpin() is called;
 * unlike the parent hook there is no unpin_guc_id() here.
 */
static void guc_child_context_unpin(struct intel_context *ce)
{
	GEM_BUG_ON(context_enabled(ce));
	GEM_BUG_ON(intel_context_is_barrier(ce));
	GEM_BUG_ON(!intel_context_is_child(ce));
	GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));

	lrc_unpin(ce);
}
3864
guc_child_context_post_unpin(struct intel_context * ce)3865 static void guc_child_context_post_unpin(struct intel_context *ce)
3866 {
3867 GEM_BUG_ON(!intel_context_is_child(ce));
3868 GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent));
3869 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
3870
3871 lrc_post_unpin(ce);
3872 intel_context_unpin(ce->parallel.parent);
3873 }
3874
guc_child_context_destroy(struct kref * kref)3875 static void guc_child_context_destroy(struct kref *kref)
3876 {
3877 struct intel_context *ce = container_of(kref, typeof(*ce), ref);
3878
3879 __guc_context_destroy(ce);
3880 }
3881
3882 static const struct intel_context_ops virtual_parent_context_ops = {
3883 .alloc = guc_virtual_context_alloc,
3884
3885 .close = guc_context_close,
3886
3887 .pre_pin = guc_context_pre_pin,
3888 .pin = guc_parent_context_pin,
3889 .unpin = guc_parent_context_unpin,
3890 .post_unpin = guc_context_post_unpin,
3891
3892 .revoke = guc_context_revoke,
3893
3894 .cancel_request = guc_context_cancel_request,
3895
3896 .enter = guc_virtual_context_enter,
3897 .exit = guc_virtual_context_exit,
3898
3899 .sched_disable = guc_context_sched_disable,
3900
3901 .destroy = guc_context_destroy,
3902
3903 .get_sibling = guc_virtual_get_sibling,
3904 };
3905
3906 static const struct intel_context_ops virtual_child_context_ops = {
3907 .alloc = guc_virtual_context_alloc,
3908
3909 .pre_pin = guc_context_pre_pin,
3910 .pin = guc_child_context_pin,
3911 .unpin = guc_child_context_unpin,
3912 .post_unpin = guc_child_context_post_unpin,
3913
3914 .cancel_request = guc_context_cancel_request,
3915
3916 .enter = guc_virtual_context_enter,
3917 .exit = guc_virtual_context_exit,
3918
3919 .destroy = guc_child_context_destroy,
3920
3921 .get_sibling = guc_virtual_get_sibling,
3922 };
3923
/*
 * The below override of the breadcrumbs is enabled when the user configures a
 * context for parallel submission (multi-lrc, parent-child).
 *
 * The overridden breadcrumbs implements an algorithm which allows the GuC to
 * safely preempt all the hw contexts configured for parallel submission
 * between each BB. The contract between the i915 and GuC is if the parent
 * context can be preempted, all the children can be preempted, and the GuC will
 * always try to preempt the parent before the children. A handshake between the
 * parent / children breadcrumbs ensures the i915 holds up its end of the deal
 * creating a window to preempt between each set of BBs.
 *
 * The handlers are only forward-declared here so that guc_create_parallel()
 * can install them on the parent / child engines.
 */
static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq,
						     u64 offset, u32 len,
						     const unsigned int flags);
static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
						    u64 offset, u32 len,
						    const unsigned int flags);
static u32 *
emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
						 u32 *cs);
static u32 *
emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
						u32 *cs);
3948
3949 static struct intel_context *
guc_create_parallel(struct intel_engine_cs ** engines,unsigned int num_siblings,unsigned int width)3950 guc_create_parallel(struct intel_engine_cs **engines,
3951 unsigned int num_siblings,
3952 unsigned int width)
3953 {
3954 struct intel_engine_cs **siblings = NULL;
3955 struct intel_context *parent = NULL, *ce, *err;
3956 int i, j;
3957
3958 siblings = kmalloc_array(num_siblings,
3959 sizeof(*siblings),
3960 GFP_KERNEL);
3961 if (!siblings)
3962 return ERR_PTR(-ENOMEM);
3963
3964 for (i = 0; i < width; ++i) {
3965 for (j = 0; j < num_siblings; ++j)
3966 siblings[j] = engines[i * num_siblings + j];
3967
3968 ce = intel_engine_create_virtual(siblings, num_siblings,
3969 FORCE_VIRTUAL);
3970 if (IS_ERR(ce)) {
3971 err = ERR_CAST(ce);
3972 goto unwind;
3973 }
3974
3975 if (i == 0) {
3976 parent = ce;
3977 parent->ops = &virtual_parent_context_ops;
3978 } else {
3979 ce->ops = &virtual_child_context_ops;
3980 intel_context_bind_parent_child(parent, ce);
3981 }
3982 }
3983
3984 parent->parallel.fence_context = dma_fence_context_alloc(1);
3985
3986 parent->engine->emit_bb_start =
3987 emit_bb_start_parent_no_preempt_mid_batch;
3988 parent->engine->emit_fini_breadcrumb =
3989 emit_fini_breadcrumb_parent_no_preempt_mid_batch;
3990 parent->engine->emit_fini_breadcrumb_dw =
3991 12 + 4 * parent->parallel.number_children;
3992 for_each_child(parent, ce) {
3993 ce->engine->emit_bb_start =
3994 emit_bb_start_child_no_preempt_mid_batch;
3995 ce->engine->emit_fini_breadcrumb =
3996 emit_fini_breadcrumb_child_no_preempt_mid_batch;
3997 ce->engine->emit_fini_breadcrumb_dw = 16;
3998 }
3999
4000 kfree(siblings);
4001 return parent;
4002
4003 unwind:
4004 if (parent)
4005 intel_context_put(parent);
4006 kfree(siblings);
4007 return err;
4008 }
4009
4010 static bool
guc_irq_enable_breadcrumbs(struct intel_breadcrumbs * b)4011 guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b)
4012 {
4013 struct intel_engine_cs *sibling;
4014 intel_engine_mask_t tmp, mask = b->engine_mask;
4015 bool result = false;
4016
4017 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp)
4018 result |= intel_engine_irq_enable(sibling);
4019
4020 return result;
4021 }
4022
4023 static void
guc_irq_disable_breadcrumbs(struct intel_breadcrumbs * b)4024 guc_irq_disable_breadcrumbs(struct intel_breadcrumbs *b)
4025 {
4026 struct intel_engine_cs *sibling;
4027 intel_engine_mask_t tmp, mask = b->engine_mask;
4028
4029 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp)
4030 intel_engine_irq_disable(sibling);
4031 }
4032
guc_init_breadcrumbs(struct intel_engine_cs * engine)4033 static void guc_init_breadcrumbs(struct intel_engine_cs *engine)
4034 {
4035 int i;
4036
4037 /*
4038 * In GuC submission mode we do not know which physical engine a request
4039 * will be scheduled on, this creates a problem because the breadcrumb
4040 * interrupt is per physical engine. To work around this we attach
4041 * requests and direct all breadcrumb interrupts to the first instance
4042 * of an engine per class. In addition all breadcrumb interrupts are
4043 * enabled / disabled across an engine class in unison.
4044 */
4045 for (i = 0; i < MAX_ENGINE_INSTANCE; ++i) {
4046 struct intel_engine_cs *sibling =
4047 engine->gt->engine_class[engine->class][i];
4048
4049 if (sibling) {
4050 if (engine->breadcrumbs != sibling->breadcrumbs) {
4051 intel_breadcrumbs_put(engine->breadcrumbs);
4052 engine->breadcrumbs =
4053 intel_breadcrumbs_get(sibling->breadcrumbs);
4054 }
4055 break;
4056 }
4057 }
4058
4059 if (engine->breadcrumbs) {
4060 engine->breadcrumbs->engine_mask |= engine->mask;
4061 engine->breadcrumbs->irq_enable = guc_irq_enable_breadcrumbs;
4062 engine->breadcrumbs->irq_disable = guc_irq_disable_breadcrumbs;
4063 }
4064 }
4065
/*
 * sched_engine callback that updates the GuC priority recorded on an
 * in-flight request and the per-context priority accounting.
 *
 * @rq:   request whose priority is being bumped
 * @prio: new i915 priority, mapped to a GuC priority via
 *        map_i915_prio_to_guc_prio()
 */
static void guc_bump_inflight_request_prio(struct i915_request *rq,
					   int prio)
{
	struct intel_context *ce = request_to_scheduling_context(rq);
	u8 new_guc_prio = map_i915_prio_to_guc_prio(prio);

	/*
	 * Short circuit function: nothing to do for priorities below
	 * I915_PRIORITY_NORMAL, for requests already marked GUC_PRIO_FINI, or
	 * when a priority is already recorded and new_guc_prio_higher() does
	 * not report the new one as higher. These reads are done without the
	 * lock.
	 */
	if (prio < I915_PRIORITY_NORMAL ||
	    rq->guc_prio == GUC_PRIO_FINI ||
	    (rq->guc_prio != GUC_PRIO_INIT &&
	     !new_guc_prio_higher(rq->guc_prio, new_guc_prio)))
		return;

	spin_lock(&ce->guc_state.lock);
	/* GUC_PRIO_FINI is tested again now that the lock is held. */
	if (rq->guc_prio != GUC_PRIO_FINI) {
		/* Drop the old accounting entry, if one was ever recorded. */
		if (rq->guc_prio != GUC_PRIO_INIT)
			sub_context_inflight_prio(ce, rq->guc_prio);
		rq->guc_prio = new_guc_prio;
		add_context_inflight_prio(ce, rq->guc_prio);
		update_context_prio(ce);
	}
	spin_unlock(&ce->guc_state.lock);
}
4089
guc_retire_inflight_request_prio(struct i915_request * rq)4090 static void guc_retire_inflight_request_prio(struct i915_request *rq)
4091 {
4092 struct intel_context *ce = request_to_scheduling_context(rq);
4093
4094 spin_lock(&ce->guc_state.lock);
4095 guc_prio_fini(rq, ce);
4096 spin_unlock(&ce->guc_state.lock);
4097 }
4098
sanitize_hwsp(struct intel_engine_cs * engine)4099 static void sanitize_hwsp(struct intel_engine_cs *engine)
4100 {
4101 struct intel_timeline *tl;
4102
4103 list_for_each_entry(tl, &engine->status_page.timelines, engine_link)
4104 intel_timeline_reset_seqno(tl);
4105 }
4106
guc_sanitize(struct intel_engine_cs * engine)4107 static void guc_sanitize(struct intel_engine_cs *engine)
4108 {
4109 /*
4110 * Poison residual state on resume, in case the suspend didn't!
4111 *
4112 * We have to assume that across suspend/resume (or other loss
4113 * of control) that the contents of our pinned buffers has been
4114 * lost, replaced by garbage. Since this doesn't always happen,
4115 * let's poison such state so that we more quickly spot when
4116 * we falsely assume it has been preserved.
4117 */
4118 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
4119 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);
4120
4121 /*
4122 * The kernel_context HWSP is stored in the status_page. As above,
4123 * that may be lost on resume/initialisation, and so we need to
4124 * reset the value in the HWSP.
4125 */
4126 sanitize_hwsp(engine);
4127
4128 /* And scrub the dirty cachelines for the HWSP */
4129 drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE);
4130
4131 intel_engine_reset_pinned_contexts(engine);
4132 }
4133
/*
 * Program the engine's hardware status page: set the HWSP write mask to all
 * ones and write the GGTT offset of the status page vma to RING_HWS_PGA.
 */
static void setup_hwsp(struct intel_engine_cs *engine)
{
	intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */

	ENGINE_WRITE_FW(engine,
			RING_HWS_PGA,
			i915_ggtt_offset(engine->status_page.vma));
}
4142
/*
 * Set GEN11_GFX_DISABLE_LEGACY_MODE in RING_MODE_GEN7 and clear STOP_RING in
 * RING_MI_MODE, followed by a posting read of RING_MI_MODE.
 */
static void start_engine(struct intel_engine_cs *engine)
{
	ENGINE_WRITE_FW(engine,
			RING_MODE_GEN7,
			_MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));

	ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
	ENGINE_POSTING_READ(engine, RING_MI_MODE);
}
4152
/*
 * engine->resume callback for GuC submission.
 *
 * Asserts that all forcewake domains are active, then re-initialises MOCS,
 * resets the breadcrumbs, programs the HWSP and starts the engine. Engines
 * flagged I915_ENGINE_FIRST_RENDER_COMPUTE additionally call
 * xehp_enable_ccs_engines().
 *
 * Always returns 0.
 */
static int guc_resume(struct intel_engine_cs *engine)
{
	assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);

	intel_mocs_init_engine(engine);

	intel_breadcrumbs_reset(engine->breadcrumbs);

	setup_hwsp(engine);
	start_engine(engine);

	if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
		xehp_enable_ccs_engines(engine);

	return 0;
}
4169
guc_sched_engine_disabled(struct i915_sched_engine * sched_engine)4170 static bool guc_sched_engine_disabled(struct i915_sched_engine *sched_engine)
4171 {
4172 return !sched_engine->tasklet.callback;
4173 }
4174
/*
 * engine->set_default_submission callback: installs guc_submit_request as
 * the engine's submit_request hook.
 */
static void guc_set_default_submission(struct intel_engine_cs *engine)
{
	engine->submit_request = guc_submit_request;
}
4179
/*
 * Give a pinned context a guc_id (if it does not already have a valid one),
 * run guc_context_init() on it if CONTEXT_GUC_INIT is not yet set, and
 * register it through try_context_registration().
 *
 * Returns 0 on success, a negative value from pin_guc_id(), or the non-zero
 * result of try_context_registration() (after calling unpin_guc_id()).
 */
static inline int guc_kernel_context_pin(struct intel_guc *guc,
					 struct intel_context *ce)
{
	int ret;

	/*
	 * Note: we purposefully do not check the returns below because
	 * the registration can only fail if a reset is just starting.
	 * This is called at the end of reset so presumably another reset
	 * isn't happening and even if it did this code would be run again.
	 *
	 * NOTE(review): the code below does check and propagate both the
	 * pin_guc_id() and try_context_registration() results, so the note
	 * above looks stale - confirm and update.
	 */

	if (context_guc_id_invalid(ce)) {
		ret = pin_guc_id(guc, ce);

		if (ret < 0)
			return ret;
	}

	if (!test_bit(CONTEXT_GUC_INIT, &ce->flags))
		guc_context_init(ce);

	ret = try_context_registration(ce, true);
	if (ret)
		unpin_guc_id(guc, ce);

	return ret;
}
4208
guc_init_submission(struct intel_guc * guc)4209 static inline int guc_init_submission(struct intel_guc *guc)
4210 {
4211 struct intel_gt *gt = guc_to_gt(guc);
4212 struct intel_engine_cs *engine;
4213 enum intel_engine_id id;
4214
4215 /* make sure all descriptors are clean... */
4216 xa_destroy(&guc->context_lookup);
4217
4218 /*
4219 * A reset might have occurred while we had a pending stalled request,
4220 * so make sure we clean that up.
4221 */
4222 guc->stalled_request = NULL;
4223 guc->submission_stall_reason = STALL_NONE;
4224
4225 /*
4226 * Some contexts might have been pinned before we enabled GuC
4227 * submission, so we need to add them to the GuC bookeeping.
4228 * Also, after a reset the of the GuC we want to make sure that the
4229 * information shared with GuC is properly reset. The kernel LRCs are
4230 * not attached to the gem_context, so they need to be added separately.
4231 */
4232 for_each_engine(engine, gt, id) {
4233 struct intel_context *ce;
4234
4235 list_for_each_entry(ce, &engine->pinned_contexts_list,
4236 pinned_contexts_link) {
4237 int ret = guc_kernel_context_pin(guc, ce);
4238
4239 if (ret) {
4240 /* No point in trying to clean up as i915 will wedge on failure */
4241 return ret;
4242 }
4243 }
4244 }
4245
4246 return 0;
4247 }
4248
/*
 * engine->release callback: clears the sanitize hook, then runs the common
 * engine cleanup and tears down the workaround context state.
 */
static void guc_release(struct intel_engine_cs *engine)
{
	engine->sanitize = NULL; /* no longer in control, nothing to sanitize */

	intel_engine_cleanup_common(engine);
	lrc_fini_wa_ctx(engine);
}
4256
virtual_guc_bump_serial(struct intel_engine_cs * engine)4257 static void virtual_guc_bump_serial(struct intel_engine_cs *engine)
4258 {
4259 struct intel_engine_cs *e;
4260 intel_engine_mask_t tmp, mask = engine->mask;
4261
4262 for_each_engine_masked(e, engine->gt, mask, tmp)
4263 e->serial++;
4264 }
4265
guc_default_vfuncs(struct intel_engine_cs * engine)4266 static void guc_default_vfuncs(struct intel_engine_cs *engine)
4267 {
4268 /* Default vfuncs which can be overridden by each engine. */
4269
4270 engine->resume = guc_resume;
4271
4272 engine->cops = &guc_context_ops;
4273 engine->request_alloc = guc_request_alloc;
4274 engine->add_active_request = add_to_context;
4275 engine->remove_active_request = remove_from_context;
4276
4277 engine->sched_engine->schedule = i915_schedule;
4278
4279 engine->reset.prepare = guc_engine_reset_prepare;
4280 engine->reset.rewind = guc_rewind_nop;
4281 engine->reset.cancel = guc_reset_nop;
4282 engine->reset.finish = guc_reset_nop;
4283
4284 engine->emit_flush = gen8_emit_flush_xcs;
4285 engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
4286 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs;
4287 if (GRAPHICS_VER(engine->i915) >= 12) {
4288 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs;
4289 engine->emit_flush = gen12_emit_flush_xcs;
4290 }
4291 engine->set_default_submission = guc_set_default_submission;
4292 engine->busyness = guc_engine_busyness;
4293
4294 engine->flags |= I915_ENGINE_SUPPORTS_STATS;
4295 engine->flags |= I915_ENGINE_HAS_PREEMPTION;
4296 engine->flags |= I915_ENGINE_HAS_TIMESLICES;
4297
4298 /* Wa_14014475959:dg2 */
4299 if (engine->class == COMPUTE_CLASS)
4300 if (IS_GFX_GT_IP_STEP(engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
4301 IS_DG2(engine->i915))
4302 engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT;
4303
4304 /*
4305 * TODO: GuC supports timeslicing and semaphores as well, but they're
4306 * handled by the firmware so some minor tweaks are required before
4307 * enabling.
4308 *
4309 * engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
4310 */
4311
4312 engine->emit_bb_start = gen8_emit_bb_start;
4313 if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
4314 engine->emit_bb_start = xehp_emit_bb_start;
4315 }
4316
rcs_submission_override(struct intel_engine_cs * engine)4317 static void rcs_submission_override(struct intel_engine_cs *engine)
4318 {
4319 switch (GRAPHICS_VER(engine->i915)) {
4320 case 12:
4321 engine->emit_flush = gen12_emit_flush_rcs;
4322 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
4323 break;
4324 case 11:
4325 engine->emit_flush = gen11_emit_flush_rcs;
4326 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
4327 break;
4328 default:
4329 engine->emit_flush = gen8_emit_flush_rcs;
4330 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
4331 break;
4332 }
4333 }
4334
/*
 * Default interrupt setup for GuC submission: keep mask is set to
 * GT_RENDER_USER_INTERRUPT and cs_irq_handler is installed as the engine's
 * irq handler.
 */
static inline void guc_default_irqs(struct intel_engine_cs *engine)
{
	engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT;
	intel_engine_set_irq_handler(engine, cs_irq_handler);
}
4340
guc_sched_engine_destroy(struct kref * kref)4341 static void guc_sched_engine_destroy(struct kref *kref)
4342 {
4343 struct i915_sched_engine *sched_engine =
4344 container_of(kref, typeof(*sched_engine), ref);
4345 struct intel_guc *guc = sched_engine->private_data;
4346
4347 guc->sched_engine = NULL;
4348 tasklet_kill(&sched_engine->tasklet); /* flush the callback */
4349 kfree(sched_engine);
4350 }
4351
intel_guc_submission_setup(struct intel_engine_cs * engine)4352 int intel_guc_submission_setup(struct intel_engine_cs *engine)
4353 {
4354 struct drm_i915_private *i915 = engine->i915;
4355 struct intel_guc *guc = &engine->gt->uc.guc;
4356
4357 /*
4358 * The setup relies on several assumptions (e.g. irqs always enabled)
4359 * that are only valid on gen11+
4360 */
4361 GEM_BUG_ON(GRAPHICS_VER(i915) < 11);
4362
4363 if (!guc->sched_engine) {
4364 guc->sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL);
4365 if (!guc->sched_engine)
4366 return -ENOMEM;
4367
4368 guc->sched_engine->schedule = i915_schedule;
4369 guc->sched_engine->disabled = guc_sched_engine_disabled;
4370 guc->sched_engine->private_data = guc;
4371 guc->sched_engine->destroy = guc_sched_engine_destroy;
4372 guc->sched_engine->bump_inflight_request_prio =
4373 guc_bump_inflight_request_prio;
4374 guc->sched_engine->retire_inflight_request_prio =
4375 guc_retire_inflight_request_prio;
4376 tasklet_setup(&guc->sched_engine->tasklet,
4377 guc_submission_tasklet);
4378 }
4379 i915_sched_engine_put(engine->sched_engine);
4380 engine->sched_engine = i915_sched_engine_get(guc->sched_engine);
4381
4382 guc_default_vfuncs(engine);
4383 guc_default_irqs(engine);
4384 guc_init_breadcrumbs(engine);
4385
4386 if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE)
4387 rcs_submission_override(engine);
4388
4389 lrc_init_wa_ctx(engine);
4390
4391 /* Finally, take ownership and responsibility for cleanup! */
4392 engine->sanitize = guc_sanitize;
4393 engine->release = guc_release;
4394
4395 return 0;
4396 }
4397
4398 struct scheduling_policy {
4399 /* internal data */
4400 u32 max_words, num_words;
4401 u32 count;
4402 /* API data */
4403 struct guc_update_scheduling_policy h2g;
4404 };
4405
__guc_scheduling_policy_action_size(struct scheduling_policy * policy)4406 static u32 __guc_scheduling_policy_action_size(struct scheduling_policy *policy)
4407 {
4408 u32 *start = (void *)&policy->h2g;
4409 u32 *end = policy->h2g.data + policy->num_words;
4410 size_t delta = end - start;
4411
4412 return delta;
4413 }
4414
__guc_scheduling_policy_start_klv(struct scheduling_policy * policy)4415 static struct scheduling_policy *__guc_scheduling_policy_start_klv(struct scheduling_policy *policy)
4416 {
4417 policy->h2g.header.action = INTEL_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV;
4418 policy->max_words = ARRAY_SIZE(policy->h2g.data);
4419 policy->num_words = 0;
4420 policy->count = 0;
4421
4422 return policy;
4423 }
4424
/*
 * Append one KLV to the message being built.
 *
 * @action: KLV key
 * @data:   @len u32 words of value, copied after the KLV header word
 * @len:    value length in u32 words
 *
 * The caller must leave room for 1 + @len words (checked by GEM_BUG_ON).
 */
static void __guc_scheduling_policy_add_klv(struct scheduling_policy *policy,
					    u32 action, u32 *data, u32 len)
{
	u32 *klv = &policy->h2g.data[policy->num_words];

	GEM_BUG_ON((policy->num_words + 1 + len) > policy->max_words);

	/* Header word first, then the value words. */
	klv[0] = FIELD_PREP(GUC_KLV_0_KEY, action) |
		 FIELD_PREP(GUC_KLV_0_LEN, len);
	memcpy(&klv[1], data, len * sizeof(u32));

	policy->num_words += len + 1;
	policy->count++;
}
4437
__guc_action_set_scheduling_policies(struct intel_guc * guc,struct scheduling_policy * policy)4438 static int __guc_action_set_scheduling_policies(struct intel_guc *guc,
4439 struct scheduling_policy *policy)
4440 {
4441 int ret;
4442
4443 ret = intel_guc_send(guc, (u32 *)&policy->h2g,
4444 __guc_scheduling_policy_action_size(policy));
4445 if (ret < 0) {
4446 guc_probe_error(guc, "Failed to configure global scheduling policies: %pe!\n",
4447 ERR_PTR(ret));
4448 return ret;
4449 }
4450
4451 if (ret != policy->count) {
4452 guc_warn(guc, "global scheduler policy processed %d of %d KLVs!",
4453 ret, policy->count);
4454 if (ret > policy->count)
4455 return -EPROTO;
4456 }
4457
4458 return 0;
4459 }
4460
guc_init_global_schedule_policy(struct intel_guc * guc)4461 static int guc_init_global_schedule_policy(struct intel_guc *guc)
4462 {
4463 struct scheduling_policy policy;
4464 struct intel_gt *gt = guc_to_gt(guc);
4465 intel_wakeref_t wakeref;
4466 int ret;
4467
4468 if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0))
4469 return 0;
4470
4471 __guc_scheduling_policy_start_klv(&policy);
4472
4473 with_intel_runtime_pm(>->i915->runtime_pm, wakeref) {
4474 u32 yield[] = {
4475 GLOBAL_SCHEDULE_POLICY_RC_YIELD_DURATION,
4476 GLOBAL_SCHEDULE_POLICY_RC_YIELD_RATIO,
4477 };
4478
4479 __guc_scheduling_policy_add_klv(&policy,
4480 GUC_SCHEDULING_POLICIES_KLV_ID_RENDER_COMPUTE_YIELD,
4481 yield, ARRAY_SIZE(yield));
4482
4483 ret = __guc_action_set_scheduling_policies(guc, &policy);
4484 }
4485
4486 return ret;
4487 }
4488
/*
 * Program GEN12_GUC_SEM_INTR_ENABLES: route-to-GuC plus enable-all when
 * @to_guc is true, zero otherwise. Does nothing before graphics version 12.
 */
static void guc_route_semaphores(struct intel_guc *guc, bool to_guc)
{
	struct intel_gt *gt = guc_to_gt(guc);
	u32 enables;

	if (GRAPHICS_VER(gt->i915) < 12)
		return;

	enables = to_guc ?
		  GUC_SEM_INTR_ROUTE_TO_GUC | GUC_SEM_INTR_ENABLE_ALL : 0;

	intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES, enables);
}
4504
intel_guc_submission_enable(struct intel_guc * guc)4505 int intel_guc_submission_enable(struct intel_guc *guc)
4506 {
4507 int ret;
4508
4509 /* Semaphore interrupt enable and route to GuC */
4510 guc_route_semaphores(guc, true);
4511
4512 ret = guc_init_submission(guc);
4513 if (ret)
4514 goto fail_sem;
4515
4516 ret = guc_init_engine_stats(guc);
4517 if (ret)
4518 goto fail_sem;
4519
4520 ret = guc_init_global_schedule_policy(guc);
4521 if (ret)
4522 goto fail_stats;
4523
4524 return 0;
4525
4526 fail_stats:
4527 guc_fini_engine_stats(guc);
4528 fail_sem:
4529 guc_route_semaphores(guc, false);
4530 return ret;
4531 }
4532
/*
 * Disable GuC submission: cancel the busyness worker and route semaphore
 * interrupts back to the host.
 *
 * Note: By the time we're here, GuC may have already been reset
 */
void intel_guc_submission_disable(struct intel_guc *guc)
{
	guc_cancel_busyness_worker(guc);

	/* Semaphore interrupt disable and route to host */
	guc_route_semaphores(guc, false);
}
4541
__guc_submission_supported(struct intel_guc * guc)4542 static bool __guc_submission_supported(struct intel_guc *guc)
4543 {
4544 /* GuC submission is unavailable for pre-Gen11 */
4545 return intel_guc_is_supported(guc) &&
4546 GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11;
4547 }
4548
__guc_submission_selected(struct intel_guc * guc)4549 static bool __guc_submission_selected(struct intel_guc *guc)
4550 {
4551 struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
4552
4553 if (!intel_guc_submission_is_supported(guc))
4554 return false;
4555
4556 return i915->params.enable_guc & ENABLE_GUC_SUBMISSION;
4557 }
4558
intel_guc_sched_disable_gucid_threshold_max(struct intel_guc * guc)4559 int intel_guc_sched_disable_gucid_threshold_max(struct intel_guc *guc)
4560 {
4561 return guc->submission_state.num_guc_ids - NUMBER_MULTI_LRC_GUC_ID(guc);
4562 }
4563
/*
 * This default value of 33 milliseconds (+1 millisecond round up) ensures 30fps
 * or higher workloads are able to enjoy the latency reduction when delaying the
 * schedule-disable operation. This matches the 30fps game-render + encode (real
 * world) workload this knob was tested against.
 */
#define SCHED_DISABLE_DELAY_MS	34

/*
 * A threshold of 75% is a reasonable starting point considering that real world apps
 * generally don't get anywhere near this.
 *
 * The macro evaluates its own argument rather than relying on a variable
 * named 'guc' being in scope at the call site.
 */
#define NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(__guc) \
	(((intel_guc_sched_disable_gucid_threshold_max(__guc)) * 3) / 4)
4578
/*
 * Early, software-only initialisation of the GuC submission state: the
 * context lookup xarray, locks, lists, the guc_id allocator, the deferred
 * workers and the default tunables.
 */
void intel_guc_submission_init_early(struct intel_guc *guc)
{
	xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ);

	spin_lock_init(&guc->submission_state.lock);
	INIT_LIST_HEAD(&guc->submission_state.guc_id_list);
	ida_init(&guc->submission_state.guc_ids);
	INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts);
	INIT_WORK(&guc->submission_state.destroyed_worker,
		  destroyed_worker_func);
	INIT_WORK(&guc->submission_state.reset_fail_worker,
		  reset_fail_worker_func);

	spin_lock_init(&guc->timestamp.lock);
	INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);

	guc->submission_state.sched_disable_delay_ms = SCHED_DISABLE_DELAY_MS;
	/* num_guc_ids must be set before the threshold derived from it. */
	guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID;
	guc->submission_state.sched_disable_gucid_threshold =
		NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(guc);
	guc->submission_supported = __guc_submission_supported(guc);
	guc->submission_selected = __guc_submission_selected(guc);
}
4602
4603 static inline struct intel_context *
g2h_context_lookup(struct intel_guc * guc,u32 ctx_id)4604 g2h_context_lookup(struct intel_guc *guc, u32 ctx_id)
4605 {
4606 struct intel_context *ce;
4607
4608 if (unlikely(ctx_id >= GUC_MAX_CONTEXT_ID)) {
4609 guc_err(guc, "Invalid ctx_id %u\n", ctx_id);
4610 return NULL;
4611 }
4612
4613 ce = __get_context(guc, ctx_id);
4614 if (unlikely(!ce)) {
4615 guc_err(guc, "Context is NULL, ctx_id %u\n", ctx_id);
4616 return NULL;
4617 }
4618
4619 if (unlikely(intel_context_is_child(ce))) {
4620 guc_err(guc, "Context is child, ctx_id %u\n", ctx_id);
4621 return NULL;
4622 }
4623
4624 return ce;
4625 }
4626
/*
 * G2H handler for a "context deregister done" message.
 *
 * @msg[0] carries the guc_id of the deregistered context; @len must be at
 * least 1. Returns 0 on success, or -EPROTO if the message is too short or
 * the guc_id does not resolve to a valid (non-child) context.
 */
int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
					  const u32 *msg,
					  u32 len)
{
	struct intel_context *ce;
	u32 ctx_id;

	if (unlikely(len < 1)) {
		guc_err(guc, "Invalid length %u\n", len);
		return -EPROTO;
	}
	ctx_id = msg[0];

	ce = g2h_context_lookup(guc, ctx_id);
	if (unlikely(!ce))
		return -EPROTO;

	trace_intel_context_deregister_done(ce);

#ifdef CONFIG_DRM_I915_SELFTEST
	/*
	 * Selftest hook: swallow this G2H once. The flag is cleared so later
	 * messages are handled normally; note the outstanding-G2H count is not
	 * decremented on this path.
	 */
	if (unlikely(ce->drop_deregister)) {
		ce->drop_deregister = false;
		return 0;
	}
#endif

	if (context_wait_for_deregister_to_register(ce)) {
		struct intel_runtime_pm *runtime_pm =
			&ce->engine->gt->i915->runtime_pm;
		intel_wakeref_t wakeref;

		/*
		 * Previous owner of this guc_id has been deregistered, now safe
		 * to register this context.
		 */
		with_intel_runtime_pm(runtime_pm, wakeref)
			register_context(ce, true);
		/* Only the registration runs under the wakeref. */
		guc_signal_context_fence(ce);
		intel_context_put(ce);
	} else if (context_destroyed(ce)) {
		/* Context has been destroyed */
		intel_gt_pm_put_async(guc_to_gt(guc));
		release_guc_id(guc, ce);
		__guc_context_destroy(ce);
	}

	decr_outstanding_submission_g2h(guc);

	return 0;
}
4677
/*
 * G2H handler for a "schedule enable/disable done" message.
 *
 * @msg[0] carries the guc_id; @len must be at least 2. The context must not be
 * destroyed and must have either an enable or a disable pending, otherwise the
 * message is rejected. Returns 0 on success or -EPROTO on a malformed message,
 * an unknown context or an unexpected scheduling state.
 */
int intel_guc_sched_done_process_msg(struct intel_guc *guc,
				     const u32 *msg,
				     u32 len)
{
	struct intel_context *ce;
	unsigned long flags;
	u32 ctx_id;

	if (unlikely(len < 2)) {
		guc_err(guc, "Invalid length %u\n", len);
		return -EPROTO;
	}
	ctx_id = msg[0];

	ce = g2h_context_lookup(guc, ctx_id);
	if (unlikely(!ce))
		return -EPROTO;

	if (unlikely(context_destroyed(ce) ||
		     (!context_pending_enable(ce) &&
		     !context_pending_disable(ce)))) {
		guc_err(guc, "Bad context sched_state 0x%x, ctx_id %u\n",
			ce->guc_state.sched_state, ctx_id);
		return -EPROTO;
	}

	trace_intel_context_sched_done(ce);

	if (context_pending_enable(ce)) {
#ifdef CONFIG_DRM_I915_SELFTEST
		/* Selftest hook: drop this enable-done G2H once. */
		if (unlikely(ce->drop_schedule_enable)) {
			ce->drop_schedule_enable = false;
			return 0;
		}
#endif

		spin_lock_irqsave(&ce->guc_state.lock, flags);
		clr_context_pending_enable(ce);
		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
	} else if (context_pending_disable(ce)) {
		bool banned;

#ifdef CONFIG_DRM_I915_SELFTEST
		/* Selftest hook: drop this disable-done G2H once. */
		if (unlikely(ce->drop_schedule_disable)) {
			ce->drop_schedule_disable = false;
			return 0;
		}
#endif

		/*
		 * Unpin must be done before __guc_signal_context_fence,
		 * otherwise a race exists between the requests getting
		 * submitted + retired before this unpin completes resulting in
		 * the pin_count going to zero and the context still being
		 * enabled.
		 */
		intel_context_sched_disable_unpin(ce);

		/*
		 * Sample and clear the banned state together with the pending
		 * disable, under the context's GuC state lock.
		 */
		spin_lock_irqsave(&ce->guc_state.lock, flags);
		banned = context_banned(ce);
		clr_context_banned(ce);
		clr_context_pending_disable(ce);
		__guc_signal_context_fence(ce);
		guc_blocked_fence_complete(ce);
		spin_unlock_irqrestore(&ce->guc_state.lock, flags);

		/* The cancellation itself runs outside the lock. */
		if (banned) {
			guc_cancel_context_requests(ce);
			intel_engine_signal_breadcrumbs(ce->engine);
		}
	}

	decr_outstanding_submission_g2h(guc);
	intel_context_put(ce);

	return 0;
}
4755
capture_error_state(struct intel_guc * guc,struct intel_context * ce)4756 static void capture_error_state(struct intel_guc *guc,
4757 struct intel_context *ce)
4758 {
4759 struct intel_gt *gt = guc_to_gt(guc);
4760 struct drm_i915_private *i915 = gt->i915;
4761 intel_wakeref_t wakeref;
4762 intel_engine_mask_t engine_mask;
4763
4764 if (intel_engine_is_virtual(ce->engine)) {
4765 struct intel_engine_cs *e;
4766 intel_engine_mask_t tmp, virtual_mask = ce->engine->mask;
4767
4768 engine_mask = 0;
4769 for_each_engine_masked(e, ce->engine->gt, virtual_mask, tmp) {
4770 bool match = intel_guc_capture_is_matching_engine(gt, ce, e);
4771
4772 if (match) {
4773 intel_engine_set_hung_context(e, ce);
4774 engine_mask |= e->mask;
4775 i915_increase_reset_engine_count(&i915->gpu_error,
4776 e);
4777 }
4778 }
4779
4780 if (!engine_mask) {
4781 guc_warn(guc, "No matching physical engine capture for virtual engine context 0x%04X / %s",
4782 ce->guc_id.id, ce->engine->name);
4783 engine_mask = ~0U;
4784 }
4785 } else {
4786 intel_engine_set_hung_context(ce->engine, ce);
4787 engine_mask = ce->engine->mask;
4788 i915_increase_reset_engine_count(&i915->gpu_error, ce->engine);
4789 }
4790
4791 with_intel_runtime_pm(&i915->runtime_pm, wakeref)
4792 i915_capture_error_state(gt, engine_mask, CORE_DUMP_FLAG_IS_GUC_CAPTURE);
4793 }
4794
/*
 * Reset @ce against its engine's mask via __guc_reset_context() and then kick
 * the submission tasklet of the engine's scheduler.
 */
static void guc_context_replay(struct intel_context *ce)
{
	/* Sampled before the reset below. */
	struct i915_sched_engine *sched_engine = ce->engine->sched_engine;

	__guc_reset_context(ce, ce->engine->mask);
	tasklet_hi_schedule(&sched_engine->tasklet);
}
4802
guc_handle_context_reset(struct intel_guc * guc,struct intel_context * ce)4803 static void guc_handle_context_reset(struct intel_guc *guc,
4804 struct intel_context *ce)
4805 {
4806 trace_intel_context_reset(ce);
4807
4808 guc_dbg(guc, "Got context reset notification: 0x%04X on %s, exiting = %s, banned = %s\n",
4809 ce->guc_id.id, ce->engine->name,
4810 str_yes_no(intel_context_is_exiting(ce)),
4811 str_yes_no(intel_context_is_banned(ce)));
4812
4813 if (likely(intel_context_is_schedulable(ce))) {
4814 capture_error_state(guc, ce);
4815 guc_context_replay(ce);
4816 } else {
4817 guc_info(guc, "Ignoring context reset notification of exiting context 0x%04X on %s",
4818 ce->guc_id.id, ce->engine->name);
4819 }
4820 }
4821
/*
 * G2H handler for a context reset notification.
 *
 * @msg[0] carries the guc_id of the reset context; @len must be exactly 1.
 * Returns 0 on success, or -EPROTO on a malformed message or when the guc_id
 * does not resolve to a valid (non-child) context.
 */
int intel_guc_context_reset_process_msg(struct intel_guc *guc,
					const u32 *msg, u32 len)
{
	struct intel_context *ce;
	unsigned long flags;
	/* u32 like the wire format and g2h_context_lookup()'s parameter. */
	u32 ctx_id;

	if (unlikely(len != 1)) {
		guc_err(guc, "Invalid length %u\n", len);
		return -EPROTO;
	}

	ctx_id = msg[0];

	/*
	 * The context lookup uses the xarray but lookups only require an RCU lock
	 * not the full spinlock. So take the lock explicitly and keep it until the
	 * context has been reference count locked to ensure it can't be destroyed
	 * asynchronously until the reset is done.
	 */
	xa_lock_irqsave(&guc->context_lookup, flags);
	ce = g2h_context_lookup(guc, ctx_id);
	if (ce)
		intel_context_get(ce);
	xa_unlock_irqrestore(&guc->context_lookup, flags);

	if (unlikely(!ce))
		return -EPROTO;

	guc_handle_context_reset(guc, ce);
	/* Drop the reference taken under the lookup lock. */
	intel_context_put(ce);

	return 0;
}
4856
/*
 * G2H handler for an error-capture notification.
 *
 * @msg[0] carries the capture event status; @len must be exactly 1. A
 * "no space" status is reported with a warning, and in every valid case the
 * capture data is handed to intel_guc_capture_process().
 * Returns 0 on success or -EPROTO on a malformed message.
 */
int intel_guc_error_capture_process_msg(struct intel_guc *guc,
					const u32 *msg, u32 len)
{
	u32 status;

	if (unlikely(len != 1)) {
		guc_dbg(guc, "Invalid length %u\n", len);
		return -EPROTO;
	}

	status = msg[0] & INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK;
	if (status == INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE)
		guc_warn(guc, "No space for error capture\n");

	intel_guc_capture_process(guc);

	return 0;
}
4875
4876 struct intel_engine_cs *
intel_guc_lookup_engine(struct intel_guc * guc,u8 guc_class,u8 instance)4877 intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance)
4878 {
4879 struct intel_gt *gt = guc_to_gt(guc);
4880 u8 engine_class = guc_class_to_engine_class(guc_class);
4881
4882 /* Class index is checked in class converter */
4883 GEM_BUG_ON(instance > MAX_ENGINE_INSTANCE);
4884
4885 return gt->engine_class[engine_class][instance];
4886 }
4887
reset_fail_worker_func(struct work_struct * w)4888 static void reset_fail_worker_func(struct work_struct *w)
4889 {
4890 struct intel_guc *guc = container_of(w, struct intel_guc,
4891 submission_state.reset_fail_worker);
4892 struct intel_gt *gt = guc_to_gt(guc);
4893 intel_engine_mask_t reset_fail_mask;
4894 unsigned long flags;
4895
4896 spin_lock_irqsave(&guc->submission_state.lock, flags);
4897 reset_fail_mask = guc->submission_state.reset_fail_mask;
4898 guc->submission_state.reset_fail_mask = 0;
4899 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
4900
4901 if (likely(reset_fail_mask)) {
4902 struct intel_engine_cs *engine;
4903 enum intel_engine_id id;
4904
4905 /*
4906 * GuC is toast at this point - it dead loops after sending the failed
4907 * reset notification. So need to manually determine the guilty context.
4908 * Note that it should be reliable to do this here because the GuC is
4909 * toast and will not be scheduling behind the KMD's back.
4910 */
4911 for_each_engine_masked(engine, gt, reset_fail_mask, id)
4912 intel_guc_find_hung_context(engine);
4913
4914 intel_gt_handle_error(gt, reset_fail_mask,
4915 I915_ERROR_CAPTURE,
4916 "GuC failed to reset engine mask=0x%x",
4917 reset_fail_mask);
4918 }
4919 }
4920
/*
 * G2H handler for an "engine reset failed" notification.
 *
 * The message carries the GuC engine class in @msg[0], the instance in
 * @msg[1] and a reason code in @msg[2]; @len must be exactly 3. The engine is
 * added to the pending reset-failure mask and the reset-fail worker is queued.
 * Returns 0 on success, or -EPROTO on a malformed message or unknown engine.
 */
int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
					 const u32 *msg, u32 len)
{
	struct intel_engine_cs *engine;
	u8 guc_class, instance;
	u32 reason;
	unsigned long flags;

	if (unlikely(len != 3)) {
		guc_err(guc, "Invalid length %u\n", len);
		return -EPROTO;
	}

	guc_class = msg[0];
	instance = msg[1];
	reason = msg[2];

	engine = intel_guc_lookup_engine(guc, guc_class, instance);
	if (unlikely(!engine)) {
		guc_err(guc, "Invalid engine %d:%d\n", guc_class, instance);
		return -EPROTO;
	}

	/*
	 * This is an unexpected failure of a hardware feature. So, log a real
	 * error message not just the informational that comes with the reset.
	 */
	guc_err(guc, "Engine reset failed on %d:%d (%s) because 0x%08X\n",
		guc_class, instance, engine->name, reason);

	/* Accumulate; the worker consumes and clears the mask. */
	spin_lock_irqsave(&guc->submission_state.lock, flags);
	guc->submission_state.reset_fail_mask |= engine->mask;
	spin_unlock_irqrestore(&guc->submission_state.lock, flags);

	/*
	 * A GT reset flushes this worker queue (G2H handler) so we must use
	 * another worker to trigger a GT reset.
	 */
	queue_work(system_unbound_wq, &guc->submission_state.reset_fail_worker);

	return 0;
}
4963
/*
 * Walk the GuC context lookup table looking for a pinned context that targets
 * @engine and has a request in the I915_REQUEST_ACTIVE state; the first such
 * context is recorded as the engine's hung context and the walk stops.
 */
void intel_guc_find_hung_context(struct intel_engine_cs *engine)
{
	struct intel_guc *guc = &engine->gt->uc.guc;
	struct intel_context *ce;
	struct i915_request *rq;
	unsigned long index;
	unsigned long flags;

	/* Reset called during driver load? GuC not yet initialised! */
	if (unlikely(!guc_submission_initialized(guc)))
		return;

	xa_lock_irqsave(&guc->context_lookup, flags);
	xa_for_each(&guc->context_lookup, index, ce) {
		bool found;

		/* Skip contexts whose reference count has already hit zero. */
		if (!kref_get_unless_zero(&ce->ref))
			continue;

		/*
		 * A reference is held, so the lookup lock is dropped while the
		 * context is inspected. The plain unlock is used here; the
		 * saved IRQ flags are only restored at the very end.
		 */
		xa_unlock(&guc->context_lookup);

		if (!intel_context_is_pinned(ce))
			goto next;

		/* Virtual engines match by mask overlap, physical by identity. */
		if (intel_engine_is_virtual(ce->engine)) {
			if (!(ce->engine->mask & engine->mask))
				goto next;
		} else {
			if (ce->engine != engine)
				goto next;
		}

		found = false;
		spin_lock(&ce->guc_state.lock);
		list_for_each_entry(rq, &ce->guc_state.requests, sched.link) {
			if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE)
				continue;

			found = true;
			break;
		}
		spin_unlock(&ce->guc_state.lock);

		if (found) {
			intel_engine_set_hung_context(engine, ce);

			/* Can only cope with one hang at a time... */
			intel_context_put(ce);
			/* Retake the lock so the common exit can release it. */
			xa_lock(&guc->context_lookup);
			goto done;
		}

next:
		/* Drop our reference and retake the lock before iterating on. */
		intel_context_put(ce);
		xa_lock(&guc->context_lookup);
	}
done:
	xa_unlock_irqrestore(&guc->context_lookup, flags);
}
5023
/*
 * Print, via @m, the requests of every pinned context in the GuC lookup table
 * that targets @engine. @hung_rq is passed through unchanged to
 * intel_engine_dump_active_requests().
 */
void intel_guc_dump_active_requests(struct intel_engine_cs *engine,
				    struct i915_request *hung_rq,
				    struct drm_printer *m)
{
	struct intel_guc *guc = &engine->gt->uc.guc;
	struct intel_context *ce;
	unsigned long index;
	unsigned long flags;

	/* Reset called during driver load? GuC not yet initialised! */
	if (unlikely(!guc_submission_initialized(guc)))
		return;

	xa_lock_irqsave(&guc->context_lookup, flags);
	xa_for_each(&guc->context_lookup, index, ce) {
		/* Skip contexts whose reference count has already hit zero. */
		if (!kref_get_unless_zero(&ce->ref))
			continue;

		/*
		 * A reference is held, so the lookup lock is dropped while the
		 * context is dumped. The plain unlock is used here; the saved
		 * IRQ flags are only restored after the loop.
		 */
		xa_unlock(&guc->context_lookup);

		if (!intel_context_is_pinned(ce))
			goto next;

		/* Virtual engines match by mask overlap, physical by identity. */
		if (intel_engine_is_virtual(ce->engine)) {
			if (!(ce->engine->mask & engine->mask))
				goto next;
		} else {
			if (ce->engine != engine)
				goto next;
		}

		/* The request list is walked under the context's GuC state lock. */
		spin_lock(&ce->guc_state.lock);
		intel_engine_dump_active_requests(&ce->guc_state.requests,
						  hung_rq, m);
		spin_unlock(&ce->guc_state.lock);

next:
		/* Drop our reference and retake the lock before iterating on. */
		intel_context_put(ce);
		xa_lock(&guc->context_lookup);
	}
	xa_unlock_irqrestore(&guc->context_lookup, flags);
}
5066
intel_guc_submission_print_info(struct intel_guc * guc,struct drm_printer * p)5067 void intel_guc_submission_print_info(struct intel_guc *guc,
5068 struct drm_printer *p)
5069 {
5070 struct i915_sched_engine *sched_engine = guc->sched_engine;
5071 struct rb_node *rb;
5072 unsigned long flags;
5073
5074 if (!sched_engine)
5075 return;
5076
5077 drm_printf(p, "GuC Submission API Version: %d.%d.%d\n",
5078 guc->submission_version.major, guc->submission_version.minor,
5079 guc->submission_version.patch);
5080 drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n",
5081 atomic_read(&guc->outstanding_submission_g2h));
5082 drm_printf(p, "GuC tasklet count: %u\n",
5083 atomic_read(&sched_engine->tasklet.count));
5084
5085 spin_lock_irqsave(&sched_engine->lock, flags);
5086 drm_printf(p, "Requests in GuC submit tasklet:\n");
5087 for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) {
5088 struct i915_priolist *pl = to_priolist(rb);
5089 struct i915_request *rq;
5090
5091 priolist_for_each_request(rq, pl)
5092 drm_printf(p, "guc_id=%u, seqno=%llu\n",
5093 rq->context->guc_id.id,
5094 rq->fence.seqno);
5095 }
5096 spin_unlock_irqrestore(&sched_engine->lock, flags);
5097 drm_printf(p, "\n");
5098 }
5099
guc_log_context_priority(struct drm_printer * p,struct intel_context * ce)5100 static inline void guc_log_context_priority(struct drm_printer *p,
5101 struct intel_context *ce)
5102 {
5103 int i;
5104
5105 drm_printf(p, "\t\tPriority: %d\n", ce->guc_state.prio);
5106 drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n");
5107 for (i = GUC_CLIENT_PRIORITY_KMD_HIGH;
5108 i < GUC_CLIENT_PRIORITY_NUM; ++i) {
5109 drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n",
5110 i, ce->guc_state.prio_count[i]);
5111 }
5112 drm_printf(p, "\n");
5113 }
5114
guc_log_context(struct drm_printer * p,struct intel_context * ce)5115 static inline void guc_log_context(struct drm_printer *p,
5116 struct intel_context *ce)
5117 {
5118 drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id);
5119 drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca);
5120 drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
5121 ce->ring->head,
5122 ce->lrc_reg_state[CTX_RING_HEAD]);
5123 drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n",
5124 ce->ring->tail,
5125 ce->lrc_reg_state[CTX_RING_TAIL]);
5126 drm_printf(p, "\t\tContext Pin Count: %u\n",
5127 atomic_read(&ce->pin_count));
5128 drm_printf(p, "\t\tGuC ID Ref Count: %u\n",
5129 atomic_read(&ce->guc_id.ref));
5130 drm_printf(p, "\t\tSchedule State: 0x%x\n",
5131 ce->guc_state.sched_state);
5132 }
5133
intel_guc_submission_print_context_info(struct intel_guc * guc,struct drm_printer * p)5134 void intel_guc_submission_print_context_info(struct intel_guc *guc,
5135 struct drm_printer *p)
5136 {
5137 struct intel_context *ce;
5138 unsigned long index;
5139 unsigned long flags;
5140
5141 xa_lock_irqsave(&guc->context_lookup, flags);
5142 xa_for_each(&guc->context_lookup, index, ce) {
5143 GEM_BUG_ON(intel_context_is_child(ce));
5144
5145 guc_log_context(p, ce);
5146 guc_log_context_priority(p, ce);
5147
5148 if (intel_context_is_parent(ce)) {
5149 struct intel_context *child;
5150
5151 drm_printf(p, "\t\tNumber children: %u\n",
5152 ce->parallel.number_children);
5153
5154 if (ce->parallel.guc.wq_status) {
5155 drm_printf(p, "\t\tWQI Head: %u\n",
5156 READ_ONCE(*ce->parallel.guc.wq_head));
5157 drm_printf(p, "\t\tWQI Tail: %u\n",
5158 READ_ONCE(*ce->parallel.guc.wq_tail));
5159 drm_printf(p, "\t\tWQI Status: %u\n",
5160 READ_ONCE(*ce->parallel.guc.wq_status));
5161 }
5162
5163 if (ce->engine->emit_bb_start ==
5164 emit_bb_start_parent_no_preempt_mid_batch) {
5165 u8 i;
5166
5167 drm_printf(p, "\t\tChildren Go: %u\n",
5168 get_children_go_value(ce));
5169 for (i = 0; i < ce->parallel.number_children; ++i)
5170 drm_printf(p, "\t\tChildren Join: %u\n",
5171 get_children_join_value(ce, i));
5172 }
5173
5174 for_each_child(ce, child)
5175 guc_log_context(p, child);
5176 }
5177 }
5178 xa_unlock_irqrestore(&guc->context_lookup, flags);
5179 }
5180
get_children_go_addr(struct intel_context * ce)5181 static inline u32 get_children_go_addr(struct intel_context *ce)
5182 {
5183 GEM_BUG_ON(!intel_context_is_parent(ce));
5184
5185 return i915_ggtt_offset(ce->state) +
5186 __get_parent_scratch_offset(ce) +
5187 offsetof(struct parent_scratch, go.semaphore);
5188 }
5189
get_children_join_addr(struct intel_context * ce,u8 child_index)5190 static inline u32 get_children_join_addr(struct intel_context *ce,
5191 u8 child_index)
5192 {
5193 GEM_BUG_ON(!intel_context_is_parent(ce));
5194
5195 return i915_ggtt_offset(ce->state) +
5196 __get_parent_scratch_offset(ce) +
5197 offsetof(struct parent_scratch, join[child_index].semaphore);
5198 }
5199
/*
 * Semaphore values used for the parent/child handshake. The *_GO_BB values
 * are used before jumping to the batch, the *_GO_FINI_BREADCRUMB values
 * before the fini breadcrumb is emitted.
 */
#define PARENT_GO_BB			1
#define PARENT_GO_FINI_BREADCRUMB	0
#define CHILD_GO_BB			1
#define CHILD_GO_FINI_BREADCRUMB	0
/*
 * Emit the batch-start sequence for a parent context: wait for every child to
 * signal its join semaphore, disable arbitration, write the "go" value for the
 * children and finally jump to the batch at @offset.
 *
 * Returns 0 on success or the error from intel_ring_begin().
 */
static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq,
						     u64 offset, u32 len,
						     const unsigned int flags)
{
	struct intel_context *ce = rq->context;
	u32 *cs;
	u8 i;

	GEM_BUG_ON(!intel_context_is_parent(ce));

	/*
	 * 4 dwords per child wait, 2 for the arbitration off + NOOP, 4 for the
	 * batch start + NOOP; the remaining 4 are presumably what
	 * gen8_emit_ggtt_write() emits - TODO confirm.
	 */
	cs = intel_ring_begin(rq, 10 + 4 * ce->parallel.number_children);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* Wait on children */
	for (i = 0; i < ce->parallel.number_children; ++i) {
		*cs++ = (MI_SEMAPHORE_WAIT |
			 MI_SEMAPHORE_GLOBAL_GTT |
			 MI_SEMAPHORE_POLL |
			 MI_SEMAPHORE_SAD_EQ_SDD);
		*cs++ = PARENT_GO_BB;
		*cs++ = get_children_join_addr(ce, i);
		*cs++ = 0;
	}

	/* Turn off preemption */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
	*cs++ = MI_NOOP;

	/* Tell children go */
	cs = gen8_emit_ggtt_write(cs,
				  CHILD_GO_BB,
				  get_children_go_addr(ce),
				  0);

	/* Jump to batch */
	/* BIT(8) is set unless a secure dispatch was requested. */
	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
	*cs++ = lower_32_bits(offset);
	*cs++ = upper_32_bits(offset);
	*cs++ = MI_NOOP;

	intel_ring_advance(rq, cs);

	return 0;
}
5250
/*
 * Emit the batch-start sequence for a child context: signal this child's join
 * semaphore in the parent's scratch area, wait for the parent's "go" value,
 * disable arbitration and jump to the batch at @offset.
 *
 * Returns 0 on success or the error from intel_ring_begin().
 */
static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
						    u64 offset, u32 len,
						    const unsigned int flags)
{
	struct intel_context *ce = rq->context;
	struct intel_context *parent = intel_context_to_parent(ce);
	u32 *cs;

	GEM_BUG_ON(!intel_context_is_child(ce));

	/*
	 * 4 dwords for the semaphore wait, 1 for the arbitration off and 3 for
	 * the batch start; the remaining 4 are presumably what
	 * gen8_emit_ggtt_write() emits - TODO confirm.
	 */
	cs = intel_ring_begin(rq, 12);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* Signal parent */
	cs = gen8_emit_ggtt_write(cs,
				  PARENT_GO_BB,
				  get_children_join_addr(parent,
							 ce->parallel.child_index),
				  0);

	/* Wait on parent for go */
	*cs++ = (MI_SEMAPHORE_WAIT |
		 MI_SEMAPHORE_GLOBAL_GTT |
		 MI_SEMAPHORE_POLL |
		 MI_SEMAPHORE_SAD_EQ_SDD);
	*cs++ = CHILD_GO_BB;
	*cs++ = get_children_go_addr(parent);
	*cs++ = 0;

	/* Turn off preemption */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	/* Jump to batch */
	/* BIT(8) is set unless a secure dispatch was requested. */
	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
	*cs++ = lower_32_bits(offset);
	*cs++ = upper_32_bits(offset);

	intel_ring_advance(rq, cs);

	return 0;
}
5294
5295 static u32 *
__emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request * rq,u32 * cs)5296 __emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
5297 u32 *cs)
5298 {
5299 struct intel_context *ce = rq->context;
5300 u8 i;
5301
5302 GEM_BUG_ON(!intel_context_is_parent(ce));
5303
5304 /* Wait on children */
5305 for (i = 0; i < ce->parallel.number_children; ++i) {
5306 *cs++ = (MI_SEMAPHORE_WAIT |
5307 MI_SEMAPHORE_GLOBAL_GTT |
5308 MI_SEMAPHORE_POLL |
5309 MI_SEMAPHORE_SAD_EQ_SDD);
5310 *cs++ = PARENT_GO_FINI_BREADCRUMB;
5311 *cs++ = get_children_join_addr(ce, i);
5312 *cs++ = 0;
5313 }
5314
5315 /* Turn on preemption */
5316 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
5317 *cs++ = MI_NOOP;
5318
5319 /* Tell children go */
5320 cs = gen8_emit_ggtt_write(cs,
5321 CHILD_GO_FINI_BREADCRUMB,
5322 get_children_go_addr(ce),
5323 0);
5324
5325 return cs;
5326 }
5327
/*
 * If this is true, a submission of multi-lrc requests had an error and the
 * requests need to be skipped. The front end (execbuf IOCTL) should've called
 * i915_request_skip which squashes the BB but we still need to emit the fini
 * breadcrumbs seqno write. At this point we don't know how many of the
 * requests in the multi-lrc submission were generated so we can't do the
 * handshake between the parent and children (e.g. if 4 requests should be
 * generated but 2nd hit an error only 1 would be seen by the GuC backend).
 * Simply skip the handshake, but still emit the breadcrumb seqno, if an error
 * has occurred on any of the requests in submission / relationship.
 *
 * Returns true when I915_FENCE_FLAG_SKIP_PARALLEL is set on @rq's fence.
 */
static inline bool skip_handshake(struct i915_request *rq)
{
	return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags);
}
5343
/*
 * Number of dwords emitted after the (skippable) handshake: the seqno write
 * plus MI_USER_INTERRUPT and MI_NOOP. Checked by the GEM_BUG_ON()s below.
 */
#define NON_SKIP_LEN	6
/*
 * Emit the fini breadcrumb for a parent context at @cs: the parent/child
 * handshake (or an equally sized run of zeroed dwords when the handshake must
 * be skipped), the seqno write to the request's timeline HWSP offset and a
 * user interrupt. Updates rq->tail and returns the advanced pointer.
 */
static u32 *
emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
						 u32 *cs)
{
	struct intel_context *ce = rq->context;
	/* Only referenced by the GEM_BUG_ON() checks below. */
	__maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs;
	__maybe_unused u32 *start_fini_breadcrumb_cs = cs;

	GEM_BUG_ON(!intel_context_is_parent(ce));

	if (unlikely(skip_handshake(rq))) {
		/*
		 * NOP everything in __emit_fini_breadcrumb_parent_no_preempt_mid_batch,
		 * the NON_SKIP_LEN comes from the length of the emits below.
		 */
		memset(cs, 0, sizeof(u32) *
		       (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN));
		cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN;
	} else {
		cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs);
	}

	/* Emit fini breadcrumb */
	before_fini_breadcrumb_user_interrupt_cs = cs;
	cs = gen8_emit_ggtt_write(cs,
				  rq->fence.seqno,
				  i915_request_active_timeline(rq)->hwsp_offset,
				  0);

	/* User interrupt */
	*cs++ = MI_USER_INTERRUPT;
	*cs++ = MI_NOOP;

	/* Ensure our math for skip + emit is correct */
	GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN !=
		   cs);
	GEM_BUG_ON(start_fini_breadcrumb_cs +
		   ce->engine->emit_fini_breadcrumb_dw != cs);

	rq->tail = intel_ring_offset(rq, cs);

	return cs;
}
5388
5389 static u32 *
__emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request * rq,u32 * cs)5390 __emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
5391 u32 *cs)
5392 {
5393 struct intel_context *ce = rq->context;
5394 struct intel_context *parent = intel_context_to_parent(ce);
5395
5396 GEM_BUG_ON(!intel_context_is_child(ce));
5397
5398 /* Turn on preemption */
5399 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
5400 *cs++ = MI_NOOP;
5401
5402 /* Signal parent */
5403 cs = gen8_emit_ggtt_write(cs,
5404 PARENT_GO_FINI_BREADCRUMB,
5405 get_children_join_addr(parent,
5406 ce->parallel.child_index),
5407 0);
5408
5409 /* Wait parent on for go */
5410 *cs++ = (MI_SEMAPHORE_WAIT |
5411 MI_SEMAPHORE_GLOBAL_GTT |
5412 MI_SEMAPHORE_POLL |
5413 MI_SEMAPHORE_SAD_EQ_SDD);
5414 *cs++ = CHILD_GO_FINI_BREADCRUMB;
5415 *cs++ = get_children_go_addr(parent);
5416 *cs++ = 0;
5417
5418 return cs;
5419 }
5420
/*
 * Emit the fini breadcrumb for a child context at @cs: the child/parent
 * handshake (or an equally sized run of zeroed dwords when the handshake must
 * be skipped), the seqno write to the request's timeline HWSP offset and a
 * user interrupt. Updates rq->tail and returns the advanced pointer.
 */
static u32 *
emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
						u32 *cs)
{
	struct intel_context *ce = rq->context;
	/* Only referenced by the GEM_BUG_ON() checks below. */
	__maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs;
	__maybe_unused u32 *start_fini_breadcrumb_cs = cs;

	GEM_BUG_ON(!intel_context_is_child(ce));

	if (unlikely(skip_handshake(rq))) {
		/*
		 * NOP everything in __emit_fini_breadcrumb_child_no_preempt_mid_batch,
		 * the NON_SKIP_LEN comes from the length of the emits below.
		 */
		memset(cs, 0, sizeof(u32) *
		       (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN));
		cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN;
	} else {
		cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs);
	}

	/* Emit fini breadcrumb */
	before_fini_breadcrumb_user_interrupt_cs = cs;
	cs = gen8_emit_ggtt_write(cs,
				  rq->fence.seqno,
				  i915_request_active_timeline(rq)->hwsp_offset,
				  0);

	/* User interrupt */
	*cs++ = MI_USER_INTERRUPT;
	*cs++ = MI_NOOP;

	/* Ensure our math for skip + emit is correct */
	GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN !=
		   cs);
	GEM_BUG_ON(start_fini_breadcrumb_cs +
		   ce->engine->emit_fini_breadcrumb_dw != cs);

	rq->tail = intel_ring_offset(rq, cs);

	return cs;
}
5464
5465 #undef NON_SKIP_LEN
5466
5467 static struct intel_context *
guc_create_virtual(struct intel_engine_cs ** siblings,unsigned int count,unsigned long flags)5468 guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
5469 unsigned long flags)
5470 {
5471 struct guc_virtual_engine *ve;
5472 struct intel_guc *guc;
5473 unsigned int n;
5474 int err;
5475
5476 ve = kzalloc(sizeof(*ve), GFP_KERNEL);
5477 if (!ve)
5478 return ERR_PTR(-ENOMEM);
5479
5480 guc = &siblings[0]->gt->uc.guc;
5481
5482 ve->base.i915 = siblings[0]->i915;
5483 ve->base.gt = siblings[0]->gt;
5484 ve->base.uncore = siblings[0]->uncore;
5485 ve->base.id = -1;
5486
5487 ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
5488 ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
5489 ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
5490 ve->base.saturated = ALL_ENGINES;
5491
5492 snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
5493
5494 ve->base.sched_engine = i915_sched_engine_get(guc->sched_engine);
5495
5496 ve->base.cops = &virtual_guc_context_ops;
5497 ve->base.request_alloc = guc_request_alloc;
5498 ve->base.bump_serial = virtual_guc_bump_serial;
5499
5500 ve->base.submit_request = guc_submit_request;
5501
5502 ve->base.flags = I915_ENGINE_IS_VIRTUAL;
5503
5504 BUILD_BUG_ON(ilog2(VIRTUAL_ENGINES) < I915_NUM_ENGINES);
5505 ve->base.mask = VIRTUAL_ENGINES;
5506
5507 intel_context_init(&ve->context, &ve->base);
5508
5509 for (n = 0; n < count; n++) {
5510 struct intel_engine_cs *sibling = siblings[n];
5511
5512 GEM_BUG_ON(!is_power_of_2(sibling->mask));
5513 if (sibling->mask & ve->base.mask) {
5514 guc_dbg(guc, "duplicate %s entry in load balancer\n",
5515 sibling->name);
5516 err = -EINVAL;
5517 goto err_put;
5518 }
5519
5520 ve->base.mask |= sibling->mask;
5521 ve->base.logical_mask |= sibling->logical_mask;
5522
5523 if (n != 0 && ve->base.class != sibling->class) {
5524 guc_dbg(guc, "invalid mixing of engine class, sibling %d, already %d\n",
5525 sibling->class, ve->base.class);
5526 err = -EINVAL;
5527 goto err_put;
5528 } else if (n == 0) {
5529 ve->base.class = sibling->class;
5530 ve->base.uabi_class = sibling->uabi_class;
5531 snprintf(ve->base.name, sizeof(ve->base.name),
5532 "v%dx%d", ve->base.class, count);
5533 ve->base.context_size = sibling->context_size;
5534
5535 ve->base.add_active_request =
5536 sibling->add_active_request;
5537 ve->base.remove_active_request =
5538 sibling->remove_active_request;
5539 ve->base.emit_bb_start = sibling->emit_bb_start;
5540 ve->base.emit_flush = sibling->emit_flush;
5541 ve->base.emit_init_breadcrumb =
5542 sibling->emit_init_breadcrumb;
5543 ve->base.emit_fini_breadcrumb =
5544 sibling->emit_fini_breadcrumb;
5545 ve->base.emit_fini_breadcrumb_dw =
5546 sibling->emit_fini_breadcrumb_dw;
5547 ve->base.breadcrumbs =
5548 intel_breadcrumbs_get(sibling->breadcrumbs);
5549
5550 ve->base.flags |= sibling->flags;
5551
5552 ve->base.props.timeslice_duration_ms =
5553 sibling->props.timeslice_duration_ms;
5554 ve->base.props.preempt_timeout_ms =
5555 sibling->props.preempt_timeout_ms;
5556 }
5557 }
5558
5559 return &ve->context;
5560
5561 err_put:
5562 intel_context_put(&ve->context);
5563 return ERR_PTR(err);
5564 }
5565
intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs * ve)5566 bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve)
5567 {
5568 struct intel_engine_cs *engine;
5569 intel_engine_mask_t tmp, mask = ve->mask;
5570
5571 for_each_engine_masked(engine, ve->gt, mask, tmp)
5572 if (READ_ONCE(engine->props.heartbeat_interval_ms))
5573 return true;
5574
5575 return false;
5576 }
5577
5578 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
5579 #include "selftest_guc.c"
5580 #include "selftest_guc_multi_lrc.c"
5581 #include "selftest_guc_hangcheck.c"
5582 #endif
5583