// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include <linux/circ_buf.h>

#include "gem/i915_gem_context.h"
#include "gem/i915_gem_lmem.h"
#include "gt/gen8_engine_cs.h"
#include "gt/intel_breadcrumbs.h"
#include "gt/intel_context.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_regs.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_clock_utils.h"
#include "gt/intel_gt_irq.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_gt_regs.h"
#include "gt/intel_gt_requests.h"
#include "gt/intel_lrc.h"
#include "gt/intel_lrc_reg.h"
#include "gt/intel_mocs.h"
#include "gt/intel_ring.h"

#include "intel_guc_ads.h"
#include "intel_guc_capture.h"
#include "intel_guc_print.h"
#include "intel_guc_submission.h"

#include "i915_drv.h"
#include "i915_reg.h"
#include "i915_trace.h"

/**
 * DOC: GuC-based command submission
 *
 * The Scratch registers:
 * There are 16 MMIO-based registers starting from 0xC180. The kernel driver
 * writes a value to the action register (SOFT_SCRATCH_0) along with any data.
 * It then triggers an interrupt on the GuC via another register write (0xC4C8).
 * Firmware writes a success/fail code back to the action register after
 * processing the request. The kernel driver polls waiting for this update and
 * then proceeds.
 *
 * Command Transport buffers (CTBs):
 * Covered in detail in other sections but CTBs (Host to GuC - H2G, GuC to Host
 * - G2H) are a message interface between the i915 and GuC.
 *
 * Context registration:
 * Before a context can be submitted it must be registered with the GuC via a
 * H2G. A unique guc_id is associated with each context. The context is either
 * registered at request creation time (normal operation) or at submission time
 * (abnormal operation, e.g. after a reset).
 *
 * Context submission:
 * The i915 updates the LRC tail value in memory. The i915 must enable the
 * scheduling of the context within the GuC for the GuC to actually consider it.
 * Therefore, the first time a disabled context is submitted we use a schedule
 * enable H2G, while follow up submissions are done via the context submit H2G,
 * which informs the GuC that a previously enabled context has new work
 * available.
 *
 * Context unpin:
 * To unpin a context a H2G is used to disable scheduling. When the
 * corresponding G2H returns indicating the scheduling disable operation has
 * completed it is safe to unpin the context. While a disable is in flight it
 * isn't safe to resubmit the context so a fence is used to stall all future
 * requests of that context until the G2H is returned. Because this interaction
 * with the GuC takes a non-zero amount of time we delay the disabling of
 * scheduling after the pin count goes to zero by a configurable period of time
 * (see SCHED_DISABLE_DELAY_MS). The thought is this gives the user a window of
 * time to resubmit something on the context before doing this costly operation.
 * This delay is only done if the context isn't closed and the guc_id usage is
 * less than a threshold (see NUM_SCHED_DISABLE_GUC_IDS_THRESHOLD).
 *
 * Context deregistration:
 * Before a context can be destroyed or if we steal its guc_id we must
 * deregister the context with the GuC via H2G. If stealing the guc_id it isn't
 * safe to submit anything to this guc_id until the deregister completes so a
 * fence is used to stall all requests associated with this guc_id until the
 * corresponding G2H returns indicating the guc_id has been deregistered.
 *
 * submission_state.guc_ids:
 * Unique number associated with private GuC context data passed in during
 * context registration / submission / deregistration. 64k available. Simple ida
 * is used for allocation.
 *
 * Stealing guc_ids:
 * If no guc_ids are available they can be stolen from another context at
 * request creation time if that context is unpinned. If a guc_id can't be found
 * we punt this problem to the user as we believe this is near impossible to hit
 * during normal use cases.
 *
 * Locking:
 * In the GuC submission code we have 3 basic spin locks which protect
 * everything. Details about each below.
 *
 * sched_engine->lock
 * This is the submission lock for all contexts that share an i915 schedule
 * engine (sched_engine), thus only one of the contexts which share a
 * sched_engine can be submitting at a time. Currently only one sched_engine is
 * used for all of GuC submission but that could change in the future.
 *
 * guc->submission_state.lock
 * Global lock for GuC submission state. Protects guc_ids and destroyed contexts
 * list.
 *
 * ce->guc_state.lock
 * Protects everything under ce->guc_state. Ensures that a context is in the
 * correct state before issuing a H2G. e.g. We don't issue a schedule disable
 * on a disabled context (bad idea), we don't issue a schedule enable when a
 * schedule disable is in flight, etc... Also protects list of inflight requests
 * on the context and the priority management state. Lock is individual to each
 * context.
 *
 * Lock ordering rules:
 * sched_engine->lock -> ce->guc_state.lock
 * guc->submission_state.lock -> ce->guc_state.lock
 *
 * Reset races:
 * When a full GT reset is triggered it is assumed that some G2H responses to
 * H2Gs can be lost as the GuC is also reset. Losing these G2H can prove to be
 * fatal as we do certain operations upon receiving a G2H (e.g. destroy
 * contexts, release guc_ids, etc...). When this occurs we can scrub the
 * context state and cleanup appropriately, however this is quite racy.
 * To avoid races, the reset code must disable submission before scrubbing for
 * the missing G2H, while the submission code must check for submission being
 * disabled and skip sending H2Gs and updating context states when it is. Both
 * sides must also make sure to hold the relevant locks.
 */

/* GuC Virtual Engine */
struct guc_virtual_engine {
	struct intel_engine_cs base;
	struct intel_context context;
};

static struct intel_context *
guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
		   unsigned long flags);

static struct intel_context *
guc_create_parallel(struct intel_engine_cs **engines,
		    unsigned int num_siblings,
		    unsigned int width);

#define GUC_REQUEST_SIZE 64 /* bytes */

/*
 * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
 * per the GuC submission interface. A different allocation algorithm is used
 * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
 * partition the guc_id space. We believe the number of multi-lrc contexts in
 * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
 * multi-lrc.
 */
#define NUMBER_MULTI_LRC_GUC_ID(guc)	\
	((guc)->submission_state.num_guc_ids / 16)

/*
 * Below is a set of functions which control the GuC scheduling state which
 * require a lock.
 */
#define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER	BIT(0)
#define SCHED_STATE_DESTROYED				BIT(1)
#define SCHED_STATE_PENDING_DISABLE			BIT(2)
#define SCHED_STATE_BANNED				BIT(3)
#define SCHED_STATE_ENABLED				BIT(4)
#define SCHED_STATE_PENDING_ENABLE			BIT(5)
#define SCHED_STATE_REGISTERED				BIT(6)
#define SCHED_STATE_POLICY_REQUIRED			BIT(7)
#define SCHED_STATE_CLOSED				BIT(8)
#define SCHED_STATE_BLOCKED_SHIFT			9
#define SCHED_STATE_BLOCKED		BIT(SCHED_STATE_BLOCKED_SHIFT)
#define SCHED_STATE_BLOCKED_MASK	(0xfff << SCHED_STATE_BLOCKED_SHIFT)

static inline void init_sched_state(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK;
}

/*
 * Kernel contexts can have SCHED_STATE_REGISTERED after suspend.
 * A context close can race with the submission path, so SCHED_STATE_CLOSED
 * can be set immediately before we try to register.
 */
#define SCHED_STATE_VALID_INIT \
	(SCHED_STATE_BLOCKED_MASK | \
	 SCHED_STATE_CLOSED | \
	 SCHED_STATE_REGISTERED)

__maybe_unused
static bool sched_state_is_init(struct intel_context *ce)
{
	return !(ce->guc_state.sched_state & ~SCHED_STATE_VALID_INIT);
}

static inline bool
context_wait_for_deregister_to_register(struct intel_context *ce)
{
	return ce->guc_state.sched_state &
		SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
}

static inline void
set_context_wait_for_deregister_to_register(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |=
		SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
}

static inline void
clr_context_wait_for_deregister_to_register(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &=
		~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
}

static inline bool
context_destroyed(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_DESTROYED;
}

static inline void
set_context_destroyed(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_DESTROYED;
}

static inline bool context_pending_disable(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE;
}

static inline void set_context_pending_disable(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_PENDING_DISABLE;
}

static inline void clr_context_pending_disable(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_DISABLE;
}

static inline bool context_banned(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_BANNED;
}

static inline void set_context_banned(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_BANNED;
}

static inline void clr_context_banned(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_BANNED;
}

static inline bool context_enabled(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_ENABLED;
}

static inline void set_context_enabled(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_ENABLED;
}

static inline void clr_context_enabled(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED;
}

static inline bool context_pending_enable(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE;
}

static inline void set_context_pending_enable(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE;
}

static inline void clr_context_pending_enable(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE;
}

static inline bool context_registered(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_REGISTERED;
}

static inline void set_context_registered(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_REGISTERED;
}

static inline void clr_context_registered(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED;
}

static inline bool context_policy_required(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_POLICY_REQUIRED;
}

static inline void set_context_policy_required(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_POLICY_REQUIRED;
}

static inline void clr_context_policy_required(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_POLICY_REQUIRED;
}

static inline bool context_close_done(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_CLOSED;
}

static inline void set_context_close_done(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_CLOSED;
}

static inline u32 context_blocked(struct intel_context *ce)
{
	return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >>
		SCHED_STATE_BLOCKED_SHIFT;
}

static inline void incr_context_blocked(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);

	ce->guc_state.sched_state += SCHED_STATE_BLOCKED;

	GEM_BUG_ON(!context_blocked(ce));	/* Overflow check */
}

static inline void decr_context_blocked(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);

	GEM_BUG_ON(!context_blocked(ce));	/* Underflow check */

	ce->guc_state.sched_state -= SCHED_STATE_BLOCKED;
}

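/*
 * For multi-lrc (parallel) submission all of the scheduling state is stored in
 * the parent context, so map a request back to the context that is actually
 * scheduled: the parent for a child request, the context itself otherwise.
 */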
static struct intel_context *
request_to_scheduling_context(struct i915_request *rq)
{
	return intel_context_to_parent(rq->context);
}

static inline bool context_guc_id_invalid(struct intel_context *ce)
{
	return ce->guc_id.id == GUC_INVALID_CONTEXT_ID;
}

static inline void set_context_guc_id_invalid(struct intel_context *ce)
{
	ce->guc_id.id = GUC_INVALID_CONTEXT_ID;
}

static inline struct intel_guc *ce_to_guc(struct intel_context *ce)
{
	return &ce->engine->gt->uc.guc;
}

static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
	return rb_entry(rb, struct i915_priolist, node);
}

/*
 * When using multi-lrc submission a scratch memory area is reserved in the
 * parent's context state for the process descriptor, work queue, and handshake
 * between the parent + children contexts to insert safe preemption points
 * between each of the BBs. Currently the scratch area is sized to a page.
 *
 * The layout of this scratch area is below:
 * 0						guc_process_desc
 * + sizeof(struct guc_process_desc)		child go
 * + CACHELINE_BYTES				child join[0]
 * ...
 * + CACHELINE_BYTES				child join[n - 1]
 * ...						unused
 * PARENT_SCRATCH_SIZE / 2			work queue start
 * ...						work queue
 * PARENT_SCRATCH_SIZE - 1			work queue end
 */
#define WQ_SIZE			(PARENT_SCRATCH_SIZE / 2)
#define WQ_OFFSET		(PARENT_SCRATCH_SIZE - WQ_SIZE)

struct sync_semaphore {
	u32 semaphore;
	u8 unused[CACHELINE_BYTES - sizeof(u32)];
};

struct parent_scratch {
	union guc_descs {
		struct guc_sched_wq_desc wq_desc;
		struct guc_process_desc_v69 pdesc;
	} descs;

	struct sync_semaphore go;
	struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1];

	u8 unused[WQ_OFFSET - sizeof(union guc_descs) -
		sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)];

	u32 wq[WQ_SIZE / sizeof(u32)];
};

static u32 __get_parent_scratch_offset(struct intel_context *ce)
{
	GEM_BUG_ON(!ce->parallel.guc.parent_page);

	return ce->parallel.guc.parent_page * PAGE_SIZE;
}

static u32 __get_wq_offset(struct intel_context *ce)
{
	BUILD_BUG_ON(offsetof(struct parent_scratch, wq) != WQ_OFFSET);

	return __get_parent_scratch_offset(ce) + WQ_OFFSET;
}

static struct parent_scratch *
__get_parent_scratch(struct intel_context *ce)
{
	BUILD_BUG_ON(sizeof(struct parent_scratch) != PARENT_SCRATCH_SIZE);
	BUILD_BUG_ON(sizeof(struct sync_semaphore) != CACHELINE_BYTES);

	/*
	 * Need to subtract LRC_STATE_OFFSET here as the
	 * parallel.guc.parent_page is the offset into ce->state while
	 * ce->lrc_reg_state is ce->state + LRC_STATE_OFFSET.
	 */
	return (struct parent_scratch *)
		(ce->lrc_reg_state +
		 ((__get_parent_scratch_offset(ce) -
		   LRC_STATE_OFFSET) / sizeof(u32)));
}

static struct guc_process_desc_v69 *
__get_process_desc_v69(struct intel_context *ce)
{
	struct parent_scratch *ps = __get_parent_scratch(ce);

	return &ps->descs.pdesc;
}

static struct guc_sched_wq_desc *
__get_wq_desc_v70(struct intel_context *ce)
{
	struct parent_scratch *ps = __get_parent_scratch(ce);

	return &ps->descs.wq_desc;
}

static u32 *get_wq_pointer(struct intel_context *ce, u32 wqi_size)
{
	/*
	 * Check for space in work queue. Caching a value of head pointer in
	 * intel_context structure in order to reduce the number of accesses
	 * to shared GPU memory which may be across a PCIe bus.
	 */
#define AVAILABLE_SPACE	\
	CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE)
	if (wqi_size > AVAILABLE_SPACE) {
		ce->parallel.guc.wqi_head = READ_ONCE(*ce->parallel.guc.wq_head);

		if (wqi_size > AVAILABLE_SPACE)
			return NULL;
	}
#undef AVAILABLE_SPACE

	return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)];
}

static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id)
{
	struct intel_context *ce = xa_load(&guc->context_lookup, id);

	GEM_BUG_ON(id >= GUC_MAX_CONTEXT_ID);

	return ce;
}

static struct guc_lrc_desc_v69 *__get_lrc_desc_v69(struct intel_guc *guc, u32 index)
{
	struct guc_lrc_desc_v69 *base = guc->lrc_desc_pool_vaddr_v69;

	if (!base)
		return NULL;

	GEM_BUG_ON(index >= GUC_MAX_CONTEXT_ID);

	return &base[index];
}

static int guc_lrc_desc_pool_create_v69(struct intel_guc *guc)
{
	u32 size;
	int ret;

	size = PAGE_ALIGN(sizeof(struct guc_lrc_desc_v69) *
			  GUC_MAX_CONTEXT_ID);
	ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool_v69,
					     (void **)&guc->lrc_desc_pool_vaddr_v69);
	if (ret)
		return ret;

	return 0;
}

static void guc_lrc_desc_pool_destroy_v69(struct intel_guc *guc)
{
	if (!guc->lrc_desc_pool_vaddr_v69)
		return;

	guc->lrc_desc_pool_vaddr_v69 = NULL;
	i915_vma_unpin_and_release(&guc->lrc_desc_pool_v69, I915_VMA_RELEASE_MAP);
}

static inline bool guc_submission_initialized(struct intel_guc *guc)
{
	return guc->submission_initialized;
}

static inline void _reset_lrc_desc_v69(struct intel_guc *guc, u32 id)
{
	struct guc_lrc_desc_v69 *desc = __get_lrc_desc_v69(guc, id);

	if (desc)
		memset(desc, 0, sizeof(*desc));
}

static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id)
{
	return __get_context(guc, id);
}

static inline void set_ctx_id_mapping(struct intel_guc *guc, u32 id,
				      struct intel_context *ce)
{
	unsigned long flags;

	/*
	 * xarray API doesn't have xa_store_irqsave wrapper, so calling the
	 * lower level functions directly.
	 */
	xa_lock_irqsave(&guc->context_lookup, flags);
	__xa_store(&guc->context_lookup, id, ce, GFP_ATOMIC);
	xa_unlock_irqrestore(&guc->context_lookup, flags);
}

static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id)
{
	unsigned long flags;

	if (unlikely(!guc_submission_initialized(guc)))
		return;

	_reset_lrc_desc_v69(guc, id);

	/*
	 * xarray API doesn't have xa_erase_irqsave wrapper, so calling
	 * the lower level functions directly.
	 */
	xa_lock_irqsave(&guc->context_lookup, flags);
	__xa_erase(&guc->context_lookup, id);
	xa_unlock_irqrestore(&guc->context_lookup, flags);
}

static void decr_outstanding_submission_g2h(struct intel_guc *guc)
{
	if (atomic_dec_and_test(&guc->outstanding_submission_g2h))
		wake_up_all(&guc->ct.wq);
}

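/*
 * Wrapper around intel_guc_send_busy_loop() that bumps the outstanding G2H
 * counter when the action expects a reply, so that the reset and idle paths
 * know a response is still in flight.
 */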
static int guc_submission_send_busy_loop(struct intel_guc *guc,
					 const u32 *action,
					 u32 len,
					 u32 g2h_len_dw,
					 bool loop)
{
	/*
	 * We always loop when a send requires a reply (i.e. g2h_len_dw > 0),
	 * so we don't handle the case where we don't get a reply because we
	 * aborted the send due to the channel being busy.
	 */
	GEM_BUG_ON(g2h_len_dw && !loop);

	if (g2h_len_dw)
		atomic_inc(&guc->outstanding_submission_g2h);

	return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
}

int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
				   atomic_t *wait_var,
				   bool interruptible,
				   long timeout)
{
	const int state = interruptible ?
		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
	DEFINE_WAIT(wait);

	might_sleep();
	GEM_BUG_ON(timeout < 0);

	if (!atomic_read(wait_var))
		return 0;

	if (!timeout)
		return -ETIME;

	for (;;) {
		prepare_to_wait(&guc->ct.wq, &wait, state);

		if (!atomic_read(wait_var))
			break;

		if (signal_pending_state(state, current)) {
			timeout = -EINTR;
			break;
		}

		if (!timeout) {
			timeout = -ETIME;
			break;
		}

		timeout = io_schedule_timeout(timeout);
	}
	finish_wait(&guc->ct.wq, &wait);

	return (timeout < 0) ? timeout : 0;
}

int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout)
{
	if (!intel_uc_uses_guc_submission(&guc_to_gt(guc)->uc))
		return 0;

	return intel_guc_wait_for_pending_msg(guc,
					      &guc->outstanding_submission_g2h,
					      true, timeout);
}

static int guc_context_policy_init_v70(struct intel_context *ce, bool loop);
static int try_context_registration(struct intel_context *ce, bool loop);

static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq)
{
	int err = 0;
	struct intel_context *ce = request_to_scheduling_context(rq);
	u32 action[3];
	int len = 0;
	u32 g2h_len_dw = 0;
	bool enabled;

	lockdep_assert_held(&rq->engine->sched_engine->lock);

	/*
	 * Corner case where requests were sitting in the priority list or a
	 * request was resubmitted after the context was banned.
	 */
	if (unlikely(!intel_context_is_schedulable(ce))) {
		i915_request_put(i915_request_mark_eio(rq));
		intel_engine_signal_breadcrumbs(ce->engine);
		return 0;
	}

	GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
	GEM_BUG_ON(context_guc_id_invalid(ce));

	if (context_policy_required(ce)) {
		err = guc_context_policy_init_v70(ce, false);
		if (err)
			return err;
	}

	spin_lock(&ce->guc_state.lock);

	/*
	 * The request / context will be run on the hardware when scheduling
	 * gets enabled in the unblock. For multi-lrc we still submit the
	 * context to move the LRC tails.
	 */
	if (unlikely(context_blocked(ce) && !intel_context_is_parent(ce)))
		goto out;

	enabled = context_enabled(ce) || context_blocked(ce);

	if (!enabled) {
		action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
		action[len++] = ce->guc_id.id;
		action[len++] = GUC_CONTEXT_ENABLE;
		set_context_pending_enable(ce);
		intel_context_get(ce);
		g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
	} else {
		action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = ce->guc_id.id;
	}

	err = intel_guc_send_nb(guc, action, len, g2h_len_dw);
	if (!enabled && !err) {
		trace_intel_context_sched_enable(ce);
		atomic_inc(&guc->outstanding_submission_g2h);
		set_context_enabled(ce);

		/*
		 * Without multi-lrc KMD does the submission step (moving the
		 * lrc tail) so enabling scheduling is sufficient to submit the
		 * context. This isn't the case in multi-lrc submission as the
		 * GuC needs to move the tails, hence the need for another H2G
		 * to submit a multi-lrc context after enabling scheduling.
		 */
		if (intel_context_is_parent(ce)) {
			action[0] = INTEL_GUC_ACTION_SCHED_CONTEXT;
			err = intel_guc_send_nb(guc, action, len - 1, 0);
		}
	} else if (!enabled) {
		clr_context_pending_enable(ce);
		intel_context_put(ce);
	}
	if (likely(!err))
		trace_i915_request_guc_submit(rq);

out:
	spin_unlock(&ce->guc_state.lock);
	return err;
}

static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
{
	int ret = __guc_add_request(guc, rq);

	if (unlikely(ret == -EBUSY)) {
		guc->stalled_request = rq;
		guc->submission_stall_reason = STALL_ADD_REQUEST;
	}

	return ret;
}

static inline void guc_set_lrc_tail(struct i915_request *rq)
{
	rq->context->lrc_reg_state[CTX_RING_TAIL] =
		intel_ring_set_tail(rq->ring, rq->tail);
}

static inline int rq_prio(const struct i915_request *rq)
{
	return rq->sched.attr.priority;
}

static bool is_multi_lrc_rq(struct i915_request *rq)
{
	return intel_context_is_parallel(rq->context);
}

static bool can_merge_rq(struct i915_request *rq,
			 struct i915_request *last)
{
	return request_to_scheduling_context(rq) ==
		request_to_scheduling_context(last);
}

static u32 wq_space_until_wrap(struct intel_context *ce)
{
	return (WQ_SIZE - ce->parallel.guc.wqi_tail);
}

static void write_wqi(struct intel_context *ce, u32 wqi_size)
{
	BUILD_BUG_ON(!is_power_of_2(WQ_SIZE));

	/*
	 * Ensure WQI are visible before updating tail
	 */
	intel_guc_write_barrier(ce_to_guc(ce));

	ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) &
		(WQ_SIZE - 1);
	WRITE_ONCE(*ce->parallel.guc.wq_tail, ce->parallel.guc.wqi_tail);
}

static int guc_wq_noop_append(struct intel_context *ce)
{
	u32 *wqi = get_wq_pointer(ce, wq_space_until_wrap(ce));
	u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1;

	if (!wqi)
		return -EBUSY;

	GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));

	*wqi = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
		FIELD_PREP(WQ_LEN_MASK, len_dw);
	ce->parallel.guc.wqi_tail = 0;

	return 0;
}

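/*
 * Append a multi-lrc work queue item for this submission: a header dword
 * (type + length), the parent's LRCA, the guc_id plus the parent ring tail, a
 * zero fence_id, and one ring tail dword per child context.
 */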
static int __guc_wq_item_append(struct i915_request *rq)
{
	struct intel_context *ce = request_to_scheduling_context(rq);
	struct intel_context *child;
	unsigned int wqi_size = (ce->parallel.number_children + 4) *
		sizeof(u32);
	u32 *wqi;
	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
	int ret;

	/* Ensure context is in correct state before updating work queue */
	GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
	GEM_BUG_ON(context_guc_id_invalid(ce));
	GEM_BUG_ON(context_wait_for_deregister_to_register(ce));
	GEM_BUG_ON(!ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id));

	/* Insert NOOP if this work queue item will wrap the tail pointer. */
	if (wqi_size > wq_space_until_wrap(ce)) {
		ret = guc_wq_noop_append(ce);
		if (ret)
			return ret;
	}

	wqi = get_wq_pointer(ce, wqi_size);
	if (!wqi)
		return -EBUSY;

	GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));

	*wqi++ = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
		FIELD_PREP(WQ_LEN_MASK, len_dw);
	*wqi++ = ce->lrc.lrca;
	*wqi++ = FIELD_PREP(WQ_GUC_ID_MASK, ce->guc_id.id) |
		FIELD_PREP(WQ_RING_TAIL_MASK, ce->ring->tail / sizeof(u64));
	*wqi++ = 0; /* fence_id */
	for_each_child(ce, child)
		*wqi++ = child->ring->tail / sizeof(u64);

	write_wqi(ce, wqi_size);

	return 0;
}

static int guc_wq_item_append(struct intel_guc *guc,
			      struct i915_request *rq)
{
	struct intel_context *ce = request_to_scheduling_context(rq);
	int ret;

	if (unlikely(!intel_context_is_schedulable(ce)))
		return 0;

	ret = __guc_wq_item_append(rq);
	if (unlikely(ret == -EBUSY)) {
		guc->stalled_request = rq;
		guc->submission_stall_reason = STALL_MOVE_LRC_TAIL;
	}

	return ret;
}

static bool multi_lrc_submit(struct i915_request *rq)
{
	struct intel_context *ce = request_to_scheduling_context(rq);

	intel_ring_set_tail(rq->ring, rq->tail);

	/*
	 * We expect the front end (execbuf IOCTL) to set this flag on the last
	 * request generated from a multi-BB submission. This indicates to the
	 * backend (GuC interface) that we should submit this context thus
	 * submitting all the requests generated in parallel.
	 */
	return test_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, &rq->fence.flags) ||
	       !intel_context_is_schedulable(ce);
}

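/*
 * Called from the submission tasklet with sched_engine->lock held: resume any
 * previously stalled submission first, then drain the priority queue,
 * coalescing requests that belong to the same scheduling context into a single
 * submission. Returns whether anything was submitted so the tasklet knows to
 * loop again.
 */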
static int guc_dequeue_one_context(struct intel_guc *guc)
{
	struct i915_sched_engine * const sched_engine = guc->sched_engine;
	struct i915_request *last = NULL;
	bool submit = false;
	struct rb_node *rb;
	int ret;

	lockdep_assert_held(&sched_engine->lock);

	if (guc->stalled_request) {
		submit = true;
		last = guc->stalled_request;

		switch (guc->submission_stall_reason) {
		case STALL_REGISTER_CONTEXT:
			goto register_context;
		case STALL_MOVE_LRC_TAIL:
			goto move_lrc_tail;
		case STALL_ADD_REQUEST:
			goto add_request;
		default:
			MISSING_CASE(guc->submission_stall_reason);
		}
	}

	while ((rb = rb_first_cached(&sched_engine->queue))) {
		struct i915_priolist *p = to_priolist(rb);
		struct i915_request *rq, *rn;

		priolist_for_each_request_consume(rq, rn, p) {
			if (last && !can_merge_rq(rq, last))
				goto register_context;

			list_del_init(&rq->sched.link);

			__i915_request_submit(rq);

			trace_i915_request_in(rq, 0);
			last = rq;

			if (is_multi_lrc_rq(rq)) {
				/*
				 * We need to coalesce all multi-lrc requests in
				 * a relationship into a single H2G. We are
				 * guaranteed that all of these requests will be
				 * submitted sequentially.
				 */
				if (multi_lrc_submit(rq)) {
					submit = true;
					goto register_context;
				}
			} else {
				submit = true;
			}
		}

		rb_erase_cached(&p->node, &sched_engine->queue);
		i915_priolist_free(p);
	}

register_context:
	if (submit) {
		struct intel_context *ce = request_to_scheduling_context(last);

		if (unlikely(!ctx_id_mapped(guc, ce->guc_id.id) &&
			     intel_context_is_schedulable(ce))) {
			ret = try_context_registration(ce, false);
			if (unlikely(ret == -EPIPE)) {
				goto deadlk;
			} else if (ret == -EBUSY) {
				guc->stalled_request = last;
				guc->submission_stall_reason =
					STALL_REGISTER_CONTEXT;
				goto schedule_tasklet;
			} else if (ret != 0) {
				GEM_WARN_ON(ret);	/* Unexpected */
				goto deadlk;
			}
		}

move_lrc_tail:
		if (is_multi_lrc_rq(last)) {
			ret = guc_wq_item_append(guc, last);
			if (ret == -EBUSY) {
				goto schedule_tasklet;
			} else if (ret != 0) {
				GEM_WARN_ON(ret);	/* Unexpected */
				goto deadlk;
			}
		} else {
			guc_set_lrc_tail(last);
		}

add_request:
		ret = guc_add_request(guc, last);
		if (unlikely(ret == -EPIPE)) {
			goto deadlk;
		} else if (ret == -EBUSY) {
			goto schedule_tasklet;
		} else if (ret != 0) {
			GEM_WARN_ON(ret);	/* Unexpected */
			goto deadlk;
		}
	}

	guc->stalled_request = NULL;
	guc->submission_stall_reason = STALL_NONE;
	return submit;

deadlk:
	sched_engine->tasklet.callback = NULL;
	tasklet_disable_nosync(&sched_engine->tasklet);
	return false;

schedule_tasklet:
	tasklet_schedule(&sched_engine->tasklet);
	return false;
}

static void guc_submission_tasklet(struct tasklet_struct *t)
{
	struct i915_sched_engine *sched_engine =
		from_tasklet(sched_engine, t, tasklet);
	unsigned long flags;
	bool loop;

	spin_lock_irqsave(&sched_engine->lock, flags);

	do {
		loop = guc_dequeue_one_context(sched_engine->private_data);
	} while (loop);

	i915_sched_engine_reset_on_empty(sched_engine);

	spin_unlock_irqrestore(&sched_engine->lock, flags);
}

static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir)
{
	if (iir & GT_RENDER_USER_INTERRUPT)
		intel_engine_signal_breadcrumbs(engine);
}

static void __guc_context_destroy(struct intel_context *ce);
static void release_guc_id(struct intel_guc *guc, struct intel_context *ce);
static void guc_signal_context_fence(struct intel_context *ce);
static void guc_cancel_context_requests(struct intel_context *ce);
static void guc_blocked_fence_complete(struct intel_context *ce);

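/*
 * After a full GT reset the G2H responses for in-flight operations may never
 * arrive. Walk every registered context and locally complete any pending
 * enable/disable, deregister or destroy transition, dropping the references
 * and outstanding G2H credits that the lost responses would have released.
 */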
static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
{
	struct intel_context *ce;
	unsigned long index, flags;
	bool pending_disable, pending_enable, deregister, destroyed, banned;

	xa_lock_irqsave(&guc->context_lookup, flags);
	xa_for_each(&guc->context_lookup, index, ce) {
		/*
		 * Corner case where the ref count on the object is zero but the
		 * deregister G2H was lost. In this case we don't touch the ref
		 * count and finish the destroy of the context.
		 */
		bool do_put = kref_get_unless_zero(&ce->ref);

		xa_unlock(&guc->context_lookup);

		if (test_bit(CONTEXT_GUC_INIT, &ce->flags) &&
		    (cancel_delayed_work(&ce->guc_state.sched_disable_delay_work))) {
			/* successful cancel so jump straight to close it */
			intel_context_sched_disable_unpin(ce);
		}

		spin_lock(&ce->guc_state.lock);

		/*
		 * Once we are at this point submission_disabled() is guaranteed
		 * to be visible to all callers who set the below flags (see above
		 * flush and flushes in reset_prepare). If submission_disabled()
		 * is set, the caller shouldn't set these flags.
		 */

		destroyed = context_destroyed(ce);
		pending_enable = context_pending_enable(ce);
		pending_disable = context_pending_disable(ce);
		deregister = context_wait_for_deregister_to_register(ce);
		banned = context_banned(ce);
		init_sched_state(ce);

		spin_unlock(&ce->guc_state.lock);

		if (pending_enable || destroyed || deregister) {
			decr_outstanding_submission_g2h(guc);
			if (deregister)
				guc_signal_context_fence(ce);
			if (destroyed) {
				intel_gt_pm_put_async(guc_to_gt(guc));
				release_guc_id(guc, ce);
				__guc_context_destroy(ce);
			}
			if (pending_enable || deregister)
				intel_context_put(ce);
		}

		/* Not mutually exclusive with above if statement. */
		if (pending_disable) {
			guc_signal_context_fence(ce);
			if (banned) {
				guc_cancel_context_requests(ce);
				intel_engine_signal_breadcrumbs(ce->engine);
			}
			intel_context_sched_disable_unpin(ce);
			decr_outstanding_submission_g2h(guc);

			spin_lock(&ce->guc_state.lock);
			guc_blocked_fence_complete(ce);
			spin_unlock(&ce->guc_state.lock);

			intel_context_put(ce);
		}

		if (do_put)
			intel_context_put(ce);
		xa_lock(&guc->context_lookup);
	}
	xa_unlock_irqrestore(&guc->context_lookup, flags);
}

/*
 * GuC stores busyness stats for each engine at context in/out boundaries. A
 * context 'in' logs execution start time, 'out' adds in -> out delta to total.
 * i915/kmd accesses 'start', 'total' and 'context id' from memory shared with
 * GuC.
 *
 * __i915_pmu_event_read samples engine busyness. When sampling, if context id
 * is valid (!= ~0) and start is non-zero, the engine is considered to be
 * active. For an active engine total busyness = total + (now - start), where
 * 'now' is the time at which the busyness is sampled. For an inactive engine,
 * total busyness = total.
 *
 * All times are captured from GUCPMTIMESTAMP reg and are in gt clock domain.
 *
 * The start and total values provided by GuC are 32 bits and wrap around in a
 * few minutes. Since perf pmu provides busyness as 64 bit monotonically
 * increasing ns values, there is a need for this implementation to account for
 * overflows and extend the GuC provided values to 64 bits before returning
 * busyness to the user. In order to do that, a worker runs periodically at
 * frequency = 1/8th the time it takes for the timestamp to wrap (i.e. once in
 * 27 seconds for a gt clock frequency of 19.2 MHz).
 */

#define WRAP_TIME_CLKS U32_MAX
#define POLL_TIME_CLKS (WRAP_TIME_CLKS >> 3)

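/*
 * Extend the 32-bit context switch-in timestamp provided by GuC to 64 bits,
 * using the driver's 64-bit gt_stamp as the reference for the upper dword and
 * accounting for either value having wrapped since the last sample.
 */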
static void
__extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
{
	u32 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
	u32 gt_stamp_last = lower_32_bits(guc->timestamp.gt_stamp);

	if (new_start == lower_32_bits(*prev_start))
		return;

	/*
	 * When gt is unparked, we update the gt timestamp and start the ping
	 * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt
	 * is unparked, all switched in contexts will have a start time that is
	 * within +/- POLL_TIME_CLKS of the most recent gt_stamp.
	 *
	 * If neither gt_stamp nor new_start has rolled over, then the
	 * gt_stamp_hi does not need to be adjusted, however if one of them has
	 * rolled over, we need to adjust gt_stamp_hi accordingly.
	 *
	 * The below conditions address the cases of new_start rollover and
	 * gt_stamp_last rollover respectively.
	 */
	if (new_start < gt_stamp_last &&
	    (new_start - gt_stamp_last) <= POLL_TIME_CLKS)
		gt_stamp_hi++;

	if (new_start > gt_stamp_last &&
	    (gt_stamp_last - new_start) <= POLL_TIME_CLKS && gt_stamp_hi)
		gt_stamp_hi--;

	*prev_start = ((u64)gt_stamp_hi << 32) | new_start;
}

#define record_read(map_, field_) \
	iosys_map_rd_field(map_, 0, struct guc_engine_usage_record, field_)

/*
 * GuC updates shared memory and KMD reads it. Since this is not synchronized,
 * we run into a race where the value read is inconsistent. Sometimes the
 * inconsistency is in reading the upper MSB bytes of the last_in value when
 * this race occurs. 2 types of cases are seen - upper 8 bits are zero and upper
 * 24 bits are zero. Since these are non-zero values, it is non-trivial to
 * determine validity of these values. Instead we read the values multiple times
 * until they are consistent. In test runs, 3 attempts result in consistent
 * values. The upper bound is set to 6 attempts and may need to be tuned as per
 * any new occurrences.
 */
static void __get_engine_usage_record(struct intel_engine_cs *engine,
				      u32 *last_in, u32 *id, u32 *total)
{
	struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine);
	int i = 0;

	do {
		*last_in = record_read(&rec_map, last_switch_in_stamp);
		*id = record_read(&rec_map, current_context_index);
		*total = record_read(&rec_map, total_runtime);

		if (record_read(&rec_map, last_switch_in_stamp) == *last_in &&
		    record_read(&rec_map, current_context_index) == *id &&
		    record_read(&rec_map, total_runtime) == *total)
			break;
	} while (++i < 6);
}

static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
{
	struct intel_engine_guc_stats *stats = &engine->stats.guc;
	struct intel_guc *guc = &engine->gt->uc.guc;
	u32 last_switch, ctx_id, total;

	lockdep_assert_held(&guc->timestamp.lock);

	__get_engine_usage_record(engine, &last_switch, &ctx_id, &total);

	stats->running = ctx_id != ~0U && last_switch;
	if (stats->running)
		__extend_last_switch(guc, &stats->start_gt_clk, last_switch);

	/*
	 * Instead of adjusting the total for overflow, just add the
	 * difference from previous sample stats->total_gt_clks
	 */
	if (total && total != ~0U) {
		stats->total_gt_clks += (u32)(total - stats->prev_total);
		stats->prev_total = total;
	}
}

static u32 gpm_timestamp_shift(struct intel_gt *gt)
{
	intel_wakeref_t wakeref;
	u32 reg, shift;

	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
		reg = intel_uncore_read(gt->uncore, RPM_CONFIG0);

	shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
		GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT;

	return 3 - shift;
}

static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
{
	struct intel_gt *gt = guc_to_gt(guc);
	u32 gt_stamp_lo, gt_stamp_hi;
	u64 gpm_ts;

	lockdep_assert_held(&guc->timestamp.lock);

	gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
	gpm_ts = intel_uncore_read64_2x32(gt->uncore, MISC_STATUS0,
					  MISC_STATUS1) >> guc->timestamp.shift;
	gt_stamp_lo = lower_32_bits(gpm_ts);
	*now = ktime_get();

	if (gt_stamp_lo < lower_32_bits(guc->timestamp.gt_stamp))
		gt_stamp_hi++;

	guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_lo;
}

/*
 * Unlike the execlist mode of submission, total and active times are in terms
 * of gt clocks. The *now parameter is retained to return the cpu time at which
 * the busyness was sampled.
 */
static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
{
	struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc;
	struct i915_gpu_error *gpu_error = &engine->i915->gpu_error;
	struct intel_gt *gt = engine->gt;
	struct intel_guc *guc = &gt->uc.guc;
	u64 total, gt_stamp_saved;
	unsigned long flags;
	u32 reset_count;
	bool in_reset;

	spin_lock_irqsave(&guc->timestamp.lock, flags);

	/*
	 * If a reset happened, we risk reading partially updated engine
	 * busyness from GuC, so we just use the driver stored copy of busyness.
	 * Synchronize with gt reset using reset_count and the
	 * I915_RESET_BACKOFF flag. Note that reset flow updates the reset_count
	 * after the I915_RESET_BACKOFF flag, so ensure that the reset_count is
	 * usable by checking the flag afterwards.
	 */
	reset_count = i915_reset_count(gpu_error);
	in_reset = test_bit(I915_RESET_BACKOFF, &gt->reset.flags);

	*now = ktime_get();

	/*
	 * The active busyness depends on start_gt_clk and gt_stamp.
	 * gt_stamp is updated by i915 only when gt is awake and the
	 * start_gt_clk is derived from GuC state. To get a consistent
	 * view of activity, we query the GuC state only if gt is awake.
	 */
	if (!in_reset && intel_gt_pm_get_if_awake(gt)) {
		stats_saved = *stats;
		gt_stamp_saved = guc->timestamp.gt_stamp;
		/*
		 * Update gt_clks, then gt timestamp to simplify the 'gt_stamp -
		 * start_gt_clk' calculation below for active engines.
		 */
		guc_update_engine_gt_clks(engine);
		guc_update_pm_timestamp(guc, now);
		intel_gt_pm_put_async(gt);
		if (i915_reset_count(gpu_error) != reset_count) {
			*stats = stats_saved;
			guc->timestamp.gt_stamp = gt_stamp_saved;
		}
	}

	total = intel_gt_clock_interval_to_ns(gt, stats->total_gt_clks);
	if (stats->running) {
		u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk;

		total += intel_gt_clock_interval_to_ns(gt, clk);
	}

	spin_unlock_irqrestore(&guc->timestamp.lock, flags);

	return ns_to_ktime(total);
}

Note that reset flow updates the reset_count 1313 * after I915_RESET_BACKOFF flag, so ensure that the reset_count is 1314 * usable by checking the flag afterwards. 1315 */ 1316 reset_count = i915_reset_count(gpu_error); 1317 in_reset = test_bit(I915_RESET_BACKOFF, >->reset.flags); 1318 1319 *now = ktime_get(); 1320 1321 /* 1322 * The active busyness depends on start_gt_clk and gt_stamp. 1323 * gt_stamp is updated by i915 only when gt is awake and the 1324 * start_gt_clk is derived from GuC state. To get a consistent 1325 * view of activity, we query the GuC state only if gt is awake. 1326 */ 1327 if (!in_reset && intel_gt_pm_get_if_awake(gt)) { 1328 stats_saved = *stats; 1329 gt_stamp_saved = guc->timestamp.gt_stamp; 1330 /* 1331 * Update gt_clks, then gt timestamp to simplify the 'gt_stamp - 1332 * start_gt_clk' calculation below for active engines. 1333 */ 1334 guc_update_engine_gt_clks(engine); 1335 guc_update_pm_timestamp(guc, now); 1336 intel_gt_pm_put_async(gt); 1337 if (i915_reset_count(gpu_error) != reset_count) { 1338 *stats = stats_saved; 1339 guc->timestamp.gt_stamp = gt_stamp_saved; 1340 } 1341 } 1342 1343 total = intel_gt_clock_interval_to_ns(gt, stats->total_gt_clks); 1344 if (stats->running) { 1345 u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk; 1346 1347 total += intel_gt_clock_interval_to_ns(gt, clk); 1348 } 1349 1350 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1351 1352 return ns_to_ktime(total); 1353 } 1354 1355 static void __reset_guc_busyness_stats(struct intel_guc *guc) 1356 { 1357 struct intel_gt *gt = guc_to_gt(guc); 1358 struct intel_engine_cs *engine; 1359 enum intel_engine_id id; 1360 unsigned long flags; 1361 ktime_t unused; 1362 1363 cancel_delayed_work_sync(&guc->timestamp.work); 1364 1365 spin_lock_irqsave(&guc->timestamp.lock, flags); 1366 1367 guc_update_pm_timestamp(guc, &unused); 1368 for_each_engine(engine, gt, id) { 1369 guc_update_engine_gt_clks(engine); 1370 engine->stats.guc.prev_total = 0; 1371 } 1372 1373 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1374 } 1375 1376 static void __update_guc_busyness_stats(struct intel_guc *guc) 1377 { 1378 struct intel_gt *gt = guc_to_gt(guc); 1379 struct intel_engine_cs *engine; 1380 enum intel_engine_id id; 1381 unsigned long flags; 1382 ktime_t unused; 1383 1384 guc->timestamp.last_stat_jiffies = jiffies; 1385 1386 spin_lock_irqsave(&guc->timestamp.lock, flags); 1387 1388 guc_update_pm_timestamp(guc, &unused); 1389 for_each_engine(engine, gt, id) 1390 guc_update_engine_gt_clks(engine); 1391 1392 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1393 } 1394 1395 static void guc_timestamp_ping(struct work_struct *wrk) 1396 { 1397 struct intel_guc *guc = container_of(wrk, typeof(*guc), 1398 timestamp.work.work); 1399 struct intel_uc *uc = container_of(guc, typeof(*uc), guc); 1400 struct intel_gt *gt = guc_to_gt(guc); 1401 intel_wakeref_t wakeref; 1402 int srcu, ret; 1403 1404 /* 1405 * Synchronize with gt reset to make sure the worker does not 1406 * corrupt the engine/guc stats. NB: can't actually block waiting 1407 * for a reset to complete as the reset requires flushing out 1408 * this worker thread if started. So waiting would deadlock. 
static void guc_timestamp_ping(struct work_struct *wrk)
{
	struct intel_guc *guc = container_of(wrk, typeof(*guc),
					     timestamp.work.work);
	struct intel_uc *uc = container_of(guc, typeof(*uc), guc);
	struct intel_gt *gt = guc_to_gt(guc);
	intel_wakeref_t wakeref;
	int srcu, ret;

	/*
	 * Synchronize with gt reset to make sure the worker does not
	 * corrupt the engine/guc stats. NB: can't actually block waiting
	 * for a reset to complete as the reset requires flushing out
	 * this worker thread if started. So waiting would deadlock.
	 */
	ret = intel_gt_reset_trylock(gt, &srcu);
	if (ret)
		return;

	with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
		__update_guc_busyness_stats(guc);

	intel_gt_reset_unlock(gt, srcu);

	mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
			 guc->timestamp.ping_delay);
}

static int guc_action_enable_usage_stats(struct intel_guc *guc)
{
	u32 offset = intel_guc_engine_usage_offset(guc);
	u32 action[] = {
		INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF,
		offset,
		0,
	};

	return intel_guc_send(guc, action, ARRAY_SIZE(action));
}

static void guc_init_engine_stats(struct intel_guc *guc)
{
	struct intel_gt *gt = guc_to_gt(guc);
	intel_wakeref_t wakeref;

	mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
			 guc->timestamp.ping_delay);

	with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref) {
		int ret = guc_action_enable_usage_stats(guc);

		if (ret)
			guc_err(guc, "Failed to enable usage stats: %pe\n", ERR_PTR(ret));
	}
}

void intel_guc_busyness_park(struct intel_gt *gt)
{
	struct intel_guc *guc = &gt->uc.guc;

	if (!guc_submission_initialized(guc))
		return;

	/*
	 * There is a race with suspend flow where the worker runs after suspend
	 * and causes an unclaimed register access warning. Cancel the worker
	 * synchronously here.
	 */
	cancel_delayed_work_sync(&guc->timestamp.work);

	/*
	 * Before parking, we should sample engine busyness stats if we need to.
	 * We can skip it if we are less than half a ping from the last time we
	 * sampled the busyness stats.
	 */
	if (guc->timestamp.last_stat_jiffies &&
	    !time_after(jiffies, guc->timestamp.last_stat_jiffies +
			(guc->timestamp.ping_delay / 2)))
		return;

	__update_guc_busyness_stats(guc);
}

void intel_guc_busyness_unpark(struct intel_gt *gt)
{
	struct intel_guc *guc = &gt->uc.guc;
	unsigned long flags;
	ktime_t unused;

	if (!guc_submission_initialized(guc))
		return;

	spin_lock_irqsave(&guc->timestamp.lock, flags);
	guc_update_pm_timestamp(guc, &unused);
	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
	mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
			 guc->timestamp.ping_delay);
}

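/*
 * Submission is considered disabled once the tasklet has been killed (e.g. a
 * reset is in progress) or the GT is wedged; callers use this to avoid sending
 * H2Gs whose G2H responses could be lost (see "Reset races" above).
 */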
static inline bool
submission_disabled(struct intel_guc *guc)
{
	struct i915_sched_engine * const sched_engine = guc->sched_engine;

	return unlikely(!sched_engine ||
			!__tasklet_is_enabled(&sched_engine->tasklet) ||
			intel_gt_is_wedged(guc_to_gt(guc)));
}

static void disable_submission(struct intel_guc *guc)
{
	struct i915_sched_engine * const sched_engine = guc->sched_engine;

	if (__tasklet_is_enabled(&sched_engine->tasklet)) {
		GEM_BUG_ON(!guc->ct.enabled);
		__tasklet_disable_sync_once(&sched_engine->tasklet);
		sched_engine->tasklet.callback = NULL;
	}
}

static void enable_submission(struct intel_guc *guc)
{
	struct i915_sched_engine * const sched_engine = guc->sched_engine;
	unsigned long flags;

	spin_lock_irqsave(&guc->sched_engine->lock, flags);
	sched_engine->tasklet.callback = guc_submission_tasklet;
	wmb(); /* Make sure callback visible */
	if (!__tasklet_is_enabled(&sched_engine->tasklet) &&
	    __tasklet_enable(&sched_engine->tasklet)) {
		GEM_BUG_ON(!guc->ct.enabled);

		/* And kick in case we missed a new request submission. */
		tasklet_hi_schedule(&sched_engine->tasklet);
	}
	spin_unlock_irqrestore(&guc->sched_engine->lock, flags);
}

static void guc_flush_submissions(struct intel_guc *guc)
{
	struct i915_sched_engine * const sched_engine = guc->sched_engine;
	unsigned long flags;

	spin_lock_irqsave(&sched_engine->lock, flags);
	spin_unlock_irqrestore(&sched_engine->lock, flags);
}

static void guc_flush_destroyed_contexts(struct intel_guc *guc);

void intel_guc_submission_reset_prepare(struct intel_guc *guc)
{
	if (unlikely(!guc_submission_initialized(guc))) {
		/* Reset called during driver load? GuC not yet initialised! */
		return;
	}

	intel_gt_park_heartbeats(guc_to_gt(guc));
	disable_submission(guc);
	guc->interrupts.disable(guc);
	__reset_guc_busyness_stats(guc);

	/* Flush IRQ handler */
	spin_lock_irq(guc_to_gt(guc)->irq_lock);
	spin_unlock_irq(guc_to_gt(guc)->irq_lock);

	guc_flush_submissions(guc);
	guc_flush_destroyed_contexts(guc);
	flush_work(&guc->ct.requests.worker);

	scrub_guc_desc_for_outstanding_g2h(guc);
}

static struct intel_engine_cs *
guc_virtual_get_sibling(struct intel_engine_cs *ve, unsigned int sibling)
{
	struct intel_engine_cs *engine;
	intel_engine_mask_t tmp, mask = ve->mask;
	unsigned int num_siblings = 0;

	for_each_engine_masked(engine, ve->gt, mask, tmp)
		if (num_siblings++ == sibling)
			return engine;

	return NULL;
}

static inline struct intel_engine_cs *
__context_to_physical_engine(struct intel_context *ce)
{
	struct intel_engine_cs *engine = ce->engine;

	if (intel_engine_is_virtual(engine))
		engine = guc_virtual_get_sibling(engine, 0);

	return engine;
}

static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub)
{
	struct intel_engine_cs *engine = __context_to_physical_engine(ce);

	if (!intel_context_is_schedulable(ce))
		return;

	GEM_BUG_ON(!intel_context_is_pinned(ce));

	/*
	 * We want a simple context + ring to execute the breadcrumb update.
	 * We cannot rely on the context being intact across the GPU hang,
	 * so clear it and rebuild just what we need for the breadcrumb.
	 * All pending requests for this context will be zapped, and any
	 * future request will be after userspace has had the opportunity
	 * to recreate its own state.
	 */
	if (scrub)
		lrc_init_regs(ce, engine, true);

	/* Rerun the request; its payload has been neutered (if guilty). */
	lrc_update_regs(ce, engine, head);
}

static void guc_engine_reset_prepare(struct intel_engine_cs *engine)
{
	if (!IS_GRAPHICS_VER(engine->i915, 11, 12))
		return;

	intel_engine_stop_cs(engine);

	/*
	 * Wa_22011802037: In addition to stopping the cs, we need
	 * to wait for any pending mi force wakeups
	 */
	intel_engine_wait_for_pending_mi_fw(engine);
}

static void guc_reset_nop(struct intel_engine_cs *engine)
{
}

static void guc_rewind_nop(struct intel_engine_cs *engine, bool stalled)
{
}

static void
__unwind_incomplete_requests(struct intel_context *ce)
{
	struct i915_request *rq, *rn;
	struct list_head *pl;
	int prio = I915_PRIORITY_INVALID;
	struct i915_sched_engine * const sched_engine =
		ce->engine->sched_engine;
	unsigned long flags;

	spin_lock_irqsave(&sched_engine->lock, flags);
	spin_lock(&ce->guc_state.lock);
	list_for_each_entry_safe_reverse(rq, rn,
					 &ce->guc_state.requests,
					 sched.link) {
		if (i915_request_completed(rq))
			continue;

		list_del_init(&rq->sched.link);
		__i915_request_unsubmit(rq);

		/* Push the request back into the queue for later resubmission. */
		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
		if (rq_prio(rq) != prio) {
			prio = rq_prio(rq);
			pl = i915_sched_lookup_priolist(sched_engine, prio);
		}
		GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine));

		list_add(&rq->sched.link, pl);
		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
	}
	spin_unlock(&ce->guc_state.lock);
	spin_unlock_irqrestore(&sched_engine->lock, flags);
}

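/*
 * Reset a context (and, for parallel submission, all of its children): mark
 * scheduling disabled, find the hanging request for each member, rewind the
 * ring and replay from there (with a guilty request's payload neutered), and
 * push incomplete requests back onto the priority queue for resubmission.
 */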
static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t stalled)
{
	bool guilty;
	struct i915_request *rq;
	unsigned long flags;
	u32 head;
	int i, number_children = ce->parallel.number_children;
	struct intel_context *parent = ce;

	GEM_BUG_ON(intel_context_is_child(ce));

	intel_context_get(ce);

	/*
	 * GuC will implicitly mark the context as non-schedulable when it sends
	 * the reset notification. Make sure our state reflects this change. The
	 * context will be marked enabled on resubmission.
	 */
	spin_lock_irqsave(&ce->guc_state.lock, flags);
	clr_context_enabled(ce);
	spin_unlock_irqrestore(&ce->guc_state.lock, flags);

	/*
	 * For each context in the relationship find the hanging request
	 * resetting each context / request as needed
	 */
	for (i = 0; i < number_children + 1; ++i) {
		if (!intel_context_is_pinned(ce))
			goto next_context;

		guilty = false;
		rq = intel_context_get_active_request(ce);
		if (!rq) {
			head = ce->ring->tail;
			goto out_replay;
		}

		if (i915_request_started(rq))
			guilty = stalled & ce->engine->mask;

		GEM_BUG_ON(i915_active_is_idle(&ce->active));
		head = intel_ring_wrap(ce->ring, rq->head);

		__i915_request_reset(rq, guilty);
		i915_request_put(rq);
out_replay:
		guc_reset_state(ce, head, guilty);
next_context:
		if (i != number_children)
			ce = list_next_entry(ce, parallel.child_link);
	}

	__unwind_incomplete_requests(parent);
	intel_context_put(parent);
}

void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled)
{
	struct intel_context *ce;
	unsigned long index;
	unsigned long flags;

	if (unlikely(!guc_submission_initialized(guc))) {
		/* Reset called during driver load? GuC not yet initialised! */
		return;
	}

	xa_lock_irqsave(&guc->context_lookup, flags);
	xa_for_each(&guc->context_lookup, index, ce) {
		if (!kref_get_unless_zero(&ce->ref))
			continue;

		xa_unlock(&guc->context_lookup);

		if (intel_context_is_pinned(ce) &&
		    !intel_context_is_child(ce))
			__guc_reset_context(ce, stalled);

		intel_context_put(ce);

		xa_lock(&guc->context_lookup);
	}
	xa_unlock_irqrestore(&guc->context_lookup, flags);

	/* GuC is blown away, drop all references to contexts */
	xa_destroy(&guc->context_lookup);
}

static void guc_cancel_context_requests(struct intel_context *ce)
{
	struct i915_sched_engine *sched_engine = ce_to_guc(ce)->sched_engine;
	struct i915_request *rq;
	unsigned long flags;

	/* Mark all executing requests as skipped. */
	spin_lock_irqsave(&sched_engine->lock, flags);
	spin_lock(&ce->guc_state.lock);
	list_for_each_entry(rq, &ce->guc_state.requests, sched.link)
		i915_request_put(i915_request_mark_eio(rq));
	spin_unlock(&ce->guc_state.lock);
	spin_unlock_irqrestore(&sched_engine->lock, flags);
}

(Similarly, although we 1798 * shouldn't need to disable irq around the manipulation of the 1799 * submission's irq state, we also wish to remind ourselves that 1800 * it is irq state.) 1801 */ 1802 spin_lock_irqsave(&sched_engine->lock, flags); 1803 1804 /* Flush the queued requests to the timeline list (for retiring). */ 1805 while ((rb = rb_first_cached(&sched_engine->queue))) { 1806 struct i915_priolist *p = to_priolist(rb); 1807 1808 priolist_for_each_request_consume(rq, rn, p) { 1809 list_del_init(&rq->sched.link); 1810 1811 __i915_request_submit(rq); 1812 1813 i915_request_put(i915_request_mark_eio(rq)); 1814 } 1815 1816 rb_erase_cached(&p->node, &sched_engine->queue); 1817 i915_priolist_free(p); 1818 } 1819 1820 /* Remaining _unready_ requests will be nop'ed when submitted */ 1821 1822 sched_engine->queue_priority_hint = INT_MIN; 1823 sched_engine->queue = RB_ROOT_CACHED; 1824 1825 spin_unlock_irqrestore(&sched_engine->lock, flags); 1826 } 1827 1828 void intel_guc_submission_cancel_requests(struct intel_guc *guc) 1829 { 1830 struct intel_context *ce; 1831 unsigned long index; 1832 unsigned long flags; 1833 1834 xa_lock_irqsave(&guc->context_lookup, flags); 1835 xa_for_each(&guc->context_lookup, index, ce) { 1836 if (!kref_get_unless_zero(&ce->ref)) 1837 continue; 1838 1839 xa_unlock(&guc->context_lookup); 1840 1841 if (intel_context_is_pinned(ce) && 1842 !intel_context_is_child(ce)) 1843 guc_cancel_context_requests(ce); 1844 1845 intel_context_put(ce); 1846 1847 xa_lock(&guc->context_lookup); 1848 } 1849 xa_unlock_irqrestore(&guc->context_lookup, flags); 1850 1851 guc_cancel_sched_engine_requests(guc->sched_engine); 1852 1853 /* GuC is blown away, drop all references to contexts */ 1854 xa_destroy(&guc->context_lookup); 1855 } 1856 1857 void intel_guc_submission_reset_finish(struct intel_guc *guc) 1858 { 1859 /* Reset called during driver load or during wedge? */ 1860 if (unlikely(!guc_submission_initialized(guc) || 1861 intel_gt_is_wedged(guc_to_gt(guc)))) { 1862 return; 1863 } 1864 1865 /* 1866 * Technically possible for either of these values to be non-zero here, 1867 * but very unlikely + harmless. Regardless let's add a warn so we can 1868 * see in CI if this happens frequently / a precursor to taking down the 1869 * machine. 1870 */ 1871 GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h)); 1872 atomic_set(&guc->outstanding_submission_g2h, 0); 1873 1874 intel_guc_global_policies_update(guc); 1875 enable_submission(guc); 1876 intel_gt_unpark_heartbeats(guc_to_gt(guc)); 1877 } 1878 1879 static void destroyed_worker_func(struct work_struct *w); 1880 static void reset_fail_worker_func(struct work_struct *w); 1881 1882 /* 1883 * Set up the memory resources to be shared with the GuC (via the GGTT) 1884 * at firmware loading time. 
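* Concretely this allocates the v69 LRC descriptor pool (only when the
* GuC submission interface is older than 1.0.0), allocates the bitmap
* used to hand out contiguous multi-lrc guc_ids, and initialises the GuC
* timestamp ping delay and GPM timestamp shift.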
1885 */ 1886 int intel_guc_submission_init(struct intel_guc *guc) 1887 { 1888 struct intel_gt *gt = guc_to_gt(guc); 1889 int ret; 1890 1891 if (guc->submission_initialized) 1892 return 0; 1893 1894 if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 0, 0)) { 1895 ret = guc_lrc_desc_pool_create_v69(guc); 1896 if (ret) 1897 return ret; 1898 } 1899 1900 guc->submission_state.guc_ids_bitmap = 1901 bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); 1902 if (!guc->submission_state.guc_ids_bitmap) { 1903 ret = -ENOMEM; 1904 goto destroy_pool; 1905 } 1906 1907 guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ; 1908 guc->timestamp.shift = gpm_timestamp_shift(gt); 1909 guc->submission_initialized = true; 1910 1911 return 0; 1912 1913 destroy_pool: 1914 guc_lrc_desc_pool_destroy_v69(guc); 1915 1916 return ret; 1917 } 1918 1919 void intel_guc_submission_fini(struct intel_guc *guc) 1920 { 1921 if (!guc->submission_initialized) 1922 return; 1923 1924 guc_flush_destroyed_contexts(guc); 1925 guc_lrc_desc_pool_destroy_v69(guc); 1926 i915_sched_engine_put(guc->sched_engine); 1927 bitmap_free(guc->submission_state.guc_ids_bitmap); 1928 guc->submission_initialized = false; 1929 } 1930 1931 static inline void queue_request(struct i915_sched_engine *sched_engine, 1932 struct i915_request *rq, 1933 int prio) 1934 { 1935 GEM_BUG_ON(!list_empty(&rq->sched.link)); 1936 list_add_tail(&rq->sched.link, 1937 i915_sched_lookup_priolist(sched_engine, prio)); 1938 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 1939 tasklet_hi_schedule(&sched_engine->tasklet); 1940 } 1941 1942 static int guc_bypass_tasklet_submit(struct intel_guc *guc, 1943 struct i915_request *rq) 1944 { 1945 int ret = 0; 1946 1947 __i915_request_submit(rq); 1948 1949 trace_i915_request_in(rq, 0); 1950 1951 if (is_multi_lrc_rq(rq)) { 1952 if (multi_lrc_submit(rq)) { 1953 ret = guc_wq_item_append(guc, rq); 1954 if (!ret) 1955 ret = guc_add_request(guc, rq); 1956 } 1957 } else { 1958 guc_set_lrc_tail(rq); 1959 ret = guc_add_request(guc, rq); 1960 } 1961 1962 if (unlikely(ret == -EPIPE)) 1963 disable_submission(guc); 1964 1965 return ret; 1966 } 1967 1968 static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq) 1969 { 1970 struct i915_sched_engine *sched_engine = rq->engine->sched_engine; 1971 struct intel_context *ce = request_to_scheduling_context(rq); 1972 1973 return submission_disabled(guc) || guc->stalled_request || 1974 !i915_sched_engine_is_empty(sched_engine) || 1975 !ctx_id_mapped(guc, ce->guc_id.id); 1976 } 1977 1978 static void guc_submit_request(struct i915_request *rq) 1979 { 1980 struct i915_sched_engine *sched_engine = rq->engine->sched_engine; 1981 struct intel_guc *guc = &rq->engine->gt->uc.guc; 1982 unsigned long flags; 1983 1984 /* Will be called from irq-context when using foreign fences. 
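* Two paths below: if need_tasklet() says the GuC cannot accept the
* request directly (submission disabled, a stalled request pending, a
* non-empty sched_engine queue or the context not yet mapped to a
* guc_id), the request is queued on the priority list and the tasklet is
* kicked; otherwise the tasklet is bypassed and the request is handed
* straight to the GuC.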
*/ 1985 spin_lock_irqsave(&sched_engine->lock, flags); 1986 1987 if (need_tasklet(guc, rq)) 1988 queue_request(sched_engine, rq, rq_prio(rq)); 1989 else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY) 1990 tasklet_hi_schedule(&sched_engine->tasklet); 1991 1992 spin_unlock_irqrestore(&sched_engine->lock, flags); 1993 } 1994 1995 static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) 1996 { 1997 int ret; 1998 1999 GEM_BUG_ON(intel_context_is_child(ce)); 2000 2001 if (intel_context_is_parent(ce)) 2002 ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap, 2003 NUMBER_MULTI_LRC_GUC_ID(guc), 2004 order_base_2(ce->parallel.number_children 2005 + 1)); 2006 else 2007 ret = ida_simple_get(&guc->submission_state.guc_ids, 2008 NUMBER_MULTI_LRC_GUC_ID(guc), 2009 guc->submission_state.num_guc_ids, 2010 GFP_KERNEL | __GFP_RETRY_MAYFAIL | 2011 __GFP_NOWARN); 2012 if (unlikely(ret < 0)) 2013 return ret; 2014 2015 if (!intel_context_is_parent(ce)) 2016 ++guc->submission_state.guc_ids_in_use; 2017 2018 ce->guc_id.id = ret; 2019 return 0; 2020 } 2021 2022 static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce) 2023 { 2024 GEM_BUG_ON(intel_context_is_child(ce)); 2025 2026 if (!context_guc_id_invalid(ce)) { 2027 if (intel_context_is_parent(ce)) { 2028 bitmap_release_region(guc->submission_state.guc_ids_bitmap, 2029 ce->guc_id.id, 2030 order_base_2(ce->parallel.number_children 2031 + 1)); 2032 } else { 2033 --guc->submission_state.guc_ids_in_use; 2034 ida_simple_remove(&guc->submission_state.guc_ids, 2035 ce->guc_id.id); 2036 } 2037 clr_ctx_id_mapping(guc, ce->guc_id.id); 2038 set_context_guc_id_invalid(ce); 2039 } 2040 if (!list_empty(&ce->guc_id.link)) 2041 list_del_init(&ce->guc_id.link); 2042 } 2043 2044 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce) 2045 { 2046 unsigned long flags; 2047 2048 spin_lock_irqsave(&guc->submission_state.lock, flags); 2049 __release_guc_id(guc, ce); 2050 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2051 } 2052 2053 static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce) 2054 { 2055 struct intel_context *cn; 2056 2057 lockdep_assert_held(&guc->submission_state.lock); 2058 GEM_BUG_ON(intel_context_is_child(ce)); 2059 GEM_BUG_ON(intel_context_is_parent(ce)); 2060 2061 if (!list_empty(&guc->submission_state.guc_id_list)) { 2062 cn = list_first_entry(&guc->submission_state.guc_id_list, 2063 struct intel_context, 2064 guc_id.link); 2065 2066 GEM_BUG_ON(atomic_read(&cn->guc_id.ref)); 2067 GEM_BUG_ON(context_guc_id_invalid(cn)); 2068 GEM_BUG_ON(intel_context_is_child(cn)); 2069 GEM_BUG_ON(intel_context_is_parent(cn)); 2070 2071 list_del_init(&cn->guc_id.link); 2072 ce->guc_id.id = cn->guc_id.id; 2073 2074 spin_lock(&cn->guc_state.lock); 2075 clr_context_registered(cn); 2076 spin_unlock(&cn->guc_state.lock); 2077 2078 set_context_guc_id_invalid(cn); 2079 2080 #ifdef CONFIG_DRM_I915_SELFTEST 2081 guc->number_guc_id_stolen++; 2082 #endif 2083 2084 return 0; 2085 } else { 2086 return -EAGAIN; 2087 } 2088 } 2089 2090 static int assign_guc_id(struct intel_guc *guc, struct intel_context *ce) 2091 { 2092 int ret; 2093 2094 lockdep_assert_held(&guc->submission_state.lock); 2095 GEM_BUG_ON(intel_context_is_child(ce)); 2096 2097 ret = new_guc_id(guc, ce); 2098 if (unlikely(ret < 0)) { 2099 if (intel_context_is_parent(ce)) 2100 return -ENOSPC; 2101 2102 ret = steal_guc_id(guc, ce); 2103 if (ret < 0) 2104 return ret; 2105 } 2106 2107 if (intel_context_is_parent(ce)) { 2108 struct 
intel_context *child;
2109 int i = 1;
2110
2111 for_each_child(ce, child)
2112 child->guc_id.id = ce->guc_id.id + i++;
2113 }
2114
2115 return 0;
2116 }
2117
2118 #define PIN_GUC_ID_TRIES 4
2119 static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
2120 {
2121 int ret = 0;
2122 unsigned long flags, tries = PIN_GUC_ID_TRIES;
2123
2124 GEM_BUG_ON(atomic_read(&ce->guc_id.ref));
2125
2126 try_again:
2127 spin_lock_irqsave(&guc->submission_state.lock, flags);
2128
2129 might_lock(&ce->guc_state.lock);
2130
2131 if (context_guc_id_invalid(ce)) {
2132 ret = assign_guc_id(guc, ce);
2133 if (ret)
2134 goto out_unlock;
2135 ret = 1; /* Indicates newly assigned guc_id */
2136 }
2137 if (!list_empty(&ce->guc_id.link))
2138 list_del_init(&ce->guc_id.link);
2139 atomic_inc(&ce->guc_id.ref);
2140
2141 out_unlock:
2142 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
2143
2144 /*
2145 * -EAGAIN indicates no guc_ids are available, let's retire any
2146 * outstanding requests to see if that frees up a guc_id. If the first
2147 * retire didn't help, insert a sleep with the timeslice duration before
2148 * attempting to retire more requests. Double the sleep period each
2149 * subsequent pass before finally giving up. The sleep period has a max of
2150 * 100ms and a minimum of 1ms.
2151 */
2152 if (ret == -EAGAIN && --tries) {
2153 if (PIN_GUC_ID_TRIES - tries > 1) {
2154 unsigned int timeslice_shifted =
2155 ce->engine->props.timeslice_duration_ms <<
2156 (PIN_GUC_ID_TRIES - tries - 2);
2157 unsigned int max = min_t(unsigned int, 100,
2158 timeslice_shifted);
2159
2160 msleep(max_t(unsigned int, max, 1));
2161 }
2162 intel_gt_retire_requests(guc_to_gt(guc));
2163 goto try_again;
2164 }
2165
2166 return ret;
2167 }
2168
2169 static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce)
2170 {
2171 unsigned long flags;
2172
2173 GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0);
2174 GEM_BUG_ON(intel_context_is_child(ce));
2175
2176 if (unlikely(context_guc_id_invalid(ce) ||
2177 intel_context_is_parent(ce)))
2178 return;
2179
2180 spin_lock_irqsave(&guc->submission_state.lock, flags);
2181 if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) &&
2182 !atomic_read(&ce->guc_id.ref))
2183 list_add_tail(&ce->guc_id.link,
2184 &guc->submission_state.guc_id_list);
2185 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
2186 }
2187
2188 static int __guc_action_register_multi_lrc_v69(struct intel_guc *guc,
2189 struct intel_context *ce,
2190 u32 guc_id,
2191 u32 offset,
2192 bool loop)
2193 {
2194 struct intel_context *child;
2195 u32 action[4 + MAX_ENGINE_INSTANCE];
2196 int len = 0;
2197
2198 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);
2199
2200 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
2201 action[len++] = guc_id;
2202 action[len++] = ce->parallel.number_children + 1;
2203 action[len++] = offset;
2204 for_each_child(ce, child) {
2205 offset += sizeof(struct guc_lrc_desc_v69);
2206 action[len++] = offset;
2207 }
2208
2209 return guc_submission_send_busy_loop(guc, action, len, 0, loop);
2210 }
2211
2212 static int __guc_action_register_multi_lrc_v70(struct intel_guc *guc,
2213 struct intel_context *ce,
2214 struct guc_ctxt_registration_info *info,
2215 bool loop)
2216 {
2217 struct intel_context *child;
2218 u32 action[13 + (MAX_ENGINE_INSTANCE * 2)];
2219 int len = 0;
2220 u32 next_id;
2221
2222 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);
2223
2224 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
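/*
 * The remaining dwords assembled below are: flags, context_idx,
 * engine_class, engine_submit_mask, the work queue descriptor and base
 * GGTT addresses (lo/hi), the work queue size, the number of contexts
 * in the relationship (parent + children), the parent's LRCA (lo/hi)
 * and finally one LRCA lo/hi pair per child.
 */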
2225 action[len++] = info->flags; 2226 action[len++] = info->context_idx; 2227 action[len++] = info->engine_class; 2228 action[len++] = info->engine_submit_mask; 2229 action[len++] = info->wq_desc_lo; 2230 action[len++] = info->wq_desc_hi; 2231 action[len++] = info->wq_base_lo; 2232 action[len++] = info->wq_base_hi; 2233 action[len++] = info->wq_size; 2234 action[len++] = ce->parallel.number_children + 1; 2235 action[len++] = info->hwlrca_lo; 2236 action[len++] = info->hwlrca_hi; 2237 2238 next_id = info->context_idx + 1; 2239 for_each_child(ce, child) { 2240 GEM_BUG_ON(next_id++ != child->guc_id.id); 2241 2242 /* 2243 * NB: GuC interface supports 64 bit LRCA even though i915/HW 2244 * only supports 32 bit currently. 2245 */ 2246 action[len++] = lower_32_bits(child->lrc.lrca); 2247 action[len++] = upper_32_bits(child->lrc.lrca); 2248 } 2249 2250 GEM_BUG_ON(len > ARRAY_SIZE(action)); 2251 2252 return guc_submission_send_busy_loop(guc, action, len, 0, loop); 2253 } 2254 2255 static int __guc_action_register_context_v69(struct intel_guc *guc, 2256 u32 guc_id, 2257 u32 offset, 2258 bool loop) 2259 { 2260 u32 action[] = { 2261 INTEL_GUC_ACTION_REGISTER_CONTEXT, 2262 guc_id, 2263 offset, 2264 }; 2265 2266 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2267 0, loop); 2268 } 2269 2270 static int __guc_action_register_context_v70(struct intel_guc *guc, 2271 struct guc_ctxt_registration_info *info, 2272 bool loop) 2273 { 2274 u32 action[] = { 2275 INTEL_GUC_ACTION_REGISTER_CONTEXT, 2276 info->flags, 2277 info->context_idx, 2278 info->engine_class, 2279 info->engine_submit_mask, 2280 info->wq_desc_lo, 2281 info->wq_desc_hi, 2282 info->wq_base_lo, 2283 info->wq_base_hi, 2284 info->wq_size, 2285 info->hwlrca_lo, 2286 info->hwlrca_hi, 2287 }; 2288 2289 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2290 0, loop); 2291 } 2292 2293 static void prepare_context_registration_info_v69(struct intel_context *ce); 2294 static void prepare_context_registration_info_v70(struct intel_context *ce, 2295 struct guc_ctxt_registration_info *info); 2296 2297 static int 2298 register_context_v69(struct intel_guc *guc, struct intel_context *ce, bool loop) 2299 { 2300 u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool_v69) + 2301 ce->guc_id.id * sizeof(struct guc_lrc_desc_v69); 2302 2303 prepare_context_registration_info_v69(ce); 2304 2305 if (intel_context_is_parent(ce)) 2306 return __guc_action_register_multi_lrc_v69(guc, ce, ce->guc_id.id, 2307 offset, loop); 2308 else 2309 return __guc_action_register_context_v69(guc, ce->guc_id.id, 2310 offset, loop); 2311 } 2312 2313 static int 2314 register_context_v70(struct intel_guc *guc, struct intel_context *ce, bool loop) 2315 { 2316 struct guc_ctxt_registration_info info; 2317 2318 prepare_context_registration_info_v70(ce, &info); 2319 2320 if (intel_context_is_parent(ce)) 2321 return __guc_action_register_multi_lrc_v70(guc, ce, &info, loop); 2322 else 2323 return __guc_action_register_context_v70(guc, &info, loop); 2324 } 2325 2326 static int register_context(struct intel_context *ce, bool loop) 2327 { 2328 struct intel_guc *guc = ce_to_guc(ce); 2329 int ret; 2330 2331 GEM_BUG_ON(intel_context_is_child(ce)); 2332 trace_intel_context_register(ce); 2333 2334 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) 2335 ret = register_context_v70(guc, ce, loop); 2336 else 2337 ret = register_context_v69(guc, ce, loop); 2338 2339 if (likely(!ret)) { 2340 unsigned long flags; 2341 2342 spin_lock_irqsave(&ce->guc_state.lock, flags); 
2343 set_context_registered(ce); 2344 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2345 2346 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) 2347 guc_context_policy_init_v70(ce, loop); 2348 } 2349 2350 return ret; 2351 } 2352 2353 static int __guc_action_deregister_context(struct intel_guc *guc, 2354 u32 guc_id) 2355 { 2356 u32 action[] = { 2357 INTEL_GUC_ACTION_DEREGISTER_CONTEXT, 2358 guc_id, 2359 }; 2360 2361 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2362 G2H_LEN_DW_DEREGISTER_CONTEXT, 2363 true); 2364 } 2365 2366 static int deregister_context(struct intel_context *ce, u32 guc_id) 2367 { 2368 struct intel_guc *guc = ce_to_guc(ce); 2369 2370 GEM_BUG_ON(intel_context_is_child(ce)); 2371 trace_intel_context_deregister(ce); 2372 2373 return __guc_action_deregister_context(guc, guc_id); 2374 } 2375 2376 static inline void clear_children_join_go_memory(struct intel_context *ce) 2377 { 2378 struct parent_scratch *ps = __get_parent_scratch(ce); 2379 int i; 2380 2381 ps->go.semaphore = 0; 2382 for (i = 0; i < ce->parallel.number_children + 1; ++i) 2383 ps->join[i].semaphore = 0; 2384 } 2385 2386 static inline u32 get_children_go_value(struct intel_context *ce) 2387 { 2388 return __get_parent_scratch(ce)->go.semaphore; 2389 } 2390 2391 static inline u32 get_children_join_value(struct intel_context *ce, 2392 u8 child_index) 2393 { 2394 return __get_parent_scratch(ce)->join[child_index].semaphore; 2395 } 2396 2397 struct context_policy { 2398 u32 count; 2399 struct guc_update_context_policy h2g; 2400 }; 2401 2402 static u32 __guc_context_policy_action_size(struct context_policy *policy) 2403 { 2404 size_t bytes = sizeof(policy->h2g.header) + 2405 (sizeof(policy->h2g.klv[0]) * policy->count); 2406 2407 return bytes / sizeof(u32); 2408 } 2409 2410 static void __guc_context_policy_start_klv(struct context_policy *policy, u16 guc_id) 2411 { 2412 policy->h2g.header.action = INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES; 2413 policy->h2g.header.ctx_id = guc_id; 2414 policy->count = 0; 2415 } 2416 2417 #define MAKE_CONTEXT_POLICY_ADD(func, id) \ 2418 static void __guc_context_policy_add_##func(struct context_policy *policy, u32 data) \ 2419 { \ 2420 GEM_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \ 2421 policy->h2g.klv[policy->count].kl = \ 2422 FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ 2423 FIELD_PREP(GUC_KLV_0_LEN, 1); \ 2424 policy->h2g.klv[policy->count].value = data; \ 2425 policy->count++; \ 2426 } 2427 2428 MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) 2429 MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) 2430 MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY) 2431 MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY) 2432 2433 #undef MAKE_CONTEXT_POLICY_ADD 2434 2435 static int __guc_context_set_context_policies(struct intel_guc *guc, 2436 struct context_policy *policy, 2437 bool loop) 2438 { 2439 return guc_submission_send_busy_loop(guc, (u32 *)&policy->h2g, 2440 __guc_context_policy_action_size(policy), 2441 0, loop); 2442 } 2443 2444 static int guc_context_policy_init_v70(struct intel_context *ce, bool loop) 2445 { 2446 struct intel_engine_cs *engine = ce->engine; 2447 struct intel_guc *guc = &engine->gt->uc.guc; 2448 struct context_policy policy; 2449 u32 execution_quantum; 2450 u32 preemption_timeout; 2451 unsigned long flags; 2452 int ret; 2453 2454 /* NB: For both of these, zero means disabled. 
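* The engine properties are in milliseconds; they are converted to
* microseconds (* 1000) below before being added to the policy KLVs.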
*/
2455 GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000,
2456 execution_quantum));
2457 GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000,
2458 preemption_timeout));
2459 execution_quantum = engine->props.timeslice_duration_ms * 1000;
2460 preemption_timeout = engine->props.preempt_timeout_ms * 1000;
2461
2462 __guc_context_policy_start_klv(&policy, ce->guc_id.id);
2463
2464 __guc_context_policy_add_priority(&policy, ce->guc_state.prio);
2465 __guc_context_policy_add_execution_quantum(&policy, execution_quantum);
2466 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
2467
2468 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
2469 __guc_context_policy_add_preempt_to_idle(&policy, 1);
2470
2471 ret = __guc_context_set_context_policies(guc, &policy, loop);
2472
2473 spin_lock_irqsave(&ce->guc_state.lock, flags);
2474 if (ret != 0)
2475 set_context_policy_required(ce);
2476 else
2477 clr_context_policy_required(ce);
2478 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2479
2480 return ret;
2481 }
2482
2483 static void guc_context_policy_init_v69(struct intel_engine_cs *engine,
2484 struct guc_lrc_desc_v69 *desc)
2485 {
2486 desc->policy_flags = 0;
2487
2488 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
2489 desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69;
2490
2491 /* NB: For both of these, zero means disabled. */
2492 GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000,
2493 desc->execution_quantum));
2494 GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000,
2495 desc->preemption_timeout));
2496 desc->execution_quantum = engine->props.timeslice_duration_ms * 1000;
2497 desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
2498 }
2499
2500 static u32 map_guc_prio_to_lrc_desc_prio(u8 prio)
2501 {
2502 /*
2503 * this matches the mapping we do in map_i915_prio_to_guc_prio()
2504 * (e.g. prio < I915_PRIORITY_NORMAL maps to GUC_CLIENT_PRIORITY_NORMAL)
2505 */
2506 switch (prio) {
2507 default:
2508 MISSING_CASE(prio);
2509 fallthrough;
2510 case GUC_CLIENT_PRIORITY_KMD_NORMAL:
2511 return GEN12_CTX_PRIORITY_NORMAL;
2512 case GUC_CLIENT_PRIORITY_NORMAL:
2513 return GEN12_CTX_PRIORITY_LOW;
2514 case GUC_CLIENT_PRIORITY_HIGH:
2515 case GUC_CLIENT_PRIORITY_KMD_HIGH:
2516 return GEN12_CTX_PRIORITY_HIGH;
2517 }
2518 }
2519
2520 static void prepare_context_registration_info_v69(struct intel_context *ce)
2521 {
2522 struct intel_engine_cs *engine = ce->engine;
2523 struct intel_guc *guc = &engine->gt->uc.guc;
2524 u32 ctx_id = ce->guc_id.id;
2525 struct guc_lrc_desc_v69 *desc;
2526 struct intel_context *child;
2527
2528 GEM_BUG_ON(!engine->mask);
2529
2530 /*
2531 * Ensure LRC + CT vmas are in the same region as the write barrier is
2532 * done based on the CT vma region.
2533 */
2534 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
2535 i915_gem_object_is_lmem(ce->ring->vma->obj));
2536
2537 desc = __get_lrc_desc_v69(guc, ctx_id);
2538 GEM_BUG_ON(!desc);
2539 desc->engine_class = engine_class_to_guc_class(engine->class);
2540 desc->engine_submit_mask = engine->logical_mask;
2541 desc->hw_context_desc = ce->lrc.lrca;
2542 desc->priority = ce->guc_state.prio;
2543 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
2544 guc_context_policy_init_v69(engine, desc);
2545
2546 /*
2547 * If context is a parent, we need to register a process descriptor
2548 * describing a work queue and register all child contexts.
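* For v69 both the process descriptor and the work queue live inside the
* parent's context state object: the descriptor at
* __get_parent_scratch_offset() and WQ_SIZE bytes of work queue at
* __get_wq_offset(). The head/tail/status pointers cached in
* ce->parallel.guc below are what later work queue item appends use.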
2549 */
2550 if (intel_context_is_parent(ce)) {
2551 struct guc_process_desc_v69 *pdesc;
2552
2553 ce->parallel.guc.wqi_tail = 0;
2554 ce->parallel.guc.wqi_head = 0;
2555
2556 desc->process_desc = i915_ggtt_offset(ce->state) +
2557 __get_parent_scratch_offset(ce);
2558 desc->wq_addr = i915_ggtt_offset(ce->state) +
2559 __get_wq_offset(ce);
2560 desc->wq_size = WQ_SIZE;
2561
2562 pdesc = __get_process_desc_v69(ce);
2563 memset(pdesc, 0, sizeof(*(pdesc)));
2564 pdesc->stage_id = ce->guc_id.id;
2565 pdesc->wq_base_addr = desc->wq_addr;
2566 pdesc->wq_size_bytes = desc->wq_size;
2567 pdesc->wq_status = WQ_STATUS_ACTIVE;
2568
2569 ce->parallel.guc.wq_head = &pdesc->head;
2570 ce->parallel.guc.wq_tail = &pdesc->tail;
2571 ce->parallel.guc.wq_status = &pdesc->wq_status;
2572
2573 for_each_child(ce, child) {
2574 desc = __get_lrc_desc_v69(guc, child->guc_id.id);
2575
2576 desc->engine_class =
2577 engine_class_to_guc_class(engine->class);
2578 desc->hw_context_desc = child->lrc.lrca;
2579 desc->priority = ce->guc_state.prio;
2580 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
2581 guc_context_policy_init_v69(engine, desc);
2582 }
2583
2584 clear_children_join_go_memory(ce);
2585 }
2586 }
2587
2588 static void prepare_context_registration_info_v70(struct intel_context *ce,
2589 struct guc_ctxt_registration_info *info)
2590 {
2591 struct intel_engine_cs *engine = ce->engine;
2592 struct intel_guc *guc = &engine->gt->uc.guc;
2593 u32 ctx_id = ce->guc_id.id;
2594
2595 GEM_BUG_ON(!engine->mask);
2596
2597 /*
2598 * Ensure LRC + CT vmas are in the same region as the write barrier is
2599 * done based on the CT vma region.
2600 */
2601 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
2602 i915_gem_object_is_lmem(ce->ring->vma->obj));
2603
2604 memset(info, 0, sizeof(*info));
2605 info->context_idx = ctx_id;
2606 info->engine_class = engine_class_to_guc_class(engine->class);
2607 info->engine_submit_mask = engine->logical_mask;
2608 /*
2609 * NB: GuC interface supports 64 bit LRCA even though i915/HW
2610 * only supports 32 bit currently.
2611 */
2612 info->hwlrca_lo = lower_32_bits(ce->lrc.lrca);
2613 info->hwlrca_hi = upper_32_bits(ce->lrc.lrca);
2614 if (engine->flags & I915_ENGINE_HAS_EU_PRIORITY)
2615 info->hwlrca_lo |= map_guc_prio_to_lrc_desc_prio(ce->guc_state.prio);
2616 info->flags = CONTEXT_REGISTRATION_FLAG_KMD;
2617
2618 /*
2619 * If context is a parent, we need to register a process descriptor
2620 * describing a work queue and register all child contexts.
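* Note that unlike v69 there is no separate process descriptor here: the
* work queue descriptor (struct guc_sched_wq_desc) sits in the parent
* scratch page, and its GGTT address plus the work queue base address
* and size are passed to the GuC as lo/hi pairs in the registration info.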
2621 */ 2622 if (intel_context_is_parent(ce)) { 2623 struct guc_sched_wq_desc *wq_desc; 2624 u64 wq_desc_offset, wq_base_offset; 2625 2626 ce->parallel.guc.wqi_tail = 0; 2627 ce->parallel.guc.wqi_head = 0; 2628 2629 wq_desc_offset = i915_ggtt_offset(ce->state) + 2630 __get_parent_scratch_offset(ce); 2631 wq_base_offset = i915_ggtt_offset(ce->state) + 2632 __get_wq_offset(ce); 2633 info->wq_desc_lo = lower_32_bits(wq_desc_offset); 2634 info->wq_desc_hi = upper_32_bits(wq_desc_offset); 2635 info->wq_base_lo = lower_32_bits(wq_base_offset); 2636 info->wq_base_hi = upper_32_bits(wq_base_offset); 2637 info->wq_size = WQ_SIZE; 2638 2639 wq_desc = __get_wq_desc_v70(ce); 2640 memset(wq_desc, 0, sizeof(*wq_desc)); 2641 wq_desc->wq_status = WQ_STATUS_ACTIVE; 2642 2643 ce->parallel.guc.wq_head = &wq_desc->head; 2644 ce->parallel.guc.wq_tail = &wq_desc->tail; 2645 ce->parallel.guc.wq_status = &wq_desc->wq_status; 2646 2647 clear_children_join_go_memory(ce); 2648 } 2649 } 2650 2651 static int try_context_registration(struct intel_context *ce, bool loop) 2652 { 2653 struct intel_engine_cs *engine = ce->engine; 2654 struct intel_runtime_pm *runtime_pm = engine->uncore->rpm; 2655 struct intel_guc *guc = &engine->gt->uc.guc; 2656 intel_wakeref_t wakeref; 2657 u32 ctx_id = ce->guc_id.id; 2658 bool context_registered; 2659 int ret = 0; 2660 2661 GEM_BUG_ON(!sched_state_is_init(ce)); 2662 2663 context_registered = ctx_id_mapped(guc, ctx_id); 2664 2665 clr_ctx_id_mapping(guc, ctx_id); 2666 set_ctx_id_mapping(guc, ctx_id, ce); 2667 2668 /* 2669 * The context_lookup xarray is used to determine if the hardware 2670 * context is currently registered. There are two cases in which it 2671 * could be registered either the guc_id has been stolen from another 2672 * context or the lrc descriptor address of this context has changed. In 2673 * either case the context needs to be deregistered with the GuC before 2674 * registering this context. 2675 */ 2676 if (context_registered) { 2677 bool disabled; 2678 unsigned long flags; 2679 2680 trace_intel_context_steal_guc_id(ce); 2681 GEM_BUG_ON(!loop); 2682 2683 /* Seal race with Reset */ 2684 spin_lock_irqsave(&ce->guc_state.lock, flags); 2685 disabled = submission_disabled(guc); 2686 if (likely(!disabled)) { 2687 set_context_wait_for_deregister_to_register(ce); 2688 intel_context_get(ce); 2689 } 2690 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2691 if (unlikely(disabled)) { 2692 clr_ctx_id_mapping(guc, ctx_id); 2693 return 0; /* Will get registered later */ 2694 } 2695 2696 /* 2697 * If stealing the guc_id, this ce has the same guc_id as the 2698 * context whose guc_id was stolen. 
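* The deregister H2G issued below therefore tears down the stale mapping
* first; registration of this context is deferred until the
* corresponding deregister-complete G2H is processed (hence the
* wait_for_deregister_to_register state set above).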
2699 */
2700 with_intel_runtime_pm(runtime_pm, wakeref)
2701 ret = deregister_context(ce, ce->guc_id.id);
2702 if (unlikely(ret == -ENODEV))
2703 ret = 0; /* Will get registered later */
2704 } else {
2705 with_intel_runtime_pm(runtime_pm, wakeref)
2706 ret = register_context(ce, loop);
2707 if (unlikely(ret == -EBUSY)) {
2708 clr_ctx_id_mapping(guc, ctx_id);
2709 } else if (unlikely(ret == -ENODEV)) {
2710 clr_ctx_id_mapping(guc, ctx_id);
2711 ret = 0; /* Will get registered later */
2712 }
2713 }
2714
2715 return ret;
2716 }
2717
2718 static int __guc_context_pre_pin(struct intel_context *ce,
2719 struct intel_engine_cs *engine,
2720 struct i915_gem_ww_ctx *ww,
2721 void **vaddr)
2722 {
2723 return lrc_pre_pin(ce, engine, ww, vaddr);
2724 }
2725
2726 static int __guc_context_pin(struct intel_context *ce,
2727 struct intel_engine_cs *engine,
2728 void *vaddr)
2729 {
2730 if (i915_ggtt_offset(ce->state) !=
2731 (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK))
2732 set_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
2733
2734 /*
2735 * GuC context gets pinned in guc_request_alloc. See that function for
2736 * an explanation of why.
2737 */
2738
2739 return lrc_pin(ce, engine, vaddr);
2740 }
2741
2742 static int guc_context_pre_pin(struct intel_context *ce,
2743 struct i915_gem_ww_ctx *ww,
2744 void **vaddr)
2745 {
2746 return __guc_context_pre_pin(ce, ce->engine, ww, vaddr);
2747 }
2748
2749 static int guc_context_pin(struct intel_context *ce, void *vaddr)
2750 {
2751 int ret = __guc_context_pin(ce, ce->engine, vaddr);
2752
2753 if (likely(!ret && !intel_context_is_barrier(ce)))
2754 intel_engine_pm_get(ce->engine);
2755
2756 return ret;
2757 }
2758
2759 static void guc_context_unpin(struct intel_context *ce)
2760 {
2761 struct intel_guc *guc = ce_to_guc(ce);
2762
2763 unpin_guc_id(guc, ce);
2764 lrc_unpin(ce);
2765
2766 if (likely(!intel_context_is_barrier(ce)))
2767 intel_engine_pm_put_async(ce->engine);
2768 }
2769
2770 static void guc_context_post_unpin(struct intel_context *ce)
2771 {
2772 lrc_post_unpin(ce);
2773 }
2774
2775 static void __guc_context_sched_enable(struct intel_guc *guc,
2776 struct intel_context *ce)
2777 {
2778 u32 action[] = {
2779 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
2780 ce->guc_id.id,
2781 GUC_CONTEXT_ENABLE
2782 };
2783
2784 trace_intel_context_sched_enable(ce);
2785
2786 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2787 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true);
2788 }
2789
2790 static void __guc_context_sched_disable(struct intel_guc *guc,
2791 struct intel_context *ce,
2792 u16 guc_id)
2793 {
2794 u32 action[] = {
2795 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
2796 guc_id, /* ce->guc_id.id not stable */
2797 GUC_CONTEXT_DISABLE
2798 };
2799
2800 GEM_BUG_ON(guc_id == GUC_INVALID_CONTEXT_ID);
2801
2802 GEM_BUG_ON(intel_context_is_child(ce));
2803 trace_intel_context_sched_disable(ce);
2804
2805 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2806 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true);
2807 }
2808
2809 static void guc_blocked_fence_complete(struct intel_context *ce)
2810 {
2811 lockdep_assert_held(&ce->guc_state.lock);
2812
2813 if (!i915_sw_fence_done(&ce->guc_state.blocked))
2814 i915_sw_fence_complete(&ce->guc_state.blocked);
2815 }
2816
2817 static void guc_blocked_fence_reinit(struct intel_context *ce)
2818 {
2819 lockdep_assert_held(&ce->guc_state.lock);
2820 GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_state.blocked));
2821
2822 /*
2823 * This fence is always complete unless a pending schedule disable is
2824 * outstanding.
We arm the fence here and complete it when we receive 2825 * the pending schedule disable complete message. 2826 */ 2827 i915_sw_fence_fini(&ce->guc_state.blocked); 2828 i915_sw_fence_reinit(&ce->guc_state.blocked); 2829 i915_sw_fence_await(&ce->guc_state.blocked); 2830 i915_sw_fence_commit(&ce->guc_state.blocked); 2831 } 2832 2833 static u16 prep_context_pending_disable(struct intel_context *ce) 2834 { 2835 lockdep_assert_held(&ce->guc_state.lock); 2836 2837 set_context_pending_disable(ce); 2838 clr_context_enabled(ce); 2839 guc_blocked_fence_reinit(ce); 2840 intel_context_get(ce); 2841 2842 return ce->guc_id.id; 2843 } 2844 2845 static struct i915_sw_fence *guc_context_block(struct intel_context *ce) 2846 { 2847 struct intel_guc *guc = ce_to_guc(ce); 2848 unsigned long flags; 2849 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; 2850 intel_wakeref_t wakeref; 2851 u16 guc_id; 2852 bool enabled; 2853 2854 GEM_BUG_ON(intel_context_is_child(ce)); 2855 2856 spin_lock_irqsave(&ce->guc_state.lock, flags); 2857 2858 incr_context_blocked(ce); 2859 2860 enabled = context_enabled(ce); 2861 if (unlikely(!enabled || submission_disabled(guc))) { 2862 if (enabled) 2863 clr_context_enabled(ce); 2864 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2865 return &ce->guc_state.blocked; 2866 } 2867 2868 /* 2869 * We add +2 here as the schedule disable complete CTB handler calls 2870 * intel_context_sched_disable_unpin (-2 to pin_count). 2871 */ 2872 atomic_add(2, &ce->pin_count); 2873 2874 guc_id = prep_context_pending_disable(ce); 2875 2876 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2877 2878 with_intel_runtime_pm(runtime_pm, wakeref) 2879 __guc_context_sched_disable(guc, ce, guc_id); 2880 2881 return &ce->guc_state.blocked; 2882 } 2883 2884 #define SCHED_STATE_MULTI_BLOCKED_MASK \ 2885 (SCHED_STATE_BLOCKED_MASK & ~SCHED_STATE_BLOCKED) 2886 #define SCHED_STATE_NO_UNBLOCK \ 2887 (SCHED_STATE_MULTI_BLOCKED_MASK | \ 2888 SCHED_STATE_PENDING_DISABLE | \ 2889 SCHED_STATE_BANNED) 2890 2891 static bool context_cant_unblock(struct intel_context *ce) 2892 { 2893 lockdep_assert_held(&ce->guc_state.lock); 2894 2895 return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) || 2896 context_guc_id_invalid(ce) || 2897 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id) || 2898 !intel_context_is_pinned(ce); 2899 } 2900 2901 static void guc_context_unblock(struct intel_context *ce) 2902 { 2903 struct intel_guc *guc = ce_to_guc(ce); 2904 unsigned long flags; 2905 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; 2906 intel_wakeref_t wakeref; 2907 bool enable; 2908 2909 GEM_BUG_ON(context_enabled(ce)); 2910 GEM_BUG_ON(intel_context_is_child(ce)); 2911 2912 spin_lock_irqsave(&ce->guc_state.lock, flags); 2913 2914 if (unlikely(submission_disabled(guc) || 2915 context_cant_unblock(ce))) { 2916 enable = false; 2917 } else { 2918 enable = true; 2919 set_context_pending_enable(ce); 2920 set_context_enabled(ce); 2921 intel_context_get(ce); 2922 } 2923 2924 decr_context_blocked(ce); 2925 2926 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2927 2928 if (enable) { 2929 with_intel_runtime_pm(runtime_pm, wakeref) 2930 __guc_context_sched_enable(guc, ce); 2931 } 2932 } 2933 2934 static void guc_context_cancel_request(struct intel_context *ce, 2935 struct i915_request *rq) 2936 { 2937 struct intel_context *block_context = 2938 request_to_scheduling_context(rq); 2939 2940 if (i915_sw_fence_signaled(&rq->submit)) { 2941 struct i915_sw_fence *fence; 2942 2943 intel_context_get(ce); 2944 fence = 
guc_context_block(block_context); 2945 i915_sw_fence_wait(fence); 2946 if (!i915_request_completed(rq)) { 2947 __i915_request_skip(rq); 2948 guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head), 2949 true); 2950 } 2951 2952 guc_context_unblock(block_context); 2953 intel_context_put(ce); 2954 } 2955 } 2956 2957 static void __guc_context_set_preemption_timeout(struct intel_guc *guc, 2958 u16 guc_id, 2959 u32 preemption_timeout) 2960 { 2961 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) { 2962 struct context_policy policy; 2963 2964 __guc_context_policy_start_klv(&policy, guc_id); 2965 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); 2966 __guc_context_set_context_policies(guc, &policy, true); 2967 } else { 2968 u32 action[] = { 2969 INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT, 2970 guc_id, 2971 preemption_timeout 2972 }; 2973 2974 intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); 2975 } 2976 } 2977 2978 static void 2979 guc_context_revoke(struct intel_context *ce, struct i915_request *rq, 2980 unsigned int preempt_timeout_ms) 2981 { 2982 struct intel_guc *guc = ce_to_guc(ce); 2983 struct intel_runtime_pm *runtime_pm = 2984 &ce->engine->gt->i915->runtime_pm; 2985 intel_wakeref_t wakeref; 2986 unsigned long flags; 2987 2988 GEM_BUG_ON(intel_context_is_child(ce)); 2989 2990 guc_flush_submissions(guc); 2991 2992 spin_lock_irqsave(&ce->guc_state.lock, flags); 2993 set_context_banned(ce); 2994 2995 if (submission_disabled(guc) || 2996 (!context_enabled(ce) && !context_pending_disable(ce))) { 2997 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2998 2999 guc_cancel_context_requests(ce); 3000 intel_engine_signal_breadcrumbs(ce->engine); 3001 } else if (!context_pending_disable(ce)) { 3002 u16 guc_id; 3003 3004 /* 3005 * We add +2 here as the schedule disable complete CTB handler 3006 * calls intel_context_sched_disable_unpin (-2 to pin_count). 3007 */ 3008 atomic_add(2, &ce->pin_count); 3009 3010 guc_id = prep_context_pending_disable(ce); 3011 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3012 3013 /* 3014 * In addition to disabling scheduling, set the preemption 3015 * timeout to the minimum value (1 us) so the banned context 3016 * gets kicked off the HW ASAP. 
3017 */ 3018 with_intel_runtime_pm(runtime_pm, wakeref) { 3019 __guc_context_set_preemption_timeout(guc, guc_id, 3020 preempt_timeout_ms); 3021 __guc_context_sched_disable(guc, ce, guc_id); 3022 } 3023 } else { 3024 if (!context_guc_id_invalid(ce)) 3025 with_intel_runtime_pm(runtime_pm, wakeref) 3026 __guc_context_set_preemption_timeout(guc, 3027 ce->guc_id.id, 3028 preempt_timeout_ms); 3029 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3030 } 3031 } 3032 3033 static void do_sched_disable(struct intel_guc *guc, struct intel_context *ce, 3034 unsigned long flags) 3035 __releases(ce->guc_state.lock) 3036 { 3037 struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm; 3038 intel_wakeref_t wakeref; 3039 u16 guc_id; 3040 3041 lockdep_assert_held(&ce->guc_state.lock); 3042 guc_id = prep_context_pending_disable(ce); 3043 3044 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3045 3046 with_intel_runtime_pm(runtime_pm, wakeref) 3047 __guc_context_sched_disable(guc, ce, guc_id); 3048 } 3049 3050 static bool bypass_sched_disable(struct intel_guc *guc, 3051 struct intel_context *ce) 3052 { 3053 lockdep_assert_held(&ce->guc_state.lock); 3054 GEM_BUG_ON(intel_context_is_child(ce)); 3055 3056 if (submission_disabled(guc) || context_guc_id_invalid(ce) || 3057 !ctx_id_mapped(guc, ce->guc_id.id)) { 3058 clr_context_enabled(ce); 3059 return true; 3060 } 3061 3062 return !context_enabled(ce); 3063 } 3064 3065 static void __delay_sched_disable(struct work_struct *wrk) 3066 { 3067 struct intel_context *ce = 3068 container_of(wrk, typeof(*ce), guc_state.sched_disable_delay_work.work); 3069 struct intel_guc *guc = ce_to_guc(ce); 3070 unsigned long flags; 3071 3072 spin_lock_irqsave(&ce->guc_state.lock, flags); 3073 3074 if (bypass_sched_disable(guc, ce)) { 3075 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3076 intel_context_sched_disable_unpin(ce); 3077 } else { 3078 do_sched_disable(guc, ce, flags); 3079 } 3080 } 3081 3082 static bool guc_id_pressure(struct intel_guc *guc, struct intel_context *ce) 3083 { 3084 /* 3085 * parent contexts are perma-pinned, if we are unpinning do schedule 3086 * disable immediately. 3087 */ 3088 if (intel_context_is_parent(ce)) 3089 return true; 3090 3091 /* 3092 * If we are beyond the threshold for avail guc_ids, do schedule disable immediately. 
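* i.e. once guc_ids_in_use exceeds sched_disable_gucid_threshold it is
* better to disable scheduling (and let the guc_id be recycled) promptly
* than to hold it across the delay window.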
3093 */ 3094 return guc->submission_state.guc_ids_in_use > 3095 guc->submission_state.sched_disable_gucid_threshold; 3096 } 3097 3098 static void guc_context_sched_disable(struct intel_context *ce) 3099 { 3100 struct intel_guc *guc = ce_to_guc(ce); 3101 u64 delay = guc->submission_state.sched_disable_delay_ms; 3102 unsigned long flags; 3103 3104 spin_lock_irqsave(&ce->guc_state.lock, flags); 3105 3106 if (bypass_sched_disable(guc, ce)) { 3107 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3108 intel_context_sched_disable_unpin(ce); 3109 } else if (!intel_context_is_closed(ce) && !guc_id_pressure(guc, ce) && 3110 delay) { 3111 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3112 mod_delayed_work(system_unbound_wq, 3113 &ce->guc_state.sched_disable_delay_work, 3114 msecs_to_jiffies(delay)); 3115 } else { 3116 do_sched_disable(guc, ce, flags); 3117 } 3118 } 3119 3120 static void guc_context_close(struct intel_context *ce) 3121 { 3122 unsigned long flags; 3123 3124 if (test_bit(CONTEXT_GUC_INIT, &ce->flags) && 3125 cancel_delayed_work(&ce->guc_state.sched_disable_delay_work)) 3126 __delay_sched_disable(&ce->guc_state.sched_disable_delay_work.work); 3127 3128 spin_lock_irqsave(&ce->guc_state.lock, flags); 3129 set_context_close_done(ce); 3130 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3131 } 3132 3133 static inline void guc_lrc_desc_unpin(struct intel_context *ce) 3134 { 3135 struct intel_guc *guc = ce_to_guc(ce); 3136 struct intel_gt *gt = guc_to_gt(guc); 3137 unsigned long flags; 3138 bool disabled; 3139 3140 GEM_BUG_ON(!intel_gt_pm_is_awake(gt)); 3141 GEM_BUG_ON(!ctx_id_mapped(guc, ce->guc_id.id)); 3142 GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id)); 3143 GEM_BUG_ON(context_enabled(ce)); 3144 3145 /* Seal race with Reset */ 3146 spin_lock_irqsave(&ce->guc_state.lock, flags); 3147 disabled = submission_disabled(guc); 3148 if (likely(!disabled)) { 3149 __intel_gt_pm_get(gt); 3150 set_context_destroyed(ce); 3151 clr_context_registered(ce); 3152 } 3153 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3154 if (unlikely(disabled)) { 3155 release_guc_id(guc, ce); 3156 __guc_context_destroy(ce); 3157 return; 3158 } 3159 3160 deregister_context(ce, ce->guc_id.id); 3161 } 3162 3163 static void __guc_context_destroy(struct intel_context *ce) 3164 { 3165 GEM_BUG_ON(ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] || 3166 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] || 3167 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || 3168 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]); 3169 3170 lrc_fini(ce); 3171 intel_context_fini(ce); 3172 3173 if (intel_engine_is_virtual(ce->engine)) { 3174 struct guc_virtual_engine *ve = 3175 container_of(ce, typeof(*ve), context); 3176 3177 if (ve->base.breadcrumbs) 3178 intel_breadcrumbs_put(ve->base.breadcrumbs); 3179 3180 kfree(ve); 3181 } else { 3182 intel_context_free(ce); 3183 } 3184 } 3185 3186 static void guc_flush_destroyed_contexts(struct intel_guc *guc) 3187 { 3188 struct intel_context *ce; 3189 unsigned long flags; 3190 3191 GEM_BUG_ON(!submission_disabled(guc) && 3192 guc_submission_initialized(guc)); 3193 3194 while (!list_empty(&guc->submission_state.destroyed_contexts)) { 3195 spin_lock_irqsave(&guc->submission_state.lock, flags); 3196 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts, 3197 struct intel_context, 3198 destroyed_link); 3199 if (ce) 3200 list_del_init(&ce->destroyed_link); 3201 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3202 3203 if (!ce) 3204 break; 
3205 3206 release_guc_id(guc, ce); 3207 __guc_context_destroy(ce); 3208 } 3209 } 3210 3211 static void deregister_destroyed_contexts(struct intel_guc *guc) 3212 { 3213 struct intel_context *ce; 3214 unsigned long flags; 3215 3216 while (!list_empty(&guc->submission_state.destroyed_contexts)) { 3217 spin_lock_irqsave(&guc->submission_state.lock, flags); 3218 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts, 3219 struct intel_context, 3220 destroyed_link); 3221 if (ce) 3222 list_del_init(&ce->destroyed_link); 3223 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3224 3225 if (!ce) 3226 break; 3227 3228 guc_lrc_desc_unpin(ce); 3229 } 3230 } 3231 3232 static void destroyed_worker_func(struct work_struct *w) 3233 { 3234 struct intel_guc *guc = container_of(w, struct intel_guc, 3235 submission_state.destroyed_worker); 3236 struct intel_gt *gt = guc_to_gt(guc); 3237 int tmp; 3238 3239 with_intel_gt_pm(gt, tmp) 3240 deregister_destroyed_contexts(guc); 3241 } 3242 3243 static void guc_context_destroy(struct kref *kref) 3244 { 3245 struct intel_context *ce = container_of(kref, typeof(*ce), ref); 3246 struct intel_guc *guc = ce_to_guc(ce); 3247 unsigned long flags; 3248 bool destroy; 3249 3250 /* 3251 * If the guc_id is invalid this context has been stolen and we can free 3252 * it immediately. Also can be freed immediately if the context is not 3253 * registered with the GuC or the GuC is in the middle of a reset. 3254 */ 3255 spin_lock_irqsave(&guc->submission_state.lock, flags); 3256 destroy = submission_disabled(guc) || context_guc_id_invalid(ce) || 3257 !ctx_id_mapped(guc, ce->guc_id.id); 3258 if (likely(!destroy)) { 3259 if (!list_empty(&ce->guc_id.link)) 3260 list_del_init(&ce->guc_id.link); 3261 list_add_tail(&ce->destroyed_link, 3262 &guc->submission_state.destroyed_contexts); 3263 } else { 3264 __release_guc_id(guc, ce); 3265 } 3266 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3267 if (unlikely(destroy)) { 3268 __guc_context_destroy(ce); 3269 return; 3270 } 3271 3272 /* 3273 * We use a worker to issue the H2G to deregister the context as we can 3274 * take the GT PM for the first time which isn't allowed from an atomic 3275 * context. 
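* The worker (destroyed_worker_func) takes a GT PM wakeref and then
* walks submission_state.destroyed_contexts, deregistering each entry
* via guc_lrc_desc_unpin().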
3276 */ 3277 queue_work(system_unbound_wq, &guc->submission_state.destroyed_worker); 3278 } 3279 3280 static int guc_context_alloc(struct intel_context *ce) 3281 { 3282 return lrc_alloc(ce, ce->engine); 3283 } 3284 3285 static void __guc_context_set_prio(struct intel_guc *guc, 3286 struct intel_context *ce) 3287 { 3288 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) { 3289 struct context_policy policy; 3290 3291 __guc_context_policy_start_klv(&policy, ce->guc_id.id); 3292 __guc_context_policy_add_priority(&policy, ce->guc_state.prio); 3293 __guc_context_set_context_policies(guc, &policy, true); 3294 } else { 3295 u32 action[] = { 3296 INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY, 3297 ce->guc_id.id, 3298 ce->guc_state.prio, 3299 }; 3300 3301 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); 3302 } 3303 } 3304 3305 static void guc_context_set_prio(struct intel_guc *guc, 3306 struct intel_context *ce, 3307 u8 prio) 3308 { 3309 GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH || 3310 prio > GUC_CLIENT_PRIORITY_NORMAL); 3311 lockdep_assert_held(&ce->guc_state.lock); 3312 3313 if (ce->guc_state.prio == prio || submission_disabled(guc) || 3314 !context_registered(ce)) { 3315 ce->guc_state.prio = prio; 3316 return; 3317 } 3318 3319 ce->guc_state.prio = prio; 3320 __guc_context_set_prio(guc, ce); 3321 3322 trace_intel_context_set_prio(ce); 3323 } 3324 3325 static inline u8 map_i915_prio_to_guc_prio(int prio) 3326 { 3327 if (prio == I915_PRIORITY_NORMAL) 3328 return GUC_CLIENT_PRIORITY_KMD_NORMAL; 3329 else if (prio < I915_PRIORITY_NORMAL) 3330 return GUC_CLIENT_PRIORITY_NORMAL; 3331 else if (prio < I915_PRIORITY_DISPLAY) 3332 return GUC_CLIENT_PRIORITY_HIGH; 3333 else 3334 return GUC_CLIENT_PRIORITY_KMD_HIGH; 3335 } 3336 3337 static inline void add_context_inflight_prio(struct intel_context *ce, 3338 u8 guc_prio) 3339 { 3340 lockdep_assert_held(&ce->guc_state.lock); 3341 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); 3342 3343 ++ce->guc_state.prio_count[guc_prio]; 3344 3345 /* Overflow protection */ 3346 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); 3347 } 3348 3349 static inline void sub_context_inflight_prio(struct intel_context *ce, 3350 u8 guc_prio) 3351 { 3352 lockdep_assert_held(&ce->guc_state.lock); 3353 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); 3354 3355 /* Underflow protection */ 3356 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); 3357 3358 --ce->guc_state.prio_count[guc_prio]; 3359 } 3360 3361 static inline void update_context_prio(struct intel_context *ce) 3362 { 3363 struct intel_guc *guc = &ce->engine->gt->uc.guc; 3364 int i; 3365 3366 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0); 3367 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL); 3368 3369 lockdep_assert_held(&ce->guc_state.lock); 3370 3371 for (i = 0; i < ARRAY_SIZE(ce->guc_state.prio_count); ++i) { 3372 if (ce->guc_state.prio_count[i]) { 3373 guc_context_set_prio(guc, ce, i); 3374 break; 3375 } 3376 } 3377 } 3378 3379 static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio) 3380 { 3381 /* Lower value is higher priority */ 3382 return new_guc_prio < old_guc_prio; 3383 } 3384 3385 static void add_to_context(struct i915_request *rq) 3386 { 3387 struct intel_context *ce = request_to_scheduling_context(rq); 3388 u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq)); 3389 3390 GEM_BUG_ON(intel_context_is_child(ce)); 3391 GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI); 3392 3393 spin_lock(&ce->guc_state.lock); 3394 
list_move_tail(&rq->sched.link, &ce->guc_state.requests); 3395 3396 if (rq->guc_prio == GUC_PRIO_INIT) { 3397 rq->guc_prio = new_guc_prio; 3398 add_context_inflight_prio(ce, rq->guc_prio); 3399 } else if (new_guc_prio_higher(rq->guc_prio, new_guc_prio)) { 3400 sub_context_inflight_prio(ce, rq->guc_prio); 3401 rq->guc_prio = new_guc_prio; 3402 add_context_inflight_prio(ce, rq->guc_prio); 3403 } 3404 update_context_prio(ce); 3405 3406 spin_unlock(&ce->guc_state.lock); 3407 } 3408 3409 static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce) 3410 { 3411 lockdep_assert_held(&ce->guc_state.lock); 3412 3413 if (rq->guc_prio != GUC_PRIO_INIT && 3414 rq->guc_prio != GUC_PRIO_FINI) { 3415 sub_context_inflight_prio(ce, rq->guc_prio); 3416 update_context_prio(ce); 3417 } 3418 rq->guc_prio = GUC_PRIO_FINI; 3419 } 3420 3421 static void remove_from_context(struct i915_request *rq) 3422 { 3423 struct intel_context *ce = request_to_scheduling_context(rq); 3424 3425 GEM_BUG_ON(intel_context_is_child(ce)); 3426 3427 spin_lock_irq(&ce->guc_state.lock); 3428 3429 list_del_init(&rq->sched.link); 3430 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 3431 3432 /* Prevent further __await_execution() registering a cb, then flush */ 3433 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); 3434 3435 guc_prio_fini(rq, ce); 3436 3437 spin_unlock_irq(&ce->guc_state.lock); 3438 3439 atomic_dec(&ce->guc_id.ref); 3440 i915_request_notify_execute_cb_imm(rq); 3441 } 3442 3443 static const struct intel_context_ops guc_context_ops = { 3444 .alloc = guc_context_alloc, 3445 3446 .close = guc_context_close, 3447 3448 .pre_pin = guc_context_pre_pin, 3449 .pin = guc_context_pin, 3450 .unpin = guc_context_unpin, 3451 .post_unpin = guc_context_post_unpin, 3452 3453 .revoke = guc_context_revoke, 3454 3455 .cancel_request = guc_context_cancel_request, 3456 3457 .enter = intel_context_enter_engine, 3458 .exit = intel_context_exit_engine, 3459 3460 .sched_disable = guc_context_sched_disable, 3461 3462 .reset = lrc_reset, 3463 .destroy = guc_context_destroy, 3464 3465 .create_virtual = guc_create_virtual, 3466 .create_parallel = guc_create_parallel, 3467 }; 3468 3469 static void submit_work_cb(struct irq_work *wrk) 3470 { 3471 struct i915_request *rq = container_of(wrk, typeof(*rq), submit_work); 3472 3473 might_lock(&rq->engine->sched_engine->lock); 3474 i915_sw_fence_complete(&rq->submit); 3475 } 3476 3477 static void __guc_signal_context_fence(struct intel_context *ce) 3478 { 3479 struct i915_request *rq, *rn; 3480 3481 lockdep_assert_held(&ce->guc_state.lock); 3482 3483 if (!list_empty(&ce->guc_state.fences)) 3484 trace_intel_context_fence_release(ce); 3485 3486 /* 3487 * Use an IRQ to ensure locking order of sched_engine->lock -> 3488 * ce->guc_state.lock is preserved. 
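* Completing rq->submit directly here could take sched_engine->lock
* while ce->guc_state.lock is already held, inverting that order, so
* submit_work_cb() instead completes the fence from irq_work context
* (note the might_lock() annotation there).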
3489 */ 3490 list_for_each_entry_safe(rq, rn, &ce->guc_state.fences, 3491 guc_fence_link) { 3492 list_del(&rq->guc_fence_link); 3493 irq_work_queue(&rq->submit_work); 3494 } 3495 3496 INIT_LIST_HEAD(&ce->guc_state.fences); 3497 } 3498 3499 static void guc_signal_context_fence(struct intel_context *ce) 3500 { 3501 unsigned long flags; 3502 3503 GEM_BUG_ON(intel_context_is_child(ce)); 3504 3505 spin_lock_irqsave(&ce->guc_state.lock, flags); 3506 clr_context_wait_for_deregister_to_register(ce); 3507 __guc_signal_context_fence(ce); 3508 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3509 } 3510 3511 static bool context_needs_register(struct intel_context *ce, bool new_guc_id) 3512 { 3513 return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) || 3514 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id)) && 3515 !submission_disabled(ce_to_guc(ce)); 3516 } 3517 3518 static void guc_context_init(struct intel_context *ce) 3519 { 3520 const struct i915_gem_context *ctx; 3521 int prio = I915_CONTEXT_DEFAULT_PRIORITY; 3522 3523 rcu_read_lock(); 3524 ctx = rcu_dereference(ce->gem_context); 3525 if (ctx) 3526 prio = ctx->sched.priority; 3527 rcu_read_unlock(); 3528 3529 ce->guc_state.prio = map_i915_prio_to_guc_prio(prio); 3530 3531 INIT_DELAYED_WORK(&ce->guc_state.sched_disable_delay_work, 3532 __delay_sched_disable); 3533 3534 set_bit(CONTEXT_GUC_INIT, &ce->flags); 3535 } 3536 3537 static int guc_request_alloc(struct i915_request *rq) 3538 { 3539 struct intel_context *ce = request_to_scheduling_context(rq); 3540 struct intel_guc *guc = ce_to_guc(ce); 3541 unsigned long flags; 3542 int ret; 3543 3544 GEM_BUG_ON(!intel_context_is_pinned(rq->context)); 3545 3546 /* 3547 * Flush enough space to reduce the likelihood of waiting after 3548 * we start building the request - in which case we will just 3549 * have to repeat work. 3550 */ 3551 rq->reserved_space += GUC_REQUEST_SIZE; 3552 3553 /* 3554 * Note that after this point, we have committed to using 3555 * this request as it is being used to both track the 3556 * state of engine initialisation and liveness of the 3557 * golden renderstate above. Think twice before you try 3558 * to cancel/unwind this request now. 3559 */ 3560 3561 /* Unconditionally invalidate GPU caches and TLBs. */ 3562 ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE); 3563 if (ret) 3564 return ret; 3565 3566 rq->reserved_space -= GUC_REQUEST_SIZE; 3567 3568 if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags))) 3569 guc_context_init(ce); 3570 3571 /* 3572 * If the context gets closed while the execbuf is ongoing, the context 3573 * close code will race with the below code to cancel the delayed work. 3574 * If the context close wins the race and cancels the work, it will 3575 * immediately call the sched disable (see guc_context_close), so there 3576 * is a chance we can get past this check while the sched_disable code 3577 * is being executed. To make sure that code completes before we check 3578 * the status further down, we wait for the close process to complete. 3579 * Else, this code path could send a request down thinking that the 3580 * context is still in a schedule-enable mode while the GuC ends up 3581 * dropping the request completely because the disable did go from the 3582 * context_close path right to GuC just prior. In the event the CT is 3583 * full, we could potentially need to wait up to 1.5 seconds. 
*/
3584
3585 if (cancel_delayed_work_sync(&ce->guc_state.sched_disable_delay_work))
3586 intel_context_sched_disable_unpin(ce);
3587 else if (intel_context_is_closed(ce))
3588 if (wait_for(context_close_done(ce), 1500))
3589 guc_warn(guc, "timed out waiting on context sched close before realloc\n");
3590 /*
3591 * Call pin_guc_id here rather than in the pinning step as with
3592 * dma_resv, contexts can be repeatedly pinned / unpinned thrashing the
3593 * guc_id and creating horrible race conditions. This is especially bad
3594 * when guc_ids are being stolen due to over subscription. By the time
3595 * this function is reached, it is guaranteed that the guc_id will be
3596 * persistent until the generated request is retired, thus sealing these
3597 * race conditions. It is still safe to fail here if guc_ids are
3598 * exhausted and return -EAGAIN to the user indicating that they can try
3599 * again in the future.
3600 *
3601 * There is no need for a lock here as the timeline mutex ensures at
3602 * most one context can be executing this code path at once. The
3603 * guc_id_ref is incremented once for every request in flight and
3604 * decremented on each retire. When it is zero, a lock around the
3605 * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id.
3606 */
3607 if (atomic_add_unless(&ce->guc_id.ref, 1, 0))
3608 goto out;
3609
3610 ret = pin_guc_id(guc, ce); /* returns 1 if new guc_id assigned */
3611 if (unlikely(ret < 0))
3612 return ret;
3613 if (context_needs_register(ce, !!ret)) {
3614 ret = try_context_registration(ce, true);
3615 if (unlikely(ret)) { /* unwind */
3616 if (ret == -EPIPE) {
3617 disable_submission(guc);
3618 goto out; /* GPU will be reset */
3619 }
3620 atomic_dec(&ce->guc_id.ref);
3621 unpin_guc_id(guc, ce);
3622 return ret;
3623 }
3624 }
3625
3626 clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
3627
3628 out:
3629 /*
3630 * We block all requests on this context if a G2H is pending for a
3631 * schedule disable or context deregistration as the GuC will fail a
3632 * schedule enable or context registration if either G2H is pending
3633 * respectively. Once a G2H returns, the fence is released that is
3634 * blocking these requests (see guc_signal_context_fence).
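* Concretely: the request takes an extra await on its submit fence below
* and is parked on ce->guc_state.fences; once the G2H lands,
* guc_signal_context_fence() queues submit_work_cb() via irq_work to
* drop that await and let the request proceed.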
3635 */ 3636 spin_lock_irqsave(&ce->guc_state.lock, flags); 3637 if (context_wait_for_deregister_to_register(ce) || 3638 context_pending_disable(ce)) { 3639 init_irq_work(&rq->submit_work, submit_work_cb); 3640 i915_sw_fence_await(&rq->submit); 3641 3642 list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences); 3643 } 3644 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3645 3646 return 0; 3647 } 3648 3649 static int guc_virtual_context_pre_pin(struct intel_context *ce, 3650 struct i915_gem_ww_ctx *ww, 3651 void **vaddr) 3652 { 3653 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3654 3655 return __guc_context_pre_pin(ce, engine, ww, vaddr); 3656 } 3657 3658 static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr) 3659 { 3660 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3661 int ret = __guc_context_pin(ce, engine, vaddr); 3662 intel_engine_mask_t tmp, mask = ce->engine->mask; 3663 3664 if (likely(!ret)) 3665 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3666 intel_engine_pm_get(engine); 3667 3668 return ret; 3669 } 3670 3671 static void guc_virtual_context_unpin(struct intel_context *ce) 3672 { 3673 intel_engine_mask_t tmp, mask = ce->engine->mask; 3674 struct intel_engine_cs *engine; 3675 struct intel_guc *guc = ce_to_guc(ce); 3676 3677 GEM_BUG_ON(context_enabled(ce)); 3678 GEM_BUG_ON(intel_context_is_barrier(ce)); 3679 3680 unpin_guc_id(guc, ce); 3681 lrc_unpin(ce); 3682 3683 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3684 intel_engine_pm_put_async(engine); 3685 } 3686 3687 static void guc_virtual_context_enter(struct intel_context *ce) 3688 { 3689 intel_engine_mask_t tmp, mask = ce->engine->mask; 3690 struct intel_engine_cs *engine; 3691 3692 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3693 intel_engine_pm_get(engine); 3694 3695 intel_timeline_enter(ce->timeline); 3696 } 3697 3698 static void guc_virtual_context_exit(struct intel_context *ce) 3699 { 3700 intel_engine_mask_t tmp, mask = ce->engine->mask; 3701 struct intel_engine_cs *engine; 3702 3703 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3704 intel_engine_pm_put(engine); 3705 3706 intel_timeline_exit(ce->timeline); 3707 } 3708 3709 static int guc_virtual_context_alloc(struct intel_context *ce) 3710 { 3711 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3712 3713 return lrc_alloc(ce, engine); 3714 } 3715 3716 static const struct intel_context_ops virtual_guc_context_ops = { 3717 .alloc = guc_virtual_context_alloc, 3718 3719 .close = guc_context_close, 3720 3721 .pre_pin = guc_virtual_context_pre_pin, 3722 .pin = guc_virtual_context_pin, 3723 .unpin = guc_virtual_context_unpin, 3724 .post_unpin = guc_context_post_unpin, 3725 3726 .revoke = guc_context_revoke, 3727 3728 .cancel_request = guc_context_cancel_request, 3729 3730 .enter = guc_virtual_context_enter, 3731 .exit = guc_virtual_context_exit, 3732 3733 .sched_disable = guc_context_sched_disable, 3734 3735 .destroy = guc_context_destroy, 3736 3737 .get_sibling = guc_virtual_get_sibling, 3738 }; 3739 3740 static int guc_parent_context_pin(struct intel_context *ce, void *vaddr) 3741 { 3742 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3743 struct intel_guc *guc = ce_to_guc(ce); 3744 int ret; 3745 3746 GEM_BUG_ON(!intel_context_is_parent(ce)); 3747 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3748 3749 ret = pin_guc_id(guc, ce); 3750 if (unlikely(ret < 0)) 3751 return ret; 3752 3753 return 
__guc_context_pin(ce, engine, vaddr); 3754 } 3755 3756 static int guc_child_context_pin(struct intel_context *ce, void *vaddr) 3757 { 3758 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3759 3760 GEM_BUG_ON(!intel_context_is_child(ce)); 3761 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3762 3763 __intel_context_pin(ce->parallel.parent); 3764 return __guc_context_pin(ce, engine, vaddr); 3765 } 3766 3767 static void guc_parent_context_unpin(struct intel_context *ce) 3768 { 3769 struct intel_guc *guc = ce_to_guc(ce); 3770 3771 GEM_BUG_ON(context_enabled(ce)); 3772 GEM_BUG_ON(intel_context_is_barrier(ce)); 3773 GEM_BUG_ON(!intel_context_is_parent(ce)); 3774 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3775 3776 unpin_guc_id(guc, ce); 3777 lrc_unpin(ce); 3778 } 3779 3780 static void guc_child_context_unpin(struct intel_context *ce) 3781 { 3782 GEM_BUG_ON(context_enabled(ce)); 3783 GEM_BUG_ON(intel_context_is_barrier(ce)); 3784 GEM_BUG_ON(!intel_context_is_child(ce)); 3785 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3786 3787 lrc_unpin(ce); 3788 } 3789 3790 static void guc_child_context_post_unpin(struct intel_context *ce) 3791 { 3792 GEM_BUG_ON(!intel_context_is_child(ce)); 3793 GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent)); 3794 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3795 3796 lrc_post_unpin(ce); 3797 intel_context_unpin(ce->parallel.parent); 3798 } 3799 3800 static void guc_child_context_destroy(struct kref *kref) 3801 { 3802 struct intel_context *ce = container_of(kref, typeof(*ce), ref); 3803 3804 __guc_context_destroy(ce); 3805 } 3806 3807 static const struct intel_context_ops virtual_parent_context_ops = { 3808 .alloc = guc_virtual_context_alloc, 3809 3810 .close = guc_context_close, 3811 3812 .pre_pin = guc_context_pre_pin, 3813 .pin = guc_parent_context_pin, 3814 .unpin = guc_parent_context_unpin, 3815 .post_unpin = guc_context_post_unpin, 3816 3817 .revoke = guc_context_revoke, 3818 3819 .cancel_request = guc_context_cancel_request, 3820 3821 .enter = guc_virtual_context_enter, 3822 .exit = guc_virtual_context_exit, 3823 3824 .sched_disable = guc_context_sched_disable, 3825 3826 .destroy = guc_context_destroy, 3827 3828 .get_sibling = guc_virtual_get_sibling, 3829 }; 3830 3831 static const struct intel_context_ops virtual_child_context_ops = { 3832 .alloc = guc_virtual_context_alloc, 3833 3834 .pre_pin = guc_context_pre_pin, 3835 .pin = guc_child_context_pin, 3836 .unpin = guc_child_context_unpin, 3837 .post_unpin = guc_child_context_post_unpin, 3838 3839 .cancel_request = guc_context_cancel_request, 3840 3841 .enter = guc_virtual_context_enter, 3842 .exit = guc_virtual_context_exit, 3843 3844 .destroy = guc_child_context_destroy, 3845 3846 .get_sibling = guc_virtual_get_sibling, 3847 }; 3848 3849 /* 3850 * The below override of the breadcrumbs is enabled when the user configures a 3851 * context for parallel submission (multi-lrc, parent-child). 3852 * 3853 * The overridden breadcrumbs implement an algorithm which allows the GuC to 3854 * safely preempt all the hw contexts configured for parallel submission 3855 * between each BB. The contract between the i915 and GuC is that if the parent 3856 * context can be preempted, all the children can be preempted, and the GuC will 3857 * always try to preempt the parent before the children. A handshake between the 3858 * parent / children breadcrumbs ensures the i915 holds up its end of the deal, 3859 * creating a window to preempt between each set of BBs.
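 *
 * A rough sketch of the handshake implemented by the emitters further
 * down in this file:
 *
 *   BB start: the parent waits for every child to signal its join
 *             semaphore, disables arbitration and then writes the go
 *             semaphore; each child signals join, waits for go, disables
 *             arbitration and jumps to its batch.
 *   Fini:     each child re-enables arbitration and signals join; the
 *             parent waits for all joins, re-enables arbitration and
 *             writes go.
 *
 * Preemption is therefore only possible in the window between a set of
 * fini breadcrumbs and the next set of BB starts.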
3860 */ 3861 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq, 3862 u64 offset, u32 len, 3863 const unsigned int flags); 3864 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq, 3865 u64 offset, u32 len, 3866 const unsigned int flags); 3867 static u32 * 3868 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 3869 u32 *cs); 3870 static u32 * 3871 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 3872 u32 *cs); 3873 3874 static struct intel_context * 3875 guc_create_parallel(struct intel_engine_cs **engines, 3876 unsigned int num_siblings, 3877 unsigned int width) 3878 { 3879 struct intel_engine_cs **siblings = NULL; 3880 struct intel_context *parent = NULL, *ce, *err; 3881 int i, j; 3882 3883 siblings = kmalloc_array(num_siblings, 3884 sizeof(*siblings), 3885 GFP_KERNEL); 3886 if (!siblings) 3887 return ERR_PTR(-ENOMEM); 3888 3889 for (i = 0; i < width; ++i) { 3890 for (j = 0; j < num_siblings; ++j) 3891 siblings[j] = engines[i * num_siblings + j]; 3892 3893 ce = intel_engine_create_virtual(siblings, num_siblings, 3894 FORCE_VIRTUAL); 3895 if (IS_ERR(ce)) { 3896 err = ERR_CAST(ce); 3897 goto unwind; 3898 } 3899 3900 if (i == 0) { 3901 parent = ce; 3902 parent->ops = &virtual_parent_context_ops; 3903 } else { 3904 ce->ops = &virtual_child_context_ops; 3905 intel_context_bind_parent_child(parent, ce); 3906 } 3907 } 3908 3909 parent->parallel.fence_context = dma_fence_context_alloc(1); 3910 3911 parent->engine->emit_bb_start = 3912 emit_bb_start_parent_no_preempt_mid_batch; 3913 parent->engine->emit_fini_breadcrumb = 3914 emit_fini_breadcrumb_parent_no_preempt_mid_batch; 3915 parent->engine->emit_fini_breadcrumb_dw = 3916 12 + 4 * parent->parallel.number_children; 3917 for_each_child(parent, ce) { 3918 ce->engine->emit_bb_start = 3919 emit_bb_start_child_no_preempt_mid_batch; 3920 ce->engine->emit_fini_breadcrumb = 3921 emit_fini_breadcrumb_child_no_preempt_mid_batch; 3922 ce->engine->emit_fini_breadcrumb_dw = 16; 3923 } 3924 3925 kfree(siblings); 3926 return parent; 3927 3928 unwind: 3929 if (parent) 3930 intel_context_put(parent); 3931 kfree(siblings); 3932 return err; 3933 } 3934 3935 static bool 3936 guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b) 3937 { 3938 struct intel_engine_cs *sibling; 3939 intel_engine_mask_t tmp, mask = b->engine_mask; 3940 bool result = false; 3941 3942 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) 3943 result |= intel_engine_irq_enable(sibling); 3944 3945 return result; 3946 } 3947 3948 static void 3949 guc_irq_disable_breadcrumbs(struct intel_breadcrumbs *b) 3950 { 3951 struct intel_engine_cs *sibling; 3952 intel_engine_mask_t tmp, mask = b->engine_mask; 3953 3954 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) 3955 intel_engine_irq_disable(sibling); 3956 } 3957 3958 static void guc_init_breadcrumbs(struct intel_engine_cs *engine) 3959 { 3960 int i; 3961 3962 /* 3963 * In GuC submission mode we do not know which physical engine a request 3964 * will be scheduled on, this creates a problem because the breadcrumb 3965 * interrupt is per physical engine. To work around this we attach 3966 * requests and direct all breadcrumb interrupts to the first instance 3967 * of an engine per class. In addition all breadcrumb interrupts are 3968 * enabled / disabled across an engine class in unison. 
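 *
 * For example (engine names purely illustrative), on a GT exposing vcs0
 * and vcs2, vcs2 drops its own breadcrumbs object and takes a reference
 * on vcs0's; both engine masks are OR'ed into breadcrumbs->engine_mask
 * and the irq_enable/irq_disable hooks installed below then fan out to
 * every engine in that mask.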
3969 */ 3970 for (i = 0; i < MAX_ENGINE_INSTANCE; ++i) { 3971 struct intel_engine_cs *sibling = 3972 engine->gt->engine_class[engine->class][i]; 3973 3974 if (sibling) { 3975 if (engine->breadcrumbs != sibling->breadcrumbs) { 3976 intel_breadcrumbs_put(engine->breadcrumbs); 3977 engine->breadcrumbs = 3978 intel_breadcrumbs_get(sibling->breadcrumbs); 3979 } 3980 break; 3981 } 3982 } 3983 3984 if (engine->breadcrumbs) { 3985 engine->breadcrumbs->engine_mask |= engine->mask; 3986 engine->breadcrumbs->irq_enable = guc_irq_enable_breadcrumbs; 3987 engine->breadcrumbs->irq_disable = guc_irq_disable_breadcrumbs; 3988 } 3989 } 3990 3991 static void guc_bump_inflight_request_prio(struct i915_request *rq, 3992 int prio) 3993 { 3994 struct intel_context *ce = request_to_scheduling_context(rq); 3995 u8 new_guc_prio = map_i915_prio_to_guc_prio(prio); 3996 3997 /* Short circuit function */ 3998 if (prio < I915_PRIORITY_NORMAL || 3999 rq->guc_prio == GUC_PRIO_FINI || 4000 (rq->guc_prio != GUC_PRIO_INIT && 4001 !new_guc_prio_higher(rq->guc_prio, new_guc_prio))) 4002 return; 4003 4004 spin_lock(&ce->guc_state.lock); 4005 if (rq->guc_prio != GUC_PRIO_FINI) { 4006 if (rq->guc_prio != GUC_PRIO_INIT) 4007 sub_context_inflight_prio(ce, rq->guc_prio); 4008 rq->guc_prio = new_guc_prio; 4009 add_context_inflight_prio(ce, rq->guc_prio); 4010 update_context_prio(ce); 4011 } 4012 spin_unlock(&ce->guc_state.lock); 4013 } 4014 4015 static void guc_retire_inflight_request_prio(struct i915_request *rq) 4016 { 4017 struct intel_context *ce = request_to_scheduling_context(rq); 4018 4019 spin_lock(&ce->guc_state.lock); 4020 guc_prio_fini(rq, ce); 4021 spin_unlock(&ce->guc_state.lock); 4022 } 4023 4024 static void sanitize_hwsp(struct intel_engine_cs *engine) 4025 { 4026 struct intel_timeline *tl; 4027 4028 list_for_each_entry(tl, &engine->status_page.timelines, engine_link) 4029 intel_timeline_reset_seqno(tl); 4030 } 4031 4032 static void guc_sanitize(struct intel_engine_cs *engine) 4033 { 4034 /* 4035 * Poison residual state on resume, in case the suspend didn't! 4036 * 4037 * We have to assume that across suspend/resume (or other loss 4038 * of control) that the contents of our pinned buffers has been 4039 * lost, replaced by garbage. Since this doesn't always happen, 4040 * let's poison such state so that we more quickly spot when 4041 * we falsely assume it has been preserved. 4042 */ 4043 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 4044 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE); 4045 4046 /* 4047 * The kernel_context HWSP is stored in the status_page. As above, 4048 * that may be lost on resume/initialisation, and so we need to 4049 * reset the value in the HWSP. 
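 *
 * sanitize_hwsp() below simply walks every timeline attached to this
 * engine's status page and resets its seqno via
 * intel_timeline_reset_seqno().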
4050 */ 4051 sanitize_hwsp(engine); 4052 4053 /* And scrub the dirty cachelines for the HWSP */ 4054 drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE); 4055 4056 intel_engine_reset_pinned_contexts(engine); 4057 } 4058 4059 static void setup_hwsp(struct intel_engine_cs *engine) 4060 { 4061 intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */ 4062 4063 ENGINE_WRITE_FW(engine, 4064 RING_HWS_PGA, 4065 i915_ggtt_offset(engine->status_page.vma)); 4066 } 4067 4068 static void start_engine(struct intel_engine_cs *engine) 4069 { 4070 ENGINE_WRITE_FW(engine, 4071 RING_MODE_GEN7, 4072 _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE)); 4073 4074 ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); 4075 ENGINE_POSTING_READ(engine, RING_MI_MODE); 4076 } 4077 4078 static int guc_resume(struct intel_engine_cs *engine) 4079 { 4080 assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL); 4081 4082 intel_mocs_init_engine(engine); 4083 4084 intel_breadcrumbs_reset(engine->breadcrumbs); 4085 4086 setup_hwsp(engine); 4087 start_engine(engine); 4088 4089 if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) 4090 xehp_enable_ccs_engines(engine); 4091 4092 return 0; 4093 } 4094 4095 static bool guc_sched_engine_disabled(struct i915_sched_engine *sched_engine) 4096 { 4097 return !sched_engine->tasklet.callback; 4098 } 4099 4100 static void guc_set_default_submission(struct intel_engine_cs *engine) 4101 { 4102 engine->submit_request = guc_submit_request; 4103 } 4104 4105 static inline void guc_kernel_context_pin(struct intel_guc *guc, 4106 struct intel_context *ce) 4107 { 4108 /* 4109 * Note: we purposefully do not check the returns below because 4110 * the registration can only fail if a reset is just starting. 4111 * This is called at the end of reset so presumably another reset 4112 * isn't happening and even if it did, this code would be run again. 4113 */ 4114 4115 if (context_guc_id_invalid(ce)) 4116 pin_guc_id(guc, ce); 4117 4118 if (!test_bit(CONTEXT_GUC_INIT, &ce->flags)) 4119 guc_context_init(ce); 4120 4121 try_context_registration(ce, true); 4122 } 4123 4124 static inline void guc_init_lrc_mapping(struct intel_guc *guc) 4125 { 4126 struct intel_gt *gt = guc_to_gt(guc); 4127 struct intel_engine_cs *engine; 4128 enum intel_engine_id id; 4129 4130 /* make sure all descriptors are clean... */ 4131 xa_destroy(&guc->context_lookup); 4132 4133 /* 4134 * A reset might have occurred while we had a pending stalled request, 4135 * so make sure we clean that up. 4136 */ 4137 guc->stalled_request = NULL; 4138 guc->submission_stall_reason = STALL_NONE; 4139 4140 /* 4141 * Some contexts might have been pinned before we enabled GuC 4142 * submission, so we need to add them to the GuC bookkeeping. 4143 * Also, after a reset of the GuC we want to make sure that the 4144 * information shared with GuC is properly reset. The kernel LRCs are 4145 * not attached to the gem_context, so they need to be added separately.
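 *
 * Concretely, the loop below walks each engine's pinned_contexts_list
 * and runs guc_kernel_context_pin() on every entry, which (re)acquires
 * a guc_id if needed, performs the one-time GuC state init and
 * re-registers the context with the firmware.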
4146 */ 4147 for_each_engine(engine, gt, id) { 4148 struct intel_context *ce; 4149 4150 list_for_each_entry(ce, &engine->pinned_contexts_list, 4151 pinned_contexts_link) 4152 guc_kernel_context_pin(guc, ce); 4153 } 4154 } 4155 4156 static void guc_release(struct intel_engine_cs *engine) 4157 { 4158 engine->sanitize = NULL; /* no longer in control, nothing to sanitize */ 4159 4160 intel_engine_cleanup_common(engine); 4161 lrc_fini_wa_ctx(engine); 4162 } 4163 4164 static void virtual_guc_bump_serial(struct intel_engine_cs *engine) 4165 { 4166 struct intel_engine_cs *e; 4167 intel_engine_mask_t tmp, mask = engine->mask; 4168 4169 for_each_engine_masked(e, engine->gt, mask, tmp) 4170 e->serial++; 4171 } 4172 4173 static void guc_default_vfuncs(struct intel_engine_cs *engine) 4174 { 4175 /* Default vfuncs which can be overridden by each engine. */ 4176 4177 engine->resume = guc_resume; 4178 4179 engine->cops = &guc_context_ops; 4180 engine->request_alloc = guc_request_alloc; 4181 engine->add_active_request = add_to_context; 4182 engine->remove_active_request = remove_from_context; 4183 4184 engine->sched_engine->schedule = i915_schedule; 4185 4186 engine->reset.prepare = guc_engine_reset_prepare; 4187 engine->reset.rewind = guc_rewind_nop; 4188 engine->reset.cancel = guc_reset_nop; 4189 engine->reset.finish = guc_reset_nop; 4190 4191 engine->emit_flush = gen8_emit_flush_xcs; 4192 engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; 4193 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs; 4194 if (GRAPHICS_VER(engine->i915) >= 12) { 4195 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs; 4196 engine->emit_flush = gen12_emit_flush_xcs; 4197 } 4198 engine->set_default_submission = guc_set_default_submission; 4199 engine->busyness = guc_engine_busyness; 4200 4201 engine->flags |= I915_ENGINE_SUPPORTS_STATS; 4202 engine->flags |= I915_ENGINE_HAS_PREEMPTION; 4203 engine->flags |= I915_ENGINE_HAS_TIMESLICES; 4204 4205 /* Wa_14014475959:dg2 */ 4206 if (engine->class == COMPUTE_CLASS) 4207 if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) || 4208 IS_DG2(engine->i915)) 4209 engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT; 4210 4211 /* 4212 * TODO: GuC supports timeslicing and semaphores as well, but they're 4213 * handled by the firmware so some minor tweaks are required before 4214 * enabling. 
4215 * 4216 * engine->flags |= I915_ENGINE_HAS_SEMAPHORES; 4217 */ 4218 4219 engine->emit_bb_start = gen8_emit_bb_start; 4220 if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) 4221 engine->emit_bb_start = xehp_emit_bb_start; 4222 } 4223 4224 static void rcs_submission_override(struct intel_engine_cs *engine) 4225 { 4226 switch (GRAPHICS_VER(engine->i915)) { 4227 case 12: 4228 engine->emit_flush = gen12_emit_flush_rcs; 4229 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs; 4230 break; 4231 case 11: 4232 engine->emit_flush = gen11_emit_flush_rcs; 4233 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs; 4234 break; 4235 default: 4236 engine->emit_flush = gen8_emit_flush_rcs; 4237 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; 4238 break; 4239 } 4240 } 4241 4242 static inline void guc_default_irqs(struct intel_engine_cs *engine) 4243 { 4244 engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT; 4245 intel_engine_set_irq_handler(engine, cs_irq_handler); 4246 } 4247 4248 static void guc_sched_engine_destroy(struct kref *kref) 4249 { 4250 struct i915_sched_engine *sched_engine = 4251 container_of(kref, typeof(*sched_engine), ref); 4252 struct intel_guc *guc = sched_engine->private_data; 4253 4254 guc->sched_engine = NULL; 4255 tasklet_kill(&sched_engine->tasklet); /* flush the callback */ 4256 kfree(sched_engine); 4257 } 4258 4259 int intel_guc_submission_setup(struct intel_engine_cs *engine) 4260 { 4261 struct drm_i915_private *i915 = engine->i915; 4262 struct intel_guc *guc = &engine->gt->uc.guc; 4263 4264 /* 4265 * The setup relies on several assumptions (e.g. irqs always enabled) 4266 * that are only valid on gen11+ 4267 */ 4268 GEM_BUG_ON(GRAPHICS_VER(i915) < 11); 4269 4270 if (!guc->sched_engine) { 4271 guc->sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL); 4272 if (!guc->sched_engine) 4273 return -ENOMEM; 4274 4275 guc->sched_engine->schedule = i915_schedule; 4276 guc->sched_engine->disabled = guc_sched_engine_disabled; 4277 guc->sched_engine->private_data = guc; 4278 guc->sched_engine->destroy = guc_sched_engine_destroy; 4279 guc->sched_engine->bump_inflight_request_prio = 4280 guc_bump_inflight_request_prio; 4281 guc->sched_engine->retire_inflight_request_prio = 4282 guc_retire_inflight_request_prio; 4283 tasklet_setup(&guc->sched_engine->tasklet, 4284 guc_submission_tasklet); 4285 } 4286 i915_sched_engine_put(engine->sched_engine); 4287 engine->sched_engine = i915_sched_engine_get(guc->sched_engine); 4288 4289 guc_default_vfuncs(engine); 4290 guc_default_irqs(engine); 4291 guc_init_breadcrumbs(engine); 4292 4293 if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) 4294 rcs_submission_override(engine); 4295 4296 lrc_init_wa_ctx(engine); 4297 4298 /* Finally, take ownership and responsibility for cleanup! 
*/ 4299 engine->sanitize = guc_sanitize; 4300 engine->release = guc_release; 4301 4302 return 0; 4303 } 4304 4305 struct scheduling_policy { 4306 /* internal data */ 4307 u32 max_words, num_words; 4308 u32 count; 4309 /* API data */ 4310 struct guc_update_scheduling_policy h2g; 4311 }; 4312 4313 static u32 __guc_scheduling_policy_action_size(struct scheduling_policy *policy) 4314 { 4315 u32 *start = (void *)&policy->h2g; 4316 u32 *end = policy->h2g.data + policy->num_words; 4317 size_t delta = end - start; 4318 4319 return delta; 4320 } 4321 4322 static struct scheduling_policy *__guc_scheduling_policy_start_klv(struct scheduling_policy *policy) 4323 { 4324 policy->h2g.header.action = INTEL_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV; 4325 policy->max_words = ARRAY_SIZE(policy->h2g.data); 4326 policy->num_words = 0; 4327 policy->count = 0; 4328 4329 return policy; 4330 } 4331 4332 static void __guc_scheduling_policy_add_klv(struct scheduling_policy *policy, 4333 u32 action, u32 *data, u32 len) 4334 { 4335 u32 *klv_ptr = policy->h2g.data + policy->num_words; 4336 4337 GEM_BUG_ON((policy->num_words + 1 + len) > policy->max_words); 4338 *(klv_ptr++) = FIELD_PREP(GUC_KLV_0_KEY, action) | 4339 FIELD_PREP(GUC_KLV_0_LEN, len); 4340 memcpy(klv_ptr, data, sizeof(u32) * len); 4341 policy->num_words += 1 + len; 4342 policy->count++; 4343 } 4344 4345 static int __guc_action_set_scheduling_policies(struct intel_guc *guc, 4346 struct scheduling_policy *policy) 4347 { 4348 int ret; 4349 4350 ret = intel_guc_send(guc, (u32 *)&policy->h2g, 4351 __guc_scheduling_policy_action_size(policy)); 4352 if (ret < 0) { 4353 guc_probe_error(guc, "Failed to configure global scheduling policies: %pe!\n", 4354 ERR_PTR(ret)); 4355 return ret; 4356 } 4357 4358 if (ret != policy->count) { 4359 guc_warn(guc, "global scheduler policy processed %d of %d KLVs!", 4360 ret, policy->count); 4361 if (ret > policy->count) 4362 return -EPROTO; 4363 } 4364 4365 return 0; 4366 } 4367 4368 static int guc_init_global_schedule_policy(struct intel_guc *guc) 4369 { 4370 struct scheduling_policy policy; 4371 struct intel_gt *gt = guc_to_gt(guc); 4372 intel_wakeref_t wakeref; 4373 int ret; 4374 4375 if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0)) 4376 return 0; 4377 4378 __guc_scheduling_policy_start_klv(&policy); 4379 4380 with_intel_runtime_pm(>->i915->runtime_pm, wakeref) { 4381 u32 yield[] = { 4382 GLOBAL_SCHEDULE_POLICY_RC_YIELD_DURATION, 4383 GLOBAL_SCHEDULE_POLICY_RC_YIELD_RATIO, 4384 }; 4385 4386 __guc_scheduling_policy_add_klv(&policy, 4387 GUC_SCHEDULING_POLICIES_KLV_ID_RENDER_COMPUTE_YIELD, 4388 yield, ARRAY_SIZE(yield)); 4389 4390 ret = __guc_action_set_scheduling_policies(guc, &policy); 4391 } 4392 4393 return ret; 4394 } 4395 4396 void intel_guc_submission_enable(struct intel_guc *guc) 4397 { 4398 struct intel_gt *gt = guc_to_gt(guc); 4399 4400 /* Enable and route to GuC */ 4401 if (GRAPHICS_VER(gt->i915) >= 12) 4402 intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES, 4403 GUC_SEM_INTR_ROUTE_TO_GUC | 4404 GUC_SEM_INTR_ENABLE_ALL); 4405 4406 guc_init_lrc_mapping(guc); 4407 guc_init_engine_stats(guc); 4408 guc_init_global_schedule_policy(guc); 4409 } 4410 4411 void intel_guc_submission_disable(struct intel_guc *guc) 4412 { 4413 struct intel_gt *gt = guc_to_gt(guc); 4414 4415 /* Note: By the time we're here, GuC may have already been reset */ 4416 4417 /* Disable and route to host */ 4418 if (GRAPHICS_VER(gt->i915) >= 12) 4419 intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES, 0x0); 4420 } 4421 4422 static bool 
__guc_submission_supported(struct intel_guc *guc) 4423 { 4424 /* GuC submission is unavailable for pre-Gen11 */ 4425 return intel_guc_is_supported(guc) && 4426 GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11; 4427 } 4428 4429 static bool __guc_submission_selected(struct intel_guc *guc) 4430 { 4431 struct drm_i915_private *i915 = guc_to_gt(guc)->i915; 4432 4433 if (!intel_guc_submission_is_supported(guc)) 4434 return false; 4435 4436 return i915->params.enable_guc & ENABLE_GUC_SUBMISSION; 4437 } 4438 4439 int intel_guc_sched_disable_gucid_threshold_max(struct intel_guc *guc) 4440 { 4441 return guc->submission_state.num_guc_ids - NUMBER_MULTI_LRC_GUC_ID(guc); 4442 } 4443 4444 /* 4445 * This default value of 33 milliseconds (+1 millisecond rounded up) ensures that 30fps or higher 4446 * workloads are able to enjoy the latency reduction when delaying the schedule-disable 4447 * operation. This matches the 30fps game-render + encode (real world) workload this 4448 * knob was tested against. 4449 */ 4450 #define SCHED_DISABLE_DELAY_MS 34 4451 4452 /* 4453 * A threshold of 75% is a reasonable starting point considering that real world apps 4454 * generally don't get anywhere near this. 4455 */ 4456 #define NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(__guc) \ 4457 (((intel_guc_sched_disable_gucid_threshold_max(__guc)) * 3) / 4) 4458 4459 void intel_guc_submission_init_early(struct intel_guc *guc) 4460 { 4461 xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ); 4462 4463 spin_lock_init(&guc->submission_state.lock); 4464 INIT_LIST_HEAD(&guc->submission_state.guc_id_list); 4465 ida_init(&guc->submission_state.guc_ids); 4466 INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts); 4467 INIT_WORK(&guc->submission_state.destroyed_worker, 4468 destroyed_worker_func); 4469 INIT_WORK(&guc->submission_state.reset_fail_worker, 4470 reset_fail_worker_func); 4471 4472 spin_lock_init(&guc->timestamp.lock); 4473 INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping); 4474 4475 guc->submission_state.sched_disable_delay_ms = SCHED_DISABLE_DELAY_MS; 4476 guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID; 4477 guc->submission_state.sched_disable_gucid_threshold = 4478 NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(guc); 4479 guc->submission_supported = __guc_submission_supported(guc); 4480 guc->submission_selected = __guc_submission_selected(guc); 4481 } 4482 4483 static inline struct intel_context * 4484 g2h_context_lookup(struct intel_guc *guc, u32 ctx_id) 4485 { 4486 struct intel_context *ce; 4487 4488 if (unlikely(ctx_id >= GUC_MAX_CONTEXT_ID)) { 4489 guc_err(guc, "Invalid ctx_id %u\n", ctx_id); 4490 return NULL; 4491 } 4492 4493 ce = __get_context(guc, ctx_id); 4494 if (unlikely(!ce)) { 4495 guc_err(guc, "Context is NULL, ctx_id %u\n", ctx_id); 4496 return NULL; 4497 } 4498 4499 if (unlikely(intel_context_is_child(ce))) { 4500 guc_err(guc, "Context is child, ctx_id %u\n", ctx_id); 4501 return NULL; 4502 } 4503 4504 return ce; 4505 } 4506 4507 int intel_guc_deregister_done_process_msg(struct intel_guc *guc, 4508 const u32 *msg, 4509 u32 len) 4510 { 4511 struct intel_context *ce; 4512 u32 ctx_id; 4513 4514 if (unlikely(len < 1)) { 4515 guc_err(guc, "Invalid length %u\n", len); 4516 return -EPROTO; 4517 } 4518 ctx_id = msg[0]; 4519 4520 ce = g2h_context_lookup(guc, ctx_id); 4521 if (unlikely(!ce)) 4522 return -EPROTO; 4523 4524 trace_intel_context_deregister_done(ce); 4525 4526 #ifdef CONFIG_DRM_I915_SELFTEST 4527 if (unlikely(ce->drop_deregister)) { 4528 ce->drop_deregister = false; 4529 return 0; 4530 } 4531 #endif 4532 4533
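	/*
	 * Two cases land here: either this context was waiting for the
	 * previous owner of its guc_id to be deregistered (register it now
	 * and release its blocked requests), or the deregister was the final
	 * step of destroying the context (drop the GT wakeref, release the
	 * guc_id and free it).
	 */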
if (context_wait_for_deregister_to_register(ce)) { 4534 struct intel_runtime_pm *runtime_pm = 4535 &ce->engine->gt->i915->runtime_pm; 4536 intel_wakeref_t wakeref; 4537 4538 /* 4539 * Previous owner of this guc_id has been deregistered, now safe 4540 * to register this context. 4541 */ 4542 with_intel_runtime_pm(runtime_pm, wakeref) 4543 register_context(ce, true); 4544 guc_signal_context_fence(ce); 4545 intel_context_put(ce); 4546 } else if (context_destroyed(ce)) { 4547 /* Context has been destroyed */ 4548 intel_gt_pm_put_async(guc_to_gt(guc)); 4549 release_guc_id(guc, ce); 4550 __guc_context_destroy(ce); 4551 } 4552 4553 decr_outstanding_submission_g2h(guc); 4554 4555 return 0; 4556 } 4557 4558 int intel_guc_sched_done_process_msg(struct intel_guc *guc, 4559 const u32 *msg, 4560 u32 len) 4561 { 4562 struct intel_context *ce; 4563 unsigned long flags; 4564 u32 ctx_id; 4565 4566 if (unlikely(len < 2)) { 4567 guc_err(guc, "Invalid length %u\n", len); 4568 return -EPROTO; 4569 } 4570 ctx_id = msg[0]; 4571 4572 ce = g2h_context_lookup(guc, ctx_id); 4573 if (unlikely(!ce)) 4574 return -EPROTO; 4575 4576 if (unlikely(context_destroyed(ce) || 4577 (!context_pending_enable(ce) && 4578 !context_pending_disable(ce)))) { 4579 guc_err(guc, "Bad context sched_state 0x%x, ctx_id %u\n", 4580 ce->guc_state.sched_state, ctx_id); 4581 return -EPROTO; 4582 } 4583 4584 trace_intel_context_sched_done(ce); 4585 4586 if (context_pending_enable(ce)) { 4587 #ifdef CONFIG_DRM_I915_SELFTEST 4588 if (unlikely(ce->drop_schedule_enable)) { 4589 ce->drop_schedule_enable = false; 4590 return 0; 4591 } 4592 #endif 4593 4594 spin_lock_irqsave(&ce->guc_state.lock, flags); 4595 clr_context_pending_enable(ce); 4596 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 4597 } else if (context_pending_disable(ce)) { 4598 bool banned; 4599 4600 #ifdef CONFIG_DRM_I915_SELFTEST 4601 if (unlikely(ce->drop_schedule_disable)) { 4602 ce->drop_schedule_disable = false; 4603 return 0; 4604 } 4605 #endif 4606 4607 /* 4608 * Unpin must be done before __guc_signal_context_fence, 4609 * otherwise a race exists where the requests get 4610 * submitted and retired before this unpin completes, 4611 * resulting in the pin_count going to zero while the 4612 * context is still enabled.
4613 */ 4614 intel_context_sched_disable_unpin(ce); 4615 4616 spin_lock_irqsave(&ce->guc_state.lock, flags); 4617 banned = context_banned(ce); 4618 clr_context_banned(ce); 4619 clr_context_pending_disable(ce); 4620 __guc_signal_context_fence(ce); 4621 guc_blocked_fence_complete(ce); 4622 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 4623 4624 if (banned) { 4625 guc_cancel_context_requests(ce); 4626 intel_engine_signal_breadcrumbs(ce->engine); 4627 } 4628 } 4629 4630 decr_outstanding_submission_g2h(guc); 4631 intel_context_put(ce); 4632 4633 return 0; 4634 } 4635 4636 static void capture_error_state(struct intel_guc *guc, 4637 struct intel_context *ce) 4638 { 4639 struct intel_gt *gt = guc_to_gt(guc); 4640 struct drm_i915_private *i915 = gt->i915; 4641 struct intel_engine_cs *engine = __context_to_physical_engine(ce); 4642 intel_wakeref_t wakeref; 4643 4644 intel_engine_set_hung_context(engine, ce); 4645 with_intel_runtime_pm(&i915->runtime_pm, wakeref) 4646 i915_capture_error_state(gt, engine->mask, CORE_DUMP_FLAG_IS_GUC_CAPTURE); 4647 atomic_inc(&i915->gpu_error.reset_engine_count[engine->uabi_class]); 4648 } 4649 4650 static void guc_context_replay(struct intel_context *ce) 4651 { 4652 struct i915_sched_engine *sched_engine = ce->engine->sched_engine; 4653 4654 __guc_reset_context(ce, ce->engine->mask); 4655 tasklet_hi_schedule(&sched_engine->tasklet); 4656 } 4657 4658 static void guc_handle_context_reset(struct intel_guc *guc, 4659 struct intel_context *ce) 4660 { 4661 trace_intel_context_reset(ce); 4662 4663 drm_dbg(&guc_to_gt(guc)->i915->drm, "Got GuC reset of 0x%04X, exiting = %d, banned = %d\n", 4664 ce->guc_id.id, test_bit(CONTEXT_EXITING, &ce->flags), 4665 test_bit(CONTEXT_BANNED, &ce->flags)); 4666 4667 if (likely(intel_context_is_schedulable(ce))) { 4668 capture_error_state(guc, ce); 4669 guc_context_replay(ce); 4670 } else { 4671 guc_info(guc, "Ignoring context reset notification of exiting context 0x%04X on %s", 4672 ce->guc_id.id, ce->engine->name); 4673 } 4674 } 4675 4676 int intel_guc_context_reset_process_msg(struct intel_guc *guc, 4677 const u32 *msg, u32 len) 4678 { 4679 struct intel_context *ce; 4680 unsigned long flags; 4681 int ctx_id; 4682 4683 if (unlikely(len != 1)) { 4684 guc_err(guc, "Invalid length %u", len); 4685 return -EPROTO; 4686 } 4687 4688 ctx_id = msg[0]; 4689 4690 /* 4691 * The context lookup uses the xarray but lookups only require an RCU lock 4692 * not the full spinlock. So take the lock explicitly and keep it until the 4693 * context has been reference count locked to ensure it can't be destroyed 4694 * asynchronously until the reset is done. 
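 *
 * Hence the pattern below: the xa_lock is held across both
 * g2h_context_lookup() and intel_context_get(), so a concurrent release
 * cannot free the context between the lookup and the reference being
 * taken.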
4695 */ 4696 xa_lock_irqsave(&guc->context_lookup, flags); 4697 ce = g2h_context_lookup(guc, ctx_id); 4698 if (ce) 4699 intel_context_get(ce); 4700 xa_unlock_irqrestore(&guc->context_lookup, flags); 4701 4702 if (unlikely(!ce)) 4703 return -EPROTO; 4704 4705 guc_handle_context_reset(guc, ce); 4706 intel_context_put(ce); 4707 4708 return 0; 4709 } 4710 4711 int intel_guc_error_capture_process_msg(struct intel_guc *guc, 4712 const u32 *msg, u32 len) 4713 { 4714 u32 status; 4715 4716 if (unlikely(len != 1)) { 4717 guc_dbg(guc, "Invalid length %u", len); 4718 return -EPROTO; 4719 } 4720 4721 status = msg[0] & INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK; 4722 if (status == INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE) 4723 guc_warn(guc, "No space for error capture"); 4724 4725 intel_guc_capture_process(guc); 4726 4727 return 0; 4728 } 4729 4730 struct intel_engine_cs * 4731 intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance) 4732 { 4733 struct intel_gt *gt = guc_to_gt(guc); 4734 u8 engine_class = guc_class_to_engine_class(guc_class); 4735 4736 /* Class index is checked in class converter */ 4737 GEM_BUG_ON(instance > MAX_ENGINE_INSTANCE); 4738 4739 return gt->engine_class[engine_class][instance]; 4740 } 4741 4742 static void reset_fail_worker_func(struct work_struct *w) 4743 { 4744 struct intel_guc *guc = container_of(w, struct intel_guc, 4745 submission_state.reset_fail_worker); 4746 struct intel_gt *gt = guc_to_gt(guc); 4747 intel_engine_mask_t reset_fail_mask; 4748 unsigned long flags; 4749 4750 spin_lock_irqsave(&guc->submission_state.lock, flags); 4751 reset_fail_mask = guc->submission_state.reset_fail_mask; 4752 guc->submission_state.reset_fail_mask = 0; 4753 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 4754 4755 if (likely(reset_fail_mask)) { 4756 struct intel_engine_cs *engine; 4757 enum intel_engine_id id; 4758 4759 /* 4760 * GuC is toast at this point - it dead loops after sending the failed 4761 * reset notification. So need to manually determine the guilty context. 4762 * Note that it should be reliable to do this here because the GuC is 4763 * toast and will not be scheduling behind the KMD's back. 4764 */ 4765 for_each_engine_masked(engine, gt, reset_fail_mask, id) 4766 intel_guc_find_hung_context(engine); 4767 4768 intel_gt_handle_error(gt, reset_fail_mask, 4769 I915_ERROR_CAPTURE, 4770 "GuC failed to reset engine mask=0x%x", 4771 reset_fail_mask); 4772 } 4773 } 4774 4775 int intel_guc_engine_failure_process_msg(struct intel_guc *guc, 4776 const u32 *msg, u32 len) 4777 { 4778 struct intel_engine_cs *engine; 4779 u8 guc_class, instance; 4780 u32 reason; 4781 unsigned long flags; 4782 4783 if (unlikely(len != 3)) { 4784 guc_err(guc, "Invalid length %u", len); 4785 return -EPROTO; 4786 } 4787 4788 guc_class = msg[0]; 4789 instance = msg[1]; 4790 reason = msg[2]; 4791 4792 engine = intel_guc_lookup_engine(guc, guc_class, instance); 4793 if (unlikely(!engine)) { 4794 guc_err(guc, "Invalid engine %d:%d", guc_class, instance); 4795 return -EPROTO; 4796 } 4797 4798 /* 4799 * This is an unexpected failure of a hardware feature. So, log a real 4800 * error message not just the informational that comes with the reset. 
4801 */ 4802 guc_err(guc, "Engine reset failed on %d:%d (%s) because 0x%08X", 4803 guc_class, instance, engine->name, reason); 4804 4805 spin_lock_irqsave(&guc->submission_state.lock, flags); 4806 guc->submission_state.reset_fail_mask |= engine->mask; 4807 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 4808 4809 /* 4810 * A GT reset flushes this worker queue (G2H handler) so we must use 4811 * another worker to trigger a GT reset. 4812 */ 4813 queue_work(system_unbound_wq, &guc->submission_state.reset_fail_worker); 4814 4815 return 0; 4816 } 4817 4818 void intel_guc_find_hung_context(struct intel_engine_cs *engine) 4819 { 4820 struct intel_guc *guc = &engine->gt->uc.guc; 4821 struct intel_context *ce; 4822 struct i915_request *rq; 4823 unsigned long index; 4824 unsigned long flags; 4825 4826 /* Reset called during driver load? GuC not yet initialised! */ 4827 if (unlikely(!guc_submission_initialized(guc))) 4828 return; 4829 4830 xa_lock_irqsave(&guc->context_lookup, flags); 4831 xa_for_each(&guc->context_lookup, index, ce) { 4832 bool found; 4833 4834 if (!kref_get_unless_zero(&ce->ref)) 4835 continue; 4836 4837 xa_unlock(&guc->context_lookup); 4838 4839 if (!intel_context_is_pinned(ce)) 4840 goto next; 4841 4842 if (intel_engine_is_virtual(ce->engine)) { 4843 if (!(ce->engine->mask & engine->mask)) 4844 goto next; 4845 } else { 4846 if (ce->engine != engine) 4847 goto next; 4848 } 4849 4850 found = false; 4851 spin_lock(&ce->guc_state.lock); 4852 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) { 4853 if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE) 4854 continue; 4855 4856 found = true; 4857 break; 4858 } 4859 spin_unlock(&ce->guc_state.lock); 4860 4861 if (found) { 4862 intel_engine_set_hung_context(engine, ce); 4863 4864 /* Can only cope with one hang at a time... */ 4865 intel_context_put(ce); 4866 xa_lock(&guc->context_lookup); 4867 goto done; 4868 } 4869 4870 next: 4871 intel_context_put(ce); 4872 xa_lock(&guc->context_lookup); 4873 } 4874 done: 4875 xa_unlock_irqrestore(&guc->context_lookup, flags); 4876 } 4877 4878 void intel_guc_dump_active_requests(struct intel_engine_cs *engine, 4879 struct i915_request *hung_rq, 4880 struct drm_printer *m) 4881 { 4882 struct intel_guc *guc = &engine->gt->uc.guc; 4883 struct intel_context *ce; 4884 unsigned long index; 4885 unsigned long flags; 4886 4887 /* Reset called during driver load? GuC not yet initialised! 
*/ 4888 if (unlikely(!guc_submission_initialized(guc))) 4889 return; 4890 4891 xa_lock_irqsave(&guc->context_lookup, flags); 4892 xa_for_each(&guc->context_lookup, index, ce) { 4893 if (!kref_get_unless_zero(&ce->ref)) 4894 continue; 4895 4896 xa_unlock(&guc->context_lookup); 4897 4898 if (!intel_context_is_pinned(ce)) 4899 goto next; 4900 4901 if (intel_engine_is_virtual(ce->engine)) { 4902 if (!(ce->engine->mask & engine->mask)) 4903 goto next; 4904 } else { 4905 if (ce->engine != engine) 4906 goto next; 4907 } 4908 4909 spin_lock(&ce->guc_state.lock); 4910 intel_engine_dump_active_requests(&ce->guc_state.requests, 4911 hung_rq, m); 4912 spin_unlock(&ce->guc_state.lock); 4913 4914 next: 4915 intel_context_put(ce); 4916 xa_lock(&guc->context_lookup); 4917 } 4918 xa_unlock_irqrestore(&guc->context_lookup, flags); 4919 } 4920 4921 void intel_guc_submission_print_info(struct intel_guc *guc, 4922 struct drm_printer *p) 4923 { 4924 struct i915_sched_engine *sched_engine = guc->sched_engine; 4925 struct rb_node *rb; 4926 unsigned long flags; 4927 4928 if (!sched_engine) 4929 return; 4930 4931 drm_printf(p, "GuC Submission API Version: %d.%d.%d\n", 4932 guc->submission_version.major, guc->submission_version.minor, 4933 guc->submission_version.patch); 4934 drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n", 4935 atomic_read(&guc->outstanding_submission_g2h)); 4936 drm_printf(p, "GuC tasklet count: %u\n", 4937 atomic_read(&sched_engine->tasklet.count)); 4938 4939 spin_lock_irqsave(&sched_engine->lock, flags); 4940 drm_printf(p, "Requests in GuC submit tasklet:\n"); 4941 for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) { 4942 struct i915_priolist *pl = to_priolist(rb); 4943 struct i915_request *rq; 4944 4945 priolist_for_each_request(rq, pl) 4946 drm_printf(p, "guc_id=%u, seqno=%llu\n", 4947 rq->context->guc_id.id, 4948 rq->fence.seqno); 4949 } 4950 spin_unlock_irqrestore(&sched_engine->lock, flags); 4951 drm_printf(p, "\n"); 4952 } 4953 4954 static inline void guc_log_context_priority(struct drm_printer *p, 4955 struct intel_context *ce) 4956 { 4957 int i; 4958 4959 drm_printf(p, "\t\tPriority: %d\n", ce->guc_state.prio); 4960 drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n"); 4961 for (i = GUC_CLIENT_PRIORITY_KMD_HIGH; 4962 i < GUC_CLIENT_PRIORITY_NUM; ++i) { 4963 drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n", 4964 i, ce->guc_state.prio_count[i]); 4965 } 4966 drm_printf(p, "\n"); 4967 } 4968 4969 static inline void guc_log_context(struct drm_printer *p, 4970 struct intel_context *ce) 4971 { 4972 drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id); 4973 drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca); 4974 drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n", 4975 ce->ring->head, 4976 ce->lrc_reg_state[CTX_RING_HEAD]); 4977 drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n", 4978 ce->ring->tail, 4979 ce->lrc_reg_state[CTX_RING_TAIL]); 4980 drm_printf(p, "\t\tContext Pin Count: %u\n", 4981 atomic_read(&ce->pin_count)); 4982 drm_printf(p, "\t\tGuC ID Ref Count: %u\n", 4983 atomic_read(&ce->guc_id.ref)); 4984 drm_printf(p, "\t\tSchedule State: 0x%x\n", 4985 ce->guc_state.sched_state); 4986 } 4987 4988 void intel_guc_submission_print_context_info(struct intel_guc *guc, 4989 struct drm_printer *p) 4990 { 4991 struct intel_context *ce; 4992 unsigned long index; 4993 unsigned long flags; 4994 4995 xa_lock_irqsave(&guc->context_lookup, flags); 4996 xa_for_each(&guc->context_lookup, index, ce) { 4997 
GEM_BUG_ON(intel_context_is_child(ce)); 4998 4999 guc_log_context(p, ce); 5000 guc_log_context_priority(p, ce); 5001 5002 if (intel_context_is_parent(ce)) { 5003 struct intel_context *child; 5004 5005 drm_printf(p, "\t\tNumber children: %u\n", 5006 ce->parallel.number_children); 5007 5008 if (ce->parallel.guc.wq_status) { 5009 drm_printf(p, "\t\tWQI Head: %u\n", 5010 READ_ONCE(*ce->parallel.guc.wq_head)); 5011 drm_printf(p, "\t\tWQI Tail: %u\n", 5012 READ_ONCE(*ce->parallel.guc.wq_tail)); 5013 drm_printf(p, "\t\tWQI Status: %u\n", 5014 READ_ONCE(*ce->parallel.guc.wq_status)); 5015 } 5016 5017 if (ce->engine->emit_bb_start == 5018 emit_bb_start_parent_no_preempt_mid_batch) { 5019 u8 i; 5020 5021 drm_printf(p, "\t\tChildren Go: %u\n", 5022 get_children_go_value(ce)); 5023 for (i = 0; i < ce->parallel.number_children; ++i) 5024 drm_printf(p, "\t\tChildren Join: %u\n", 5025 get_children_join_value(ce, i)); 5026 } 5027 5028 for_each_child(ce, child) 5029 guc_log_context(p, child); 5030 } 5031 } 5032 xa_unlock_irqrestore(&guc->context_lookup, flags); 5033 } 5034 5035 static inline u32 get_children_go_addr(struct intel_context *ce) 5036 { 5037 GEM_BUG_ON(!intel_context_is_parent(ce)); 5038 5039 return i915_ggtt_offset(ce->state) + 5040 __get_parent_scratch_offset(ce) + 5041 offsetof(struct parent_scratch, go.semaphore); 5042 } 5043 5044 static inline u32 get_children_join_addr(struct intel_context *ce, 5045 u8 child_index) 5046 { 5047 GEM_BUG_ON(!intel_context_is_parent(ce)); 5048 5049 return i915_ggtt_offset(ce->state) + 5050 __get_parent_scratch_offset(ce) + 5051 offsetof(struct parent_scratch, join[child_index].semaphore); 5052 } 5053 5054 #define PARENT_GO_BB 1 5055 #define PARENT_GO_FINI_BREADCRUMB 0 5056 #define CHILD_GO_BB 1 5057 #define CHILD_GO_FINI_BREADCRUMB 0 5058 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq, 5059 u64 offset, u32 len, 5060 const unsigned int flags) 5061 { 5062 struct intel_context *ce = rq->context; 5063 u32 *cs; 5064 u8 i; 5065 5066 GEM_BUG_ON(!intel_context_is_parent(ce)); 5067 5068 cs = intel_ring_begin(rq, 10 + 4 * ce->parallel.number_children); 5069 if (IS_ERR(cs)) 5070 return PTR_ERR(cs); 5071 5072 /* Wait on children */ 5073 for (i = 0; i < ce->parallel.number_children; ++i) { 5074 *cs++ = (MI_SEMAPHORE_WAIT | 5075 MI_SEMAPHORE_GLOBAL_GTT | 5076 MI_SEMAPHORE_POLL | 5077 MI_SEMAPHORE_SAD_EQ_SDD); 5078 *cs++ = PARENT_GO_BB; 5079 *cs++ = get_children_join_addr(ce, i); 5080 *cs++ = 0; 5081 } 5082 5083 /* Turn off preemption */ 5084 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 5085 *cs++ = MI_NOOP; 5086 5087 /* Tell children go */ 5088 cs = gen8_emit_ggtt_write(cs, 5089 CHILD_GO_BB, 5090 get_children_go_addr(ce), 5091 0); 5092 5093 /* Jump to batch */ 5094 *cs++ = MI_BATCH_BUFFER_START_GEN8 | 5095 (flags & I915_DISPATCH_SECURE ? 
0 : BIT(8)); 5096 *cs++ = lower_32_bits(offset); 5097 *cs++ = upper_32_bits(offset); 5098 *cs++ = MI_NOOP; 5099 5100 intel_ring_advance(rq, cs); 5101 5102 return 0; 5103 } 5104 5105 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq, 5106 u64 offset, u32 len, 5107 const unsigned int flags) 5108 { 5109 struct intel_context *ce = rq->context; 5110 struct intel_context *parent = intel_context_to_parent(ce); 5111 u32 *cs; 5112 5113 GEM_BUG_ON(!intel_context_is_child(ce)); 5114 5115 cs = intel_ring_begin(rq, 12); 5116 if (IS_ERR(cs)) 5117 return PTR_ERR(cs); 5118 5119 /* Signal parent */ 5120 cs = gen8_emit_ggtt_write(cs, 5121 PARENT_GO_BB, 5122 get_children_join_addr(parent, 5123 ce->parallel.child_index), 5124 0); 5125 5126 /* Wait on parent for go */ 5127 *cs++ = (MI_SEMAPHORE_WAIT | 5128 MI_SEMAPHORE_GLOBAL_GTT | 5129 MI_SEMAPHORE_POLL | 5130 MI_SEMAPHORE_SAD_EQ_SDD); 5131 *cs++ = CHILD_GO_BB; 5132 *cs++ = get_children_go_addr(parent); 5133 *cs++ = 0; 5134 5135 /* Turn off preemption */ 5136 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 5137 5138 /* Jump to batch */ 5139 *cs++ = MI_BATCH_BUFFER_START_GEN8 | 5140 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)); 5141 *cs++ = lower_32_bits(offset); 5142 *cs++ = upper_32_bits(offset); 5143 5144 intel_ring_advance(rq, cs); 5145 5146 return 0; 5147 } 5148 5149 static u32 * 5150 __emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 5151 u32 *cs) 5152 { 5153 struct intel_context *ce = rq->context; 5154 u8 i; 5155 5156 GEM_BUG_ON(!intel_context_is_parent(ce)); 5157 5158 /* Wait on children */ 5159 for (i = 0; i < ce->parallel.number_children; ++i) { 5160 *cs++ = (MI_SEMAPHORE_WAIT | 5161 MI_SEMAPHORE_GLOBAL_GTT | 5162 MI_SEMAPHORE_POLL | 5163 MI_SEMAPHORE_SAD_EQ_SDD); 5164 *cs++ = PARENT_GO_FINI_BREADCRUMB; 5165 *cs++ = get_children_join_addr(ce, i); 5166 *cs++ = 0; 5167 } 5168 5169 /* Turn on preemption */ 5170 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 5171 *cs++ = MI_NOOP; 5172 5173 /* Tell children go */ 5174 cs = gen8_emit_ggtt_write(cs, 5175 CHILD_GO_FINI_BREADCRUMB, 5176 get_children_go_addr(ce), 5177 0); 5178 5179 return cs; 5180 } 5181 5182 /* 5183 * If this is true, a submission of multi-lrc requests had an error and the 5184 * requests need to be skipped. The front end (execbuf IOCTL) should've called 5185 * i915_request_skip(), which squashes the BB, but we still need to emit the fini 5186 * breadcrumb seqno write. At this point we don't know how many of the 5187 * requests in the multi-lrc submission were generated so we can't do the 5188 * handshake between the parent and children (e.g. if 4 requests should be 5189 * generated but the 2nd hit an error, only 1 would be seen by the GuC backend). 5190 * Simply skip the handshake, but still emit the breadcrumb seqno, if an error 5191 * has occurred on any of the requests in submission / relationship.
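 *
 * Even in the skip case the tail of the emission is unchanged: the seqno
 * write, MI_USER_INTERRUPT and MI_NOOP that follow the handshake are
 * always emitted, and NON_SKIP_LEN below accounts for exactly those
 * dwords (the GEM_BUG_ON()s in the emitters check the arithmetic).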
5192 */ 5193 static inline bool skip_handshake(struct i915_request *rq) 5194 { 5195 return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags); 5196 } 5197 5198 #define NON_SKIP_LEN 6 5199 static u32 * 5200 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 5201 u32 *cs) 5202 { 5203 struct intel_context *ce = rq->context; 5204 __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs; 5205 __maybe_unused u32 *start_fini_breadcrumb_cs = cs; 5206 5207 GEM_BUG_ON(!intel_context_is_parent(ce)); 5208 5209 if (unlikely(skip_handshake(rq))) { 5210 /* 5211 * NOP everything in __emit_fini_breadcrumb_parent_no_preempt_mid_batch; 5212 * the NON_SKIP_LEN comes from the length of the emits below. 5213 */ 5214 memset(cs, 0, sizeof(u32) * 5215 (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN)); 5216 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN; 5217 } else { 5218 cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs); 5219 } 5220 5221 /* Emit fini breadcrumb */ 5222 before_fini_breadcrumb_user_interrupt_cs = cs; 5223 cs = gen8_emit_ggtt_write(cs, 5224 rq->fence.seqno, 5225 i915_request_active_timeline(rq)->hwsp_offset, 5226 0); 5227 5228 /* User interrupt */ 5229 *cs++ = MI_USER_INTERRUPT; 5230 *cs++ = MI_NOOP; 5231 5232 /* Ensure our math for skip + emit is correct */ 5233 GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN != 5234 cs); 5235 GEM_BUG_ON(start_fini_breadcrumb_cs + 5236 ce->engine->emit_fini_breadcrumb_dw != cs); 5237 5238 rq->tail = intel_ring_offset(rq, cs); 5239 5240 return cs; 5241 } 5242 5243 static u32 * 5244 __emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 5245 u32 *cs) 5246 { 5247 struct intel_context *ce = rq->context; 5248 struct intel_context *parent = intel_context_to_parent(ce); 5249 5250 GEM_BUG_ON(!intel_context_is_child(ce)); 5251 5252 /* Turn on preemption */ 5253 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 5254 *cs++ = MI_NOOP; 5255 5256 /* Signal parent */ 5257 cs = gen8_emit_ggtt_write(cs, 5258 PARENT_GO_FINI_BREADCRUMB, 5259 get_children_join_addr(parent, 5260 ce->parallel.child_index), 5261 0); 5262 5263 /* Wait on parent for go */ 5264 *cs++ = (MI_SEMAPHORE_WAIT | 5265 MI_SEMAPHORE_GLOBAL_GTT | 5266 MI_SEMAPHORE_POLL | 5267 MI_SEMAPHORE_SAD_EQ_SDD); 5268 *cs++ = CHILD_GO_FINI_BREADCRUMB; 5269 *cs++ = get_children_go_addr(parent); 5270 *cs++ = 0; 5271 5272 return cs; 5273 } 5274 5275 static u32 * 5276 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 5277 u32 *cs) 5278 { 5279 struct intel_context *ce = rq->context; 5280 __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs; 5281 __maybe_unused u32 *start_fini_breadcrumb_cs = cs; 5282 5283 GEM_BUG_ON(!intel_context_is_child(ce)); 5284 5285 if (unlikely(skip_handshake(rq))) { 5286 /* 5287 * NOP everything in __emit_fini_breadcrumb_child_no_preempt_mid_batch; 5288 * the NON_SKIP_LEN comes from the length of the emits below.
5289 */ 5290 memset(cs, 0, sizeof(u32) * 5291 (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN)); 5292 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN; 5293 } else { 5294 cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs); 5295 } 5296 5297 /* Emit fini breadcrumb */ 5298 before_fini_breadcrumb_user_interrupt_cs = cs; 5299 cs = gen8_emit_ggtt_write(cs, 5300 rq->fence.seqno, 5301 i915_request_active_timeline(rq)->hwsp_offset, 5302 0); 5303 5304 /* User interrupt */ 5305 *cs++ = MI_USER_INTERRUPT; 5306 *cs++ = MI_NOOP; 5307 5308 /* Ensure our math for skip + emit is correct */ 5309 GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN != 5310 cs); 5311 GEM_BUG_ON(start_fini_breadcrumb_cs + 5312 ce->engine->emit_fini_breadcrumb_dw != cs); 5313 5314 rq->tail = intel_ring_offset(rq, cs); 5315 5316 return cs; 5317 } 5318 5319 #undef NON_SKIP_LEN 5320 5321 static struct intel_context * 5322 guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count, 5323 unsigned long flags) 5324 { 5325 struct guc_virtual_engine *ve; 5326 struct intel_guc *guc; 5327 unsigned int n; 5328 int err; 5329 5330 ve = kzalloc(sizeof(*ve), GFP_KERNEL); 5331 if (!ve) 5332 return ERR_PTR(-ENOMEM); 5333 5334 guc = &siblings[0]->gt->uc.guc; 5335 5336 ve->base.i915 = siblings[0]->i915; 5337 ve->base.gt = siblings[0]->gt; 5338 ve->base.uncore = siblings[0]->uncore; 5339 ve->base.id = -1; 5340 5341 ve->base.uabi_class = I915_ENGINE_CLASS_INVALID; 5342 ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; 5343 ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; 5344 ve->base.saturated = ALL_ENGINES; 5345 5346 snprintf(ve->base.name, sizeof(ve->base.name), "virtual"); 5347 5348 ve->base.sched_engine = i915_sched_engine_get(guc->sched_engine); 5349 5350 ve->base.cops = &virtual_guc_context_ops; 5351 ve->base.request_alloc = guc_request_alloc; 5352 ve->base.bump_serial = virtual_guc_bump_serial; 5353 5354 ve->base.submit_request = guc_submit_request; 5355 5356 ve->base.flags = I915_ENGINE_IS_VIRTUAL; 5357 5358 intel_context_init(&ve->context, &ve->base); 5359 5360 for (n = 0; n < count; n++) { 5361 struct intel_engine_cs *sibling = siblings[n]; 5362 5363 GEM_BUG_ON(!is_power_of_2(sibling->mask)); 5364 if (sibling->mask & ve->base.mask) { 5365 guc_dbg(guc, "duplicate %s entry in load balancer\n", 5366 sibling->name); 5367 err = -EINVAL; 5368 goto err_put; 5369 } 5370 5371 ve->base.mask |= sibling->mask; 5372 ve->base.logical_mask |= sibling->logical_mask; 5373 5374 if (n != 0 && ve->base.class != sibling->class) { 5375 guc_dbg(guc, "invalid mixing of engine class, sibling %d, already %d\n", 5376 sibling->class, ve->base.class); 5377 err = -EINVAL; 5378 goto err_put; 5379 } else if (n == 0) { 5380 ve->base.class = sibling->class; 5381 ve->base.uabi_class = sibling->uabi_class; 5382 snprintf(ve->base.name, sizeof(ve->base.name), 5383 "v%dx%d", ve->base.class, count); 5384 ve->base.context_size = sibling->context_size; 5385 5386 ve->base.add_active_request = 5387 sibling->add_active_request; 5388 ve->base.remove_active_request = 5389 sibling->remove_active_request; 5390 ve->base.emit_bb_start = sibling->emit_bb_start; 5391 ve->base.emit_flush = sibling->emit_flush; 5392 ve->base.emit_init_breadcrumb = 5393 sibling->emit_init_breadcrumb; 5394 ve->base.emit_fini_breadcrumb = 5395 sibling->emit_fini_breadcrumb; 5396 ve->base.emit_fini_breadcrumb_dw = 5397 sibling->emit_fini_breadcrumb_dw; 5398 ve->base.breadcrumbs = 5399 intel_breadcrumbs_get(sibling->breadcrumbs); 5400 5401 
ve->base.flags |= sibling->flags; 5402 5403 ve->base.props.timeslice_duration_ms = 5404 sibling->props.timeslice_duration_ms; 5405 ve->base.props.preempt_timeout_ms = 5406 sibling->props.preempt_timeout_ms; 5407 } 5408 } 5409 5410 return &ve->context; 5411 5412 err_put: 5413 intel_context_put(&ve->context); 5414 return ERR_PTR(err); 5415 } 5416 5417 bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve) 5418 { 5419 struct intel_engine_cs *engine; 5420 intel_engine_mask_t tmp, mask = ve->mask; 5421 5422 for_each_engine_masked(engine, ve->gt, mask, tmp) 5423 if (READ_ONCE(engine->props.heartbeat_interval_ms)) 5424 return true; 5425 5426 return false; 5427 } 5428 5429 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 5430 #include "selftest_guc.c" 5431 #include "selftest_guc_multi_lrc.c" 5432 #include "selftest_guc_hangcheck.c" 5433 #endif 5434