1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2014 Intel Corporation 4 */ 5 6 #include <linux/circ_buf.h> 7 8 #include "gem/i915_gem_context.h" 9 #include "gem/i915_gem_lmem.h" 10 #include "gt/gen8_engine_cs.h" 11 #include "gt/intel_breadcrumbs.h" 12 #include "gt/intel_context.h" 13 #include "gt/intel_engine_heartbeat.h" 14 #include "gt/intel_engine_pm.h" 15 #include "gt/intel_engine_regs.h" 16 #include "gt/intel_gpu_commands.h" 17 #include "gt/intel_gt.h" 18 #include "gt/intel_gt_clock_utils.h" 19 #include "gt/intel_gt_irq.h" 20 #include "gt/intel_gt_pm.h" 21 #include "gt/intel_gt_regs.h" 22 #include "gt/intel_gt_requests.h" 23 #include "gt/intel_lrc.h" 24 #include "gt/intel_lrc_reg.h" 25 #include "gt/intel_mocs.h" 26 #include "gt/intel_ring.h" 27 28 #include "intel_guc_ads.h" 29 #include "intel_guc_capture.h" 30 #include "intel_guc_print.h" 31 #include "intel_guc_submission.h" 32 33 #include "i915_drv.h" 34 #include "i915_reg.h" 35 #include "i915_trace.h" 36 37 /** 38 * DOC: GuC-based command submission 39 * 40 * The Scratch registers: 41 * There are 16 MMIO-based registers start from 0xC180. The kernel driver writes 42 * a value to the action register (SOFT_SCRATCH_0) along with any data. It then 43 * triggers an interrupt on the GuC via another register write (0xC4C8). 44 * Firmware writes a success/fail code back to the action register after 45 * processes the request. The kernel driver polls waiting for this update and 46 * then proceeds. 47 * 48 * Command Transport buffers (CTBs): 49 * Covered in detail in other sections but CTBs (Host to GuC - H2G, GuC to Host 50 * - G2H) are a message interface between the i915 and GuC. 51 * 52 * Context registration: 53 * Before a context can be submitted it must be registered with the GuC via a 54 * H2G. A unique guc_id is associated with each context. The context is either 55 * registered at request creation time (normal operation) or at submission time 56 * (abnormal operation, e.g. after a reset). 57 * 58 * Context submission: 59 * The i915 updates the LRC tail value in memory. The i915 must enable the 60 * scheduling of the context within the GuC for the GuC to actually consider it. 61 * Therefore, the first time a disabled context is submitted we use a schedule 62 * enable H2G, while follow up submissions are done via the context submit H2G, 63 * which informs the GuC that a previously enabled context has new work 64 * available. 65 * 66 * Context unpin: 67 * To unpin a context a H2G is used to disable scheduling. When the 68 * corresponding G2H returns indicating the scheduling disable operation has 69 * completed it is safe to unpin the context. While a disable is in flight it 70 * isn't safe to resubmit the context so a fence is used to stall all future 71 * requests of that context until the G2H is returned. Because this interaction 72 * with the GuC takes a non-zero amount of time we delay the disabling of 73 * scheduling after the pin count goes to zero by a configurable period of time 74 * (see SCHED_DISABLE_DELAY_MS). The thought is this gives the user a window of 75 * time to resubmit something on the context before doing this costly operation. 76 * This delay is only done if the context isn't closed and the guc_id usage is 77 * less than a threshold (see NUM_SCHED_DISABLE_GUC_IDS_THRESHOLD). 78 * 79 * Context deregistration: 80 * Before a context can be destroyed or if we steal its guc_id we must 81 * deregister the context with the GuC via H2G. 
If stealing the guc_id it isn't 82 * safe to submit anything to this guc_id until the deregister completes so a 83 * fence is used to stall all requests associated with this guc_id until the 84 * corresponding G2H returns indicating the guc_id has been deregistered. 85 * 86 * submission_state.guc_ids: 87 * Unique number associated with private GuC context data passed in during 88 * context registration / submission / deregistration. 64k available. Simple ida 89 * is used for allocation. 90 * 91 * Stealing guc_ids: 92 * If no guc_ids are available they can be stolen from another context at 93 * request creation time if that context is unpinned. If a guc_id can't be found 94 * we punt this problem to the user as we believe this is near impossible to hit 95 * during normal use cases. 96 * 97 * Locking: 98 * In the GuC submission code we have 3 basic spin locks which protect 99 * everything. Details about each below. 100 * 101 * sched_engine->lock 102 * This is the submission lock for all contexts that share an i915 schedule 103 * engine (sched_engine), thus only one of the contexts which share a 104 * sched_engine can be submitting at a time. Currently only one sched_engine is 105 * used for all of GuC submission but that could change in the future. 106 * 107 * guc->submission_state.lock 108 * Global lock for GuC submission state. Protects guc_ids and destroyed contexts 109 * list. 110 * 111 * ce->guc_state.lock 112 * Protects everything under ce->guc_state. Ensures that a context is in the 113 * correct state before issuing a H2G. e.g. We don't issue a schedule disable 114 * on a disabled context (bad idea), we don't issue a schedule enable when a 115 * schedule disable is in flight, etc... Also protects list of inflight requests 116 * on the context and the priority management state. Lock is individual to each 117 * context. 118 * 119 * Lock ordering rules: 120 * sched_engine->lock -> ce->guc_state.lock 121 * guc->submission_state.lock -> ce->guc_state.lock 122 * 123 * Reset races: 124 * When a full GT reset is triggered it is assumed that some G2H responses to 125 * H2Gs can be lost as the GuC is also reset. Losing these G2H can prove to be 126 * fatal as we do certain operations upon receiving a G2H (e.g. destroy 127 * contexts, release guc_ids, etc...). When this occurs we can scrub the 128 * context state and cleanup appropriately, however this is quite racey. 129 * To avoid races, the reset code must disable submission before scrubbing for 130 * the missing G2H, while the submission code must check for submission being 131 * disabled and skip sending H2Gs and updating context states when it is. Both 132 * sides must also make sure to hold the relevant locks. 133 */ 134 135 /* GuC Virtual Engine */ 136 struct guc_virtual_engine { 137 struct intel_engine_cs base; 138 struct intel_context context; 139 }; 140 141 static struct intel_context * 142 guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count, 143 unsigned long flags); 144 145 static struct intel_context * 146 guc_create_parallel(struct intel_engine_cs **engines, 147 unsigned int num_siblings, 148 unsigned int width); 149 150 #define GUC_REQUEST_SIZE 64 /* bytes */ 151 152 /* 153 * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous 154 * per the GuC submission interface. A different allocation algorithm is used 155 * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to 156 * partition the guc_id space. 
We believe the number of multi-lrc contexts in 157 * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for 158 * multi-lrc. 159 */ 160 #define NUMBER_MULTI_LRC_GUC_ID(guc) \ 161 ((guc)->submission_state.num_guc_ids / 16) 162 163 /* 164 * Below is a set of functions which control the GuC scheduling state which 165 * require a lock. 166 */ 167 #define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER BIT(0) 168 #define SCHED_STATE_DESTROYED BIT(1) 169 #define SCHED_STATE_PENDING_DISABLE BIT(2) 170 #define SCHED_STATE_BANNED BIT(3) 171 #define SCHED_STATE_ENABLED BIT(4) 172 #define SCHED_STATE_PENDING_ENABLE BIT(5) 173 #define SCHED_STATE_REGISTERED BIT(6) 174 #define SCHED_STATE_POLICY_REQUIRED BIT(7) 175 #define SCHED_STATE_CLOSED BIT(8) 176 #define SCHED_STATE_BLOCKED_SHIFT 9 177 #define SCHED_STATE_BLOCKED BIT(SCHED_STATE_BLOCKED_SHIFT) 178 #define SCHED_STATE_BLOCKED_MASK (0xfff << SCHED_STATE_BLOCKED_SHIFT) 179 180 static inline void init_sched_state(struct intel_context *ce) 181 { 182 lockdep_assert_held(&ce->guc_state.lock); 183 ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK; 184 } 185 186 /* 187 * Kernel contexts can have SCHED_STATE_REGISTERED after suspend. 188 * A context close can race with the submission path, so SCHED_STATE_CLOSED 189 * can be set immediately before we try to register. 190 */ 191 #define SCHED_STATE_VALID_INIT \ 192 (SCHED_STATE_BLOCKED_MASK | \ 193 SCHED_STATE_CLOSED | \ 194 SCHED_STATE_REGISTERED) 195 196 __maybe_unused 197 static bool sched_state_is_init(struct intel_context *ce) 198 { 199 return !(ce->guc_state.sched_state & ~SCHED_STATE_VALID_INIT); 200 } 201 202 static inline bool 203 context_wait_for_deregister_to_register(struct intel_context *ce) 204 { 205 return ce->guc_state.sched_state & 206 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; 207 } 208 209 static inline void 210 set_context_wait_for_deregister_to_register(struct intel_context *ce) 211 { 212 lockdep_assert_held(&ce->guc_state.lock); 213 ce->guc_state.sched_state |= 214 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; 215 } 216 217 static inline void 218 clr_context_wait_for_deregister_to_register(struct intel_context *ce) 219 { 220 lockdep_assert_held(&ce->guc_state.lock); 221 ce->guc_state.sched_state &= 222 ~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; 223 } 224 225 static inline bool 226 context_destroyed(struct intel_context *ce) 227 { 228 return ce->guc_state.sched_state & SCHED_STATE_DESTROYED; 229 } 230 231 static inline void 232 set_context_destroyed(struct intel_context *ce) 233 { 234 lockdep_assert_held(&ce->guc_state.lock); 235 ce->guc_state.sched_state |= SCHED_STATE_DESTROYED; 236 } 237 238 static inline bool context_pending_disable(struct intel_context *ce) 239 { 240 return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE; 241 } 242 243 static inline void set_context_pending_disable(struct intel_context *ce) 244 { 245 lockdep_assert_held(&ce->guc_state.lock); 246 ce->guc_state.sched_state |= SCHED_STATE_PENDING_DISABLE; 247 } 248 249 static inline void clr_context_pending_disable(struct intel_context *ce) 250 { 251 lockdep_assert_held(&ce->guc_state.lock); 252 ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_DISABLE; 253 } 254 255 static inline bool context_banned(struct intel_context *ce) 256 { 257 return ce->guc_state.sched_state & SCHED_STATE_BANNED; 258 } 259 260 static inline void set_context_banned(struct intel_context *ce) 261 { 262 lockdep_assert_held(&ce->guc_state.lock); 263 ce->guc_state.sched_state |= SCHED_STATE_BANNED; 264 } 265 266 
static inline void clr_context_banned(struct intel_context *ce) 267 { 268 lockdep_assert_held(&ce->guc_state.lock); 269 ce->guc_state.sched_state &= ~SCHED_STATE_BANNED; 270 } 271 272 static inline bool context_enabled(struct intel_context *ce) 273 { 274 return ce->guc_state.sched_state & SCHED_STATE_ENABLED; 275 } 276 277 static inline void set_context_enabled(struct intel_context *ce) 278 { 279 lockdep_assert_held(&ce->guc_state.lock); 280 ce->guc_state.sched_state |= SCHED_STATE_ENABLED; 281 } 282 283 static inline void clr_context_enabled(struct intel_context *ce) 284 { 285 lockdep_assert_held(&ce->guc_state.lock); 286 ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED; 287 } 288 289 static inline bool context_pending_enable(struct intel_context *ce) 290 { 291 return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE; 292 } 293 294 static inline void set_context_pending_enable(struct intel_context *ce) 295 { 296 lockdep_assert_held(&ce->guc_state.lock); 297 ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE; 298 } 299 300 static inline void clr_context_pending_enable(struct intel_context *ce) 301 { 302 lockdep_assert_held(&ce->guc_state.lock); 303 ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE; 304 } 305 306 static inline bool context_registered(struct intel_context *ce) 307 { 308 return ce->guc_state.sched_state & SCHED_STATE_REGISTERED; 309 } 310 311 static inline void set_context_registered(struct intel_context *ce) 312 { 313 lockdep_assert_held(&ce->guc_state.lock); 314 ce->guc_state.sched_state |= SCHED_STATE_REGISTERED; 315 } 316 317 static inline void clr_context_registered(struct intel_context *ce) 318 { 319 lockdep_assert_held(&ce->guc_state.lock); 320 ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED; 321 } 322 323 static inline bool context_policy_required(struct intel_context *ce) 324 { 325 return ce->guc_state.sched_state & SCHED_STATE_POLICY_REQUIRED; 326 } 327 328 static inline void set_context_policy_required(struct intel_context *ce) 329 { 330 lockdep_assert_held(&ce->guc_state.lock); 331 ce->guc_state.sched_state |= SCHED_STATE_POLICY_REQUIRED; 332 } 333 334 static inline void clr_context_policy_required(struct intel_context *ce) 335 { 336 lockdep_assert_held(&ce->guc_state.lock); 337 ce->guc_state.sched_state &= ~SCHED_STATE_POLICY_REQUIRED; 338 } 339 340 static inline bool context_close_done(struct intel_context *ce) 341 { 342 return ce->guc_state.sched_state & SCHED_STATE_CLOSED; 343 } 344 345 static inline void set_context_close_done(struct intel_context *ce) 346 { 347 lockdep_assert_held(&ce->guc_state.lock); 348 ce->guc_state.sched_state |= SCHED_STATE_CLOSED; 349 } 350 351 static inline u32 context_blocked(struct intel_context *ce) 352 { 353 return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >> 354 SCHED_STATE_BLOCKED_SHIFT; 355 } 356 357 static inline void incr_context_blocked(struct intel_context *ce) 358 { 359 lockdep_assert_held(&ce->guc_state.lock); 360 361 ce->guc_state.sched_state += SCHED_STATE_BLOCKED; 362 363 GEM_BUG_ON(!context_blocked(ce)); /* Overflow check */ 364 } 365 366 static inline void decr_context_blocked(struct intel_context *ce) 367 { 368 lockdep_assert_held(&ce->guc_state.lock); 369 370 GEM_BUG_ON(!context_blocked(ce)); /* Underflow check */ 371 372 ce->guc_state.sched_state -= SCHED_STATE_BLOCKED; 373 } 374 375 static struct intel_context * 376 request_to_scheduling_context(struct i915_request *rq) 377 { 378 return intel_context_to_parent(rq->context); 379 } 380 381 static inline bool 
context_guc_id_invalid(struct intel_context *ce) 382 { 383 return ce->guc_id.id == GUC_INVALID_CONTEXT_ID; 384 } 385 386 static inline void set_context_guc_id_invalid(struct intel_context *ce) 387 { 388 ce->guc_id.id = GUC_INVALID_CONTEXT_ID; 389 } 390 391 static inline struct intel_guc *ce_to_guc(struct intel_context *ce) 392 { 393 return &ce->engine->gt->uc.guc; 394 } 395 396 static inline struct i915_priolist *to_priolist(struct rb_node *rb) 397 { 398 return rb_entry(rb, struct i915_priolist, node); 399 } 400 401 /* 402 * When using multi-lrc submission a scratch memory area is reserved in the 403 * parent's context state for the process descriptor, work queue, and handshake 404 * between the parent + children contexts to insert safe preemption points 405 * between each of the BBs. Currently the scratch area is sized to a page. 406 * 407 * The layout of this scratch area is below: 408 * 0 guc_process_desc 409 * + sizeof(struct guc_process_desc) child go 410 * + CACHELINE_BYTES child join[0] 411 * ... 412 * + CACHELINE_BYTES child join[n - 1] 413 * ... unused 414 * PARENT_SCRATCH_SIZE / 2 work queue start 415 * ... work queue 416 * PARENT_SCRATCH_SIZE - 1 work queue end 417 */ 418 #define WQ_SIZE (PARENT_SCRATCH_SIZE / 2) 419 #define WQ_OFFSET (PARENT_SCRATCH_SIZE - WQ_SIZE) 420 421 struct sync_semaphore { 422 u32 semaphore; 423 u8 unused[CACHELINE_BYTES - sizeof(u32)]; 424 }; 425 426 struct parent_scratch { 427 union guc_descs { 428 struct guc_sched_wq_desc wq_desc; 429 struct guc_process_desc_v69 pdesc; 430 } descs; 431 432 struct sync_semaphore go; 433 struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1]; 434 435 u8 unused[WQ_OFFSET - sizeof(union guc_descs) - 436 sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)]; 437 438 u32 wq[WQ_SIZE / sizeof(u32)]; 439 }; 440 441 static u32 __get_parent_scratch_offset(struct intel_context *ce) 442 { 443 GEM_BUG_ON(!ce->parallel.guc.parent_page); 444 445 return ce->parallel.guc.parent_page * PAGE_SIZE; 446 } 447 448 static u32 __get_wq_offset(struct intel_context *ce) 449 { 450 BUILD_BUG_ON(offsetof(struct parent_scratch, wq) != WQ_OFFSET); 451 452 return __get_parent_scratch_offset(ce) + WQ_OFFSET; 453 } 454 455 static struct parent_scratch * 456 __get_parent_scratch(struct intel_context *ce) 457 { 458 BUILD_BUG_ON(sizeof(struct parent_scratch) != PARENT_SCRATCH_SIZE); 459 BUILD_BUG_ON(sizeof(struct sync_semaphore) != CACHELINE_BYTES); 460 461 /* 462 * Need to subtract LRC_STATE_OFFSET here as the 463 * parallel.guc.parent_page is the offset into ce->state while 464 * ce->lrc_reg_reg is ce->state + LRC_STATE_OFFSET. 465 */ 466 return (struct parent_scratch *) 467 (ce->lrc_reg_state + 468 ((__get_parent_scratch_offset(ce) - 469 LRC_STATE_OFFSET) / sizeof(u32))); 470 } 471 472 static struct guc_process_desc_v69 * 473 __get_process_desc_v69(struct intel_context *ce) 474 { 475 struct parent_scratch *ps = __get_parent_scratch(ce); 476 477 return &ps->descs.pdesc; 478 } 479 480 static struct guc_sched_wq_desc * 481 __get_wq_desc_v70(struct intel_context *ce) 482 { 483 struct parent_scratch *ps = __get_parent_scratch(ce); 484 485 return &ps->descs.wq_desc; 486 } 487 488 static u32 *get_wq_pointer(struct intel_context *ce, u32 wqi_size) 489 { 490 /* 491 * Check for space in work queue. Caching a value of head pointer in 492 * intel_context structure in order reduce the number accesses to shared 493 * GPU memory which may be across a PCIe bus. 
494 */ 495 #define AVAILABLE_SPACE \ 496 CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE) 497 if (wqi_size > AVAILABLE_SPACE) { 498 ce->parallel.guc.wqi_head = READ_ONCE(*ce->parallel.guc.wq_head); 499 500 if (wqi_size > AVAILABLE_SPACE) 501 return NULL; 502 } 503 #undef AVAILABLE_SPACE 504 505 return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)]; 506 } 507 508 static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id) 509 { 510 struct intel_context *ce = xa_load(&guc->context_lookup, id); 511 512 GEM_BUG_ON(id >= GUC_MAX_CONTEXT_ID); 513 514 return ce; 515 } 516 517 static struct guc_lrc_desc_v69 *__get_lrc_desc_v69(struct intel_guc *guc, u32 index) 518 { 519 struct guc_lrc_desc_v69 *base = guc->lrc_desc_pool_vaddr_v69; 520 521 if (!base) 522 return NULL; 523 524 GEM_BUG_ON(index >= GUC_MAX_CONTEXT_ID); 525 526 return &base[index]; 527 } 528 529 static int guc_lrc_desc_pool_create_v69(struct intel_guc *guc) 530 { 531 u32 size; 532 int ret; 533 534 size = PAGE_ALIGN(sizeof(struct guc_lrc_desc_v69) * 535 GUC_MAX_CONTEXT_ID); 536 ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool_v69, 537 (void **)&guc->lrc_desc_pool_vaddr_v69); 538 if (ret) 539 return ret; 540 541 return 0; 542 } 543 544 static void guc_lrc_desc_pool_destroy_v69(struct intel_guc *guc) 545 { 546 if (!guc->lrc_desc_pool_vaddr_v69) 547 return; 548 549 guc->lrc_desc_pool_vaddr_v69 = NULL; 550 i915_vma_unpin_and_release(&guc->lrc_desc_pool_v69, I915_VMA_RELEASE_MAP); 551 } 552 553 static inline bool guc_submission_initialized(struct intel_guc *guc) 554 { 555 return guc->submission_initialized; 556 } 557 558 static inline void _reset_lrc_desc_v69(struct intel_guc *guc, u32 id) 559 { 560 struct guc_lrc_desc_v69 *desc = __get_lrc_desc_v69(guc, id); 561 562 if (desc) 563 memset(desc, 0, sizeof(*desc)); 564 } 565 566 static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id) 567 { 568 return __get_context(guc, id); 569 } 570 571 static inline void set_ctx_id_mapping(struct intel_guc *guc, u32 id, 572 struct intel_context *ce) 573 { 574 unsigned long flags; 575 576 /* 577 * xarray API doesn't have xa_save_irqsave wrapper, so calling the 578 * lower level functions directly. 579 */ 580 xa_lock_irqsave(&guc->context_lookup, flags); 581 __xa_store(&guc->context_lookup, id, ce, GFP_ATOMIC); 582 xa_unlock_irqrestore(&guc->context_lookup, flags); 583 } 584 585 static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id) 586 { 587 unsigned long flags; 588 589 if (unlikely(!guc_submission_initialized(guc))) 590 return; 591 592 _reset_lrc_desc_v69(guc, id); 593 594 /* 595 * xarray API doesn't have xa_erase_irqsave wrapper, so calling 596 * the lower level functions directly. 597 */ 598 xa_lock_irqsave(&guc->context_lookup, flags); 599 __xa_erase(&guc->context_lookup, id); 600 xa_unlock_irqrestore(&guc->context_lookup, flags); 601 } 602 603 static void decr_outstanding_submission_g2h(struct intel_guc *guc) 604 { 605 if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) 606 wake_up_all(&guc->ct.wq); 607 } 608 609 static int guc_submission_send_busy_loop(struct intel_guc *guc, 610 const u32 *action, 611 u32 len, 612 u32 g2h_len_dw, 613 bool loop) 614 { 615 /* 616 * We always loop when a send requires a reply (i.e. g2h_len_dw > 0), 617 * so we don't handle the case where we don't get a reply because we 618 * aborted the send due to the channel being busy. 
619 */ 620 GEM_BUG_ON(g2h_len_dw && !loop); 621 622 if (g2h_len_dw) 623 atomic_inc(&guc->outstanding_submission_g2h); 624 625 return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); 626 } 627 628 int intel_guc_wait_for_pending_msg(struct intel_guc *guc, 629 atomic_t *wait_var, 630 bool interruptible, 631 long timeout) 632 { 633 const int state = interruptible ? 634 TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; 635 DEFINE_WAIT(wait); 636 637 might_sleep(); 638 GEM_BUG_ON(timeout < 0); 639 640 if (!atomic_read(wait_var)) 641 return 0; 642 643 if (!timeout) 644 return -ETIME; 645 646 for (;;) { 647 prepare_to_wait(&guc->ct.wq, &wait, state); 648 649 if (!atomic_read(wait_var)) 650 break; 651 652 if (signal_pending_state(state, current)) { 653 timeout = -EINTR; 654 break; 655 } 656 657 if (!timeout) { 658 timeout = -ETIME; 659 break; 660 } 661 662 timeout = io_schedule_timeout(timeout); 663 } 664 finish_wait(&guc->ct.wq, &wait); 665 666 return (timeout < 0) ? timeout : 0; 667 } 668 669 int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout) 670 { 671 if (!intel_uc_uses_guc_submission(&guc_to_gt(guc)->uc)) 672 return 0; 673 674 return intel_guc_wait_for_pending_msg(guc, 675 &guc->outstanding_submission_g2h, 676 true, timeout); 677 } 678 679 static int guc_context_policy_init_v70(struct intel_context *ce, bool loop); 680 static int try_context_registration(struct intel_context *ce, bool loop); 681 682 static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq) 683 { 684 int err = 0; 685 struct intel_context *ce = request_to_scheduling_context(rq); 686 u32 action[3]; 687 int len = 0; 688 u32 g2h_len_dw = 0; 689 bool enabled; 690 691 lockdep_assert_held(&rq->engine->sched_engine->lock); 692 693 /* 694 * Corner case where requests were sitting in the priority list or a 695 * request resubmitted after the context was banned. 696 */ 697 if (unlikely(!intel_context_is_schedulable(ce))) { 698 i915_request_put(i915_request_mark_eio(rq)); 699 intel_engine_signal_breadcrumbs(ce->engine); 700 return 0; 701 } 702 703 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref)); 704 GEM_BUG_ON(context_guc_id_invalid(ce)); 705 706 if (context_policy_required(ce)) { 707 err = guc_context_policy_init_v70(ce, false); 708 if (err) 709 return err; 710 } 711 712 spin_lock(&ce->guc_state.lock); 713 714 /* 715 * The request / context will be run on the hardware when scheduling 716 * gets enabled in the unblock. For multi-lrc we still submit the 717 * context to move the LRC tails. 718 */ 719 if (unlikely(context_blocked(ce) && !intel_context_is_parent(ce))) 720 goto out; 721 722 enabled = context_enabled(ce) || context_blocked(ce); 723 724 if (!enabled) { 725 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET; 726 action[len++] = ce->guc_id.id; 727 action[len++] = GUC_CONTEXT_ENABLE; 728 set_context_pending_enable(ce); 729 intel_context_get(ce); 730 g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; 731 } else { 732 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT; 733 action[len++] = ce->guc_id.id; 734 } 735 736 err = intel_guc_send_nb(guc, action, len, g2h_len_dw); 737 if (!enabled && !err) { 738 trace_intel_context_sched_enable(ce); 739 atomic_inc(&guc->outstanding_submission_g2h); 740 set_context_enabled(ce); 741 742 /* 743 * Without multi-lrc KMD does the submission step (moving the 744 * lrc tail) so enabling scheduling is sufficient to submit the 745 * context. 
This isn't the case in multi-lrc submission as the 746 * GuC needs to move the tails, hence the need for another H2G 747 * to submit a multi-lrc context after enabling scheduling. 748 */ 749 if (intel_context_is_parent(ce)) { 750 action[0] = INTEL_GUC_ACTION_SCHED_CONTEXT; 751 err = intel_guc_send_nb(guc, action, len - 1, 0); 752 } 753 } else if (!enabled) { 754 clr_context_pending_enable(ce); 755 intel_context_put(ce); 756 } 757 if (likely(!err)) 758 trace_i915_request_guc_submit(rq); 759 760 out: 761 spin_unlock(&ce->guc_state.lock); 762 return err; 763 } 764 765 static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) 766 { 767 int ret = __guc_add_request(guc, rq); 768 769 if (unlikely(ret == -EBUSY)) { 770 guc->stalled_request = rq; 771 guc->submission_stall_reason = STALL_ADD_REQUEST; 772 } 773 774 return ret; 775 } 776 777 static inline void guc_set_lrc_tail(struct i915_request *rq) 778 { 779 rq->context->lrc_reg_state[CTX_RING_TAIL] = 780 intel_ring_set_tail(rq->ring, rq->tail); 781 } 782 783 static inline int rq_prio(const struct i915_request *rq) 784 { 785 return rq->sched.attr.priority; 786 } 787 788 static bool is_multi_lrc_rq(struct i915_request *rq) 789 { 790 return intel_context_is_parallel(rq->context); 791 } 792 793 static bool can_merge_rq(struct i915_request *rq, 794 struct i915_request *last) 795 { 796 return request_to_scheduling_context(rq) == 797 request_to_scheduling_context(last); 798 } 799 800 static u32 wq_space_until_wrap(struct intel_context *ce) 801 { 802 return (WQ_SIZE - ce->parallel.guc.wqi_tail); 803 } 804 805 static void write_wqi(struct intel_context *ce, u32 wqi_size) 806 { 807 BUILD_BUG_ON(!is_power_of_2(WQ_SIZE)); 808 809 /* 810 * Ensure WQI are visible before updating tail 811 */ 812 intel_guc_write_barrier(ce_to_guc(ce)); 813 814 ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) & 815 (WQ_SIZE - 1); 816 WRITE_ONCE(*ce->parallel.guc.wq_tail, ce->parallel.guc.wqi_tail); 817 } 818 819 static int guc_wq_noop_append(struct intel_context *ce) 820 { 821 u32 *wqi = get_wq_pointer(ce, wq_space_until_wrap(ce)); 822 u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1; 823 824 if (!wqi) 825 return -EBUSY; 826 827 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw)); 828 829 *wqi = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | 830 FIELD_PREP(WQ_LEN_MASK, len_dw); 831 ce->parallel.guc.wqi_tail = 0; 832 833 return 0; 834 } 835 836 static int __guc_wq_item_append(struct i915_request *rq) 837 { 838 struct intel_context *ce = request_to_scheduling_context(rq); 839 struct intel_context *child; 840 unsigned int wqi_size = (ce->parallel.number_children + 4) * 841 sizeof(u32); 842 u32 *wqi; 843 u32 len_dw = (wqi_size / sizeof(u32)) - 1; 844 int ret; 845 846 /* Ensure context is in correct state updating work queue */ 847 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref)); 848 GEM_BUG_ON(context_guc_id_invalid(ce)); 849 GEM_BUG_ON(context_wait_for_deregister_to_register(ce)); 850 GEM_BUG_ON(!ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id)); 851 852 /* Insert NOOP if this work queue item will wrap the tail pointer. 
*/ 853 if (wqi_size > wq_space_until_wrap(ce)) { 854 ret = guc_wq_noop_append(ce); 855 if (ret) 856 return ret; 857 } 858 859 wqi = get_wq_pointer(ce, wqi_size); 860 if (!wqi) 861 return -EBUSY; 862 863 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw)); 864 865 *wqi++ = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) | 866 FIELD_PREP(WQ_LEN_MASK, len_dw); 867 *wqi++ = ce->lrc.lrca; 868 *wqi++ = FIELD_PREP(WQ_GUC_ID_MASK, ce->guc_id.id) | 869 FIELD_PREP(WQ_RING_TAIL_MASK, ce->ring->tail / sizeof(u64)); 870 *wqi++ = 0; /* fence_id */ 871 for_each_child(ce, child) 872 *wqi++ = child->ring->tail / sizeof(u64); 873 874 write_wqi(ce, wqi_size); 875 876 return 0; 877 } 878 879 static int guc_wq_item_append(struct intel_guc *guc, 880 struct i915_request *rq) 881 { 882 struct intel_context *ce = request_to_scheduling_context(rq); 883 int ret; 884 885 if (unlikely(!intel_context_is_schedulable(ce))) 886 return 0; 887 888 ret = __guc_wq_item_append(rq); 889 if (unlikely(ret == -EBUSY)) { 890 guc->stalled_request = rq; 891 guc->submission_stall_reason = STALL_MOVE_LRC_TAIL; 892 } 893 894 return ret; 895 } 896 897 static bool multi_lrc_submit(struct i915_request *rq) 898 { 899 struct intel_context *ce = request_to_scheduling_context(rq); 900 901 intel_ring_set_tail(rq->ring, rq->tail); 902 903 /* 904 * We expect the front end (execbuf IOCTL) to set this flag on the last 905 * request generated from a multi-BB submission. This indicates to the 906 * backend (GuC interface) that we should submit this context thus 907 * submitting all the requests generated in parallel. 908 */ 909 return test_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, &rq->fence.flags) || 910 !intel_context_is_schedulable(ce); 911 } 912 913 static int guc_dequeue_one_context(struct intel_guc *guc) 914 { 915 struct i915_sched_engine * const sched_engine = guc->sched_engine; 916 struct i915_request *last = NULL; 917 bool submit = false; 918 struct rb_node *rb; 919 int ret; 920 921 lockdep_assert_held(&sched_engine->lock); 922 923 if (guc->stalled_request) { 924 submit = true; 925 last = guc->stalled_request; 926 927 switch (guc->submission_stall_reason) { 928 case STALL_REGISTER_CONTEXT: 929 goto register_context; 930 case STALL_MOVE_LRC_TAIL: 931 goto move_lrc_tail; 932 case STALL_ADD_REQUEST: 933 goto add_request; 934 default: 935 MISSING_CASE(guc->submission_stall_reason); 936 } 937 } 938 939 while ((rb = rb_first_cached(&sched_engine->queue))) { 940 struct i915_priolist *p = to_priolist(rb); 941 struct i915_request *rq, *rn; 942 943 priolist_for_each_request_consume(rq, rn, p) { 944 if (last && !can_merge_rq(rq, last)) 945 goto register_context; 946 947 list_del_init(&rq->sched.link); 948 949 __i915_request_submit(rq); 950 951 trace_i915_request_in(rq, 0); 952 last = rq; 953 954 if (is_multi_lrc_rq(rq)) { 955 /* 956 * We need to coalesce all multi-lrc requests in 957 * a relationship into a single H2G. We are 958 * guaranteed that all of these requests will be 959 * submitted sequentially. 
960 */ 961 if (multi_lrc_submit(rq)) { 962 submit = true; 963 goto register_context; 964 } 965 } else { 966 submit = true; 967 } 968 } 969 970 rb_erase_cached(&p->node, &sched_engine->queue); 971 i915_priolist_free(p); 972 } 973 974 register_context: 975 if (submit) { 976 struct intel_context *ce = request_to_scheduling_context(last); 977 978 if (unlikely(!ctx_id_mapped(guc, ce->guc_id.id) && 979 intel_context_is_schedulable(ce))) { 980 ret = try_context_registration(ce, false); 981 if (unlikely(ret == -EPIPE)) { 982 goto deadlk; 983 } else if (ret == -EBUSY) { 984 guc->stalled_request = last; 985 guc->submission_stall_reason = 986 STALL_REGISTER_CONTEXT; 987 goto schedule_tasklet; 988 } else if (ret != 0) { 989 GEM_WARN_ON(ret); /* Unexpected */ 990 goto deadlk; 991 } 992 } 993 994 move_lrc_tail: 995 if (is_multi_lrc_rq(last)) { 996 ret = guc_wq_item_append(guc, last); 997 if (ret == -EBUSY) { 998 goto schedule_tasklet; 999 } else if (ret != 0) { 1000 GEM_WARN_ON(ret); /* Unexpected */ 1001 goto deadlk; 1002 } 1003 } else { 1004 guc_set_lrc_tail(last); 1005 } 1006 1007 add_request: 1008 ret = guc_add_request(guc, last); 1009 if (unlikely(ret == -EPIPE)) { 1010 goto deadlk; 1011 } else if (ret == -EBUSY) { 1012 goto schedule_tasklet; 1013 } else if (ret != 0) { 1014 GEM_WARN_ON(ret); /* Unexpected */ 1015 goto deadlk; 1016 } 1017 } 1018 1019 guc->stalled_request = NULL; 1020 guc->submission_stall_reason = STALL_NONE; 1021 return submit; 1022 1023 deadlk: 1024 sched_engine->tasklet.callback = NULL; 1025 tasklet_disable_nosync(&sched_engine->tasklet); 1026 return false; 1027 1028 schedule_tasklet: 1029 tasklet_schedule(&sched_engine->tasklet); 1030 return false; 1031 } 1032 1033 static void guc_submission_tasklet(struct tasklet_struct *t) 1034 { 1035 struct i915_sched_engine *sched_engine = 1036 from_tasklet(sched_engine, t, tasklet); 1037 unsigned long flags; 1038 bool loop; 1039 1040 spin_lock_irqsave(&sched_engine->lock, flags); 1041 1042 do { 1043 loop = guc_dequeue_one_context(sched_engine->private_data); 1044 } while (loop); 1045 1046 i915_sched_engine_reset_on_empty(sched_engine); 1047 1048 spin_unlock_irqrestore(&sched_engine->lock, flags); 1049 } 1050 1051 static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir) 1052 { 1053 if (iir & GT_RENDER_USER_INTERRUPT) 1054 intel_engine_signal_breadcrumbs(engine); 1055 } 1056 1057 static void __guc_context_destroy(struct intel_context *ce); 1058 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce); 1059 static void guc_signal_context_fence(struct intel_context *ce); 1060 static void guc_cancel_context_requests(struct intel_context *ce); 1061 static void guc_blocked_fence_complete(struct intel_context *ce); 1062 1063 static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) 1064 { 1065 struct intel_context *ce; 1066 unsigned long index, flags; 1067 bool pending_disable, pending_enable, deregister, destroyed, banned; 1068 1069 xa_lock_irqsave(&guc->context_lookup, flags); 1070 xa_for_each(&guc->context_lookup, index, ce) { 1071 /* 1072 * Corner case where the ref count on the object is zero but and 1073 * deregister G2H was lost. In this case we don't touch the ref 1074 * count and finish the destroy of the context. 
1075 */ 1076 bool do_put = kref_get_unless_zero(&ce->ref); 1077 1078 xa_unlock(&guc->context_lookup); 1079 1080 if (test_bit(CONTEXT_GUC_INIT, &ce->flags) && 1081 (cancel_delayed_work(&ce->guc_state.sched_disable_delay_work))) { 1082 /* successful cancel so jump straight to close it */ 1083 intel_context_sched_disable_unpin(ce); 1084 } 1085 1086 spin_lock(&ce->guc_state.lock); 1087 1088 /* 1089 * Once we are at this point submission_disabled() is guaranteed 1090 * to be visible to all callers who set the below flags (see above 1091 * flush and flushes in reset_prepare). If submission_disabled() 1092 * is set, the caller shouldn't set these flags. 1093 */ 1094 1095 destroyed = context_destroyed(ce); 1096 pending_enable = context_pending_enable(ce); 1097 pending_disable = context_pending_disable(ce); 1098 deregister = context_wait_for_deregister_to_register(ce); 1099 banned = context_banned(ce); 1100 init_sched_state(ce); 1101 1102 spin_unlock(&ce->guc_state.lock); 1103 1104 if (pending_enable || destroyed || deregister) { 1105 decr_outstanding_submission_g2h(guc); 1106 if (deregister) 1107 guc_signal_context_fence(ce); 1108 if (destroyed) { 1109 intel_gt_pm_put_async(guc_to_gt(guc)); 1110 release_guc_id(guc, ce); 1111 __guc_context_destroy(ce); 1112 } 1113 if (pending_enable || deregister) 1114 intel_context_put(ce); 1115 } 1116 1117 /* Not mutualy exclusive with above if statement. */ 1118 if (pending_disable) { 1119 guc_signal_context_fence(ce); 1120 if (banned) { 1121 guc_cancel_context_requests(ce); 1122 intel_engine_signal_breadcrumbs(ce->engine); 1123 } 1124 intel_context_sched_disable_unpin(ce); 1125 decr_outstanding_submission_g2h(guc); 1126 1127 spin_lock(&ce->guc_state.lock); 1128 guc_blocked_fence_complete(ce); 1129 spin_unlock(&ce->guc_state.lock); 1130 1131 intel_context_put(ce); 1132 } 1133 1134 if (do_put) 1135 intel_context_put(ce); 1136 xa_lock(&guc->context_lookup); 1137 } 1138 xa_unlock_irqrestore(&guc->context_lookup, flags); 1139 } 1140 1141 /* 1142 * GuC stores busyness stats for each engine at context in/out boundaries. A 1143 * context 'in' logs execution start time, 'out' adds in -> out delta to total. 1144 * i915/kmd accesses 'start', 'total' and 'context id' from memory shared with 1145 * GuC. 1146 * 1147 * __i915_pmu_event_read samples engine busyness. When sampling, if context id 1148 * is valid (!= ~0) and start is non-zero, the engine is considered to be 1149 * active. For an active engine total busyness = total + (now - start), where 1150 * 'now' is the time at which the busyness is sampled. For inactive engine, 1151 * total busyness = total. 1152 * 1153 * All times are captured from GUCPMTIMESTAMP reg and are in gt clock domain. 1154 * 1155 * The start and total values provided by GuC are 32 bits and wrap around in a 1156 * few minutes. Since perf pmu provides busyness as 64 bit monotonically 1157 * increasing ns values, there is a need for this implementation to account for 1158 * overflows and extend the GuC provided values to 64 bits before returning 1159 * busyness to the user. In order to do that, a worker runs periodically at 1160 * frequency = 1/8th the time it takes for the timestamp to wrap (i.e. once in 1161 * 27 seconds for a gt clock frequency of 19.2 MHz). 
1162 */ 1163 1164 #define WRAP_TIME_CLKS U32_MAX 1165 #define POLL_TIME_CLKS (WRAP_TIME_CLKS >> 3) 1166 1167 static void 1168 __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start) 1169 { 1170 u32 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp); 1171 u32 gt_stamp_last = lower_32_bits(guc->timestamp.gt_stamp); 1172 1173 if (new_start == lower_32_bits(*prev_start)) 1174 return; 1175 1176 /* 1177 * When gt is unparked, we update the gt timestamp and start the ping 1178 * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt 1179 * is unparked, all switched in contexts will have a start time that is 1180 * within +/- POLL_TIME_CLKS of the most recent gt_stamp. 1181 * 1182 * If neither gt_stamp nor new_start has rolled over, then the 1183 * gt_stamp_hi does not need to be adjusted, however if one of them has 1184 * rolled over, we need to adjust gt_stamp_hi accordingly. 1185 * 1186 * The below conditions address the cases of new_start rollover and 1187 * gt_stamp_last rollover respectively. 1188 */ 1189 if (new_start < gt_stamp_last && 1190 (new_start - gt_stamp_last) <= POLL_TIME_CLKS) 1191 gt_stamp_hi++; 1192 1193 if (new_start > gt_stamp_last && 1194 (gt_stamp_last - new_start) <= POLL_TIME_CLKS && gt_stamp_hi) 1195 gt_stamp_hi--; 1196 1197 *prev_start = ((u64)gt_stamp_hi << 32) | new_start; 1198 } 1199 1200 #define record_read(map_, field_) \ 1201 iosys_map_rd_field(map_, 0, struct guc_engine_usage_record, field_) 1202 1203 /* 1204 * GuC updates shared memory and KMD reads it. Since this is not synchronized, 1205 * we run into a race where the value read is inconsistent. Sometimes the 1206 * inconsistency is in reading the upper MSB bytes of the last_in value when 1207 * this race occurs. 2 types of cases are seen - upper 8 bits are zero and upper 1208 * 24 bits are zero. Since these are non-zero values, it is non-trivial to 1209 * determine validity of these values. Instead we read the values multiple times 1210 * until they are consistent. In test runs, 3 attempts results in consistent 1211 * values. The upper bound is set to 6 attempts and may need to be tuned as per 1212 * any new occurences. 
1213 */ 1214 static void __get_engine_usage_record(struct intel_engine_cs *engine, 1215 u32 *last_in, u32 *id, u32 *total) 1216 { 1217 struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine); 1218 int i = 0; 1219 1220 do { 1221 *last_in = record_read(&rec_map, last_switch_in_stamp); 1222 *id = record_read(&rec_map, current_context_index); 1223 *total = record_read(&rec_map, total_runtime); 1224 1225 if (record_read(&rec_map, last_switch_in_stamp) == *last_in && 1226 record_read(&rec_map, current_context_index) == *id && 1227 record_read(&rec_map, total_runtime) == *total) 1228 break; 1229 } while (++i < 6); 1230 } 1231 1232 static void guc_update_engine_gt_clks(struct intel_engine_cs *engine) 1233 { 1234 struct intel_engine_guc_stats *stats = &engine->stats.guc; 1235 struct intel_guc *guc = &engine->gt->uc.guc; 1236 u32 last_switch, ctx_id, total; 1237 1238 lockdep_assert_held(&guc->timestamp.lock); 1239 1240 __get_engine_usage_record(engine, &last_switch, &ctx_id, &total); 1241 1242 stats->running = ctx_id != ~0U && last_switch; 1243 if (stats->running) 1244 __extend_last_switch(guc, &stats->start_gt_clk, last_switch); 1245 1246 /* 1247 * Instead of adjusting the total for overflow, just add the 1248 * difference from previous sample stats->total_gt_clks 1249 */ 1250 if (total && total != ~0U) { 1251 stats->total_gt_clks += (u32)(total - stats->prev_total); 1252 stats->prev_total = total; 1253 } 1254 } 1255 1256 static u32 gpm_timestamp_shift(struct intel_gt *gt) 1257 { 1258 intel_wakeref_t wakeref; 1259 u32 reg, shift; 1260 1261 with_intel_runtime_pm(gt->uncore->rpm, wakeref) 1262 reg = intel_uncore_read(gt->uncore, RPM_CONFIG0); 1263 1264 shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >> 1265 GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT; 1266 1267 return 3 - shift; 1268 } 1269 1270 static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now) 1271 { 1272 struct intel_gt *gt = guc_to_gt(guc); 1273 u32 gt_stamp_lo, gt_stamp_hi; 1274 u64 gpm_ts; 1275 1276 lockdep_assert_held(&guc->timestamp.lock); 1277 1278 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp); 1279 gpm_ts = intel_uncore_read64_2x32(gt->uncore, MISC_STATUS0, 1280 MISC_STATUS1) >> guc->timestamp.shift; 1281 gt_stamp_lo = lower_32_bits(gpm_ts); 1282 *now = ktime_get(); 1283 1284 if (gt_stamp_lo < lower_32_bits(guc->timestamp.gt_stamp)) 1285 gt_stamp_hi++; 1286 1287 guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_lo; 1288 } 1289 1290 /* 1291 * Unlike the execlist mode of submission total and active times are in terms of 1292 * gt clocks. The *now parameter is retained to return the cpu time at which the 1293 * busyness was sampled. 1294 */ 1295 static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now) 1296 { 1297 struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc; 1298 struct i915_gpu_error *gpu_error = &engine->i915->gpu_error; 1299 struct intel_gt *gt = engine->gt; 1300 struct intel_guc *guc = >->uc.guc; 1301 u64 total, gt_stamp_saved; 1302 unsigned long flags; 1303 u32 reset_count; 1304 bool in_reset; 1305 1306 spin_lock_irqsave(&guc->timestamp.lock, flags); 1307 1308 /* 1309 * If a reset happened, we risk reading partially updated engine 1310 * busyness from GuC, so we just use the driver stored copy of busyness. 1311 * Synchronize with gt reset using reset_count and the 1312 * I915_RESET_BACKOFF flag. 
Note that reset flow updates the reset_count 1313 * after I915_RESET_BACKOFF flag, so ensure that the reset_count is 1314 * usable by checking the flag afterwards. 1315 */ 1316 reset_count = i915_reset_count(gpu_error); 1317 in_reset = test_bit(I915_RESET_BACKOFF, >->reset.flags); 1318 1319 *now = ktime_get(); 1320 1321 /* 1322 * The active busyness depends on start_gt_clk and gt_stamp. 1323 * gt_stamp is updated by i915 only when gt is awake and the 1324 * start_gt_clk is derived from GuC state. To get a consistent 1325 * view of activity, we query the GuC state only if gt is awake. 1326 */ 1327 if (!in_reset && intel_gt_pm_get_if_awake(gt)) { 1328 stats_saved = *stats; 1329 gt_stamp_saved = guc->timestamp.gt_stamp; 1330 /* 1331 * Update gt_clks, then gt timestamp to simplify the 'gt_stamp - 1332 * start_gt_clk' calculation below for active engines. 1333 */ 1334 guc_update_engine_gt_clks(engine); 1335 guc_update_pm_timestamp(guc, now); 1336 intel_gt_pm_put_async(gt); 1337 if (i915_reset_count(gpu_error) != reset_count) { 1338 *stats = stats_saved; 1339 guc->timestamp.gt_stamp = gt_stamp_saved; 1340 } 1341 } 1342 1343 total = intel_gt_clock_interval_to_ns(gt, stats->total_gt_clks); 1344 if (stats->running) { 1345 u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk; 1346 1347 total += intel_gt_clock_interval_to_ns(gt, clk); 1348 } 1349 1350 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1351 1352 return ns_to_ktime(total); 1353 } 1354 1355 static void guc_enable_busyness_worker(struct intel_guc *guc) 1356 { 1357 mod_delayed_work(system_highpri_wq, &guc->timestamp.work, guc->timestamp.ping_delay); 1358 } 1359 1360 static void guc_cancel_busyness_worker(struct intel_guc *guc) 1361 { 1362 cancel_delayed_work_sync(&guc->timestamp.work); 1363 } 1364 1365 static void __reset_guc_busyness_stats(struct intel_guc *guc) 1366 { 1367 struct intel_gt *gt = guc_to_gt(guc); 1368 struct intel_engine_cs *engine; 1369 enum intel_engine_id id; 1370 unsigned long flags; 1371 ktime_t unused; 1372 1373 guc_cancel_busyness_worker(guc); 1374 1375 spin_lock_irqsave(&guc->timestamp.lock, flags); 1376 1377 guc_update_pm_timestamp(guc, &unused); 1378 for_each_engine(engine, gt, id) { 1379 guc_update_engine_gt_clks(engine); 1380 engine->stats.guc.prev_total = 0; 1381 } 1382 1383 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1384 } 1385 1386 static void __update_guc_busyness_stats(struct intel_guc *guc) 1387 { 1388 struct intel_gt *gt = guc_to_gt(guc); 1389 struct intel_engine_cs *engine; 1390 enum intel_engine_id id; 1391 unsigned long flags; 1392 ktime_t unused; 1393 1394 guc->timestamp.last_stat_jiffies = jiffies; 1395 1396 spin_lock_irqsave(&guc->timestamp.lock, flags); 1397 1398 guc_update_pm_timestamp(guc, &unused); 1399 for_each_engine(engine, gt, id) 1400 guc_update_engine_gt_clks(engine); 1401 1402 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1403 } 1404 1405 static void __guc_context_update_stats(struct intel_context *ce) 1406 { 1407 struct intel_guc *guc = ce_to_guc(ce); 1408 unsigned long flags; 1409 1410 spin_lock_irqsave(&guc->timestamp.lock, flags); 1411 lrc_update_runtime(ce); 1412 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1413 } 1414 1415 static void guc_context_update_stats(struct intel_context *ce) 1416 { 1417 if (!intel_context_pin_if_active(ce)) 1418 return; 1419 1420 __guc_context_update_stats(ce); 1421 intel_context_unpin(ce); 1422 } 1423 1424 static void guc_timestamp_ping(struct work_struct *wrk) 1425 { 1426 struct intel_guc *guc = container_of(wrk, 
typeof(*guc), 1427 timestamp.work.work); 1428 struct intel_uc *uc = container_of(guc, typeof(*uc), guc); 1429 struct intel_gt *gt = guc_to_gt(guc); 1430 struct intel_context *ce; 1431 intel_wakeref_t wakeref; 1432 unsigned long index; 1433 int srcu, ret; 1434 1435 /* 1436 * Ideally the busyness worker should take a gt pm wakeref because the 1437 * worker only needs to be active while gt is awake. However, the 1438 * gt_park path cancels the worker synchronously and this complicates 1439 * the flow if the worker is also running at the same time. The cancel 1440 * waits for the worker and when the worker releases the wakeref, that 1441 * would call gt_park and would lead to a deadlock. 1442 * 1443 * The resolution is to take the global pm wakeref if runtime pm is 1444 * already active. If not, we don't need to update the busyness stats as 1445 * the stats would already be updated when the gt was parked. 1446 * 1447 * Note: 1448 * - We do not requeue the worker if we cannot take a reference to runtime 1449 * pm since intel_guc_busyness_unpark would requeue the worker in the 1450 * resume path. 1451 * 1452 * - If the gt was parked longer than time taken for GT timestamp to roll 1453 * over, we ignore those rollovers since we don't care about tracking 1454 * the exact GT time. We only care about roll overs when the gt is 1455 * active and running workloads. 1456 * 1457 * - There is a window of time between gt_park and runtime suspend, 1458 * where the worker may run. This is acceptable since the worker will 1459 * not find any new data to update busyness. 1460 */ 1461 wakeref = intel_runtime_pm_get_if_active(>->i915->runtime_pm); 1462 if (!wakeref) 1463 return; 1464 1465 /* 1466 * Synchronize with gt reset to make sure the worker does not 1467 * corrupt the engine/guc stats. NB: can't actually block waiting 1468 * for a reset to complete as the reset requires flushing out 1469 * this worker thread if started. So waiting would deadlock. 
1470 */ 1471 ret = intel_gt_reset_trylock(gt, &srcu); 1472 if (ret) 1473 goto err_trylock; 1474 1475 __update_guc_busyness_stats(guc); 1476 1477 /* adjust context stats for overflow */ 1478 xa_for_each(&guc->context_lookup, index, ce) 1479 guc_context_update_stats(ce); 1480 1481 intel_gt_reset_unlock(gt, srcu); 1482 1483 guc_enable_busyness_worker(guc); 1484 1485 err_trylock: 1486 intel_runtime_pm_put(>->i915->runtime_pm, wakeref); 1487 } 1488 1489 static int guc_action_enable_usage_stats(struct intel_guc *guc) 1490 { 1491 u32 offset = intel_guc_engine_usage_offset(guc); 1492 u32 action[] = { 1493 INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF, 1494 offset, 1495 0, 1496 }; 1497 1498 return intel_guc_send(guc, action, ARRAY_SIZE(action)); 1499 } 1500 1501 static int guc_init_engine_stats(struct intel_guc *guc) 1502 { 1503 struct intel_gt *gt = guc_to_gt(guc); 1504 intel_wakeref_t wakeref; 1505 int ret; 1506 1507 with_intel_runtime_pm(>->i915->runtime_pm, wakeref) 1508 ret = guc_action_enable_usage_stats(guc); 1509 1510 if (ret) 1511 guc_err(guc, "Failed to enable usage stats: %pe\n", ERR_PTR(ret)); 1512 else 1513 guc_enable_busyness_worker(guc); 1514 1515 return ret; 1516 } 1517 1518 static void guc_fini_engine_stats(struct intel_guc *guc) 1519 { 1520 guc_cancel_busyness_worker(guc); 1521 } 1522 1523 void intel_guc_busyness_park(struct intel_gt *gt) 1524 { 1525 struct intel_guc *guc = >->uc.guc; 1526 1527 if (!guc_submission_initialized(guc)) 1528 return; 1529 1530 /* 1531 * There is a race with suspend flow where the worker runs after suspend 1532 * and causes an unclaimed register access warning. Cancel the worker 1533 * synchronously here. 1534 */ 1535 guc_cancel_busyness_worker(guc); 1536 1537 /* 1538 * Before parking, we should sample engine busyness stats if we need to. 1539 * We can skip it if we are less than half a ping from the last time we 1540 * sampled the busyness stats. 
1541 */ 1542 if (guc->timestamp.last_stat_jiffies && 1543 !time_after(jiffies, guc->timestamp.last_stat_jiffies + 1544 (guc->timestamp.ping_delay / 2))) 1545 return; 1546 1547 __update_guc_busyness_stats(guc); 1548 } 1549 1550 void intel_guc_busyness_unpark(struct intel_gt *gt) 1551 { 1552 struct intel_guc *guc = >->uc.guc; 1553 unsigned long flags; 1554 ktime_t unused; 1555 1556 if (!guc_submission_initialized(guc)) 1557 return; 1558 1559 spin_lock_irqsave(&guc->timestamp.lock, flags); 1560 guc_update_pm_timestamp(guc, &unused); 1561 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1562 guc_enable_busyness_worker(guc); 1563 } 1564 1565 static inline bool 1566 submission_disabled(struct intel_guc *guc) 1567 { 1568 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1569 1570 return unlikely(!sched_engine || 1571 !__tasklet_is_enabled(&sched_engine->tasklet) || 1572 intel_gt_is_wedged(guc_to_gt(guc))); 1573 } 1574 1575 static void disable_submission(struct intel_guc *guc) 1576 { 1577 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1578 1579 if (__tasklet_is_enabled(&sched_engine->tasklet)) { 1580 GEM_BUG_ON(!guc->ct.enabled); 1581 __tasklet_disable_sync_once(&sched_engine->tasklet); 1582 sched_engine->tasklet.callback = NULL; 1583 } 1584 } 1585 1586 static void enable_submission(struct intel_guc *guc) 1587 { 1588 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1589 unsigned long flags; 1590 1591 spin_lock_irqsave(&guc->sched_engine->lock, flags); 1592 sched_engine->tasklet.callback = guc_submission_tasklet; 1593 wmb(); /* Make sure callback visible */ 1594 if (!__tasklet_is_enabled(&sched_engine->tasklet) && 1595 __tasklet_enable(&sched_engine->tasklet)) { 1596 GEM_BUG_ON(!guc->ct.enabled); 1597 1598 /* And kick in case we missed a new request submission. */ 1599 tasklet_hi_schedule(&sched_engine->tasklet); 1600 } 1601 spin_unlock_irqrestore(&guc->sched_engine->lock, flags); 1602 } 1603 1604 static void guc_flush_submissions(struct intel_guc *guc) 1605 { 1606 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1607 unsigned long flags; 1608 1609 spin_lock_irqsave(&sched_engine->lock, flags); 1610 spin_unlock_irqrestore(&sched_engine->lock, flags); 1611 } 1612 1613 static void guc_flush_destroyed_contexts(struct intel_guc *guc); 1614 1615 void intel_guc_submission_reset_prepare(struct intel_guc *guc) 1616 { 1617 if (unlikely(!guc_submission_initialized(guc))) { 1618 /* Reset called during driver load? GuC not yet initialised! 
*/ 1619 return; 1620 } 1621 1622 intel_gt_park_heartbeats(guc_to_gt(guc)); 1623 disable_submission(guc); 1624 guc->interrupts.disable(guc); 1625 __reset_guc_busyness_stats(guc); 1626 1627 /* Flush IRQ handler */ 1628 spin_lock_irq(guc_to_gt(guc)->irq_lock); 1629 spin_unlock_irq(guc_to_gt(guc)->irq_lock); 1630 1631 guc_flush_submissions(guc); 1632 guc_flush_destroyed_contexts(guc); 1633 flush_work(&guc->ct.requests.worker); 1634 1635 scrub_guc_desc_for_outstanding_g2h(guc); 1636 } 1637 1638 static struct intel_engine_cs * 1639 guc_virtual_get_sibling(struct intel_engine_cs *ve, unsigned int sibling) 1640 { 1641 struct intel_engine_cs *engine; 1642 intel_engine_mask_t tmp, mask = ve->mask; 1643 unsigned int num_siblings = 0; 1644 1645 for_each_engine_masked(engine, ve->gt, mask, tmp) 1646 if (num_siblings++ == sibling) 1647 return engine; 1648 1649 return NULL; 1650 } 1651 1652 static inline struct intel_engine_cs * 1653 __context_to_physical_engine(struct intel_context *ce) 1654 { 1655 struct intel_engine_cs *engine = ce->engine; 1656 1657 if (intel_engine_is_virtual(engine)) 1658 engine = guc_virtual_get_sibling(engine, 0); 1659 1660 return engine; 1661 } 1662 1663 static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub) 1664 { 1665 struct intel_engine_cs *engine = __context_to_physical_engine(ce); 1666 1667 if (!intel_context_is_schedulable(ce)) 1668 return; 1669 1670 GEM_BUG_ON(!intel_context_is_pinned(ce)); 1671 1672 /* 1673 * We want a simple context + ring to execute the breadcrumb update. 1674 * We cannot rely on the context being intact across the GPU hang, 1675 * so clear it and rebuild just what we need for the breadcrumb. 1676 * All pending requests for this context will be zapped, and any 1677 * future request will be after userspace has had the opportunity 1678 * to recreate its own state. 1679 */ 1680 if (scrub) 1681 lrc_init_regs(ce, engine, true); 1682 1683 /* Rerun the request; its payload has been neutered (if guilty). */ 1684 lrc_update_regs(ce, engine, head); 1685 } 1686 1687 static void guc_engine_reset_prepare(struct intel_engine_cs *engine) 1688 { 1689 /* 1690 * Wa_22011802037: In addition to stopping the cs, we need 1691 * to wait for any pending mi force wakeups 1692 */ 1693 if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) || 1694 (GRAPHICS_VER(engine->i915) >= 11 && 1695 GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70))) { 1696 intel_engine_stop_cs(engine); 1697 intel_engine_wait_for_pending_mi_fw(engine); 1698 } 1699 } 1700 1701 static void guc_reset_nop(struct intel_engine_cs *engine) 1702 { 1703 } 1704 1705 static void guc_rewind_nop(struct intel_engine_cs *engine, bool stalled) 1706 { 1707 } 1708 1709 static void 1710 __unwind_incomplete_requests(struct intel_context *ce) 1711 { 1712 struct i915_request *rq, *rn; 1713 struct list_head *pl; 1714 int prio = I915_PRIORITY_INVALID; 1715 struct i915_sched_engine * const sched_engine = 1716 ce->engine->sched_engine; 1717 unsigned long flags; 1718 1719 spin_lock_irqsave(&sched_engine->lock, flags); 1720 spin_lock(&ce->guc_state.lock); 1721 list_for_each_entry_safe_reverse(rq, rn, 1722 &ce->guc_state.requests, 1723 sched.link) { 1724 if (i915_request_completed(rq)) 1725 continue; 1726 1727 list_del_init(&rq->sched.link); 1728 __i915_request_unsubmit(rq); 1729 1730 /* Push the request back into the queue for later resubmission. 
*/ 1731 GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); 1732 if (rq_prio(rq) != prio) { 1733 prio = rq_prio(rq); 1734 pl = i915_sched_lookup_priolist(sched_engine, prio); 1735 } 1736 GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine)); 1737 1738 list_add(&rq->sched.link, pl); 1739 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 1740 } 1741 spin_unlock(&ce->guc_state.lock); 1742 spin_unlock_irqrestore(&sched_engine->lock, flags); 1743 } 1744 1745 static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t stalled) 1746 { 1747 bool guilty; 1748 struct i915_request *rq; 1749 unsigned long flags; 1750 u32 head; 1751 int i, number_children = ce->parallel.number_children; 1752 struct intel_context *parent = ce; 1753 1754 GEM_BUG_ON(intel_context_is_child(ce)); 1755 1756 intel_context_get(ce); 1757 1758 /* 1759 * GuC will implicitly mark the context as non-schedulable when it sends 1760 * the reset notification. Make sure our state reflects this change. The 1761 * context will be marked enabled on resubmission. 1762 */ 1763 spin_lock_irqsave(&ce->guc_state.lock, flags); 1764 clr_context_enabled(ce); 1765 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 1766 1767 /* 1768 * For each context in the relationship find the hanging request 1769 * resetting each context / request as needed 1770 */ 1771 for (i = 0; i < number_children + 1; ++i) { 1772 if (!intel_context_is_pinned(ce)) 1773 goto next_context; 1774 1775 guilty = false; 1776 rq = intel_context_get_active_request(ce); 1777 if (!rq) { 1778 head = ce->ring->tail; 1779 goto out_replay; 1780 } 1781 1782 if (i915_request_started(rq)) 1783 guilty = stalled & ce->engine->mask; 1784 1785 GEM_BUG_ON(i915_active_is_idle(&ce->active)); 1786 head = intel_ring_wrap(ce->ring, rq->head); 1787 1788 __i915_request_reset(rq, guilty); 1789 i915_request_put(rq); 1790 out_replay: 1791 guc_reset_state(ce, head, guilty); 1792 next_context: 1793 if (i != number_children) 1794 ce = list_next_entry(ce, parallel.child_link); 1795 } 1796 1797 __unwind_incomplete_requests(parent); 1798 intel_context_put(parent); 1799 } 1800 1801 void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled) 1802 { 1803 struct intel_context *ce; 1804 unsigned long index; 1805 unsigned long flags; 1806 1807 if (unlikely(!guc_submission_initialized(guc))) { 1808 /* Reset called during driver load? GuC not yet initialised! */ 1809 return; 1810 } 1811 1812 xa_lock_irqsave(&guc->context_lookup, flags); 1813 xa_for_each(&guc->context_lookup, index, ce) { 1814 if (!kref_get_unless_zero(&ce->ref)) 1815 continue; 1816 1817 xa_unlock(&guc->context_lookup); 1818 1819 if (intel_context_is_pinned(ce) && 1820 !intel_context_is_child(ce)) 1821 __guc_reset_context(ce, stalled); 1822 1823 intel_context_put(ce); 1824 1825 xa_lock(&guc->context_lookup); 1826 } 1827 xa_unlock_irqrestore(&guc->context_lookup, flags); 1828 1829 /* GuC is blown away, drop all references to contexts */ 1830 xa_destroy(&guc->context_lookup); 1831 } 1832 1833 static void guc_cancel_context_requests(struct intel_context *ce) 1834 { 1835 struct i915_sched_engine *sched_engine = ce_to_guc(ce)->sched_engine; 1836 struct i915_request *rq; 1837 unsigned long flags; 1838 1839 /* Mark all executing requests as skipped. 
*/ 1840 spin_lock_irqsave(&sched_engine->lock, flags); 1841 spin_lock(&ce->guc_state.lock); 1842 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) 1843 i915_request_put(i915_request_mark_eio(rq)); 1844 spin_unlock(&ce->guc_state.lock); 1845 spin_unlock_irqrestore(&sched_engine->lock, flags); 1846 } 1847 1848 static void 1849 guc_cancel_sched_engine_requests(struct i915_sched_engine *sched_engine) 1850 { 1851 struct i915_request *rq, *rn; 1852 struct rb_node *rb; 1853 unsigned long flags; 1854 1855 /* Can be called during boot if GuC fails to load */ 1856 if (!sched_engine) 1857 return; 1858 1859 /* 1860 * Before we call engine->cancel_requests(), we should have exclusive 1861 * access to the submission state. This is arranged for us by the 1862 * caller disabling the interrupt generation, the tasklet and other 1863 * threads that may then access the same state, giving us a free hand 1864 * to reset state. However, we still need to let lockdep be aware that 1865 * we know this state may be accessed in hardirq context, so we 1866 * disable the irq around this manipulation and we want to keep 1867 * the spinlock focused on its duties and not accidentally conflate 1868 * coverage to the submission's irq state. (Similarly, although we 1869 * shouldn't need to disable irq around the manipulation of the 1870 * submission's irq state, we also wish to remind ourselves that 1871 * it is irq state.) 1872 */ 1873 spin_lock_irqsave(&sched_engine->lock, flags); 1874 1875 /* Flush the queued requests to the timeline list (for retiring). */ 1876 while ((rb = rb_first_cached(&sched_engine->queue))) { 1877 struct i915_priolist *p = to_priolist(rb); 1878 1879 priolist_for_each_request_consume(rq, rn, p) { 1880 list_del_init(&rq->sched.link); 1881 1882 __i915_request_submit(rq); 1883 1884 i915_request_put(i915_request_mark_eio(rq)); 1885 } 1886 1887 rb_erase_cached(&p->node, &sched_engine->queue); 1888 i915_priolist_free(p); 1889 } 1890 1891 /* Remaining _unready_ requests will be nop'ed when submitted */ 1892 1893 sched_engine->queue_priority_hint = INT_MIN; 1894 sched_engine->queue = RB_ROOT_CACHED; 1895 1896 spin_unlock_irqrestore(&sched_engine->lock, flags); 1897 } 1898 1899 void intel_guc_submission_cancel_requests(struct intel_guc *guc) 1900 { 1901 struct intel_context *ce; 1902 unsigned long index; 1903 unsigned long flags; 1904 1905 xa_lock_irqsave(&guc->context_lookup, flags); 1906 xa_for_each(&guc->context_lookup, index, ce) { 1907 if (!kref_get_unless_zero(&ce->ref)) 1908 continue; 1909 1910 xa_unlock(&guc->context_lookup); 1911 1912 if (intel_context_is_pinned(ce) && 1913 !intel_context_is_child(ce)) 1914 guc_cancel_context_requests(ce); 1915 1916 intel_context_put(ce); 1917 1918 xa_lock(&guc->context_lookup); 1919 } 1920 xa_unlock_irqrestore(&guc->context_lookup, flags); 1921 1922 guc_cancel_sched_engine_requests(guc->sched_engine); 1923 1924 /* GuC is blown away, drop all references to contexts */ 1925 xa_destroy(&guc->context_lookup); 1926 } 1927 1928 void intel_guc_submission_reset_finish(struct intel_guc *guc) 1929 { 1930 /* Reset called during driver load or during wedge? */ 1931 if (unlikely(!guc_submission_initialized(guc) || 1932 intel_gt_is_wedged(guc_to_gt(guc)))) { 1933 return; 1934 } 1935 1936 /* 1937 * Technically possible for either of these values to be non-zero here, 1938 * but very unlikely + harmless. Regardless let's add a warn so we can 1939 * see in CI if this happens frequently / a precursor to taking down the 1940 * machine. 
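 * Either way the counter is cleared here so that submission can be
 * re-enabled with a clean slate.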
1941 */ 1942 GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h)); 1943 atomic_set(&guc->outstanding_submission_g2h, 0); 1944 1945 intel_guc_global_policies_update(guc); 1946 enable_submission(guc); 1947 intel_gt_unpark_heartbeats(guc_to_gt(guc)); 1948 } 1949 1950 static void destroyed_worker_func(struct work_struct *w); 1951 static void reset_fail_worker_func(struct work_struct *w); 1952 1953 /* 1954 * Set up the memory resources to be shared with the GuC (via the GGTT) 1955 * at firmware loading time. 1956 */ 1957 int intel_guc_submission_init(struct intel_guc *guc) 1958 { 1959 struct intel_gt *gt = guc_to_gt(guc); 1960 int ret; 1961 1962 if (guc->submission_initialized) 1963 return 0; 1964 1965 if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 0, 0)) { 1966 ret = guc_lrc_desc_pool_create_v69(guc); 1967 if (ret) 1968 return ret; 1969 } 1970 1971 guc->submission_state.guc_ids_bitmap = 1972 bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); 1973 if (!guc->submission_state.guc_ids_bitmap) { 1974 ret = -ENOMEM; 1975 goto destroy_pool; 1976 } 1977 1978 guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ; 1979 guc->timestamp.shift = gpm_timestamp_shift(gt); 1980 guc->submission_initialized = true; 1981 1982 return 0; 1983 1984 destroy_pool: 1985 guc_lrc_desc_pool_destroy_v69(guc); 1986 1987 return ret; 1988 } 1989 1990 void intel_guc_submission_fini(struct intel_guc *guc) 1991 { 1992 if (!guc->submission_initialized) 1993 return; 1994 1995 guc_flush_destroyed_contexts(guc); 1996 guc_lrc_desc_pool_destroy_v69(guc); 1997 i915_sched_engine_put(guc->sched_engine); 1998 bitmap_free(guc->submission_state.guc_ids_bitmap); 1999 guc->submission_initialized = false; 2000 } 2001 2002 static inline void queue_request(struct i915_sched_engine *sched_engine, 2003 struct i915_request *rq, 2004 int prio) 2005 { 2006 GEM_BUG_ON(!list_empty(&rq->sched.link)); 2007 list_add_tail(&rq->sched.link, 2008 i915_sched_lookup_priolist(sched_engine, prio)); 2009 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 2010 tasklet_hi_schedule(&sched_engine->tasklet); 2011 } 2012 2013 static int guc_bypass_tasklet_submit(struct intel_guc *guc, 2014 struct i915_request *rq) 2015 { 2016 int ret = 0; 2017 2018 __i915_request_submit(rq); 2019 2020 trace_i915_request_in(rq, 0); 2021 2022 if (is_multi_lrc_rq(rq)) { 2023 if (multi_lrc_submit(rq)) { 2024 ret = guc_wq_item_append(guc, rq); 2025 if (!ret) 2026 ret = guc_add_request(guc, rq); 2027 } 2028 } else { 2029 guc_set_lrc_tail(rq); 2030 ret = guc_add_request(guc, rq); 2031 } 2032 2033 if (unlikely(ret == -EPIPE)) 2034 disable_submission(guc); 2035 2036 return ret; 2037 } 2038 2039 static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq) 2040 { 2041 struct i915_sched_engine *sched_engine = rq->engine->sched_engine; 2042 struct intel_context *ce = request_to_scheduling_context(rq); 2043 2044 return submission_disabled(guc) || guc->stalled_request || 2045 !i915_sched_engine_is_empty(sched_engine) || 2046 !ctx_id_mapped(guc, ce->guc_id.id); 2047 } 2048 2049 static void guc_submit_request(struct i915_request *rq) 2050 { 2051 struct i915_sched_engine *sched_engine = rq->engine->sched_engine; 2052 struct intel_guc *guc = &rq->engine->gt->uc.guc; 2053 unsigned long flags; 2054 2055 /* Will be called from irq-context when using foreign fences. 
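 * Hence the irqsave form when taking sched_engine->lock below. The
 * request is queued for the submission tasklet unless need_tasklet()
 * reports the direct path is possible, in which case it is handed
 * straight to the GuC via guc_bypass_tasklet_submit().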
*/ 2056 spin_lock_irqsave(&sched_engine->lock, flags); 2057 2058 if (need_tasklet(guc, rq)) 2059 queue_request(sched_engine, rq, rq_prio(rq)); 2060 else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY) 2061 tasklet_hi_schedule(&sched_engine->tasklet); 2062 2063 spin_unlock_irqrestore(&sched_engine->lock, flags); 2064 } 2065 2066 static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) 2067 { 2068 int ret; 2069 2070 GEM_BUG_ON(intel_context_is_child(ce)); 2071 2072 if (intel_context_is_parent(ce)) 2073 ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap, 2074 NUMBER_MULTI_LRC_GUC_ID(guc), 2075 order_base_2(ce->parallel.number_children 2076 + 1)); 2077 else 2078 ret = ida_simple_get(&guc->submission_state.guc_ids, 2079 NUMBER_MULTI_LRC_GUC_ID(guc), 2080 guc->submission_state.num_guc_ids, 2081 GFP_KERNEL | __GFP_RETRY_MAYFAIL | 2082 __GFP_NOWARN); 2083 if (unlikely(ret < 0)) 2084 return ret; 2085 2086 if (!intel_context_is_parent(ce)) 2087 ++guc->submission_state.guc_ids_in_use; 2088 2089 ce->guc_id.id = ret; 2090 return 0; 2091 } 2092 2093 static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce) 2094 { 2095 GEM_BUG_ON(intel_context_is_child(ce)); 2096 2097 if (!context_guc_id_invalid(ce)) { 2098 if (intel_context_is_parent(ce)) { 2099 bitmap_release_region(guc->submission_state.guc_ids_bitmap, 2100 ce->guc_id.id, 2101 order_base_2(ce->parallel.number_children 2102 + 1)); 2103 } else { 2104 --guc->submission_state.guc_ids_in_use; 2105 ida_simple_remove(&guc->submission_state.guc_ids, 2106 ce->guc_id.id); 2107 } 2108 clr_ctx_id_mapping(guc, ce->guc_id.id); 2109 set_context_guc_id_invalid(ce); 2110 } 2111 if (!list_empty(&ce->guc_id.link)) 2112 list_del_init(&ce->guc_id.link); 2113 } 2114 2115 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce) 2116 { 2117 unsigned long flags; 2118 2119 spin_lock_irqsave(&guc->submission_state.lock, flags); 2120 __release_guc_id(guc, ce); 2121 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2122 } 2123 2124 static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce) 2125 { 2126 struct intel_context *cn; 2127 2128 lockdep_assert_held(&guc->submission_state.lock); 2129 GEM_BUG_ON(intel_context_is_child(ce)); 2130 GEM_BUG_ON(intel_context_is_parent(ce)); 2131 2132 if (!list_empty(&guc->submission_state.guc_id_list)) { 2133 cn = list_first_entry(&guc->submission_state.guc_id_list, 2134 struct intel_context, 2135 guc_id.link); 2136 2137 GEM_BUG_ON(atomic_read(&cn->guc_id.ref)); 2138 GEM_BUG_ON(context_guc_id_invalid(cn)); 2139 GEM_BUG_ON(intel_context_is_child(cn)); 2140 GEM_BUG_ON(intel_context_is_parent(cn)); 2141 2142 list_del_init(&cn->guc_id.link); 2143 ce->guc_id.id = cn->guc_id.id; 2144 2145 spin_lock(&cn->guc_state.lock); 2146 clr_context_registered(cn); 2147 spin_unlock(&cn->guc_state.lock); 2148 2149 set_context_guc_id_invalid(cn); 2150 2151 #ifdef CONFIG_DRM_I915_SELFTEST 2152 guc->number_guc_id_stolen++; 2153 #endif 2154 2155 return 0; 2156 } else { 2157 return -EAGAIN; 2158 } 2159 } 2160 2161 static int assign_guc_id(struct intel_guc *guc, struct intel_context *ce) 2162 { 2163 int ret; 2164 2165 lockdep_assert_held(&guc->submission_state.lock); 2166 GEM_BUG_ON(intel_context_is_child(ce)); 2167 2168 ret = new_guc_id(guc, ce); 2169 if (unlikely(ret < 0)) { 2170 if (intel_context_is_parent(ce)) 2171 return -ENOSPC; 2172 2173 ret = steal_guc_id(guc, ce); 2174 if (ret < 0) 2175 return ret; 2176 } 2177 2178 if (intel_context_is_parent(ce)) { 2179 struct 
intel_context *child; 2180 int i = 1; 2181 2182 for_each_child(ce, child) 2183 child->guc_id.id = ce->guc_id.id + i++; 2184 } 2185 2186 return 0; 2187 } 2188 2189 #define PIN_GUC_ID_TRIES 4 2190 static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce) 2191 { 2192 int ret = 0; 2193 unsigned long flags, tries = PIN_GUC_ID_TRIES; 2194 2195 GEM_BUG_ON(atomic_read(&ce->guc_id.ref)); 2196 2197 try_again: 2198 spin_lock_irqsave(&guc->submission_state.lock, flags); 2199 2200 might_lock(&ce->guc_state.lock); 2201 2202 if (context_guc_id_invalid(ce)) { 2203 ret = assign_guc_id(guc, ce); 2204 if (ret) 2205 goto out_unlock; 2206 ret = 1; /* Indidcates newly assigned guc_id */ 2207 } 2208 if (!list_empty(&ce->guc_id.link)) 2209 list_del_init(&ce->guc_id.link); 2210 atomic_inc(&ce->guc_id.ref); 2211 2212 out_unlock: 2213 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2214 2215 /* 2216 * -EAGAIN indicates no guc_id are available, let's retire any 2217 * outstanding requests to see if that frees up a guc_id. If the first 2218 * retire didn't help, insert a sleep with the timeslice duration before 2219 * attempting to retire more requests. Double the sleep period each 2220 * subsequent pass before finally giving up. The sleep period has max of 2221 * 100ms and minimum of 1ms. 2222 */ 2223 if (ret == -EAGAIN && --tries) { 2224 if (PIN_GUC_ID_TRIES - tries > 1) { 2225 unsigned int timeslice_shifted = 2226 ce->engine->props.timeslice_duration_ms << 2227 (PIN_GUC_ID_TRIES - tries - 2); 2228 unsigned int max = min_t(unsigned int, 100, 2229 timeslice_shifted); 2230 2231 msleep(max_t(unsigned int, max, 1)); 2232 } 2233 intel_gt_retire_requests(guc_to_gt(guc)); 2234 goto try_again; 2235 } 2236 2237 return ret; 2238 } 2239 2240 static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce) 2241 { 2242 unsigned long flags; 2243 2244 GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0); 2245 GEM_BUG_ON(intel_context_is_child(ce)); 2246 2247 if (unlikely(context_guc_id_invalid(ce) || 2248 intel_context_is_parent(ce))) 2249 return; 2250 2251 spin_lock_irqsave(&guc->submission_state.lock, flags); 2252 if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) && 2253 !atomic_read(&ce->guc_id.ref)) 2254 list_add_tail(&ce->guc_id.link, 2255 &guc->submission_state.guc_id_list); 2256 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2257 } 2258 2259 static int __guc_action_register_multi_lrc_v69(struct intel_guc *guc, 2260 struct intel_context *ce, 2261 u32 guc_id, 2262 u32 offset, 2263 bool loop) 2264 { 2265 struct intel_context *child; 2266 u32 action[4 + MAX_ENGINE_INSTANCE]; 2267 int len = 0; 2268 2269 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE); 2270 2271 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; 2272 action[len++] = guc_id; 2273 action[len++] = ce->parallel.number_children + 1; 2274 action[len++] = offset; 2275 for_each_child(ce, child) { 2276 offset += sizeof(struct guc_lrc_desc_v69); 2277 action[len++] = offset; 2278 } 2279 2280 return guc_submission_send_busy_loop(guc, action, len, 0, loop); 2281 } 2282 2283 static int __guc_action_register_multi_lrc_v70(struct intel_guc *guc, 2284 struct intel_context *ce, 2285 struct guc_ctxt_registration_info *info, 2286 bool loop) 2287 { 2288 struct intel_context *child; 2289 u32 action[13 + (MAX_ENGINE_INSTANCE * 2)]; 2290 int len = 0; 2291 u32 next_id; 2292 2293 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE); 2294 2295 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; 
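/*
 * Remaining payload (filled in below): parent registration info
 * (flags, context index, engine class, submit mask, WQ descriptor,
 * WQ base and size), the total number of contexts, the parent LRCA
 * and then one 64-bit LRCA per child.
 */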
2296 action[len++] = info->flags; 2297 action[len++] = info->context_idx; 2298 action[len++] = info->engine_class; 2299 action[len++] = info->engine_submit_mask; 2300 action[len++] = info->wq_desc_lo; 2301 action[len++] = info->wq_desc_hi; 2302 action[len++] = info->wq_base_lo; 2303 action[len++] = info->wq_base_hi; 2304 action[len++] = info->wq_size; 2305 action[len++] = ce->parallel.number_children + 1; 2306 action[len++] = info->hwlrca_lo; 2307 action[len++] = info->hwlrca_hi; 2308 2309 next_id = info->context_idx + 1; 2310 for_each_child(ce, child) { 2311 GEM_BUG_ON(next_id++ != child->guc_id.id); 2312 2313 /* 2314 * NB: GuC interface supports 64 bit LRCA even though i915/HW 2315 * only supports 32 bit currently. 2316 */ 2317 action[len++] = lower_32_bits(child->lrc.lrca); 2318 action[len++] = upper_32_bits(child->lrc.lrca); 2319 } 2320 2321 GEM_BUG_ON(len > ARRAY_SIZE(action)); 2322 2323 return guc_submission_send_busy_loop(guc, action, len, 0, loop); 2324 } 2325 2326 static int __guc_action_register_context_v69(struct intel_guc *guc, 2327 u32 guc_id, 2328 u32 offset, 2329 bool loop) 2330 { 2331 u32 action[] = { 2332 INTEL_GUC_ACTION_REGISTER_CONTEXT, 2333 guc_id, 2334 offset, 2335 }; 2336 2337 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2338 0, loop); 2339 } 2340 2341 static int __guc_action_register_context_v70(struct intel_guc *guc, 2342 struct guc_ctxt_registration_info *info, 2343 bool loop) 2344 { 2345 u32 action[] = { 2346 INTEL_GUC_ACTION_REGISTER_CONTEXT, 2347 info->flags, 2348 info->context_idx, 2349 info->engine_class, 2350 info->engine_submit_mask, 2351 info->wq_desc_lo, 2352 info->wq_desc_hi, 2353 info->wq_base_lo, 2354 info->wq_base_hi, 2355 info->wq_size, 2356 info->hwlrca_lo, 2357 info->hwlrca_hi, 2358 }; 2359 2360 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2361 0, loop); 2362 } 2363 2364 static void prepare_context_registration_info_v69(struct intel_context *ce); 2365 static void prepare_context_registration_info_v70(struct intel_context *ce, 2366 struct guc_ctxt_registration_info *info); 2367 2368 static int 2369 register_context_v69(struct intel_guc *guc, struct intel_context *ce, bool loop) 2370 { 2371 u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool_v69) + 2372 ce->guc_id.id * sizeof(struct guc_lrc_desc_v69); 2373 2374 prepare_context_registration_info_v69(ce); 2375 2376 if (intel_context_is_parent(ce)) 2377 return __guc_action_register_multi_lrc_v69(guc, ce, ce->guc_id.id, 2378 offset, loop); 2379 else 2380 return __guc_action_register_context_v69(guc, ce->guc_id.id, 2381 offset, loop); 2382 } 2383 2384 static int 2385 register_context_v70(struct intel_guc *guc, struct intel_context *ce, bool loop) 2386 { 2387 struct guc_ctxt_registration_info info; 2388 2389 prepare_context_registration_info_v70(ce, &info); 2390 2391 if (intel_context_is_parent(ce)) 2392 return __guc_action_register_multi_lrc_v70(guc, ce, &info, loop); 2393 else 2394 return __guc_action_register_context_v70(guc, &info, loop); 2395 } 2396 2397 static int register_context(struct intel_context *ce, bool loop) 2398 { 2399 struct intel_guc *guc = ce_to_guc(ce); 2400 int ret; 2401 2402 GEM_BUG_ON(intel_context_is_child(ce)); 2403 trace_intel_context_register(ce); 2404 2405 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) 2406 ret = register_context_v70(guc, ce, loop); 2407 else 2408 ret = register_context_v69(guc, ce, loop); 2409 2410 if (likely(!ret)) { 2411 unsigned long flags; 2412 2413 spin_lock_irqsave(&ce->guc_state.lock, flags); 
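/*
 * Registration succeeded: mark the context as registered under its
 * state lock. On the v70+ interface the context policies are pushed
 * immediately afterwards via guc_context_policy_init_v70().
 */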
2414 set_context_registered(ce); 2415 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2416 2417 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) 2418 guc_context_policy_init_v70(ce, loop); 2419 } 2420 2421 return ret; 2422 } 2423 2424 static int __guc_action_deregister_context(struct intel_guc *guc, 2425 u32 guc_id) 2426 { 2427 u32 action[] = { 2428 INTEL_GUC_ACTION_DEREGISTER_CONTEXT, 2429 guc_id, 2430 }; 2431 2432 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2433 G2H_LEN_DW_DEREGISTER_CONTEXT, 2434 true); 2435 } 2436 2437 static int deregister_context(struct intel_context *ce, u32 guc_id) 2438 { 2439 struct intel_guc *guc = ce_to_guc(ce); 2440 2441 GEM_BUG_ON(intel_context_is_child(ce)); 2442 trace_intel_context_deregister(ce); 2443 2444 return __guc_action_deregister_context(guc, guc_id); 2445 } 2446 2447 static inline void clear_children_join_go_memory(struct intel_context *ce) 2448 { 2449 struct parent_scratch *ps = __get_parent_scratch(ce); 2450 int i; 2451 2452 ps->go.semaphore = 0; 2453 for (i = 0; i < ce->parallel.number_children + 1; ++i) 2454 ps->join[i].semaphore = 0; 2455 } 2456 2457 static inline u32 get_children_go_value(struct intel_context *ce) 2458 { 2459 return __get_parent_scratch(ce)->go.semaphore; 2460 } 2461 2462 static inline u32 get_children_join_value(struct intel_context *ce, 2463 u8 child_index) 2464 { 2465 return __get_parent_scratch(ce)->join[child_index].semaphore; 2466 } 2467 2468 struct context_policy { 2469 u32 count; 2470 struct guc_update_context_policy h2g; 2471 }; 2472 2473 static u32 __guc_context_policy_action_size(struct context_policy *policy) 2474 { 2475 size_t bytes = sizeof(policy->h2g.header) + 2476 (sizeof(policy->h2g.klv[0]) * policy->count); 2477 2478 return bytes / sizeof(u32); 2479 } 2480 2481 static void __guc_context_policy_start_klv(struct context_policy *policy, u16 guc_id) 2482 { 2483 policy->h2g.header.action = INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES; 2484 policy->h2g.header.ctx_id = guc_id; 2485 policy->count = 0; 2486 } 2487 2488 #define MAKE_CONTEXT_POLICY_ADD(func, id) \ 2489 static void __guc_context_policy_add_##func(struct context_policy *policy, u32 data) \ 2490 { \ 2491 GEM_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \ 2492 policy->h2g.klv[policy->count].kl = \ 2493 FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ 2494 FIELD_PREP(GUC_KLV_0_LEN, 1); \ 2495 policy->h2g.klv[policy->count].value = data; \ 2496 policy->count++; \ 2497 } 2498 2499 MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) 2500 MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) 2501 MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY) 2502 MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY) 2503 2504 #undef MAKE_CONTEXT_POLICY_ADD 2505 2506 static int __guc_context_set_context_policies(struct intel_guc *guc, 2507 struct context_policy *policy, 2508 bool loop) 2509 { 2510 return guc_submission_send_busy_loop(guc, (u32 *)&policy->h2g, 2511 __guc_context_policy_action_size(policy), 2512 0, loop); 2513 } 2514 2515 static int guc_context_policy_init_v70(struct intel_context *ce, bool loop) 2516 { 2517 struct intel_engine_cs *engine = ce->engine; 2518 struct intel_guc *guc = &engine->gt->uc.guc; 2519 struct context_policy policy; 2520 u32 execution_quantum; 2521 u32 preemption_timeout; 2522 unsigned long flags; 2523 int ret; 2524 2525 /* NB: For both of these, zero means disabled. 
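 * The engine properties are in milliseconds and are converted to
 * microseconds (* 1000) for the GuC policy KLVs; the overflow checks
 * below guard that conversion.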
*/ 2526 GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000, 2527 execution_quantum)); 2528 GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000, 2529 preemption_timeout)); 2530 execution_quantum = engine->props.timeslice_duration_ms * 1000; 2531 preemption_timeout = engine->props.preempt_timeout_ms * 1000; 2532 2533 __guc_context_policy_start_klv(&policy, ce->guc_id.id); 2534 2535 __guc_context_policy_add_priority(&policy, ce->guc_state.prio); 2536 __guc_context_policy_add_execution_quantum(&policy, execution_quantum); 2537 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); 2538 2539 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) 2540 __guc_context_policy_add_preempt_to_idle(&policy, 1); 2541 2542 ret = __guc_context_set_context_policies(guc, &policy, loop); 2543 2544 spin_lock_irqsave(&ce->guc_state.lock, flags); 2545 if (ret != 0) 2546 set_context_policy_required(ce); 2547 else 2548 clr_context_policy_required(ce); 2549 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2550 2551 return ret; 2552 } 2553 2554 static void guc_context_policy_init_v69(struct intel_engine_cs *engine, 2555 struct guc_lrc_desc_v69 *desc) 2556 { 2557 desc->policy_flags = 0; 2558 2559 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) 2560 desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69; 2561 2562 /* NB: For both of these, zero means disabled. */ 2563 GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000, 2564 desc->execution_quantum)); 2565 GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000, 2566 desc->preemption_timeout)); 2567 desc->execution_quantum = engine->props.timeslice_duration_ms * 1000; 2568 desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000; 2569 } 2570 2571 static u32 map_guc_prio_to_lrc_desc_prio(u8 prio) 2572 { 2573 /* 2574 * this matches the mapping we do in map_i915_prio_to_guc_prio() 2575 * (e.g. prio < I915_PRIORITY_NORMAL maps to GUC_CLIENT_PRIORITY_NORMAL) 2576 */ 2577 switch (prio) { 2578 default: 2579 MISSING_CASE(prio); 2580 fallthrough; 2581 case GUC_CLIENT_PRIORITY_KMD_NORMAL: 2582 return GEN12_CTX_PRIORITY_NORMAL; 2583 case GUC_CLIENT_PRIORITY_NORMAL: 2584 return GEN12_CTX_PRIORITY_LOW; 2585 case GUC_CLIENT_PRIORITY_HIGH: 2586 case GUC_CLIENT_PRIORITY_KMD_HIGH: 2587 return GEN12_CTX_PRIORITY_HIGH; 2588 } 2589 } 2590 2591 static void prepare_context_registration_info_v69(struct intel_context *ce) 2592 { 2593 struct intel_engine_cs *engine = ce->engine; 2594 struct intel_guc *guc = &engine->gt->uc.guc; 2595 u32 ctx_id = ce->guc_id.id; 2596 struct guc_lrc_desc_v69 *desc; 2597 struct intel_context *child; 2598 2599 GEM_BUG_ON(!engine->mask); 2600 2601 /* 2602 * Ensure LRC + CT vmas are is same region as write barrier is done 2603 * based on CT vma region. 2604 */ 2605 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) != 2606 i915_gem_object_is_lmem(ce->ring->vma->obj)); 2607 2608 desc = __get_lrc_desc_v69(guc, ctx_id); 2609 GEM_BUG_ON(!desc); 2610 desc->engine_class = engine_class_to_guc_class(engine->class); 2611 desc->engine_submit_mask = engine->logical_mask; 2612 desc->hw_context_desc = ce->lrc.lrca; 2613 desc->priority = ce->guc_state.prio; 2614 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; 2615 guc_context_policy_init_v69(engine, desc); 2616 2617 /* 2618 * If context is a parent, we need to register a process descriptor 2619 * describing a work queue and register all child contexts. 
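 * The process descriptor and work queue live in the parent's context
 * state (see __get_parent_scratch_offset()/__get_wq_offset()), and
 * each child descriptor reuses the parent's priority and policy.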
2620 */ 2621 if (intel_context_is_parent(ce)) { 2622 struct guc_process_desc_v69 *pdesc; 2623 2624 ce->parallel.guc.wqi_tail = 0; 2625 ce->parallel.guc.wqi_head = 0; 2626 2627 desc->process_desc = i915_ggtt_offset(ce->state) + 2628 __get_parent_scratch_offset(ce); 2629 desc->wq_addr = i915_ggtt_offset(ce->state) + 2630 __get_wq_offset(ce); 2631 desc->wq_size = WQ_SIZE; 2632 2633 pdesc = __get_process_desc_v69(ce); 2634 memset(pdesc, 0, sizeof(*(pdesc))); 2635 pdesc->stage_id = ce->guc_id.id; 2636 pdesc->wq_base_addr = desc->wq_addr; 2637 pdesc->wq_size_bytes = desc->wq_size; 2638 pdesc->wq_status = WQ_STATUS_ACTIVE; 2639 2640 ce->parallel.guc.wq_head = &pdesc->head; 2641 ce->parallel.guc.wq_tail = &pdesc->tail; 2642 ce->parallel.guc.wq_status = &pdesc->wq_status; 2643 2644 for_each_child(ce, child) { 2645 desc = __get_lrc_desc_v69(guc, child->guc_id.id); 2646 2647 desc->engine_class = 2648 engine_class_to_guc_class(engine->class); 2649 desc->hw_context_desc = child->lrc.lrca; 2650 desc->priority = ce->guc_state.prio; 2651 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; 2652 guc_context_policy_init_v69(engine, desc); 2653 } 2654 2655 clear_children_join_go_memory(ce); 2656 } 2657 } 2658 2659 static void prepare_context_registration_info_v70(struct intel_context *ce, 2660 struct guc_ctxt_registration_info *info) 2661 { 2662 struct intel_engine_cs *engine = ce->engine; 2663 struct intel_guc *guc = &engine->gt->uc.guc; 2664 u32 ctx_id = ce->guc_id.id; 2665 2666 GEM_BUG_ON(!engine->mask); 2667 2668 /* 2669 * Ensure LRC + CT vmas are is same region as write barrier is done 2670 * based on CT vma region. 2671 */ 2672 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) != 2673 i915_gem_object_is_lmem(ce->ring->vma->obj)); 2674 2675 memset(info, 0, sizeof(*info)); 2676 info->context_idx = ctx_id; 2677 info->engine_class = engine_class_to_guc_class(engine->class); 2678 info->engine_submit_mask = engine->logical_mask; 2679 /* 2680 * NB: GuC interface supports 64 bit LRCA even though i915/HW 2681 * only supports 32 bit currently. 2682 */ 2683 info->hwlrca_lo = lower_32_bits(ce->lrc.lrca); 2684 info->hwlrca_hi = upper_32_bits(ce->lrc.lrca); 2685 if (engine->flags & I915_ENGINE_HAS_EU_PRIORITY) 2686 info->hwlrca_lo |= map_guc_prio_to_lrc_desc_prio(ce->guc_state.prio); 2687 info->flags = CONTEXT_REGISTRATION_FLAG_KMD; 2688 2689 /* 2690 * If context is a parent, we need to register a process descriptor 2691 * describing a work queue and register all child contexts. 
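 * On the v70 interface the work queue descriptor and base are passed
 * as GGTT offsets in the registration info rather than via a separate
 * process descriptor.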
2692 */ 2693 if (intel_context_is_parent(ce)) { 2694 struct guc_sched_wq_desc *wq_desc; 2695 u64 wq_desc_offset, wq_base_offset; 2696 2697 ce->parallel.guc.wqi_tail = 0; 2698 ce->parallel.guc.wqi_head = 0; 2699 2700 wq_desc_offset = i915_ggtt_offset(ce->state) + 2701 __get_parent_scratch_offset(ce); 2702 wq_base_offset = i915_ggtt_offset(ce->state) + 2703 __get_wq_offset(ce); 2704 info->wq_desc_lo = lower_32_bits(wq_desc_offset); 2705 info->wq_desc_hi = upper_32_bits(wq_desc_offset); 2706 info->wq_base_lo = lower_32_bits(wq_base_offset); 2707 info->wq_base_hi = upper_32_bits(wq_base_offset); 2708 info->wq_size = WQ_SIZE; 2709 2710 wq_desc = __get_wq_desc_v70(ce); 2711 memset(wq_desc, 0, sizeof(*wq_desc)); 2712 wq_desc->wq_status = WQ_STATUS_ACTIVE; 2713 2714 ce->parallel.guc.wq_head = &wq_desc->head; 2715 ce->parallel.guc.wq_tail = &wq_desc->tail; 2716 ce->parallel.guc.wq_status = &wq_desc->wq_status; 2717 2718 clear_children_join_go_memory(ce); 2719 } 2720 } 2721 2722 static int try_context_registration(struct intel_context *ce, bool loop) 2723 { 2724 struct intel_engine_cs *engine = ce->engine; 2725 struct intel_runtime_pm *runtime_pm = engine->uncore->rpm; 2726 struct intel_guc *guc = &engine->gt->uc.guc; 2727 intel_wakeref_t wakeref; 2728 u32 ctx_id = ce->guc_id.id; 2729 bool context_registered; 2730 int ret = 0; 2731 2732 GEM_BUG_ON(!sched_state_is_init(ce)); 2733 2734 context_registered = ctx_id_mapped(guc, ctx_id); 2735 2736 clr_ctx_id_mapping(guc, ctx_id); 2737 set_ctx_id_mapping(guc, ctx_id, ce); 2738 2739 /* 2740 * The context_lookup xarray is used to determine if the hardware 2741 * context is currently registered. There are two cases in which it 2742 * could be registered either the guc_id has been stolen from another 2743 * context or the lrc descriptor address of this context has changed. In 2744 * either case the context needs to be deregistered with the GuC before 2745 * registering this context. 2746 */ 2747 if (context_registered) { 2748 bool disabled; 2749 unsigned long flags; 2750 2751 trace_intel_context_steal_guc_id(ce); 2752 GEM_BUG_ON(!loop); 2753 2754 /* Seal race with Reset */ 2755 spin_lock_irqsave(&ce->guc_state.lock, flags); 2756 disabled = submission_disabled(guc); 2757 if (likely(!disabled)) { 2758 set_context_wait_for_deregister_to_register(ce); 2759 intel_context_get(ce); 2760 } 2761 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2762 if (unlikely(disabled)) { 2763 clr_ctx_id_mapping(guc, ctx_id); 2764 return 0; /* Will get registered later */ 2765 } 2766 2767 /* 2768 * If stealing the guc_id, this ce has the same guc_id as the 2769 * context whose guc_id was stolen. 
2770 */ 2771 with_intel_runtime_pm(runtime_pm, wakeref) 2772 ret = deregister_context(ce, ce->guc_id.id); 2773 if (unlikely(ret == -ENODEV)) 2774 ret = 0; /* Will get registered later */ 2775 } else { 2776 with_intel_runtime_pm(runtime_pm, wakeref) 2777 ret = register_context(ce, loop); 2778 if (unlikely(ret == -EBUSY)) { 2779 clr_ctx_id_mapping(guc, ctx_id); 2780 } else if (unlikely(ret == -ENODEV)) { 2781 clr_ctx_id_mapping(guc, ctx_id); 2782 ret = 0; /* Will get registered later */ 2783 } 2784 } 2785 2786 return ret; 2787 } 2788 2789 static int __guc_context_pre_pin(struct intel_context *ce, 2790 struct intel_engine_cs *engine, 2791 struct i915_gem_ww_ctx *ww, 2792 void **vaddr) 2793 { 2794 return lrc_pre_pin(ce, engine, ww, vaddr); 2795 } 2796 2797 static int __guc_context_pin(struct intel_context *ce, 2798 struct intel_engine_cs *engine, 2799 void *vaddr) 2800 { 2801 if (i915_ggtt_offset(ce->state) != 2802 (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK)) 2803 set_bit(CONTEXT_LRCA_DIRTY, &ce->flags); 2804 2805 /* 2806 * GuC context gets pinned in guc_request_alloc. See that function for 2807 * explaination of why. 2808 */ 2809 2810 return lrc_pin(ce, engine, vaddr); 2811 } 2812 2813 static int guc_context_pre_pin(struct intel_context *ce, 2814 struct i915_gem_ww_ctx *ww, 2815 void **vaddr) 2816 { 2817 return __guc_context_pre_pin(ce, ce->engine, ww, vaddr); 2818 } 2819 2820 static int guc_context_pin(struct intel_context *ce, void *vaddr) 2821 { 2822 int ret = __guc_context_pin(ce, ce->engine, vaddr); 2823 2824 if (likely(!ret && !intel_context_is_barrier(ce))) 2825 intel_engine_pm_get(ce->engine); 2826 2827 return ret; 2828 } 2829 2830 static void guc_context_unpin(struct intel_context *ce) 2831 { 2832 struct intel_guc *guc = ce_to_guc(ce); 2833 2834 __guc_context_update_stats(ce); 2835 unpin_guc_id(guc, ce); 2836 lrc_unpin(ce); 2837 2838 if (likely(!intel_context_is_barrier(ce))) 2839 intel_engine_pm_put_async(ce->engine); 2840 } 2841 2842 static void guc_context_post_unpin(struct intel_context *ce) 2843 { 2844 lrc_post_unpin(ce); 2845 } 2846 2847 static void __guc_context_sched_enable(struct intel_guc *guc, 2848 struct intel_context *ce) 2849 { 2850 u32 action[] = { 2851 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 2852 ce->guc_id.id, 2853 GUC_CONTEXT_ENABLE 2854 }; 2855 2856 trace_intel_context_sched_enable(ce); 2857 2858 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2859 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); 2860 } 2861 2862 static void __guc_context_sched_disable(struct intel_guc *guc, 2863 struct intel_context *ce, 2864 u16 guc_id) 2865 { 2866 u32 action[] = { 2867 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 2868 guc_id, /* ce->guc_id.id not stable */ 2869 GUC_CONTEXT_DISABLE 2870 }; 2871 2872 GEM_BUG_ON(guc_id == GUC_INVALID_CONTEXT_ID); 2873 2874 GEM_BUG_ON(intel_context_is_child(ce)); 2875 trace_intel_context_sched_disable(ce); 2876 2877 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2878 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); 2879 } 2880 2881 static void guc_blocked_fence_complete(struct intel_context *ce) 2882 { 2883 lockdep_assert_held(&ce->guc_state.lock); 2884 2885 if (!i915_sw_fence_done(&ce->guc_state.blocked)) 2886 i915_sw_fence_complete(&ce->guc_state.blocked); 2887 } 2888 2889 static void guc_blocked_fence_reinit(struct intel_context *ce) 2890 { 2891 lockdep_assert_held(&ce->guc_state.lock); 2892 GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_state.blocked)); 2893 2894 /* 2895 * This fence is always complete unless a pending schedule disable is 2896 
* outstanding. We arm the fence here and complete it when we receive 2897 * the pending schedule disable complete message. 2898 */ 2899 i915_sw_fence_fini(&ce->guc_state.blocked); 2900 i915_sw_fence_reinit(&ce->guc_state.blocked); 2901 i915_sw_fence_await(&ce->guc_state.blocked); 2902 i915_sw_fence_commit(&ce->guc_state.blocked); 2903 } 2904 2905 static u16 prep_context_pending_disable(struct intel_context *ce) 2906 { 2907 lockdep_assert_held(&ce->guc_state.lock); 2908 2909 set_context_pending_disable(ce); 2910 clr_context_enabled(ce); 2911 guc_blocked_fence_reinit(ce); 2912 intel_context_get(ce); 2913 2914 return ce->guc_id.id; 2915 } 2916 2917 static struct i915_sw_fence *guc_context_block(struct intel_context *ce) 2918 { 2919 struct intel_guc *guc = ce_to_guc(ce); 2920 unsigned long flags; 2921 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; 2922 intel_wakeref_t wakeref; 2923 u16 guc_id; 2924 bool enabled; 2925 2926 GEM_BUG_ON(intel_context_is_child(ce)); 2927 2928 spin_lock_irqsave(&ce->guc_state.lock, flags); 2929 2930 incr_context_blocked(ce); 2931 2932 enabled = context_enabled(ce); 2933 if (unlikely(!enabled || submission_disabled(guc))) { 2934 if (enabled) 2935 clr_context_enabled(ce); 2936 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2937 return &ce->guc_state.blocked; 2938 } 2939 2940 /* 2941 * We add +2 here as the schedule disable complete CTB handler calls 2942 * intel_context_sched_disable_unpin (-2 to pin_count). 2943 */ 2944 atomic_add(2, &ce->pin_count); 2945 2946 guc_id = prep_context_pending_disable(ce); 2947 2948 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2949 2950 with_intel_runtime_pm(runtime_pm, wakeref) 2951 __guc_context_sched_disable(guc, ce, guc_id); 2952 2953 return &ce->guc_state.blocked; 2954 } 2955 2956 #define SCHED_STATE_MULTI_BLOCKED_MASK \ 2957 (SCHED_STATE_BLOCKED_MASK & ~SCHED_STATE_BLOCKED) 2958 #define SCHED_STATE_NO_UNBLOCK \ 2959 (SCHED_STATE_MULTI_BLOCKED_MASK | \ 2960 SCHED_STATE_PENDING_DISABLE | \ 2961 SCHED_STATE_BANNED) 2962 2963 static bool context_cant_unblock(struct intel_context *ce) 2964 { 2965 lockdep_assert_held(&ce->guc_state.lock); 2966 2967 return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) || 2968 context_guc_id_invalid(ce) || 2969 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id) || 2970 !intel_context_is_pinned(ce); 2971 } 2972 2973 static void guc_context_unblock(struct intel_context *ce) 2974 { 2975 struct intel_guc *guc = ce_to_guc(ce); 2976 unsigned long flags; 2977 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; 2978 intel_wakeref_t wakeref; 2979 bool enable; 2980 2981 GEM_BUG_ON(context_enabled(ce)); 2982 GEM_BUG_ON(intel_context_is_child(ce)); 2983 2984 spin_lock_irqsave(&ce->guc_state.lock, flags); 2985 2986 if (unlikely(submission_disabled(guc) || 2987 context_cant_unblock(ce))) { 2988 enable = false; 2989 } else { 2990 enable = true; 2991 set_context_pending_enable(ce); 2992 set_context_enabled(ce); 2993 intel_context_get(ce); 2994 } 2995 2996 decr_context_blocked(ce); 2997 2998 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2999 3000 if (enable) { 3001 with_intel_runtime_pm(runtime_pm, wakeref) 3002 __guc_context_sched_enable(guc, ce); 3003 } 3004 } 3005 3006 static void guc_context_cancel_request(struct intel_context *ce, 3007 struct i915_request *rq) 3008 { 3009 struct intel_context *block_context = 3010 request_to_scheduling_context(rq); 3011 3012 if (i915_sw_fence_signaled(&rq->submit)) { 3013 struct i915_sw_fence *fence; 3014 3015 intel_context_get(ce); 3016 
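/*
 * Block the scheduling context and wait for the block to take
 * effect, then skip the request and scrub the ring state before
 * unblocking again.
 */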
fence = guc_context_block(block_context); 3017 i915_sw_fence_wait(fence); 3018 if (!i915_request_completed(rq)) { 3019 __i915_request_skip(rq); 3020 guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head), 3021 true); 3022 } 3023 3024 guc_context_unblock(block_context); 3025 intel_context_put(ce); 3026 } 3027 } 3028 3029 static void __guc_context_set_preemption_timeout(struct intel_guc *guc, 3030 u16 guc_id, 3031 u32 preemption_timeout) 3032 { 3033 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) { 3034 struct context_policy policy; 3035 3036 __guc_context_policy_start_klv(&policy, guc_id); 3037 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); 3038 __guc_context_set_context_policies(guc, &policy, true); 3039 } else { 3040 u32 action[] = { 3041 INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT, 3042 guc_id, 3043 preemption_timeout 3044 }; 3045 3046 intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); 3047 } 3048 } 3049 3050 static void 3051 guc_context_revoke(struct intel_context *ce, struct i915_request *rq, 3052 unsigned int preempt_timeout_ms) 3053 { 3054 struct intel_guc *guc = ce_to_guc(ce); 3055 struct intel_runtime_pm *runtime_pm = 3056 &ce->engine->gt->i915->runtime_pm; 3057 intel_wakeref_t wakeref; 3058 unsigned long flags; 3059 3060 GEM_BUG_ON(intel_context_is_child(ce)); 3061 3062 guc_flush_submissions(guc); 3063 3064 spin_lock_irqsave(&ce->guc_state.lock, flags); 3065 set_context_banned(ce); 3066 3067 if (submission_disabled(guc) || 3068 (!context_enabled(ce) && !context_pending_disable(ce))) { 3069 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3070 3071 guc_cancel_context_requests(ce); 3072 intel_engine_signal_breadcrumbs(ce->engine); 3073 } else if (!context_pending_disable(ce)) { 3074 u16 guc_id; 3075 3076 /* 3077 * We add +2 here as the schedule disable complete CTB handler 3078 * calls intel_context_sched_disable_unpin (-2 to pin_count). 3079 */ 3080 atomic_add(2, &ce->pin_count); 3081 3082 guc_id = prep_context_pending_disable(ce); 3083 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3084 3085 /* 3086 * In addition to disabling scheduling, set the preemption 3087 * timeout to the minimum value (1 us) so the banned context 3088 * gets kicked off the HW ASAP. 
3089 */ 3090 with_intel_runtime_pm(runtime_pm, wakeref) { 3091 __guc_context_set_preemption_timeout(guc, guc_id, 3092 preempt_timeout_ms); 3093 __guc_context_sched_disable(guc, ce, guc_id); 3094 } 3095 } else { 3096 if (!context_guc_id_invalid(ce)) 3097 with_intel_runtime_pm(runtime_pm, wakeref) 3098 __guc_context_set_preemption_timeout(guc, 3099 ce->guc_id.id, 3100 preempt_timeout_ms); 3101 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3102 } 3103 } 3104 3105 static void do_sched_disable(struct intel_guc *guc, struct intel_context *ce, 3106 unsigned long flags) 3107 __releases(ce->guc_state.lock) 3108 { 3109 struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm; 3110 intel_wakeref_t wakeref; 3111 u16 guc_id; 3112 3113 lockdep_assert_held(&ce->guc_state.lock); 3114 guc_id = prep_context_pending_disable(ce); 3115 3116 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3117 3118 with_intel_runtime_pm(runtime_pm, wakeref) 3119 __guc_context_sched_disable(guc, ce, guc_id); 3120 } 3121 3122 static bool bypass_sched_disable(struct intel_guc *guc, 3123 struct intel_context *ce) 3124 { 3125 lockdep_assert_held(&ce->guc_state.lock); 3126 GEM_BUG_ON(intel_context_is_child(ce)); 3127 3128 if (submission_disabled(guc) || context_guc_id_invalid(ce) || 3129 !ctx_id_mapped(guc, ce->guc_id.id)) { 3130 clr_context_enabled(ce); 3131 return true; 3132 } 3133 3134 return !context_enabled(ce); 3135 } 3136 3137 static void __delay_sched_disable(struct work_struct *wrk) 3138 { 3139 struct intel_context *ce = 3140 container_of(wrk, typeof(*ce), guc_state.sched_disable_delay_work.work); 3141 struct intel_guc *guc = ce_to_guc(ce); 3142 unsigned long flags; 3143 3144 spin_lock_irqsave(&ce->guc_state.lock, flags); 3145 3146 if (bypass_sched_disable(guc, ce)) { 3147 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3148 intel_context_sched_disable_unpin(ce); 3149 } else { 3150 do_sched_disable(guc, ce, flags); 3151 } 3152 } 3153 3154 static bool guc_id_pressure(struct intel_guc *guc, struct intel_context *ce) 3155 { 3156 /* 3157 * parent contexts are perma-pinned, if we are unpinning do schedule 3158 * disable immediately. 3159 */ 3160 if (intel_context_is_parent(ce)) 3161 return true; 3162 3163 /* 3164 * If we are beyond the threshold for avail guc_ids, do schedule disable immediately. 
3165 */ 3166 return guc->submission_state.guc_ids_in_use > 3167 guc->submission_state.sched_disable_gucid_threshold; 3168 } 3169 3170 static void guc_context_sched_disable(struct intel_context *ce) 3171 { 3172 struct intel_guc *guc = ce_to_guc(ce); 3173 u64 delay = guc->submission_state.sched_disable_delay_ms; 3174 unsigned long flags; 3175 3176 spin_lock_irqsave(&ce->guc_state.lock, flags); 3177 3178 if (bypass_sched_disable(guc, ce)) { 3179 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3180 intel_context_sched_disable_unpin(ce); 3181 } else if (!intel_context_is_closed(ce) && !guc_id_pressure(guc, ce) && 3182 delay) { 3183 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3184 mod_delayed_work(system_unbound_wq, 3185 &ce->guc_state.sched_disable_delay_work, 3186 msecs_to_jiffies(delay)); 3187 } else { 3188 do_sched_disable(guc, ce, flags); 3189 } 3190 } 3191 3192 static void guc_context_close(struct intel_context *ce) 3193 { 3194 unsigned long flags; 3195 3196 if (test_bit(CONTEXT_GUC_INIT, &ce->flags) && 3197 cancel_delayed_work(&ce->guc_state.sched_disable_delay_work)) 3198 __delay_sched_disable(&ce->guc_state.sched_disable_delay_work.work); 3199 3200 spin_lock_irqsave(&ce->guc_state.lock, flags); 3201 set_context_close_done(ce); 3202 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3203 } 3204 3205 static inline void guc_lrc_desc_unpin(struct intel_context *ce) 3206 { 3207 struct intel_guc *guc = ce_to_guc(ce); 3208 struct intel_gt *gt = guc_to_gt(guc); 3209 unsigned long flags; 3210 bool disabled; 3211 3212 GEM_BUG_ON(!intel_gt_pm_is_awake(gt)); 3213 GEM_BUG_ON(!ctx_id_mapped(guc, ce->guc_id.id)); 3214 GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id)); 3215 GEM_BUG_ON(context_enabled(ce)); 3216 3217 /* Seal race with Reset */ 3218 spin_lock_irqsave(&ce->guc_state.lock, flags); 3219 disabled = submission_disabled(guc); 3220 if (likely(!disabled)) { 3221 __intel_gt_pm_get(gt); 3222 set_context_destroyed(ce); 3223 clr_context_registered(ce); 3224 } 3225 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3226 if (unlikely(disabled)) { 3227 release_guc_id(guc, ce); 3228 __guc_context_destroy(ce); 3229 return; 3230 } 3231 3232 deregister_context(ce, ce->guc_id.id); 3233 } 3234 3235 static void __guc_context_destroy(struct intel_context *ce) 3236 { 3237 GEM_BUG_ON(ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] || 3238 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] || 3239 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || 3240 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]); 3241 3242 lrc_fini(ce); 3243 intel_context_fini(ce); 3244 3245 if (intel_engine_is_virtual(ce->engine)) { 3246 struct guc_virtual_engine *ve = 3247 container_of(ce, typeof(*ve), context); 3248 3249 if (ve->base.breadcrumbs) 3250 intel_breadcrumbs_put(ve->base.breadcrumbs); 3251 3252 kfree(ve); 3253 } else { 3254 intel_context_free(ce); 3255 } 3256 } 3257 3258 static void guc_flush_destroyed_contexts(struct intel_guc *guc) 3259 { 3260 struct intel_context *ce; 3261 unsigned long flags; 3262 3263 GEM_BUG_ON(!submission_disabled(guc) && 3264 guc_submission_initialized(guc)); 3265 3266 while (!list_empty(&guc->submission_state.destroyed_contexts)) { 3267 spin_lock_irqsave(&guc->submission_state.lock, flags); 3268 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts, 3269 struct intel_context, 3270 destroyed_link); 3271 if (ce) 3272 list_del_init(&ce->destroyed_link); 3273 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3274 3275 if (!ce) 3276 break; 
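/*
 * Submission is disabled (or was never initialised) at this point,
 * so the guc_id is released and the context freed locally without a
 * deregistration H2G.
 */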
3277 3278 release_guc_id(guc, ce); 3279 __guc_context_destroy(ce); 3280 } 3281 } 3282 3283 static void deregister_destroyed_contexts(struct intel_guc *guc) 3284 { 3285 struct intel_context *ce; 3286 unsigned long flags; 3287 3288 while (!list_empty(&guc->submission_state.destroyed_contexts)) { 3289 spin_lock_irqsave(&guc->submission_state.lock, flags); 3290 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts, 3291 struct intel_context, 3292 destroyed_link); 3293 if (ce) 3294 list_del_init(&ce->destroyed_link); 3295 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3296 3297 if (!ce) 3298 break; 3299 3300 guc_lrc_desc_unpin(ce); 3301 } 3302 } 3303 3304 static void destroyed_worker_func(struct work_struct *w) 3305 { 3306 struct intel_guc *guc = container_of(w, struct intel_guc, 3307 submission_state.destroyed_worker); 3308 struct intel_gt *gt = guc_to_gt(guc); 3309 int tmp; 3310 3311 with_intel_gt_pm(gt, tmp) 3312 deregister_destroyed_contexts(guc); 3313 } 3314 3315 static void guc_context_destroy(struct kref *kref) 3316 { 3317 struct intel_context *ce = container_of(kref, typeof(*ce), ref); 3318 struct intel_guc *guc = ce_to_guc(ce); 3319 unsigned long flags; 3320 bool destroy; 3321 3322 /* 3323 * If the guc_id is invalid this context has been stolen and we can free 3324 * it immediately. Also can be freed immediately if the context is not 3325 * registered with the GuC or the GuC is in the middle of a reset. 3326 */ 3327 spin_lock_irqsave(&guc->submission_state.lock, flags); 3328 destroy = submission_disabled(guc) || context_guc_id_invalid(ce) || 3329 !ctx_id_mapped(guc, ce->guc_id.id); 3330 if (likely(!destroy)) { 3331 if (!list_empty(&ce->guc_id.link)) 3332 list_del_init(&ce->guc_id.link); 3333 list_add_tail(&ce->destroyed_link, 3334 &guc->submission_state.destroyed_contexts); 3335 } else { 3336 __release_guc_id(guc, ce); 3337 } 3338 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3339 if (unlikely(destroy)) { 3340 __guc_context_destroy(ce); 3341 return; 3342 } 3343 3344 /* 3345 * We use a worker to issue the H2G to deregister the context as we can 3346 * take the GT PM for the first time which isn't allowed from an atomic 3347 * context. 
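 * destroyed_worker_func() grabs a GT PM reference and then walks the
 * destroyed_contexts list via deregister_destroyed_contexts().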
3348 */ 3349 queue_work(system_unbound_wq, &guc->submission_state.destroyed_worker); 3350 } 3351 3352 static int guc_context_alloc(struct intel_context *ce) 3353 { 3354 return lrc_alloc(ce, ce->engine); 3355 } 3356 3357 static void __guc_context_set_prio(struct intel_guc *guc, 3358 struct intel_context *ce) 3359 { 3360 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) { 3361 struct context_policy policy; 3362 3363 __guc_context_policy_start_klv(&policy, ce->guc_id.id); 3364 __guc_context_policy_add_priority(&policy, ce->guc_state.prio); 3365 __guc_context_set_context_policies(guc, &policy, true); 3366 } else { 3367 u32 action[] = { 3368 INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY, 3369 ce->guc_id.id, 3370 ce->guc_state.prio, 3371 }; 3372 3373 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); 3374 } 3375 } 3376 3377 static void guc_context_set_prio(struct intel_guc *guc, 3378 struct intel_context *ce, 3379 u8 prio) 3380 { 3381 GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH || 3382 prio > GUC_CLIENT_PRIORITY_NORMAL); 3383 lockdep_assert_held(&ce->guc_state.lock); 3384 3385 if (ce->guc_state.prio == prio || submission_disabled(guc) || 3386 !context_registered(ce)) { 3387 ce->guc_state.prio = prio; 3388 return; 3389 } 3390 3391 ce->guc_state.prio = prio; 3392 __guc_context_set_prio(guc, ce); 3393 3394 trace_intel_context_set_prio(ce); 3395 } 3396 3397 static inline u8 map_i915_prio_to_guc_prio(int prio) 3398 { 3399 if (prio == I915_PRIORITY_NORMAL) 3400 return GUC_CLIENT_PRIORITY_KMD_NORMAL; 3401 else if (prio < I915_PRIORITY_NORMAL) 3402 return GUC_CLIENT_PRIORITY_NORMAL; 3403 else if (prio < I915_PRIORITY_DISPLAY) 3404 return GUC_CLIENT_PRIORITY_HIGH; 3405 else 3406 return GUC_CLIENT_PRIORITY_KMD_HIGH; 3407 } 3408 3409 static inline void add_context_inflight_prio(struct intel_context *ce, 3410 u8 guc_prio) 3411 { 3412 lockdep_assert_held(&ce->guc_state.lock); 3413 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); 3414 3415 ++ce->guc_state.prio_count[guc_prio]; 3416 3417 /* Overflow protection */ 3418 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); 3419 } 3420 3421 static inline void sub_context_inflight_prio(struct intel_context *ce, 3422 u8 guc_prio) 3423 { 3424 lockdep_assert_held(&ce->guc_state.lock); 3425 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); 3426 3427 /* Underflow protection */ 3428 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); 3429 3430 --ce->guc_state.prio_count[guc_prio]; 3431 } 3432 3433 static inline void update_context_prio(struct intel_context *ce) 3434 { 3435 struct intel_guc *guc = &ce->engine->gt->uc.guc; 3436 int i; 3437 3438 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0); 3439 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL); 3440 3441 lockdep_assert_held(&ce->guc_state.lock); 3442 3443 for (i = 0; i < ARRAY_SIZE(ce->guc_state.prio_count); ++i) { 3444 if (ce->guc_state.prio_count[i]) { 3445 guc_context_set_prio(guc, ce, i); 3446 break; 3447 } 3448 } 3449 } 3450 3451 static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio) 3452 { 3453 /* Lower value is higher priority */ 3454 return new_guc_prio < old_guc_prio; 3455 } 3456 3457 static void add_to_context(struct i915_request *rq) 3458 { 3459 struct intel_context *ce = request_to_scheduling_context(rq); 3460 u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq)); 3461 3462 GEM_BUG_ON(intel_context_is_child(ce)); 3463 GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI); 3464 3465 spin_lock(&ce->guc_state.lock); 3466 
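/*
 * Track the request on the context and fold its priority into the
 * per-context inflight counts used by update_context_prio() to pick
 * the GuC priority.
 */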
list_move_tail(&rq->sched.link, &ce->guc_state.requests); 3467 3468 if (rq->guc_prio == GUC_PRIO_INIT) { 3469 rq->guc_prio = new_guc_prio; 3470 add_context_inflight_prio(ce, rq->guc_prio); 3471 } else if (new_guc_prio_higher(rq->guc_prio, new_guc_prio)) { 3472 sub_context_inflight_prio(ce, rq->guc_prio); 3473 rq->guc_prio = new_guc_prio; 3474 add_context_inflight_prio(ce, rq->guc_prio); 3475 } 3476 update_context_prio(ce); 3477 3478 spin_unlock(&ce->guc_state.lock); 3479 } 3480 3481 static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce) 3482 { 3483 lockdep_assert_held(&ce->guc_state.lock); 3484 3485 if (rq->guc_prio != GUC_PRIO_INIT && 3486 rq->guc_prio != GUC_PRIO_FINI) { 3487 sub_context_inflight_prio(ce, rq->guc_prio); 3488 update_context_prio(ce); 3489 } 3490 rq->guc_prio = GUC_PRIO_FINI; 3491 } 3492 3493 static void remove_from_context(struct i915_request *rq) 3494 { 3495 struct intel_context *ce = request_to_scheduling_context(rq); 3496 3497 GEM_BUG_ON(intel_context_is_child(ce)); 3498 3499 spin_lock_irq(&ce->guc_state.lock); 3500 3501 list_del_init(&rq->sched.link); 3502 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 3503 3504 /* Prevent further __await_execution() registering a cb, then flush */ 3505 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); 3506 3507 guc_prio_fini(rq, ce); 3508 3509 spin_unlock_irq(&ce->guc_state.lock); 3510 3511 atomic_dec(&ce->guc_id.ref); 3512 i915_request_notify_execute_cb_imm(rq); 3513 } 3514 3515 static const struct intel_context_ops guc_context_ops = { 3516 .flags = COPS_RUNTIME_CYCLES, 3517 .alloc = guc_context_alloc, 3518 3519 .close = guc_context_close, 3520 3521 .pre_pin = guc_context_pre_pin, 3522 .pin = guc_context_pin, 3523 .unpin = guc_context_unpin, 3524 .post_unpin = guc_context_post_unpin, 3525 3526 .revoke = guc_context_revoke, 3527 3528 .cancel_request = guc_context_cancel_request, 3529 3530 .enter = intel_context_enter_engine, 3531 .exit = intel_context_exit_engine, 3532 3533 .sched_disable = guc_context_sched_disable, 3534 3535 .update_stats = guc_context_update_stats, 3536 3537 .reset = lrc_reset, 3538 .destroy = guc_context_destroy, 3539 3540 .create_virtual = guc_create_virtual, 3541 .create_parallel = guc_create_parallel, 3542 }; 3543 3544 static void submit_work_cb(struct irq_work *wrk) 3545 { 3546 struct i915_request *rq = container_of(wrk, typeof(*rq), submit_work); 3547 3548 might_lock(&rq->engine->sched_engine->lock); 3549 i915_sw_fence_complete(&rq->submit); 3550 } 3551 3552 static void __guc_signal_context_fence(struct intel_context *ce) 3553 { 3554 struct i915_request *rq, *rn; 3555 3556 lockdep_assert_held(&ce->guc_state.lock); 3557 3558 if (!list_empty(&ce->guc_state.fences)) 3559 trace_intel_context_fence_release(ce); 3560 3561 /* 3562 * Use an IRQ to ensure locking order of sched_engine->lock -> 3563 * ce->guc_state.lock is preserved. 
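 * Each request's submit fence is completed from irq_work context via
 * submit_work_cb() rather than directly under this lock.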
3564 */ 3565 list_for_each_entry_safe(rq, rn, &ce->guc_state.fences, 3566 guc_fence_link) { 3567 list_del(&rq->guc_fence_link); 3568 irq_work_queue(&rq->submit_work); 3569 } 3570 3571 INIT_LIST_HEAD(&ce->guc_state.fences); 3572 } 3573 3574 static void guc_signal_context_fence(struct intel_context *ce) 3575 { 3576 unsigned long flags; 3577 3578 GEM_BUG_ON(intel_context_is_child(ce)); 3579 3580 spin_lock_irqsave(&ce->guc_state.lock, flags); 3581 clr_context_wait_for_deregister_to_register(ce); 3582 __guc_signal_context_fence(ce); 3583 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3584 } 3585 3586 static bool context_needs_register(struct intel_context *ce, bool new_guc_id) 3587 { 3588 return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) || 3589 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id)) && 3590 !submission_disabled(ce_to_guc(ce)); 3591 } 3592 3593 static void guc_context_init(struct intel_context *ce) 3594 { 3595 const struct i915_gem_context *ctx; 3596 int prio = I915_CONTEXT_DEFAULT_PRIORITY; 3597 3598 rcu_read_lock(); 3599 ctx = rcu_dereference(ce->gem_context); 3600 if (ctx) 3601 prio = ctx->sched.priority; 3602 rcu_read_unlock(); 3603 3604 ce->guc_state.prio = map_i915_prio_to_guc_prio(prio); 3605 3606 INIT_DELAYED_WORK(&ce->guc_state.sched_disable_delay_work, 3607 __delay_sched_disable); 3608 3609 set_bit(CONTEXT_GUC_INIT, &ce->flags); 3610 } 3611 3612 static int guc_request_alloc(struct i915_request *rq) 3613 { 3614 struct intel_context *ce = request_to_scheduling_context(rq); 3615 struct intel_guc *guc = ce_to_guc(ce); 3616 unsigned long flags; 3617 int ret; 3618 3619 GEM_BUG_ON(!intel_context_is_pinned(rq->context)); 3620 3621 /* 3622 * Flush enough space to reduce the likelihood of waiting after 3623 * we start building the request - in which case we will just 3624 * have to repeat work. 3625 */ 3626 rq->reserved_space += GUC_REQUEST_SIZE; 3627 3628 /* 3629 * Note that after this point, we have committed to using 3630 * this request as it is being used to both track the 3631 * state of engine initialisation and liveness of the 3632 * golden renderstate above. Think twice before you try 3633 * to cancel/unwind this request now. 3634 */ 3635 3636 /* Unconditionally invalidate GPU caches and TLBs. */ 3637 ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE); 3638 if (ret) 3639 return ret; 3640 3641 rq->reserved_space -= GUC_REQUEST_SIZE; 3642 3643 if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags))) 3644 guc_context_init(ce); 3645 3646 /* 3647 * If the context gets closed while the execbuf is ongoing, the context 3648 * close code will race with the below code to cancel the delayed work. 3649 * If the context close wins the race and cancels the work, it will 3650 * immediately call the sched disable (see guc_context_close), so there 3651 * is a chance we can get past this check while the sched_disable code 3652 * is being executed. To make sure that code completes before we check 3653 * the status further down, we wait for the close process to complete. 3654 * Else, this code path could send a request down thinking that the 3655 * context is still in a schedule-enable mode while the GuC ends up 3656 * dropping the request completely because the disable did go from the 3657 * context_close path right to GuC just prior. In the event the CT is 3658 * full, we could potentially need to wait up to 1.5 seconds. 
3659 */ 3660 if (cancel_delayed_work_sync(&ce->guc_state.sched_disable_delay_work)) 3661 intel_context_sched_disable_unpin(ce); 3662 else if (intel_context_is_closed(ce)) 3663 if (wait_for(context_close_done(ce), 1500)) 3664 guc_warn(guc, "timed out waiting on context sched close before realloc\n"); 3665 /* 3666 * Call pin_guc_id here rather than in the pinning step as with 3667 * dma_resv, contexts can be repeatedly pinned / unpinned trashing the 3668 * guc_id and creating horrible race conditions. This is especially bad 3669 * when guc_id are being stolen due to over subscription. By the time 3670 * this function is reached, it is guaranteed that the guc_id will be 3671 * persistent until the generated request is retired. Thus, sealing these 3672 * race conditions. It is still safe to fail here if guc_id are 3673 * exhausted and return -EAGAIN to the user indicating that they can try 3674 * again in the future. 3675 * 3676 * There is no need for a lock here as the timeline mutex ensures at 3677 * most one context can be executing this code path at once. The 3678 * guc_id_ref is incremented once for every request in flight and 3679 * decremented on each retire. When it is zero, a lock around the 3680 * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id. 3681 */ 3682 if (atomic_add_unless(&ce->guc_id.ref, 1, 0)) 3683 goto out; 3684 3685 ret = pin_guc_id(guc, ce); /* returns 1 if new guc_id assigned */ 3686 if (unlikely(ret < 0)) 3687 return ret; 3688 if (context_needs_register(ce, !!ret)) { 3689 ret = try_context_registration(ce, true); 3690 if (unlikely(ret)) { /* unwind */ 3691 if (ret == -EPIPE) { 3692 disable_submission(guc); 3693 goto out; /* GPU will be reset */ 3694 } 3695 atomic_dec(&ce->guc_id.ref); 3696 unpin_guc_id(guc, ce); 3697 return ret; 3698 } 3699 } 3700 3701 clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags); 3702 3703 out: 3704 /* 3705 * We block all requests on this context if a G2H is pending for a 3706 * schedule disable or context deregistration as the GuC will fail a 3707 * schedule enable or context registration if either G2H is pending 3708 * respectfully. Once a G2H returns, the fence is released that is 3709 * blocking these requests (see guc_signal_context_fence). 
3710 */ 3711 spin_lock_irqsave(&ce->guc_state.lock, flags); 3712 if (context_wait_for_deregister_to_register(ce) || 3713 context_pending_disable(ce)) { 3714 init_irq_work(&rq->submit_work, submit_work_cb); 3715 i915_sw_fence_await(&rq->submit); 3716 3717 list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences); 3718 } 3719 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3720 3721 return 0; 3722 } 3723 3724 static int guc_virtual_context_pre_pin(struct intel_context *ce, 3725 struct i915_gem_ww_ctx *ww, 3726 void **vaddr) 3727 { 3728 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3729 3730 return __guc_context_pre_pin(ce, engine, ww, vaddr); 3731 } 3732 3733 static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr) 3734 { 3735 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3736 int ret = __guc_context_pin(ce, engine, vaddr); 3737 intel_engine_mask_t tmp, mask = ce->engine->mask; 3738 3739 if (likely(!ret)) 3740 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3741 intel_engine_pm_get(engine); 3742 3743 return ret; 3744 } 3745 3746 static void guc_virtual_context_unpin(struct intel_context *ce) 3747 { 3748 intel_engine_mask_t tmp, mask = ce->engine->mask; 3749 struct intel_engine_cs *engine; 3750 struct intel_guc *guc = ce_to_guc(ce); 3751 3752 GEM_BUG_ON(context_enabled(ce)); 3753 GEM_BUG_ON(intel_context_is_barrier(ce)); 3754 3755 unpin_guc_id(guc, ce); 3756 lrc_unpin(ce); 3757 3758 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3759 intel_engine_pm_put_async(engine); 3760 } 3761 3762 static void guc_virtual_context_enter(struct intel_context *ce) 3763 { 3764 intel_engine_mask_t tmp, mask = ce->engine->mask; 3765 struct intel_engine_cs *engine; 3766 3767 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3768 intel_engine_pm_get(engine); 3769 3770 intel_timeline_enter(ce->timeline); 3771 } 3772 3773 static void guc_virtual_context_exit(struct intel_context *ce) 3774 { 3775 intel_engine_mask_t tmp, mask = ce->engine->mask; 3776 struct intel_engine_cs *engine; 3777 3778 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3779 intel_engine_pm_put(engine); 3780 3781 intel_timeline_exit(ce->timeline); 3782 } 3783 3784 static int guc_virtual_context_alloc(struct intel_context *ce) 3785 { 3786 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3787 3788 return lrc_alloc(ce, engine); 3789 } 3790 3791 static const struct intel_context_ops virtual_guc_context_ops = { 3792 .flags = COPS_RUNTIME_CYCLES, 3793 .alloc = guc_virtual_context_alloc, 3794 3795 .close = guc_context_close, 3796 3797 .pre_pin = guc_virtual_context_pre_pin, 3798 .pin = guc_virtual_context_pin, 3799 .unpin = guc_virtual_context_unpin, 3800 .post_unpin = guc_context_post_unpin, 3801 3802 .revoke = guc_context_revoke, 3803 3804 .cancel_request = guc_context_cancel_request, 3805 3806 .enter = guc_virtual_context_enter, 3807 .exit = guc_virtual_context_exit, 3808 3809 .sched_disable = guc_context_sched_disable, 3810 .update_stats = guc_context_update_stats, 3811 3812 .destroy = guc_context_destroy, 3813 3814 .get_sibling = guc_virtual_get_sibling, 3815 }; 3816 3817 static int guc_parent_context_pin(struct intel_context *ce, void *vaddr) 3818 { 3819 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3820 struct intel_guc *guc = ce_to_guc(ce); 3821 int ret; 3822 3823 GEM_BUG_ON(!intel_context_is_parent(ce)); 3824 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3825 3826 ret = 
pin_guc_id(guc, ce); 3827 if (unlikely(ret < 0)) 3828 return ret; 3829 3830 return __guc_context_pin(ce, engine, vaddr); 3831 } 3832 3833 static int guc_child_context_pin(struct intel_context *ce, void *vaddr) 3834 { 3835 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3836 3837 GEM_BUG_ON(!intel_context_is_child(ce)); 3838 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3839 3840 __intel_context_pin(ce->parallel.parent); 3841 return __guc_context_pin(ce, engine, vaddr); 3842 } 3843 3844 static void guc_parent_context_unpin(struct intel_context *ce) 3845 { 3846 struct intel_guc *guc = ce_to_guc(ce); 3847 3848 GEM_BUG_ON(context_enabled(ce)); 3849 GEM_BUG_ON(intel_context_is_barrier(ce)); 3850 GEM_BUG_ON(!intel_context_is_parent(ce)); 3851 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3852 3853 unpin_guc_id(guc, ce); 3854 lrc_unpin(ce); 3855 } 3856 3857 static void guc_child_context_unpin(struct intel_context *ce) 3858 { 3859 GEM_BUG_ON(context_enabled(ce)); 3860 GEM_BUG_ON(intel_context_is_barrier(ce)); 3861 GEM_BUG_ON(!intel_context_is_child(ce)); 3862 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3863 3864 lrc_unpin(ce); 3865 } 3866 3867 static void guc_child_context_post_unpin(struct intel_context *ce) 3868 { 3869 GEM_BUG_ON(!intel_context_is_child(ce)); 3870 GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent)); 3871 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3872 3873 lrc_post_unpin(ce); 3874 intel_context_unpin(ce->parallel.parent); 3875 } 3876 3877 static void guc_child_context_destroy(struct kref *kref) 3878 { 3879 struct intel_context *ce = container_of(kref, typeof(*ce), ref); 3880 3881 __guc_context_destroy(ce); 3882 } 3883 3884 static const struct intel_context_ops virtual_parent_context_ops = { 3885 .alloc = guc_virtual_context_alloc, 3886 3887 .close = guc_context_close, 3888 3889 .pre_pin = guc_context_pre_pin, 3890 .pin = guc_parent_context_pin, 3891 .unpin = guc_parent_context_unpin, 3892 .post_unpin = guc_context_post_unpin, 3893 3894 .revoke = guc_context_revoke, 3895 3896 .cancel_request = guc_context_cancel_request, 3897 3898 .enter = guc_virtual_context_enter, 3899 .exit = guc_virtual_context_exit, 3900 3901 .sched_disable = guc_context_sched_disable, 3902 3903 .destroy = guc_context_destroy, 3904 3905 .get_sibling = guc_virtual_get_sibling, 3906 }; 3907 3908 static const struct intel_context_ops virtual_child_context_ops = { 3909 .alloc = guc_virtual_context_alloc, 3910 3911 .pre_pin = guc_context_pre_pin, 3912 .pin = guc_child_context_pin, 3913 .unpin = guc_child_context_unpin, 3914 .post_unpin = guc_child_context_post_unpin, 3915 3916 .cancel_request = guc_context_cancel_request, 3917 3918 .enter = guc_virtual_context_enter, 3919 .exit = guc_virtual_context_exit, 3920 3921 .destroy = guc_child_context_destroy, 3922 3923 .get_sibling = guc_virtual_get_sibling, 3924 }; 3925 3926 /* 3927 * The below override of the breadcrumbs is enabled when the user configures a 3928 * context for parallel submission (multi-lrc, parent-child). 3929 * 3930 * The overridden breadcrumbs implements an algorithm which allows the GuC to 3931 * safely preempt all the hw contexts configured for parallel submission 3932 * between each BB. The contract between the i915 and GuC is if the parent 3933 * context can be preempted, all the children can be preempted, and the GuC will 3934 * always try to preempt the parent before the children. 
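 *
 * (For illustration, the ordering implemented by
 * emit_bb_start_{parent,child}_no_preempt_mid_batch() below is roughly:
 *    parent: wait for each child's "join" -> disable arbitration ->
 *            signal "go" -> jump to batch
 *    child:  signal "join" -> wait for "go" -> disable arbitration ->
 *            jump to batch
 * with arbitration re-enabled again in the fini breadcrumbs.)
 *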
A handshake between the 3935 * parent / children breadcrumbs ensures the i915 holds up its end of the deal 3936 * creating a window to preempt between each set of BBs. 3937 */ 3938 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq, 3939 u64 offset, u32 len, 3940 const unsigned int flags); 3941 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq, 3942 u64 offset, u32 len, 3943 const unsigned int flags); 3944 static u32 * 3945 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 3946 u32 *cs); 3947 static u32 * 3948 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 3949 u32 *cs); 3950 3951 static struct intel_context * 3952 guc_create_parallel(struct intel_engine_cs **engines, 3953 unsigned int num_siblings, 3954 unsigned int width) 3955 { 3956 struct intel_engine_cs **siblings = NULL; 3957 struct intel_context *parent = NULL, *ce, *err; 3958 int i, j; 3959 3960 siblings = kmalloc_array(num_siblings, 3961 sizeof(*siblings), 3962 GFP_KERNEL); 3963 if (!siblings) 3964 return ERR_PTR(-ENOMEM); 3965 3966 for (i = 0; i < width; ++i) { 3967 for (j = 0; j < num_siblings; ++j) 3968 siblings[j] = engines[i * num_siblings + j]; 3969 3970 ce = intel_engine_create_virtual(siblings, num_siblings, 3971 FORCE_VIRTUAL); 3972 if (IS_ERR(ce)) { 3973 err = ERR_CAST(ce); 3974 goto unwind; 3975 } 3976 3977 if (i == 0) { 3978 parent = ce; 3979 parent->ops = &virtual_parent_context_ops; 3980 } else { 3981 ce->ops = &virtual_child_context_ops; 3982 intel_context_bind_parent_child(parent, ce); 3983 } 3984 } 3985 3986 parent->parallel.fence_context = dma_fence_context_alloc(1); 3987 3988 parent->engine->emit_bb_start = 3989 emit_bb_start_parent_no_preempt_mid_batch; 3990 parent->engine->emit_fini_breadcrumb = 3991 emit_fini_breadcrumb_parent_no_preempt_mid_batch; 3992 parent->engine->emit_fini_breadcrumb_dw = 3993 12 + 4 * parent->parallel.number_children; 3994 for_each_child(parent, ce) { 3995 ce->engine->emit_bb_start = 3996 emit_bb_start_child_no_preempt_mid_batch; 3997 ce->engine->emit_fini_breadcrumb = 3998 emit_fini_breadcrumb_child_no_preempt_mid_batch; 3999 ce->engine->emit_fini_breadcrumb_dw = 16; 4000 } 4001 4002 kfree(siblings); 4003 return parent; 4004 4005 unwind: 4006 if (parent) 4007 intel_context_put(parent); 4008 kfree(siblings); 4009 return err; 4010 } 4011 4012 static bool 4013 guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b) 4014 { 4015 struct intel_engine_cs *sibling; 4016 intel_engine_mask_t tmp, mask = b->engine_mask; 4017 bool result = false; 4018 4019 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) 4020 result |= intel_engine_irq_enable(sibling); 4021 4022 return result; 4023 } 4024 4025 static void 4026 guc_irq_disable_breadcrumbs(struct intel_breadcrumbs *b) 4027 { 4028 struct intel_engine_cs *sibling; 4029 intel_engine_mask_t tmp, mask = b->engine_mask; 4030 4031 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) 4032 intel_engine_irq_disable(sibling); 4033 } 4034 4035 static void guc_init_breadcrumbs(struct intel_engine_cs *engine) 4036 { 4037 int i; 4038 4039 /* 4040 * In GuC submission mode we do not know which physical engine a request 4041 * will be scheduled on, this creates a problem because the breadcrumb 4042 * interrupt is per physical engine. To work around this we attach 4043 * requests and direct all breadcrumb interrupts to the first instance 4044 * of an engine per class. 
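 *
 * For example (an illustration with assumed engine names, not taken from
 * the code): with vcs0-vcs3 in one class, the loop below makes vcs1-vcs3
 * drop their own intel_breadcrumbs and share vcs0's, and every engine
 * ORs itself into that shared structure:
 *
 *    engine->breadcrumbs = intel_breadcrumbs_get(vcs0->breadcrumbs);
 *    engine->breadcrumbs->engine_mask |= engine->mask;
 *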
In addition all breadcrumb interrupts are 4045 * enabled / disabled across an engine class in unison. 4046 */ 4047 for (i = 0; i < MAX_ENGINE_INSTANCE; ++i) { 4048 struct intel_engine_cs *sibling = 4049 engine->gt->engine_class[engine->class][i]; 4050 4051 if (sibling) { 4052 if (engine->breadcrumbs != sibling->breadcrumbs) { 4053 intel_breadcrumbs_put(engine->breadcrumbs); 4054 engine->breadcrumbs = 4055 intel_breadcrumbs_get(sibling->breadcrumbs); 4056 } 4057 break; 4058 } 4059 } 4060 4061 if (engine->breadcrumbs) { 4062 engine->breadcrumbs->engine_mask |= engine->mask; 4063 engine->breadcrumbs->irq_enable = guc_irq_enable_breadcrumbs; 4064 engine->breadcrumbs->irq_disable = guc_irq_disable_breadcrumbs; 4065 } 4066 } 4067 4068 static void guc_bump_inflight_request_prio(struct i915_request *rq, 4069 int prio) 4070 { 4071 struct intel_context *ce = request_to_scheduling_context(rq); 4072 u8 new_guc_prio = map_i915_prio_to_guc_prio(prio); 4073 4074 /* Short circuit function */ 4075 if (prio < I915_PRIORITY_NORMAL || 4076 rq->guc_prio == GUC_PRIO_FINI || 4077 (rq->guc_prio != GUC_PRIO_INIT && 4078 !new_guc_prio_higher(rq->guc_prio, new_guc_prio))) 4079 return; 4080 4081 spin_lock(&ce->guc_state.lock); 4082 if (rq->guc_prio != GUC_PRIO_FINI) { 4083 if (rq->guc_prio != GUC_PRIO_INIT) 4084 sub_context_inflight_prio(ce, rq->guc_prio); 4085 rq->guc_prio = new_guc_prio; 4086 add_context_inflight_prio(ce, rq->guc_prio); 4087 update_context_prio(ce); 4088 } 4089 spin_unlock(&ce->guc_state.lock); 4090 } 4091 4092 static void guc_retire_inflight_request_prio(struct i915_request *rq) 4093 { 4094 struct intel_context *ce = request_to_scheduling_context(rq); 4095 4096 spin_lock(&ce->guc_state.lock); 4097 guc_prio_fini(rq, ce); 4098 spin_unlock(&ce->guc_state.lock); 4099 } 4100 4101 static void sanitize_hwsp(struct intel_engine_cs *engine) 4102 { 4103 struct intel_timeline *tl; 4104 4105 list_for_each_entry(tl, &engine->status_page.timelines, engine_link) 4106 intel_timeline_reset_seqno(tl); 4107 } 4108 4109 static void guc_sanitize(struct intel_engine_cs *engine) 4110 { 4111 /* 4112 * Poison residual state on resume, in case the suspend didn't! 4113 * 4114 * We have to assume that across suspend/resume (or other loss 4115 * of control) that the contents of our pinned buffers has been 4116 * lost, replaced by garbage. Since this doesn't always happen, 4117 * let's poison such state so that we more quickly spot when 4118 * we falsely assume it has been preserved. 4119 */ 4120 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 4121 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE); 4122 4123 /* 4124 * The kernel_context HWSP is stored in the status_page. As above, 4125 * that may be lost on resume/initialisation, and so we need to 4126 * reset the value in the HWSP. 
4127 */ 4128 sanitize_hwsp(engine); 4129 4130 /* And scrub the dirty cachelines for the HWSP */ 4131 drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE); 4132 4133 intel_engine_reset_pinned_contexts(engine); 4134 } 4135 4136 static void setup_hwsp(struct intel_engine_cs *engine) 4137 { 4138 intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */ 4139 4140 ENGINE_WRITE_FW(engine, 4141 RING_HWS_PGA, 4142 i915_ggtt_offset(engine->status_page.vma)); 4143 } 4144 4145 static void start_engine(struct intel_engine_cs *engine) 4146 { 4147 ENGINE_WRITE_FW(engine, 4148 RING_MODE_GEN7, 4149 _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE)); 4150 4151 ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); 4152 ENGINE_POSTING_READ(engine, RING_MI_MODE); 4153 } 4154 4155 static int guc_resume(struct intel_engine_cs *engine) 4156 { 4157 assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL); 4158 4159 intel_mocs_init_engine(engine); 4160 4161 intel_breadcrumbs_reset(engine->breadcrumbs); 4162 4163 setup_hwsp(engine); 4164 start_engine(engine); 4165 4166 if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) 4167 xehp_enable_ccs_engines(engine); 4168 4169 return 0; 4170 } 4171 4172 static bool guc_sched_engine_disabled(struct i915_sched_engine *sched_engine) 4173 { 4174 return !sched_engine->tasklet.callback; 4175 } 4176 4177 static void guc_set_default_submission(struct intel_engine_cs *engine) 4178 { 4179 engine->submit_request = guc_submit_request; 4180 } 4181 4182 static inline int guc_kernel_context_pin(struct intel_guc *guc, 4183 struct intel_context *ce) 4184 { 4185 int ret; 4186 4187 /* 4188 * Note: we purposefully do not check the returns below because 4189 * the registration can only fail if a reset is just starting. 4190 * This is called at the end of reset so presumably another reset 4191 * isn't happening and even it did this code would be run again. 4192 */ 4193 4194 if (context_guc_id_invalid(ce)) { 4195 ret = pin_guc_id(guc, ce); 4196 4197 if (ret < 0) 4198 return ret; 4199 } 4200 4201 if (!test_bit(CONTEXT_GUC_INIT, &ce->flags)) 4202 guc_context_init(ce); 4203 4204 ret = try_context_registration(ce, true); 4205 if (ret) 4206 unpin_guc_id(guc, ce); 4207 4208 return ret; 4209 } 4210 4211 static inline int guc_init_submission(struct intel_guc *guc) 4212 { 4213 struct intel_gt *gt = guc_to_gt(guc); 4214 struct intel_engine_cs *engine; 4215 enum intel_engine_id id; 4216 4217 /* make sure all descriptors are clean... */ 4218 xa_destroy(&guc->context_lookup); 4219 4220 /* 4221 * A reset might have occurred while we had a pending stalled request, 4222 * so make sure we clean that up. 4223 */ 4224 guc->stalled_request = NULL; 4225 guc->submission_stall_reason = STALL_NONE; 4226 4227 /* 4228 * Some contexts might have been pinned before we enabled GuC 4229 * submission, so we need to add them to the GuC bookeeping. 4230 * Also, after a reset the of the GuC we want to make sure that the 4231 * information shared with GuC is properly reset. The kernel LRCs are 4232 * not attached to the gem_context, so they need to be added separately. 
4233 */ 4234 for_each_engine(engine, gt, id) { 4235 struct intel_context *ce; 4236 4237 list_for_each_entry(ce, &engine->pinned_contexts_list, 4238 pinned_contexts_link) { 4239 int ret = guc_kernel_context_pin(guc, ce); 4240 4241 if (ret) { 4242 /* No point in trying to clean up as i915 will wedge on failure */ 4243 return ret; 4244 } 4245 } 4246 } 4247 4248 return 0; 4249 } 4250 4251 static void guc_release(struct intel_engine_cs *engine) 4252 { 4253 engine->sanitize = NULL; /* no longer in control, nothing to sanitize */ 4254 4255 intel_engine_cleanup_common(engine); 4256 lrc_fini_wa_ctx(engine); 4257 } 4258 4259 static void virtual_guc_bump_serial(struct intel_engine_cs *engine) 4260 { 4261 struct intel_engine_cs *e; 4262 intel_engine_mask_t tmp, mask = engine->mask; 4263 4264 for_each_engine_masked(e, engine->gt, mask, tmp) 4265 e->serial++; 4266 } 4267 4268 static void guc_default_vfuncs(struct intel_engine_cs *engine) 4269 { 4270 /* Default vfuncs which can be overridden by each engine. */ 4271 4272 engine->resume = guc_resume; 4273 4274 engine->cops = &guc_context_ops; 4275 engine->request_alloc = guc_request_alloc; 4276 engine->add_active_request = add_to_context; 4277 engine->remove_active_request = remove_from_context; 4278 4279 engine->sched_engine->schedule = i915_schedule; 4280 4281 engine->reset.prepare = guc_engine_reset_prepare; 4282 engine->reset.rewind = guc_rewind_nop; 4283 engine->reset.cancel = guc_reset_nop; 4284 engine->reset.finish = guc_reset_nop; 4285 4286 engine->emit_flush = gen8_emit_flush_xcs; 4287 engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; 4288 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs; 4289 if (GRAPHICS_VER(engine->i915) >= 12) { 4290 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs; 4291 engine->emit_flush = gen12_emit_flush_xcs; 4292 } 4293 engine->set_default_submission = guc_set_default_submission; 4294 engine->busyness = guc_engine_busyness; 4295 4296 engine->flags |= I915_ENGINE_SUPPORTS_STATS; 4297 engine->flags |= I915_ENGINE_HAS_PREEMPTION; 4298 engine->flags |= I915_ENGINE_HAS_TIMESLICES; 4299 4300 /* Wa_14014475959:dg2 */ 4301 if (engine->class == COMPUTE_CLASS) 4302 if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) || 4303 IS_DG2(engine->i915)) 4304 engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT; 4305 4306 /* 4307 * TODO: GuC supports timeslicing and semaphores as well, but they're 4308 * handled by the firmware so some minor tweaks are required before 4309 * enabling. 
4310 * 4311 * engine->flags |= I915_ENGINE_HAS_SEMAPHORES; 4312 */ 4313 4314 engine->emit_bb_start = gen8_emit_bb_start; 4315 if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) 4316 engine->emit_bb_start = xehp_emit_bb_start; 4317 } 4318 4319 static void rcs_submission_override(struct intel_engine_cs *engine) 4320 { 4321 switch (GRAPHICS_VER(engine->i915)) { 4322 case 12: 4323 engine->emit_flush = gen12_emit_flush_rcs; 4324 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs; 4325 break; 4326 case 11: 4327 engine->emit_flush = gen11_emit_flush_rcs; 4328 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs; 4329 break; 4330 default: 4331 engine->emit_flush = gen8_emit_flush_rcs; 4332 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; 4333 break; 4334 } 4335 } 4336 4337 static inline void guc_default_irqs(struct intel_engine_cs *engine) 4338 { 4339 engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT; 4340 intel_engine_set_irq_handler(engine, cs_irq_handler); 4341 } 4342 4343 static void guc_sched_engine_destroy(struct kref *kref) 4344 { 4345 struct i915_sched_engine *sched_engine = 4346 container_of(kref, typeof(*sched_engine), ref); 4347 struct intel_guc *guc = sched_engine->private_data; 4348 4349 guc->sched_engine = NULL; 4350 tasklet_kill(&sched_engine->tasklet); /* flush the callback */ 4351 kfree(sched_engine); 4352 } 4353 4354 int intel_guc_submission_setup(struct intel_engine_cs *engine) 4355 { 4356 struct drm_i915_private *i915 = engine->i915; 4357 struct intel_guc *guc = &engine->gt->uc.guc; 4358 4359 /* 4360 * The setup relies on several assumptions (e.g. irqs always enabled) 4361 * that are only valid on gen11+ 4362 */ 4363 GEM_BUG_ON(GRAPHICS_VER(i915) < 11); 4364 4365 if (!guc->sched_engine) { 4366 guc->sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL); 4367 if (!guc->sched_engine) 4368 return -ENOMEM; 4369 4370 guc->sched_engine->schedule = i915_schedule; 4371 guc->sched_engine->disabled = guc_sched_engine_disabled; 4372 guc->sched_engine->private_data = guc; 4373 guc->sched_engine->destroy = guc_sched_engine_destroy; 4374 guc->sched_engine->bump_inflight_request_prio = 4375 guc_bump_inflight_request_prio; 4376 guc->sched_engine->retire_inflight_request_prio = 4377 guc_retire_inflight_request_prio; 4378 tasklet_setup(&guc->sched_engine->tasklet, 4379 guc_submission_tasklet); 4380 } 4381 i915_sched_engine_put(engine->sched_engine); 4382 engine->sched_engine = i915_sched_engine_get(guc->sched_engine); 4383 4384 guc_default_vfuncs(engine); 4385 guc_default_irqs(engine); 4386 guc_init_breadcrumbs(engine); 4387 4388 if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) 4389 rcs_submission_override(engine); 4390 4391 lrc_init_wa_ctx(engine); 4392 4393 /* Finally, take ownership and responsibility for cleanup! 
*/ 4394 engine->sanitize = guc_sanitize; 4395 engine->release = guc_release; 4396 4397 return 0; 4398 } 4399 4400 struct scheduling_policy { 4401 /* internal data */ 4402 u32 max_words, num_words; 4403 u32 count; 4404 /* API data */ 4405 struct guc_update_scheduling_policy h2g; 4406 }; 4407 4408 static u32 __guc_scheduling_policy_action_size(struct scheduling_policy *policy) 4409 { 4410 u32 *start = (void *)&policy->h2g; 4411 u32 *end = policy->h2g.data + policy->num_words; 4412 size_t delta = end - start; 4413 4414 return delta; 4415 } 4416 4417 static struct scheduling_policy *__guc_scheduling_policy_start_klv(struct scheduling_policy *policy) 4418 { 4419 policy->h2g.header.action = INTEL_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV; 4420 policy->max_words = ARRAY_SIZE(policy->h2g.data); 4421 policy->num_words = 0; 4422 policy->count = 0; 4423 4424 return policy; 4425 } 4426 4427 static void __guc_scheduling_policy_add_klv(struct scheduling_policy *policy, 4428 u32 action, u32 *data, u32 len) 4429 { 4430 u32 *klv_ptr = policy->h2g.data + policy->num_words; 4431 4432 GEM_BUG_ON((policy->num_words + 1 + len) > policy->max_words); 4433 *(klv_ptr++) = FIELD_PREP(GUC_KLV_0_KEY, action) | 4434 FIELD_PREP(GUC_KLV_0_LEN, len); 4435 memcpy(klv_ptr, data, sizeof(u32) * len); 4436 policy->num_words += 1 + len; 4437 policy->count++; 4438 } 4439 4440 static int __guc_action_set_scheduling_policies(struct intel_guc *guc, 4441 struct scheduling_policy *policy) 4442 { 4443 int ret; 4444 4445 ret = intel_guc_send(guc, (u32 *)&policy->h2g, 4446 __guc_scheduling_policy_action_size(policy)); 4447 if (ret < 0) { 4448 guc_probe_error(guc, "Failed to configure global scheduling policies: %pe!\n", 4449 ERR_PTR(ret)); 4450 return ret; 4451 } 4452 4453 if (ret != policy->count) { 4454 guc_warn(guc, "global scheduler policy processed %d of %d KLVs!", 4455 ret, policy->count); 4456 if (ret > policy->count) 4457 return -EPROTO; 4458 } 4459 4460 return 0; 4461 } 4462 4463 static int guc_init_global_schedule_policy(struct intel_guc *guc) 4464 { 4465 struct scheduling_policy policy; 4466 struct intel_gt *gt = guc_to_gt(guc); 4467 intel_wakeref_t wakeref; 4468 int ret; 4469 4470 if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0)) 4471 return 0; 4472 4473 __guc_scheduling_policy_start_klv(&policy); 4474 4475 with_intel_runtime_pm(>->i915->runtime_pm, wakeref) { 4476 u32 yield[] = { 4477 GLOBAL_SCHEDULE_POLICY_RC_YIELD_DURATION, 4478 GLOBAL_SCHEDULE_POLICY_RC_YIELD_RATIO, 4479 }; 4480 4481 __guc_scheduling_policy_add_klv(&policy, 4482 GUC_SCHEDULING_POLICIES_KLV_ID_RENDER_COMPUTE_YIELD, 4483 yield, ARRAY_SIZE(yield)); 4484 4485 ret = __guc_action_set_scheduling_policies(guc, &policy); 4486 } 4487 4488 return ret; 4489 } 4490 4491 static void guc_route_semaphores(struct intel_guc *guc, bool to_guc) 4492 { 4493 struct intel_gt *gt = guc_to_gt(guc); 4494 u32 val; 4495 4496 if (GRAPHICS_VER(gt->i915) < 12) 4497 return; 4498 4499 if (to_guc) 4500 val = GUC_SEM_INTR_ROUTE_TO_GUC | GUC_SEM_INTR_ENABLE_ALL; 4501 else 4502 val = 0; 4503 4504 intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES, val); 4505 } 4506 4507 int intel_guc_submission_enable(struct intel_guc *guc) 4508 { 4509 int ret; 4510 4511 /* Semaphore interrupt enable and route to GuC */ 4512 guc_route_semaphores(guc, true); 4513 4514 ret = guc_init_submission(guc); 4515 if (ret) 4516 goto fail_sem; 4517 4518 ret = guc_init_engine_stats(guc); 4519 if (ret) 4520 goto fail_sem; 4521 4522 ret = guc_init_global_schedule_policy(guc); 4523 if (ret) 4524 goto fail_stats; 4525 
4526 return 0; 4527 4528 fail_stats: 4529 guc_fini_engine_stats(guc); 4530 fail_sem: 4531 guc_route_semaphores(guc, false); 4532 return ret; 4533 } 4534 4535 /* Note: By the time we're here, GuC may have already been reset */ 4536 void intel_guc_submission_disable(struct intel_guc *guc) 4537 { 4538 guc_cancel_busyness_worker(guc); 4539 4540 /* Semaphore interrupt disable and route to host */ 4541 guc_route_semaphores(guc, false); 4542 } 4543 4544 static bool __guc_submission_supported(struct intel_guc *guc) 4545 { 4546 /* GuC submission is unavailable for pre-Gen11 */ 4547 return intel_guc_is_supported(guc) && 4548 GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11; 4549 } 4550 4551 static bool __guc_submission_selected(struct intel_guc *guc) 4552 { 4553 struct drm_i915_private *i915 = guc_to_gt(guc)->i915; 4554 4555 if (!intel_guc_submission_is_supported(guc)) 4556 return false; 4557 4558 return i915->params.enable_guc & ENABLE_GUC_SUBMISSION; 4559 } 4560 4561 int intel_guc_sched_disable_gucid_threshold_max(struct intel_guc *guc) 4562 { 4563 return guc->submission_state.num_guc_ids - NUMBER_MULTI_LRC_GUC_ID(guc); 4564 } 4565 4566 /* 4567 * This default value of 33 milisecs (+1 milisec round up) ensures 30fps or higher 4568 * workloads are able to enjoy the latency reduction when delaying the schedule-disable 4569 * operation. This matches the 30fps game-render + encode (real world) workload this 4570 * knob was tested against. 4571 */ 4572 #define SCHED_DISABLE_DELAY_MS 34 4573 4574 /* 4575 * A threshold of 75% is a reasonable starting point considering that real world apps 4576 * generally don't get anywhere near this. 4577 */ 4578 #define NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(__guc) \ 4579 (((intel_guc_sched_disable_gucid_threshold_max(guc)) * 3) / 4) 4580 4581 void intel_guc_submission_init_early(struct intel_guc *guc) 4582 { 4583 xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ); 4584 4585 spin_lock_init(&guc->submission_state.lock); 4586 INIT_LIST_HEAD(&guc->submission_state.guc_id_list); 4587 ida_init(&guc->submission_state.guc_ids); 4588 INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts); 4589 INIT_WORK(&guc->submission_state.destroyed_worker, 4590 destroyed_worker_func); 4591 INIT_WORK(&guc->submission_state.reset_fail_worker, 4592 reset_fail_worker_func); 4593 4594 spin_lock_init(&guc->timestamp.lock); 4595 INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping); 4596 4597 guc->submission_state.sched_disable_delay_ms = SCHED_DISABLE_DELAY_MS; 4598 guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID; 4599 guc->submission_state.sched_disable_gucid_threshold = 4600 NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(guc); 4601 guc->submission_supported = __guc_submission_supported(guc); 4602 guc->submission_selected = __guc_submission_selected(guc); 4603 } 4604 4605 static inline struct intel_context * 4606 g2h_context_lookup(struct intel_guc *guc, u32 ctx_id) 4607 { 4608 struct intel_context *ce; 4609 4610 if (unlikely(ctx_id >= GUC_MAX_CONTEXT_ID)) { 4611 guc_err(guc, "Invalid ctx_id %u\n", ctx_id); 4612 return NULL; 4613 } 4614 4615 ce = __get_context(guc, ctx_id); 4616 if (unlikely(!ce)) { 4617 guc_err(guc, "Context is NULL, ctx_id %u\n", ctx_id); 4618 return NULL; 4619 } 4620 4621 if (unlikely(intel_context_is_child(ce))) { 4622 guc_err(guc, "Context is child, ctx_id %u\n", ctx_id); 4623 return NULL; 4624 } 4625 4626 return ce; 4627 } 4628 4629 int intel_guc_deregister_done_process_msg(struct intel_guc *guc, 4630 const u32 *msg, 4631 u32 len) 4632 { 4633 struct 
intel_context *ce; 4634 u32 ctx_id; 4635 4636 if (unlikely(len < 1)) { 4637 guc_err(guc, "Invalid length %u\n", len); 4638 return -EPROTO; 4639 } 4640 ctx_id = msg[0]; 4641 4642 ce = g2h_context_lookup(guc, ctx_id); 4643 if (unlikely(!ce)) 4644 return -EPROTO; 4645 4646 trace_intel_context_deregister_done(ce); 4647 4648 #ifdef CONFIG_DRM_I915_SELFTEST 4649 if (unlikely(ce->drop_deregister)) { 4650 ce->drop_deregister = false; 4651 return 0; 4652 } 4653 #endif 4654 4655 if (context_wait_for_deregister_to_register(ce)) { 4656 struct intel_runtime_pm *runtime_pm = 4657 &ce->engine->gt->i915->runtime_pm; 4658 intel_wakeref_t wakeref; 4659 4660 /* 4661 * Previous owner of this guc_id has been deregistered, now safe 4662 * register this context. 4663 */ 4664 with_intel_runtime_pm(runtime_pm, wakeref) 4665 register_context(ce, true); 4666 guc_signal_context_fence(ce); 4667 intel_context_put(ce); 4668 } else if (context_destroyed(ce)) { 4669 /* Context has been destroyed */ 4670 intel_gt_pm_put_async(guc_to_gt(guc)); 4671 release_guc_id(guc, ce); 4672 __guc_context_destroy(ce); 4673 } 4674 4675 decr_outstanding_submission_g2h(guc); 4676 4677 return 0; 4678 } 4679 4680 int intel_guc_sched_done_process_msg(struct intel_guc *guc, 4681 const u32 *msg, 4682 u32 len) 4683 { 4684 struct intel_context *ce; 4685 unsigned long flags; 4686 u32 ctx_id; 4687 4688 if (unlikely(len < 2)) { 4689 guc_err(guc, "Invalid length %u\n", len); 4690 return -EPROTO; 4691 } 4692 ctx_id = msg[0]; 4693 4694 ce = g2h_context_lookup(guc, ctx_id); 4695 if (unlikely(!ce)) 4696 return -EPROTO; 4697 4698 if (unlikely(context_destroyed(ce) || 4699 (!context_pending_enable(ce) && 4700 !context_pending_disable(ce)))) { 4701 guc_err(guc, "Bad context sched_state 0x%x, ctx_id %u\n", 4702 ce->guc_state.sched_state, ctx_id); 4703 return -EPROTO; 4704 } 4705 4706 trace_intel_context_sched_done(ce); 4707 4708 if (context_pending_enable(ce)) { 4709 #ifdef CONFIG_DRM_I915_SELFTEST 4710 if (unlikely(ce->drop_schedule_enable)) { 4711 ce->drop_schedule_enable = false; 4712 return 0; 4713 } 4714 #endif 4715 4716 spin_lock_irqsave(&ce->guc_state.lock, flags); 4717 clr_context_pending_enable(ce); 4718 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 4719 } else if (context_pending_disable(ce)) { 4720 bool banned; 4721 4722 #ifdef CONFIG_DRM_I915_SELFTEST 4723 if (unlikely(ce->drop_schedule_disable)) { 4724 ce->drop_schedule_disable = false; 4725 return 0; 4726 } 4727 #endif 4728 4729 /* 4730 * Unpin must be done before __guc_signal_context_fence, 4731 * otherwise a race exists between the requests getting 4732 * submitted + retired before this unpin completes resulting in 4733 * the pin_count going to zero and the context still being 4734 * enabled. 
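 *
 * Sketch of the interleaving this ordering avoids (illustration only,
 * not driver code):
 *
 *    __guc_signal_context_fence(ce);  <- blocked requests released
 *       ... released requests are submitted and retired ...
 *    intel_context_sched_disable_unpin(ce);  <- pin_count reaches zero
 *                                               with the context enabled
 *
 * Unpinning first closes that window.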
4735 */ 4736 intel_context_sched_disable_unpin(ce); 4737 4738 spin_lock_irqsave(&ce->guc_state.lock, flags); 4739 banned = context_banned(ce); 4740 clr_context_banned(ce); 4741 clr_context_pending_disable(ce); 4742 __guc_signal_context_fence(ce); 4743 guc_blocked_fence_complete(ce); 4744 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 4745 4746 if (banned) { 4747 guc_cancel_context_requests(ce); 4748 intel_engine_signal_breadcrumbs(ce->engine); 4749 } 4750 } 4751 4752 decr_outstanding_submission_g2h(guc); 4753 intel_context_put(ce); 4754 4755 return 0; 4756 } 4757 4758 static void capture_error_state(struct intel_guc *guc, 4759 struct intel_context *ce) 4760 { 4761 struct intel_gt *gt = guc_to_gt(guc); 4762 struct drm_i915_private *i915 = gt->i915; 4763 intel_wakeref_t wakeref; 4764 intel_engine_mask_t engine_mask; 4765 4766 if (intel_engine_is_virtual(ce->engine)) { 4767 struct intel_engine_cs *e; 4768 intel_engine_mask_t tmp, virtual_mask = ce->engine->mask; 4769 4770 engine_mask = 0; 4771 for_each_engine_masked(e, ce->engine->gt, virtual_mask, tmp) { 4772 bool match = intel_guc_capture_is_matching_engine(gt, ce, e); 4773 4774 if (match) { 4775 intel_engine_set_hung_context(e, ce); 4776 engine_mask |= e->mask; 4777 i915_increase_reset_engine_count(&i915->gpu_error, 4778 e); 4779 } 4780 } 4781 4782 if (!engine_mask) { 4783 guc_warn(guc, "No matching physical engine capture for virtual engine context 0x%04X / %s", 4784 ce->guc_id.id, ce->engine->name); 4785 engine_mask = ~0U; 4786 } 4787 } else { 4788 intel_engine_set_hung_context(ce->engine, ce); 4789 engine_mask = ce->engine->mask; 4790 i915_increase_reset_engine_count(&i915->gpu_error, ce->engine); 4791 } 4792 4793 with_intel_runtime_pm(&i915->runtime_pm, wakeref) 4794 i915_capture_error_state(gt, engine_mask, CORE_DUMP_FLAG_IS_GUC_CAPTURE); 4795 } 4796 4797 static void guc_context_replay(struct intel_context *ce) 4798 { 4799 struct i915_sched_engine *sched_engine = ce->engine->sched_engine; 4800 4801 __guc_reset_context(ce, ce->engine->mask); 4802 tasklet_hi_schedule(&sched_engine->tasklet); 4803 } 4804 4805 static void guc_handle_context_reset(struct intel_guc *guc, 4806 struct intel_context *ce) 4807 { 4808 trace_intel_context_reset(ce); 4809 4810 guc_dbg(guc, "Got context reset notification: 0x%04X on %s, exiting = %s, banned = %s\n", 4811 ce->guc_id.id, ce->engine->name, 4812 str_yes_no(intel_context_is_exiting(ce)), 4813 str_yes_no(intel_context_is_banned(ce))); 4814 4815 if (likely(intel_context_is_schedulable(ce))) { 4816 capture_error_state(guc, ce); 4817 guc_context_replay(ce); 4818 } else { 4819 guc_info(guc, "Ignoring context reset notification of exiting context 0x%04X on %s", 4820 ce->guc_id.id, ce->engine->name); 4821 } 4822 } 4823 4824 int intel_guc_context_reset_process_msg(struct intel_guc *guc, 4825 const u32 *msg, u32 len) 4826 { 4827 struct intel_context *ce; 4828 unsigned long flags; 4829 int ctx_id; 4830 4831 if (unlikely(len != 1)) { 4832 guc_err(guc, "Invalid length %u", len); 4833 return -EPROTO; 4834 } 4835 4836 ctx_id = msg[0]; 4837 4838 /* 4839 * The context lookup uses the xarray but lookups only require an RCU lock 4840 * not the full spinlock. So take the lock explicitly and keep it until the 4841 * context has been reference count locked to ensure it can't be destroyed 4842 * asynchronously until the reset is done. 
4843 */ 4844 xa_lock_irqsave(&guc->context_lookup, flags); 4845 ce = g2h_context_lookup(guc, ctx_id); 4846 if (ce) 4847 intel_context_get(ce); 4848 xa_unlock_irqrestore(&guc->context_lookup, flags); 4849 4850 if (unlikely(!ce)) 4851 return -EPROTO; 4852 4853 guc_handle_context_reset(guc, ce); 4854 intel_context_put(ce); 4855 4856 return 0; 4857 } 4858 4859 int intel_guc_error_capture_process_msg(struct intel_guc *guc, 4860 const u32 *msg, u32 len) 4861 { 4862 u32 status; 4863 4864 if (unlikely(len != 1)) { 4865 guc_dbg(guc, "Invalid length %u", len); 4866 return -EPROTO; 4867 } 4868 4869 status = msg[0] & INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK; 4870 if (status == INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE) 4871 guc_warn(guc, "No space for error capture"); 4872 4873 intel_guc_capture_process(guc); 4874 4875 return 0; 4876 } 4877 4878 struct intel_engine_cs * 4879 intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance) 4880 { 4881 struct intel_gt *gt = guc_to_gt(guc); 4882 u8 engine_class = guc_class_to_engine_class(guc_class); 4883 4884 /* Class index is checked in class converter */ 4885 GEM_BUG_ON(instance > MAX_ENGINE_INSTANCE); 4886 4887 return gt->engine_class[engine_class][instance]; 4888 } 4889 4890 static void reset_fail_worker_func(struct work_struct *w) 4891 { 4892 struct intel_guc *guc = container_of(w, struct intel_guc, 4893 submission_state.reset_fail_worker); 4894 struct intel_gt *gt = guc_to_gt(guc); 4895 intel_engine_mask_t reset_fail_mask; 4896 unsigned long flags; 4897 4898 spin_lock_irqsave(&guc->submission_state.lock, flags); 4899 reset_fail_mask = guc->submission_state.reset_fail_mask; 4900 guc->submission_state.reset_fail_mask = 0; 4901 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 4902 4903 if (likely(reset_fail_mask)) { 4904 struct intel_engine_cs *engine; 4905 enum intel_engine_id id; 4906 4907 /* 4908 * GuC is toast at this point - it dead loops after sending the failed 4909 * reset notification. So need to manually determine the guilty context. 4910 * Note that it should be reliable to do this here because the GuC is 4911 * toast and will not be scheduling behind the KMD's back. 4912 */ 4913 for_each_engine_masked(engine, gt, reset_fail_mask, id) 4914 intel_guc_find_hung_context(engine); 4915 4916 intel_gt_handle_error(gt, reset_fail_mask, 4917 I915_ERROR_CAPTURE, 4918 "GuC failed to reset engine mask=0x%x", 4919 reset_fail_mask); 4920 } 4921 } 4922 4923 int intel_guc_engine_failure_process_msg(struct intel_guc *guc, 4924 const u32 *msg, u32 len) 4925 { 4926 struct intel_engine_cs *engine; 4927 u8 guc_class, instance; 4928 u32 reason; 4929 unsigned long flags; 4930 4931 if (unlikely(len != 3)) { 4932 guc_err(guc, "Invalid length %u", len); 4933 return -EPROTO; 4934 } 4935 4936 guc_class = msg[0]; 4937 instance = msg[1]; 4938 reason = msg[2]; 4939 4940 engine = intel_guc_lookup_engine(guc, guc_class, instance); 4941 if (unlikely(!engine)) { 4942 guc_err(guc, "Invalid engine %d:%d", guc_class, instance); 4943 return -EPROTO; 4944 } 4945 4946 /* 4947 * This is an unexpected failure of a hardware feature. So, log a real 4948 * error message not just the informational that comes with the reset. 
4949 */ 4950 guc_err(guc, "Engine reset failed on %d:%d (%s) because 0x%08X", 4951 guc_class, instance, engine->name, reason); 4952 4953 spin_lock_irqsave(&guc->submission_state.lock, flags); 4954 guc->submission_state.reset_fail_mask |= engine->mask; 4955 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 4956 4957 /* 4958 * A GT reset flushes this worker queue (G2H handler) so we must use 4959 * another worker to trigger a GT reset. 4960 */ 4961 queue_work(system_unbound_wq, &guc->submission_state.reset_fail_worker); 4962 4963 return 0; 4964 } 4965 4966 void intel_guc_find_hung_context(struct intel_engine_cs *engine) 4967 { 4968 struct intel_guc *guc = &engine->gt->uc.guc; 4969 struct intel_context *ce; 4970 struct i915_request *rq; 4971 unsigned long index; 4972 unsigned long flags; 4973 4974 /* Reset called during driver load? GuC not yet initialised! */ 4975 if (unlikely(!guc_submission_initialized(guc))) 4976 return; 4977 4978 xa_lock_irqsave(&guc->context_lookup, flags); 4979 xa_for_each(&guc->context_lookup, index, ce) { 4980 bool found; 4981 4982 if (!kref_get_unless_zero(&ce->ref)) 4983 continue; 4984 4985 xa_unlock(&guc->context_lookup); 4986 4987 if (!intel_context_is_pinned(ce)) 4988 goto next; 4989 4990 if (intel_engine_is_virtual(ce->engine)) { 4991 if (!(ce->engine->mask & engine->mask)) 4992 goto next; 4993 } else { 4994 if (ce->engine != engine) 4995 goto next; 4996 } 4997 4998 found = false; 4999 spin_lock(&ce->guc_state.lock); 5000 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) { 5001 if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE) 5002 continue; 5003 5004 found = true; 5005 break; 5006 } 5007 spin_unlock(&ce->guc_state.lock); 5008 5009 if (found) { 5010 intel_engine_set_hung_context(engine, ce); 5011 5012 /* Can only cope with one hang at a time... */ 5013 intel_context_put(ce); 5014 xa_lock(&guc->context_lookup); 5015 goto done; 5016 } 5017 5018 next: 5019 intel_context_put(ce); 5020 xa_lock(&guc->context_lookup); 5021 } 5022 done: 5023 xa_unlock_irqrestore(&guc->context_lookup, flags); 5024 } 5025 5026 void intel_guc_dump_active_requests(struct intel_engine_cs *engine, 5027 struct i915_request *hung_rq, 5028 struct drm_printer *m) 5029 { 5030 struct intel_guc *guc = &engine->gt->uc.guc; 5031 struct intel_context *ce; 5032 unsigned long index; 5033 unsigned long flags; 5034 5035 /* Reset called during driver load? GuC not yet initialised! 
*/ 5036 if (unlikely(!guc_submission_initialized(guc))) 5037 return; 5038 5039 xa_lock_irqsave(&guc->context_lookup, flags); 5040 xa_for_each(&guc->context_lookup, index, ce) { 5041 if (!kref_get_unless_zero(&ce->ref)) 5042 continue; 5043 5044 xa_unlock(&guc->context_lookup); 5045 5046 if (!intel_context_is_pinned(ce)) 5047 goto next; 5048 5049 if (intel_engine_is_virtual(ce->engine)) { 5050 if (!(ce->engine->mask & engine->mask)) 5051 goto next; 5052 } else { 5053 if (ce->engine != engine) 5054 goto next; 5055 } 5056 5057 spin_lock(&ce->guc_state.lock); 5058 intel_engine_dump_active_requests(&ce->guc_state.requests, 5059 hung_rq, m); 5060 spin_unlock(&ce->guc_state.lock); 5061 5062 next: 5063 intel_context_put(ce); 5064 xa_lock(&guc->context_lookup); 5065 } 5066 xa_unlock_irqrestore(&guc->context_lookup, flags); 5067 } 5068 5069 void intel_guc_submission_print_info(struct intel_guc *guc, 5070 struct drm_printer *p) 5071 { 5072 struct i915_sched_engine *sched_engine = guc->sched_engine; 5073 struct rb_node *rb; 5074 unsigned long flags; 5075 5076 if (!sched_engine) 5077 return; 5078 5079 drm_printf(p, "GuC Submission API Version: %d.%d.%d\n", 5080 guc->submission_version.major, guc->submission_version.minor, 5081 guc->submission_version.patch); 5082 drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n", 5083 atomic_read(&guc->outstanding_submission_g2h)); 5084 drm_printf(p, "GuC tasklet count: %u\n", 5085 atomic_read(&sched_engine->tasklet.count)); 5086 5087 spin_lock_irqsave(&sched_engine->lock, flags); 5088 drm_printf(p, "Requests in GuC submit tasklet:\n"); 5089 for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) { 5090 struct i915_priolist *pl = to_priolist(rb); 5091 struct i915_request *rq; 5092 5093 priolist_for_each_request(rq, pl) 5094 drm_printf(p, "guc_id=%u, seqno=%llu\n", 5095 rq->context->guc_id.id, 5096 rq->fence.seqno); 5097 } 5098 spin_unlock_irqrestore(&sched_engine->lock, flags); 5099 drm_printf(p, "\n"); 5100 } 5101 5102 static inline void guc_log_context_priority(struct drm_printer *p, 5103 struct intel_context *ce) 5104 { 5105 int i; 5106 5107 drm_printf(p, "\t\tPriority: %d\n", ce->guc_state.prio); 5108 drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n"); 5109 for (i = GUC_CLIENT_PRIORITY_KMD_HIGH; 5110 i < GUC_CLIENT_PRIORITY_NUM; ++i) { 5111 drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n", 5112 i, ce->guc_state.prio_count[i]); 5113 } 5114 drm_printf(p, "\n"); 5115 } 5116 5117 static inline void guc_log_context(struct drm_printer *p, 5118 struct intel_context *ce) 5119 { 5120 drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id); 5121 drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca); 5122 drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n", 5123 ce->ring->head, 5124 ce->lrc_reg_state[CTX_RING_HEAD]); 5125 drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n", 5126 ce->ring->tail, 5127 ce->lrc_reg_state[CTX_RING_TAIL]); 5128 drm_printf(p, "\t\tContext Pin Count: %u\n", 5129 atomic_read(&ce->pin_count)); 5130 drm_printf(p, "\t\tGuC ID Ref Count: %u\n", 5131 atomic_read(&ce->guc_id.ref)); 5132 drm_printf(p, "\t\tSchedule State: 0x%x\n", 5133 ce->guc_state.sched_state); 5134 } 5135 5136 void intel_guc_submission_print_context_info(struct intel_guc *guc, 5137 struct drm_printer *p) 5138 { 5139 struct intel_context *ce; 5140 unsigned long index; 5141 unsigned long flags; 5142 5143 xa_lock_irqsave(&guc->context_lookup, flags); 5144 xa_for_each(&guc->context_lookup, index, ce) { 5145 
GEM_BUG_ON(intel_context_is_child(ce)); 5146 5147 guc_log_context(p, ce); 5148 guc_log_context_priority(p, ce); 5149 5150 if (intel_context_is_parent(ce)) { 5151 struct intel_context *child; 5152 5153 drm_printf(p, "\t\tNumber children: %u\n", 5154 ce->parallel.number_children); 5155 5156 if (ce->parallel.guc.wq_status) { 5157 drm_printf(p, "\t\tWQI Head: %u\n", 5158 READ_ONCE(*ce->parallel.guc.wq_head)); 5159 drm_printf(p, "\t\tWQI Tail: %u\n", 5160 READ_ONCE(*ce->parallel.guc.wq_tail)); 5161 drm_printf(p, "\t\tWQI Status: %u\n", 5162 READ_ONCE(*ce->parallel.guc.wq_status)); 5163 } 5164 5165 if (ce->engine->emit_bb_start == 5166 emit_bb_start_parent_no_preempt_mid_batch) { 5167 u8 i; 5168 5169 drm_printf(p, "\t\tChildren Go: %u\n", 5170 get_children_go_value(ce)); 5171 for (i = 0; i < ce->parallel.number_children; ++i) 5172 drm_printf(p, "\t\tChildren Join: %u\n", 5173 get_children_join_value(ce, i)); 5174 } 5175 5176 for_each_child(ce, child) 5177 guc_log_context(p, child); 5178 } 5179 } 5180 xa_unlock_irqrestore(&guc->context_lookup, flags); 5181 } 5182 5183 static inline u32 get_children_go_addr(struct intel_context *ce) 5184 { 5185 GEM_BUG_ON(!intel_context_is_parent(ce)); 5186 5187 return i915_ggtt_offset(ce->state) + 5188 __get_parent_scratch_offset(ce) + 5189 offsetof(struct parent_scratch, go.semaphore); 5190 } 5191 5192 static inline u32 get_children_join_addr(struct intel_context *ce, 5193 u8 child_index) 5194 { 5195 GEM_BUG_ON(!intel_context_is_parent(ce)); 5196 5197 return i915_ggtt_offset(ce->state) + 5198 __get_parent_scratch_offset(ce) + 5199 offsetof(struct parent_scratch, join[child_index].semaphore); 5200 } 5201 5202 #define PARENT_GO_BB 1 5203 #define PARENT_GO_FINI_BREADCRUMB 0 5204 #define CHILD_GO_BB 1 5205 #define CHILD_GO_FINI_BREADCRUMB 0 5206 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq, 5207 u64 offset, u32 len, 5208 const unsigned int flags) 5209 { 5210 struct intel_context *ce = rq->context; 5211 u32 *cs; 5212 u8 i; 5213 5214 GEM_BUG_ON(!intel_context_is_parent(ce)); 5215 5216 cs = intel_ring_begin(rq, 10 + 4 * ce->parallel.number_children); 5217 if (IS_ERR(cs)) 5218 return PTR_ERR(cs); 5219 5220 /* Wait on children */ 5221 for (i = 0; i < ce->parallel.number_children; ++i) { 5222 *cs++ = (MI_SEMAPHORE_WAIT | 5223 MI_SEMAPHORE_GLOBAL_GTT | 5224 MI_SEMAPHORE_POLL | 5225 MI_SEMAPHORE_SAD_EQ_SDD); 5226 *cs++ = PARENT_GO_BB; 5227 *cs++ = get_children_join_addr(ce, i); 5228 *cs++ = 0; 5229 } 5230 5231 /* Turn off preemption */ 5232 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 5233 *cs++ = MI_NOOP; 5234 5235 /* Tell children go */ 5236 cs = gen8_emit_ggtt_write(cs, 5237 CHILD_GO_BB, 5238 get_children_go_addr(ce), 5239 0); 5240 5241 /* Jump to batch */ 5242 *cs++ = MI_BATCH_BUFFER_START_GEN8 | 5243 (flags & I915_DISPATCH_SECURE ? 
0 : BIT(8));
5244 *cs++ = lower_32_bits(offset);
5245 *cs++ = upper_32_bits(offset);
5246 *cs++ = MI_NOOP;
5247
5248 intel_ring_advance(rq, cs);
5249
5250 return 0;
5251 }
5252
5253 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
5254 u64 offset, u32 len,
5255 const unsigned int flags)
5256 {
5257 struct intel_context *ce = rq->context;
5258 struct intel_context *parent = intel_context_to_parent(ce);
5259 u32 *cs;
5260
5261 GEM_BUG_ON(!intel_context_is_child(ce));
5262
5263 cs = intel_ring_begin(rq, 12);
5264 if (IS_ERR(cs))
5265 return PTR_ERR(cs);
5266
5267 /* Signal parent */
5268 cs = gen8_emit_ggtt_write(cs,
5269 PARENT_GO_BB,
5270 get_children_join_addr(parent,
5271 ce->parallel.child_index),
5272 0);
5273
5274 /* Wait on parent for go */
5275 *cs++ = (MI_SEMAPHORE_WAIT |
5276 MI_SEMAPHORE_GLOBAL_GTT |
5277 MI_SEMAPHORE_POLL |
5278 MI_SEMAPHORE_SAD_EQ_SDD);
5279 *cs++ = CHILD_GO_BB;
5280 *cs++ = get_children_go_addr(parent);
5281 *cs++ = 0;
5282
5283 /* Turn off preemption */
5284 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
5285
5286 /* Jump to batch */
5287 *cs++ = MI_BATCH_BUFFER_START_GEN8 |
5288 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
5289 *cs++ = lower_32_bits(offset);
5290 *cs++ = upper_32_bits(offset);
5291
5292 intel_ring_advance(rq, cs);
5293
5294 return 0;
5295 }
5296
5297 static u32 *
5298 __emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
5299 u32 *cs)
5300 {
5301 struct intel_context *ce = rq->context;
5302 u8 i;
5303
5304 GEM_BUG_ON(!intel_context_is_parent(ce));
5305
5306 /* Wait on children */
5307 for (i = 0; i < ce->parallel.number_children; ++i) {
5308 *cs++ = (MI_SEMAPHORE_WAIT |
5309 MI_SEMAPHORE_GLOBAL_GTT |
5310 MI_SEMAPHORE_POLL |
5311 MI_SEMAPHORE_SAD_EQ_SDD);
5312 *cs++ = PARENT_GO_FINI_BREADCRUMB;
5313 *cs++ = get_children_join_addr(ce, i);
5314 *cs++ = 0;
5315 }
5316
5317 /* Turn on preemption */
5318 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
5319 *cs++ = MI_NOOP;
5320
5321 /* Tell children go */
5322 cs = gen8_emit_ggtt_write(cs,
5323 CHILD_GO_FINI_BREADCRUMB,
5324 get_children_go_addr(ce),
5325 0);
5326
5327 return cs;
5328 }
5329
5330 /*
5331 * If this is true, a submission of multi-lrc requests had an error and the
5332 * requests need to be skipped. The front end (execbuf IOCTL) should've called
5333 * i915_request_skip which squashes the BB but we still need to emit the fini
5334 * breadcrumb seqno write. At this point we don't know how many of the
5335 * requests in the multi-lrc submission were generated so we can't do the
5336 * handshake between the parent and children (e.g. if 4 requests should be
5337 * generated but the 2nd hit an error, only 1 would be seen by the GuC backend).
5338 * Simply skip the handshake, but still emit the breadcrumb seqno, if an error
5339 * has occurred on any of the requests in submission / relationship.
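 *
 * For reference, a sketch of the dword accounting relied on below (the
 * GEM_BUG_ON()s in the emit_fini_breadcrumb_* functions check exactly
 * this):
 *
 *    NON_SKIP_LEN (6) = 4 (gen8_emit_ggtt_write of the seqno)
 *                     + 1 (MI_USER_INTERRUPT) + 1 (MI_NOOP)
 *
 * so in the skip case the first emit_fini_breadcrumb_dw - NON_SKIP_LEN
 * dwords are NOPed out and only those trailing NON_SKIP_LEN dwords are
 * emitted, keeping the ring usage identical to the non-skip case.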
5340 */ 5341 static inline bool skip_handshake(struct i915_request *rq) 5342 { 5343 return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags); 5344 } 5345 5346 #define NON_SKIP_LEN 6 5347 static u32 * 5348 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 5349 u32 *cs) 5350 { 5351 struct intel_context *ce = rq->context; 5352 __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs; 5353 __maybe_unused u32 *start_fini_breadcrumb_cs = cs; 5354 5355 GEM_BUG_ON(!intel_context_is_parent(ce)); 5356 5357 if (unlikely(skip_handshake(rq))) { 5358 /* 5359 * NOP everything in __emit_fini_breadcrumb_parent_no_preempt_mid_batch, 5360 * the NON_SKIP_LEN comes from the length of the emits below. 5361 */ 5362 memset(cs, 0, sizeof(u32) * 5363 (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN)); 5364 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN; 5365 } else { 5366 cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs); 5367 } 5368 5369 /* Emit fini breadcrumb */ 5370 before_fini_breadcrumb_user_interrupt_cs = cs; 5371 cs = gen8_emit_ggtt_write(cs, 5372 rq->fence.seqno, 5373 i915_request_active_timeline(rq)->hwsp_offset, 5374 0); 5375 5376 /* User interrupt */ 5377 *cs++ = MI_USER_INTERRUPT; 5378 *cs++ = MI_NOOP; 5379 5380 /* Ensure our math for skip + emit is correct */ 5381 GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN != 5382 cs); 5383 GEM_BUG_ON(start_fini_breadcrumb_cs + 5384 ce->engine->emit_fini_breadcrumb_dw != cs); 5385 5386 rq->tail = intel_ring_offset(rq, cs); 5387 5388 return cs; 5389 } 5390 5391 static u32 * 5392 __emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 5393 u32 *cs) 5394 { 5395 struct intel_context *ce = rq->context; 5396 struct intel_context *parent = intel_context_to_parent(ce); 5397 5398 GEM_BUG_ON(!intel_context_is_child(ce)); 5399 5400 /* Turn on preemption */ 5401 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 5402 *cs++ = MI_NOOP; 5403 5404 /* Signal parent */ 5405 cs = gen8_emit_ggtt_write(cs, 5406 PARENT_GO_FINI_BREADCRUMB, 5407 get_children_join_addr(parent, 5408 ce->parallel.child_index), 5409 0); 5410 5411 /* Wait parent on for go */ 5412 *cs++ = (MI_SEMAPHORE_WAIT | 5413 MI_SEMAPHORE_GLOBAL_GTT | 5414 MI_SEMAPHORE_POLL | 5415 MI_SEMAPHORE_SAD_EQ_SDD); 5416 *cs++ = CHILD_GO_FINI_BREADCRUMB; 5417 *cs++ = get_children_go_addr(parent); 5418 *cs++ = 0; 5419 5420 return cs; 5421 } 5422 5423 static u32 * 5424 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 5425 u32 *cs) 5426 { 5427 struct intel_context *ce = rq->context; 5428 __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs; 5429 __maybe_unused u32 *start_fini_breadcrumb_cs = cs; 5430 5431 GEM_BUG_ON(!intel_context_is_child(ce)); 5432 5433 if (unlikely(skip_handshake(rq))) { 5434 /* 5435 * NOP everything in __emit_fini_breadcrumb_child_no_preempt_mid_batch, 5436 * the NON_SKIP_LEN comes from the length of the emits below. 
5437 */ 5438 memset(cs, 0, sizeof(u32) * 5439 (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN)); 5440 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN; 5441 } else { 5442 cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs); 5443 } 5444 5445 /* Emit fini breadcrumb */ 5446 before_fini_breadcrumb_user_interrupt_cs = cs; 5447 cs = gen8_emit_ggtt_write(cs, 5448 rq->fence.seqno, 5449 i915_request_active_timeline(rq)->hwsp_offset, 5450 0); 5451 5452 /* User interrupt */ 5453 *cs++ = MI_USER_INTERRUPT; 5454 *cs++ = MI_NOOP; 5455 5456 /* Ensure our math for skip + emit is correct */ 5457 GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN != 5458 cs); 5459 GEM_BUG_ON(start_fini_breadcrumb_cs + 5460 ce->engine->emit_fini_breadcrumb_dw != cs); 5461 5462 rq->tail = intel_ring_offset(rq, cs); 5463 5464 return cs; 5465 } 5466 5467 #undef NON_SKIP_LEN 5468 5469 static struct intel_context * 5470 guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count, 5471 unsigned long flags) 5472 { 5473 struct guc_virtual_engine *ve; 5474 struct intel_guc *guc; 5475 unsigned int n; 5476 int err; 5477 5478 ve = kzalloc(sizeof(*ve), GFP_KERNEL); 5479 if (!ve) 5480 return ERR_PTR(-ENOMEM); 5481 5482 guc = &siblings[0]->gt->uc.guc; 5483 5484 ve->base.i915 = siblings[0]->i915; 5485 ve->base.gt = siblings[0]->gt; 5486 ve->base.uncore = siblings[0]->uncore; 5487 ve->base.id = -1; 5488 5489 ve->base.uabi_class = I915_ENGINE_CLASS_INVALID; 5490 ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; 5491 ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; 5492 ve->base.saturated = ALL_ENGINES; 5493 5494 snprintf(ve->base.name, sizeof(ve->base.name), "virtual"); 5495 5496 ve->base.sched_engine = i915_sched_engine_get(guc->sched_engine); 5497 5498 ve->base.cops = &virtual_guc_context_ops; 5499 ve->base.request_alloc = guc_request_alloc; 5500 ve->base.bump_serial = virtual_guc_bump_serial; 5501 5502 ve->base.submit_request = guc_submit_request; 5503 5504 ve->base.flags = I915_ENGINE_IS_VIRTUAL; 5505 5506 BUILD_BUG_ON(ilog2(VIRTUAL_ENGINES) < I915_NUM_ENGINES); 5507 ve->base.mask = VIRTUAL_ENGINES; 5508 5509 intel_context_init(&ve->context, &ve->base); 5510 5511 for (n = 0; n < count; n++) { 5512 struct intel_engine_cs *sibling = siblings[n]; 5513 5514 GEM_BUG_ON(!is_power_of_2(sibling->mask)); 5515 if (sibling->mask & ve->base.mask) { 5516 guc_dbg(guc, "duplicate %s entry in load balancer\n", 5517 sibling->name); 5518 err = -EINVAL; 5519 goto err_put; 5520 } 5521 5522 ve->base.mask |= sibling->mask; 5523 ve->base.logical_mask |= sibling->logical_mask; 5524 5525 if (n != 0 && ve->base.class != sibling->class) { 5526 guc_dbg(guc, "invalid mixing of engine class, sibling %d, already %d\n", 5527 sibling->class, ve->base.class); 5528 err = -EINVAL; 5529 goto err_put; 5530 } else if (n == 0) { 5531 ve->base.class = sibling->class; 5532 ve->base.uabi_class = sibling->uabi_class; 5533 snprintf(ve->base.name, sizeof(ve->base.name), 5534 "v%dx%d", ve->base.class, count); 5535 ve->base.context_size = sibling->context_size; 5536 5537 ve->base.add_active_request = 5538 sibling->add_active_request; 5539 ve->base.remove_active_request = 5540 sibling->remove_active_request; 5541 ve->base.emit_bb_start = sibling->emit_bb_start; 5542 ve->base.emit_flush = sibling->emit_flush; 5543 ve->base.emit_init_breadcrumb = 5544 sibling->emit_init_breadcrumb; 5545 ve->base.emit_fini_breadcrumb = 5546 sibling->emit_fini_breadcrumb; 5547 ve->base.emit_fini_breadcrumb_dw = 5548 
sibling->emit_fini_breadcrumb_dw; 5549 ve->base.breadcrumbs = 5550 intel_breadcrumbs_get(sibling->breadcrumbs); 5551 5552 ve->base.flags |= sibling->flags; 5553 5554 ve->base.props.timeslice_duration_ms = 5555 sibling->props.timeslice_duration_ms; 5556 ve->base.props.preempt_timeout_ms = 5557 sibling->props.preempt_timeout_ms; 5558 } 5559 } 5560 5561 return &ve->context; 5562 5563 err_put: 5564 intel_context_put(&ve->context); 5565 return ERR_PTR(err); 5566 } 5567 5568 bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve) 5569 { 5570 struct intel_engine_cs *engine; 5571 intel_engine_mask_t tmp, mask = ve->mask; 5572 5573 for_each_engine_masked(engine, ve->gt, mask, tmp) 5574 if (READ_ONCE(engine->props.heartbeat_interval_ms)) 5575 return true; 5576 5577 return false; 5578 } 5579 5580 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 5581 #include "selftest_guc.c" 5582 #include "selftest_guc_multi_lrc.c" 5583 #include "selftest_guc_hangcheck.c" 5584 #endif 5585